From b18d571895813c54df413daf84531b0e4fab2ebc Mon Sep 17 00:00:00 2001 From: Oseltamivir <58582368+Oseltamivir@users.noreply.github.com> Date: Thu, 25 Jun 2026 17:28:57 +0800 Subject: [PATCH 01/23] feat: add CollectiveX benchmark dashboard --- .github/workflows/update-collectivex-data.yml | 83 + .gitignore | 1 + packages/app/cypress/component/tab-nav.cy.tsx | 6 + packages/app/cypress/e2e/collectivex.cy.ts | 160 + .../app/cypress/fixtures/api/collectivex.json | 279 ++ packages/app/package.json | 3 +- packages/app/public/data/collectivex.json | 3115 +++++++++++++++++ .../app/scripts/generate-collectivex-data.ts | 38 + .../src/app/(dashboard)/collectivex/page.tsx | 10 + packages/app/src/app/sitemap.ts | 1 + .../collectivex/CollectiveXChart.tsx | 252 ++ .../collectivex/CollectiveXDisplay.tsx | 693 ++++ .../src/components/collectivex/data.test.ts | 279 ++ .../app/src/components/collectivex/data.ts | 388 ++ .../app/src/components/collectivex/types.ts | 92 + packages/app/src/components/header/header.tsx | 1 + packages/app/src/components/tab-nav.tsx | 1 + packages/app/src/hooks/api/use-collectivex.ts | 11 + packages/app/src/lib/api.test.ts | 14 + packages/app/src/lib/api.ts | 15 +- .../app/src/lib/collectivex-snapshot.test.ts | 236 ++ packages/app/src/lib/collectivex-snapshot.ts | 212 ++ .../app/src/lib/d3-chart/layers/lines.test.ts | 22 + packages/app/src/lib/d3-chart/layers/lines.ts | 2 + packages/app/src/lib/tab-meta.ts | 6 + 25 files changed, 5917 insertions(+), 3 deletions(-) create mode 100644 .github/workflows/update-collectivex-data.yml create mode 100644 packages/app/cypress/e2e/collectivex.cy.ts create mode 100644 packages/app/cypress/fixtures/api/collectivex.json create mode 100644 packages/app/public/data/collectivex.json create mode 100644 packages/app/scripts/generate-collectivex-data.ts create mode 100644 packages/app/src/app/(dashboard)/collectivex/page.tsx create mode 100644 packages/app/src/components/collectivex/CollectiveXChart.tsx create mode 100644 
packages/app/src/components/collectivex/CollectiveXDisplay.tsx create mode 100644 packages/app/src/components/collectivex/data.test.ts create mode 100644 packages/app/src/components/collectivex/data.ts create mode 100644 packages/app/src/components/collectivex/types.ts create mode 100644 packages/app/src/hooks/api/use-collectivex.ts create mode 100644 packages/app/src/lib/collectivex-snapshot.test.ts create mode 100644 packages/app/src/lib/collectivex-snapshot.ts diff --git a/.github/workflows/update-collectivex-data.yml b/.github/workflows/update-collectivex-data.yml new file mode 100644 index 00000000..3bc75204 --- /dev/null +++ b/.github/workflows/update-collectivex-data.yml @@ -0,0 +1,83 @@ +name: Update CollectiveX Data + +on: + workflow_dispatch: + inputs: + source_run_id: + description: Optional CollectiveX workflow run ID to include before it reaches completed status + type: string + required: false + default: '' + repository_dispatch: + types: [update-collectivex-data] + +concurrency: + group: update-collectivex-data + cancel-in-progress: false + +permissions: + contents: write + +jobs: + update: + timeout-minutes: 15 + runs-on: ubuntu-latest + env: + DEFAULT_BRANCH: ${{ github.event.repository.default_branch }} + SOURCE_RUN_ID: ${{ github.event.client_payload.source_run_id || inputs.source_run_id || '' }} + steps: + - name: Checkout app repository + uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 + with: + ref: ${{ github.event.repository.default_branch }} + token: ${{ secrets.PAT }} + fetch-depth: 0 + + - name: Setup pnpm + uses: pnpm/action-setup@0e279bb959325dab635dd2c09392533439d90093 # v6.0.8 + + - name: Setup Node.js + uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0 + with: + node-version: '24' + cache: pnpm + + - name: Install app dependencies + run: pnpm install --frozen-lockfile --filter @semianalysisai/inferencex-app... 
+ env: + CYPRESS_INSTALL_BINARY: '0' + + - name: Generate static CollectiveX snapshot + env: + GITHUB_TOKEN: ${{ secrets.INFX_MAIN_PAT }} + run: | + set -euo pipefail + if [ -n "$SOURCE_RUN_ID" ]; then + pnpm --filter @semianalysisai/inferencex-app generate:collectivex -- \ + --source-run-id "$SOURCE_RUN_ID" + else + pnpm --filter @semianalysisai/inferencex-app generate:collectivex + fi + + - name: Commit snapshot when data changed + run: | + set -euo pipefail + SNAPSHOT=packages/app/public/data/collectivex.json + git add "$SNAPSHOT" + if git diff --cached --quiet; then + echo "CollectiveX snapshot is already current." >> "$GITHUB_STEP_SUMMARY" + exit 0 + fi + + git config user.name "InferenceX Data Bot" + git config user.email "actions@users.noreply.github.com" + git commit -m "chore: update CollectiveX data" + git pull --rebase origin "$DEFAULT_BRANCH" + git push origin "HEAD:$DEFAULT_BRANCH" + + { + echo "Updated \`$SNAPSHOT\`." + if [ -n "$SOURCE_RUN_ID" ]; then + echo "Included source run: \`$SOURCE_RUN_ID\`." 
+ fi + } >> "$GITHUB_STEP_SUMMARY" diff --git a/.gitignore b/.gitignore index a86f6e23..18acd7ef 100644 --- a/.gitignore +++ b/.gitignore @@ -52,6 +52,7 @@ **/public/data/* !**/public/data/github-stars.json !**/public/data/evaluation/dummy_eval_data.json +!**/public/data/collectivex.json # cypress **/cypress/videos/ diff --git a/packages/app/cypress/component/tab-nav.cy.tsx b/packages/app/cypress/component/tab-nav.cy.tsx index 2c24d256..31229ac7 100644 --- a/packages/app/cypress/component/tab-nav.cy.tsx +++ b/packages/app/cypress/component/tab-nav.cy.tsx @@ -70,6 +70,11 @@ describe('TabNav — unofficialrun URL preservation (issue #319)', () => { 'href', '/submissions?unofficialruns=12345', ); + cy.get('[data-testid="tab-trigger-collectivex"]').should( + 'have.attr', + 'href', + '/collectivex?unofficialruns=12345', + ); cy.get('[data-testid="tab-trigger-historical"]').should( 'have.attr', 'href', @@ -109,6 +114,7 @@ describe('TabNav — Hidden popover for gated tabs', () => { mountTabNav({}); cy.get('[data-testid="tab-trigger-inference"]').should('exist'); cy.get('[data-testid="tab-trigger-gpu-specs"]').should('exist'); + cy.get('[data-testid="tab-trigger-collectivex"]').should('exist'); cy.get('[data-testid="tab-trigger-submissions"]').should('exist'); cy.get('[data-testid="tab-trigger-hidden"]').should('not.exist'); cy.get('[data-testid="tab-trigger-feedback"]').should('not.exist'); diff --git a/packages/app/cypress/e2e/collectivex.cy.ts b/packages/app/cypress/e2e/collectivex.cy.ts new file mode 100644 index 00000000..38dc6d33 --- /dev/null +++ b/packages/app/cypress/e2e/collectivex.cy.ts @@ -0,0 +1,160 @@ +function expectToggleOptions(testId: string, labels: string[]) { + cy.get(`[data-testid="${testId}"]`) + .find('button') + .then(($buttons) => { + expect($buttons.toArray().map((button) => button.textContent?.trim())).to.deep.equal(labels); + }); +} + +describe('CollectiveX', () => { + beforeEach(() => { + cy.intercept('GET', '/data/collectivex.json', { fixture: 
'api/collectivex.json' }).as( + 'collectivexData', + ); + cy.visit('/collectivex'); + cy.wait('@collectivexData'); + }); + + it('renders the artifact-backed explorer and latency overview', () => { + cy.get('[data-testid="collectivex-display"]').should('contain.text', 'CollectiveX'); + cy.get('[data-testid="collectivex-explorer-chart"] svg').should('be.visible'); + cy.get('[data-testid="collectivex-explorer-chart"] .line-path').should('have.length', 2); + cy.get('[data-testid="collectivex-explorer-chart"] .point').should('have.length', 4); + cy.get('[data-testid="collectivex-comparison-warning"]') + .should('contain.text', 'Not directly comparable') + .and('contain.text', 'resource mode'); + cy.get('[data-testid^="collectivex-overview-chart-"]').should('have.length', 12); + cy.get('[data-testid="collectivex-overview-decode-ep4"]').should('exist'); + cy.get('[data-testid="collectivex-overview-decode-ep8"]').should('exist'); + cy.get('[data-testid="collectivex-overview-prefill-ep4"]').should('exist'); + cy.get('[data-testid="collectivex-overview-prefill-ep8"]').should('exist'); + }); + + it('exposes and applies every control from the generated v3 report', () => { + expectToggleOptions('collectivex-operation-toggle', ['Dispatch', 'Combine', 'Serial']); + expectToggleOptions('collectivex-phase-toggle', ['Decode', 'Prefill']); + expectToggleOptions('collectivex-percentile-toggle', ['p50', 'p90', 'p99']); + expectToggleOptions('collectivex-suite-toggle', [ + 'All', + 'Backend default', + 'Resource constrained', + ]); + expectToggleOptions('collectivex-x-scale-toggle', ['Log', 'Linear']); + expectToggleOptions('collectivex-y-scale-toggle', ['Log', 'Linear']); + + cy.get('[data-testid="collectivex-operation-toggle"]').contains('button', 'Combine').click(); + cy.get('[data-testid="collectivex-main-chart"]').should( + 'contain.text', + 'Combine · decode · p50', + ); + cy.get('[data-testid="collectivex-operation-toggle"]').contains('button', 'Serial').click(); + 
cy.get('[data-testid="collectivex-main-chart"]').should('contain.text', 'Serial'); + cy.get('[data-testid="collectivex-operation-toggle"]').contains('button', 'Dispatch').click(); + + cy.get('[data-testid="collectivex-phase-toggle"]').contains('button', 'Prefill').click(); + cy.get('[data-testid="collectivex-main-chart"]').should('contain.text', 'Dispatch · prefill'); + cy.get('[data-testid="collectivex-phase-toggle"]').contains('button', 'Decode').click(); + + cy.get('[data-testid="collectivex-percentile-toggle"]').contains('button', 'p90').click(); + cy.get('[data-testid="collectivex-main-chart"]').should('contain.text', 'decode · p90'); + cy.get('[data-testid="collectivex-percentile-toggle"]').contains('button', 'p99').click(); + cy.get('[data-testid="collectivex-main-chart"]').should('contain.text', 'decode · p99'); + cy.get('[data-testid="collectivex-percentile-toggle"]').contains('button', 'p50').click(); + + cy.get('[data-testid="collectivex-suite-toggle"]') + .contains('button', 'Backend default') + .click(); + cy.get('[data-testid="collectivex-suite-toggle"]') + .contains('button', 'Resource constrained') + .click(); + cy.get('[data-testid="collectivex-suite-toggle"]').contains('button', 'All').click(); + + cy.get('[data-testid="collectivex-x-axis-select"]').click(); + cy.get('[role="option"]').then(($options) => { + expect($options.toArray().map((option) => option.textContent?.trim())).to.deep.equal([ + 'Source tokens / rank', + 'Global source tokens', + ]); + }); + cy.contains('[role="option"]', 'Global source tokens').click(); + cy.get('[data-testid="collectivex-x-axis-select"]').should( + 'contain.text', + 'Global source tokens', + ); + cy.get('[data-testid="collectivex-x-axis-select"]').click(); + cy.contains('[role="option"]', 'Source tokens / rank').click(); + + cy.get('[data-testid="collectivex-y-axis-select"]').click(); + cy.get('[role="option"]').then(($options) => { + expect($options.toArray().map((option) => 
option.textContent?.trim())).to.deep.equal([ + 'Latency', + 'Tokens / second', + 'Logical routed payload rate', + ]); + }); + cy.contains('[role="option"]', 'Tokens / second').click(); + cy.get('[data-testid="collectivex-main-chart"]').should( + 'contain.text', + 'Tokens / second versus', + ); + cy.get('[data-testid="collectivex-y-axis-select"]').click(); + cy.contains('[role="option"]', 'Logical routed payload rate').click(); + cy.get('[data-testid="collectivex-main-chart"]').should( + 'contain.text', + 'Logical payload rate versus', + ); + cy.get('[data-testid="collectivex-y-axis-select"]').click(); + cy.contains('[role="option"]', 'Latency').click(); + + cy.get('[data-testid="collectivex-x-scale-toggle"]').contains('button', 'Linear').click(); + cy.get('[data-testid="collectivex-x-scale-toggle"]') + .contains('button', 'Linear') + .should('have.attr', 'aria-selected', 'true'); + cy.get('[data-testid="collectivex-x-scale-toggle"]').contains('button', 'Log').click(); + cy.get('[data-testid="collectivex-y-scale-toggle"]').contains('button', 'Linear').click(); + cy.get('[data-testid="collectivex-y-scale-toggle"]') + .contains('button', 'Linear') + .should('have.attr', 'aria-selected', 'true'); + cy.get('[data-testid="collectivex-y-scale-toggle"]').contains('button', 'Log').click(); + }); + + it('updates the rendered curve when the percentile changes', () => { + cy.get('[data-testid="collectivex-explorer-chart"] .line-path') + .first() + .invoke('attr', 'd') + .then((p50Path) => { + cy.get('[data-testid="collectivex-percentile-toggle"]').contains('button', 'p99').click(); + cy.get('[data-testid="collectivex-main-chart"]').should( + 'contain.text', + 'Dispatch · decode · p99', + ); + cy.get('[data-testid="collectivex-explorer-chart"] .line-path') + .first() + .invoke('attr', 'd') + .should('not.eq', p50Path); + }); + }); + + it('stitches matching decode points into the prefill curves', () => { + cy.get('[data-testid="collectivex-phase-toggle"]').contains('button', 
'Prefill').click(); + cy.get('[data-testid="collectivex-main-chart"]').should('contain.text', 'Dispatch · prefill'); + cy.get('[data-testid="collectivex-explorer-chart"] .point').should('have.length', 8); + }); + + it('filters to the resource-constrained comparison suite', () => { + cy.get('[data-testid="collectivex-suite-toggle"]') + .contains('button', 'Resource constrained') + .click(); + cy.get('[data-testid="collectivex-explorer-chart"] .line-path').should('have.length', 1); + cy.get('[data-testid="collectivex-comparison-warning"]').should('not.exist'); + }); + + it('legend toggles remove and restore a rendered series', () => { + cy.get('[data-testid="collectivex-main-chart"]').within(() => { + cy.contains('label', 'H100 · deepep · bf16 · EP4 · comm only').click(); + }); + cy.get('[data-testid="collectivex-explorer-chart"] .line-path').should('have.length', 1); + cy.get('[data-testid="collectivex-main-chart"]').contains('button', 'Reset filter').click(); + cy.get('[data-testid="collectivex-explorer-chart"] .line-path').should('have.length', 2); + }); +}); diff --git a/packages/app/cypress/fixtures/api/collectivex.json b/packages/app/cypress/fixtures/api/collectivex.json new file mode 100644 index 00000000..05bc6e92 --- /dev/null +++ b/packages/app/cypress/fixtures/api/collectivex.json @@ -0,0 +1,279 @@ +{ + "scannedRuns": 12, + "contributingRuns": 4, + "generatedAt": "2026-06-25T08:31:09.000Z", + "series": [ + { + "id": "cx-h100-decode", + "identity": "h100|deepep|decode", + "stitchKey": "h100|deepep|fixed", + "colorKey": "h100_fixture", + "schemaVersion": 3, + "generatedAt": "2026-06-24T22:50:09.000Z", + "status": "valid", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "cached-layout-comm-only-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 4, + "epSize": 4, + "label": 
"H100 · deepep · bf16 · EP4 · comm only", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "dispatchDtype": "bf16" + }, + "routingConsistent": true, + "traceSignature": "h100-trace", + "backendVersion": "1.2.1", + "run": { + "id": "28134642131", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28134642131", + "createdAt": "2026-06-24T22:49:12Z", + "sha": "h100sha" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { "p50": 60, "p90": 72, "p99": 90 }, + "combine": { "p50": 70, "p90": 82, "p99": 100 }, + "serial": { "p50": 130, "p90": 154, "p99": 190 }, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 256, + "dispatch": { "p50": 90, "p90": 110, "p99": 145 }, + "combine": { "p50": 105, "p90": 130, "p99": 170 }, + "serial": { "p50": 195, "p90": 240, "p99": 315 }, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.31, + "recvTokensMax": 367, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-h100-prefill", + "identity": "h100|deepep|prefill", + "stitchKey": "h100|deepep|fixed", + "colorKey": "h100_fixture", + "schemaVersion": 3, + "generatedAt": "2026-06-24T22:50:10.000Z", + "status": "valid", + "sku": "h100", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "cached-layout-comm-only-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 4, + "epSize": 4, + "label": "H100 · deepep · bf16 · EP4 · comm only", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "dispatchDtype": "bf16" + }, + "routingConsistent": true, + "traceSignature": "h100-trace", + 
"backendVersion": "1.2.1", + "run": { + "id": "28134642131", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28134642131", + "createdAt": "2026-06-24T22:49:12Z", + "sha": "h100sha" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { "p50": 125, "p90": 150, "p99": 190 }, + "combine": { "p50": 150, "p90": 180, "p99": 230 }, + "serial": { "p50": 275, "p90": 330, "p99": 420 }, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.29, + "recvTokensMax": 723, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 2048, + "dispatch": { "p50": 220, "p90": 260, "p99": 320 }, + "combine": { "p50": 300, "p90": 360, "p99": 440 }, + "serial": { "p50": 520, "p90": 620, "p99": 760 }, + "dispatchLogicalBytes": 312266752, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.32, + "recvTokensMax": 2779, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-mi355x-decode", + "identity": "mi355x|mori|decode", + "stitchKey": "mi355x|mori|fixed", + "colorKey": "mi355x_fixture", + "schemaVersion": 3, + "generatedAt": "2026-06-25T08:31:09.000Z", + "status": "valid", + "sku": "mi355x", + "backend": "mori", + "phase": "decode", + "mode": "normal", + "resourceMode": "normalized", + "suite": "resource-constrained", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "mi355x-xgmi", + "transport": "xgmi", + "worldSize": 8, + "epSize": 8, + "label": "MI355X · mori · bf16 · EP8 · normalized · layout + dispatch", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "dispatchDtype": "bf16" + }, + "routingConsistent": true, + "traceSignature": "mi355x-trace", + "backendVersion": "mori-0227", + "run": { + "id": "28156624181", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28156624181", + "createdAt": 
"2026-06-25T08:17:23Z", + "sha": "mi355xsha" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { "p50": 41, "p90": 45, "p99": 63 }, + "combine": { "p50": 18, "p90": 20, "p99": 24 }, + "serial": { "p50": 59, "p90": 65, "p99": 87 }, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { "p50": 70, "p90": 80, "p99": 95 }, + "combine": { "p50": 45, "p90": 52, "p99": 70 }, + "serial": { "p50": 115, "p90": 132, "p99": 165 }, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.31, + "recvTokensMax": 367, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-mi355x-prefill", + "identity": "mi355x|mori|prefill", + "stitchKey": "mi355x|mori|fixed", + "colorKey": "mi355x_fixture", + "schemaVersion": 3, + "generatedAt": "2026-06-24T01:59:40.000Z", + "status": "valid", + "sku": "mi355x", + "backend": "mori", + "phase": "prefill", + "mode": "normal", + "resourceMode": "normalized", + "suite": "resource-constrained", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "mi355x-xgmi", + "transport": "xgmi", + "worldSize": 8, + "epSize": 8, + "label": "MI355X · mori · bf16 · EP8 · normalized · layout + dispatch", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "dispatchDtype": "bf16" + }, + "routingConsistent": true, + "traceSignature": "mi355x-trace", + "backendVersion": "mori-0227", + "run": { + "id": "28069889124", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28069889124", + "createdAt": "2026-06-24T01:57:55Z", + "sha": "mi355xprefillsha" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { "p50": 95, "p90": 110, "p99": 135 }, + "combine": { "p50": 75, 
"p90": 90, "p99": 115 }, + "serial": { "p50": 170, "p90": 200, "p99": 250 }, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.29, + "recvTokensMax": 723, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { "p50": 180, "p90": 210, "p99": 260 }, + "combine": { "p50": 230, "p90": 275, "p99": 340 }, + "serial": { "p50": 410, "p90": 485, "p99": 600 }, + "dispatchLogicalBytes": 312266752, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.32, + "recvTokensMax": 2779, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + } + ] +} diff --git a/packages/app/package.json b/packages/app/package.json index d1a41ae9..b743a80b 100644 --- a/packages/app/package.json +++ b/packages/app/package.json @@ -26,7 +26,8 @@ "clean:all": "rimraf .next out cypress/videos cypress/screenshots coverage", "cache:invalidate": "dotenv -e ../../.env -- tsx scripts/invalidate-cache.ts", "cache:warmup": "dotenv -e ../../.env -- tsx scripts/warmup-cache.ts", - "capture:fixtures": "tsx scripts/capture-cypress-fixtures.ts" + "capture:fixtures": "tsx scripts/capture-cypress-fixtures.ts", + "generate:collectivex": "tsx scripts/generate-collectivex-data.ts" }, "dependencies": { "@chenglou/pretext": "^0.0.8", diff --git a/packages/app/public/data/collectivex.json b/packages/app/public/data/collectivex.json new file mode 100644 index 00000000..9a3b491d --- /dev/null +++ b/packages/app/public/data/collectivex.json @@ -0,0 +1,3115 @@ +{ + "series": [ + { + "id": "cx-4bc828ab3634ef77", + "identity": "b200|deepep|decode|normal|tuned|standardized|deepep-normal-v1|b200-nvlink-island|8|8|bf16|balanced|7168|8|256", + "stitchKey": "b200|deepep|normal|tuned|standardized|deepep-normal-v1|b200-nvlink-island|8|8|bf16|balanced|7168|8|256", + "colorKey": "b200_418405a4", + "schemaVersion": 1, + "generatedAt": "2026-06-24T01:53:08.683564+00:00", + "status": "valid", + "sku": "b200", + 
"backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "deepep-normal-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 · deepep · bf16 · EP8", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced", + "dispatchDtype": "bf16" + }, + "routingConsistent": null, + "traceSignature": null, + "backendVersion": "unknown", + "run": { + "id": "28069683835", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28069683835", + "createdAt": "2026-06-24T01:52:05Z", + "sha": "368cfbc6390cf69b864dedc121a79a12114b716b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 76.9599974155426, + "p90": 76.9599974155426, + "p99": 106.55999928712845 + }, + "combine": { + "p50": 67.61600077152252, + "p90": 67.61600077152252, + "p99": 93.05600076913834 + }, + "serial": { + "p50": 126.46399438381195, + "p90": 126.46399438381195, + "p99": 156.6080003976822 + }, + "dispatchLogicalBytes": 114688, + "combineLogicalBytes": 0, + "fanoutMean": null, + "recvTokensMax": 8, + "correct": true, + "samplesPooled": null, + "trials": null + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 74.78400319814682, + "p90": 74.78400319814682, + "p99": 104.86400127410889 + }, + "combine": { + "p50": 76.4480009675026, + "p90": 76.4480009675026, + "p99": 87.00799942016602 + }, + "serial": { + "p50": 127.26399302482605, + "p90": 127.26399302482605, + "p99": 155.2640050649643 + }, + "dispatchLogicalBytes": 215040, + "combineLogicalBytes": 0, + "fanoutMean": null, + "recvTokensMax": 15, + "correct": true, + "samplesPooled": null, + "trials": null + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 75.32799988985062, + "p90": 75.32799988985062, + "p99": 102.08000242710114 + }, + "combine": { + 
"p50": 77.05599814653397, + "p90": 77.05599814653397, + "p99": 100.25600343942642 + }, + "serial": { + "p50": 139.0720009803772, + "p90": 139.0720009803772, + "p99": 158.11200439929962 + }, + "dispatchLogicalBytes": 372736, + "combineLogicalBytes": 0, + "fanoutMean": null, + "recvTokensMax": 26, + "correct": true, + "samplesPooled": null, + "trials": null + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 74.78400319814682, + "p90": 74.78400319814682, + "p99": 99.35999661684036 + }, + "combine": { + "p50": 77.88799703121185, + "p90": 77.88799703121185, + "p99": 94.52799707651138 + }, + "serial": { + "p50": 136.19199395179749, + "p90": 136.19199395179749, + "p99": 169.76000368595123 + }, + "dispatchLogicalBytes": 673792, + "combineLogicalBytes": 0, + "fanoutMean": null, + "recvTokensMax": 47, + "correct": true, + "samplesPooled": null, + "trials": null + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 76.73600316047668, + "p90": 76.73600316047668, + "p99": 108.0000028014183 + }, + "combine": { + "p50": 78.52800190448761, + "p90": 78.52800190448761, + "p99": 96.47999703884125 + }, + "serial": { + "p50": 136.25599443912506, + "p90": 136.25599443912506, + "p99": 172.5119948387146 + }, + "dispatchLogicalBytes": 1333248, + "combineLogicalBytes": 0, + "fanoutMean": null, + "recvTokensMax": 93, + "correct": true, + "samplesPooled": null, + "trials": null + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 80.28800040483475, + "p90": 80.28800040483475, + "p99": 109.0880036354065 + }, + "combine": { + "p50": 79.48800176382065, + "p90": 79.48800176382065, + "p99": 99.2640033364296 + }, + "serial": { + "p50": 143.8719928264618, + "p90": 143.8719928264618, + "p99": 174.112007021904 + }, + "dispatchLogicalBytes": 2680832, + "combineLogicalBytes": 0, + "fanoutMean": null, + "recvTokensMax": 187, + "correct": true, + "samplesPooled": null, + "trials": null + }, + { + "tokensPerRank": 64, + 
"globalTokens": 512, + "dispatch": { + "p50": 87.45600283145905, + "p90": 87.45600283145905, + "p99": 112.0000034570694 + }, + "combine": { + "p50": 101.18400305509567, + "p90": 101.18400305509567, + "p99": 126.20800733566284 + }, + "serial": { + "p50": 168.89600455760956, + "p90": 168.89600455760956, + "p99": 189.98399376869202 + }, + "dispatchLogicalBytes": 5089280, + "combineLogicalBytes": 0, + "fanoutMean": null, + "recvTokensMax": 355, + "correct": true, + "samplesPooled": null, + "trials": null + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 105.34399747848511, + "p90": 105.34399747848511, + "p99": 125.5359947681427 + }, + "combine": { + "p50": 115.23199826478958, + "p90": 115.23199826478958, + "p99": 150.43200552463531 + }, + "serial": { + "p50": 198.43199849128723, + "p90": 198.43199849128723, + "p99": 219.200000166893 + }, + "dispatchLogicalBytes": 9906176, + "combineLogicalBytes": 0, + "fanoutMean": null, + "recvTokensMax": 691, + "correct": true, + "samplesPooled": null, + "trials": null + } + ] + }, + { + "id": "cx-ccc512683c0a4e2e", + "identity": "b200|deepep|prefill|normal|tuned|standardized|deepep-normal-v1|b200-nvlink-island|8|8|bf16|balanced|7168|8|256", + "stitchKey": "b200|deepep|normal|tuned|standardized|deepep-normal-v1|b200-nvlink-island|8|8|bf16|balanced|7168|8|256", + "colorKey": "b200_418405a4", + "schemaVersion": 1, + "generatedAt": "2026-06-24T01:53:06.799084+00:00", + "status": "valid", + "sku": "b200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "deepep-normal-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 · deepep · bf16 · EP8", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced", + "dispatchDtype": "bf16" + }, + "routingConsistent": null, + "traceSignature": 
null, + "backendVersion": "unknown", + "run": { + "id": "28069683835", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28069683835", + "createdAt": "2026-06-24T01:52:05Z", + "sha": "368cfbc6390cf69b864dedc121a79a12114b716b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 111.61600053310394, + "p90": 111.61600053310394, + "p99": 154.59200739860535 + }, + "combine": { + "p50": 118.14399808645248, + "p90": 118.14399808645248, + "p99": 136.1600011587143 + }, + "serial": { + "p50": 208.12800526618958, + "p90": 208.12800526618958, + "p99": 244.7039932012558 + }, + "dispatchLogicalBytes": 9977856, + "combineLogicalBytes": 0, + "fanoutMean": null, + "recvTokensMax": 696, + "correct": true, + "samplesPooled": null, + "trials": null + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 140.70400595664978, + "p90": 140.70400595664978, + "p99": 164.76799547672272 + }, + "combine": { + "p50": 152.73599326610565, + "p90": 152.73599326610565, + "p99": 173.24799299240112 + }, + "serial": { + "p50": 270.01601457595825, + "p90": 270.01601457595825, + "p99": 344.5119857788086 + }, + "dispatchLogicalBytes": 19841024, + "combineLogicalBytes": 0, + "fanoutMean": null, + "recvTokensMax": 1384, + "correct": true, + "samplesPooled": null, + "trials": null + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 193.59999895095825, + "p90": 193.59999895095825, + "p99": 220.96000611782074 + }, + "combine": { + "p50": 248.22400510311127, + "p90": 248.22400510311127, + "p99": 265.50400257110596 + }, + "serial": { + "p50": 419.5519983768463, + "p90": 419.5519983768463, + "p99": 437.824010848999 + }, + "dispatchLogicalBytes": 39380992, + "combineLogicalBytes": 0, + "fanoutMean": null, + "recvTokensMax": 2747, + "correct": true, + "samplesPooled": null, + "trials": null + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 298.335999250412, + "p90": 
298.335999250412, + "p99": 331.9680094718933 + }, + "combine": { + "p50": 413.34399580955505, + "p90": 413.34399580955505, + "p99": 439.9360120296478 + }, + "serial": { + "p50": 692.7679777145386, + "p90": 692.7679777145386, + "p99": 731.3920259475708 + }, + "dispatchLogicalBytes": 79077376, + "combineLogicalBytes": 0, + "fanoutMean": null, + "recvTokensMax": 5516, + "correct": true, + "samplesPooled": null, + "trials": null + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 507.04002380371094, + "p90": 507.04002380371094, + "p99": 531.1040282249451 + }, + "combine": { + "p50": 708.1279754638672, + "p90": 708.1279754638672, + "p99": 725.055992603302 + }, + "serial": { + "p50": 1193.6960220336914, + "p90": 1193.6960220336914, + "p99": 1213.1199836730957 + }, + "dispatchLogicalBytes": 156864512, + "combineLogicalBytes": 0, + "fanoutMean": null, + "recvTokensMax": 10942, + "correct": true, + "samplesPooled": null, + "trials": null + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 925.5679845809937, + "p90": 925.5679845809937, + "p99": 948.8000273704529 + }, + "combine": { + "p50": 1307.1680068969727, + "p90": 1307.1680068969727, + "p99": 1337.0239734649658 + }, + "serial": { + "p50": 2205.2481174468994, + "p90": 2205.2481174468994, + "p99": 2238.879919052124 + }, + "dispatchLogicalBytes": 312395776, + "combineLogicalBytes": 0, + "fanoutMean": null, + "recvTokensMax": 21791, + "correct": true, + "samplesPooled": null, + "trials": null + } + ] + }, + { + "id": "cx-cd8f8fb6c8b34ff2", + "identity": "b300|deepep|decode|normal|tuned|standardized|comm-only-v1|b300-nvlink-island|8|8|bf16|uniform|7168|8|256", + "stitchKey": "b300|deepep|normal|tuned|standardized|comm-only-v1|b300-nvlink-island|8|8|bf16|uniform|7168|8|256", + "colorKey": "b300_b219a378", + "schemaVersion": 2, + "generatedAt": "2026-06-24T23:33:09.035182+00:00", + "status": "valid", + "sku": "b300", + "backend": "deepep", + "phase": "decode", + 
"mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "comm-only-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 · deepep · bf16 · EP8 · comm only", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "dispatchDtype": "bf16" + }, + "routingConsistent": null, + "traceSignature": null, + "backendVersion": "1.2.1", + "run": { + "id": "28135639401", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28135639401", + "createdAt": "2026-06-24T23:12:52Z", + "sha": "4e217f93fda64a43d32a46f1e57325ff848148d8" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 56.992001831531525, + "p90": 56.992001831531525, + "p99": 81.24800026416779 + }, + "combine": { + "p50": 66.75200164318085, + "p90": 66.75200164318085, + "p99": 73.02399724721909 + }, + "serial": { + "p50": 123.74400347471237, + "p90": 123.74400347471237, + "p99": 154.27199751138687 + }, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "correct": true, + "samplesPooled": null, + "trials": null + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 56.60799890756607, + "p90": 56.60799890756607, + "p99": 64.96000289916992 + }, + "combine": { + "p50": 66.97600334882736, + "p90": 66.97600334882736, + "p99": 90.2400016784668 + }, + "serial": { + "p50": 123.58400225639343, + "p90": 123.58400225639343, + "p99": 155.20000457763672 + }, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "correct": true, + "samplesPooled": null, + "trials": null + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 57.37600103020668, + "p90": 57.37600103020668, + "p99": 78.27199995517731 + }, + "combine": { + "p50": 
67.1359971165657, + "p90": 67.1359971165657, + "p99": 79.3600007891655 + }, + "serial": { + "p50": 124.51199814677238, + "p90": 124.51199814677238, + "p99": 157.6320007443428 + }, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "correct": true, + "samplesPooled": null, + "trials": null + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 59.61599946022034, + "p90": 59.61599946022034, + "p99": 90.36800265312195 + }, + "combine": { + "p50": 69.43999975919724, + "p90": 69.43999975919724, + "p99": 79.55200225114822 + }, + "serial": { + "p50": 129.05599921941757, + "p90": 129.05599921941757, + "p99": 169.92000490427017 + }, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "correct": true, + "samplesPooled": null, + "trials": null + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 63.26399743556976, + "p90": 63.26399743556976, + "p99": 78.23999971151352 + }, + "combine": { + "p50": 69.43999975919724, + "p90": 69.43999975919724, + "p99": 79.83999699354172 + }, + "serial": { + "p50": 132.703997194767, + "p90": 132.703997194767, + "p99": 158.07999670505524 + }, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "correct": true, + "samplesPooled": null, + "trials": null + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 68.89600306749344, + "p90": 68.89600306749344, + "p99": 88.22400122880936 + }, + "combine": { + "p50": 78.62400263547897, + "p90": 78.62400263547897, + "p99": 86.2400010228157 + }, + "serial": { + "p50": 147.5200057029724, + "p90": 147.5200057029724, + "p99": 174.46400225162506 + }, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "correct": true, + "samplesPooled": null, + "trials": null + }, + { + 
"tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 85.7279971241951, + "p90": 85.7279971241951, + "p99": 98.01600128412247 + }, + "combine": { + "p50": 91.36000275611877, + "p90": 91.36000275611877, + "p99": 102.01600193977356 + }, + "serial": { + "p50": 177.08799988031387, + "p90": 177.08799988031387, + "p99": 200.03200322389603 + }, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "correct": true, + "samplesPooled": null, + "trials": null + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 93.40800344944, + "p90": 93.40800344944, + "p99": 103.45599800348282 + }, + "combine": { + "p50": 115.03999680280685, + "p90": 115.03999680280685, + "p99": 126.91199779510498 + }, + "serial": { + "p50": 208.44800025224686, + "p90": 208.44800025224686, + "p99": 230.3679957985878 + }, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "correct": true, + "samplesPooled": null, + "trials": null + } + ] + }, + { + "id": "cx-43f1fda706ca7bdd", + "identity": "b300|deepep|prefill|normal|tuned|standardized|comm-only-v1|b300-nvlink-island|8|8|bf16|uniform|7168|8|256", + "stitchKey": "b300|deepep|normal|tuned|standardized|comm-only-v1|b300-nvlink-island|8|8|bf16|uniform|7168|8|256", + "colorKey": "b300_b219a378", + "schemaVersion": 2, + "generatedAt": "2026-06-24T23:33:10.949608+00:00", + "status": "valid", + "sku": "b300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "comm-only-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 · deepep · bf16 · EP8 · comm only", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "dispatchDtype": "bf16" + }, + 
"routingConsistent": null, + "traceSignature": null, + "backendVersion": "1.2.1", + "run": { + "id": "28135639401", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28135639401", + "createdAt": "2026-06-24T23:12:52Z", + "sha": "4e217f93fda64a43d32a46f1e57325ff848148d8" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 94.7519987821579, + "p90": 94.7519987821579, + "p99": 267.13600754737854 + }, + "combine": { + "p50": 115.52000045776367, + "p90": 115.52000045776367, + "p99": 118.81600320339203 + }, + "serial": { + "p50": 210.27199923992157, + "p90": 210.27199923992157, + "p99": 385.95201075077057 + }, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "correct": true, + "samplesPooled": null, + "trials": null + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 136.3839954137802, + "p90": 136.3839954137802, + "p99": 144.54400539398193 + }, + "combine": { + "p50": 153.60000729560852, + "p90": 153.60000729560852, + "p99": 165.47200083732605 + }, + "serial": { + "p50": 289.98400270938873, + "p90": 289.98400270938873, + "p99": 310.016006231308 + }, + "dispatchLogicalBytes": 155889664, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "correct": true, + "samplesPooled": null, + "trials": null + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 194.59199905395508, + "p90": 194.59199905395508, + "p99": 215.7440036535263 + }, + "combine": { + "p50": 273.50398898124695, + "p90": 273.50398898124695, + "p99": 280.19198775291443 + }, + "serial": { + "p50": 468.095988035202, + "p90": 468.095988035202, + "p99": 495.93599140644073 + }, + "dispatchLogicalBytes": 312266752, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "correct": true, + "samplesPooled": null, + "trials": null + }, + { + 
"tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 325.3439962863922, + "p90": 325.3439962863922, + "p99": 330.7519853115082 + }, + "combine": { + "p50": 459.1679871082306, + "p90": 459.1679871082306, + "p99": 482.08001255989075 + }, + "serial": { + "p50": 784.5119833946228, + "p90": 784.5119833946228, + "p99": 812.8319978713989 + }, + "dispatchLogicalBytes": 623443968, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "correct": true, + "samplesPooled": null, + "trials": null + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 575.2320289611816, + "p90": 575.2320289611816, + "p99": 587.8400206565857 + }, + "combine": { + "p50": 817.5039887428284, + "p90": 817.5039887428284, + "p99": 831.8079710006714 + }, + "serial": { + "p50": 1392.73601770401, + "p90": 1392.73601770401, + "p99": 1419.647991657257 + }, + "dispatchLogicalBytes": 1243805696, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "correct": true, + "samplesPooled": null, + "trials": null + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1068.3200359344482, + "p90": 1068.3200359344482, + "p99": 1088.1279706954956 + }, + "combine": { + "p50": 1529.312014579773, + "p90": 1529.312014579773, + "p99": 1618.3359622955322 + }, + "serial": { + "p50": 2597.632050514221, + "p90": 2597.632050514221, + "p99": 2706.463932991028 + }, + "dispatchLogicalBytes": 2487009280, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "correct": true, + "samplesPooled": null, + "trials": null + } + ] + }, + { + "id": "cx-a26390df5f1e4196", + "identity": "gb200|deepep|decode|normal|tuned|standardized|deepep-normal-v1|gb200-nvl72-mnnvl|4|4|bf16|balanced|7168|8|256", + "stitchKey": "gb200|deepep|normal|tuned|standardized|deepep-normal-v1|gb200-nvl72-mnnvl|4|4|bf16|balanced|7168|8|256", + "colorKey": "gb200_21efa99d", 
+ "schemaVersion": 1, + "generatedAt": "2026-06-24T03:45:16.336112+00:00", + "status": "valid", + "sku": "gb200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "deepep-normal-v1", + "topologyClass": "gb200-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB200 · deepep · bf16 · EP4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced", + "dispatchDtype": "bf16" + }, + "routingConsistent": null, + "traceSignature": null, + "backendVersion": "unknown", + "run": { + "id": "28069684997", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28069684997", + "createdAt": "2026-06-24T01:52:08Z", + "sha": "368cfbc6390cf69b864dedc121a79a12114b716b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 99.42399710416794, + "p90": 99.42399710416794, + "p99": 147.32800424098969 + }, + "combine": { + "p50": 72.73600250482559, + "p90": 72.73600250482559, + "p99": 18259.39178466797 + }, + "serial": { + "p50": 148.83199334144592, + "p90": 148.83199334144592, + "p99": 168.19199919700623 + }, + "dispatchLogicalBytes": 57344, + "combineLogicalBytes": 0, + "fanoutMean": null, + "recvTokensMax": 4, + "correct": true, + "samplesPooled": null, + "trials": null + }, + { + "tokensPerRank": 2, + "globalTokens": 8, + "dispatch": { + "p50": 101.50399804115295, + "p90": 101.50399804115295, + "p99": 123.99999797344208 + }, + "combine": { + "p50": 79.68000322580338, + "p90": 79.68000322580338, + "p99": 6393.280029296875 + }, + "serial": { + "p50": 149.63200688362122, + "p90": 149.63200688362122, + "p99": 176.41599476337433 + }, + "dispatchLogicalBytes": 114688, + "combineLogicalBytes": 0, + "fanoutMean": null, + "recvTokensMax": 8, + "correct": true, + "samplesPooled": null, + "trials": null + }, + { + "tokensPerRank": 4, + "globalTokens": 16, + 
"dispatch": { + "p50": 98.78399968147278, + "p90": 98.78399968147278, + "p99": 118.56000125408173 + }, + "combine": { + "p50": 76.28799974918365, + "p90": 76.28799974918365, + "p99": 92.12800115346909 + }, + "serial": { + "p50": 152.38399803638458, + "p90": 152.38399803638458, + "p99": 24719.839096069336 + }, + "dispatchLogicalBytes": 229376, + "combineLogicalBytes": 0, + "fanoutMean": null, + "recvTokensMax": 16, + "correct": true, + "samplesPooled": null, + "trials": null + }, + { + "tokensPerRank": 8, + "globalTokens": 32, + "dispatch": { + "p50": 105.40799796581268, + "p90": 105.40799796581268, + "p99": 265.4080092906952 + }, + "combine": { + "p50": 82.68799632787704, + "p90": 82.68799632787704, + "p99": 95.551997423172 + }, + "serial": { + "p50": 163.68000209331512, + "p90": 163.68000209331512, + "p99": 15888.832092285156 + }, + "dispatchLogicalBytes": 415744, + "combineLogicalBytes": 0, + "fanoutMean": null, + "recvTokensMax": 29, + "correct": true, + "samplesPooled": null, + "trials": null + }, + { + "tokensPerRank": 16, + "globalTokens": 64, + "dispatch": { + "p50": 103.74400019645691, + "p90": 103.74400019645691, + "p99": 131.8719983100891 + }, + "combine": { + "p50": 83.26400071382523, + "p90": 83.26400071382523, + "p99": 4964.223861694336 + }, + "serial": { + "p50": 157.05600380897522, + "p90": 157.05600380897522, + "p99": 193.50400567054749 + }, + "dispatchLogicalBytes": 874496, + "combineLogicalBytes": 0, + "fanoutMean": null, + "recvTokensMax": 61, + "correct": true, + "samplesPooled": null, + "trials": null + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 103.5199984908104, + "p90": 103.5199984908104, + "p99": 142.84799993038177 + }, + "combine": { + "p50": 83.20000022649765, + "p90": 83.20000022649765, + "p99": 108.2879975438118 + }, + "serial": { + "p50": 163.26400637626648, + "p90": 163.26400637626648, + "p99": 4763.391971588135 + }, + "dispatchLogicalBytes": 1691648, + "combineLogicalBytes": 0, + "fanoutMean": null, 
+ "recvTokensMax": 118, + "correct": true, + "samplesPooled": null, + "trials": null + }, + { + "tokensPerRank": 64, + "globalTokens": 256, + "dispatch": { + "p50": 108.19199681282043, + "p90": 108.19199681282043, + "p99": 15079.903602600098 + }, + "combine": { + "p50": 83.74399691820145, + "p90": 83.74399691820145, + "p99": 99.71199929714203 + }, + "serial": { + "p50": 167.84000396728516, + "p90": 167.84000396728516, + "p99": 193.1840032339096 + }, + "dispatchLogicalBytes": 3411968, + "combineLogicalBytes": 0, + "fanoutMean": null, + "recvTokensMax": 238, + "correct": true, + "samplesPooled": null, + "trials": null + }, + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 121.44000083208084, + "p90": 121.44000083208084, + "p99": 16422.271728515625 + }, + "combine": { + "p50": 100.832000374794, + "p90": 100.832000374794, + "p99": 150.68799257278442 + }, + "serial": { + "p50": 200.8959949016571, + "p90": 200.8959949016571, + "p99": 270.9119915962219 + }, + "dispatchLogicalBytes": 6852608, + "combineLogicalBytes": 0, + "fanoutMean": null, + "recvTokensMax": 478, + "correct": true, + "samplesPooled": null, + "trials": null + } + ] + }, + { + "id": "cx-6bf6c38771ee141b", + "identity": "gb200|deepep|prefill|normal|tuned|standardized|deepep-normal-v1|gb200-nvl72-mnnvl|4|4|bf16|balanced|7168|8|256", + "stitchKey": "gb200|deepep|normal|tuned|standardized|deepep-normal-v1|gb200-nvl72-mnnvl|4|4|bf16|balanced|7168|8|256", + "colorKey": "gb200_21efa99d", + "schemaVersion": 1, + "generatedAt": "2026-06-24T04:09:18.525148+00:00", + "status": "valid", + "sku": "gb200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "deepep-normal-v1", + "topologyClass": "gb200-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB200 · deepep · bf16 · EP4", + "shape": { + "hidden": 7168, + "topk": 8, + 
"experts": 256, + "routing": "balanced", + "dispatchDtype": "bf16" + }, + "routingConsistent": null, + "traceSignature": null, + "backendVersion": "unknown", + "run": { + "id": "28069684997", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28069684997", + "createdAt": "2026-06-24T01:52:08Z", + "sha": "368cfbc6390cf69b864dedc121a79a12114b716b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 111.16799712181091, + "p90": 111.16799712181091, + "p99": 135.74400544166565 + }, + "combine": { + "p50": 93.98400038480759, + "p90": 93.98400038480759, + "p99": 105.6319996714592 + }, + "serial": { + "p50": 177.59999632835388, + "p90": 177.59999632835388, + "p99": 198.59200716018677 + }, + "dispatchLogicalBytes": 6780928, + "combineLogicalBytes": 0, + "fanoutMean": null, + "recvTokensMax": 473, + "correct": true, + "samplesPooled": null, + "trials": null + }, + { + "tokensPerRank": 256, + "globalTokens": 1024, + "dispatch": { + "p50": 137.7599984407425, + "p90": 137.7599984407425, + "p99": 164.35199975967407 + }, + "combine": { + "p50": 114.56000059843063, + "p90": 114.56000059843063, + "p99": 123.4240010380745 + }, + "serial": { + "p50": 226.8799990415573, + "p90": 226.8799990415573, + "p99": 237.7920001745224 + }, + "dispatchLogicalBytes": 13389824, + "combineLogicalBytes": 0, + "fanoutMean": null, + "recvTokensMax": 934, + "correct": true, + "samplesPooled": null, + "trials": null + }, + { + "tokensPerRank": 512, + "globalTokens": 2048, + "dispatch": { + "p50": 184.38400328159332, + "p90": 184.38400328159332, + "p99": 206.62400126457214 + }, + "combine": { + "p50": 152.8320014476776, + "p90": 152.8320014476776, + "p99": 162.91199624538422 + }, + "serial": { + "p50": 311.7760121822357, + "p90": 311.7760121822357, + "p99": 325.6640136241913 + }, + "dispatchLogicalBytes": 26736640, + "combineLogicalBytes": 0, + "fanoutMean": null, + "recvTokensMax": 1865, + "correct": true, + "samplesPooled": null, + "trials": null 
+ }, + { + "tokensPerRank": 1024, + "globalTokens": 4096, + "dispatch": { + "p50": 282.8480005264282, + "p90": 282.8480005264282, + "p99": 300.0960052013397 + }, + "combine": { + "p50": 272.2879946231842, + "p90": 272.2879946231842, + "p99": 288.4159982204437 + }, + "serial": { + "p50": 528.7359952926636, + "p90": 528.7359952926636, + "p99": 545.7599759101868 + }, + "dispatchLogicalBytes": 53358592, + "combineLogicalBytes": 0, + "fanoutMean": null, + "recvTokensMax": 3722, + "correct": true, + "samplesPooled": null, + "trials": null + }, + { + "tokensPerRank": 2048, + "globalTokens": 8192, + "dispatch": { + "p50": 476.063996553421, + "p90": 476.063996553421, + "p99": 494.52799558639526 + }, + "combine": { + "p50": 453.72799038887024, + "p90": 453.72799038887024, + "p99": 476.25601291656494 + }, + "serial": { + "p50": 910.3040099143982, + "p90": 910.3040099143982, + "p99": 965.9519791603088 + }, + "dispatchLogicalBytes": 106373120, + "combineLogicalBytes": 0, + "fanoutMean": null, + "recvTokensMax": 7420, + "correct": true, + "samplesPooled": null, + "trials": null + }, + { + "tokensPerRank": 4096, + "globalTokens": 16384, + "dispatch": { + "p50": 866.4960265159607, + "p90": 866.4960265159607, + "p99": 879.7439932823181 + }, + "combine": { + "p50": 834.6880078315735, + "p90": 834.6880078315735, + "p99": 848.6080169677734 + }, + "serial": { + "p50": 1678.3039569854736, + "p90": 1678.3039569854736, + "p99": 1692.031979560852 + }, + "dispatchLogicalBytes": 212774912, + "combineLogicalBytes": 0, + "fanoutMean": null, + "recvTokensMax": 14842, + "correct": true, + "samplesPooled": null, + "trials": null + } + ] + }, + { + "id": "cx-b9f1a317815a742c", + "identity": "h100|deepep|decode|normal|tuned|standardized|comm-only-v1|h100-nvlink-island|8|8|bf16|uniform|7168|8|256", + "stitchKey": "h100|deepep|normal|tuned|standardized|comm-only-v1|h100-nvlink-island|8|8|bf16|uniform|7168|8|256", + "colorKey": "h100_a6184024", + "schemaVersion": 2, + "generatedAt": 
"2026-06-24T22:50:09.306878+00:00", + "status": "valid", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "comm-only-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 · deepep · bf16 · EP8 · comm only", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "dispatchDtype": "bf16" + }, + "routingConsistent": null, + "traceSignature": null, + "backendVersion": "1.2.1", + "run": { + "id": "28134642131", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28134642131", + "createdAt": "2026-06-24T22:49:12Z", + "sha": "9f85d054303e23b24e720ca6cb472b6a8eba3754" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 100.16000270843506, + "p90": 100.16000270843506, + "p99": 121.37600034475327 + }, + "combine": { + "p50": 79.96799796819687, + "p90": 79.96799796819687, + "p99": 88.41600269079208 + }, + "serial": { + "p50": 180.12800067663193, + "p90": 180.12800067663193, + "p99": 209.79200303554535 + }, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "correct": true, + "samplesPooled": null, + "trials": null + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 98.01600128412247, + "p90": 98.01600128412247, + "p99": 121.8239963054657 + }, + "combine": { + "p50": 79.8719972372055, + "p90": 79.8719972372055, + "p99": 96.57599776983261 + }, + "serial": { + "p50": 177.88799852132797, + "p90": 177.88799852132797, + "p99": 218.3999940752983 + }, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "correct": true, + "samplesPooled": null, + "trials": null + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 
99.2640033364296, + "p90": 99.2640033364296, + "p99": 119.58400160074234 + }, + "combine": { + "p50": 80.22399991750717, + "p90": 80.22399991750717, + "p99": 90.01599997282028 + }, + "serial": { + "p50": 179.48800325393677, + "p90": 179.48800325393677, + "p99": 209.60000157356262 + }, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "correct": true, + "samplesPooled": null, + "trials": null + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 98.24000298976898, + "p90": 98.24000298976898, + "p99": 115.99999666213989 + }, + "combine": { + "p50": 80.70400357246399, + "p90": 80.70400357246399, + "p99": 88.67199718952179 + }, + "serial": { + "p50": 178.94400656223297, + "p90": 178.94400656223297, + "p99": 204.67199385166168 + }, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "correct": true, + "samplesPooled": null, + "trials": null + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 98.59199821949005, + "p90": 98.59199821949005, + "p99": 147.5200057029724 + }, + "combine": { + "p50": 87.74399757385254, + "p90": 87.74399757385254, + "p99": 89.37600255012512 + }, + "serial": { + "p50": 186.3359957933426, + "p90": 186.3359957933426, + "p99": 236.89600825309753 + }, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "correct": true, + "samplesPooled": null, + "trials": null + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 99.29600358009338, + "p90": 99.29600358009338, + "p99": 127.36000120639801 + }, + "combine": { + "p50": 88.48000317811966, + "p90": 88.48000317811966, + "p99": 97.08800166845322 + }, + "serial": { + "p50": 187.77600675821304, + "p90": 187.77600675821304, + "p99": 224.44800287485123 + }, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + 
"fanoutMean": 5.375, + "recvTokensMax": 182, + "correct": true, + "samplesPooled": null, + "trials": null + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 112.73600161075592, + "p90": 112.73600161075592, + "p99": 122.27199971675873 + }, + "combine": { + "p50": 98.30400347709656, + "p90": 98.30400347709656, + "p99": 106.27199709415436 + }, + "serial": { + "p50": 211.04000508785248, + "p90": 211.04000508785248, + "p99": 228.5439968109131 + }, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "correct": true, + "samplesPooled": null, + "trials": null + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 129.50399518013, + "p90": 129.50399518013, + "p99": 141.85599982738495 + }, + "combine": { + "p50": 119.64800208806992, + "p90": 119.64800208806992, + "p99": 123.9359974861145 + }, + "serial": { + "p50": 249.15199726819992, + "p90": 249.15199726819992, + "p99": 265.79199731349945 + }, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "correct": true, + "samplesPooled": null, + "trials": null + } + ] + }, + { + "id": "cx-6de3ad32139f9200", + "identity": "h100|deepep|decode|ll|tuned|standardized|comm-only-v1|h100-nvlink-island|8|8|fp8|uniform|7168|8|256", + "stitchKey": "h100|deepep|ll|tuned|standardized|comm-only-v1|h100-nvlink-island|8|8|fp8|uniform|7168|8|256", + "colorKey": "h100_9d00efc8", + "schemaVersion": 2, + "generatedAt": "2026-06-24T23:09:09.861657+00:00", + "status": "valid", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "ll", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "comm-only-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 · deepep · fp8 · EP8 · LL · comm only", + "shape": { + 
"hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "dispatchDtype": "fp8" + }, + "routingConsistent": null, + "traceSignature": null, + "backendVersion": "1.2.1", + "run": { + "id": "28135444762", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28135444762", + "createdAt": "2026-06-24T23:08:05Z", + "sha": "e71ef3c2a0465a357771c14935dd0807dc1da165" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 52.06400156021118, + "p90": 52.06400156021118, + "p99": 65.8240020275116 + }, + "combine": { + "p50": 49.8879998922348, + "p90": 49.8879998922348, + "p99": 74.68800246715546 + }, + "serial": { + "p50": 101.95200145244598, + "p90": 101.95200145244598, + "p99": 140.51200449466705 + }, + "dispatchLogicalBytes": 458752, + "combineLogicalBytes": 917504, + "fanoutMean": 5.5, + "recvTokensMax": 14, + "correct": true, + "samplesPooled": null, + "trials": null + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 52.352000027894974, + "p90": 52.352000027894974, + "p99": 61.47199869155884 + }, + "combine": { + "p50": 37.151999771595, + "p90": 37.151999771595, + "p99": 55.03999814391136 + }, + "serial": { + "p50": 89.50399979948997, + "p90": 89.50399979948997, + "p99": 116.5119968354702 + }, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 1835008, + "fanoutMean": 5.375, + "recvTokensMax": 21, + "correct": true, + "samplesPooled": null, + "trials": null + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 38.11199963092804, + "p90": 38.11199963092804, + "p99": 45.43999955058098 + }, + "combine": { + "p50": 35.679999738931656, + "p90": 35.679999738931656, + "p99": 141.50400459766388 + }, + "serial": { + "p50": 73.7919993698597, + "p90": 73.7919993698597, + "p99": 186.94400414824486 + }, + "dispatchLogicalBytes": 1835008, + "combineLogicalBytes": 3670016, + "fanoutMean": 5.40625, + "recvTokensMax": 39, + "correct": true, + "samplesPooled": null, + 
"trials": null + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 42.94399917125702, + "p90": 42.94399917125702, + "p99": 46.52800038456917 + }, + "combine": { + "p50": 36.99199855327606, + "p90": 36.99199855327606, + "p99": 45.3759990632534 + }, + "serial": { + "p50": 79.93599772453308, + "p90": 79.93599772453308, + "p99": 91.90399944782257 + }, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 7340032, + "fanoutMean": 5.421875, + "recvTokensMax": 74, + "correct": true, + "samplesPooled": null, + "trials": null + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 43.42399910092354, + "p90": 43.42399910092354, + "p99": 59.20000001788139 + }, + "combine": { + "p50": 39.48799893260002, + "p90": 39.48799893260002, + "p99": 49.375999718904495 + }, + "serial": { + "p50": 82.91199803352356, + "p90": 82.91199803352356, + "p99": 108.57599973678589 + }, + "dispatchLogicalBytes": 7340032, + "combineLogicalBytes": 14680064, + "fanoutMean": 5.40625, + "recvTokensMax": 145, + "correct": true, + "samplesPooled": null, + "trials": null + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 43.935999274253845, + "p90": 43.935999274253845, + "p99": 51.93600058555603 + }, + "combine": { + "p50": 45.27999833226204, + "p90": 45.27999833226204, + "p99": 63.58399987220764 + }, + "serial": { + "p50": 89.21599760651588, + "p90": 89.21599760651588, + "p99": 115.52000045776367 + }, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 29360128, + "fanoutMean": 5.375, + "recvTokensMax": 287, + "correct": true, + "samplesPooled": null, + "trials": null + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 51.19999870657921, + "p90": 51.19999870657921, + "p99": 54.59199845790863 + }, + "combine": { + "p50": 57.5999990105629, + "p90": 57.5999990105629, + "p99": 63.968002796173096 + }, + "serial": { + "p50": 108.7999977171421, + "p90": 108.7999977171421, + "p99": 
118.56000125408173 + }, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 58720256, + "fanoutMean": 5.3125, + "recvTokensMax": 564, + "correct": true, + "samplesPooled": null, + "trials": null + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 61.344001442193985, + "p90": 61.344001442193985, + "p99": 68.60800087451935 + }, + "combine": { + "p50": 84.3840017914772, + "p90": 84.3840017914772, + "p99": 88.92799913883209 + }, + "serial": { + "p50": 145.7280032336712, + "p90": 145.7280032336712, + "p99": 157.53600001335144 + }, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 117440512, + "fanoutMean": 5.291015625, + "recvTokensMax": 1104, + "correct": true, + "samplesPooled": null, + "trials": null + } + ] + }, + { + "id": "cx-e75ae4f0ebd83f3c", + "identity": "h100|deepep|prefill|normal|tuned|standardized|comm-only-v1|h100-nvlink-island|8|8|bf16|uniform|7168|8|256", + "stitchKey": "h100|deepep|normal|tuned|standardized|comm-only-v1|h100-nvlink-island|8|8|bf16|uniform|7168|8|256", + "colorKey": "h100_a6184024", + "schemaVersion": 2, + "generatedAt": "2026-06-24T22:50:10.363193+00:00", + "status": "valid", + "sku": "h100", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "comm-only-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 · deepep · bf16 · EP8 · comm only", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "dispatchDtype": "bf16" + }, + "routingConsistent": null, + "traceSignature": null, + "backendVersion": "1.2.1", + "run": { + "id": "28134642131", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28134642131", + "createdAt": "2026-06-24T22:49:12Z", + "sha": "9f85d054303e23b24e720ca6cb472b6a8eba3754" + }, + "rows": [ + { + "tokensPerRank": 128, + 
"globalTokens": 1024, + "dispatch": { + "p50": 132.76800513267517, + "p90": 132.76800513267517, + "p99": 144.67200636863708 + }, + "combine": { + "p50": 113.0559965968132, + "p90": 113.0559965968132, + "p99": 124.51200187206268 + }, + "serial": { + "p50": 245.82400172948837, + "p90": 245.82400172948837, + "p99": 269.18400824069977 + }, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "correct": true, + "samplesPooled": null, + "trials": null + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 161.02400422096252, + "p90": 161.02400422096252, + "p99": 182.20800161361694 + }, + "combine": { + "p50": 160.5760008096695, + "p90": 160.5760008096695, + "p99": 170.0800061225891 + }, + "serial": { + "p50": 321.600005030632, + "p90": 321.600005030632, + "p99": 352.28800773620605 + }, + "dispatchLogicalBytes": 155889664, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "correct": true, + "samplesPooled": null, + "trials": null + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 213.34399282932281, + "p90": 213.34399282932281, + "p99": 228.09599339962006 + }, + "combine": { + "p50": 236.64000630378723, + "p90": 236.64000630378723, + "p99": 244.57600712776184 + }, + "serial": { + "p50": 449.98399913311005, + "p90": 449.98399913311005, + "p99": 472.6720005273819 + }, + "dispatchLogicalBytes": 312266752, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "correct": true, + "samplesPooled": null, + "trials": null + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 320.8959996700287, + "p90": 320.8959996700287, + "p99": 335.58401465415955 + }, + "combine": { + "p50": 368.3199882507324, + "p90": 368.3199882507324, + "p99": 377.21601128578186 + }, + "serial": { + "p50": 689.2159879207611, + "p90": 689.2159879207611, + "p99": 
712.8000259399414 + }, + "dispatchLogicalBytes": 623443968, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "correct": true, + "samplesPooled": null, + "trials": null + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 519.2639827728271, + "p90": 519.2639827728271, + "p99": 541.1520004272461 + }, + "combine": { + "p50": 632.9600214958191, + "p90": 632.9600214958191, + "p99": 643.8400149345398 + }, + "serial": { + "p50": 1152.2240042686462, + "p90": 1152.2240042686462, + "p99": 1184.992015361786 + }, + "dispatchLogicalBytes": 1243805696, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "correct": true, + "samplesPooled": null, + "trials": null + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1002.560019493103, + "p90": 1002.560019493103, + "p99": 1047.1359491348267 + }, + "combine": { + "p50": 1161.5040302276611, + "p90": 1161.5040302276611, + "p99": 1181.3440322875977 + }, + "serial": { + "p50": 2164.064049720764, + "p90": 2164.064049720764, + "p99": 2228.4799814224243 + }, + "dispatchLogicalBytes": 2487009280, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "correct": true, + "samplesPooled": null, + "trials": null + } + ] + }, + { + "id": "cx-18e4deb42cbe9c55", + "identity": "h100|deepep|prefill|normal|tuned|standardized|comm-only-v1|h100-nvlink-island|8|8|fp8|uniform|7168|8|256", + "stitchKey": "h100|deepep|normal|tuned|standardized|comm-only-v1|h100-nvlink-island|8|8|fp8|uniform|7168|8|256", + "colorKey": "h100_426025cb", + "schemaVersion": 2, + "generatedAt": "2026-06-24T23:10:20.402829+00:00", + "status": "valid", + "sku": "h100", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "comm-only-v1", + "topologyClass": 
"h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 · deepep · fp8 · EP8 · comm only", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "dispatchDtype": "fp8" + }, + "routingConsistent": null, + "traceSignature": null, + "backendVersion": "1.2.1", + "run": { + "id": "28135446264", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28135446264", + "createdAt": "2026-06-24T23:08:07Z", + "sha": "e71ef3c2a0465a357771c14935dd0807dc1da165" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 111.58400028944016, + "p90": 111.58400028944016, + "p99": 128.7039965391159 + }, + "combine": { + "p50": 104.5759990811348, + "p90": 104.5759990811348, + "p99": 111.07199639081955 + }, + "serial": { + "p50": 216.15999937057495, + "p90": 216.15999937057495, + "p99": 239.77599292993546 + }, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "correct": true, + "samplesPooled": null, + "trials": null + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 127.48800218105316, + "p90": 127.48800218105316, + "p99": 147.35999703407288 + }, + "combine": { + "p50": 148.95999431610107, + "p90": 148.95999431610107, + "p99": 158.01599621772766 + }, + "serial": { + "p50": 276.44799649715424, + "p90": 276.44799649715424, + "p99": 305.37599325180054 + }, + "dispatchLogicalBytes": 77944832, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "correct": true, + "samplesPooled": null, + "trials": null + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 167.55199432373047, + "p90": 167.55199432373047, + "p99": 179.45599555969238 + }, + "combine": { + "p50": 231.00799322128296, + "p90": 231.00799322128296, + "p99": 240.60800671577454 + }, + "serial": { + "p50": 398.5599875450134, + "p90": 
398.5599875450134, + "p99": 420.0640022754669 + }, + "dispatchLogicalBytes": 156133376, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "correct": true, + "samplesPooled": null, + "trials": null + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 238.75199258327484, + "p90": 238.75199258327484, + "p99": 276.95998549461365 + }, + "combine": { + "p50": 366.8479919433594, + "p90": 366.8479919433594, + "p99": 377.1840035915375 + }, + "serial": { + "p50": 605.5999845266342, + "p90": 605.5999845266342, + "p99": 654.1439890861511 + }, + "dispatchLogicalBytes": 311721984, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "correct": true, + "samplesPooled": null, + "trials": null + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 371.9039857387543, + "p90": 371.9039857387543, + "p99": 398.81598949432373 + }, + "combine": { + "p50": 628.607988357544, + "p90": 628.607988357544, + "p99": 644.5119976997375 + }, + "serial": { + "p50": 1000.5119740962982, + "p90": 1000.5119740962982, + "p99": 1043.3279871940613 + }, + "dispatchLogicalBytes": 621902848, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "correct": true, + "samplesPooled": null, + "trials": null + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 652.6399850845337, + "p90": 652.6399850845337, + "p99": 670.304000377655 + }, + "combine": { + "p50": 1157.3439836502075, + "p90": 1157.3439836502075, + "p99": 1173.0560064315796 + }, + "serial": { + "p50": 1809.9839687347412, + "p90": 1809.9839687347412, + "p99": 1843.3600068092346 + }, + "dispatchLogicalBytes": 1243504640, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "correct": true, + "samplesPooled": null, + "trials": null + } + ] + }, + { + "id": "cx-e49b5d3ba04f7fb1", + "identity": 
"mi355x|mori|decode|normal|tuned|standardized|mori-normal-v1|mi355x-xgmi|8|8|bf16|balanced|7168|8|256", + "stitchKey": "mi355x|mori|normal|tuned|standardized|mori-normal-v1|mi355x-xgmi|8|8|bf16|balanced|7168|8|256", + "colorKey": "mi355x_f3b49abd", + "schemaVersion": 1, + "generatedAt": "2026-06-24T01:58:52.129674+00:00", + "status": "valid", + "sku": "mi355x", + "backend": "mori", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "mori-normal-v1", + "topologyClass": "mi355x-xgmi", + "transport": "xgmi", + "worldSize": 8, + "epSize": 8, + "label": "MI355X · mori · bf16 · EP8", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced", + "dispatchDtype": "bf16" + }, + "routingConsistent": null, + "traceSignature": null, + "backendVersion": "unknown", + "run": { + "id": "28069889124", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28069889124", + "createdAt": "2026-06-24T01:57:55Z", + "sha": "e2717a341cf1514d4be6393db16121889db7bf19" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 40.160998702049255, + "p90": 40.160998702049255, + "p99": 126.64000689983368 + }, + "combine": { + "p50": 17.160000279545784, + "p90": 17.160000279545784, + "p99": 22.840000689029694 + }, + "serial": { + "p50": 54.91999909281731, + "p90": 54.91999909281731, + "p99": 78.47999781370163 + }, + "dispatchLogicalBytes": 114688, + "combineLogicalBytes": 0, + "fanoutMean": null, + "recvTokensMax": 8, + "correct": true, + "samplesPooled": null, + "trials": null + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 42.281001806259155, + "p90": 42.281001806259155, + "p99": 74.60100203752518 + }, + "combine": { + "p50": 17.35999993979931, + "p90": 17.35999993979931, + "p99": 22.80000038444996 + }, + "serial": { + "p50": 58.52099880576134, + "p90": 58.52099880576134, + "p99": 
77.4800032377243 + }, + "dispatchLogicalBytes": 200704, + "combineLogicalBytes": 0, + "fanoutMean": null, + "recvTokensMax": 14, + "correct": true, + "samplesPooled": null, + "trials": null + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 42.48100146651268, + "p90": 42.48100146651268, + "p99": 62.36099824309349 + }, + "combine": { + "p50": 19.801000133156776, + "p90": 19.801000133156776, + "p99": 29.839999973773956 + }, + "serial": { + "p50": 61.921000480651855, + "p90": 61.921000480651855, + "p99": 77.7209997177124 + }, + "dispatchLogicalBytes": 358400, + "combineLogicalBytes": 0, + "fanoutMean": null, + "recvTokensMax": 25, + "correct": true, + "samplesPooled": null, + "trials": null + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 42.27999970316887, + "p90": 42.27999970316887, + "p99": 76.68100297451019 + }, + "combine": { + "p50": 21.43999934196472, + "p90": 21.43999934196472, + "p99": 28.960000723600388 + }, + "serial": { + "p50": 61.88099831342697, + "p90": 61.88099831342697, + "p99": 90.40100127458572 + }, + "dispatchLogicalBytes": 673792, + "combineLogicalBytes": 0, + "fanoutMean": null, + "recvTokensMax": 47, + "correct": true, + "samplesPooled": null, + "trials": null + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 42.281001806259155, + "p90": 42.281001806259155, + "p99": 92.56000071763992 + }, + "combine": { + "p50": 25.200000032782555, + "p90": 25.200000032782555, + "p99": 30.280999839305878 + }, + "serial": { + "p50": 66.96099787950516, + "p90": 66.96099787950516, + "p99": 90.96100181341171 + }, + "dispatchLogicalBytes": 1304576, + "combineLogicalBytes": 0, + "fanoutMean": null, + "recvTokensMax": 91, + "correct": true, + "samplesPooled": null, + "trials": null + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 42.59999841451645, + "p90": 42.59999841451645, + "p99": 66.08100235462189 + }, + "combine": { + "p50": 32.760001718997955, + 
"p90": 32.760001718997955, + "p99": 37.79999911785126 + }, + "serial": { + "p50": 75.48099756240845, + "p90": 75.48099756240845, + "p99": 99.60100054740906 + }, + "dispatchLogicalBytes": 2566144, + "combineLogicalBytes": 0, + "fanoutMean": null, + "recvTokensMax": 179, + "correct": true, + "samplesPooled": null, + "trials": null + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 45.40000110864639, + "p90": 45.40000110864639, + "p99": 123.52100014686584 + }, + "combine": { + "p50": 41.00099951028824, + "p90": 41.00099951028824, + "p99": 45.841000974178314 + }, + "serial": { + "p50": 84.88000184297562, + "p90": 84.88000184297562, + "p99": 110.96099764108658 + }, + "dispatchLogicalBytes": 5060608, + "combineLogicalBytes": 0, + "fanoutMean": null, + "recvTokensMax": 353, + "correct": true, + "samplesPooled": null, + "trials": null + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 59.67999994754791, + "p90": 59.67999994754791, + "p99": 113.52100223302841 + }, + "combine": { + "p50": 52.68099904060364, + "p90": 52.68099904060364, + "p99": 61.68099865317345 + }, + "serial": { + "p50": 111.40099912881851, + "p90": 111.40099912881851, + "p99": 135.76200604438782 + }, + "dispatchLogicalBytes": 10106880, + "combineLogicalBytes": 0, + "fanoutMean": null, + "recvTokensMax": 705, + "correct": true, + "samplesPooled": null, + "trials": null + } + ] + }, + { + "id": "cx-f9fac8841a429302", + "identity": "mi355x|mori|decode|normal|normalized|standardized|comm-only-v1|mi355x-xgmi|8|8|bf16|uniform|7168|8|256", + "stitchKey": "mi355x|mori|normal|normalized|standardized|comm-only-v1|mi355x-xgmi|8|8|bf16|uniform|7168|8|256", + "colorKey": "mi355x_d20dd52c", + "schemaVersion": 2, + "generatedAt": "2026-06-24T23:43:48.833883+00:00", + "status": "valid", + "sku": "mi355x", + "backend": "mori", + "phase": "decode", + "mode": "normal", + "resourceMode": "normalized", + "suite": "resource-constrained", + "comparisonClass": 
"standardized", + "measurementContract": "comm-only-v1", + "topologyClass": "mi355x-xgmi", + "transport": "xgmi", + "worldSize": 8, + "epSize": 8, + "label": "MI355X · mori · bf16 · EP8 · normalized · comm only", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "dispatchDtype": "bf16" + }, + "routingConsistent": null, + "traceSignature": null, + "backendVersion": "image:rocm/sgl-dev:sglang-0.5.9-rocm720-mi35x-mori-0227-2", + "run": { + "id": "28136838021", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28136838021", + "createdAt": "2026-06-24T23:42:57Z", + "sha": "bbe05780a0a0a73656024f4f9eb566db593b6d18" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 40.481001138687134, + "p90": 40.481001138687134, + "p99": 65.16099721193314 + }, + "combine": { + "p50": 15.440000221133232, + "p90": 15.440000221133232, + "p99": 25.60099959373474 + }, + "serial": { + "p50": 55.921001359820366, + "p90": 55.921001359820366, + "p99": 90.76199680566788 + }, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "correct": true, + "samplesPooled": null, + "trials": null + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 42.04000160098076, + "p90": 42.04000160098076, + "p99": 50.9210005402565 + }, + "combine": { + "p50": 16.039999201893806, + "p90": 16.039999201893806, + "p99": 21.239999681711197 + }, + "serial": { + "p50": 58.080000802874565, + "p90": 58.080000802874565, + "p99": 72.1610002219677 + }, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "correct": true, + "samplesPooled": null, + "trials": null + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 41.79999977350235, + "p90": 41.79999977350235, + "p99": 50.20099878311157 + }, + "combine": { + "p50": 19.64000053703785, + "p90": 19.64000053703785, + 
"p99": 24.080000817775726 + }, + "serial": { + "p50": 61.4400003105402, + "p90": 61.4400003105402, + "p99": 74.2809996008873 + }, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "correct": true, + "samplesPooled": null, + "trials": null + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 42.44000092148781, + "p90": 42.44000092148781, + "p99": 58.320000767707825 + }, + "combine": { + "p50": 19.76099982857704, + "p90": 19.76099982857704, + "p99": 24.879999458789825 + }, + "serial": { + "p50": 62.20100075006485, + "p90": 62.20100075006485, + "p99": 83.20000022649765 + }, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "correct": true, + "samplesPooled": null, + "trials": null + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 42.67999902367592, + "p90": 42.67999902367592, + "p99": 49.320999532938 + }, + "combine": { + "p50": 25.880999863147736, + "p90": 25.880999863147736, + "p99": 30.479999259114265 + }, + "serial": { + "p50": 68.56099888682365, + "p90": 68.56099888682365, + "p99": 79.80099879205227 + }, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "correct": true, + "samplesPooled": null, + "trials": null + } + ] + }, + { + "id": "cx-dd40bab3980be4a6", + "identity": "mi355x|mori|decode|normal|normalized|standardized|layout-and-dispatch-v1|mi355x-xgmi|8|8|bf16|uniform|7168|8|256", + "stitchKey": "mi355x|mori|normal|normalized|standardized|layout-and-dispatch-v1|mi355x-xgmi|8|8|bf16|uniform|7168|8|256", + "colorKey": "mi355x_ddffacd2", + "schemaVersion": 3, + "generatedAt": "2026-06-25T09:08:45.533564+00:00", + "status": "valid", + "sku": "mi355x", + "backend": "mori", + "phase": "decode", + "mode": "normal", + "resourceMode": "normalized", + "suite": "resource-constrained", + "comparisonClass": 
"standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "mi355x-xgmi", + "transport": "xgmi", + "worldSize": 8, + "epSize": 8, + "label": "MI355X · mori · bf16 · EP8 · normalized · layout + dispatch", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "dispatchDtype": "bf16" + }, + "routingConsistent": true, + "traceSignature": "c774c8e4abb34da", + "backendVersion": "image:rocm/sgl-dev:sglang-0.5.9-rocm720-mi35x-mori-0227-2", + "run": { + "id": "28157318258", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28157318258", + "createdAt": "2026-06-25T08:30:18Z", + "sha": "e97bc8b22556293fe74207c68d4d0ea1cf8c7b4c" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 40.39900004863739, + "p90": 43.83999854326248, + "p99": 50.599999725818634 + }, + "combine": { + "p50": 16.039999201893806, + "p90": 18.120000138878822, + "p99": 22.360000759363174 + }, + "serial": { + "p50": 56.4389992505312, + "p90": 61.959998682141304, + "p99": 72.96000048518181 + }, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 42.24000126123428, + "p90": 45.32000049948692, + "p99": 50.48000067472458 + }, + "combine": { + "p50": 15.839999541640282, + "p90": 18.519999459385872, + "p99": 23.000000044703484 + }, + "serial": { + "p50": 58.080000802874565, + "p90": 63.839999958872795, + "p99": 73.48000071942806 + }, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 42.080000042915344, + "p90": 44.95999962091446, + "p99": 52.880000323057175 + }, + "combine": { + "p50": 18.75999942421913, + "p90": 
21.27999998629093, + "p99": 25.839999318122864 + }, + "serial": { + "p50": 60.839999467134476, + "p90": 66.23999960720539, + "p99": 78.71999964118004 + }, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 42.1999990940094, + "p90": 45.40000110864639, + "p99": 50.08000135421753 + }, + "combine": { + "p50": 20.0399998575449, + "p90": 22.1599992364645, + "p99": 26.079999282956123 + }, + "serial": { + "p50": 62.2399989515543, + "p90": 67.5600003451109, + "p99": 76.16000063717365 + }, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 42.27999970316887, + "p90": 45.48000171780586, + "p99": 50.1599982380867 + }, + "combine": { + "p50": 25.359999388456345, + "p90": 27.480000630021095, + "p99": 32.55999833345413 + }, + "serial": { + "p50": 67.63999909162521, + "p90": 72.96000234782696, + "p99": 82.71999657154083 + }, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-ea1e13cdaf24bc7b", + "identity": "mi355x|mori|prefill|normal|tuned|standardized|mori-normal-v1|mi355x-xgmi|8|8|bf16|balanced|7168|8|256", + "stitchKey": "mi355x|mori|normal|tuned|standardized|mori-normal-v1|mi355x-xgmi|8|8|bf16|balanced|7168|8|256", + "colorKey": "mi355x_f3b49abd", + "schemaVersion": 1, + "generatedAt": "2026-06-24T01:59:40.183797+00:00", + "status": "valid", + "sku": "mi355x", + "backend": "mori", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + 
"measurementContract": "mori-normal-v1", + "topologyClass": "mi355x-xgmi", + "transport": "xgmi", + "worldSize": 8, + "epSize": 8, + "label": "MI355X · mori · bf16 · EP8", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced", + "dispatchDtype": "bf16" + }, + "routingConsistent": null, + "traceSignature": null, + "backendVersion": "unknown", + "run": { + "id": "28069889124", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28069889124", + "createdAt": "2026-06-24T01:57:55Z", + "sha": "e2717a341cf1514d4be6393db16121889db7bf19" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 40.24000093340874, + "p90": 40.24000093340874, + "p99": 135.68000495433807 + }, + "combine": { + "p50": 16.07999950647354, + "p90": 16.07999950647354, + "p99": 36.80000081658363 + }, + "serial": { + "p50": 55.1999993622303, + "p90": 55.1999993622303, + "p99": 74.12099838256836 + }, + "dispatchLogicalBytes": 100352, + "combineLogicalBytes": 0, + "fanoutMean": null, + "recvTokensMax": 7, + "correct": true, + "samplesPooled": null, + "trials": null + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 43.04099828004837, + "p90": 43.04099828004837, + "p99": 83.15999805927277 + }, + "combine": { + "p50": 16.24000072479248, + "p90": 16.24000072479248, + "p99": 27.35999971628189 + }, + "serial": { + "p50": 58.72099846601486, + "p90": 58.72099846601486, + "p99": 82.04100281000137 + }, + "dispatchLogicalBytes": 200704, + "combineLogicalBytes": 0, + "fanoutMean": null, + "recvTokensMax": 14, + "correct": true, + "samplesPooled": null, + "trials": null + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 42.80000180006027, + "p90": 42.80000180006027, + "p99": 59.31999906897545 + }, + "combine": { + "p50": 19.23999935388565, + "p90": 19.23999935388565, + "p99": 28.48000079393387 + }, + "serial": { + "p50": 62.07999959588051, + "p90": 62.07999959588051, + "p99": 
89.6809995174408 + }, + "dispatchLogicalBytes": 372736, + "combineLogicalBytes": 0, + "fanoutMean": null, + "recvTokensMax": 26, + "correct": true, + "samplesPooled": null, + "trials": null + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 42.59999841451645, + "p90": 42.59999841451645, + "p99": 73.2010006904602 + }, + "combine": { + "p50": 21.28100022673607, + "p90": 21.28100022673607, + "p99": 29.759999364614487 + }, + "serial": { + "p50": 62.401000410318375, + "p90": 62.401000410318375, + "p99": 88.96099776029587 + }, + "dispatchLogicalBytes": 716800, + "combineLogicalBytes": 0, + "fanoutMean": null, + "recvTokensMax": 50, + "correct": true, + "samplesPooled": null, + "trials": null + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 42.681001126766205, + "p90": 42.681001126766205, + "p99": 91.92000329494476 + }, + "combine": { + "p50": 25.919999927282333, + "p90": 25.919999927282333, + "p99": 28.880000114440918 + }, + "serial": { + "p50": 67.35999882221222, + "p90": 67.35999882221222, + "p99": 102.9210016131401 + }, + "dispatchLogicalBytes": 1347584, + "combineLogicalBytes": 0, + "fanoutMean": null, + "recvTokensMax": 94, + "correct": true, + "samplesPooled": null, + "trials": null + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 42.92000085115433, + "p90": 42.92000085115433, + "p99": 81.60100132226944 + }, + "combine": { + "p50": 31.76100179553032, + "p90": 31.76100179553032, + "p99": 35.92099994421005 + }, + "serial": { + "p50": 74.60000365972519, + "p90": 74.60000365972519, + "p99": 113.08100074529648 + }, + "dispatchLogicalBytes": 2508800, + "combineLogicalBytes": 0, + "fanoutMean": null, + "recvTokensMax": 175, + "correct": true, + "samplesPooled": null, + "trials": null + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 45.20000144839287, + "p90": 45.20000144839287, + "p99": 244.24199759960175 + }, + "combine": { + "p50": 39.48099911212921, + 
"p90": 39.48099911212921, + "p99": 50.519999116659164 + }, + "serial": { + "p50": 84.84099805355072, + "p90": 84.84099805355072, + "p99": 126.40100717544556 + }, + "dispatchLogicalBytes": 5089280, + "combineLogicalBytes": 0, + "fanoutMean": null, + "recvTokensMax": 355, + "correct": true, + "samplesPooled": null, + "trials": null + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 60.08100137114525, + "p90": 60.08100137114525, + "p99": 90.04099667072296 + }, + "combine": { + "p50": 52.60099843144417, + "p90": 52.60099843144417, + "p99": 56.481000036001205 + }, + "serial": { + "p50": 111.08099669218063, + "p90": 111.08099669218063, + "p99": 145.28100192546844 + }, + "dispatchLogicalBytes": 9848832, + "combineLogicalBytes": 0, + "fanoutMean": null, + "recvTokensMax": 687, + "correct": true, + "samplesPooled": null, + "trials": null + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 88.76100182533264, + "p90": 88.76100182533264, + "p99": 166.8809950351715 + }, + "combine": { + "p50": 100.96099972724915, + "p90": 100.96099972724915, + "p99": 105.00100255012512 + }, + "serial": { + "p50": 191.68099761009216, + "p90": 191.68099761009216, + "p99": 227.44199633598328 + }, + "dispatchLogicalBytes": 19826688, + "combineLogicalBytes": 0, + "fanoutMean": null, + "recvTokensMax": 1383, + "correct": true, + "samplesPooled": null, + "trials": null + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 145.76099812984467, + "p90": 145.76099812984467, + "p99": 398.40400218963623 + }, + "combine": { + "p50": 110.76100170612335, + "p90": 110.76100170612335, + "p99": 115.64099788665771 + }, + "serial": { + "p50": 266.5629982948303, + "p90": 266.5629982948303, + "p99": 285.84301471710205 + }, + "dispatchLogicalBytes": 39438336, + "combineLogicalBytes": 0, + "fanoutMean": null, + "recvTokensMax": 2751, + "correct": true, + "samplesPooled": null, + "trials": null + } + ] + } + ], + "scannedRuns": 30, 
+ "contributingRuns": 9, + "generatedAt": "2026-06-25T09:08:45.533Z" +} diff --git a/packages/app/scripts/generate-collectivex-data.ts b/packages/app/scripts/generate-collectivex-data.ts new file mode 100644 index 00000000..849e8374 --- /dev/null +++ b/packages/app/scripts/generate-collectivex-data.ts @@ -0,0 +1,38 @@ +import { mkdir, readFile, writeFile } from 'node:fs/promises'; +import { resolve } from 'node:path'; + +import { generateCollectiveXSnapshot } from '../src/lib/collectivex-snapshot'; + +function argumentValue(name: string): string | undefined { + const index = process.argv.indexOf(name); + return index === -1 ? undefined : process.argv[index + 1]; +} + +async function main() { + const token = process.env.GITHUB_TOKEN || process.env.GH_TOKEN; + if (!token) throw new Error('GITHUB_TOKEN or GH_TOKEN is required'); + + const sourceRunId = + argumentValue('--source-run-id') || process.env.COLLECTIVEX_SOURCE_RUN_ID || undefined; + const snapshot = await generateCollectiveXSnapshot({ token, sourceRunId }); + const outputPath = resolve(__dirname, '..', 'public', 'data', 'collectivex.json'); + const contents = `${JSON.stringify(snapshot, null, 2)}\n`; + + await mkdir(resolve(outputPath, '..'), { recursive: true }); + const previous = await readFile(outputPath, 'utf8').catch(() => null); + if (previous === contents) { + console.log(`CollectiveX snapshot is unchanged: ${outputPath}`); + return; + } + + await writeFile(outputPath, contents, 'utf8'); + const rowCount = snapshot.series.reduce((total, series) => total + series.rows.length, 0); + console.log( + `Wrote ${snapshot.series.length} CollectiveX series (${rowCount} rows) from ${snapshot.contributingRuns} runs to ${outputPath}`, + ); +} + +main().catch((error: unknown) => { + console.error(error); + process.exitCode = 1; +}); diff --git a/packages/app/src/app/(dashboard)/collectivex/page.tsx b/packages/app/src/app/(dashboard)/collectivex/page.tsx new file mode 100644 index 00000000..d3380bd9 --- /dev/null 
+++ b/packages/app/src/app/(dashboard)/collectivex/page.tsx @@ -0,0 +1,10 @@ +import type { Metadata } from 'next'; + +import CollectiveXDisplay from '@/components/collectivex/CollectiveXDisplay'; +import { tabMetadata } from '@/lib/tab-meta'; + +export const metadata: Metadata = tabMetadata('collectivex'); + +export default function CollectiveXPage() { + return ; +} diff --git a/packages/app/src/app/sitemap.ts b/packages/app/src/app/sitemap.ts index d1717aa3..57cbda8c 100644 --- a/packages/app/src/app/sitemap.ts +++ b/packages/app/src/app/sitemap.ts @@ -12,6 +12,7 @@ const TABS = [ 'reliability', 'gpu-specs', 'gpu-metrics', + 'collectivex', ] as const; export default async function sitemap(): Promise { diff --git a/packages/app/src/components/collectivex/CollectiveXChart.tsx b/packages/app/src/components/collectivex/CollectiveXChart.tsx new file mode 100644 index 00000000..2796ab94 --- /dev/null +++ b/packages/app/src/components/collectivex/CollectiveXChart.tsx @@ -0,0 +1,252 @@ +'use client'; + +import * as d3 from 'd3'; +import { useMemo } from 'react'; + +import { D3Chart } from '@/lib/d3-chart/D3Chart'; + +import { chartPoints } from './data'; +import type { + CollectiveXChartPoint, + CollectiveXOperation, + CollectiveXPercentile, + CollectiveXSeries, + CollectiveXScale, + CollectiveXXAxis, + CollectiveXYAxis, +} from './types'; + +interface CollectiveXChartProps { + chartId: string; + series: CollectiveXSeries[]; + colors: Record; + operation: CollectiveXOperation; + percentile: CollectiveXPercentile; + xAxis: CollectiveXXAxis; + yAxis: CollectiveXYAxis; + xScaleType: CollectiveXScale; + yScaleType: CollectiveXScale; + compact?: boolean; + caption?: React.ReactNode; + legendElement?: React.ReactNode; + testId?: string; +} + +const OPERATION_LABELS: Record = { + dispatch: 'Dispatch', + combine: 'Combine', + serial: 'Serial (sum of isolated percentiles)', +}; + +const X_AXIS_LABELS: Record = { + 'tokens-per-rank': 'Source tokens / rank', + 'global-tokens': 
'Global source tokens', +}; + +const Y_AXIS_LABELS: Record = { + latency: 'Latency (µs)', + 'tokens-per-second': 'Tokens / s', + 'payload-rate': 'Logical routed payload rate (GB/s)', +}; + +function paddedDomain(values: number[], scaleType: CollectiveXScale): [number, number] { + if (values.length === 0) return scaleType === 'log' ? [1, 10] : [0, 1]; + const min = d3.min(values) ?? 0; + const max = d3.max(values) ?? 1; + if (min === max) { + if (scaleType === 'log') return [Math.max(min / 2, Number.MIN_VALUE), max * 2]; + const padding = Math.max(Math.abs(min) * 0.1, 1); + return [min - padding, max + padding]; + } + if (scaleType === 'log') return [min / 1.08, max * 1.08]; + const padding = (max - min) * 0.06; + return [Math.max(0, min - padding), max + padding]; +} + +function formatCompact(value: number): string { + if (value >= 1e9) return `${(value / 1e9).toFixed(value < 1e10 ? 1 : 0)}G`; + if (value >= 1e6) return `${(value / 1e6).toFixed(value < 1e7 ? 1 : 0)}M`; + if (value >= 1e3) return `${(value / 1e3).toFixed(value < 1e4 ? 1 : 0)}k`; + if (value >= 10) return value.toFixed(0); + if (value >= 1) return value.toFixed(value < 3 ? 1 : 0); + return value.toFixed(2); +} + +function formatMetric(value: number, yAxis: CollectiveXYAxis): string { + if (yAxis === 'latency') return `${value.toFixed(value >= 100 ? 0 : 1)} µs`; + if (yAxis === 'tokens-per-second') return `${formatCompact(value)} tok/s`; + return `${value.toFixed(value >= 100 ? 
0 : 2)} GB/s`; +} + +function escapeHtml(value: string): string { + return value + .replaceAll('&', '&') + .replaceAll('<', '<') + .replaceAll('>', '>') + .replaceAll('"', '"') + .replaceAll("'", '''); +} + +export function CollectiveXChart({ + chartId, + series, + colors, + operation, + percentile, + xAxis, + yAxis, + xScaleType, + yScaleType, + compact = false, + caption, + legendElement, + testId, +}: CollectiveXChartProps) { + const points = useMemo( + () => chartPoints(series, operation, percentile, xAxis, yAxis), + [series, operation, percentile, xAxis, yAxis], + ); + const seriesById = useMemo(() => new Map(series.map((item) => [item.id, item])), [series]); + const lines = useMemo(() => { + const result: Record = {}; + for (const point of points) { + (result[point.seriesId] ??= []).push({ x: point.x, y: point.y }); + } + for (const line of Object.values(result)) { + line.sort((a, b) => a.x - b.x); + } + return result; + }, [points]); + + const xDomain = useMemo( + () => + paddedDomain( + points.map((point) => point.x), + xScaleType, + ), + [points, xScaleType], + ); + const yDomain = useMemo( + () => + paddedDomain( + points.map((point) => point.y), + yScaleType, + ), + [points, yScaleType], + ); + + const noDataOverlay = + points.length === 0 ? ( +
+

No matching CollectiveX series.

+
+ ) : undefined; + + return ( + + chartId={chartId} + data={points} + height={compact ? 260 : 560} + margin={ + compact + ? { top: 16, right: 12, bottom: 48, left: 62 } + : { top: 24, right: 20, bottom: 62, left: 78 } + } + watermark={compact ? 'none' : 'logo'} + testId={testId} + grabCursor={!compact} + instructions={ + compact + ? '' + : 'Shift+Scroll to zoom · Drag to pan · Double-click to reset · Click a point to pin tooltip' + } + xScale={{ type: xScaleType, domain: xDomain, nice: true }} + yScale={{ type: yScaleType, domain: yDomain, nice: true }} + xAxis={{ + label: X_AXIS_LABELS[xAxis], + tickCount: compact ? 5 : 8, + tickFormat: (value) => formatCompact(Number(value)), + }} + yAxis={{ + label: Y_AXIS_LABELS[yAxis], + tickCount: compact ? 5 : 7, + tickFormat: (value) => formatCompact(Number(value)), + }} + layers={[ + { + type: 'line', + key: 'collectivex-lines', + lines, + config: { + getColor: (key) => colors[seriesById.get(key)?.colorKey ?? ''] ?? '#888', + getStrokeDasharray: (key) => + seriesById.get(key)?.shape.dispatchDtype === 'bf16' ? null : '6 4', + strokeWidth: compact ? 1.75 : 2.25, + curve: d3.curveMonotoneX, + }, + }, + { + type: 'point', + key: 'collectivex-points', + data: points, + config: { + getCx: () => 0, + getCy: () => 0, + getX: (point) => point.x, + getY: (point) => point.y, + getColor: (point) => colors[point.colorKey] ?? '#888', + getRadius: () => (compact ? 2.5 : 3.5), + stroke: 'var(--background)', + strokeWidth: compact ? 0.75 : 1, + keyFn: (point) => `${point.seriesId}-${point.x}`, + maxPoints: Infinity, + }, + }, + ]} + zoom={ + compact + ? undefined + : { + enabled: true, + axes: 'both', + scaleExtent: [1, 20], + resetEventName: `collectivex_zoom_reset_${chartId}`, + } + } + tooltip={{ + rulerType: 'crosshair', + attachToLayer: 1, + content: (point, isPinned) => { + const color = colors[point.colorKey] ?? '#888'; + const row = point.row; + const runLabel = point.series.run.id ? 
`Run ${point.series.run.id}` : 'Run unavailable'; + return `
+ ${isPinned ? '
Click elsewhere to dismiss
' : ''} +
${escapeHtml(point.seriesLabel)}
+
${escapeHtml(OPERATION_LABELS[operation])} ${percentile}: ${formatMetric(point.y, yAxis)}
+
${row.tokensPerRank} tokens/rank · ${row.globalTokens} global tokens
+
Dispatch p50/p90/p99: ${row.dispatch.p50.toFixed(1)} / ${row.dispatch.p90.toFixed(1)} / ${row.dispatch.p99.toFixed(1)} µs
+
Combine p50/p90/p99: ${row.combine.p50.toFixed(1)} / ${row.combine.p90.toFixed(1)} / ${row.combine.p99.toFixed(1)} µs
+
Fan-out: ${row.fanoutMean?.toFixed(2) ?? 'n/a'} · recv max: ${row.recvTokensMax ?? 'n/a'} · correctness: ${row.correct ? 'pass' : 'fail'}
+
${escapeHtml(point.series.suite)} · ${escapeHtml(point.series.topologyClass)}
+
${escapeHtml(runLabel)} · ${escapeHtml(point.series.measurementContract)}
+ ${row.stitchedFromDecode ? '
Decode-range point stitched into the prefill curve
' : ''} +
`; + }, + getRulerX: (point, scale) => + (scale as d3.ScaleLinear | d3.ScaleLogarithmic)(point.x), + getRulerY: (point, scale) => scale(point.y), + onHoverStart: (selection) => { + selection.attr('r', compact ? 4 : 6); + }, + onHoverEnd: (selection) => { + selection.attr('r', compact ? 2.5 : 3.5); + }, + }} + transitionDuration={200} + legendElement={legendElement} + noDataOverlay={noDataOverlay} + caption={caption} + /> + ); +} diff --git a/packages/app/src/components/collectivex/CollectiveXDisplay.tsx b/packages/app/src/components/collectivex/CollectiveXDisplay.tsx new file mode 100644 index 00000000..b3d04b49 --- /dev/null +++ b/packages/app/src/components/collectivex/CollectiveXDisplay.tsx @@ -0,0 +1,693 @@ +'use client'; + +import { ExternalLink, Loader2, RefreshCw } from 'lucide-react'; +import { useCallback, useEffect, useMemo, useState } from 'react'; + +import { track } from '@/lib/analytics'; +import { useCollectiveX } from '@/hooks/api/use-collectivex'; +import { useThemeColors } from '@/hooks/useThemeColors'; +import { Button } from '@/components/ui/button'; +import { Card } from '@/components/ui/card'; +import ChartLegend from '@/components/ui/chart-legend'; +import { Label } from '@/components/ui/label'; +import { SegmentedToggle, type SegmentedToggleOption } from '@/components/ui/segmented-toggle'; +import { + Select, + SelectContent, + SelectItem, + SelectTrigger, + SelectValue, +} from '@/components/ui/select'; + +import { CollectiveXChart } from './CollectiveXChart'; +import { comparisonDifferences, stitchCollectiveXPrefillSeries } from './data'; +import type { + CollectiveXOperation, + CollectiveXPercentile, + CollectiveXPhase, + CollectiveXScale, + CollectiveXSeries, + CollectiveXSuite, + CollectiveXXAxis, + CollectiveXYAxis, +} from './types'; + +const OPERATION_OPTIONS: SegmentedToggleOption[] = [ + { value: 'dispatch', label: 'Dispatch' }, + { value: 'combine', label: 'Combine' }, + { value: 'serial', label: 'Serial' }, +]; + +const 
PHASE_OPTIONS: SegmentedToggleOption[] = [ + { value: 'decode', label: 'Decode' }, + { value: 'prefill', label: 'Prefill' }, +]; + +const PERCENTILE_OPTIONS: SegmentedToggleOption[] = [ + { value: 'p50', label: 'p50' }, + { value: 'p90', label: 'p90' }, + { value: 'p99', label: 'p99' }, +]; + +const SUITE_OPTIONS: SegmentedToggleOption[] = [ + { value: 'all', label: 'All' }, + { value: 'backend-default', label: 'Backend default' }, + { value: 'resource-constrained', label: 'Resource constrained' }, +]; + +const SCALE_OPTIONS: SegmentedToggleOption[] = [ + { value: 'log', label: 'Log' }, + { value: 'linear', label: 'Linear' }, +]; + +const OPERATION_LABELS: Record = { + dispatch: 'Dispatch', + combine: 'Combine', + serial: 'Serial (sum of isolated percentiles)', +}; + +const Y_AXIS_LABELS: Record = { + latency: 'Latency', + 'tokens-per-second': 'Tokens / second', + 'payload-rate': 'Logical payload rate', +}; + +function formatDate(value: string): string { + const date = new Date(value); + if (Number.isNaN(date.getTime())) return value; + return new Intl.DateTimeFormat('en', { + dateStyle: 'medium', + timeStyle: 'short', + timeZone: 'UTC', + }).format(date); +} + +function uniqueRuns(series: CollectiveXSeries[]) { + const byId = new Map(); + for (const item of series) { + if (item.run.id) byId.set(item.run.id, item.run); + } + return [...byId.values()].toSorted( + (a, b) => Date.parse(b.createdAt ?? '') - Date.parse(a.createdAt ?? ''), + ); +} + +function ControlGroup({ label, children }: { label: string; children: React.ReactNode }) { + return ( +
+ + {children} +
+ ); +} + +function InlineLegend({ + series, + colors, +}: { + series: CollectiveXSeries[]; + colors: Record; +}) { + const entries = [ + ...new Map(series.map((item) => [`${item.colorKey}|${item.label}`, item])).values(), + ]; + return ( +
+ {entries.map((item) => ( + + + {item.label} + + ))} +
+ ); +} + +function LineStyleKey() { + return ( +
+ + + BF16 + + + + FP8 + +
+ ); +} + +export default function CollectiveXDisplay() { + const { data, error, isLoading, isFetching, refetch } = useCollectiveX(); + const [operation, setOperation] = useState('dispatch'); + const [phase, setPhase] = useState('decode'); + const [percentile, setPercentile] = useState('p50'); + const [suite, setSuite] = useState('all'); + const [xAxis, setXAxis] = useState('tokens-per-rank'); + const [yAxis, setYAxis] = useState('latency'); + const [xScaleType, setXScaleType] = useState('log'); + const [yScaleType, setYScaleType] = useState('log'); + const [activeSeriesIds, setActiveSeriesIds] = useState>(new Set()); + const [isLegendExpanded, setIsLegendExpanded] = useState(true); + const [highContrast, setHighContrast] = useState(false); + + const series = useMemo(() => stitchCollectiveXPrefillSeries(data?.series ?? []), [data?.series]); + + useEffect(() => { + if (series.length > 0) { + setActiveSeriesIds(new Set(series.map((item) => item.id))); + } + }, [series]); + + const suiteSeries = useMemo( + () => series.filter((item) => suite === 'all' || item.suite === suite), + [series, suite], + ); + const phaseSeries = useMemo( + () => suiteSeries.filter((item) => item.phase === phase), + [suiteSeries, phase], + ); + const activePhaseSeries = useMemo( + () => phaseSeries.filter((item) => activeSeriesIds.has(item.id)), + [phaseSeries, activeSeriesIds], + ); + const colorKeys = useMemo( + () => [...new Set(suiteSeries.map((item) => item.colorKey))], + [suiteSeries], + ); + const { resolveColor, getCssColor } = useThemeColors({ + highContrast, + activeKeys: colorKeys, + hcKeys: colorKeys, + hcVendorKeyFor: (key) => key.split('_')[0], + }); + const colors = useMemo( + () => Object.fromEntries(colorKeys.map((key) => [key, getCssColor(resolveColor(key, key))])), + [colorKeys, getCssColor, resolveColor], + ); + + const toggleSeries = useCallback((id: string) => { + setActiveSeriesIds((previous) => { + const next = new Set(previous); + if (next.has(id)) next.delete(id); + 
else next.add(id); + return next; + }); + track('collectivex_series_toggled', { series: id }); + }, []); + + const comparisonWarnings = useMemo( + () => comparisonDifferences(activePhaseSeries), + [activePhaseSeries], + ); + const runs = useMemo(() => uniqueRuns(data?.series ?? []), [data?.series]); + const hardwareCount = new Set(activePhaseSeries.map((item) => item.sku)).size; + const availablePhases = useMemo( + () => + PHASE_OPTIONS.map((option) => option.value).filter((candidate) => + suiteSeries.some((item) => item.phase === candidate), + ), + [suiteSeries], + ); + const overviewGroups = useMemo( + () => + availablePhases.flatMap((overviewPhase) => { + const phaseCandidates = suiteSeries.filter((item) => item.phase === overviewPhase); + const epSizes = [...new Set(phaseCandidates.map((item) => item.epSize))].toSorted( + (a, b) => { + if (a === b) return 0; + if (a === null) return 1; + if (b === null) return -1; + return a - b; + }, + ); + + return epSizes.map((epSize) => ({ + phase: overviewPhase, + epSize, + series: phaseCandidates.filter( + (item) => item.epSize === epSize && activeSeriesIds.has(item.id), + ), + })); + }), + [activeSeriesIds, availablePhases, suiteSeries], + ); + const hasLegacyP90Fallback = + percentile === 'p90' && activePhaseSeries.some((item) => item.schemaVersion < 3); + const routingIdentityProven = + series.length > 0 && series.every((item) => item.routingConsistent === true); + const pooledSamples = [ + ...new Set( + series.flatMap((item) => + item.rows + .map((row) => row.samplesPooled) + .filter((value): value is number => value !== null), + ), + ), + ].toSorted((a, b) => a - b); + + const legendItems = useMemo( + () => + phaseSeries.map((item) => ({ + name: item.id, + label: item.label, + color: colors[item.colorKey] ?? 
'var(--muted-foreground)', + isActive: activeSeriesIds.has(item.id), + title: `${item.topologyClass} · ${item.measurementContract}`, + onClick: () => toggleSeries(item.id), + })), + [phaseSeries, colors, activeSeriesIds, toggleSeries], + ); + + const handleRefresh = useCallback(() => { + track('collectivex_data_refreshed'); + void refetch(); + }, [refetch]); + + if (isLoading) { + return ( + + +

Loading CollectiveX artifacts...

+
+ ); + } + + if (error || !data) { + return ( + +

CollectiveX data unavailable

+

+ {error instanceof Error ? error.message : 'Failed to load CollectiveX artifacts.'} +

+ +
+ ); + } + + return ( +
+ +
+
+
+

CollectiveX

+ + Experimental + +
+

+ Cross-vendor MoE expert-parallel dispatch and combine benchmarks, rendered from a + generated snapshot of successful GitHub Actions artifacts produced by the{' '} + collectivex branch. +

+

+ These are experimental communication microbenchmarks, not official serving results. + Hardware topology, routing, EP degree, resource budget, and timing contract remain + part of every line's identity. +

+
+ +
+ +
+
+

{activePhaseSeries.length}

+

Visible configurations

+
+
+

{hardwareCount}

+

Hardware platforms

+
+
+

{data.contributingRuns}

+

Source workflow runs

+
+
+

{formatDate(data.generatedAt)}

+

Newest result (UTC)

+
+
+
+ + +
+ + { + setOperation(value); + track('collectivex_operation_changed', { operation: value }); + }} + ariaLabel="CollectiveX operation" + testId="collectivex-operation-toggle" + /> + + + { + setPhase(value); + track('collectivex_phase_changed', { phase: value }); + }} + ariaLabel="CollectiveX phase" + testId="collectivex-phase-toggle" + /> + + + { + setPercentile(value); + track('collectivex_percentile_changed', { percentile: value }); + }} + ariaLabel="CollectiveX percentile" + testId="collectivex-percentile-toggle" + /> + + + { + setSuite(value); + track('collectivex_suite_changed', { suite: value }); + }} + ariaLabel="CollectiveX comparison suite" + testId="collectivex-suite-toggle" + className="flex-wrap" + /> + + + + + + + + + { + setXScaleType(value); + track('collectivex_x_scale_changed', { scale: value }); + }} + ariaLabel="CollectiveX x scale" + testId="collectivex-x-scale-toggle" + /> + + + { + setYScaleType(value); + track('collectivex_y_scale_changed', { scale: value }); + }} + ariaLabel="CollectiveX y scale" + testId="collectivex-y-scale-toggle" + /> + +
+
+ + + +

+ {OPERATION_LABELS[operation]} · {phase} · {percentile} +

+

+ {Y_AXIS_LABELS[yAxis]} versus{' '} + {xAxis === 'tokens-per-rank' ? 'source tokens per rank' : 'global source tokens'}. + FP8 lines are dashed. +

+ + } + legendElement={ + { + setActiveSeriesIds((previous) => { + const next = new Set(previous); + next.delete(id); + return next; + }); + track('collectivex_series_hidden', { series: id }); + }} + isLegendExpanded={isLegendExpanded} + onExpandedChange={(expanded) => { + setIsLegendExpanded(expanded); + track('collectivex_legend_expanded', { expanded }); + }} + switches={[ + { + id: 'collectivex-high-contrast', + label: 'High Contrast', + checked: highContrast, + onCheckedChange: (checked) => { + setHighContrast(checked); + track('collectivex_high_contrast_toggled', { enabled: checked }); + }, + }, + ]} + keyIndicators={} + actions={ + activePhaseSeries.length < phaseSeries.length + ? [ + { + id: 'collectivex-reset-filter', + label: 'Reset filter', + onClick: () => { + setActiveSeriesIds((previous) => { + const next = new Set(previous); + for (const item of phaseSeries) next.add(item.id); + return next; + }); + track('collectivex_legend_filter_reset'); + }, + }, + ] + : [] + } + /> + } + /> + + {comparisonWarnings.length > 0 && ( +

+ Not directly comparable: selected lines vary in{' '} + {comparisonWarnings.join(', ')}. Use the suite selector and legend to isolate a + like-for-like subset before ranking hardware. +

+ )} + {hasLegacyP90Fallback && ( +

+ Some schema v1/v2 artifacts did not record p90; those points use their p50 value for the + p90 view. +

+ )} + {operation === 'serial' && ( +

+ Serial is the sum of independently measured dispatch and combine percentiles, not a + measured chained operation. +

+ )} + {yAxis === 'payload-rate' && ( +

+ Logical payload rate counts routed activation bytes divided by latency. It is not wire, + algorithmic, or bus bandwidth and excludes indices, scales, metadata, padding, and + protocol overhead. +

+ )} +
+ +
+
+

Latency Overview

+

+ Dispatch, combine, and serial latency panels mirror the generated CollectiveX report. + Prefill curves include lower-token decode points from the same fixed configuration. +

+
+ {overviewGroups.map((group) => { + const epLabel = group.epSize === null ? 'EP unknown' : `EP${group.epSize}`; + return ( + +

+ {group.phase} · {epLabel} · {percentile} latency +

+ +
+ {OPERATION_OPTIONS.map((option) => ( +
+

{option.label}

+ +
+ ))} +
+
+ ); + })} +
+ + +

Provenance

+

+ The generated snapshot scanned {data.scannedRuns} recent workflow runs and kept the newest + valid artifact for each fixed configuration. Raw environment captures and reproduction + commands remain in GitHub artifacts; this page exposes only chart and provenance fields. + Use the Update data workflow button to regenerate this file manually. +

+

+ Routing identity is{' '} + {routingIdentityProven ? 'proven across ranks' : 'not proven for every series'}. Latency + values are pooled per-iteration cross-rank maxima + {pooledSamples.length > 0 ? ` (${pooledSamples.join('/')} samples per point)` : ''}. + Correctness is a round-trip reconstruction smoke check, not a complete per-token routing + proof. Backend, topology, EP degree, dtype, mode, resource budget, and measurement + contract remain part of each line's identity. +

+ +
+
+ ); +} diff --git a/packages/app/src/components/collectivex/data.test.ts b/packages/app/src/components/collectivex/data.test.ts new file mode 100644 index 00000000..4d1f6361 --- /dev/null +++ b/packages/app/src/components/collectivex/data.test.ts @@ -0,0 +1,279 @@ +import { describe, expect, it } from 'vitest'; + +import { + chartPoints, + comparisonDifferences, + metricValue, + normalizeCollectiveXDocument, + selectLatestCollectiveXSeries, + stitchCollectiveXPrefillSeries, +} from './data'; +import type { CollectiveXSeries } from './types'; + +function rawDocument(overrides: Record = {}) { + return { + schema_version: 3, + family: 'moe', + generated_at: '2026-06-25T08:31:09Z', + status: 'valid', + comparison_key: 'abc123', + runner: 'mi355x-amds_04', + backend: 'mori', + phase: 'decode', + mode: 'normal', + resource_mode: 'normalized', + comparison_class: 'standardized', + measurement_contract: 'layout-and-dispatch-v1', + topology_class: 'mi355x-xgmi', + transport: 'xgmi', + world_size: 8, + ep_size: 8, + shape: { + hidden: 7168, + topk: 8, + experts: 256, + experts_per_rank: 32, + dispatch_dtype: 'bf16', + routing: 'uniform', + }, + backend_provenance: { + mori_commit: 'deadbeef', + }, + reproduction: { + git_run: { + run_id: '28156624181', + source_sha: 'cad380a65a01254ab5a470402ef247b8745d4243', + }, + }, + routing_identity: { + consistent_across_ranks: true, + trace_signature: 'trace-1', + }, + rows: [ + { + tokens_per_rank: 1, + global_tokens: 8, + dispatch_us_p50: 40, + dispatch_us_p90: 45, + dispatch_us_p99: 50, + combine_us_p50: 20, + combine_us_p90: 25, + combine_us_p99: 30, + serial_us_p50: 60, + serial_us_p90: 70, + serial_us_p99: 80, + dispatch_logical_bytes: 8000, + combine_logical_bytes: 4000, + fanout_mean: 5.5, + recv_tokens_max: 7, + samples_pooled: 600, + trials: 3, + correct: true, + }, + { + tokens_per_rank: 2, + global_tokens: 16, + dispatch_us_p50: 50, + dispatch_us_p90: 55, + dispatch_us_p99: 60, + combine_us_p50: 25, + combine_us_p90: 30, 
+ combine_us_p99: 35, + serial_us_p50: 75, + serial_us_p90: 85, + serial_us_p99: 95, + dispatch_logical_bytes: 16_000, + combine_logical_bytes: 8000, + correct: true, + }, + ], + ...overrides, + }; +} + +function normalized(overrides: Record = {}): CollectiveXSeries { + const result = normalizeCollectiveXDocument(rawDocument(overrides)); + if (!result) throw new Error('test fixture failed to normalize'); + return result; +} + +describe('normalizeCollectiveXDocument', () => { + it('normalizes v3 percentiles, provenance, bytes, and comparison metadata', () => { + const series = normalized(); + + expect(series).toMatchObject({ + id: 'cx-abc123', + sku: 'mi355x', + backend: 'mori', + phase: 'decode', + resourceMode: 'normalized', + suite: 'resource-constrained', + routingConsistent: true, + traceSignature: 'trace-1', + backendVersion: 'deadbeef', + }); + expect(series.label).toContain('MI355X'); + expect(series.label).toContain('EP8'); + expect(series.label).toContain('layout + dispatch'); + expect(series.run.id).toBe('28156624181'); + expect(series.run.url).toBe( + 'https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28156624181', + ); + expect(series.rows[0]).toMatchObject({ + dispatch: { p50: 40, p90: 45, p99: 50 }, + combine: { p50: 20, p90: 25, p99: 30 }, + serial: { p50: 60, p90: 70, p99: 80 }, + dispatchLogicalBytes: 8000, + combineLogicalBytes: 4000, + samplesPooled: 600, + trials: 3, + }); + }); + + it('supports v1 rows by falling back from p90 to p50 and roundtrip fields', () => { + const series = normalized({ + schema_version: 1, + comparison_key: 'legacy', + resource_mode: undefined, + phase: 'prefill', + reproduction: undefined, + routing_identity: undefined, + rows: [ + { + tokens_per_rank: 128, + global_tokens: 1024, + dispatch_us_p50: 100, + dispatch_us_p99: 150, + combine_us_p50: 80, + combine_us_p99: 120, + roundtrip_us_p50: 180, + roundtrip_us_p99: 270, + dispatch_bytes: 2048, + correct: true, + }, + ], + }); + + 
expect(series.resourceMode).toBe('tuned'); + expect(series.suite).toBe('backend-default'); + expect(series.rows[0].dispatch).toEqual({ p50: 100, p90: 100, p99: 150 }); + expect(series.rows[0].serial).toEqual({ p50: 180, p90: 180, p99: 270 }); + expect(series.rows[0].dispatchLogicalBytes).toBe(2048); + expect(series.rows[0].combineLogicalBytes).toBe(0); + }); + + it('does not treat a missing correctness flag as a pass', () => { + const document = rawDocument(); + const rows = document.rows as Record[]; + delete rows[0].correct; + + const series = normalizeCollectiveXDocument(document); + + expect(series?.rows[0].correct).toBe(false); + }); + + it('uses the workflow context when older documents lack run linkage', () => { + const series = normalizeCollectiveXDocument(rawDocument({ reproduction: undefined }), { + run: { + id: '123', + url: 'https://github.com/SemiAnalysisAI/InferenceX/actions/runs/123', + createdAt: '2026-06-24T00:00:00Z', + sha: 'feedface', + }, + }); + + expect(series?.run).toEqual({ + id: '123', + url: 'https://github.com/SemiAnalysisAI/InferenceX/actions/runs/123', + createdAt: '2026-06-24T00:00:00Z', + sha: 'feedface', + }); + }); + + it('rejects non-MoE documents, missing phases, and empty rows', () => { + expect(normalizeCollectiveXDocument(rawDocument({ family: 'nccl' }))).toBeNull(); + expect(normalizeCollectiveXDocument(rawDocument({ phase: undefined }))).toBeNull(); + expect(normalizeCollectiveXDocument(rawDocument({ rows: [] }))).toBeNull(); + }); +}); + +describe('CollectiveX series transforms', () => { + it('keeps only the newest valid result for an identical configuration', () => { + const older = normalized({ generated_at: '2026-06-24T00:00:00Z' }); + const newer = normalized({ generated_at: '2026-06-25T00:00:00Z' }); + const invalid = normalized({ + generated_at: '2026-06-26T00:00:00Z', + status: 'invalid', + }); + + expect(selectLatestCollectiveXSeries([older, invalid, newer])).toEqual([newer]); + }); + + it('prepends matching decode 
points below the prefill boundary', () => { + const decode = normalized(); + const prefill = normalized({ + comparison_key: 'prefill', + phase: 'prefill', + rows: [ + { + tokens_per_rank: 128, + global_tokens: 1024, + dispatch_us_p50: 100, + dispatch_us_p90: 110, + dispatch_us_p99: 120, + combine_us_p50: 80, + combine_us_p90: 90, + combine_us_p99: 100, + serial_us_p50: 180, + serial_us_p90: 200, + serial_us_p99: 220, + correct: true, + }, + ], + }); + + const stitched = stitchCollectiveXPrefillSeries([decode, prefill]); + const stitchedPrefill = stitched.find((item) => item.phase === 'prefill'); + + expect(stitchedPrefill?.rows.map((row) => row.tokensPerRank)).toEqual([1, 2, 128]); + expect(stitchedPrefill?.rows[0].stitchedFromDecode).toBe(true); + expect(stitchedPrefill?.rows[2].stitchedFromDecode).toBeUndefined(); + }); +}); + +describe('CollectiveX chart helpers', () => { + it('calculates latency, tokens/s, and logical payload rate from the selected operation', () => { + const row = normalized().rows[0]; + + expect(metricValue(row, 'dispatch', 'p50', 'latency')).toBe(40); + expect(metricValue(row, 'combine', 'p50', 'tokens-per-second')).toBeCloseTo(400_000); + expect(metricValue(row, 'serial', 'p50', 'payload-rate')).toBe(0.2); + }); + + it('builds chart points with the selected global-token x axis', () => { + const points = chartPoints([normalized()], 'dispatch', 'p99', 'global-tokens', 'latency'); + + expect(points).toHaveLength(2); + expect(points[0]).toMatchObject({ x: 8, y: 50, operation: 'dispatch', percentile: 'p99' }); + }); + + it('reports fields that make selected lines non-comparable', () => { + const base = normalized(); + const different = normalized({ + comparison_key: 'other', + runner: 'h100-dgxc_01', + resource_mode: 'tuned', + measurement_contract: 'cached-layout-comm-only-v1', + ep_size: 4, + world_size: 4, + shape: { + ...rawDocument().shape, + routing: 'balanced', + }, + }); + + expect(comparisonDifferences([base, different])).toEqual( + 
expect.arrayContaining(['EP degree', 'routing', 'resource mode', 'measurement contract']), + ); + }); +}); diff --git a/packages/app/src/components/collectivex/data.ts b/packages/app/src/components/collectivex/data.ts new file mode 100644 index 00000000..a3af0c35 --- /dev/null +++ b/packages/app/src/components/collectivex/data.ts @@ -0,0 +1,388 @@ +import type { + CollectiveXChartPoint, + CollectiveXOperation, + CollectiveXPercentile, + CollectiveXRow, + CollectiveXRunSource, + CollectiveXSeries, + CollectiveXXAxis, + CollectiveXYAxis, +} from './types'; + +interface NormalizeContext { + run?: Partial; +} + +function isRecord(value: unknown): value is Record { + return typeof value === 'object' && value !== null && !Array.isArray(value); +} + +function stringValue(value: unknown, fallback = ''): string { + return typeof value === 'string' ? value : fallback; +} + +function finiteNumber(value: unknown): number | null { + return typeof value === 'number' && Number.isFinite(value) ? value : null; +} + +function positiveNumber(value: unknown): number | null { + const number = finiteNumber(value); + return number !== null && number > 0 ? number : null; +} + +function nullableBoolean(value: unknown): boolean | null { + return typeof value === 'boolean' ? 
value : null; +} + +function percentile( + row: Record, + operation: 'dispatch' | 'combine' | 'serial', + percentileKey: CollectiveXPercentile, +): number | null { + const direct = finiteNumber(row[`${operation}_us_${percentileKey}`]); + if (direct !== null) return direct; + + if (operation === 'serial') { + const legacyRoundtrip = finiteNumber(row[`roundtrip_us_${percentileKey}`]); + if (legacyRoundtrip !== null) return legacyRoundtrip; + } + + if (percentileKey !== 'p50') { + return percentile(row, operation, 'p50'); + } + return null; +} + +function normalizeRow(raw: unknown, epSize: number | null): CollectiveXRow | null { + if (!isRecord(raw)) return null; + + const tokensPerRank = positiveNumber(raw.tokens_per_rank); + const globalTokens = + positiveNumber(raw.global_tokens) ?? + (tokensPerRank !== null && epSize !== null ? tokensPerRank * epSize : null); + if (tokensPerRank === null || globalTokens === null) return null; + + const dispatchP50 = percentile(raw, 'dispatch', 'p50'); + const combineP50 = percentile(raw, 'combine', 'p50'); + const serialP50 = percentile(raw, 'serial', 'p50'); + if (dispatchP50 === null || combineP50 === null || serialP50 === null) return null; + + const routedBytes = + finiteNumber(raw.dispatch_logical_bytes) ?? + finiteNumber(raw.routed_bytes_total) ?? + finiteNumber(raw.dispatch_bytes) ?? + 0; + const combineBytes = + finiteNumber(raw.combine_logical_bytes) ?? finiteNumber(raw.combine_bytes_total) ?? 0; + + return { + tokensPerRank, + globalTokens, + dispatch: { + p50: dispatchP50, + p90: percentile(raw, 'dispatch', 'p90') ?? dispatchP50, + p99: percentile(raw, 'dispatch', 'p99') ?? dispatchP50, + }, + combine: { + p50: combineP50, + p90: percentile(raw, 'combine', 'p90') ?? combineP50, + p99: percentile(raw, 'combine', 'p99') ?? combineP50, + }, + serial: { + p50: serialP50, + p90: percentile(raw, 'serial', 'p90') ?? serialP50, + p99: percentile(raw, 'serial', 'p99') ?? 
serialP50, + }, + dispatchLogicalBytes: Math.max(0, routedBytes), + combineLogicalBytes: Math.max(0, combineBytes), + fanoutMean: finiteNumber(raw.fanout_mean), + recvTokensMax: finiteNumber(raw.recv_tokens_max) ?? finiteNumber(raw.recv_tokens), + correct: raw.correct === true, + samplesPooled: finiteNumber(raw.samples_pooled), + trials: finiteNumber(raw.trials), + }; +} + +function skuFromRunner(runner: string): string { + return runner.split('_')[0]?.split('-')[0]?.toLowerCase() || 'unknown'; +} + +function backendVersion(raw: Record): string | null { + const provenance = isRecord(raw.backend_provenance) ? raw.backend_provenance : {}; + return ( + stringValue(provenance.deepep_version) || + stringValue(provenance.deepep_commit) || + stringValue(provenance.mori_commit) || + null + ); +} + +function stableHash(value: string): string { + let hash = 2166136261; + for (const character of value) { + hash ^= character.codePointAt(0) ?? 0; + hash = Math.imul(hash, 16777619); + } + const unsignedHash = hash < 0 ? 
hash + 0x1_0000_0000 : hash; + return unsignedHash.toString(16).padStart(8, '0'); +} + +function buildLabel(input: { + sku: string; + backend: string; + dtype: string; + mode: string; + resourceMode: string; + contract: string; + epSize: number | null; +}): string { + const parts = [input.sku.toUpperCase(), input.backend, input.dtype]; + if (input.epSize !== null) parts.push(`EP${input.epSize}`); + if (input.mode === 'll') parts.push('LL'); + if (input.resourceMode === 'normalized') parts.push('normalized'); + if (input.resourceMode === 'default') parts.push('default budget'); + if (input.contract === 'comm-only-v1') parts.push('comm only'); + if (input.contract === 'layout-and-dispatch-v1') parts.push('layout + dispatch'); + if (input.contract === 'cached-layout-comm-only-v1') parts.push('cached layout'); + return parts.join(' · '); +} + +export function normalizeCollectiveXDocument( + raw: unknown, + context: NormalizeContext = {}, +): CollectiveXSeries | null { + if (!isRecord(raw) || raw.family !== 'moe' || !Array.isArray(raw.rows)) return null; + + const phase = raw.phase === 'decode' || raw.phase === 'prefill' ? raw.phase : null; + if (!phase || raw.rows.length === 0) return null; + + const schemaVersion = finiteNumber(raw.schema_version) ?? 1; + const runner = stringValue(raw.runner, 'unknown'); + const sku = skuFromRunner(runner); + const backend = stringValue(raw.backend, 'unknown'); + const mode = stringValue(raw.mode, 'normal'); + const resourceMode = stringValue(raw.resource_mode) || 'tuned'; + const suite = resourceMode === 'normalized' ? 
'resource-constrained' : 'backend-default'; + const comparisonClass = stringValue(raw.comparison_class, 'standardized'); + const measurementContract = stringValue(raw.measurement_contract, 'unknown'); + const topologyClass = stringValue(raw.topology_class, 'unknown'); + const transport = stringValue(raw.transport, 'unknown'); + const worldSize = positiveNumber(raw.world_size); + const epSize = positiveNumber(raw.ep_size) ?? worldSize; + const shape = isRecord(raw.shape) ? raw.shape : {}; + const dtype = stringValue(shape.dispatch_dtype, 'unknown'); + const routing = stringValue(shape.routing, 'unknown'); + const hidden = positiveNumber(shape.hidden); + const topk = positiveNumber(shape.topk); + const experts = positiveNumber(shape.experts); + const generatedAt = + stringValue(raw.generated_at) || context.run?.createdAt || new Date(0).toISOString(); + const status = stringValue(raw.status, 'unknown'); + + const identity = [ + sku, + backend, + phase, + mode, + resourceMode, + comparisonClass, + measurementContract, + topologyClass, + worldSize ?? '', + epSize ?? '', + dtype, + routing, + hidden ?? '', + topk ?? '', + experts ?? '', + ].join('|'); + const stitchKey = [ + sku, + backend, + mode, + resourceMode, + comparisonClass, + measurementContract, + topologyClass, + worldSize ?? '', + epSize ?? '', + dtype, + routing, + hidden ?? '', + topk ?? '', + experts ?? '', + ].join('|'); + const comparisonKey = stringValue(raw.comparison_key); + const id = `cx-${comparisonKey || stableHash(identity)}`; + const colorKey = `${sku}_${stableHash(stitchKey)}`; + + const rows = raw.rows + .map((row) => normalizeRow(row, epSize)) + .filter((row): row is CollectiveXRow => row !== null) + .toSorted((a, b) => a.tokensPerRank - b.tokensPerRank); + if (rows.length === 0) return null; + + const reproduction = isRecord(raw.reproduction) ? raw.reproduction : {}; + const gitRun = isRecord(reproduction.git_run) ? 
reproduction.git_run : {}; + const runId = stringValue(gitRun.run_id) || context.run?.id || null; + const sourceSha = stringValue(gitRun.source_sha) || context.run?.sha || null; + const routingIdentity = isRecord(raw.routing_identity) ? raw.routing_identity : {}; + + return { + id, + identity, + stitchKey, + colorKey, + schemaVersion, + generatedAt, + status, + sku, + backend, + phase, + mode, + resourceMode, + suite, + comparisonClass, + measurementContract, + topologyClass, + transport, + worldSize, + epSize, + label: buildLabel({ + sku, + backend, + dtype, + mode, + resourceMode, + contract: measurementContract, + epSize, + }), + shape: { + hidden, + topk, + experts, + routing, + dispatchDtype: dtype, + }, + routingConsistent: nullableBoolean(routingIdentity.consistent_across_ranks), + traceSignature: stringValue(routingIdentity.trace_signature) || null, + backendVersion: backendVersion(raw), + run: { + id: runId, + url: + context.run?.url || + (runId + ? `https://github.com/SemiAnalysisAI/InferenceX/actions/runs/${encodeURIComponent(runId)}` + : null), + createdAt: context.run?.createdAt || generatedAt || null, + sha: sourceSha, + }, + rows, + }; +} + +export function selectLatestCollectiveXSeries(series: CollectiveXSeries[]): CollectiveXSeries[] { + const latestByIdentity = new Map(); + for (const item of series) { + if (item.status !== 'valid') continue; + const previous = latestByIdentity.get(item.identity); + if ( + !previous || + Date.parse(item.generatedAt) > Date.parse(previous.generatedAt) || + (item.generatedAt === previous.generatedAt && item.schemaVersion > previous.schemaVersion) + ) { + latestByIdentity.set(item.identity, item); + } + } + return [...latestByIdentity.values()].toSorted( + (a, b) => + a.sku.localeCompare(b.sku) || + a.backend.localeCompare(b.backend) || + a.phase.localeCompare(b.phase) || + a.label.localeCompare(b.label), + ); +} + +export function stitchCollectiveXPrefillSeries(series: CollectiveXSeries[]): CollectiveXSeries[] { + 
const decodeByKey = new Map( + series.filter((item) => item.phase === 'decode').map((item) => [item.stitchKey, item]), + ); + + return series.map((item) => { + if (item.phase !== 'prefill' || item.rows.length === 0) return item; + const decode = decodeByKey.get(item.stitchKey); + if (!decode) return item; + + const minPrefill = item.rows[0].tokensPerRank; + const prefix = decode.rows + .filter((row) => row.tokensPerRank < minPrefill) + .map((row) => ({ ...row, stitchedFromDecode: true })); + if (prefix.length === 0) return item; + return { ...item, rows: [...prefix, ...item.rows] }; + }); +} + +export function metricValue( + row: CollectiveXRow, + operation: CollectiveXOperation, + percentileKey: CollectiveXPercentile, + yAxis: CollectiveXYAxis, +): number { + const latencyUs = row[operation][percentileKey]; + if (yAxis === 'latency') return latencyUs; + if (yAxis === 'tokens-per-second') { + return latencyUs > 0 ? row.globalTokens / (latencyUs * 1e-6) : 0; + } + + const bytes = + operation === 'dispatch' + ? row.dispatchLogicalBytes + : operation === 'combine' + ? row.combineLogicalBytes + : row.dispatchLogicalBytes + row.combineLogicalBytes; + return latencyUs > 0 ? bytes / (latencyUs * 1e3) : 0; +} + +export function chartPoints( + series: CollectiveXSeries[], + operation: CollectiveXOperation, + percentileKey: CollectiveXPercentile, + xAxis: CollectiveXXAxis, + yAxis: CollectiveXYAxis, +): CollectiveXChartPoint[] { + return series.flatMap((item) => + item.rows + .map((row) => ({ + seriesId: item.id, + seriesLabel: item.label, + colorKey: item.colorKey, + x: xAxis === 'tokens-per-rank' ? 
row.tokensPerRank : row.globalTokens, + y: metricValue(row, operation, percentileKey, yAxis), + operation, + percentile: percentileKey, + row, + series: item, + })) + .filter((point) => point.x > 0 && point.y > 0), + ); +} + +export function comparisonDifferences(series: CollectiveXSeries[]): string[] { + if (series.length < 2) return []; + const fields: [label: string, value: (item: CollectiveXSeries) => unknown][] = [ + ['EP degree', (item) => item.epSize], + ['routing', (item) => item.shape.routing], + ['dispatch dtype', (item) => item.shape.dispatchDtype], + ['kernel mode', (item) => item.mode], + ['resource mode', (item) => item.resourceMode], + ['measurement contract', (item) => item.measurementContract], + ['comparison class', (item) => item.comparisonClass], + ['hidden size', (item) => item.shape.hidden], + ['top-k', (item) => item.shape.topk], + ['expert count', (item) => item.shape.experts], + ]; + + return fields + .filter(([, getValue]) => new Set(series.map(getValue)).size > 1) + .map(([label]) => label); +} diff --git a/packages/app/src/components/collectivex/types.ts b/packages/app/src/components/collectivex/types.ts new file mode 100644 index 00000000..f914656b --- /dev/null +++ b/packages/app/src/components/collectivex/types.ts @@ -0,0 +1,92 @@ +export type CollectiveXPhase = 'decode' | 'prefill'; +export type CollectiveXOperation = 'dispatch' | 'combine' | 'serial'; +export type CollectiveXPercentile = 'p50' | 'p90' | 'p99'; +export type CollectiveXXAxis = 'tokens-per-rank' | 'global-tokens'; +export type CollectiveXYAxis = 'latency' | 'tokens-per-second' | 'payload-rate'; +export type CollectiveXScale = 'log' | 'linear'; +export type CollectiveXSuite = 'all' | 'backend-default' | 'resource-constrained'; + +export interface CollectiveXPercentiles { + p50: number; + p90: number; + p99: number; +} + +export interface CollectiveXRow { + tokensPerRank: number; + globalTokens: number; + dispatch: CollectiveXPercentiles; + combine: 
CollectiveXPercentiles; + serial: CollectiveXPercentiles; + dispatchLogicalBytes: number; + combineLogicalBytes: number; + fanoutMean: number | null; + recvTokensMax: number | null; + correct: boolean; + samplesPooled: number | null; + trials: number | null; + stitchedFromDecode?: boolean; +} + +export interface CollectiveXShape { + hidden: number | null; + topk: number | null; + experts: number | null; + routing: string; + dispatchDtype: string; +} + +export interface CollectiveXRunSource { + id: string | null; + url: string | null; + createdAt: string | null; + sha: string | null; +} + +export interface CollectiveXSeries { + id: string; + identity: string; + stitchKey: string; + colorKey: string; + schemaVersion: number; + generatedAt: string; + status: string; + sku: string; + backend: string; + phase: CollectiveXPhase; + mode: string; + resourceMode: string; + suite: Exclude; + comparisonClass: string; + measurementContract: string; + topologyClass: string; + transport: string; + worldSize: number | null; + epSize: number | null; + label: string; + shape: CollectiveXShape; + routingConsistent: boolean | null; + traceSignature: string | null; + backendVersion: string | null; + run: CollectiveXRunSource; + rows: CollectiveXRow[]; +} + +export interface CollectiveXApiResponse { + series: CollectiveXSeries[]; + scannedRuns: number; + contributingRuns: number; + generatedAt: string; +} + +export interface CollectiveXChartPoint { + seriesId: string; + seriesLabel: string; + colorKey: string; + x: number; + y: number; + operation: CollectiveXOperation; + percentile: CollectiveXPercentile; + row: CollectiveXRow; + series: CollectiveXSeries; +} diff --git a/packages/app/src/components/header/header.tsx b/packages/app/src/components/header/header.tsx index 57965518..896fd40c 100644 --- a/packages/app/src/components/header/header.tsx +++ b/packages/app/src/components/header/header.tsx @@ -22,6 +22,7 @@ const DASHBOARD_TABS = [ '/reliability', '/gpu-specs', '/gpu-metrics', 
+ '/collectivex', '/submissions', '/current-inferencex-image', ]; diff --git a/packages/app/src/components/tab-nav.tsx b/packages/app/src/components/tab-nav.tsx index ce5f3257..b631e910 100644 --- a/packages/app/src/components/tab-nav.tsx +++ b/packages/app/src/components/tab-nav.tsx @@ -29,6 +29,7 @@ const VISIBLE_TABS = [ { href: '/historical', label: 'Historical Trends', testId: 'tab-trigger-historical' }, { href: '/calculator', label: 'TCO Calculator', testId: 'tab-trigger-calculator' }, { href: '/gpu-specs', label: 'GPU Specs', testId: 'tab-trigger-gpu-specs' }, + { href: '/collectivex', label: 'CollectiveX', testId: 'tab-trigger-collectivex' }, { href: '/submissions', label: 'Submissions', testId: 'tab-trigger-submissions' }, ] as const; diff --git a/packages/app/src/hooks/api/use-collectivex.ts b/packages/app/src/hooks/api/use-collectivex.ts new file mode 100644 index 00000000..865c4259 --- /dev/null +++ b/packages/app/src/hooks/api/use-collectivex.ts @@ -0,0 +1,11 @@ +import { useQuery } from '@tanstack/react-query'; + +import { fetchCollectiveX } from '@/lib/api'; + +export function useCollectiveX() { + return useQuery({ + queryKey: ['collectivex'], + queryFn: ({ signal }) => fetchCollectiveX(signal), + staleTime: 5 * 60 * 1000, + }); +} diff --git a/packages/app/src/lib/api.test.ts b/packages/app/src/lib/api.test.ts index a1f29006..d8d60ffa 100644 --- a/packages/app/src/lib/api.test.ts +++ b/packages/app/src/lib/api.test.ts @@ -4,6 +4,7 @@ import { fetchBenchmarks, fetchWorkflowInfo, fetchAvailability, + fetchCollectiveX, fetchReliability, fetchEvaluations, } from './api'; @@ -126,3 +127,16 @@ describe('fetchEvaluations', () => { expect(result[0].task).toBe('gsm8k'); }); }); + +describe('fetchCollectiveX', () => { + it('fetches the generated static snapshot', async () => { + mockOk({ series: [], scannedRuns: 0, contributingRuns: 0, generatedAt: '' }); + + await fetchCollectiveX(); + + expect(mockFetch).toHaveBeenCalledWith( + '/data/collectivex.json', + 
expect.objectContaining({ cache: 'no-store' }), + ); + }); +}); diff --git a/packages/app/src/lib/api.ts b/packages/app/src/lib/api.ts index 0dac5883..15ba197b 100644 --- a/packages/app/src/lib/api.ts +++ b/packages/app/src/lib/api.ts @@ -4,6 +4,7 @@ */ import type { WorkerPower } from '@/components/inference/types'; +import type { CollectiveXApiResponse } from '@/components/collectivex/types'; import type { SubmissionsResponse } from './submissions-types'; @@ -134,8 +135,12 @@ export interface EvalRow { run_url: string | null; } -async function fetchJson(url: string, signal?: AbortSignal): Promise { - const res = await fetch(url, { signal }); +async function fetchJson( + url: string, + signal?: AbortSignal, + options?: Omit, +): Promise { + const res = await fetch(url, { ...options, signal }); if (!res.ok) throw new Error(`API error: ${res.status} ${res.statusText}`); return res.json(); } @@ -293,6 +298,12 @@ export function fetchSubmissions(signal?: AbortSignal) { return fetchJson('/api/v1/submissions', signal); } +export function fetchCollectiveX(signal?: AbortSignal) { + return fetchJson('/data/collectivex.json', signal, { + cache: 'no-store', + }); +} + export interface FeedbackListRow { id: string; created_at: string; diff --git a/packages/app/src/lib/collectivex-snapshot.test.ts b/packages/app/src/lib/collectivex-snapshot.test.ts new file mode 100644 index 00000000..51859717 --- /dev/null +++ b/packages/app/src/lib/collectivex-snapshot.test.ts @@ -0,0 +1,236 @@ +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; + +const state = vi.hoisted(() => ({ + document: {} as unknown, +})); + +vi.mock('@semianalysisai/inferencex-constants', () => ({ + GITHUB_API_BASE: 'https://api.github.com', + GITHUB_OWNER: 'SemiAnalysisAI', + GITHUB_REPO: 'InferenceX', +})); + +vi.mock('adm-zip', () => ({ + default: class MockAdmZip { + getEntries() { + return [ + { + entryName: 'result.json', + getData: () => Buffer.from(JSON.stringify(state.document)), + }, 
+ ]; + } + }, +})); + +import { generateCollectiveXSnapshot } from './collectivex-snapshot'; + +const originalFetch = globalThis.fetch; + +function workflowRun(id = 12345, status = 'completed') { + return { + id, + name: 'CollectiveX Experimental', + head_branch: 'collectivex', + head_sha: 'abc123', + created_at: '2026-06-25T08:00:00Z', + html_url: `https://github.com/SemiAnalysisAI/InferenceX/actions/runs/${id}`, + conclusion: status === 'completed' ? 'success' : null, + status, + }; +} + +function resultDocument() { + return { + schema_version: 3, + family: 'moe', + generated_at: '2026-06-25T08:01:00Z', + status: 'valid', + comparison_key: 'abc123', + runner: 'mi355x-amds_04', + backend: 'mori', + phase: 'decode', + mode: 'normal', + resource_mode: 'normalized', + comparison_class: 'standardized', + measurement_contract: 'layout-and-dispatch-v1', + topology_class: 'mi355x-xgmi', + transport: 'xgmi', + world_size: 8, + ep_size: 8, + shape: { + hidden: 7168, + topk: 8, + experts: 256, + dispatch_dtype: 'bf16', + routing: 'uniform', + }, + rows: [ + { + tokens_per_rank: 1, + global_tokens: 8, + dispatch_us_p50: 40, + dispatch_us_p90: 45, + dispatch_us_p99: 50, + combine_us_p50: 20, + combine_us_p90: 25, + combine_us_p99: 30, + serial_us_p50: 60, + serial_us_p90: 70, + serial_us_p99: 80, + dispatch_logical_bytes: 8000, + combine_logical_bytes: 4000, + correct: true, + }, + ], + }; +} + +function artifactList() { + return { + artifacts: [ + { + id: 1, + name: 'collectivex_mi355x_mori_decode_12345', + archive_download_url: 'https://api.github.com/artifacts/1/zip', + }, + ], + }; +} + +function artifactDownload() { + return { + ok: true, + headers: new Headers({ 'Content-Length': '1024' }), + arrayBuffer: () => Promise.resolve(new Uint8Array([1]).buffer), + }; +} + +beforeEach(() => { + state.document = resultDocument(); +}); + +afterEach(() => { + globalThis.fetch = originalFetch; + vi.restoreAllMocks(); +}); + +describe('generateCollectiveXSnapshot', () => { + 
it('discovers successful workflow artifacts and returns normalized latest series', async () => { + globalThis.fetch = vi + .fn() + .mockResolvedValueOnce({ + ok: true, + json: () => Promise.resolve({ workflow_runs: [workflowRun()] }), + }) + .mockResolvedValueOnce({ + ok: true, + json: () => Promise.resolve(artifactList()), + }) + .mockResolvedValueOnce(artifactDownload()); + + const snapshot = await generateCollectiveXSnapshot({ token: 'test-token' }); + + expect(snapshot).toMatchObject({ + scannedRuns: 1, + contributingRuns: 1, + generatedAt: '2026-06-25T08:01:00.000Z', + }); + expect(snapshot.series).toHaveLength(1); + expect(snapshot.series[0]).toMatchObject({ + id: 'cx-abc123', + sku: 'mi355x', + backend: 'mori', + run: { + id: '12345', + url: 'https://github.com/SemiAnalysisAI/InferenceX/actions/runs/12345', + }, + }); + }); + + it('includes a just-finished source run before GitHub marks the workflow successful', async () => { + globalThis.fetch = vi + .fn() + .mockResolvedValueOnce({ + ok: true, + json: () => Promise.resolve({ workflow_runs: [] }), + }) + .mockResolvedValueOnce({ + ok: true, + json: () => Promise.resolve(workflowRun(67890, 'in_progress')), + }) + .mockResolvedValueOnce({ + ok: true, + json: () => Promise.resolve(artifactList()), + }) + .mockResolvedValueOnce(artifactDownload()); + + const snapshot = await generateCollectiveXSnapshot({ + token: 'test-token', + sourceRunId: '67890', + }); + + expect(snapshot.scannedRuns).toBe(1); + expect(snapshot.series[0].run.id).toBe('67890'); + }); + + it('rejects a source run from a different branch', async () => { + globalThis.fetch = vi + .fn() + .mockResolvedValueOnce({ + ok: true, + json: () => Promise.resolve({ workflow_runs: [] }), + }) + .mockResolvedValueOnce({ + ok: true, + json: () => Promise.resolve({ ...workflowRun(67890), head_branch: 'main' }), + }); + + await expect( + generateCollectiveXSnapshot({ token: 'test-token', sourceRunId: '67890' }), + ).rejects.toThrow('is not from the 
collectivex branch'); + }); + + it('ignores non-EP CollectiveX artifacts', async () => { + globalThis.fetch = vi + .fn() + .mockResolvedValueOnce({ + ok: true, + json: () => Promise.resolve({ workflow_runs: [workflowRun()] }), + }) + .mockResolvedValueOnce({ + ok: true, + json: () => + Promise.resolve({ + artifacts: [ + { + id: 1, + name: 'collectivex_mi355x_nccl_12345', + archive_download_url: 'https://api.github.com/artifacts/1/zip', + }, + ], + }), + }); + + await expect(generateCollectiveXSnapshot({ token: 'test-token' })).rejects.toThrow( + 'No valid CollectiveX EP results', + ); + }); + + it('requires a GitHub token', async () => { + await expect(generateCollectiveXSnapshot({ token: '' })).rejects.toThrow( + 'GitHub token not configured', + ); + }); + + it('surfaces workflow discovery failures', async () => { + globalThis.fetch = vi.fn().mockResolvedValueOnce({ + ok: false, + status: 502, + }); + + await expect(generateCollectiveXSnapshot({ token: 'test-token' })).rejects.toThrow( + 'Failed to list CollectiveX workflow runs: 502', + ); + }); +}); diff --git a/packages/app/src/lib/collectivex-snapshot.ts b/packages/app/src/lib/collectivex-snapshot.ts new file mode 100644 index 00000000..b32708d7 --- /dev/null +++ b/packages/app/src/lib/collectivex-snapshot.ts @@ -0,0 +1,212 @@ +import { GITHUB_API_BASE, GITHUB_OWNER, GITHUB_REPO } from '@semianalysisai/inferencex-constants'; + +import { + normalizeCollectiveXDocument, + selectLatestCollectiveXSeries, +} from '@/components/collectivex/data'; +import type { CollectiveXApiResponse, CollectiveXSeries } from '@/components/collectivex/types'; + +import { + downloadGithubArtifact, + extractZipEntries, + fetchGithubRunArtifacts, + fetchGithubWorkflowRun, + type GithubArtifact, + type GithubWorkflowRun, +} from './github-artifacts'; + +const WORKFLOW_FILE = 'collectivex-experimental.yml'; +const WORKFLOW_BRANCH = 'collectivex'; +const DEFAULT_MAX_DISCOVERY_RUNS = 30; +const DOWNLOAD_CONCURRENCY = 6; +const 
MAX_ARTIFACT_BYTES = 10 * 1024 * 1024; + +interface GithubWorkflowRunsResponse { + workflow_runs?: GithubWorkflowRun[]; +} + +interface ArtifactWithRun { + artifact: GithubArtifact; + run: GithubWorkflowRun; +} + +export interface GenerateCollectiveXSnapshotOptions { + token: string; + sourceRunId?: string; + maxDiscoveryRuns?: number; +} + +const GITHUB_HEADERS = { + Accept: 'application/vnd.github.v3+json', +} as const; + +async function inBatches( + items: T[], + batchSize: number, + task: (item: T) => Promise, +): Promise { + const results: R[] = []; + for (let offset = 0; offset < items.length; offset += batchSize) { + results.push(...(await Promise.all(items.slice(offset, offset + batchSize).map(task)))); + } + return results; +} + +async function fetchCompletedCollectiveXRuns( + token: string, + maxDiscoveryRuns: number, +): Promise { + const params = new URLSearchParams({ + branch: WORKFLOW_BRANCH, + status: 'success', + per_page: String(maxDiscoveryRuns), + }); + const response = await fetch( + `${GITHUB_API_BASE}/repos/${GITHUB_OWNER}/${GITHUB_REPO}/actions/workflows/${WORKFLOW_FILE}/runs?${params}`, + { + cache: 'no-store', + headers: { + ...GITHUB_HEADERS, + Authorization: `Bearer ${token}`, + }, + }, + ); + if (!response.ok) { + throw new Error(`Failed to list CollectiveX workflow runs: ${response.status}`); + } + const payload = (await response.json()) as GithubWorkflowRunsResponse; + return (payload.workflow_runs ?? 
[]).slice(0, maxDiscoveryRuns); +} + +async function fetchSourceRun(runId: string, token: string): Promise { + const response = await fetchGithubWorkflowRun(runId, token); + if (!response.ok) { + throw new Error(`Failed to load CollectiveX source run ${runId}: ${response.status}`); + } + const run = (await response.json()) as GithubWorkflowRun; + if (run.head_branch !== WORKFLOW_BRANCH) { + throw new Error(`CollectiveX source run ${runId} is not from the ${WORKFLOW_BRANCH} branch`); + } + return run; +} + +async function discoverRuns( + token: string, + maxDiscoveryRuns: number, + sourceRunId?: string, +): Promise { + const completedRuns = await fetchCompletedCollectiveXRuns(token, maxDiscoveryRuns); + if (!sourceRunId || completedRuns.some((run) => String(run.id) === sourceRunId)) { + return completedRuns; + } + + // The source workflow dispatches this generator after artifact upload but + // before the workflow itself reaches "success", so explicitly include it. + const sourceRun = await fetchSourceRun(sourceRunId, token); + return [sourceRun, ...completedRuns]; +} + +function isEpArtifact(artifact: GithubArtifact): boolean { + return ( + artifact.name.startsWith('collectivex_') && + (artifact.name.includes('_deepep_') || artifact.name.includes('_mori_')) + ); +} + +async function discoverArtifacts( + runs: GithubWorkflowRun[], + token: string, +): Promise { + const artifactLists = await inBatches(runs, DOWNLOAD_CONCURRENCY, async (run) => ({ + run, + artifacts: await fetchGithubRunArtifacts(String(run.id), token), + })); + + return artifactLists.flatMap(({ run, artifacts }) => + artifacts.filter(isEpArtifact).map((artifact) => ({ artifact, run })), + ); +} + +function parseArtifactDocuments(buffer: Buffer, artifactName: string): unknown[] { + return extractZipEntries( + buffer, + '.json', + (_entryName, contents) => { + const parsed = JSON.parse(contents) as unknown; + return Array.isArray(parsed) ? 
parsed : [parsed]; + }, + (entryName, error) => { + console.warn(`Failed to parse ${entryName} from ${artifactName}:`, error); + }, + ); +} + +async function downloadAndNormalize( + source: ArtifactWithRun, + token: string, +): Promise { + const response = await downloadGithubArtifact(source.artifact.archive_download_url, token); + if (!response.ok) { + console.warn( + `Failed to download CollectiveX artifact ${source.artifact.name}: ${response.status}`, + ); + return []; + } + + const contentLength = Number(response.headers.get('Content-Length') ?? 0); + if (contentLength > MAX_ARTIFACT_BYTES) { + console.warn(`Skipping oversized CollectiveX artifact ${source.artifact.name}`); + return []; + } + + const buffer = Buffer.from(await response.arrayBuffer()); + if (buffer.byteLength > MAX_ARTIFACT_BYTES) { + console.warn(`Skipping oversized CollectiveX artifact ${source.artifact.name}`); + return []; + } + + return parseArtifactDocuments(buffer, source.artifact.name) + .map((document) => + normalizeCollectiveXDocument(document, { + run: { + id: String(source.run.id), + url: source.run.html_url, + createdAt: source.run.created_at, + sha: source.run.head_sha, + }, + }), + ) + .filter((series): series is CollectiveXSeries => series !== null); +} + +export async function generateCollectiveXSnapshot({ + token, + sourceRunId, + maxDiscoveryRuns = DEFAULT_MAX_DISCOVERY_RUNS, +}: GenerateCollectiveXSnapshotOptions): Promise { + if (!token) throw new Error('GitHub token not configured'); + + const runs = await discoverRuns(token, maxDiscoveryRuns, sourceRunId); + const artifacts = await discoverArtifacts(runs, token); + const artifactSeries = await inBatches(artifacts, DOWNLOAD_CONCURRENCY, (source) => + downloadAndNormalize(source, token), + ); + const series = selectLatestCollectiveXSeries(artifactSeries.flat()); + if (series.length === 0) { + throw new Error('No valid CollectiveX EP results found in recent workflow runs'); + } + + const contributingRuns = new 
Set(series.map((item) => item.run.id).filter(Boolean)).size; + const newestTimestamp = Math.max( + ...series.map((item) => Date.parse(item.generatedAt)).filter(Number.isFinite), + ); + + return { + series, + scannedRuns: runs.length, + contributingRuns, + generatedAt: Number.isFinite(newestTimestamp) + ? new Date(newestTimestamp).toISOString() + : new Date(0).toISOString(), + }; +} diff --git a/packages/app/src/lib/d3-chart/layers/lines.test.ts b/packages/app/src/lib/d3-chart/layers/lines.test.ts index 6631bcc8..26216ef8 100644 --- a/packages/app/src/lib/d3-chart/layers/lines.test.ts +++ b/packages/app/src/lib/d3-chart/layers/lines.test.ts @@ -108,6 +108,28 @@ describe('renderLines', () => { } }); + it('sets a per-series stroke dash pattern when configured', () => { + const group = createMockGroup(); + const { xScale, yScale } = makeScales(); + renderLines( + group as any, + SAMPLE_LINES, + xScale, + yScale, + makeConfig({ + getStrokeDasharray: (key) => (key === 'seriesB' ? '6 4' : null), + }), + ); + + const paths = group.selectAll('.line-path'); + const dashByClass: Record = {}; + for (const el of paths.elements) { + dashByClass[el.attrs['class'] as string] = el.attrs['stroke-dasharray'] ?? 
null; + } + expect(dashByClass['line-path line-seriesA']).toBeNull(); + expect(dashByClass['line-path line-seriesB']).toBe('6 4'); + }); + it('generates valid d attribute from line generator', () => { const group = createMockGroup(); const { xScale, yScale } = makeScales(); diff --git a/packages/app/src/lib/d3-chart/layers/lines.ts b/packages/app/src/lib/d3-chart/layers/lines.ts index 79c394e9..fd17ff2e 100644 --- a/packages/app/src/lib/d3-chart/layers/lines.ts +++ b/packages/app/src/lib/d3-chart/layers/lines.ts @@ -6,6 +6,7 @@ type AnyXScale = ContinuousScale | d3.ScaleTime; export interface LineConfig { getColor: (key: string) => string; + getStrokeDasharray?: (key: string) => string | null; strokeWidth?: number; curve?: d3.CurveFactory; /** Return false to create gaps in the line (e.g., missing data points). */ @@ -58,6 +59,7 @@ export function renderLines( .merge(selection) .attr('class', (d) => `line-path line-${d.key}`) .attr('stroke', (d) => config.getColor(d.key)) + .attr('stroke-dasharray', (d) => config.getStrokeDasharray?.(d.key) ?? null) .attr('stroke-width', config.strokeWidth ?? 2) .attr('d', (d) => lineGenerator(d.points)); } diff --git a/packages/app/src/lib/tab-meta.ts b/packages/app/src/lib/tab-meta.ts index 9145532b..351388d9 100644 --- a/packages/app/src/lib/tab-meta.ts +++ b/packages/app/src/lib/tab-meta.ts @@ -15,6 +15,7 @@ export const VALID_TABS = [ 'calculator', 'reliability', 'gpu-specs', + 'collectivex', 'ai-chart', 'gpu-metrics', 'submissions', @@ -55,6 +56,11 @@ export const TAB_META: Record = description: 'Detailed GPU specifications for AI inference. Compare NVIDIA, AMD, and Intel GPUs — memory bandwidth, FLOPS, interconnects, and topology.', }, + collectivex: { + title: 'CollectiveX Communication Benchmarks', + description: + 'Experimental cross-vendor expert-parallel communication benchmarks. 
Compare MoE dispatch and combine latency across NVIDIA and AMD GPU platforms.', + }, 'ai-chart': { title: 'AI-Powered Chart Generation', description: From 206af2c1322800b973eb4d537da0abe0657fb6cb Mon Sep 17 00:00:00 2001 From: Oseltamivir <58582368+Oseltamivir@users.noreply.github.com> Date: Thu, 25 Jun 2026 18:21:32 +0800 Subject: [PATCH 02/23] fix: use log2 for CollectiveX token axis --- packages/app/cypress/e2e/collectivex.cy.ts | 7 +++++++ .../components/collectivex/CollectiveXChart.tsx | 14 +++++++++++--- .../lib/d3-chart/D3Chart/scale-builders.test.ts | 8 ++++++++ .../app/src/lib/d3-chart/D3Chart/scale-builders.ts | 6 +++++- packages/app/src/lib/d3-chart/D3Chart/types.ts | 2 +- 5 files changed, 32 insertions(+), 5 deletions(-) diff --git a/packages/app/cypress/e2e/collectivex.cy.ts b/packages/app/cypress/e2e/collectivex.cy.ts index 38dc6d33..91f8b29f 100644 --- a/packages/app/cypress/e2e/collectivex.cy.ts +++ b/packages/app/cypress/e2e/collectivex.cy.ts @@ -20,6 +20,13 @@ describe('CollectiveX', () => { cy.get('[data-testid="collectivex-explorer-chart"] svg').should('be.visible'); cy.get('[data-testid="collectivex-explorer-chart"] .line-path').should('have.length', 2); cy.get('[data-testid="collectivex-explorer-chart"] .point').should('have.length', 4); + cy.get('[data-testid="collectivex-explorer-chart"] .x-axis .tick text').then(($ticks) => { + const labels = $ticks + .toArray() + .map((tick) => tick.textContent?.trim()) + .filter(Boolean); + expect(labels).to.deep.equal(['1', '2', '4', '8', '16', '32', '64']); + }); cy.get('[data-testid="collectivex-comparison-warning"]') .should('contain.text', 'Not directly comparable') .and('contain.text', 'resource mode'); diff --git a/packages/app/src/components/collectivex/CollectiveXChart.tsx b/packages/app/src/components/collectivex/CollectiveXChart.tsx index 2796ab94..9d91d029 100644 --- a/packages/app/src/components/collectivex/CollectiveXChart.tsx +++ 
b/packages/app/src/components/collectivex/CollectiveXChart.tsx @@ -72,6 +72,10 @@ function formatCompact(value: number): string { return value.toFixed(2); } +function formatTokenCount(value: number): string { + return Number.isInteger(value) ? value.toLocaleString('en-US') : formatCompact(value); +} + function formatMetric(value: number, yAxis: CollectiveXYAxis): string { if (yAxis === 'latency') return `${value.toFixed(value >= 100 ? 0 : 1)} µs`; if (yAxis === 'tokens-per-second') return `${formatCompact(value)} tok/s`; @@ -160,12 +164,16 @@ export function CollectiveXChart({ ? '' : 'Shift+Scroll to zoom · Drag to pan · Double-click to reset · Click a point to pin tooltip' } - xScale={{ type: xScaleType, domain: xDomain, nice: true }} + xScale={ + xScaleType === 'log' + ? { type: 'log', domain: xDomain, base: 2, nice: false } + : { type: 'linear', domain: xDomain, nice: true } + } yScale={{ type: yScaleType, domain: yDomain, nice: true }} xAxis={{ - label: X_AXIS_LABELS[xAxis], + label: `${X_AXIS_LABELS[xAxis]}${xScaleType === 'log' ? ' (log2)' : ''}`, tickCount: compact ? 
5 : 8, - tickFormat: (value) => formatCompact(Number(value)), + tickFormat: (value) => formatTokenCount(Number(value)), }} yAxis={{ label: Y_AXIS_LABELS[yAxis], diff --git a/packages/app/src/lib/d3-chart/D3Chart/scale-builders.test.ts b/packages/app/src/lib/d3-chart/D3Chart/scale-builders.test.ts index 057cadf8..32e9f60c 100644 --- a/packages/app/src/lib/d3-chart/D3Chart/scale-builders.test.ts +++ b/packages/app/src/lib/d3-chart/D3Chart/scale-builders.test.ts @@ -46,6 +46,14 @@ describe('buildScale', () => { } }); + it('supports a custom log base without expanding the domain when nice=false', () => { + const scale = buildScale({ type: 'log', domain: [1, 128], base: 2, nice: false }, [0, 700]); + + expect('base' in scale && scale.base()).toBe(2); + expect(scale.domain()).toEqual([1, 128]); + expect('ticks' in scale && scale.ticks(8)).toEqual([1, 2, 4, 8, 16, 32, 64, 128]); + }); + it('builds a time scale', () => { const d1 = new Date('2025-01-01'); const d2 = new Date('2025-12-31'); diff --git a/packages/app/src/lib/d3-chart/D3Chart/scale-builders.ts b/packages/app/src/lib/d3-chart/D3Chart/scale-builders.ts index cb8ce2e7..c865575f 100644 --- a/packages/app/src/lib/d3-chart/D3Chart/scale-builders.ts +++ b/packages/app/src/lib/d3-chart/D3Chart/scale-builders.ts @@ -26,7 +26,11 @@ export function buildScale(config: ScaleConfig, range: [number, number]): BuiltS } case 'log': { - const l = d3.scaleLog().domain(config.domain).range(range); + const l = d3 + .scaleLog() + .base(config.base ?? 10) + .domain(config.domain) + .range(range); return config.nice === false ? 
l : l.nice(); } diff --git a/packages/app/src/lib/d3-chart/D3Chart/types.ts b/packages/app/src/lib/d3-chart/D3Chart/types.ts index 3062784e..51fc49bb 100644 --- a/packages/app/src/lib/d3-chart/D3Chart/types.ts +++ b/packages/app/src/lib/d3-chart/D3Chart/types.ts @@ -20,7 +20,7 @@ import type { RadarConfig } from '../layers/radar'; export type ScaleConfig = | { type: 'band'; domain: string[]; padding?: number } | { type: 'linear'; domain: [number, number]; nice?: boolean } - | { type: 'log'; domain: [number, number]; nice?: boolean } + | { type: 'log'; domain: [number, number]; nice?: boolean; base?: number } | { type: 'time'; domain: [Date, Date]; nice?: boolean }; // --------------------------------------------------------------------------- From 17f137d95acd046b92e95547f94dc20329a512a9 Mon Sep 17 00:00:00 2001 From: Oseltamivir <58582368+Oseltamivir@users.noreply.github.com> Date: Sat, 27 Jun 2026 10:21:15 +0800 Subject: [PATCH 03/23] feat: align CollectiveX with updated report --- .github/workflows/update-collectivex-data.yml | 4 +- packages/app/cypress/e2e/collectivex.cy.ts | 194 +- .../app/cypress/fixtures/api/collectivex.json | 2889 +- packages/app/public/data/collectivex.json | 74227 +++++++++++++++- .../app/scripts/generate-collectivex-data.ts | 2 +- .../collectivex/CollectiveXChart.tsx | 33 +- .../collectivex/CollectiveXDisplay.tsx | 444 +- .../collectivex/CollectiveXHeatmap.tsx | 162 + .../collectivex/CollectiveXScaling.tsx | 140 + .../collectivex/CollectiveXTables.tsx | 322 + .../src/components/collectivex/data.test.ts | 479 +- .../app/src/components/collectivex/data.ts | 936 +- .../app/src/components/collectivex/types.ts | 109 +- .../app/src/lib/collectivex-snapshot.test.ts | 176 +- packages/app/src/lib/collectivex-snapshot.ts | 168 +- .../app/src/lib/d3-chart/D3Chart/types.ts | 2 + .../d3-chart/D3Chart/useD3ChartRenderer.ts | 17 +- .../app/src/lib/d3-chart/chart-update.test.ts | 55 + packages/app/src/lib/d3-chart/chart-update.ts | 45 +- 19 files 
changed, 77608 insertions(+), 2796 deletions(-) create mode 100644 packages/app/src/components/collectivex/CollectiveXHeatmap.tsx create mode 100644 packages/app/src/components/collectivex/CollectiveXScaling.tsx create mode 100644 packages/app/src/components/collectivex/CollectiveXTables.tsx diff --git a/.github/workflows/update-collectivex-data.yml b/.github/workflows/update-collectivex-data.yml index 3bc75204..e98abe77 100644 --- a/.github/workflows/update-collectivex-data.yml +++ b/.github/workflows/update-collectivex-data.yml @@ -20,7 +20,7 @@ permissions: jobs: update: - timeout-minutes: 15 + timeout-minutes: 30 runs-on: ubuntu-latest env: DEFAULT_BRANCH: ${{ github.event.repository.default_branch }} @@ -49,7 +49,7 @@ jobs: - name: Generate static CollectiveX snapshot env: - GITHUB_TOKEN: ${{ secrets.INFX_MAIN_PAT }} + GITHUB_TOKEN: ${{ secrets.PAT }} run: | set -euo pipefail if [ -n "$SOURCE_RUN_ID" ]; then diff --git a/packages/app/cypress/e2e/collectivex.cy.ts b/packages/app/cypress/e2e/collectivex.cy.ts index 91f8b29f..238ab761 100644 --- a/packages/app/cypress/e2e/collectivex.cy.ts +++ b/packages/app/cypress/e2e/collectivex.cy.ts @@ -6,6 +6,15 @@ function expectToggleOptions(testId: string, labels: string[]) { }); } +function xTickLabels() { + return cy.get('[data-testid="collectivex-explorer-chart"] .x-axis .tick text').then(($ticks) => + $ticks + .toArray() + .map((tick) => tick.textContent?.trim()) + .filter(Boolean), + ); +} + describe('CollectiveX', () => { beforeEach(() => { cy.intercept('GET', '/data/collectivex.json', { fixture: 'api/collectivex.json' }).as( @@ -15,30 +24,53 @@ describe('CollectiveX', () => { cy.wait('@collectivexData'); }); - it('renders the artifact-backed explorer and latency overview', () => { - cy.get('[data-testid="collectivex-display"]').should('contain.text', 'CollectiveX'); + it('renders the updated artifact-backed report hierarchy', () => { + cy.get('[data-testid="collectivex-display"]') + .should('contain.text', 
'CollectiveX') + .and('contain.text', 'Retained sweeps'); cy.get('[data-testid="collectivex-explorer-chart"] svg').should('be.visible'); - cy.get('[data-testid="collectivex-explorer-chart"] .line-path').should('have.length', 2); - cy.get('[data-testid="collectivex-explorer-chart"] .point').should('have.length', 4); - cy.get('[data-testid="collectivex-explorer-chart"] .x-axis .tick text').then(($ticks) => { - const labels = $ticks - .toArray() - .map((tick) => tick.textContent?.trim()) - .filter(Boolean); - expect(labels).to.deep.equal(['1', '2', '4', '8', '16', '32', '64']); - }); + cy.get('[data-testid="collectivex-explorer-chart"] .line-path').should('have.length', 3); + cy.get('[data-testid="collectivex-explorer-chart"] .point').should('have.length', 21); + xTickLabels().should('deep.equal', ['1', '2', '4', '8', '16', '32', '64', '128']); + cy.get('[data-testid="collectivex-comparison-warning"]') .should('contain.text', 'Not directly comparable') - .and('contain.text', 'resource mode'); - cy.get('[data-testid^="collectivex-overview-chart-"]').should('have.length', 12); - cy.get('[data-testid="collectivex-overview-decode-ep4"]').should('exist'); + .and('contain.text', 'source SHA'); + cy.get('[data-testid^="collectivex-overview-chart-"]').should('have.length', 6); cy.get('[data-testid="collectivex-overview-decode-ep8"]').should('exist'); - cy.get('[data-testid="collectivex-overview-prefill-ep4"]').should('exist'); cy.get('[data-testid="collectivex-overview-prefill-ep8"]').should('exist'); + cy.get('[data-testid^="collectivex-heatmap-"]').should('have.length.at.least', 3); + cy.get('[data-testid="collectivex-sensitivity-table"]').should('contain.text', 'zipf'); + cy.get('[data-testid="collectivex-failures-table"]').should( + 'contain.text', + 'roundtrip_gt_isolated_sum', + ); + cy.get('[data-testid="collectivex-coverage-table"]').should('contain.text', 'official'); + cy.get('[data-testid="collectivex-scaling-weak"]').should( + 'contain.text', + 'two or more EP 
degrees', + ); + }); + + it('uses evenly spaced measured powers of two on the log2 token axis', () => { + cy.get('[data-testid="collectivex-explorer-chart"] .x-axis .tick').then(($ticks) => { + const positions = $ticks.toArray().map((tick) => { + const transform = tick.getAttribute('transform') ?? ''; + const match = /translate\((?<x>[-\d.]+)/u.exec(transform); + return Number(match?.groups?.x); + }); + const gaps = positions.slice(1).map((position, index) => position - positions[index]); + expect(Math.max(...gaps) - Math.min(...gaps)).to.be.lessThan(0.5); + }); }); - it('exposes and applies every control from the generated v3 report', () => { - expectToggleOptions('collectivex-operation-toggle', ['Dispatch', 'Combine', 'Serial']); + it('exposes the new operation, routing, publication, and axis controls', () => { + expectToggleOptions('collectivex-operation-toggle', [ + 'Dispatch', + 'Combine', + 'Round trip', + 'Isolated sum', + ]); expectToggleOptions('collectivex-phase-toggle', ['Decode', 'Prefill']); expectToggleOptions('collectivex-percentile-toggle', ['p50', 'p90', 'p99']); expectToggleOptions('collectivex-suite-toggle', [ @@ -46,122 +78,86 @@ describe('CollectiveX', () => { 'Backend default', 'Resource constrained', ]); + expectToggleOptions('collectivex-publication-toggle', ['Publishable', 'Official only', 'All']); expectToggleOptions('collectivex-x-scale-toggle', ['Log', 'Linear']); expectToggleOptions('collectivex-y-scale-toggle', ['Log', 'Linear']); - cy.get('[data-testid="collectivex-operation-toggle"]').contains('button', 'Combine').click(); + cy.get('[data-testid="collectivex-routing-select"]').click(); + cy.get('[role="option"]').then(($options) => { + expect($options.toArray().map((option) => option.textContent?.trim())).to.deep.equal([ + 'All', + 'uniform', + 'zipf', + 'zipf+eplb', + ]); + }); + cy.contains('[role="option"]', 'zipf').click(); + cy.get('[data-testid="collectivex-explorer-chart"] .line-path').should('have.length', 1); +
cy.get('[data-testid="collectivex-main-chart"]').should('contain.text', 'zipf'); + + cy.get('[data-testid="collectivex-operation-toggle"]').contains('button', 'Round trip').click(); cy.get('[data-testid="collectivex-main-chart"]').should( 'contain.text', - 'Combine · decode · p50', + 'Round trip (measured)', ); - cy.get('[data-testid="collectivex-operation-toggle"]').contains('button', 'Serial').click(); - cy.get('[data-testid="collectivex-main-chart"]').should('contain.text', 'Serial'); - cy.get('[data-testid="collectivex-operation-toggle"]').contains('button', 'Dispatch').click(); - - cy.get('[data-testid="collectivex-phase-toggle"]').contains('button', 'Prefill').click(); - cy.get('[data-testid="collectivex-main-chart"]').should('contain.text', 'Dispatch · prefill'); - cy.get('[data-testid="collectivex-phase-toggle"]').contains('button', 'Decode').click(); + cy.get('[data-testid="collectivex-operation-toggle"]') + .contains('button', 'Isolated sum') + .click(); + cy.get('[data-testid="collectivex-main-chart"]') + .should('contain.text', 'Isolated sum (Σp, not measured)') + .and('contain.text', 'not a measured chained operation'); - cy.get('[data-testid="collectivex-percentile-toggle"]').contains('button', 'p90').click(); - cy.get('[data-testid="collectivex-main-chart"]').should('contain.text', 'decode · p90'); cy.get('[data-testid="collectivex-percentile-toggle"]').contains('button', 'p99').click(); cy.get('[data-testid="collectivex-main-chart"]').should('contain.text', 'decode · p99'); - cy.get('[data-testid="collectivex-percentile-toggle"]').contains('button', 'p50').click(); - - cy.get('[data-testid="collectivex-suite-toggle"]') - .contains('button', 'Backend default') - .click(); - cy.get('[data-testid="collectivex-suite-toggle"]') - .contains('button', 'Resource constrained') - .click(); - cy.get('[data-testid="collectivex-suite-toggle"]').contains('button', 'All').click(); cy.get('[data-testid="collectivex-x-axis-select"]').click(); - 
cy.get('[role="option"]').then(($options) => { - expect($options.toArray().map((option) => option.textContent?.trim())).to.deep.equal([ - 'Source tokens / rank', - 'Global source tokens', - ]); - }); cy.contains('[role="option"]', 'Global source tokens').click(); cy.get('[data-testid="collectivex-x-axis-select"]').should( 'contain.text', 'Global source tokens', ); - cy.get('[data-testid="collectivex-x-axis-select"]').click(); - cy.contains('[role="option"]', 'Source tokens / rank').click(); - cy.get('[data-testid="collectivex-y-axis-select"]').click(); - cy.get('[role="option"]').then(($options) => { - expect($options.toArray().map((option) => option.textContent?.trim())).to.deep.equal([ - 'Latency', - 'Tokens / second', - 'Logical routed payload rate', - ]); - }); - cy.contains('[role="option"]', 'Tokens / second').click(); - cy.get('[data-testid="collectivex-main-chart"]').should( - 'contain.text', - 'Tokens / second versus', - ); cy.get('[data-testid="collectivex-y-axis-select"]').click(); cy.contains('[role="option"]', 'Logical routed payload rate').click(); - cy.get('[data-testid="collectivex-main-chart"]').should( - 'contain.text', - 'Logical payload rate versus', - ); - cy.get('[data-testid="collectivex-y-axis-select"]').click(); - cy.contains('[role="option"]', 'Latency').click(); - - cy.get('[data-testid="collectivex-x-scale-toggle"]').contains('button', 'Linear').click(); - cy.get('[data-testid="collectivex-x-scale-toggle"]') - .contains('button', 'Linear') - .should('have.attr', 'aria-selected', 'true'); - cy.get('[data-testid="collectivex-x-scale-toggle"]').contains('button', 'Log').click(); - cy.get('[data-testid="collectivex-y-scale-toggle"]').contains('button', 'Linear').click(); - cy.get('[data-testid="collectivex-y-scale-toggle"]') - .contains('button', 'Linear') - .should('have.attr', 'aria-selected', 'true'); - cy.get('[data-testid="collectivex-y-scale-toggle"]').contains('button', 'Log').click(); - }); - - it('updates the rendered curve when the 
percentile changes', () => { - cy.get('[data-testid="collectivex-explorer-chart"] .line-path') - .first() - .invoke('attr', 'd') - .then((p50Path) => { - cy.get('[data-testid="collectivex-percentile-toggle"]').contains('button', 'p99').click(); - cy.get('[data-testid="collectivex-main-chart"]').should( - 'contain.text', - 'Dispatch · decode · p99', - ); - cy.get('[data-testid="collectivex-explorer-chart"] .line-path') - .first() - .invoke('attr', 'd') - .should('not.eq', p50Path); - }); + cy.get('[data-testid="collectivex-main-chart"]') + .should('contain.text', 'Logical payload rate versus') + .and('contain.text', 'not wire'); }); - it('stitches matching decode points into the prefill curves', () => { + it('keeps decode observations out of the prefill panel', () => { cy.get('[data-testid="collectivex-phase-toggle"]').contains('button', 'Prefill').click(); cy.get('[data-testid="collectivex-main-chart"]').should('contain.text', 'Dispatch · prefill'); - cy.get('[data-testid="collectivex-explorer-chart"] .point').should('have.length', 8); + cy.get('[data-testid="collectivex-explorer-chart"] .line-path').should('have.length', 2); + cy.get('[data-testid="collectivex-explorer-chart"] .point').should('have.length', 12); + xTickLabels().should('deep.equal', ['128', '256', '512', '1,024', '2,048', '4,096']); + cy.get('[data-testid="collectivex-main-chart"]').should( + 'not.contain.text', + 'stitched into the prefill curve', + ); }); - it('filters to the resource-constrained comparison suite', () => { + it('publication filtering keeps diagnostic data quarantined by default', () => { cy.get('[data-testid="collectivex-suite-toggle"]') .contains('button', 'Resource constrained') .click(); cy.get('[data-testid="collectivex-explorer-chart"] .line-path').should('have.length', 1); - cy.get('[data-testid="collectivex-comparison-warning"]').should('not.exist'); + + cy.get('[data-testid="collectivex-publication-toggle"]').contains('button', 'All').click(); + 
cy.get('[data-testid="collectivex-explorer-chart"] .line-path').should('have.length', 2); + cy.get('[data-testid="collectivex-main-chart"]').should('contain.text', 'MI355X'); + + cy.get('[data-testid="collectivex-publication-toggle"]') + .contains('button', 'Publishable') + .click(); + cy.get('[data-testid="collectivex-explorer-chart"] .line-path').should('have.length', 1); }); it('legend toggles remove and restore a rendered series', () => { cy.get('[data-testid="collectivex-main-chart"]').within(() => { - cy.contains('label', 'H100 · deepep · bf16 · EP4 · comm only').click(); + cy.contains('label', 'H100 EP8 · deepep · bf16').click(); }); - cy.get('[data-testid="collectivex-explorer-chart"] .line-path').should('have.length', 1); - cy.get('[data-testid="collectivex-main-chart"]').contains('button', 'Reset filter').click(); cy.get('[data-testid="collectivex-explorer-chart"] .line-path').should('have.length', 2); + cy.get('[data-testid="collectivex-main-chart"]').contains('button', 'Reset filter').click(); + cy.get('[data-testid="collectivex-explorer-chart"] .line-path').should('have.length', 3); }); }); diff --git a/packages/app/cypress/fixtures/api/collectivex.json b/packages/app/cypress/fixtures/api/collectivex.json index 05bc6e92..95180f8b 100644 --- a/packages/app/cypress/fixtures/api/collectivex.json +++ b/packages/app/cypress/fixtures/api/collectivex.json @@ -1,16 +1,16 @@ { - "scannedRuns": 12, - "contributingRuns": 4, - "generatedAt": "2026-06-25T08:31:09.000Z", + "snapshotVersion": 2, "series": [ { - "id": "cx-h100-decode", - "identity": "h100|deepep|decode", - "stitchKey": "h100|deepep|fixed", - "colorKey": "h100_fixture", + "id": "cx-7a284f4e", + "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h100_42947950", + "comparisonKey": "fb346b1019e55bb0", "schemaVersion": 3, - "generatedAt": "2026-06-24T22:50:09.000Z", + "generatedAt": 
"2026-06-26T23:51:32.113885+00:00", "status": "valid", + "publicationStatus": "official", + "runner": "h100-dgxc-slurm_14", "sku": "h100", "backend": "deepep", "phase": "decode", @@ -18,53 +18,349 @@ "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "cached-layout-comm-only-v1", + "measurementContract": "layout-and-dispatch-v1", "topologyClass": "h100-nvlink-island", "transport": "nvlink", - "worldSize": 4, - "epSize": 4, - "label": "H100 · deepep · bf16 · EP4 · comm only", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16", "shape": { "hidden": 7168, "topk": 8, "experts": 256, "routing": "uniform", - "dispatchDtype": "bf16" + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "h100-trace", + "traceSignature": "ac583971f94b176", + "workloadId": "set:8:d8d49658059863f2", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28134642131", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28134642131", - "createdAt": "2026-06-24T22:49:12Z", - "sha": "h100sha" + "id": "28271543513", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271543513", + "createdAt": "2026-06-26T23:46:04Z", + 
"sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" }, "rows": [ { "tokensPerRank": 1, - "globalTokens": 4, - "dispatch": { "p50": 60, "p90": 72, "p99": 90 }, - "combine": { "p50": 70, "p90": 82, "p99": 100 }, - "serial": { "p50": 130, "p90": 154, "p99": 190 }, + "globalTokens": 8, + "dispatch": { + "p50": 71.00799679756165, + "p90": 100.67199915647507, + "p95": 101.6319990158081, + "p99": 103.74400019645691 + }, + "combine": { + "p50": 73.34399968385696, + "p90": 81.79199695587158, + "p95": 117.47200042009354, + "p99": 304.4799864292145 + }, + "roundtrip": { + "p50": 126.52799487113953, + "p90": 130.3360015153885, + "p95": 131.84000551700592, + "p99": 137.95199990272522 + }, + "isolatedSum": { + "p50": 144.3519964814186, + "p90": 182.46399611234665, + "p95": 219.10399943590164, + "p99": 408.2239866256714 + }, + "roundtripMeasured": true, "dispatchLogicalBytes": 630784, "combineLogicalBytes": 630784, "fanoutMean": 5.5, "recvTokensMax": 7, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 64, + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 68.57600063085556, + "p90": 76.31999999284744, + "p95": 79.13599908351898, + "p99": 88.32000195980072 + }, + "combine": { + "p50": 72.54400104284286, + "p90": 73.98399710655212, + "p95": 74.36800003051758, + "p99": 78.84799689054489 + }, + "roundtrip": { + "p50": 126.81600451469421, + "p90": 131.1360001564026, + "p95": 134.24000144004822, + "p99": 137.69599795341492 + }, + "isolatedSum": { + "p50": 141.12000167369843, + "p90": 150.30399709939957, + "p95": 153.50399911403656, + "p99": 167.1679988503456 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 73.05599749088287, + "p90": 103.67999970912933, + "p95": 
108.51199924945831, + "p99": 261.34398579597473 + }, + "combine": { + "p50": 73.37599992752075, + "p90": 80.03199845552444, + "p95": 87.0399996638298, + "p99": 87.87199854850769 + }, + "roundtrip": { + "p50": 130.52800297737122, + "p90": 157.4079990386963, + "p95": 160.76800227165222, + "p99": 164.22399878501892 + }, + "isolatedSum": { + "p50": 146.43199741840363, + "p90": 183.71199816465378, + "p95": 195.55199891328812, + "p99": 349.2159843444824 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 71.45600020885468, + "p90": 98.88000041246414, + "p95": 103.00800204277039, + "p99": 109.69600081443787 + }, + "combine": { + "p50": 73.7600028514862, + "p90": 82.59200304746628, + "p95": 83.99999886751175, + "p99": 88.41600269079208 + }, + "roundtrip": { + "p50": 131.29599392414093, + "p90": 154.59200739860535, + "p95": 157.05600380897522, + "p99": 165.66400229930878 + }, + "isolatedSum": { + "p50": 145.21600306034088, + "p90": 181.47200345993042, + "p95": 187.00800091028214, + "p99": 198.11200350522995 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 82.97599852085114, + "p90": 100.8640006184578, + "p95": 103.26399654150009, + "p99": 108.44799876213074 + }, + "combine": { + "p50": 74.49600100517273, + "p90": 87.10400015115738, + "p95": 87.74399757385254, + "p99": 88.86399865150452 + }, + "roundtrip": { + "p50": 128.1919926404953, + "p90": 158.720001578331, + "p95": 161.53599321842194, + "p99": 164.09599781036377 + }, + "isolatedSum": { + "p50": 
157.47199952602386, + "p90": 187.96800076961517, + "p95": 191.00799411535263, + "p99": 197.31199741363525 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, "globalTokens": 256, - "dispatch": { "p50": 90, "p90": 110, "p99": 145 }, - "combine": { "p50": 105, "p90": 130, "p99": 170 }, - "serial": { "p50": 195, "p90": 240, "p99": 315 }, + "dispatch": { + "p50": 90.81599861383438, + "p90": 103.04000228643417, + "p95": 107.87200182676315, + "p99": 111.51999980211258 + }, + "combine": { + "p50": 81.50400221347809, + "p90": 89.9519994854927, + "p95": 90.43200314044952, + "p99": 96.19200229644775 + }, + "roundtrip": { + "p50": 140.47999680042267, + "p90": 163.29599916934967, + "p95": 166.87999665737152, + "p99": 171.03999853134155 + }, + "isolatedSum": { + "p50": 172.32000082731247, + "p90": 192.99200177192688, + "p95": 198.30400496721268, + "p99": 207.71200209856033 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 97.31200337409973, + "p90": 119.10399794578552, + "p95": 121.69600278139114, + "p99": 131.26400113105774 + }, + "combine": { + "p50": 90.20800143480301, + "p90": 97.15200215578079, + "p95": 103.93600165843964, + "p99": 104.47999835014343 + }, + "roundtrip": { + "p50": 162.1759980916977, + "p90": 181.7920058965683, + "p95": 184.4799965620041, + "p99": 187.74400651454926 + }, + "isolatedSum": { + "p50": 187.52000480890274, + "p90": 216.25600010156631, + "p95": 225.63200443983078, + "p99": 235.74399948120117 + }, + "roundtripMeasured": true, "dispatchLogicalBytes": 38993920, "combineLogicalBytes": 
38993920, - "fanoutMean": 5.31, + "fanoutMean": 5.3125, "recvTokensMax": 367, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 113.3119985461235, + "p90": 132.7359974384308, + "p95": 134.5919966697693, + "p99": 140.35199582576752 + }, + "combine": { + "p50": 108.41599851846695, + "p90": 120.44800072908401, + "p95": 120.7360029220581, + "p99": 121.47200107574463 + }, + "roundtrip": { + "p50": 198.2080042362213, + "p90": 216.86400473117828, + "p95": 221.24800086021423, + "p99": 223.80800545215607 + }, + "isolatedSum": { + "p50": 221.72799706459045, + "p90": 253.1839981675148, + "p95": 255.3279995918274, + "p99": 261.82399690151215 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -72,13 +368,15 @@ ] }, { - "id": "cx-h100-prefill", - "identity": "h100|deepep|prefill", - "stitchKey": "h100|deepep|fixed", - "colorKey": "h100_fixture", + "id": "cx-efe3a643", + "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "h100_42947950", + "comparisonKey": "4c920ba7523ac63b", "schemaVersion": 3, - "generatedAt": "2026-06-24T22:50:10.000Z", + "generatedAt": "2026-06-26T23:47:28.966623+00:00", "status": "valid", + "publicationStatus": "official", + "runner": "h100-dgxc-slurm_08", "sku": "h100", "backend": "deepep", "phase": "prefill", @@ -86,53 +384,275 @@ "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "cached-layout-comm-only-v1", + "measurementContract": "layout-and-dispatch-v1", "topologyClass": "h100-nvlink-island", "transport": "nvlink", - "worldSize": 4, - "epSize": 4, - "label": "H100 · deepep · bf16 · EP4 · comm only", + 
"worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16", "shape": { "hidden": 7168, "topk": 8, "experts": 256, "routing": "uniform", - "dispatchDtype": "bf16" + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "h100-trace", + "traceSignature": "64d989e2e2a6b31", + "workloadId": "set:6:a426d66e479dc893", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28134642131", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28134642131", - "createdAt": "2026-06-24T22:49:12Z", - "sha": "h100sha" + "id": "28271547494", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271547494", + "createdAt": "2026-06-26T23:46:11Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" }, "rows": [ { "tokensPerRank": 128, - "globalTokens": 512, - "dispatch": { "p50": 125, "p90": 150, "p99": 190 }, - "combine": { "p50": 150, "p90": 180, "p99": 230 }, - "serial": { "p50": 275, "p90": 330, "p99": 420 }, + "globalTokens": 1024, + "dispatch": { + "p50": 111.84000223875046, + "p90": 124.15999919176102, + "p95": 131.1360001564026, + "p99": 137.66400516033173 + }, + "combine": { + "p50": 106.6880002617836, + "p90": 114.30399864912033, + "p95": 120.09599804878235, + "p99": 
123.03999811410904 + }, + "roundtrip": { + "p50": 199.0399956703186, + "p90": 207.58399367332458, + "p95": 216.3199931383133, + "p99": 222.1119999885559 + }, + "isolatedSum": { + "p50": 218.52800250053406, + "p90": 238.46399784088135, + "p95": 251.23199820518494, + "p99": 260.70400327444077 + }, + "roundtripMeasured": true, "dispatchLogicalBytes": 77672448, "combineLogicalBytes": 77672448, - "fanoutMean": 5.29, + "fanoutMean": 5.291015625, "recvTokensMax": 723, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 512, + "tokensPerRank": 256, "globalTokens": 2048, - "dispatch": { "p50": 220, "p90": 260, "p99": 320 }, - "combine": { "p50": 300, "p90": 360, "p99": 440 }, - "serial": { "p50": 520, "p90": 620, "p99": 760 }, + "dispatch": { + "p50": 142.97600090503693, + "p90": 152.3520052433014, + "p95": 161.28000617027283, + "p99": 169.21600699424744 + }, + "combine": { + "p50": 150.176003575325, + "p90": 155.68000078201294, + "p95": 162.36799955368042, + "p99": 171.26399278640747 + }, + "roundtrip": { + "p50": 263.2319927215576, + "p90": 269.72800493240356, + "p95": 276.0320007801056, + "p99": 290.5920147895813 + }, + "isolatedSum": { + "p50": 293.15200448036194, + "p90": 308.03200602531433, + "p95": 323.64800572395325, + "p99": 340.4799997806549 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155889664, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 200.8640021085739, + "p90": 211.39200031757355, + "p95": 214.27200734615326, + "p99": 220.96000611782074 + }, + "combine": { + "p50": 229.72799837589264, + "p90": 236.67199909687042, + "p95": 238.71999979019165, + "p99": 246.2719976902008 + }, + "roundtrip": { + "p50": 400.86400508880615, + "p90": 413.5040044784546, + "p95": 418.94400119781494, + "p99": 
428.51200699806213 + }, + "isolatedSum": { + "p50": 430.59200048446655, + "p90": 448.06399941444397, + "p95": 452.9920071363449, + "p99": 467.23200380802155 + }, + "roundtripMeasured": true, "dispatchLogicalBytes": 312266752, "combineLogicalBytes": 312266752, - "fanoutMean": 5.32, + "fanoutMean": 5.31787109375, "recvTokensMax": 2779, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 303.1040132045746, + "p90": 308.9280128479004, + "p95": 311.2959861755371, + "p99": 318.015992641449 + }, + "combine": { + "p50": 365.9839928150177, + "p90": 372.8959858417511, + "p95": 375.39198994636536, + "p99": 382.4320137500763 + }, + "roundtrip": { + "p50": 644.8000073432922, + "p90": 654.528021812439, + "p95": 657.8879952430725, + "p99": 668.4799790382385 + }, + "isolatedSum": { + "p50": 669.0880060195923, + "p90": 681.8239986896515, + "p95": 686.6879761219025, + "p99": 700.4480063915253 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 623443968, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 526.8800258636475, + "p90": 540.5759811401367, + "p95": 545.0239777565002, + "p99": 551.6160130500793 + }, + "combine": { + "p50": 638.0159854888916, + "p90": 650.2400040626526, + "p95": 653.1519889831543, + "p99": 660.1920127868652 + }, + "roundtrip": { + "p50": 1135.424017906189, + "p90": 1147.7760076522827, + "p95": 1151.0720252990723, + "p99": 1157.5039625167847 + }, + "isolatedSum": { + "p50": 1164.896011352539, + "p90": 1190.8159852027893, + "p95": 1198.1759667396545, + "p99": 1211.8080258369446 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243805696, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + 
"stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1005.2160024642944, + "p90": 1027.2639989852905, + "p95": 1033.5359573364258, + "p99": 1050.271987915039 + }, + "combine": { + "p50": 1168.511986732483, + "p90": 1181.7599534988403, + "p95": 1189.1520023345947, + "p99": 1202.015995979309 + }, + "roundtrip": { + "p50": 2131.455898284912, + "p90": 2150.815963745117, + "p95": 2158.112049102783, + "p99": 2167.3600673675537 + }, + "isolatedSum": { + "p50": 2173.7279891967773, + "p90": 2209.023952484131, + "p95": 2222.6879596710205, + "p99": 2252.287983894348 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487009280, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -140,53 +660,291 @@ ] }, { - "id": "cx-mi355x-decode", - "identity": "mi355x|mori|decode", - "stitchKey": "mi355x|mori|fixed", - "colorKey": "mi355x_fixture", + "id": "cx-9ca51f4f", + "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h200_d982b749", + "comparisonKey": "4dde4e46080a91eb", "schemaVersion": 3, - "generatedAt": "2026-06-25T08:31:09.000Z", + "generatedAt": "2026-06-26T23:49:18.590174+00:00", "status": "valid", - "sku": "mi355x", - "backend": "mori", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_4", + "sku": "h200", + "backend": "deepep", "phase": "decode", "mode": "normal", - "resourceMode": "normalized", - "suite": "resource-constrained", + "resourceMode": "tuned", + "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "mi355x-xgmi", - "transport": "xgmi", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "MI355X 
· mori · bf16 · EP8 · normalized · layout + dispatch", + "label": "H200 EP8 · deepep · bf16", "shape": { "hidden": 7168, "topk": 8, "experts": 256, "routing": "uniform", - "dispatchDtype": "bf16" + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "mi355x-trace", - "backendVersion": "mori-0227", + "traceSignature": "ac583971f94b176", + "workloadId": "set:8:d8d49658059863f2", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28156624181", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28156624181", - "createdAt": "2026-06-25T08:17:23Z", - "sha": "mi355xsha" + "id": "28271601584", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271601584", + "createdAt": "2026-06-26T23:47:53Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" }, "rows": [ { "tokensPerRank": 1, "globalTokens": 8, - "dispatch": { "p50": 41, "p90": 45, "p99": 63 }, - "combine": { "p50": 18, "p90": 20, "p99": 24 }, - "serial": { "p50": 59, "p90": 65, "p99": 87 }, + "dispatch": { + "p50": 105.0880029797554, + "p90": 132.7040046453476, + "p95": 145.21600306034088, + "p99": 190.11199474334717 + }, + "combine": { + "p50": 71.3919997215271, + "p90": 93.37600320577621, + "p95": 98.01600128412247, 
+ "p99": 108.51199924945831 + }, + "roundtrip": { + "p50": 123.45600128173828, + "p90": 180.60800433158875, + "p95": 190.7840073108673, + "p99": 233.2800030708313 + }, + "isolatedSum": { + "p50": 176.4800027012825, + "p90": 226.0800078511238, + "p95": 243.23200434446335, + "p99": 298.6239939928055 + }, + "roundtripMeasured": true, "dispatchLogicalBytes": 630784, "combineLogicalBytes": 630784, "fanoutMean": 5.5, "recvTokensMax": 7, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 87.10400015115738, + "p90": 130.0799995660782, + "p95": 139.96799290180206, + "p99": 167.1999990940094 + }, + "combine": { + "p50": 75.58400183916092, + "p90": 97.50399738550186, + "p95": 105.31199723482132, + "p99": 143.61600577831268 + }, + "roundtrip": { + "p50": 144.83200013637543, + "p90": 179.1040003299713, + "p95": 191.96799397468567, + "p99": 229.5680046081543 + }, + "isolatedSum": { + "p50": 162.6880019903183, + "p90": 227.58399695158005, + "p95": 245.27999013662338, + "p99": 310.8160048723221 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 72.06399738788605, + "p90": 86.14400029182434, + "p95": 95.51999717950821, + "p99": 111.87200248241425 + }, + "combine": { + "p50": 68.67200136184692, + "p90": 80.06399869918823, + "p95": 85.66399663686752, + "p99": 102.52799838781357 + }, + "roundtrip": { + "p50": 121.95199728012085, + "p90": 146.43199741840363, + "p95": 154.7199934720993, + "p99": 173.47200214862823 + }, + "isolatedSum": { + "p50": 140.73599874973297, + "p90": 166.20799899101257, + "p95": 181.18399381637573, + "p99": 214.4000008702278 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + 
"combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 72.9919970035553, + "p90": 93.12000125646591, + "p95": 103.2319962978363, + "p99": 120.7360029220581 + }, + "combine": { + "p50": 69.24799829721451, + "p90": 82.07999914884567, + "p95": 88.41600269079208, + "p99": 100.67199915647507 + }, + "roundtrip": { + "p50": 124.1919994354248, + "p90": 152.8639942407608, + "p95": 164.09599781036377, + "p99": 197.85599410533905 + }, + "isolatedSum": { + "p50": 142.2399953007698, + "p90": 175.20000040531158, + "p95": 191.6479989886284, + "p99": 221.40800207853317 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 69.72800195217133, + "p90": 91.36000275611877, + "p95": 105.66399991512299, + "p99": 141.56800508499146 + }, + "combine": { + "p50": 70.592001080513, + "p90": 82.04799890518188, + "p95": 87.3280018568039, + "p99": 99.45599734783173 + }, + "roundtrip": { + "p50": 123.96799772977829, + "p90": 151.32799744606018, + "p95": 162.23999857902527, + "p99": 186.46399676799774 + }, + "isolatedSum": { + "p50": 140.32000303268433, + "p90": 173.40800166130066, + "p95": 192.99200177192688, + "p99": 241.02400243282318 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 81.85599744319916, + "p90": 108.89600217342377, + "p95": 117.95199662446976, + "p99": 130.5599957704544 + }, + "combine": { + 
"p50": 77.56800204515457, + "p90": 96.25600278377533, + "p95": 99.7759997844696, + "p99": 110.43199896812439 + }, + "roundtrip": { + "p50": 136.19199395179749, + "p90": 168.19199919700623, + "p95": 180.25599420070648, + "p99": 210.01599729061127 + }, + "isolatedSum": { + "p50": 159.42399948835373, + "p90": 205.1520049571991, + "p95": 217.72799640893936, + "p99": 240.9919947385788 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -194,13 +952,73 @@ { "tokensPerRank": 64, "globalTokens": 512, - "dispatch": { "p50": 70, "p90": 80, "p99": 95 }, - "combine": { "p50": 45, "p90": 52, "p99": 70 }, - "serial": { "p50": 115, "p90": 132, "p99": 165 }, + "dispatch": { + "p50": 94.81599926948547, + "p90": 107.16799646615982, + "p95": 116.99199676513672, + "p99": 140.6719982624054 + }, + "combine": { + "p50": 85.75999736785889, + "p90": 97.79199957847595, + "p95": 106.04800283908844, + "p99": 131.04000687599182 + }, + "roundtrip": { + "p50": 156.5759927034378, + "p90": 172.19200730323792, + "p95": 179.00800704956055, + "p99": 190.49599766731262 + }, + "isolatedSum": { + "p50": 180.57599663734436, + "p90": 204.95999604463577, + "p95": 223.03999960422516, + "p99": 271.7120051383972 + }, + "roundtripMeasured": true, "dispatchLogicalBytes": 38993920, "combineLogicalBytes": 38993920, - "fanoutMean": 5.31, + "fanoutMean": 5.3125, "recvTokensMax": 367, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 115.90400338172913, + "p90": 139.23199474811554, + "p95": 145.47200500965118, + "p99": 182.65600502490997 + }, + "combine": { + "p50": 103.84000092744827, + "p90": 120.25599926710129, + "p95": 126.56000256538391, + "p99": 146.68799936771393 + }, + "roundtrip": { + "p50": 196.19199633598328, + "p90": 
217.15199947357178, + "p95": 223.68000447750092, + "p99": 249.2160052061081 + }, + "isolatedSum": { + "p50": 219.7440043091774, + "p90": 259.4879940152168, + "p95": 272.0320075750351, + "p99": 329.3440043926239 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -208,53 +1026,143 @@ ] }, { - "id": "cx-mi355x-prefill", - "identity": "mi355x|mori|prefill", - "stitchKey": "mi355x|mori|fixed", - "colorKey": "mi355x_fixture", + "id": "cx-5553e87c", + "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "h200_d982b749", + "comparisonKey": "6da1f9e2ab025dbe", "schemaVersion": 3, - "generatedAt": "2026-06-24T01:59:40.000Z", + "generatedAt": "2026-06-26T23:49:31.030615+00:00", "status": "valid", - "sku": "mi355x", - "backend": "mori", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_13", + "sku": "h200", + "backend": "deepep", "phase": "prefill", "mode": "normal", - "resourceMode": "normalized", - "suite": "resource-constrained", + "resourceMode": "tuned", + "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "mi355x-xgmi", - "transport": "xgmi", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "MI355X · mori · bf16 · EP8 · normalized · layout + dispatch", + "label": "H200 EP8 · deepep · bf16", "shape": { "hidden": 7168, "topk": 8, "experts": 256, "routing": "uniform", - "dispatchDtype": "bf16" + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + 
"achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "mi355x-trace", - "backendVersion": "mori-0227", + "traceSignature": "64d989e2e2a6b31", + "workloadId": "set:6:a426d66e479dc893", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28069889124", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28069889124", - "createdAt": "2026-06-24T01:57:55Z", - "sha": "mi355xprefillsha" + "id": "28271605214", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271605214", + "createdAt": "2026-06-26T23:47:59Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" }, "rows": [ { "tokensPerRank": 128, "globalTokens": 1024, - "dispatch": { "p50": 95, "p90": 110, "p99": 135 }, - "combine": { "p50": 75, "p90": 90, "p99": 115 }, - "serial": { "p50": 170, "p90": 200, "p99": 250 }, + "dispatch": { + "p50": 116.64000153541565, + "p90": 132.9600065946579, + "p95": 139.80799913406372, + "p99": 183.1039935350418 + }, + "combine": { + "p50": 106.11200332641602, + "p90": 121.08799815177917, + "p95": 127.61600315570831, + "p99": 162.7199947834015 + }, + "roundtrip": { + "p50": 197.11999595165253, + "p90": 216.67200326919556, + "p95": 225.2800017595291, + "p99": 246.75199389457703 + }, + "isolatedSum": { + "p50": 222.75200486183167, + "p90": 254.04800474643707, + "p95": 267.42400228977203, + "p99": 345.8239883184433 + }, + "roundtripMeasured": true, "dispatchLogicalBytes": 77672448, "combineLogicalBytes": 77672448, - "fanoutMean": 
5.29, + "fanoutMean": 5.291015625, "recvTokensMax": 723, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 143.8719928264618, + "p90": 166.143998503685, + "p95": 172.7360039949417, + "p99": 195.8719938993454 + }, + "combine": { + "p50": 143.327996134758, + "p90": 159.743994474411, + "p95": 162.81600296497345, + "p99": 171.7119961977005 + }, + "roundtrip": { + "p50": 260.70401072502136, + "p90": 280.8319926261902, + "p95": 286.27198934555054, + "p99": 329.3119966983795 + }, + "isolatedSum": { + "p50": 287.1999889612198, + "p90": 325.887992978096, + "p95": 335.55200695991516, + "p99": 367.5839900970459 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155889664, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -262,18 +1170,1785 @@ { "tokensPerRank": 512, "globalTokens": 4096, - "dispatch": { "p50": 180, "p90": 210, "p99": 260 }, - "combine": { "p50": 230, "p90": 275, "p99": 340 }, - "serial": { "p50": 410, "p90": 485, "p99": 600 }, + "dispatch": { + "p50": 203.23200523853302, + "p90": 227.00800001621246, + "p95": 239.07199501991272, + "p99": 277.1199941635132 + }, + "combine": { + "p50": 224.60800409317017, + "p90": 241.31199717521667, + "p95": 248.44799935817719, + "p99": 268.22400093078613 + }, + "roundtrip": { + "p50": 403.0719995498657, + "p90": 426.68798565864563, + "p95": 434.4640076160431, + "p99": 486.01600527763367 + }, + "isolatedSum": { + "p50": 427.8400093317032, + "p90": 468.31999719142914, + "p95": 487.5199943780899, + "p99": 545.3439950942993 + }, + "roundtripMeasured": true, "dispatchLogicalBytes": 312266752, "combineLogicalBytes": 312266752, - "fanoutMean": 5.32, + "fanoutMean": 5.31787109375, "recvTokensMax": 2779, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 314.7520124912262, + "p90": 335.90400218963623, + "p95": 347.51999378204346, + "p99": 390.9119963645935 + }, + "combine": { + "p50": 357.9519987106323, + "p90": 372.1280097961426, + "p95": 378.9440095424652, + "p99": 416.6080057621002 + }, + "roundtrip": { + "p50": 646.7199921607971, + "p90": 668.3200001716614, + "p95": 684.4800114631653, + "p99": 754.4959783554077 + }, + "isolatedSum": { + "p50": 672.7040112018585, + "p90": 708.0320119857788, + "p95": 726.4640033245087, + "p99": 807.5200021266937 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 623443968, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 542.0799851417542, + "p90": 560.1279735565186, + "p95": 575.3600001335144, + "p99": 736.2880110740662 + }, + "combine": { + "p50": 621.8879818916321, + "p90": 636.031985282898, + "p95": 641.6959762573242, + "p99": 732.7359914779663 + }, + "roundtrip": { + "p50": 1137.279987335205, + "p90": 1170.591950416565, + "p95": 1213.7600183486938, + "p99": 1369.6320056915283 + }, + "isolatedSum": { + "p50": 1163.9679670333862, + "p90": 1196.1599588394165, + "p95": 1217.0559763908386, + "p99": 1469.0240025520325 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243805696, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 997.3120093345642, + "p90": 1021.28005027771, + "p95": 1029.7919511795044, + "p99": 1212.8000259399414 + }, + "combine": { + "p50": 1121.6000318527222, + "p90": 1139.456033706665, + "p95": 1149.2160558700562, + "p99": 1185.4079961776733 + }, + "roundtrip": { + 
"p50": 2089.888095855713, + "p90": 2112.6720905303955, + "p95": 2126.431941986084, + "p99": 2277.951955795288 + }, + "isolatedSum": { + "p50": 2118.9120411872864, + "p90": 2160.736083984375, + "p95": 2179.0080070495605, + "p99": 2398.2080221176147 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487009280, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 } ] - } - ] + }, + { + "id": "cx-60c60832", + "identity": "mi355x|mori|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||c774c8e4abb34da", + "colorKey": "mi355x_4ec24046", + "comparisonKey": "3677ee6ace04ac65", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:53:59.155172+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "mi355x-amds_05", + "sku": "mi355x", + "backend": "mori", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "mi355x-xgmi", + "transport": "xgmi", + "worldSize": 8, + "epSize": 8, + "label": "MI355X EP8 · mori · bf16", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.3125, + "configuredUnits": 80, + "deviceUnits": 256, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "c774c8e4abb34da", + "workloadId": 
"set:5:d8d49658059863f2", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "image:rocm/sgl-dev:sglang-0.5.9-rocm720-mi35x-mori-0227-2", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28273516714", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28273516714", + "createdAt": "2026-06-27T00:53:08Z", + "sha": "2c15d9415503e9ccb84cd49cf446a122796efc1e" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 40.6000018119812, + "p90": 43.76000165939331, + "p95": 45.239999890327454, + "p99": 54.71999943256378 + }, + "combine": { + "p50": 17.920000478625298, + "p90": 19.039999693632126, + "p95": 20.999999716877937, + "p99": 22.87999913096428 + }, + "roundtrip": { + "p50": 56.32000043988228, + "p90": 59.4400018453598, + "p95": 60.64099818468094, + "p99": 63.19999694824219 + }, + "isolatedSum": { + "p50": 58.5200022906065, + "p90": 62.800001353025436, + "p95": 66.23999960720539, + "p99": 77.59999856352806 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 42.64000058174133, + "p90": 45.35999894142151, + "p95": 46.76000028848648, + "p99": 50.23999884724617 + }, + "combine": { + "p50": 16.759999096393585, + "p90": 18.68000067770481, + "p95": 19.801000133156776, + "p99": 22.08000048995018 + }, + "roundtrip": { + "p50": 58.9199997484684, + "p90": 61.799999326467514, + "p95": 62.95999884605408, + "p99": 65.20000100135803 + }, + "isolatedSum": { + "p50": 59.39999967813492, + "p90": 64.03999961912632, + "p95": 66.56100042164326, + "p99": 72.31999933719635 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 42.44000092148781, + "p90": 45.281000435352325, + "p95": 46.4400015771389, + "p99": 47.919999808073044 + }, + "combine": { + "p50": 19.999999552965164, + "p90": 21.99999988079071, + "p95": 23.360000923275948, + "p99": 25.72000026702881 + }, + "roundtrip": { + "p50": 61.91999837756157, + "p90": 65.20099937915802, + "p95": 66.3599967956543, + "p99": 67.84100085496902 + }, + "isolatedSum": { + "p50": 62.44000047445297, + "p90": 67.28100031614304, + "p95": 69.80000250041485, + "p99": 73.64000007510185 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 42.44000092148781, + "p90": 45.00100016593933, + "p95": 46.88100144267082, + "p99": 49.27999898791313 + }, + "combine": { + "p50": 20.880000665783882, + "p90": 22.840000689029694, + "p95": 24.240000173449516, + "p99": 26.399999856948853 + }, + "roundtrip": { + "p50": 62.401000410318375, + "p90": 65.48000127077103, + "p95": 66.28099828958511, + "p99": 68.00000369548798 + }, + "isolatedSum": { + "p50": 63.32000158727169, + "p90": 67.84100085496902, + "p95": 71.12100161612034, + "p99": 75.67999884486198 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 42.520999908447266, + "p90": 45.1200008392334, + "p95": 46.59999907016754, + "p99": 49.04000088572502 
+ }, + "combine": { + "p50": 25.8799996227026, + "p90": 27.879999950528145, + "p95": 29.239999130368233, + "p99": 31.800001859664917 + }, + "roundtrip": { + "p50": 67.80099868774414, + "p90": 71.16000354290009, + "p95": 72.2000002861023, + "p99": 74.47999715805054 + }, + "isolatedSum": { + "p50": 68.40099953114986, + "p90": 73.00000078976154, + "p95": 75.83999820053577, + "p99": 80.84000274538994 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-7f743bfe", + "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|decode|normal|none|none|0|tuned||14ded8461f2636c", + "colorKey": "h100_aa268d13", + "comparisonKey": "791af0af2f802328", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:59:41.322977+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h100-dgxc-slurm_18", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 · zipf", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf", + "routingLabel": "zipf", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + 
"gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "14ded8461f2636c", + "workloadId": "set:8:f5576e2b712d38c3", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271945409", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271945409", + "createdAt": "2026-06-26T23:58:46Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 93.37600320577621, + "p90": 101.59999877214432, + "p95": 103.16800326108932, + "p99": 108.15999656915665 + }, + "combine": { + "p50": 73.69600236415863, + "p90": 78.17599922418594, + "p95": 79.99999821186066, + "p99": 82.59200304746628 + }, + "roundtrip": { + "p50": 142.59199798107147, + "p90": 150.62400698661804, + "p95": 152.54400670528412, + "p99": 159.5200002193451 + }, + "isolatedSum": { + "p50": 167.07200556993484, + "p90": 179.77599799633026, + "p95": 183.16800147294998, + "p99": 190.75199961662292 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 444416, + "combineLogicalBytes": 444416, + "fanoutMean": 3.875, + "recvTokensMax": 8, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 96.0640013217926, + "p90": 100.89600086212158, + "p95": 101.82400047779083, + "p99": 107.07200318574905 + }, + "combine": { + "p50": 74.43200051784515, + "p90": 80.48000186681747, + "p95": 81.216000020504, + "p99": 82.11199939250946 + }, + "roundtrip": { + "p50": 143.39199662208557, + "p90": 147.87200093269348, + "p95": 153.31199765205383, + "p99": 168.60799491405487 + }, + "isolatedSum": { + "p50": 170.49600183963776, + "p90": 181.37600272893906, + "p95": 
183.04000049829483, + "p99": 189.18400257825851 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 845824, + "combineLogicalBytes": 845824, + "fanoutMean": 3.6875, + "recvTokensMax": 16, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 95.87199985980988, + "p90": 100.73599964380264, + "p95": 102.81600058078766, + "p99": 109.95200276374817 + }, + "combine": { + "p50": 74.30399954319, + "p90": 80.89599758386612, + "p95": 81.4720019698143, + "p99": 84.19200032949448 + }, + "roundtrip": { + "p50": 142.752006649971, + "p90": 153.02400290966034, + "p95": 154.9759954214096, + "p99": 160.0639969110489 + }, + "isolatedSum": { + "p50": 170.17599940299988, + "p90": 181.63199722766876, + "p95": 184.28800255060196, + "p99": 194.14400309324265 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1691648, + "combineLogicalBytes": 1691648, + "fanoutMean": 3.6875, + "recvTokensMax": 32, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 97.88800030946732, + "p90": 101.82400047779083, + "p95": 103.96800190210342, + "p99": 111.42399907112122 + }, + "combine": { + "p50": 75.6160020828247, + "p90": 81.4720019698143, + "p95": 82.04799890518188, + "p99": 84.03199911117554 + }, + "roundtrip": { + "p50": 146.7519998550415, + "p90": 153.47200632095337, + "p95": 154.9759954214096, + "p99": 167.9680049419403 + }, + "isolatedSum": { + "p50": 173.50400239229202, + "p90": 183.29600244760513, + "p95": 186.0160008072853, + "p99": 195.45599818229675 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3354624, + "combineLogicalBytes": 3354624, + "fanoutMean": 3.65625, + "recvTokensMax": 64, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 97.08800166845322, 
+ "p90": 100.67199915647507, + "p95": 104.25599664449692, + "p99": 110.6560006737709 + }, + "combine": { + "p50": 78.94399762153625, + "p90": 82.04799890518188, + "p95": 82.78399705886841, + "p99": 89.40800279378891 + }, + "roundtrip": { + "p50": 150.7200002670288, + "p90": 159.10400450229645, + "p95": 161.69600188732147, + "p99": 167.07199811935425 + }, + "isolatedSum": { + "p50": 176.03199928998947, + "p90": 182.71999806165695, + "p95": 187.03999370336533, + "p99": 200.06400346755981 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6537216, + "combineLogicalBytes": 6537216, + "fanoutMean": 3.5625, + "recvTokensMax": 127, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 96.47999703884125, + "p90": 101.31199657917023, + "p95": 104.5759990811348, + "p99": 110.62400043010712 + }, + "combine": { + "p50": 86.46400272846222, + "p90": 90.11200070381165, + "p95": 90.62399715185165, + "p99": 93.18400174379349 + }, + "roundtrip": { + "p50": 158.75199437141418, + "p90": 163.55200111865997, + "p95": 164.89599645137787, + "p99": 169.21600699424744 + }, + "isolatedSum": { + "p50": 182.94399976730347, + "p90": 191.42399728298187, + "p95": 195.19999623298645, + "p99": 203.8080021739006 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 12859392, + "combineLogicalBytes": 12859392, + "fanoutMean": 3.50390625, + "recvTokensMax": 255, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 106.9440022110939, + "p90": 138.36799561977386, + "p95": 143.0400013923645, + "p99": 250.2720057964325 + }, + "combine": { + "p50": 95.0080007314682, + "p90": 98.39999675750732, + "p95": 98.91200065612793, + "p99": 105.59999942779541 + }, + "roundtrip": { + "p50": 176.67199671268463, + "p90": 184.03199315071106, + "p95": 187.3600035905838, + "p99": 190.5599981546402 + 
}, + "isolatedSum": { + "p50": 201.9520029425621, + "p90": 236.7679923772812, + "p95": 241.95200204849243, + "p99": 355.8720052242279 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 25145344, + "combineLogicalBytes": 25145344, + "fanoutMean": 3.42578125, + "recvTokensMax": 510, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 128.4160017967224, + "p90": 145.9520012140274, + "p95": 148.83199334144592, + "p99": 151.99999511241913 + }, + "combine": { + "p50": 119.74400281906128, + "p90": 122.56000190973282, + "p95": 123.80799651145935, + "p99": 129.7920048236847 + }, + "roundtrip": { + "p50": 228.2560020685196, + "p90": 233.88800024986267, + "p95": 236.12800240516663, + "p99": 240.28800427913666 + }, + "isolatedSum": { + "p50": 248.1600046157837, + "p90": 268.5120031237602, + "p95": 272.6399898529053, + "p99": 281.7919999361038 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-a38d13e8", + "identity": "h100|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|decode|normal|none|none|0|tuned||a8f501af7004836", + "colorKey": "h100_769b9c4b", + "comparisonKey": "115d84ad1ee38d09", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:00:11.807854+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h100-dgxc-slurm_02", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 · zipf+eplb", + "shape": { + 
"hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf", + "routingLabel": "zipf+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "a8f501af7004836", + "workloadId": "set:8:f5576e2b712d38c3", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": 4.927734375, + "eplbImbalanceAfter": 1.0006103515625, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271948775", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271948775", + "createdAt": "2026-06-26T23:58:53Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 70.39999961853027, + "p90": 100.832000374794, + "p95": 105.56799918413162, + "p99": 192.73599982261658 + }, + "combine": { + "p50": 73.18399846553802, + "p90": 88.44800293445587, + "p95": 188.38399648666382, + "p99": 344.2560136318207 + }, + "roundtrip": { + "p50": 123.77600371837616, + "p90": 133.08799266815186, + "p95": 149.4400054216385, + "p99": 156.12800419330597 + }, + "isolatedSum": { + "p50": 143.5839980840683, + "p90": 189.28000330924988, + "p95": 293.95199567079544, + "p99": 536.9920134544373 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 5.375, + "recvTokensMax": 7, + "stragglerRank": 4, + 
"correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 68.38399916887283, + "p90": 75.71200281381607, + "p95": 77.11999863386154, + "p99": 95.61599791049957 + }, + "combine": { + "p50": 71.29599899053574, + "p90": 73.44000041484833, + "p95": 74.36800003051758, + "p99": 82.2720006108284 + }, + "roundtrip": { + "p50": 126.68800354003906, + "p90": 130.87999820709229, + "p95": 133.56800377368927, + "p99": 142.59199798107147 + }, + "isolatedSum": { + "p50": 139.67999815940857, + "p90": 149.1520032286644, + "p95": 151.48799866437912, + "p99": 177.88799852132797 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1204224, + "combineLogicalBytes": 1204224, + "fanoutMean": 5.25, + "recvTokensMax": 14, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 72.54400104284286, + "p90": 99.2640033364296, + "p95": 102.08000242710114, + "p99": 107.39199817180634 + }, + "combine": { + "p50": 72.9919970035553, + "p90": 79.71200346946716, + "p95": 84.22400057315826, + "p99": 87.39200234413147 + }, + "roundtrip": { + "p50": 130.23999333381653, + "p90": 156.41599893569946, + "p95": 160.22400557994843, + "p99": 165.53600132465363 + }, + "isolatedSum": { + "p50": 145.53599804639816, + "p90": 178.97600680589676, + "p95": 186.3040030002594, + "p99": 194.7840005159378 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2394112, + "combineLogicalBytes": 2394112, + "fanoutMean": 5.21875, + "recvTokensMax": 24, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 96.19200229644775, + "p90": 109.56799983978271, + "p95": 112.73600161075592, + "p99": 155.87200224399567 + }, + "combine": { + "p50": 75.45600086450577, + "p90": 88.06400001049042, + "p95": 89.4400030374527, + "p99": 97.37599641084671 + }, 
+ "roundtrip": { + "p50": 130.94399869441986, + "p90": 154.4319987297058, + "p95": 156.44800662994385, + "p99": 176.67199671268463 + }, + "isolatedSum": { + "p50": 171.64800316095352, + "p90": 197.63199985027313, + "p95": 202.17600464820862, + "p99": 253.24799865484238 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4630528, + "combineLogicalBytes": 4630528, + "fanoutMean": 5.046875, + "recvTokensMax": 45, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 82.97599852085114, + "p90": 100.16000270843506, + "p95": 103.55199873447418, + "p99": 106.72000050544739 + }, + "combine": { + "p50": 74.14399832487106, + "p90": 87.3280018568039, + "p95": 88.95999938249588, + "p99": 89.82399851083755 + }, + "roundtrip": { + "p50": 131.6480040550232, + "p90": 158.9760035276413, + "p95": 161.31199896335602, + "p99": 166.78400337696075 + }, + "isolatedSum": { + "p50": 157.1199968457222, + "p90": 187.48800456523895, + "p95": 192.51199811697006, + "p99": 196.54399901628494 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9447424, + "combineLogicalBytes": 9447424, + "fanoutMean": 5.1484375, + "recvTokensMax": 91, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 90.30400216579437, + "p90": 105.6319996714592, + "p95": 106.6880002617836, + "p99": 111.04000359773636 + }, + "combine": { + "p50": 80.99199831485748, + "p90": 89.15200084447861, + "p95": 89.88799899816513, + "p99": 90.91199934482574 + }, + "roundtrip": { + "p50": 142.17600226402283, + "p90": 157.6640009880066, + "p95": 160.44799983501434, + "p99": 164.8319959640503 + }, + "isolatedSum": { + "p50": 171.29600048065186, + "p90": 194.7840005159378, + "p95": 196.57599925994873, + "p99": 201.9520029425621 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19023872, + 
"combineLogicalBytes": 19023872, + "fanoutMean": 5.18359375, + "recvTokensMax": 178, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 95.74399888515472, + "p90": 116.2559986114502, + "p95": 121.98399752378464, + "p99": 398.6560106277466 + }, + "combine": { + "p50": 90.20800143480301, + "p90": 101.1200025677681, + "p95": 104.25599664449692, + "p99": 111.55200004577637 + }, + "roundtrip": { + "p50": 160.76800227165222, + "p90": 181.536003947258, + "p95": 185.37600338459015, + "p99": 188.35200369358063 + }, + "isolatedSum": { + "p50": 185.95200031995773, + "p90": 217.3760011792183, + "p95": 226.23999416828156, + "p99": 510.20801067352295 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38148096, + "combineLogicalBytes": 38148096, + "fanoutMean": 5.197265625, + "recvTokensMax": 350, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 114.3679991364479, + "p90": 133.44000279903412, + "p95": 137.31199502944946, + "p99": 142.7839994430542 + }, + "combine": { + "p50": 108.15999656915665, + "p90": 120.2239990234375, + "p95": 121.24799937009811, + "p99": 123.99999797344208 + }, + "roundtrip": { + "p50": 199.35999810695648, + "p90": 217.31199324131012, + "p95": 220.15999257564545, + "p99": 380.8319866657257 + }, + "isolatedSum": { + "p50": 222.52799570560455, + "p90": 253.66400182247162, + "p95": 258.5599943995476, + "p99": 266.7839974164963 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 76955648, + "combineLogicalBytes": 76955648, + "fanoutMean": 5.2421875, + "recvTokensMax": 687, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-2f9f6948", + "identity": 
"h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|ac583971f94b176", + "colorKey": "h100_7b3247bf", + "comparisonKey": "b51e047646ec8fac", + "schemaVersion": 3, + "generatedAt": "2026-06-26T17:29:02.253264+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h100-dgxc-slurm_00", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "normalized", + "suite": "resource-constrained", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 (norm)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": 0.18, + "achievedFraction": 0.1818, + "configuredUnits": 24, + "deviceUnits": 132, + "resourceClass": "resource-constrained", + "conformanceClass": "resource-conforming", + "fixedKernel": false, + "paretoEligible": true + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": "set:8:d8d49658059863f2", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28254315809", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254315809", + "createdAt": "2026-06-26T17:26:52Z", + "sha": "60dec7d70f554e252fec87709e2be52752947db1" + }, + "rows": [ + 
{ + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 95.74399888515472, + "p90": 102.78400033712387, + "p95": 104.99200224876404, + "p99": 109.37599837779999 + }, + "combine": { + "p50": 79.32800054550171, + "p90": 82.07999914884567, + "p95": 82.87999778985977, + "p99": 88.03199976682663 + }, + "roundtrip": { + "p50": 147.74399995803833, + "p90": 154.6880006790161, + "p95": 157.44000673294067, + "p99": 171.9360053539276 + }, + "isolatedSum": { + "p50": 175.07199943065643, + "p90": 184.86399948596954, + "p95": 187.8720000386238, + "p99": 197.40799814462662 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 71.23199850320816, + "p90": 101.27999633550644, + "p95": 102.52799838781357, + "p99": 107.87200182676315 + }, + "combine": { + "p50": 72.22399860620499, + "p90": 80.92799782752991, + "p95": 81.44000172615051, + "p99": 84.76799726486206 + }, + "roundtrip": { + "p50": 127.45599448680878, + "p90": 153.02400290966034, + "p95": 155.64799308776855, + "p99": 159.4880074262619 + }, + "isolatedSum": { + "p50": 143.45599710941315, + "p90": 182.20799416303635, + "p95": 183.96800011396408, + "p99": 192.6399990916252 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 95.23200243711472, + "p90": 102.36799716949463, + "p95": 107.84000158309937, + "p99": 439.64800238609314 + }, + "combine": { + "p50": 72.95999675989151, + "p90": 81.66400343179703, + "p95": 86.81599795818329, + "p99": 88.92799913883209 + }, + "roundtrip": { + "p50": 128.7360042333603, + "p90": 
159.19999778270721, + "p95": 161.31199896335602, + "p99": 167.1680063009262 + }, + "isolatedSum": { + "p50": 168.19199919700623, + "p90": 184.03200060129166, + "p95": 194.65599954128265, + "p99": 528.5760015249252 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 95.42399644851685, + "p90": 102.52799838781357, + "p95": 104.89600151777267, + "p99": 113.53600025177002 + }, + "combine": { + "p50": 79.58400249481201, + "p90": 82.91199803352356, + "p95": 87.07199990749359, + "p99": 87.96799927949905 + }, + "roundtrip": { + "p50": 151.48800611495972, + "p90": 159.90400314331055, + "p95": 162.20800578594208, + "p99": 169.47199404239655 + }, + "isolatedSum": { + "p50": 175.00799894332886, + "p90": 185.43999642133713, + "p95": 191.96800142526627, + "p99": 201.50399953126907 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 95.71199864149094, + "p90": 100.8640006184578, + "p95": 102.68799960613251, + "p99": 106.49599879980087 + }, + "combine": { + "p50": 80.64000308513641, + "p90": 87.90399879217148, + "p95": 89.24800157546997, + "p99": 95.23200243711472 + }, + "roundtrip": { + "p50": 152.319997549057, + "p90": 160.19199788570404, + "p95": 162.23999857902527, + "p99": 168.92799735069275 + }, + "isolatedSum": { + "p50": 176.35200172662735, + "p90": 188.76799941062927, + "p95": 191.93600118160248, + "p99": 201.7280012369156 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 
92, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 80.86399734020233, + "p90": 103.26399654150009, + "p95": 105.47199845314026, + "p99": 113.18399757146835 + }, + "combine": { + "p50": 80.35200089216232, + "p90": 89.31200206279755, + "p95": 90.04800021648407, + "p99": 95.74399888515472 + }, + "roundtrip": { + "p50": 136.48000359535217, + "p90": 164.60800170898438, + "p95": 167.10400581359863, + "p99": 175.10400712490082 + }, + "isolatedSum": { + "p50": 161.21599823236465, + "p90": 192.57599860429764, + "p95": 195.51999866962433, + "p99": 208.92799645662308 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 103.4879982471466, + "p90": 112.8000020980835, + "p95": 114.3679991364479, + "p99": 125.72799623012543 + }, + "combine": { + "p50": 96.83199971914291, + "p90": 104.12800312042236, + "p95": 104.99200224876404, + "p99": 106.33599758148193 + }, + "roundtrip": { + "p50": 170.71999609470367, + "p90": 181.21600151062012, + "p95": 182.91200697422028, + "p99": 186.81600689888 + }, + "isolatedSum": { + "p50": 200.31999796628952, + "p90": 216.92800521850586, + "p95": 219.36000138521194, + "p99": 232.06399381160736 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 111.29599809646606, + "p90": 130.87999820709229, + "p95": 133.5040032863617, + "p99": 139.93600010871887 + }, + "combine": { + "p50": 106.27199709415436, + "p90": 119.58400160074234, + 
"p95": 119.99999731779099, + "p99": 122.3360002040863 + }, + "roundtrip": { + "p50": 197.56799936294556, + "p90": 215.80800414085388, + "p95": 217.92000532150269, + "p99": 219.80799734592438 + }, + "isolatedSum": { + "p50": 217.56799519062042, + "p90": 250.46399980783463, + "p95": 253.50400060415268, + "p99": 262.2720003128052 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-2d0599c0", + "identity": "mi355x|mori|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|c774c8e4abb34da", + "colorKey": "mi355x_2fa43515", + "comparisonKey": "2796ed88af4b14b0", + "schemaVersion": 3, + "generatedAt": "2026-06-26T15:40:45.756534+00:00", + "status": "valid", + "publicationStatus": "diagnostic", + "runner": "mi355x-amds_04", + "sku": "mi355x", + "backend": "mori", + "phase": "decode", + "mode": "normal", + "resourceMode": "normalized", + "suite": "resource-constrained", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "mi355x-xgmi", + "transport": "xgmi", + "worldSize": 8, + "epSize": 8, + "label": "MI355X EP8 · mori · bf16 (norm)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": 0.18, + "achievedFraction": 0.3125, + "configuredUnits": 80, + "deviceUnits": 256, + "resourceClass": "unknown", + "conformanceClass": "minimum-functional", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + 
"routingConsistent": true, + "traceSignature": "c774c8e4abb34da", + "workloadId": "set:5:d8d49658059863f2", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "image:rocm/sgl-dev:sglang-0.5.9-rocm720-mi35x-mori-0227-2", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28247575150", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28247575150", + "createdAt": "2026-06-26T15:22:26Z", + "sha": "fd23d02b65dba6f1ed963342b188022fc27263d1" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 40.55999964475632, + "p90": 43.15999895334244, + "p95": 44.881001114845276, + "p99": 47.55999892950058 + }, + "combine": { + "p50": 16.119999811053276, + "p90": 18.719999119639397, + "p95": 19.840000197291374, + "p99": 22.520000115036964 + }, + "roundtrip": { + "p50": 56.040000170469284, + "p90": 59.20000001788139, + "p95": 60.80099940299988, + "p99": 63.120998442173004 + }, + "isolatedSum": { + "p50": 56.67999945580959, + "p90": 61.879998072981834, + "p95": 64.72100131213665, + "p99": 70.07999904453754 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 42.55999997258186, + "p90": 45.441001653671265, + "p95": 47.040000557899475, + "p99": 49.959998577833176 + }, + "combine": { + "p50": 16.16000011563301, + "p90": 18.360000103712082, + "p95": 19.600000232458115, + "p99": 22.63999916613102 + }, + "roundtrip": { + "p50": 58.83999913930893, + "p90": 61.88099831342697, + "p95": 63.48100304603577, + "p99": 65.40100276470184 + }, + "isolatedSum": { + "p50": 58.720000088214874, + "p90": 63.80100175738335, + "p95": 
66.64000079035759, + "p99": 72.5999977439642 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 42.160000652074814, + "p90": 44.840000569820404, + "p95": 46.28000035881996, + "p99": 49.84100162982941 + }, + "combine": { + "p50": 19.039999693632126, + "p90": 22.1599992364645, + "p95": 23.48100021481514, + "p99": 54.63999882340431 + }, + "roundtrip": { + "p50": 61.59999966621399, + "p90": 64.71999734640121, + "p95": 65.76000154018402, + "p99": 68.36000084877014 + }, + "isolatedSum": { + "p50": 61.20000034570694, + "p90": 66.9999998062849, + "p95": 69.7610005736351, + "p99": 104.48100045323372 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 42.52000153064728, + "p90": 45.1200008392334, + "p95": 46.080999076366425, + "p99": 48.8400012254715 + }, + "combine": { + "p50": 20.479999482631683, + "p90": 22.520000115036964, + "p95": 23.479999974370003, + "p99": 25.800000876188278 + }, + "roundtrip": { + "p50": 62.67999857664108, + "p90": 65.5599981546402, + "p95": 66.880002617836, + "p99": 68.56100261211395 + }, + "isolatedSum": { + "p50": 63.00000101327896, + "p90": 67.64000095427036, + "p95": 69.56099905073643, + "p99": 74.64000210165977 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 42.67999902367592, + 
"p90": 45.27999833226204, + "p95": 46.799998730421066, + "p99": 49.720000475645065 + }, + "combine": { + "p50": 24.921000003814697, + "p90": 27.240000665187836, + "p95": 28.07999961078167, + "p99": 30.27999959886074 + }, + "roundtrip": { + "p50": 67.9209977388382, + "p90": 71.04100286960602, + "p95": 72.12000340223312, + "p99": 74.08100366592407 + }, + "isolatedSum": { + "p50": 67.60099902749062, + "p90": 72.51999899744987, + "p95": 74.87999834120274, + "p99": 80.0000000745058 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + } + ], + "failures": [ + { + "id": "cxf-6e691abd", + "identity": "h100|deepep|7168|8|256|fp8|ll|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|ac583971f94b176", + "generatedAt": "2026-06-26T17:32:59.549027+00:00", + "publicationStatus": "diagnostic", + "status": "valid", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "config": "fp8/ll/layout-and-dispatch", + "reason": "anomaly:roundtrip_gt_isolated_sum", + "returnCode": null, + "run": { + "id": "28254359089", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254359089", + "createdAt": "2026-06-26T17:27:42Z", + "sha": "60dec7d70f554e252fec87709e2be52752947db1" + } + } + ], + "scannedRuns": 12, + "scannedArtifacts": 10, + "contributingRuns": 9, + "generatedAt": "2026-06-27T00:53:59.155172+00:00" } diff --git a/packages/app/public/data/collectivex.json b/packages/app/public/data/collectivex.json index 9a3b491d..cebcf471 100644 --- a/packages/app/public/data/collectivex.json +++ b/packages/app/public/data/collectivex.json @@ -1,457 +1,452 @@ { + "snapshotVersion": 2, "series": [ { - "id": "cx-4bc828ab3634ef77", - "identity": "b200|deepep|decode|normal|tuned|standardized|deepep-normal-v1|b200-nvlink-island|8|8|bf16|balanced|7168|8|256", - 
"stitchKey": "b200|deepep|normal|tuned|standardized|deepep-normal-v1|b200-nvlink-island|8|8|bf16|balanced|7168|8|256", - "colorKey": "b200_418405a4", - "schemaVersion": 1, - "generatedAt": "2026-06-24T01:53:08.683564+00:00", + "id": "cx-3f6620d0", + "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|fp8-saturation|none|none|0|tuned||8c8497a77d9085d", + "colorKey": "b300_c9569580", + "comparisonKey": "11fb97077712804e", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:06:34.883169+00:00", "status": "valid", - "sku": "b200", + "publicationStatus": "official", + "runner": "b300-nv_05", + "sku": "b300", "backend": "deepep", "phase": "decode", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "deepep-normal-v1", - "topologyClass": "b200-nvlink-island", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "B200 · deepep · bf16 · EP8", + "label": "B300 EP8 · deepep · bf16", "shape": { "hidden": 7168, "topk": 8, "experts": 256, - "routing": "balanced", - "dispatchDtype": "bf16" + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "fp8-saturation", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 }, - "routingConsistent": null, - "traceSignature": null, - "backendVersion": "unknown", + "routingConsistent": true, + "traceSignature": "8c8497a77d9085d", + "workloadId": "set:4:d8d49658059863f2", + 
"workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28069683835", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28069683835", - "createdAt": "2026-06-24T01:52:05Z", - "sha": "368cfbc6390cf69b864dedc121a79a12114b716b" + "id": "28272154473", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272154473", + "createdAt": "2026-06-27T00:05:17Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" }, "rows": [ { "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 76.9599974155426, - "p90": 76.9599974155426, - "p99": 106.55999928712845 - }, - "combine": { - "p50": 67.61600077152252, - "p90": 67.61600077152252, - "p99": 93.05600076913834 - }, - "serial": { - "p50": 126.46399438381195, - "p90": 126.46399438381195, - "p99": 156.6080003976822 - }, - "dispatchLogicalBytes": 114688, - "combineLogicalBytes": 0, - "fanoutMean": null, - "recvTokensMax": 8, - "correct": true, - "samplesPooled": null, - "trials": null - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 74.78400319814682, - "p90": 74.78400319814682, - "p99": 104.86400127410889 + "p50": 57.40800127387047, + "p90": 59.26400050520897, + "p95": 61.055999249219894, + "p99": 69.66400146484375 }, "combine": { - "p50": 76.4480009675026, - "p90": 76.4480009675026, - "p99": 87.00799942016602 + "p50": 66.30399823188782, + "p90": 67.32799857854843, + "p95": 68.25599819421768, + "p99": 77.02399790287018 }, - "serial": { - "p50": 127.26399302482605, - "p90": 127.26399302482605, - "p99": 155.2640050649643 + "roundtrip": { + "p50": 106.88000172376633, + "p90": 111.35999858379364, + "p95": 112.96000331640244, + "p99": 129.31199371814728 }, - "dispatchLogicalBytes": 215040, - "combineLogicalBytes": 0, - "fanoutMean": null, - 
"recvTokensMax": 15, - "correct": true, - "samplesPooled": null, - "trials": null - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 75.32799988985062, - "p90": 75.32799988985062, - "p99": 102.08000242710114 - }, - "combine": { - "p50": 77.05599814653397, - "p90": 77.05599814653397, - "p99": 100.25600343942642 + "isolatedSum": { + "p50": 123.71199950575829, + "p90": 126.5919990837574, + "p95": 129.31199744343758, + "p99": 146.68799936771393 }, - "serial": { - "p50": 139.0720009803772, - "p90": 139.0720009803772, - "p99": 158.11200439929962 - }, - "dispatchLogicalBytes": 372736, - "combineLogicalBytes": 0, - "fanoutMean": null, - "recvTokensMax": 26, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 4, "correct": true, - "samplesPooled": null, - "trials": null + "samplesPooled": 600, + "trials": 3 }, { "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 74.78400319814682, - "p90": 74.78400319814682, - "p99": 99.35999661684036 + "p50": 58.33600088953972, + "p90": 60.67200005054474, + "p95": 62.68800050020218, + "p99": 68.15999746322632 }, "combine": { - "p50": 77.88799703121185, - "p90": 77.88799703121185, - "p99": 94.52799707651138 - }, - "serial": { - "p50": 136.19199395179749, - "p90": 136.19199395179749, - "p99": 169.76000368595123 - }, - "dispatchLogicalBytes": 673792, - "combineLogicalBytes": 0, - "fanoutMean": null, - "recvTokensMax": 47, - "correct": true, - "samplesPooled": null, - "trials": null - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 76.73600316047668, - "p90": 76.73600316047668, - "p99": 108.0000028014183 + "p50": 67.84000247716904, + "p90": 77.2159993648529, + "p95": 77.88799703121185, + "p99": 78.75200361013412 }, - "combine": { - "p50": 78.52800190448761, - "p90": 78.52800190448761, - "p99": 96.47999703884125 + "roundtrip": { + "p50": 121.88799679279327, + "p90": 
125.05599856376648, + "p95": 126.08000636100769, + "p99": 136.99199259281158 }, - "serial": { - "p50": 136.25599443912506, - "p90": 136.25599443912506, - "p99": 172.5119948387146 + "isolatedSum": { + "p50": 126.17600336670876, + "p90": 137.88799941539764, + "p95": 140.57599753141403, + "p99": 146.91200107336044 }, - "dispatchLogicalBytes": 1333248, - "combineLogicalBytes": 0, - "fanoutMean": null, - "recvTokensMax": 93, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 5, "correct": true, - "samplesPooled": null, - "trials": null + "samplesPooled": 600, + "trials": 3 }, { "tokensPerRank": 32, "globalTokens": 256, "dispatch": { - "p50": 80.28800040483475, - "p90": 80.28800040483475, - "p99": 109.0880036354065 + "p50": 69.85600292682648, + "p90": 74.27199929952621, + "p95": 75.3600001335144, + "p99": 82.97599852085114 }, "combine": { - "p50": 79.48800176382065, - "p90": 79.48800176382065, - "p99": 99.2640033364296 - }, - "serial": { - "p50": 143.8719928264618, - "p90": 143.8719928264618, - "p99": 174.112007021904 - }, - "dispatchLogicalBytes": 2680832, - "combineLogicalBytes": 0, - "fanoutMean": null, - "recvTokensMax": 187, - "correct": true, - "samplesPooled": null, - "trials": null - }, - { - "tokensPerRank": 64, - "globalTokens": 512, - "dispatch": { - "p50": 87.45600283145905, - "p90": 87.45600283145905, - "p99": 112.0000034570694 + "p50": 78.52800190448761, + "p90": 79.19999957084656, + "p95": 79.99999821186066, + "p99": 82.8159973025322 }, - "combine": { - "p50": 101.18400305509567, - "p90": 101.18400305509567, - "p99": 126.20800733566284 + "roundtrip": { + "p50": 131.3599944114685, + "p90": 135.903999209404, + "p95": 136.76799833774567, + "p99": 147.5519984960556 }, - "serial": { - "p50": 168.89600455760956, - "p90": 168.89600455760956, - "p99": 189.98399376869202 + "isolatedSum": { + "p50": 148.3840048313141, + "p90": 153.47199887037277, + 
"p95": 155.35999834537506, + "p99": 165.79199582338333 }, - "dispatchLogicalBytes": 5089280, - "combineLogicalBytes": 0, - "fanoutMean": null, - "recvTokensMax": 355, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 7, "correct": true, - "samplesPooled": null, - "trials": null + "samplesPooled": 600, + "trials": 3 }, { "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 105.34399747848511, - "p90": 105.34399747848511, - "p99": 125.5359947681427 + "p50": 94.24000233411789, + "p90": 96.79999947547913, + "p95": 99.39199686050415, + "p99": 103.74400019645691 }, "combine": { - "p50": 115.23199826478958, - "p90": 115.23199826478958, - "p99": 150.43200552463531 + "p50": 115.35999923944473, + "p90": 116.12799763679504, + "p95": 116.73600226640701, + "p99": 127.29600071907043 }, - "serial": { - "p50": 198.43199849128723, - "p90": 198.43199849128723, - "p99": 219.200000166893 + "roundtrip": { + "p50": 193.4400051832199, + "p90": 198.91199469566345, + "p95": 199.71199333667755, + "p99": 208.3200067281723 }, - "dispatchLogicalBytes": 9906176, - "combineLogicalBytes": 0, - "fanoutMean": null, - "recvTokensMax": 691, + "isolatedSum": { + "p50": 209.60000157356262, + "p90": 212.92799711227417, + "p95": 216.12799912691116, + "p99": 231.04000091552734 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 7, "correct": true, - "samplesPooled": null, - "trials": null + "samplesPooled": 600, + "trials": 3 } ] }, { - "id": "cx-ccc512683c0a4e2e", - "identity": "b200|deepep|prefill|normal|tuned|standardized|deepep-normal-v1|b200-nvlink-island|8|8|bf16|balanced|7168|8|256", - "stitchKey": "b200|deepep|normal|tuned|standardized|deepep-normal-v1|b200-nvlink-island|8|8|bf16|balanced|7168|8|256", - "colorKey": "b200_418405a4", - 
"schemaVersion": 1, - "generatedAt": "2026-06-24T01:53:06.799084+00:00", + "id": "cx-854f00de", + "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||8c8497a77d9085d", + "colorKey": "b300_c9569580", + "comparisonKey": "afbd085a57d290fd", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:57:27.937449+00:00", "status": "valid", - "sku": "b200", + "publicationStatus": "official", + "runner": "b300-nv_17", + "sku": "b300", "backend": "deepep", - "phase": "prefill", + "phase": "decode", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "deepep-normal-v1", - "topologyClass": "b200-nvlink-island", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "B200 · deepep · bf16 · EP8", + "label": "B300 EP8 · deepep · bf16", "shape": { "hidden": 7168, "topk": 8, "experts": 256, - "routing": "balanced", - "dispatchDtype": "bf16" + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false }, - "routingConsistent": null, - "traceSignature": null, - "backendVersion": "unknown", + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "8c8497a77d9085d", + "workloadId": "set:4:d8d49658059863f2", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": 
"sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28069683835", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28069683835", - "createdAt": "2026-06-24T01:52:05Z", - "sha": "368cfbc6390cf69b864dedc121a79a12114b716b" + "id": "28271865772", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271865772", + "createdAt": "2026-06-26T23:56:07Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" }, "rows": [ { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 1, + "globalTokens": 8, "dispatch": { - "p50": 111.61600053310394, - "p90": 111.61600053310394, - "p99": 154.59200739860535 + "p50": 55.58399856090546, + "p90": 57.40800127387047, + "p95": 59.13599953055382, + "p99": 65.63200056552887 }, "combine": { - "p50": 118.14399808645248, - "p90": 118.14399808645248, - "p99": 136.1600011587143 - }, - "serial": { - "p50": 208.12800526618958, - "p90": 208.12800526618958, - "p99": 244.7039932012558 - }, - "dispatchLogicalBytes": 9977856, - "combineLogicalBytes": 0, - "fanoutMean": null, - "recvTokensMax": 696, - "correct": true, - "samplesPooled": null, - "trials": null - }, - { - "tokensPerRank": 256, - "globalTokens": 2048, - "dispatch": { - "p50": 140.70400595664978, - "p90": 140.70400595664978, - "p99": 164.76799547672272 + "p50": 66.14399701356888, + "p90": 67.55200028419495, + "p95": 68.38399916887283, + "p99": 77.2159993648529 }, - "combine": { - "p50": 152.73599326610565, - "p90": 152.73599326610565, - "p99": 173.24799299240112 + "roundtrip": { + "p50": 105.18400371074677, + "p90": 111.29599809646606, + "p95": 113.50400000810623, + "p99": 132.1280002593994 }, - "serial": { - "p50": 270.01601457595825, - "p90": 270.01601457595825, - "p99": 344.5119857788086 + "isolatedSum": { + "p50": 121.72799557447433, + "p90": 124.96000155806541, + "p95": 127.51999869942665, + "p99": 142.84799993038177 }, - "dispatchLogicalBytes": 19841024, - 
"combineLogicalBytes": 0, - "fanoutMean": null, - "recvTokensMax": 1384, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 7, "correct": true, - "samplesPooled": null, - "trials": null + "samplesPooled": 600, + "trials": 3 }, { - "tokensPerRank": 512, - "globalTokens": 4096, + "tokensPerRank": 8, + "globalTokens": 64, "dispatch": { - "p50": 193.59999895095825, - "p90": 193.59999895095825, - "p99": 220.96000611782074 + "p50": 58.400001376867294, + "p90": 60.99199876189232, + "p95": 62.880001962184906, + "p99": 73.05599749088287 }, "combine": { - "p50": 248.22400510311127, - "p90": 248.22400510311127, - "p99": 265.50400257110596 - }, - "serial": { - "p50": 419.5519983768463, - "p90": 419.5519983768463, - "p99": 437.824010848999 - }, - "dispatchLogicalBytes": 39380992, - "combineLogicalBytes": 0, - "fanoutMean": null, - "recvTokensMax": 2747, - "correct": true, - "samplesPooled": null, - "trials": null - }, - { - "tokensPerRank": 1024, - "globalTokens": 8192, - "dispatch": { - "p50": 298.335999250412, - "p90": 298.335999250412, - "p99": 331.9680094718933 + "p50": 67.29599833488464, + "p90": 77.15199887752533, + "p95": 77.72800326347351, + "p99": 79.64800298213959 }, - "combine": { - "p50": 413.34399580955505, - "p90": 413.34399580955505, - "p99": 439.9360120296478 + "roundtrip": { + "p50": 117.95199662446976, + "p90": 122.72000312805176, + "p95": 123.9359974861145, + "p99": 138.46400380134583 }, - "serial": { - "p50": 692.7679777145386, - "p90": 692.7679777145386, - "p99": 731.3920259475708 + "isolatedSum": { + "p50": 125.69599971175194, + "p90": 138.14399763941765, + "p95": 140.60800522565842, + "p99": 152.70400047302246 }, - "dispatchLogicalBytes": 79077376, - "combineLogicalBytes": 0, - "fanoutMean": null, - "recvTokensMax": 5516, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + 
"recvTokensMax": 47, + "stragglerRank": 7, "correct": true, - "samplesPooled": null, - "trials": null + "samplesPooled": 600, + "trials": 3 }, { - "tokensPerRank": 2048, - "globalTokens": 16384, + "tokensPerRank": 32, + "globalTokens": 256, "dispatch": { - "p50": 507.04002380371094, - "p90": 507.04002380371094, - "p99": 531.1040282249451 + "p50": 67.9360032081604, + "p90": 71.16799801588058, + "p95": 73.72800260782242, + "p99": 86.5280032157898 }, "combine": { - "p50": 708.1279754638672, - "p90": 708.1279754638672, - "p99": 725.055992603302 + "p50": 77.95199751853943, + "p90": 79.19999957084656, + "p95": 80.06399869918823, + "p99": 83.8719978928566 }, - "serial": { - "p50": 1193.6960220336914, - "p90": 1193.6960220336914, - "p99": 1213.1199836730957 + "roundtrip": { + "p50": 128.7039965391159, + "p90": 131.1360001564026, + "p95": 132.76800513267517, + "p99": 140.6400054693222 }, - "dispatchLogicalBytes": 156864512, - "combineLogicalBytes": 0, - "fanoutMean": null, - "recvTokensMax": 10942, + "isolatedSum": { + "p50": 145.88800072669983, + "p90": 150.36799758672714, + "p95": 153.79200130701065, + "p99": 170.4000011086464 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 7, "correct": true, - "samplesPooled": null, - "trials": null + "samplesPooled": 600, + "trials": 3 }, { - "tokensPerRank": 4096, - "globalTokens": 32768, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 925.5679845809937, - "p90": 925.5679845809937, - "p99": 948.8000273704529 + "p50": 92.70399808883667, + "p90": 97.63199836015701, + "p95": 99.71199929714203, + "p99": 135.42400300502777 }, "combine": { - "p50": 1307.1680068969727, - "p90": 1307.1680068969727, - "p99": 1337.0239734649658 + "p50": 114.78400230407715, + "p90": 116.70400202274323, + "p95": 118.97599697113037, + "p99": 164.0319973230362 + }, + "roundtrip": { + "p50": 190.62399864196777, + "p90": 
196.60800695419312, + "p95": 197.66399264335632, + "p99": 203.99999618530273 }, - "serial": { - "p50": 2205.2481174468994, - "p90": 2205.2481174468994, - "p99": 2238.879919052124 + "isolatedSum": { + "p50": 207.48800039291382, + "p90": 214.33600038290024, + "p95": 218.6879962682724, + "p99": 299.45600032806396 }, - "dispatchLogicalBytes": 312395776, - "combineLogicalBytes": 0, - "fanoutMean": null, - "recvTokensMax": 21791, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 7, "correct": true, - "samplesPooled": null, - "trials": null + "samplesPooled": 600, + "trials": 3 } ] }, { - "id": "cx-cd8f8fb6c8b34ff2", - "identity": "b300|deepep|decode|normal|tuned|standardized|comm-only-v1|b300-nvlink-island|8|8|bf16|uniform|7168|8|256", - "stitchKey": "b300|deepep|normal|tuned|standardized|comm-only-v1|b300-nvlink-island|8|8|bf16|uniform|7168|8|256", - "colorKey": "b300_b219a378", - "schemaVersion": 2, - "generatedAt": "2026-06-24T23:33:09.035182+00:00", + "id": "cx-2fa7319c", + "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||c774c8e4abb34da", + "colorKey": "b300_c9569580", + "comparisonKey": "89fa2de88509570c", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:54:19.552522+00:00", "status": "valid", + "publicationStatus": "official", + "runner": "b300-nv_01", "sku": "b300", "backend": "deepep", "phase": "decode", @@ -459,1775 +454,71416 @@ "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "comm-only-v1", + "measurementContract": "layout-and-dispatch-v1", "topologyClass": "b300-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "B300 · deepep · bf16 · EP8 · comm only", + "label": "B300 EP8 · deepep · bf16", "shape": { "hidden": 7168, "topk": 8, "experts": 256, "routing": "uniform", - 
"dispatchDtype": "bf16" + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 }, - "routingConsistent": null, - "traceSignature": null, + "routingConsistent": true, + "traceSignature": "c774c8e4abb34da", + "workloadId": "set:5:d8d49658059863f2", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28135639401", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28135639401", - "createdAt": "2026-06-24T23:12:52Z", - "sha": "4e217f93fda64a43d32a46f1e57325ff848148d8" + "id": "28273513209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28273513209", + "createdAt": "2026-06-27T00:53:00Z", + "sha": "2c15d9415503e9ccb84cd49cf446a122796efc1e" }, "rows": [ { "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 56.992001831531525, - "p90": 56.992001831531525, - "p99": 81.24800026416779 + "p50": 56.41600117087364, + "p90": 58.46399813890457, + "p95": 60.95999851822853, + "p99": 71.55200093984604 }, "combine": { - "p50": 66.75200164318085, - "p90": 66.75200164318085, - "p99": 73.02399724721909 + "p50": 66.27199798822403, + "p90": 67.55200028419495, + "p95": 68.28799843788147, + "p99": 77.27999985218048 }, - "serial": { - "p50": 123.74400347471237, - "p90": 123.74400347471237, - "p99": 154.27199751138687 + "roundtrip": 
{ + "p50": 105.85600137710571, + "p90": 112.28799819946289, + "p95": 113.3119985461235, + "p99": 124.09599870443344 }, + "isolatedSum": { + "p50": 122.68799915909767, + "p90": 126.01599842309952, + "p95": 129.24799695611, + "p99": 148.83200079202652 + }, + "roundtripMeasured": true, "dispatchLogicalBytes": 630784, "combineLogicalBytes": 630784, "fanoutMean": 5.5, "recvTokensMax": 7, + "stragglerRank": 7, "correct": true, - "samplesPooled": null, - "trials": null + "samplesPooled": 600, + "trials": 3 }, { "tokensPerRank": 2, "globalTokens": 16, "dispatch": { "p50": 56.60799890756607, - "p90": 56.60799890756607, - "p99": 64.96000289916992 + "p90": 58.04799869656563, + "p95": 59.39200147986412, + "p99": 63.64800035953522 }, "combine": { - "p50": 66.97600334882736, - "p90": 66.97600334882736, - "p99": 90.2400016784668 + "p50": 67.03999638557434, + "p90": 68.7360018491745, + "p95": 69.15199756622314, + "p99": 77.2159993648529 + }, + "roundtrip": { + "p50": 107.04000294208527, + "p90": 109.76000130176544, + "p95": 111.35999858379364, + "p99": 119.19999867677689 }, - "serial": { - "p50": 123.58400225639343, - "p90": 123.58400225639343, - "p99": 155.20000457763672 + "isolatedSum": { + "p50": 123.64799529314041, + "p90": 126.78400054574013, + "p95": 128.54399904608727, + "p99": 140.86399972438812 }, + "roundtripMeasured": true, "dispatchLogicalBytes": 1232896, "combineLogicalBytes": 1232896, "fanoutMean": 5.375, "recvTokensMax": 13, + "stragglerRank": 7, "correct": true, - "samplesPooled": null, - "trials": null + "samplesPooled": 600, + "trials": 3 }, { "tokensPerRank": 4, "globalTokens": 32, "dispatch": { - "p50": 57.37600103020668, - "p90": 57.37600103020668, - "p99": 78.27199995517731 + "p50": 58.81600081920624, + "p90": 64.44799900054932, + "p95": 66.01600348949432, + "p99": 71.61600142717361 }, "combine": { - "p50": 67.1359971165657, - "p90": 67.1359971165657, - "p99": 79.3600007891655 + "p50": 67.26399809122086, + "p90": 69.63200122117996, + "p95": 77.15199887752533, 
+ "p99": 78.91199737787247 + }, + "roundtrip": { + "p50": 122.20799922943115, + "p90": 125.18399953842163, + "p95": 125.91999769210815, + "p99": 130.3360015153885 }, - "serial": { - "p50": 124.51199814677238, - "p90": 124.51199814677238, - "p99": 157.6320007443428 + "isolatedSum": { + "p50": 126.0799989104271, + "p90": 134.08000022172928, + "p95": 143.16800236701965, + "p99": 150.52799880504608 }, + "roundtripMeasured": true, "dispatchLogicalBytes": 2480128, "combineLogicalBytes": 2480128, "fanoutMean": 5.40625, "recvTokensMax": 29, + "stragglerRank": 7, "correct": true, - "samplesPooled": null, - "trials": null + "samplesPooled": 600, + "trials": 3 }, { "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 59.61599946022034, - "p90": 59.61599946022034, - "p99": 90.36800265312195 + "p50": 59.42400172352791, + "p90": 64.25599753856659, + "p95": 67.87200272083282, + "p99": 74.62400197982788 }, "combine": { - "p50": 69.43999975919724, - "p90": 69.43999975919724, - "p99": 79.55200225114822 + "p50": 68.9919963479042, + "p90": 78.015998005867, + "p95": 78.62400263547897, + "p99": 81.88799768686295 }, - "serial": { - "p50": 129.05599921941757, - "p90": 129.05599921941757, - "p99": 169.92000490427017 + "roundtrip": { + "p50": 119.39200013875961, + "p90": 125.05599856376648, + "p95": 126.17599964141846, + "p99": 130.36799430847168 }, + "isolatedSum": { + "p50": 128.4159980714321, + "p90": 142.2719955444336, + "p95": 146.4960053563118, + "p99": 156.51199966669083 + }, + "roundtripMeasured": true, "dispatchLogicalBytes": 4974592, "combineLogicalBytes": 4974592, "fanoutMean": 5.421875, "recvTokensMax": 47, + "stragglerRank": 5, "correct": true, - "samplesPooled": null, - "trials": null + "samplesPooled": 600, + "trials": 3 }, { "tokensPerRank": 16, "globalTokens": 128, "dispatch": { - "p50": 63.26399743556976, - "p90": 63.26399743556976, - "p99": 78.23999971151352 + "p50": 66.68800115585327, + "p90": 73.7600028514862, + "p95": 75.13599842786789, + "p99": 
80.35200089216232 }, "combine": { - "p50": 69.43999975919724, - "p90": 69.43999975919724, - "p99": 79.83999699354172 + "p50": 69.88800317049026, + "p90": 78.5600021481514, + "p95": 78.75200361013412, + "p99": 82.56000280380249 + }, + "roundtrip": { + "p50": 119.26399916410446, + "p90": 121.47200107574463, + "p95": 123.52000176906586, + "p99": 127.68000364303589 }, - "serial": { - "p50": 132.703997194767, - "p90": 132.703997194767, - "p99": 158.07999670505524 + "isolatedSum": { + "p50": 136.57600432634354, + "p90": 152.3200049996376, + "p95": 153.888002038002, + "p99": 162.9120036959648 }, + "roundtripMeasured": true, "dispatchLogicalBytes": 9920512, "combineLogicalBytes": 9920512, "fanoutMean": 5.40625, "recvTokensMax": 92, + "stragglerRank": 0, "correct": true, - "samplesPooled": null, - "trials": null - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 68.89600306749344, - "p90": 68.89600306749344, - "p99": 88.22400122880936 - }, - "combine": { - "p50": 78.62400263547897, - "p90": 78.62400263547897, - "p99": 86.2400010228157 - }, - "serial": { - "p50": 147.5200057029724, - "p90": 147.5200057029724, - "p99": 174.46400225162506 - }, - "dispatchLogicalBytes": 19726336, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 182, - "correct": true, - "samplesPooled": null, - "trials": null - }, - { - "tokensPerRank": 64, - "globalTokens": 512, - "dispatch": { - "p50": 85.7279971241951, - "p90": 85.7279971241951, - "p99": 98.01600128412247 + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-dc6ca42c", + "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|small-amplitude|none|none|0|tuned||8c8497a77d9085d", + "colorKey": "b300_c9569580", + "comparisonKey": "8a9fa1be98f83eb3", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:06:17.025326+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "b300-nv_14", + "sku": "b300", + "backend": "deepep", + 
"phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep · bf16", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "small-amplitude", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "8c8497a77d9085d", + "workloadId": "set:4:d8d49658059863f2", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28272146490", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272146490", + "createdAt": "2026-06-27T00:05:03Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 55.84000051021576, + "p90": 57.95200169086456, + "p95": 60.54399907588959, + "p99": 68.09599697589874 + }, + "combine": { + "p50": 66.20799750089645, + "p90": 66.94400310516357, + "p95": 67.52000004053116, + "p99": 90.87999910116196 + }, + "roundtrip": { + "p50": 106.04800283908844, + "p90": 111.07199639081955, + "p95": 112.67200112342834, + "p99": 
125.15200674533844 + }, + "isolatedSum": { + "p50": 122.04799801111221, + "p90": 124.89600479602814, + "p95": 128.06399911642075, + "p99": 158.9759960770607 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 59.07199904322624, + "p90": 62.3680017888546, + "p95": 65.08799642324448, + "p99": 71.00799679756165 + }, + "combine": { + "p50": 69.18399780988693, + "p90": 78.14399898052216, + "p95": 78.59200239181519, + "p99": 88.22400122880936 + }, + "roundtrip": { + "p50": 119.07199770212173, + "p90": 124.32000041007996, + "p95": 125.37600100040436, + "p99": 140.06400108337402 + }, + "isolatedSum": { + "p50": 128.25599685311317, + "p90": 140.51200076937675, + "p95": 143.67999881505966, + "p99": 159.231998026371 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 68.70400160551071, + "p90": 73.66400212049484, + "p95": 75.13599842786789, + "p99": 93.56799721717834 + }, + "combine": { + "p50": 78.62400263547897, + "p90": 79.6160027384758, + "p95": 81.44000172615051, + "p99": 91.48799628019333 + }, + "roundtrip": { + "p50": 130.65600395202637, + "p90": 135.71199774742126, + "p95": 136.76799833774567, + "p99": 144.1279947757721 + }, + "isolatedSum": { + "p50": 147.32800424098969, + "p90": 153.28000485897064, + "p95": 156.5760001540184, + "p99": 185.05599349737167 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + 
"trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 93.08800101280212, + "p90": 98.78399968147278, + "p95": 100.63999891281128, + "p99": 110.17599701881409 + }, + "combine": { + "p50": 115.39199948310852, + "p90": 116.28799885511398, + "p95": 117.21599847078323, + "p99": 126.39999389648438 + }, + "roundtrip": { + "p50": 192.25600361824036, + "p90": 198.2080042362213, + "p95": 198.7839937210083, + "p99": 203.61599326133728 + }, + "isolatedSum": { + "p50": 208.48000049591064, + "p90": 215.07199853658676, + "p95": 217.8559973835945, + "p99": 236.57599091529846 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-a995e296", + "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|wide-dynamic-range|none|none|0|tuned||8c8497a77d9085d", + "colorKey": "b300_c9569580", + "comparisonKey": "fe9431c5beaaf675", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:06:39.072562+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "b300-nv_03", + "sku": "b300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep · bf16", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "wide-dynamic-range", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + 
"configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "8c8497a77d9085d", + "workloadId": "set:4:d8d49658059863f2", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28272150514", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272150514", + "createdAt": "2026-06-27T00:05:10Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 1758.687973022461, + "p90": 2565.7920837402344, + "p95": 2910.815954208374, + "p99": 3400.576114654541 + }, + "combine": { + "p50": 1759.8719596862793, + "p90": 1907.871961593628, + "p95": 2670.1760292053223, + "p99": 2940.095901489258 + }, + "roundtrip": { + "p50": 1802.39999294281, + "p90": 1987.0719909667969, + "p95": 2666.1760807037354, + "p99": 2924.000024795532 + }, + "isolatedSum": { + "p50": 3518.5599327087402, + "p90": 4473.664045333862, + "p95": 5580.991983413696, + "p99": 6340.672016143799 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 1754.8799514770508, + "p90": 2488.703966140747, + "p95": 2823.359966278076, + "p99": 3391.4880752563477 + }, + "combine": { + "p50": 1760.4479789733887, + "p90": 1861.184000968933, + "p95": 2647.264003753662, + "p99": 2955.8401107788086 + }, 
+ "roundtrip": { + "p50": 1819.2960023880005, + "p90": 1958.5280418395996, + "p95": 2686.271905899048, + "p99": 2968.319892883301 + }, + "isolatedSum": { + "p50": 3515.3279304504395, + "p90": 4349.88796710968, + "p95": 5470.623970031738, + "p99": 6347.328186035156 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 1767.3920392990112, + "p90": 2204.767942428589, + "p95": 2829.9520015716553, + "p99": 3398.303985595703 + }, + "combine": { + "p50": 1764.0960216522217, + "p90": 1887.1040344238281, + "p95": 2647.615909576416, + "p99": 3015.5839920043945 + }, + "roundtrip": { + "p50": 1835.6800079345703, + "p90": 1997.1840381622314, + "p95": 2681.3440322875977, + "p99": 2967.072010040283 + }, + "isolatedSum": { + "p50": 3531.488060951233, + "p90": 4091.871976852417, + "p95": 5477.567911148071, + "p99": 6413.887977600098 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 1790.7520532608032, + "p90": 2270.848035812378, + "p95": 2845.247983932495, + "p99": 3459.712028503418 + }, + "combine": { + "p50": 1809.7599744796753, + "p90": 1956.9599628448486, + "p95": 2685.7919692993164, + "p99": 3029.952049255371 + }, + "roundtrip": { + "p50": 1890.3039693832397, + "p90": 2169.4719791412354, + "p95": 2888.256072998047, + "p99": 3985.24808883667 + }, + "isolatedSum": { + "p50": 3600.5120277404785, + "p90": 4227.807998657227, + "p95": 5531.0399532318115, + "p99": 6489.664077758789 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + 
"combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-b81422f4", + "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|zeros|none|none|0|tuned||8c8497a77d9085d", + "colorKey": "b300_c9569580", + "comparisonKey": "d97d7a8231265a6c", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:06:13.336317+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "b300-nv_13", + "sku": "b300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep · bf16", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "zeros", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "8c8497a77d9085d", + "workloadId": "set:4:d8d49658059863f2", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28272142980", + "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272142980", + "createdAt": "2026-06-27T00:04:57Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 56.63999915122986, + "p90": 59.26400050520897, + "p95": 62.04799935221672, + "p99": 73.85600358247757 + }, + "combine": { + "p50": 66.43199920654297, + "p90": 67.4239993095398, + "p95": 68.25599819421768, + "p99": 78.04799824953079 + }, + "roundtrip": { + "p50": 106.78400099277496, + "p90": 111.39199882745743, + "p95": 113.34399878978729, + "p99": 117.0239970088005 + }, + "isolatedSum": { + "p50": 123.07199835777283, + "p90": 126.68799981474876, + "p95": 130.3039975464344, + "p99": 151.90400183200836 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 59.55199897289276, + "p90": 61.824001371860504, + "p95": 63.680000603199005, + "p99": 71.07199728488922 + }, + "combine": { + "p50": 68.92800331115723, + "p90": 77.7600035071373, + "p95": 77.95199751853943, + "p99": 78.65600287914276 + }, + "roundtrip": { + "p50": 120.03199756145477, + "p90": 124.4800016283989, + "p95": 125.95200538635254, + "p99": 145.53600549697876 + }, + "isolatedSum": { + "p50": 128.48000228405, + "p90": 139.5840048789978, + "p95": 141.63199812173843, + "p99": 149.72800016403198 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 68.83200258016586, + "p90": 72.38399982452393, + "p95": 75.16799867153168, + "p99": 78.17599922418594 + }, + "combine": { + 
"p50": 78.65600287914276, + "p90": 79.71200346946716, + "p95": 80.57600259780884, + "p99": 100.92800110578537 + }, + "roundtrip": { + "p50": 130.72000443935394, + "p90": 134.2719942331314, + "p95": 135.74400544166565, + "p99": 155.7759940624237 + }, + "isolatedSum": { + "p50": 147.48800545930862, + "p90": 152.0960032939911, + "p95": 155.74400126934052, + "p99": 179.1040003299713 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 93.34400296211243, + "p90": 95.93600034713745, + "p95": 99.2640033364296, + "p99": 107.61599987745285 + }, + "combine": { + "p50": 115.4559999704361, + "p90": 116.44800007343292, + "p95": 117.0559972524643, + "p99": 126.43200159072876 + }, + "roundtrip": { + "p50": 192.9599940776825, + "p90": 198.81600141525269, + "p95": 199.8080015182495, + "p99": 274.1439938545227 + }, + "isolatedSum": { + "p50": 208.80000293254852, + "p90": 212.38400042057037, + "p95": 216.3200005888939, + "p99": 234.0480014681816 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-a22ca77b", + "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|decode|normal|none|none|0|tuned||2279937619f3971", + "colorKey": "b300_77566238", + "comparisonKey": "08fb0b4fb4077abb", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:58:04.079730+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "b300-nv_02", + "sku": "b300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": 
"standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep · bf16 · balanced", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced", + "routingLabel": "balanced", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "2279937619f3971", + "workloadId": "set:4:7af12818400d6348", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271873027", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271873027", + "createdAt": "2026-06-26T23:56:21Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 56.41600117087364, + "p90": 58.848001062870026, + "p95": 61.216000467538834, + "p99": 80.25600016117096 + }, + "combine": { + "p50": 67.6800012588501, + "p90": 69.60000097751617, + "p95": 76.73600316047668, + "p99": 82.62400329113007 + }, + "roundtrip": { + "p50": 106.49599879980087, + "p90": 109.27999764680862, + "p95": 111.13599687814713, + "p99": 124.1919994354248 + }, + "isolatedSum": { + "p50": 124.09600242972374, + "p90": 128.4480020403862, + "p95": 
137.95200362801552, + "p99": 162.88000345230103 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 8, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 58.43200162053108, + "p90": 60.70400029420853, + "p95": 62.6240000128746, + "p99": 78.65600287914276 + }, + "combine": { + "p50": 77.98399776220322, + "p90": 78.72000336647034, + "p95": 78.84799689054489, + "p99": 81.4720019698143 + }, + "roundtrip": { + "p50": 118.07999759912491, + "p90": 122.91199713945389, + "p95": 124.1919994354248, + "p99": 131.99999928474426 + }, + "isolatedSum": { + "p50": 136.4159993827343, + "p90": 139.42400366067886, + "p95": 141.4719969034195, + "p99": 160.12800484895706 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 7340032, + "combineLogicalBytes": 7340032, + "fanoutMean": 8, + "recvTokensMax": 64, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 69.82400268316269, + "p90": 71.87200337648392, + "p95": 73.7600028514862, + "p99": 84.25600081682205 + }, + "combine": { + "p50": 79.16799932718277, + "p90": 81.08799904584885, + "p95": 81.91999793052673, + "p99": 90.71999788284302 + }, + "roundtrip": { + "p50": 133.82400572299957, + "p90": 140.09599387645721, + "p95": 141.92000031471252, + "p99": 145.82400023937225 + }, + "isolatedSum": { + "p50": 148.99200201034546, + "p90": 152.96000242233276, + "p95": 155.68000078201294, + "p99": 174.97599869966507 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 8, + "recvTokensMax": 256, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 105.76000064611435, + 
"p90": 107.71200060844421, + "p95": 109.02400314807892, + "p99": 114.78400230407715 + }, + "combine": { + "p50": 130.36799430847168, + "p90": 139.615997672081, + "p95": 140.03199338912964, + "p99": 143.13599467277527 + }, + "roundtrip": { + "p50": 230.68800568580627, + "p90": 234.52800512313843, + "p95": 235.55199801921844, + "p99": 240.09600281715393 + }, + "isolatedSum": { + "p50": 236.12799495458603, + "p90": 247.3279982805252, + "p95": 249.05599653720856, + "p99": 257.9199969768524 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 8, + "recvTokensMax": 1024, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-c5ecae32", + "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|8|decode|normal|none|none|0|tuned||d02a66236b524b8", + "colorKey": "b300_a314501b", + "comparisonKey": "a145623f8abcc709", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:58:12.406102+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "b300-nv_06", + "sku": "b300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep · bf16 · balanced-rank-local", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced-rank-local", + "routingLabel": "balanced-rank-local", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + 
"conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "d02a66236b524b8", + "workloadId": "set:4:2eebbed158fe1320", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271879618", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271879618", + "createdAt": "2026-06-26T23:56:35Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 62.97600269317627, + "p90": 65.21599739789963, + "p95": 66.01600348949432, + "p99": 75.74400305747986 + }, + "combine": { + "p50": 54.336000233888626, + "p90": 55.26399984955788, + "p95": 56.60799890756607, + "p99": 65.5359998345375 + }, + "roundtrip": { + "p50": 94.94400024414062, + "p90": 98.27200323343277, + "p95": 100.63999891281128, + "p99": 111.93600296974182 + }, + "isolatedSum": { + "p50": 117.3120029270649, + "p90": 120.4799972474575, + "p95": 122.6240023970604, + "p99": 141.28000289201736 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 114688, + "combineLogicalBytes": 114688, + "fanoutMean": 1, + "recvTokensMax": 4, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 59.39200147986412, + "p90": 61.63199990987778, + "p95": 62.65600025653839, + "p99": 71.68000191450119 + }, + "combine": { + "p50": 56.73599988222122, + "p90": 65.34399837255478, + "p95": 65.95200300216675, + "p99": 85.4400023818016 + }, + "roundtrip": { + "p50": 108.57599973678589, + "p90": 113.56800049543381, + "p95": 
114.84800279140472, + "p99": 120.12799829244614 + }, + "isolatedSum": { + "p50": 116.12800136208534, + "p90": 126.97599828243256, + "p95": 128.60800325870514, + "p99": 157.1200042963028 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 1, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 70.72000205516815, + "p90": 76.57600194215775, + "p95": 77.88799703121185, + "p99": 85.31200140714645 + }, + "combine": { + "p50": 66.6240006685257, + "p90": 67.32799857854843, + "p95": 67.61600077152252, + "p99": 78.84799689054489 + }, + "roundtrip": { + "p50": 120.51200121641159, + "p90": 123.99999797344208, + "p95": 124.64000284671783, + "p99": 130.0159990787506 + }, + "isolatedSum": { + "p50": 137.34400272369385, + "p90": 143.90400052070618, + "p95": 145.50399780273438, + "p99": 164.15999829769135 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 1, + "recvTokensMax": 32, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 70.11199742555618, + "p90": 71.87200337648392, + "p95": 73.79200309515, + "p99": 79.64800298213959 + }, + "combine": { + "p50": 68.35199892520905, + "p90": 70.04799693822861, + "p95": 76.92799717187881, + "p99": 79.1039988398552 + }, + "roundtrip": { + "p50": 122.23999947309494, + "p90": 129.5360028743744, + "p95": 131.32800161838531, + "p99": 142.87999272346497 + }, + "isolatedSum": { + "p50": 138.46399635076523, + "p90": 141.92000031471252, + "p95": 150.7200002670288, + "p99": 158.75200182199478 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 1, + "recvTokensMax": 128, + "stragglerRank": 7, + "correct": true, + 
"samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-72792847", + "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|decode|normal|none|none|0|tuned||2ad5ef98d328fa1", + "colorKey": "b300_5b993222", + "comparisonKey": "10e590b8f933d382", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:58:30.886921+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "b300-nv_10", + "sku": "b300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep · bf16 · hotspot-single", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-single", + "routingLabel": "hotspot-single", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "2ad5ef98d328fa1", + "workloadId": "set:4:286be993cd819ed9", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271900377", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271900377", + "createdAt": 
"2026-06-26T23:57:16Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 56.96000158786774, + "p90": 59.10399928689003, + "p95": 62.272001057863235, + "p99": 71.68000191450119 + }, + "combine": { + "p50": 66.39999896287918, + "p90": 67.07199662923813, + "p95": 67.45599955320358, + "p99": 90.17600119113922 + }, + "roundtrip": { + "p50": 106.91200196743011, + "p90": 113.40799927711487, + "p95": 117.18399822711945, + "p99": 195.77600061893463 + }, + "isolatedSum": { + "p50": 123.36000055074692, + "p90": 126.17599591612816, + "p95": 129.72800061106682, + "p99": 161.8560031056404 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 59.58399921655655, + "p90": 62.65600025653839, + "p95": 65.34399837255478, + "p99": 81.85599744319916 + }, + "combine": { + "p50": 68.00000369548798, + "p90": 77.11999863386154, + "p95": 77.79199630022049, + "p99": 79.9039974808693 + }, + "roundtrip": { + "p50": 122.36800044775009, + "p90": 125.791996717453, + "p95": 127.71199643611908, + "p99": 145.82400023937225 + }, + "isolatedSum": { + "p50": 127.58400291204453, + "p90": 139.77599889039993, + "p95": 143.13599467277527, + "p99": 161.75999492406845 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 64, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 69.63200122117996, + "p90": 75.32799988985062, + "p95": 77.27999985218048, + "p99": 98.08000177145004 + }, + "combine": { + "p50": 78.62400263547897, + "p90": 79.26400005817413, + "p95": 79.45600152015686, 
+ "p99": 89.75999802350998 + }, + "roundtrip": { + "p50": 133.53599607944489, + "p90": 137.15200126171112, + "p95": 138.5280042886734, + "p99": 155.10399639606476 + }, + "isolatedSum": { + "p50": 148.25600385665894, + "p90": 154.59199994802475, + "p95": 156.73600137233734, + "p99": 187.83999979496002 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19525632, + "combineLogicalBytes": 19525632, + "fanoutMean": 5.3203125, + "recvTokensMax": 256, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 103.39199751615524, + "p90": 104.96000200510025, + "p95": 106.62399977445602, + "p99": 110.81600189208984 + }, + "combine": { + "p50": 127.80800461769104, + "p90": 129.2160004377365, + "p95": 130.5920034646988, + "p99": 150.62400698661804 + }, + "roundtrip": { + "p50": 215.87200462818146, + "p90": 223.07200729846954, + "p95": 224.7679978609085, + "p99": 231.32799565792084 + }, + "isolatedSum": { + "p50": 231.20000213384628, + "p90": 234.17600244283676, + "p95": 237.21600323915482, + "p99": 261.4400088787079 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1024, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-cc647506", + "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|decode|normal|none|none|0|tuned||1fa7fe74d0e30a3", + "colorKey": "b300_8d2811e3", + "comparisonKey": "478acd4108c50326", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:58:32.426052+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "b300-nv_05", + "sku": "b300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + 
"topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep · bf16 · zipf", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf", + "routingLabel": "zipf", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "1fa7fe74d0e30a3", + "workloadId": "set:4:f5576e2b712d38c3", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271886823", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271886823", + "createdAt": "2026-06-26T23:56:49Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 56.703999638557434, + "p90": 59.90400165319443, + "p95": 62.65600025653839, + "p99": 69.98399645090103 + }, + "combine": { + "p50": 65.88800251483917, + "p90": 66.43199920654297, + "p95": 66.72000139951706, + "p99": 73.7600028514862 + }, + "roundtrip": { + "p50": 107.16799646615982, + "p90": 112.83200234174728, + "p95": 114.14399743080139, + "p99": 120.44800072908401 + }, + "isolatedSum": { + "p50": 122.5920021533966, + "p90": 126.3360008597374, + "p95": 129.37600165605545, + "p99": 143.74399930238724 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 444416, + "combineLogicalBytes": 444416, + "fanoutMean": 3.875, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 58.848001062870026, + "p90": 60.80000102519989, + "p95": 62.84800171852112, + "p99": 74.40000027418137 + }, + "combine": { + "p50": 68.00000369548798, + "p90": 70.30399888753891, + "p95": 76.99199765920639, + "p99": 78.5600021481514 + }, + "roundtrip": { + "p50": 116.54400080442429, + "p90": 123.29600006341934, + "p95": 124.83199685811996, + "p99": 130.46400249004364 + }, + "isolatedSum": { + "p50": 126.848004758358, + "p90": 131.1039999127388, + "p95": 139.8399993777275, + "p99": 152.96000242233276 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3354624, + "combineLogicalBytes": 3354624, + "fanoutMean": 3.65625, + "recvTokensMax": 64, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 75.9039968252182, + "p90": 78.27199995517731, + "p95": 79.52000200748444, + "p99": 87.5839963555336 + }, + "combine": { + "p50": 78.40000092983246, + "p90": 79.19999957084656, + "p95": 79.71200346946716, + "p99": 83.64800363779068 + }, + "roundtrip": { + "p50": 134.24000144004822, + "p90": 138.20800185203552, + "p95": 139.5840048789978, + "p99": 144.3520039319992 + }, + "isolatedSum": { + "p50": 154.30399775505066, + "p90": 157.47199952602386, + "p95": 159.2320054769516, + "p99": 171.23199999332428 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 12859392, + "combineLogicalBytes": 12859392, + "fanoutMean": 3.50390625, + "recvTokensMax": 255, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 103.07200253009796, + "p90": 105.98400235176086, + "p95": 107.04000294208527, + "p99": 
113.21599781513214 + }, + "combine": { + "p50": 127.13600695133209, + "p90": 128.1599998474121, + "p95": 128.57599556446075, + "p99": 131.04000687599182 + }, + "roundtrip": { + "p50": 209.1200053691864, + "p90": 214.30400013923645, + "p95": 216.12800657749176, + "p99": 229.66399788856506 + }, + "isolatedSum": { + "p50": 230.20800948143005, + "p90": 234.14400219917297, + "p95": 235.61599850654602, + "p99": 244.25600469112396 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-3bfb4348", + "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|decode|normal|none|none|0|tuned||47fddabb3277bec", + "colorKey": "b300_2e44c039", + "comparisonKey": "5c5e6a7ecdec195f", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:58:26.448327+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "b300-nv_16", + "sku": "b300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep · bf16 · zipf-heavy", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + 
"placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "47fddabb3277bec", + "workloadId": "set:4:6b84350720aa8233", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271893428", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271893428", + "createdAt": "2026-06-26T23:57:02Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 56.832000613212585, + "p90": 63.71200084686279, + "p95": 64.54399973154068, + "p99": 69.88800317049026 + }, + "combine": { + "p50": 55.67999929189682, + "p90": 58.20799991488457, + "p95": 64.86400216817856, + "p99": 68.89600306749344 + }, + "roundtrip": { + "p50": 94.52799707651138, + "p90": 99.2640033364296, + "p95": 101.56799852848053, + "p99": 107.04000294208527 + }, + "isolatedSum": { + "p50": 112.5119999051094, + "p90": 121.92000076174736, + "p95": 129.40800189971924, + "p99": 138.7840062379837 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 172032, + "combineLogicalBytes": 172032, + "fanoutMean": 1.5, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 56.92800134420395, + "p90": 59.039998799562454, + "p95": 60.5119988322258, + "p99": 66.04799628257751 + }, + "combine": { + "p50": 56.63999915122986, + "p90": 66.23999774456024, + "p95": 66.56000018119812, + "p99": 78.91199737787247 + }, + "roundtrip": { + "p50": 107.80800133943558, + "p90": 113.43999952077866, + "p95": 114.656001329422, + "p99": 124.22399967908859 + }, + "isolatedSum": { + "p50": 113.56800049543381, 
+ "p90": 125.2799965441227, + "p95": 127.07199901342392, + "p99": 144.95999366044998 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1376256, + "combineLogicalBytes": 1376256, + "fanoutMean": 1.5, + "recvTokensMax": 64, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 67.90400296449661, + "p90": 74.07999783754349, + "p95": 75.93599706888199, + "p99": 82.2720006108284 + }, + "combine": { + "p50": 67.90400296449661, + "p90": 70.0799971818924, + "p95": 77.05599814653397, + "p99": 79.26400005817413 + }, + "roundtrip": { + "p50": 120.4800009727478, + "p90": 124.89599734544754, + "p95": 126.27199292182922, + "p99": 140.99200069904327 + }, + "isolatedSum": { + "p50": 135.80800592899323, + "p90": 144.15999501943588, + "p95": 152.99199521541595, + "p99": 161.53600066900253 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 5533696, + "combineLogicalBytes": 5533696, + "fanoutMean": 1.5078125, + "recvTokensMax": 256, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 91.77599847316742, + "p90": 94.17600184679031, + "p95": 95.74399888515472, + "p99": 114.20799791812897 + }, + "combine": { + "p50": 116.28799885511398, + "p90": 119.19999867677689, + "p95": 126.36800110340118, + "p99": 130.43199479579926 + }, + "roundtrip": { + "p50": 194.0159946680069, + "p90": 201.08799636363983, + "p95": 202.84800231456757, + "p99": 212.92799711227417 + }, + "isolatedSum": { + "p50": 208.0639973282814, + "p90": 213.3760005235672, + "p95": 222.1119999885559, + "p99": 244.63999271392822 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22650880, + "combineLogicalBytes": 22650880, + "fanoutMean": 1.54296875, + "recvTokensMax": 1024, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-f0dd83d8", 
+ "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|ac583971f94b176", + "colorKey": "b300_c1ad910f", + "comparisonKey": "80e2eefb7447672f", + "schemaVersion": 3, + "generatedAt": "2026-06-26T17:41:08.828331+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "b300-nv_15", + "sku": "b300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "normalized", + "suite": "resource-constrained", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep · bf16 (norm)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": 0.18, + "achievedFraction": 0.1824, + "configuredUnits": 27, + "deviceUnits": 148, + "resourceClass": "resource-constrained", + "conformanceClass": "resource-conforming", + "fixedKernel": false, + "paretoEligible": true + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": "set:8:d8d49658059863f2", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28254469772", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254469772", + "createdAt": "2026-06-26T17:29:52Z", + "sha": "60dec7d70f554e252fec87709e2be52752947db1" + }, + 
"rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 56.992001831531525, + "p90": 59.039998799562454, + "p95": 61.824001371860504, + "p99": 73.44000041484833 + }, + "combine": { + "p50": 66.3359984755516, + "p90": 67.4239993095398, + "p95": 68.15999746322632, + "p99": 77.47200131416321 + }, + "roundtrip": { + "p50": 106.81600123643875, + "p90": 113.08799684047699, + "p95": 114.23999816179276, + "p99": 135.6479972600937 + }, + "isolatedSum": { + "p50": 123.32800030708313, + "p90": 126.46399810910225, + "p95": 129.98399883508682, + "p99": 150.91200172901154 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 56.992001831531525, + "p90": 58.78400057554245, + "p95": 60.92799827456474, + "p99": 73.21599870920181 + }, + "combine": { + "p50": 67.32799857854843, + "p90": 69.11999732255936, + "p95": 70.65600156784058, + "p99": 79.93599772453308 + }, + "roundtrip": { + "p50": 106.9440022110939, + "p90": 109.40799862146378, + "p95": 110.88000237941742, + "p99": 119.39200013875961 + }, + "isolatedSum": { + "p50": 124.32000041007996, + "p90": 127.9039978981018, + "p95": 131.58399984240532, + "p99": 153.1519964337349 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 57.792000472545624, + "p90": 59.39200147986412, + "p95": 61.28000095486641, + "p99": 68.09599697589874 + }, + "combine": { + "p50": 67.80800223350525, + "p90": 69.66400146484375, + "p95": 76.99199765920639, + "p99": 78.75200361013412 + }, + "roundtrip": { + "p50": 116.22399836778641, + "p90": 
122.68800288438797, + "p95": 124.35200065374374, + "p99": 127.93600559234619 + }, + "isolatedSum": { + "p50": 125.60000270605087, + "p90": 129.05600294470787, + "p95": 138.2719986140728, + "p99": 146.84800058603287 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 59.29600074887276, + "p90": 61.15199998021126, + "p95": 62.39999830722809, + "p99": 68.1919977068901 + }, + "combine": { + "p50": 68.38399916887283, + "p90": 77.31200009584427, + "p95": 77.72800326347351, + "p99": 78.78399640321732 + }, + "roundtrip": { + "p50": 120.25599926710129, + "p90": 125.82400441169739, + "p95": 126.75200402736664, + "p99": 133.44000279903412 + }, + "isolatedSum": { + "p50": 127.67999991774559, + "p90": 138.46400007605553, + "p95": 140.1280015707016, + "p99": 146.97599411010742 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 62.78400123119354, + "p90": 69.023996591568, + "p95": 71.03999704122543, + "p99": 76.73600316047668 + }, + "combine": { + "p50": 77.2479996085167, + "p90": 78.5600021481514, + "p95": 78.72000336647034, + "p99": 80.86399734020233 + }, + "roundtrip": { + "p50": 119.61600184440613, + "p90": 122.72000312805176, + "p95": 124.35200065374374, + "p99": 131.29599392414093 + }, + "isolatedSum": { + "p50": 140.03200083971024, + "p90": 147.5839987397194, + "p95": 149.76000040769577, + "p99": 157.60000050067902 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + 
"stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 69.24799829721451, + "p90": 70.91200351715088, + "p95": 73.69600236415863, + "p99": 81.69600367546082 + }, + "combine": { + "p50": 78.59200239181519, + "p90": 79.80799674987793, + "p95": 80.73599636554718, + "p99": 90.94399958848953 + }, + "roundtrip": { + "p50": 130.68799674510956, + "p90": 135.23200154304504, + "p95": 136.51199638843536, + "p99": 140.47999680042267 + }, + "isolatedSum": { + "p50": 147.8400006890297, + "p90": 150.7200002670288, + "p95": 154.4319987297058, + "p99": 172.64000326395035 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 82.49600231647491, + "p90": 92.70399808883667, + "p95": 95.0080007314682, + "p99": 99.45599734783173 + }, + "combine": { + "p50": 92.25600212812424, + "p90": 100.09600222110748, + "p95": 102.36799716949463, + "p99": 106.65600001811981 + }, + "roundtrip": { + "p50": 158.65600109100342, + "p90": 163.00800442695618, + "p95": 164.19200599193573, + "p99": 169.50400173664093 + }, + "isolatedSum": { + "p50": 174.75200444459915, + "p90": 192.80000030994415, + "p95": 197.37599790096283, + "p99": 206.11199736595154 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 93.91999989748001, + "p90": 95.83999961614609, + "p95": 98.04800152778625, + "p99": 104.99200224876404 + }, + "combine": { + "p50": 115.35999923944473, + "p90": 115.93600362539291, + "p95": 
116.60800129175186, + "p99": 119.45600062608719 + }, + "roundtrip": { + "p50": 192.51200556755066, + "p90": 198.88000190258026, + "p95": 199.48799908161163, + "p99": 209.47200059890747 + }, + "isolatedSum": { + "p50": 209.27999913692474, + "p90": 211.776003241539, + "p95": 214.65600281953812, + "p99": 224.44800287485123 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-dede7717", + "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|decode|normal|none|none|0|normalized|0.18|ffa946582edb500", + "colorKey": "b300_0622d929", + "comparisonKey": "c4ede73885f09b56", + "schemaVersion": 3, + "generatedAt": "2026-06-26T18:12:16.850895+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "b300-nv_17", + "sku": "b300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "normalized", + "suite": "resource-constrained", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep · bf16 (norm) · balanced", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced", + "routingLabel": "balanced", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": 0.18, + "achievedFraction": 0.1824, + "configuredUnits": 27, + "deviceUnits": 148, + "resourceClass": "resource-constrained", + "conformanceClass": "resource-conforming", + "fixedKernel": false, + "paretoEligible": true + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + 
"scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ffa946582edb500", + "workloadId": "set:8:7af12818400d6348", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28254508907", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254508907", + "createdAt": "2026-06-26T17:30:32Z", + "sha": "60dec7d70f554e252fec87709e2be52752947db1" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 57.69599974155426, + "p90": 60.06399914622307, + "p95": 61.664000153541565, + "p99": 77.7600035071373 + }, + "combine": { + "p50": 68.03199648857117, + "p90": 69.76000219583511, + "p95": 76.92799717187881, + "p99": 78.52800190448761 + }, + "roundtrip": { + "p50": 107.80800133943558, + "p90": 110.59200018644333, + "p95": 112.19199746847153, + "p99": 128.76799702644348 + }, + "isolatedSum": { + "p50": 125.72799623012543, + "p90": 129.82400134205818, + "p95": 138.59199732542038, + "p99": 156.2880054116249 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 8, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 58.559998869895935, + "p90": 60.15999987721443, + "p95": 61.664000153541565, + "p99": 72.76800274848938 + }, + "combine": { + "p50": 68.25599819421768, + "p90": 76.86399668455124, + "p95": 77.53600180149078, + "p99": 79.9039974808693 + }, + "roundtrip": { + "p50": 116.22399836778641, + "p90": 122.11199849843979, + "p95": 123.07199835777283, + "p99": 127.9039978981018 + }, + "isolatedSum": { + "p50": 126.81599706411362, + "p90": 137.02399656176567, + "p95": 139.20000195503235, + "p99": 
152.67200022935867 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1835008, + "combineLogicalBytes": 1835008, + "fanoutMean": 8, + "recvTokensMax": 16, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 58.59199911355972, + "p90": 60.5119988322258, + "p95": 61.664000153541565, + "p99": 69.66400146484375 + }, + "combine": { + "p50": 70.01599669456482, + "p90": 78.40000092983246, + "p95": 78.52800190448761, + "p99": 81.216000020504 + }, + "roundtrip": { + "p50": 121.66400253772736, + "p90": 125.37600100040436, + "p95": 127.20000743865967, + "p99": 135.74400544166565 + }, + "isolatedSum": { + "p50": 128.60799580812454, + "p90": 138.91199976205826, + "p95": 140.19200205802917, + "p99": 150.88000148534775 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 8, + "recvTokensMax": 32, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 59.61599946022034, + "p90": 61.95199862122536, + "p95": 63.90400230884552, + "p99": 71.52000069618225 + }, + "combine": { + "p50": 77.40800082683563, + "p90": 78.65600287914276, + "p95": 78.94399762153625, + "p99": 89.28000181913376 + }, + "roundtrip": { + "p50": 119.80800330638885, + "p90": 122.65600264072418, + "p95": 124.83199685811996, + "p99": 136.83199882507324 + }, + "isolatedSum": { + "p50": 137.02400028705597, + "p90": 140.60800150036812, + "p95": 142.84799993038177, + "p99": 160.800002515316 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 7340032, + "combineLogicalBytes": 7340032, + "fanoutMean": 8, + "recvTokensMax": 64, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 73.91999661922455, + "p90": 76.09599828720093, + "p95": 
78.04799824953079, + "p99": 85.24800091981888 + }, + "combine": { + "p50": 78.40000092983246, + "p90": 79.1039988398552, + "p95": 79.39200103282928, + "p99": 85.08799970149994 + }, + "roundtrip": { + "p50": 121.44000083208084, + "p90": 126.94400548934937, + "p95": 128.92800569534302, + "p99": 145.31199634075165 + }, + "isolatedSum": { + "p50": 152.319997549057, + "p90": 155.19999712705612, + "p95": 157.43999928236008, + "p99": 170.33600062131882 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 8, + "recvTokensMax": 128, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 71.07199728488922, + "p90": 72.86400347948074, + "p95": 73.47200065851212, + "p99": 82.40000158548355 + }, + "combine": { + "p50": 80.06399869918823, + "p90": 81.37600123882294, + "p95": 81.82399719953537, + "p99": 89.88799899816513 + }, + "roundtrip": { + "p50": 134.36800241470337, + "p90": 141.56800508499146, + "p95": 143.99999380111694, + "p99": 148.80000054836273 + }, + "isolatedSum": { + "p50": 151.13599598407745, + "p90": 154.24000471830368, + "p95": 155.29599785804749, + "p99": 172.28800058364868 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 8, + "recvTokensMax": 256, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 85.34400165081024, + "p90": 89.63199704885483, + "p95": 91.93599969148636, + "p99": 96.57599776983261 + }, + "combine": { + "p50": 93.98400038480759, + "p90": 103.10400277376175, + "p95": 103.29599678516388, + "p99": 105.92000186443329 + }, + "roundtrip": { + "p50": 169.3439930677414, + "p90": 172.89599776268005, + "p95": 175.87199807167053, + "p99": 196.16000354290009 + }, + "isolatedSum": { + "p50": 179.32800203561783, + 
"p90": 192.73599982261658, + "p95": 195.23199647665024, + "p99": 202.4959996342659 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 8, + "recvTokensMax": 512, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 107.13600367307663, + "p90": 109.79200154542923, + "p95": 111.7120012640953, + "p99": 131.96800649166107 + }, + "combine": { + "p50": 130.49599528312683, + "p90": 139.52000439167023, + "p95": 139.8719996213913, + "p99": 140.54399728775024 + }, + "roundtrip": { + "p50": 231.1680018901825, + "p90": 235.00800132751465, + "p95": 236.7040067911148, + "p99": 257.6960027217865 + }, + "isolatedSum": { + "p50": 237.63199895620346, + "p90": 249.31200593709946, + "p95": 251.5840008854866, + "p99": 272.5120037794113 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 8, + "recvTokensMax": 1024, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-e56568fe", + "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|decode|normal|none|none|0|normalized|0.18|14ded8461f2636c", + "colorKey": "b300_01ab5b1a", + "comparisonKey": "1f56c3705f670037", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:38:03.696815+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "b300-nv_07", + "sku": "b300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "normalized", + "suite": "resource-constrained", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep · bf16 (norm) · zipf", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + 
"routing": "zipf", + "routingLabel": "zipf", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": 0.18, + "achievedFraction": 0.1824, + "configuredUnits": 27, + "deviceUnits": 148, + "resourceClass": "resource-constrained", + "conformanceClass": "resource-conforming", + "fixedKernel": false, + "paretoEligible": true + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "14ded8461f2636c", + "workloadId": "set:8:f5576e2b712d38c3", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271231753", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271231753", + "createdAt": "2026-06-26T23:36:29Z", + "sha": "ee4ffe77871d0200cb4a78c96d3ae9f692e9af02" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 55.904000997543335, + "p90": 59.776000678539276, + "p95": 65.72800129652023, + "p99": 85.11999994516373 + }, + "combine": { + "p50": 65.60000032186508, + "p90": 66.3679987192154, + "p95": 66.91200286149979, + "p99": 76.86399668455124 + }, + "roundtrip": { + "p50": 105.05600273609161, + "p90": 111.35999858379364, + "p95": 112.96000331640244, + "p99": 121.05599790811539 + }, + "isolatedSum": { + "p50": 121.50400131940842, + "p90": 126.14399939775467, + "p95": 132.64000415802002, + "p99": 161.98399662971497 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 444416, + "combineLogicalBytes": 444416, + "fanoutMean": 3.875, + "recvTokensMax": 8, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 55.84000051021576, + "p90": 57.56799876689911, + "p95": 60.095999389886856, + "p99": 72.4480003118515 + }, + "combine": { + "p50": 65.69600105285645, + "p90": 66.3679987192154, + "p95": 66.84800237417221, + "p99": 69.2799985408783 + }, + "roundtrip": { + "p50": 104.76800054311752, + "p90": 109.40799862146378, + "p95": 112.03200370073318, + "p99": 159.19999778270721 + }, + "isolatedSum": { + "p50": 121.5360015630722, + "p90": 123.9359974861145, + "p95": 126.94400176405907, + "p99": 141.7279988527298 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 845824, + "combineLogicalBytes": 845824, + "fanoutMean": 3.6875, + "recvTokensMax": 16, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 57.37600103020668, + "p90": 60.80000102519989, + "p95": 65.76000154018402, + "p99": 95.8079993724823 + }, + "combine": { + "p50": 66.59200042486191, + "p90": 77.18399912118912, + "p95": 77.82399654388428, + "p99": 79.16799932718277 + }, + "roundtrip": { + "p50": 106.91200196743011, + "p90": 112.38399893045425, + "p95": 115.23199826478958, + "p99": 124.22399967908859 + }, + "isolatedSum": { + "p50": 123.96800145506859, + "p90": 137.984000146389, + "p95": 143.5839980840683, + "p99": 174.97599869966507 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1691648, + "combineLogicalBytes": 1691648, + "fanoutMean": 3.6875, + "recvTokensMax": 32, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 58.6559996008873, + "p90": 63.231997191905975, + "p95": 65.60000032186508, + "p99": 69.47200000286102 + }, + "combine": { + "p50": 68.12799721956253, + "p90": 76.48000121116638, + "p95": 77.15199887752533, + "p99": 84.1279998421669 + }, + "roundtrip": { + "p50": 122.11199849843979, + "p90": 125.34399330615997, 
+ "p95": 128.4479945898056, + "p99": 151.5520066022873 + }, + "isolatedSum": { + "p50": 126.78399682044983, + "p90": 139.71199840307236, + "p95": 142.7519991993904, + "p99": 153.59999984502792 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3354624, + "combineLogicalBytes": 3354624, + "fanoutMean": 3.65625, + "recvTokensMax": 64, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 59.487998485565186, + "p90": 65.24799764156342, + "p95": 67.00800359249115, + "p99": 73.56800138950348 + }, + "combine": { + "p50": 68.12799721956253, + "p90": 77.34400033950806, + "p95": 77.88799703121185, + "p99": 89.53599631786346 + }, + "roundtrip": { + "p50": 119.1679984331131, + "p90": 124.67200309038162, + "p95": 125.69600343704224, + "p99": 134.5600038766861 + }, + "isolatedSum": { + "p50": 127.61599570512772, + "p90": 142.59199798107147, + "p95": 144.896000623703, + "p99": 163.10399770736694 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6537216, + "combineLogicalBytes": 6537216, + "fanoutMean": 3.5625, + "recvTokensMax": 127, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 74.52800124883652, + "p90": 76.51200145483017, + "p95": 77.18399912118912, + "p99": 81.7599967122078 + }, + "combine": { + "p50": 77.91999727487564, + "p90": 78.78399640321732, + "p95": 79.26400005817413, + "p99": 81.85599744319916 + }, + "roundtrip": { + "p50": 132.32000172138214, + "p90": 135.6160044670105, + "p95": 136.31999492645264, + "p99": 141.66399836540222 + }, + "isolatedSum": { + "p50": 152.44799852371216, + "p90": 155.29599785804749, + "p95": 156.44799917936325, + "p99": 163.61599415540695 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 12859392, + "combineLogicalBytes": 12859392, + "fanoutMean": 3.50390625, + "recvTokensMax": 255, + 
"stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 80.19199967384338, + "p90": 81.88799768686295, + "p95": 83.52000266313553, + "p99": 90.30400216579437 + }, + "combine": { + "p50": 90.59199690818787, + "p90": 91.67999774217606, + "p95": 92.57599711418152, + "p99": 101.21600329875946 + }, + "roundtrip": { + "p50": 155.45600652694702, + "p90": 160.5760008096695, + "p95": 161.98399662971497, + "p99": 169.53599452972412 + }, + "isolatedSum": { + "p50": 170.78399658203125, + "p90": 173.567995429039, + "p95": 176.09599977731705, + "p99": 191.52000546455383 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 25145344, + "combineLogicalBytes": 25145344, + "fanoutMean": 3.42578125, + "recvTokensMax": 510, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 101.69599950313568, + "p90": 105.15200346708298, + "p95": 106.04800283908844, + "p99": 115.167997777462 + }, + "combine": { + "p50": 126.81600451469421, + "p90": 127.77599692344666, + "p95": 128.12800705432892, + "p99": 131.71200454235077 + }, + "roundtrip": { + "p50": 207.58399367332458, + "p90": 212.41599321365356, + "p95": 215.45599400997162, + "p99": 240.79999327659607 + }, + "isolatedSum": { + "p50": 228.5120040178299, + "p90": 232.92800039052963, + "p95": 234.17600989341736, + "p99": 246.88000231981277 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-a499b6fe", + "identity": "b300|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|decode|normal|none|none|0|normalized|0.18|a8f501af7004836", + "colorKey": "b300_085c12d4", + "comparisonKey": "f41671f558a3c8d2", + 
"schemaVersion": 3, + "generatedAt": "2026-06-26T18:23:15.234137+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "b300-nv_10", + "sku": "b300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "normalized", + "suite": "resource-constrained", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep · bf16 (norm) · zipf+eplb", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf", + "routingLabel": "zipf+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": 0.18, + "achievedFraction": 0.1824, + "configuredUnits": 27, + "deviceUnits": 148, + "resourceClass": "resource-constrained", + "conformanceClass": "resource-conforming", + "fixedKernel": false, + "paretoEligible": true + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "a8f501af7004836", + "workloadId": "set:8:f5576e2b712d38c3", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": 4.927734375, + "eplbImbalanceAfter": 1.0006103515625, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28255311146", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28255311146", + "createdAt": "2026-06-26T17:45:43Z", + "sha": "36d3eb6c3c7386d3220c873d305410219c5c0f17" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 56.86400085687637, + "p90": 59.7120001912117, + "p95": 63.32799792289734, + "p99": 72.64000177383423 + }, + 
"combine": { + "p50": 64.83200192451477, + "p90": 66.46399945020676, + "p95": 66.94400310516357, + "p99": 76.51200145483017 + }, + "roundtrip": { + "p50": 105.12000322341919, + "p90": 110.72000116109848, + "p95": 111.7440015077591, + "p99": 122.56000190973282 + }, + "isolatedSum": { + "p50": 121.69600278139114, + "p90": 126.17599964141846, + "p95": 130.2720010280609, + "p99": 149.1520032286644 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 5.375, + "recvTokensMax": 7, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 57.5999990105629, + "p90": 59.808000922203064, + "p95": 62.07999959588051, + "p99": 71.45600020885468 + }, + "combine": { + "p50": 66.27199798822403, + "p90": 67.00800359249115, + "p95": 67.29599833488464, + "p99": 76.92799717187881 + }, + "roundtrip": { + "p50": 106.27199709415436, + "p90": 108.22399705648422, + "p95": 110.01600325107574, + "p99": 132.54399597644806 + }, + "isolatedSum": { + "p50": 123.87199699878693, + "p90": 126.81600451469421, + "p95": 129.37599793076515, + "p99": 148.3839973807335 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1204224, + "combineLogicalBytes": 1204224, + "fanoutMean": 5.25, + "recvTokensMax": 14, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 57.53599852323532, + "p90": 59.808000922203064, + "p95": 60.70400029420853, + "p99": 67.87200272083282 + }, + "combine": { + "p50": 66.43199920654297, + "p90": 67.45599955320358, + "p95": 69.31199878454208, + "p99": 78.78399640321732 + }, + "roundtrip": { + "p50": 106.6880002617836, + "p90": 109.50399935245514, + "p95": 111.87200248241425, + "p99": 125.08800625801086 + }, + "isolatedSum": { + "p50": 123.96799772977829, + "p90": 127.26400047540665, + "p95": 130.0159990787506, + 
"p99": 146.65599912405014 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2394112, + "combineLogicalBytes": 2394112, + "fanoutMean": 5.21875, + "recvTokensMax": 24, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 58.848001062870026, + "p90": 61.15199998021126, + "p95": 64.41599875688553, + "p99": 78.14399898052216 + }, + "combine": { + "p50": 68.28799843788147, + "p90": 76.25599950551987, + "p95": 76.92799717187881, + "p99": 79.64800298213959 + }, + "roundtrip": { + "p50": 116.28799885511398, + "p90": 122.8799968957901, + "p95": 124.70400333404541, + "p99": 145.08800208568573 + }, + "isolatedSum": { + "p50": 127.1359995007515, + "p90": 137.40799948573112, + "p95": 141.34399592876434, + "p99": 157.79200196266174 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4630528, + "combineLogicalBytes": 4630528, + "fanoutMean": 5.046875, + "recvTokensMax": 45, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 60.54399907588959, + "p90": 66.14399701356888, + "p95": 68.67200136184692, + "p99": 83.29600095748901 + }, + "combine": { + "p50": 68.64000111818314, + "p90": 77.2159993648529, + "p95": 77.82399654388428, + "p99": 78.91199737787247 + }, + "roundtrip": { + "p50": 123.16799908876419, + "p90": 126.0479986667633, + "p95": 127.16799974441528, + "p99": 131.1040073633194 + }, + "isolatedSum": { + "p50": 129.18400019407272, + "p90": 143.35999637842178, + "p95": 146.4959979057312, + "p99": 162.20799833536148 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9447424, + "combineLogicalBytes": 9447424, + "fanoutMean": 5.1484375, + "recvTokensMax": 91, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 69.31199878454208, + "p90": 
75.52000135183334, + "p95": 76.4160007238388, + "p99": 83.20000022649765 + }, + "combine": { + "p50": 78.46400141716003, + "p90": 79.26400005817413, + "p95": 79.45600152015686, + "p99": 82.40000158548355 + }, + "roundtrip": { + "p50": 132.192000746727, + "p90": 135.6479972600937, + "p95": 136.3839954137802, + "p99": 147.20000326633453 + }, + "isolatedSum": { + "p50": 147.77600020170212, + "p90": 154.78400141000748, + "p95": 155.87200224399567, + "p99": 165.6000018119812 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19023872, + "combineLogicalBytes": 19023872, + "fanoutMean": 5.18359375, + "recvTokensMax": 178, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 87.8399983048439, + "p90": 90.30400216579437, + "p95": 91.87199920415878, + "p99": 100.0640019774437 + }, + "combine": { + "p50": 91.2960022687912, + "p90": 93.08800101280212, + "p95": 93.85599941015244, + "p99": 108.12799632549286 + }, + "roundtrip": { + "p50": 157.44000673294067, + "p90": 162.4639928340912, + "p95": 163.71199488639832, + "p99": 168.89600455760956 + }, + "isolatedSum": { + "p50": 179.1360005736351, + "p90": 183.3920031785965, + "p95": 185.72799861431122, + "p99": 208.19199830293655 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38148096, + "combineLogicalBytes": 38148096, + "fanoutMean": 5.197265625, + "recvTokensMax": 350, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 93.9520001411438, + "p90": 98.39999675750732, + "p95": 100.00000149011612, + "p99": 105.53599894046783 + }, + "combine": { + "p50": 115.29599875211716, + "p90": 116.12799763679504, + "p95": 116.48000031709671, + "p99": 127.87200510501862 + }, + "roundtrip": { + "p50": 193.08799505233765, + "p90": 199.90399479866028, + "p95": 201.50400698184967, + "p99": 214.1759991645813 + }, + 
"isolatedSum": { + "p50": 209.24799889326096, + "p90": 214.52799439430237, + "p95": 216.48000180721283, + "p99": 233.40800404548645 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 76955648, + "combineLogicalBytes": 76955648, + "fanoutMean": 5.2421875, + "recvTokensMax": 687, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-8481f6a4", + "identity": "b300|deepep|7168|8|256|bf16|normal|cached-layout-comm-only-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|ac583971f94b176", + "colorKey": "b300_63f1354f", + "comparisonKey": "63f9b5a5300d4d4b", + "schemaVersion": 3, + "generatedAt": "2026-06-26T18:09:35.317427+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "b300-nv_16", + "sku": "b300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "normalized", + "suite": "resource-constrained", + "comparisonClass": "standardized", + "measurementContract": "cached-layout-comm-only-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep · bf16 (norm) [cl]", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": 0.18, + "achievedFraction": 0.1824, + "configuredUnits": 27, + "deviceUnits": 148, + "resourceClass": "resource-constrained", + "conformanceClass": "resource-conforming", + "fixedKernel": false, + "paretoEligible": true + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": "set:8:d8d49658059863f2", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": 
null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28254489726", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254489726", + "createdAt": "2026-06-26T17:30:12Z", + "sha": "60dec7d70f554e252fec87709e2be52752947db1" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 50.303999334573746, + "p90": 52.06400156021118, + "p95": 53.82400006055832, + "p99": 65.05600363016129 + }, + "combine": { + "p50": 66.56000018119812, + "p90": 68.2239979505539, + "p95": 68.76800209283829, + "p99": 77.95199751853943 + }, + "roundtrip": { + "p50": 99.84000027179718, + "p90": 103.90400141477585, + "p95": 107.51999914646149, + "p99": 117.11999773979187 + }, + "isolatedSum": { + "p50": 116.86399951577187, + "p90": 120.28799951076508, + "p95": 122.5920021533966, + "p99": 143.0080011487007 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 51.263999193906784, + "p90": 52.89600044488907, + "p95": 55.32800033688545, + "p99": 65.18399715423584 + }, + "combine": { + "p50": 66.97600334882736, + "p90": 68.7360018491745, + "p95": 69.11999732255936, + "p99": 78.11199873685837 + }, + "roundtrip": { + "p50": 100.99200159311295, + "p90": 103.26399654150009, + "p95": 105.76000064611435, + "p99": 113.6000007390976 + }, + "isolatedSum": { + "p50": 118.24000254273415, + "p90": 121.63200229406357, + "p95": 124.44799765944481, + "p99": 143.2959958910942 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 7, + "correct": 
true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 51.4880008995533, + "p90": 53.408000618219376, + "p95": 54.9440011382103, + "p99": 61.63199990987778 + }, + "combine": { + "p50": 67.6800012588501, + "p90": 69.60000097751617, + "p95": 76.89599692821503, + "p99": 79.16799932718277 + }, + "roundtrip": { + "p50": 108.73600095510483, + "p90": 115.80800265073776, + "p95": 117.0239970088005, + "p99": 124.35200065374374 + }, + "isolatedSum": { + "p50": 119.1680021584034, + "p90": 123.00800159573555, + "p95": 131.83999806642532, + "p99": 140.79999923706055 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 52.639998495578766, + "p90": 55.64799904823303, + "p95": 59.39200147986412, + "p99": 68.00000369548798 + }, + "combine": { + "p50": 68.25599819421768, + "p90": 77.08799839019775, + "p95": 77.60000228881836, + "p99": 78.94399762153625 + }, + "roundtrip": { + "p50": 113.69600147008896, + "p90": 117.66400188207626, + "p95": 118.72000247240067, + "p99": 121.18399888277054 + }, + "isolatedSum": { + "p50": 120.89599668979645, + "p90": 132.7359974384308, + "p95": 136.99200376868248, + "p99": 146.94400131702423 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 61.792001128196716, + "p90": 67.90400296449661, + "p95": 68.67200136184692, + "p99": 71.1359977722168 + }, + "combine": { + "p50": 70.46400010585785, + "p90": 78.40000092983246, + "p95": 78.59200239181519, + "p99": 81.44000172615051 + }, + 
"roundtrip": { + "p50": 113.18399757146835, + "p90": 115.9679964184761, + "p95": 117.53600090742111, + "p99": 127.87200510501862 + }, + "isolatedSum": { + "p50": 132.25600123405457, + "p90": 146.30400389432907, + "p95": 147.2640037536621, + "p99": 152.5759994983673 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 62.65600025653839, + "p90": 64.92800265550613, + "p95": 66.880002617836, + "p99": 73.69600236415863 + }, + "combine": { + "p50": 78.59200239181519, + "p90": 79.74400371313095, + "p95": 80.64000308513641, + "p99": 85.63199639320374 + }, + "roundtrip": { + "p50": 124.28800016641617, + "p90": 127.93600559234619, + "p95": 130.43199479579926, + "p99": 138.5599970817566 + }, + "isolatedSum": { + "p50": 141.24800264835358, + "p90": 144.67200636863708, + "p95": 147.5200057029724, + "p99": 159.32799875736237 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 75.77600330114365, + "p90": 83.16799998283386, + "p95": 83.96799862384796, + "p99": 96.3520035147667 + }, + "combine": { + "p50": 91.48799628019333, + "p90": 93.6959981918335, + "p95": 95.90400010347366, + "p99": 104.76800054311752 + }, + "roundtrip": { + "p50": 150.11200308799744, + "p90": 153.28000485897064, + "p95": 154.91199493408203, + "p99": 159.96800363063812 + }, + "isolatedSum": { + "p50": 167.26399958133698, + "p90": 176.86399817466736, + "p95": 179.87199872732162, + "p99": 201.12000405788422 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 
38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 87.36000210046768, + "p90": 89.31200206279755, + "p95": 92.3520028591156, + "p99": 98.36799651384354 + }, + "combine": { + "p50": 115.32799899578094, + "p90": 115.9679964184761, + "p95": 117.21599847078323, + "p99": 126.49600207805634 + }, + "roundtrip": { + "p50": 186.14399433135986, + "p90": 191.67999923229218, + "p95": 193.05600225925446, + "p99": 199.072003364563 + }, + "isolatedSum": { + "p50": 202.68800109624863, + "p90": 205.27999848127365, + "p95": 209.56800132989883, + "p99": 224.86399859189987 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-1911c35d", + "identity": "b300|deepep|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|ac583971f94b176", + "colorKey": "b300_eee29686", + "comparisonKey": "37f5e47990ede677", + "schemaVersion": 3, + "generatedAt": "2026-06-26T17:41:38.976776+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "b300-nv_01", + "sku": "b300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "normalized", + "suite": "resource-constrained", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep · fp8 (norm)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "activationProfile": "normal", + 
"combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": 0.18, + "achievedFraction": 0.1824, + "configuredUnits": 27, + "deviceUnits": 148, + "resourceClass": "resource-constrained", + "conformanceClass": "resource-conforming", + "fixedKernel": false, + "paretoEligible": true + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": "set:8:d8d49658059863f2", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28254479346", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254479346", + "createdAt": "2026-06-26T17:30:02Z", + "sha": "60dec7d70f554e252fec87709e2be52752947db1" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 56.03199824690819, + "p90": 58.240000158548355, + "p95": 59.61599946022034, + "p99": 69.56800073385239 + }, + "combine": { + "p50": 61.40799820423126, + "p90": 63.4239986538887, + "p95": 64.35199826955795, + "p99": 77.53600180149078 + }, + "roundtrip": { + "p50": 121.18399888277054, + "p90": 123.4240010380745, + "p95": 124.64000284671783, + "p99": 131.48799538612366 + }, + "isolatedSum": { + "p50": 117.43999645113945, + "p90": 121.66399881243706, + "p95": 123.96799772977829, + "p99": 147.10400253534317 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 56.92800134420395, + "p90": 59.23200026154518, + "p95": 60.19200012087822, + "p99": 68.4799998998642 + }, + 
"combine": { + "p50": 62.24000081419945, + "p90": 64.19199705123901, + "p95": 65.05600363016129, + "p99": 69.69600170850754 + }, + "roundtrip": { + "p50": 122.65600264072418, + "p90": 124.79999661445618, + "p95": 125.98399817943573, + "p99": 135.1040005683899 + }, + "isolatedSum": { + "p50": 119.1680021584034, + "p90": 123.4239973127842, + "p95": 125.2480037510395, + "p99": 138.17600160837173 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 57.631999254226685, + "p90": 60.03199890255928, + "p95": 61.37600168585777, + "p99": 67.16799736022949 + }, + "combine": { + "p50": 63.93600255250931, + "p90": 65.43999910354614, + "p95": 65.88800251483917, + "p99": 69.023996591568 + }, + "roundtrip": { + "p50": 125.50400197505951, + "p90": 128.51199507713318, + "p95": 132.06399977207184, + "p99": 143.10400187969208 + }, + "isolatedSum": { + "p50": 121.56800180673599, + "p90": 125.47199800610542, + "p95": 127.26400420069695, + "p99": 136.19199395179749 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 59.487998485565186, + "p90": 61.88800185918808, + "p95": 62.81600147485733, + "p99": 73.2479989528656 + }, + "combine": { + "p50": 66.46399945020676, + "p90": 67.80800223350525, + "p95": 68.89600306749344, + "p99": 71.71200215816498 + }, + "roundtrip": { + "p50": 128.60800325870514, + "p90": 130.65600395202637, + "p95": 131.80799782276154, + "p99": 144.3520039319992 + }, + "isolatedSum": { + "p50": 125.95199793577194, + "p90": 129.69600409269333, + "p95": 131.71200454235077, + 
"p99": 144.96000111103058 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487296, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 60.67200005054474, + "p90": 62.880001962184906, + "p95": 63.74400109052658, + "p99": 69.82400268316269 + }, + "combine": { + "p50": 67.64800101518631, + "p90": 69.63200122117996, + "p95": 70.91200351715088, + "p99": 79.71200346946716 + }, + "roundtrip": { + "p50": 130.87999820709229, + "p90": 133.15199315547943, + "p95": 134.43200290203094, + "p99": 141.88799262046814 + }, + "isolatedSum": { + "p50": 128.32000106573105, + "p90": 132.51200318336487, + "p95": 134.65600460767746, + "p99": 149.53600615262985 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 63.61600011587143, + "p90": 65.47199934720993, + "p95": 66.23999774456024, + "p99": 72.54400104284286 + }, + "combine": { + "p50": 72.31999933719635, + "p90": 74.14399832487106, + "p95": 75.23199915885925, + "p99": 79.6160027384758 + }, + "roundtrip": { + "p50": 142.87999272346497, + "p90": 145.85599303245544, + "p95": 147.16799557209015, + "p99": 155.29599785804749 + }, + "isolatedSum": { + "p50": 135.93599945306778, + "p90": 139.615997672081, + "p95": 141.4719969034195, + "p99": 152.16000378131866 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 72.64000177383423, + "p90": 
75.3600001335144, + "p95": 76.51200145483017, + "p99": 82.65600353479385 + }, + "combine": { + "p50": 87.90399879217148, + "p90": 90.08000046014786, + "p95": 90.84799885749817, + "p99": 101.15200281143188 + }, + "roundtrip": { + "p50": 172.83199727535248, + "p90": 175.4239946603775, + "p95": 176.41599476337433, + "p99": 181.43999576568604 + }, + "isolatedSum": { + "p50": 160.5440005660057, + "p90": 165.44000059366226, + "p95": 167.36000031232834, + "p99": 183.80800634622574 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 83.20000022649765, + "p90": 86.07999980449677, + "p95": 87.0399996638298, + "p99": 90.17600119113922 + }, + "combine": { + "p50": 108.70400071144104, + "p90": 110.97600311040878, + "p95": 112.06399649381638, + "p99": 116.41599982976913 + }, + "roundtrip": { + "p50": 218.07999908924103, + "p90": 221.343994140625, + "p95": 222.97599911689758, + "p99": 235.52000522613525 + }, + "isolatedSum": { + "p50": 191.9040009379387, + "p90": 197.05600291490555, + "p95": 199.10399615764618, + "p99": 206.59200102090836 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-fe6f5351", + "identity": "b300|deepep|7168|8|256|fp8|normal|cached-layout-comm-only-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|ac583971f94b176", + "colorKey": "b300_84b10b26", + "comparisonKey": "abf92acc41d9d301", + "schemaVersion": 3, + "generatedAt": "2026-06-26T18:10:48.557544+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "b300-nv_01", + "sku": "b300", + "backend": "deepep", + 
"phase": "decode", + "mode": "normal", + "resourceMode": "normalized", + "suite": "resource-constrained", + "comparisonClass": "standardized", + "measurementContract": "cached-layout-comm-only-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep · fp8 (norm) [cl]", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": 0.18, + "achievedFraction": 0.1824, + "configuredUnits": 27, + "deviceUnits": 148, + "resourceClass": "resource-constrained", + "conformanceClass": "resource-conforming", + "fixedKernel": false, + "paretoEligible": true + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": "set:8:d8d49658059863f2", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28254499301", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254499301", + "createdAt": "2026-06-26T17:30:22Z", + "sha": "60dec7d70f554e252fec87709e2be52752947db1" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 49.56800118088722, + "p90": 51.263999193906784, + "p95": 51.83999985456467, + "p99": 57.920001447200775 + }, + "combine": { + "p50": 62.24000081419945, + "p90": 63.680000603199005, + "p95": 64.51199948787689, + "p99": 66.3679987192154 + }, + "roundtrip": { + "p50": 114.78400230407715, + "p90": 116.86400324106216, + "p95": 
118.01599711179733, + "p99": 126.68800354003906 + }, + "isolatedSum": { + "p50": 111.80800199508667, + "p90": 114.94399979710579, + "p95": 116.35199934244156, + "p99": 124.28800016641617 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 49.92000013589859, + "p90": 51.80799961090088, + "p95": 52.76799947023392, + "p99": 58.9120015501976 + }, + "combine": { + "p50": 63.040003180503845, + "p90": 64.89600241184235, + "p95": 65.24799764156342, + "p99": 74.11199808120728 + }, + "roundtrip": { + "p50": 116.64000153541565, + "p90": 119.00799721479416, + "p95": 121.08799815177917, + "p99": 136.57599687576294 + }, + "isolatedSum": { + "p50": 112.96000331640244, + "p90": 116.70400202274323, + "p95": 118.01599711179733, + "p99": 133.02399963140488 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 51.42400041222572, + "p90": 53.63199859857559, + "p95": 54.655998945236206, + "p99": 65.76000154018402 + }, + "combine": { + "p50": 63.10400366783142, + "p90": 64.96000289916992, + "p95": 65.63200056552887, + "p99": 75.93599706888199 + }, + "roundtrip": { + "p50": 117.53600090742111, + "p90": 119.87199634313583, + "p95": 120.86399644613266, + "p99": 132.192000746727 + }, + "isolatedSum": { + "p50": 114.52800408005714, + "p90": 118.59200149774551, + "p95": 120.28799951076508, + "p99": 141.695998609066 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 7, + "correct": true, 
+ "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 52.41600051522255, + "p90": 54.496001452207565, + "p95": 55.71199953556061, + "p99": 60.5119988322258 + }, + "combine": { + "p50": 65.72800129652023, + "p90": 67.48799979686737, + "p95": 67.9360032081604, + "p99": 73.21599870920181 + }, + "roundtrip": { + "p50": 122.04799801111221, + "p90": 124.38400089740753, + "p95": 126.52799487113953, + "p99": 147.16799557209015 + }, + "isolatedSum": { + "p50": 118.14400181174278, + "p90": 121.98400124907494, + "p95": 123.64800274372101, + "p99": 133.7279975414276 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487296, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 53.69599908590317, + "p90": 55.84000051021576, + "p95": 56.86400085687637, + "p99": 65.0240033864975 + }, + "combine": { + "p50": 67.16799736022949, + "p90": 68.9919963479042, + "p95": 69.69600170850754, + "p99": 77.98399776220322 + }, + "roundtrip": { + "p50": 123.36000055074692, + "p90": 125.66399574279785, + "p95": 127.16799974441528, + "p99": 140.70400595664978 + }, + "isolatedSum": { + "p50": 120.86399644613266, + "p90": 124.83199685811996, + "p95": 126.56000256538391, + "p99": 143.0080011487007 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 57.66399949789047, + "p90": 59.776000678539276, + "p95": 60.63999980688095, + "p99": 65.72800129652023 + }, + "combine": { + "p50": 72.89600372314453, + "p90": 74.14399832487106, + "p95": 75.55200159549713, + "p99": 83.96799862384796 + }, + 
"roundtrip": { + "p50": 138.40000331401825, + "p90": 140.60799777507782, + "p95": 141.66399836540222, + "p99": 149.53599870204926 + }, + "isolatedSum": { + "p50": 130.560003221035, + "p90": 133.91999900341034, + "p95": 136.19200140237808, + "p99": 149.6959999203682 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 66.04799628257751, + "p90": 68.15999746322632, + "p95": 69.88800317049026, + "p99": 74.91199672222137 + }, + "combine": { + "p50": 87.93599903583527, + "p90": 90.08000046014786, + "p95": 91.74399822950363, + "p99": 98.24000298976898 + }, + "roundtrip": { + "p50": 164.76799547672272, + "p90": 167.42399334907532, + "p95": 169.3120002746582, + "p99": 185.92000007629395 + }, + "isolatedSum": { + "p50": 153.98399531841278, + "p90": 158.23999792337418, + "p95": 161.6320013999939, + "p99": 173.15199971199036 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 77.63200253248215, + "p90": 80.1599994301796, + "p95": 81.50400221347809, + "p99": 98.65599870681763 + }, + "combine": { + "p50": 108.35199803113937, + "p90": 110.78400164842606, + "p95": 111.84000223875046, + "p99": 126.01600587368011 + }, + "roundtrip": { + "p50": 211.42399311065674, + "p90": 214.52799439430237, + "p95": 215.87200462818146, + "p99": 223.1999933719635 + }, + "isolatedSum": { + "p50": 185.98400056362152, + "p90": 190.94400107860565, + "p95": 193.34400445222855, + "p99": 224.67200458049774 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + 
"combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-238797ce", + "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||157ca81687ddb63", + "colorKey": "b300_c9569580", + "comparisonKey": "c4fbb2dad9521e3e", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:57:38.465863+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "b300-nv_13", + "sku": "b300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep · bf16", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "157ca81687ddb63", + "workloadId": "set:3:a426d66e479dc893", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271869301", + "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271869301", + "createdAt": "2026-06-26T23:56:14Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 93.66399794816971, + "p90": 99.42399710416794, + "p95": 101.24800354242325, + "p99": 112.15999722480774 + }, + "combine": { + "p50": 115.7120019197464, + "p90": 116.54400080442429, + "p95": 117.47200042009354, + "p99": 128.7039965391159 + }, + "roundtrip": { + "p50": 195.3279972076416, + "p90": 199.072003364563, + "p95": 200.57600736618042, + "p99": 214.1440063714981 + }, + "isolatedSum": { + "p50": 209.3759998679161, + "p90": 215.96799790859222, + "p95": 218.72000396251678, + "p99": 240.86399376392365 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 193.7599927186966, + "p90": 200.3519982099533, + "p95": 202.94399559497833, + "p99": 209.75999534130096 + }, + "combine": { + "p50": 272.92799949645996, + "p90": 275.04000067710876, + "p95": 275.6800055503845, + "p99": 289.4720137119293 + }, + "roundtrip": { + "p50": 434.5279932022095, + "p90": 444.95999813079834, + "p95": 448.1920003890991, + "p99": 461.37601137161255 + }, + "isolatedSum": { + "p50": 466.68799221515656, + "p90": 475.3919988870621, + "p95": 478.62400114536285, + "p99": 499.2320090532303 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 312266752, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 577.6960253715515, + "p90": 582.6879739761353, + "p95": 584.6400260925293, + 
"p99": 595.7120060920715 + }, + "combine": { + "p50": 818.336009979248, + "p90": 828.4479975700378, + "p95": 838.3679986000061, + "p99": 852.6399731636047 + }, + "roundtrip": { + "p50": 1377.7920007705688, + "p90": 1387.3920440673828, + "p95": 1397.2480297088623, + "p99": 1410.4640483856201 + }, + "isolatedSum": { + "p50": 1396.0320353507996, + "p90": 1411.135971546173, + "p95": 1423.0080246925354, + "p99": 1448.3519792556763 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243805696, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-a989dada", + "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|prefill|normal|none|none|0|tuned||9e6ac678a09f7f8", + "colorKey": "b300_77566238", + "comparisonKey": "0cdc743c580a47d3", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:58:19.169974+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "b300-nv_01", + "sku": "b300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep · bf16 · balanced", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced", + "routingLabel": "balanced", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, 
+ "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "9e6ac678a09f7f8", + "workloadId": "set:3:2dad1a73ff872905", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271876366", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271876366", + "createdAt": "2026-06-26T23:56:28Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 1816.2239789962769, + "p90": 2297.152042388916, + "p95": 2896.320104598999, + "p99": 3506.6559314727783 + }, + "combine": { + "p50": 1859.1680526733398, + "p90": 2047.4560260772705, + "p95": 2707.1681022644043, + "p99": 3027.2960662841797 + }, + "roundtrip": { + "p50": 1932.8960180282593, + "p90": 2138.335943222046, + "p95": 2772.9599475860596, + "p99": 3193.279981613159 + }, + "isolatedSum": { + "p50": 3675.3920316696167, + "p90": 4344.6080684661865, + "p95": 5603.488206863403, + "p99": 6533.951997756958 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 8, + "recvTokensMax": 1024, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 2029.6320915222168, + "p90": 2355.0078868865967, + "p95": 3023.6799716949463, + "p99": 3532.543897628784 + }, + "combine": { + "p50": 2128.671884536743, + "p90": 2460.576057434082, + "p95": 3003.5200119018555, + "p99": 3345.4079627990723 + }, + "roundtrip": { + "p50": 2337.8241062164307, + "p90": 2708.159923553467, + "p95": 3375.744104385376, + "p99": 3673.952102661133 + }, + "isolatedSum": { + 
"p50": 4158.30397605896, + "p90": 4815.583944320679, + "p95": 6027.199983596802, + "p99": 6877.951860427856 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 469762048, + "combineLogicalBytes": 469762048, + "fanoutMean": 8, + "recvTokensMax": 4096, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 2545.1838970184326, + "p90": 2883.19993019104, + "p95": 3424.1280555725098, + "p99": 3852.544069290161 + }, + "combine": { + "p50": 2903.520107269287, + "p90": 3124.959945678711, + "p95": 3718.2400226593018, + "p99": 4377.791881561279 + }, + "roundtrip": { + "p50": 3660.6719493865967, + "p90": 3928.3199310302734, + "p95": 4631.743907928467, + "p99": 5148.064136505127 + }, + "isolatedSum": { + "p50": 5448.70400428772, + "p90": 6008.159875869751, + "p95": 7142.3680782318115, + "p99": 8230.33595085144 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1879048192, + "combineLogicalBytes": 1879048192, + "fanoutMean": 8, + "recvTokensMax": 16384, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-092ff174", + "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|8|prefill|normal|none|none|0|tuned||7aa44c7b86748b9", + "colorKey": "b300_a314501b", + "comparisonKey": "c51826952291f0ba", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:57:58.409823+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "b300-nv_14", + "sku": "b300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep · bf16 · balanced-rank-local", + "shape": { + "hidden": 7168, + 
"topk": 8, + "experts": 256, + "routing": "balanced-rank-local", + "routingLabel": "balanced-rank-local", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "7aa44c7b86748b9", + "workloadId": "set:3:388ff74baef05c72", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271883343", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271883343", + "createdAt": "2026-06-26T23:56:42Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 69.37599927186966, + "p90": 71.03999704122543, + "p95": 73.37599992752075, + "p99": 81.69600367546082 + }, + "combine": { + "p50": 67.61600077152252, + "p90": 69.60000097751617, + "p95": 77.02399790287018, + "p99": 83.39200168848038 + }, + "roundtrip": { + "p50": 119.93599683046341, + "p90": 126.01600587368011, + "p95": 128.48000228405, + "p99": 135.55200397968292 + }, + "isolatedSum": { + "p50": 136.99200004339218, + "p90": 140.6399980187416, + "p95": 150.39999783039093, + "p99": 165.0880053639412 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 1, + "recvTokensMax": 128, + "stragglerRank": 4, + "correct": 
true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 93.98400038480759, + "p90": 98.68799895048141, + "p95": 100.28800368309021, + "p99": 105.72800040245056 + }, + "combine": { + "p50": 115.52000045776367, + "p90": 116.5120005607605, + "p95": 116.73600226640701, + "p99": 123.48800152540207 + }, + "roundtrip": { + "p50": 193.08799505233765, + "p90": 197.88800179958344, + "p95": 198.59200716018677, + "p99": 204.0960043668747 + }, + "isolatedSum": { + "p50": 209.50400084257126, + "p90": 215.1999995112419, + "p95": 217.02400594949722, + "p99": 229.21600192785263 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 1, + "recvTokensMax": 512, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 197.37599790096283, + "p90": 199.96799528598785, + "p95": 200.80000162124634, + "p99": 207.10399746894836 + }, + "combine": { + "p50": 248.1600046157837, + "p90": 249.9839961528778, + "p95": 250.68798661231995, + "p99": 253.79198789596558 + }, + "roundtrip": { + "p50": 429.8880100250244, + "p90": 434.30399894714355, + "p95": 436.2879991531372, + "p99": 442.84799695014954 + }, + "isolatedSum": { + "p50": 445.5360025167465, + "p90": 449.95199143886566, + "p95": 451.4879882335663, + "p99": 460.89598536491394 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 1, + "recvTokensMax": 2048, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-eac6e215", + "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|prefill|normal|none|none|0|tuned||38fd0bcf7109c32", + "colorKey": "b300_5b993222", + "comparisonKey": "d3d6cc25fee96bc7", + "schemaVersion": 3, + "generatedAt": 
"2026-06-26T23:58:52.035249+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "b300-nv_09", + "sku": "b300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep · bf16 · hotspot-single", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-single", + "routingLabel": "hotspot-single", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "38fd0bcf7109c32", + "workloadId": "set:3:b952d4a43d688b50", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271903494", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271903494", + "createdAt": "2026-06-26T23:57:23Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 106.08000308275223, + "p90": 107.87200182676315, + "p95": 109.15199667215347, + "p99": 120.28799951076508 + }, + "combine": { + "p50": 127.83999741077423, + "p90": 
129.85600531101227, + "p95": 130.97600638866425, + "p99": 139.5840048789978 + }, + "roundtrip": { + "p50": 219.39200162887573, + "p90": 224.16000068187714, + "p95": 225.055992603302, + "p99": 235.35999655723572 + }, + "isolatedSum": { + "p50": 233.92000049352646, + "p90": 237.72800713777542, + "p95": 240.12800306081772, + "p99": 259.8720043897629 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1024, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 217.95199811458588, + "p90": 224.03199970722198, + "p95": 228.83200645446777, + "p99": 252.70399451255798 + }, + "combine": { + "p50": 336.38399839401245, + "p90": 338.49599957466125, + "p95": 339.9040102958679, + "p99": 348.4160006046295 + }, + "roundtrip": { + "p50": 535.8399748802185, + "p90": 546.0159778594971, + "p95": 551.3280034065247, + "p99": 558.3680272102356 + }, + "isolatedSum": { + "p50": 554.3359965085983, + "p90": 562.5279992818832, + "p95": 568.7360167503357, + "p99": 601.1199951171875 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311091200, + "combineLogicalBytes": 311091200, + "fanoutMean": 5.2978515625, + "recvTokensMax": 4096, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 685.0559711456299, + "p90": 694.5599913597107, + "p95": 696.3199973106384, + "p99": 705.3760290145874 + }, + "combine": { + "p50": 1085.4400396347046, + "p90": 1086.3360166549683, + "p95": 1087.6480340957642, + "p99": 1096.7680215835571 + }, + "roundtrip": { + "p50": 1752.511978149414, + "p90": 1760.3199481964111, + "p95": 1762.0480060577393, + "p99": 1772.6080417633057 + }, + "isolatedSum": { + "p50": 1770.4960107803345, + "p90": 1780.896008014679, + "p95": 1783.9680314064026, + "p99": 
1802.1440505981445 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1241511936, + "combineLogicalBytes": 1241511936, + "fanoutMean": 5.28570556640625, + "recvTokensMax": 16384, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-4cb883eb", + "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|prefill|normal|none|none|0|tuned||4caecd33bedf786", + "colorKey": "b300_8d2811e3", + "comparisonKey": "c2361bc487e04e6e", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:58:36.475166+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "b300-nv_15", + "sku": "b300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep · bf16 · zipf", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf", + "routingLabel": "zipf", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "4caecd33bedf786", + "workloadId": "set:3:830e36e88869e222", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": 
"sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271889990", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271889990", + "createdAt": "2026-06-26T23:56:56Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 103.87200117111206, + "p90": 106.88000172376633, + "p95": 109.3439981341362, + "p99": 126.62400305271149 + }, + "combine": { + "p50": 126.91199779510498, + "p90": 128.1919926404953, + "p95": 128.57599556446075, + "p99": 139.615997672081 + }, + "roundtrip": { + "p50": 209.6640020608902, + "p90": 213.95200490951538, + "p95": 215.488001704216, + "p99": 220.47999501228333 + }, + "isolatedSum": { + "p50": 230.78399896621704, + "p90": 235.07199436426163, + "p95": 237.91999369859695, + "p99": 266.2400007247925 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 204.73599433898926, + "p90": 212.44800090789795, + "p95": 213.98399770259857, + "p99": 221.02400660514832 + }, + "combine": { + "p50": 325.28001070022583, + "p90": 336.41600608825684, + "p95": 336.70398592948914, + "p99": 340.4799997806549 + }, + "roundtrip": { + "p50": 510.528028011322, + "p90": 517.087996006012, + "p95": 519.1680192947388, + "p99": 526.4639854431152 + }, + "isolatedSum": { + "p50": 530.0160050392151, + "p90": 548.8640069961548, + "p95": 550.6879836320877, + "p99": 561.5040063858032 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 201678848, + "combineLogicalBytes": 201678848, + "fanoutMean": 3.4345703125, + "recvTokensMax": 4094, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + 
}, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 648.1919884681702, + "p90": 659.0080261230469, + "p95": 662.6240015029907, + "p99": 672.5760102272034 + }, + "combine": { + "p50": 1063.8400316238403, + "p90": 1073.248028755188, + "p95": 1073.6639499664307, + "p99": 1096.60804271698 + }, + "roundtrip": { + "p50": 1698.815941810608, + "p90": 1708.1600427627563, + "p95": 1712.4799489974976, + "p99": 1786.7519855499268 + }, + "isolatedSum": { + "p50": 1712.0320200920105, + "p90": 1732.2560548782349, + "p95": 1736.2879514694214, + "p99": 1769.1840529441833 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 808822784, + "combineLogicalBytes": 808822784, + "fanoutMean": 3.44354248046875, + "recvTokensMax": 16380, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-f7ec6aaf", + "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|prefill|normal|none|none|0|tuned||3dd868cb33839a3", + "colorKey": "b300_2e44c039", + "comparisonKey": "b198376a27b75c7f", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:58:40.218743+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "b300-nv_11", + "sku": "b300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep · bf16 · zipf-heavy", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + 
"configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "3dd868cb33839a3", + "workloadId": "set:3:1ca614e23cc66be1", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271897134", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271897134", + "createdAt": "2026-06-26T23:57:10Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 92.3520028591156, + "p90": 95.90400010347366, + "p95": 98.78399968147278, + "p99": 113.34399878978729 + }, + "combine": { + "p50": 116.19199812412262, + "p90": 120.2239990234375, + "p95": 126.39999389648438, + "p99": 127.68000364303589 + }, + "roundtrip": { + "p50": 194.5279985666275, + "p90": 202.43200659751892, + "p95": 204.22400534152985, + "p99": 214.23999965190887 + }, + "isolatedSum": { + "p50": 208.54400098323822, + "p90": 216.12799912691116, + "p95": 225.18399357795715, + "p99": 241.02400243282318 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22650880, + "combineLogicalBytes": 22650880, + "fanoutMean": 1.54296875, + "recvTokensMax": 1024, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 180.09600043296814, + "p90": 188.6720061302185, + "p95": 190.46400487422943, + "p99": 204.83200252056122 + }, + "combine": { + "p50": 302.94400453567505, + "p90": 311.42398715019226, + "p95": 311.67998909950256, + 
"p99": 315.16799330711365 + }, + "roundtrip": { + "p50": 473.1520116329193, + "p90": 481.6960096359253, + "p95": 485.0560128688812, + "p99": 493.696004152298 + }, + "isolatedSum": { + "p50": 483.0400049686432, + "p90": 500.09599328041077, + "p95": 502.143993973732, + "p99": 519.9999958276749 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 91521024, + "combineLogicalBytes": 91521024, + "fanoutMean": 1.55859375, + "recvTokensMax": 4096, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 570.0479745864868, + "p90": 580.4160237312317, + "p95": 583.7439894676208, + "p99": 621.0560202598572 + }, + "combine": { + "p50": 1098.7199544906616, + "p90": 1109.1840267181396, + "p95": 1109.663963317871, + "p99": 1124.4159936904907 + }, + "roundtrip": { + "p50": 1622.8159666061401, + "p90": 1629.3760538101196, + "p95": 1632.2239637374878, + "p99": 1643.3279514312744 + }, + "isolatedSum": { + "p50": 1668.7679290771484, + "p90": 1689.6000504493713, + "p95": 1693.407952785492, + "p99": 1745.472013950348 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 368062464, + "combineLogicalBytes": 368062464, + "fanoutMean": 1.5670166015625, + "recvTokensMax": 16384, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-e7727ce9", + "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|normalized|0.18|64d989e2e2a6b31", + "colorKey": "b300_c1ad910f", + "comparisonKey": "9532205a80f3d757", + "schemaVersion": 3, + "generatedAt": "2026-06-26T17:38:48.516779+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "b300-nv_15", + "sku": "b300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "normalized", + "suite": "resource-constrained", + "comparisonClass": "standardized", + "measurementContract": 
"layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep · bf16 (norm)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": 0.18, + "achievedFraction": 0.1824, + "configuredUnits": 27, + "deviceUnits": 148, + "resourceClass": "resource-constrained", + "conformanceClass": "resource-conforming", + "fixedKernel": false, + "paretoEligible": true + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": "set:6:a426d66e479dc893", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28254469772", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254469772", + "createdAt": "2026-06-26T17:29:52Z", + "sha": "60dec7d70f554e252fec87709e2be52752947db1" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 94.11200135946274, + "p90": 98.9760011434555, + "p95": 100.54399818181992, + "p99": 116.44800007343292 + }, + "combine": { + "p50": 115.1999980211258, + "p90": 115.9679964184761, + "p95": 116.89600348472595, + "p99": 129.02399897575378 + }, + "roundtrip": { + "p50": 193.2159960269928, + "p90": 198.43199849128723, + "p95": 199.8080015182495, + "p99": 217.50399470329285 + }, + "isolatedSum": { + "p50": 209.31199938058853, + "p90": 214.9439975619316, + "p95": 217.44000166654587, + "p99": 
245.4719990491867 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 135.42400300502777, + "p90": 138.75199854373932, + "p95": 141.184002161026, + "p99": 151.0079950094223 + }, + "combine": { + "p50": 154.59200739860535, + "p90": 163.90399634838104, + "p95": 164.5440012216568, + "p99": 176.54399573802948 + }, + "roundtrip": { + "p50": 271.67999744415283, + "p90": 277.6319980621338, + "p95": 280.70399165153503, + "p99": 291.3599908351898 + }, + "isolatedSum": { + "p50": 290.0160104036331, + "p90": 302.65599489212036, + "p95": 305.7280033826828, + "p99": 327.5519907474518 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155889664, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 193.24800372123718, + "p90": 199.61600005626678, + "p95": 200.80000162124634, + "p99": 206.68800175189972 + }, + "combine": { + "p50": 265.8880054950714, + "p90": 274.59201216697693, + "p95": 275.2000093460083, + "p99": 286.78399324417114 + }, + "roundtrip": { + "p50": 442.59199500083923, + "p90": 448.96000623703003, + "p95": 455.00800013542175, + "p99": 461.40798926353455 + }, + "isolatedSum": { + "p50": 459.1360092163086, + "p90": 474.2080122232437, + "p95": 476.00001096725464, + "p99": 493.47199499607086 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 312266752, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + 
"p50": 326.2079954147339, + "p90": 329.75998520851135, + "p95": 331.6799998283386, + "p99": 341.6000008583069 + }, + "combine": { + "p50": 457.66401290893555, + "p90": 459.77601408958435, + "p95": 469.760000705719, + "p99": 473.7600088119507 + }, + "roundtrip": { + "p50": 762.5920176506042, + "p90": 771.7440128326416, + "p95": 774.2080092430115, + "p99": 789.6320223808289 + }, + "isolatedSum": { + "p50": 783.8720083236694, + "p90": 789.5359992980957, + "p95": 801.4400005340576, + "p99": 815.3600096702576 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 623443968, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 577.1200060844421, + "p90": 582.5920104980469, + "p95": 583.5520029067993, + "p99": 591.2960171699524 + }, + "combine": { + "p50": 817.2799944877625, + "p90": 828.4159898757935, + "p95": 831.8719863891602, + "p99": 913.4079813957214 + }, + "roundtrip": { + "p50": 1376.9279718399048, + "p90": 1386.9119882583618, + "p95": 1392.7680253982544, + "p99": 1453.8240432739258 + }, + "isolatedSum": { + "p50": 1394.4000005722046, + "p90": 1411.0080003738403, + "p95": 1415.4239892959595, + "p99": 1504.7039985656738 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243805696, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1069.5040225982666, + "p90": 1078.0160427093506, + "p95": 1080.2559852600098, + "p99": 1090.880036354065 + }, + "combine": { + "p50": 1528.8959741592407, + "p90": 1540.4479503631592, + "p95": 1542.688012123108, + "p99": 1554.751992225647 + }, + "roundtrip": { + "p50": 2581.9520950317383, + "p90": 2594.6240425109863, + 
"p95": 2602.303981781006, + "p99": 2637.9199028015137 + }, + "isolatedSum": { + "p50": 2598.3999967575073, + "p90": 2618.4639930725098, + "p95": 2622.9439973831177, + "p99": 2645.632028579712 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487009280, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-5fd5a06c", + "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|prefill|normal|none|none|0|normalized|0.18|0a3064a2af0dd39", + "colorKey": "b300_0622d929", + "comparisonKey": "8c83b99af9d27709", + "schemaVersion": 3, + "generatedAt": "2026-06-26T18:11:00.153293+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "b300-nv_10", + "sku": "b300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "normalized", + "suite": "resource-constrained", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep · bf16 (norm) · balanced", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced", + "routingLabel": "balanced", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": 0.18, + "achievedFraction": 0.1824, + "configuredUnits": 27, + "deviceUnits": 148, + "resourceClass": "resource-constrained", + "conformanceClass": "resource-conforming", + "fixedKernel": false, + "paretoEligible": true + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "0a3064a2af0dd39", + "workloadId": 
"set:6:2dad1a73ff872905", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28254508907", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254508907", + "createdAt": "2026-06-26T17:30:32Z", + "sha": "60dec7d70f554e252fec87709e2be52752947db1" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 105.79200088977814, + "p90": 108.83200168609619, + "p95": 111.00800335407257, + "p99": 118.9119964838028 + }, + "combine": { + "p50": 130.0159990787506, + "p90": 139.20000195503235, + "p95": 139.74399864673615, + "p99": 150.84800124168396 + }, + "roundtrip": { + "p50": 228.38400304317474, + "p90": 234.65600609779358, + "p95": 235.61599850654602, + "p99": 252.28801369667053 + }, + "isolatedSum": { + "p50": 235.80799996852875, + "p90": 248.03200364112854, + "p95": 250.75200200080872, + "p99": 269.75999772548676 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 8, + "recvTokensMax": 1024, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 159.36000645160675, + "p90": 162.56000101566315, + "p95": 163.90399634838104, + "p99": 170.59199512004852 + }, + "combine": { + "p50": 201.34399831295013, + "p90": 203.96800339221954, + "p95": 211.45600080490112, + "p99": 224.86400604248047 + }, + "roundtrip": { + "p50": 334.879994392395, + "p90": 340.03201127052307, + "p95": 342.0479893684387, + "p99": 360.28799414634705 + }, + "isolatedSum": { + "p50": 360.7040047645569, + "p90": 366.5280044078827, + "p95": 375.35999715328217, + "p99": 395.456001162529 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 
234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 8, + "recvTokensMax": 2048, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 232.67200589179993, + "p90": 240.76800048351288, + "p95": 244.60799992084503, + "p99": 252.22399830818176 + }, + "combine": { + "p50": 338.01600337028503, + "p90": 347.8719890117645, + "p95": 348.7040102481842, + "p99": 361.407995223999 + }, + "roundtrip": { + "p50": 553.9519786834717, + "p90": 560.2239966392517, + "p95": 564.3839836120605, + "p99": 589.8879766464233 + }, + "isolatedSum": { + "p50": 570.688009262085, + "p90": 588.6399894952774, + "p95": 593.3120101690292, + "p99": 613.6319935321808 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 469762048, + "combineLogicalBytes": 469762048, + "fanoutMean": 8, + "recvTokensMax": 4096, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 409.5360040664673, + "p90": 415.0719940662384, + "p95": 416.76801443099976, + "p99": 433.50398540496826 + }, + "combine": { + "p50": 594.3359732627869, + "p90": 599.7120141983032, + "p95": 606.2399744987488, + "p99": 619.2640066146851 + }, + "roundtrip": { + "p50": 986.1119985580444, + "p90": 993.5680031776428, + "p95": 998.8160133361816, + "p99": 1015.8400535583496 + }, + "isolatedSum": { + "p50": 1003.8719773292542, + "p90": 1014.7840082645416, + "p95": 1023.0079889297485, + "p99": 1052.7679920196533 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 939524096, + "combineLogicalBytes": 939524096, + "fanoutMean": 8, + "recvTokensMax": 8192, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 756.384015083313, + "p90": 767.3280239105225, + "p95": 769.6639895439148, + "p99": 787.7439856529236 + }, + 
"combine": { + "p50": 1112.671971321106, + "p90": 1122.8480339050293, + "p95": 1133.6640119552612, + "p99": 1208.4800004959106 + }, + "roundtrip": { + "p50": 1856.0960292816162, + "p90": 1870.6879615783691, + "p95": 1877.087950706482, + "p99": 1941.5040016174316 + }, + "isolatedSum": { + "p50": 1869.055986404419, + "p90": 1890.1760578155518, + "p95": 1903.328001499176, + "p99": 1996.2239861488342 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1879048192, + "combineLogicalBytes": 1879048192, + "fanoutMean": 8, + "recvTokensMax": 16384, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1458.8799476623535, + "p90": 1475.0720262527466, + "p95": 1481.4079999923706, + "p99": 1536.8640422821045 + }, + "combine": { + "p50": 2142.047882080078, + "p90": 2154.560089111328, + "p95": 2158.9438915252686, + "p99": 2215.9039974212646 + }, + "roundtrip": { + "p50": 3584.160089492798, + "p90": 3605.760097503662, + "p95": 3613.152027130127, + "p99": 3669.503927230835 + }, + "isolatedSum": { + "p50": 3600.9278297424316, + "p90": 3629.6321153640747, + "p95": 3640.351891517639, + "p99": 3752.768039703369 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3758096384, + "combineLogicalBytes": 3758096384, + "fanoutMean": 8, + "recvTokensMax": 32768, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-6620cae5", + "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|prefill|normal|none|none|0|normalized|0.18|b5217e990b95f86", + "colorKey": "b300_01ab5b1a", + "comparisonKey": "5702bf02b3927f32", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:38:15.541333+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "b300-nv_06", + "sku": "b300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "normalized", + "suite": 
"resource-constrained", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep · bf16 (norm) · zipf", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf", + "routingLabel": "zipf", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": 0.18, + "achievedFraction": 0.1824, + "configuredUnits": 27, + "deviceUnits": 148, + "resourceClass": "resource-constrained", + "conformanceClass": "resource-conforming", + "fixedKernel": false, + "paretoEligible": true + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b5217e990b95f86", + "workloadId": "set:6:830e36e88869e222", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271231753", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271231753", + "createdAt": "2026-06-26T23:36:29Z", + "sha": "ee4ffe77871d0200cb4a78c96d3ae9f692e9af02" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 101.3759970664978, + "p90": 104.76800054311752, + "p95": 106.01600259542465, + "p99": 111.90400272607803 + }, + "combine": { + "p50": 126.11199915409088, + "p90": 127.3919939994812, + "p95": 127.83999741077423, + "p99": 129.18399274349213 + }, + "roundtrip": { + "p50": 207.8080028295517, + "p90": 212.6079946756363, + "p95": 213.69600296020508, + "p99": 224.2559939622879 + }, + "isolatedSum": { + "p50": 
227.48799622058868, + "p90": 232.15999454259872, + "p95": 233.85600000619888, + "p99": 241.08799546957016 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 136.31999492645264, + "p90": 139.80799913406372, + "p95": 140.86399972438812, + "p99": 150.43200552463531 + }, + "combine": { + "p50": 176.35199427604675, + "p90": 178.78399789333344, + "p95": 180.03199994564056, + "p99": 188.60800564289093 + }, + "roundtrip": { + "p50": 297.5679934024811, + "p90": 303.45600843429565, + "p95": 306.46398663520813, + "p99": 319.2960023880005 + }, + "isolatedSum": { + "p50": 312.6719892024994, + "p90": 318.59199702739716, + "p95": 320.8959996700287, + "p99": 339.04001116752625 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 100509696, + "combineLogicalBytes": 100509696, + "fanoutMean": 3.42333984375, + "recvTokensMax": 2046, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 203.90400290489197, + "p90": 211.58400177955627, + "p95": 212.51200139522552, + "p99": 223.32799434661865 + }, + "combine": { + "p50": 325.1839876174927, + "p90": 335.55200695991516, + "p95": 335.80800890922546, + "p99": 337.8559947013855 + }, + "roundtrip": { + "p50": 506.20800256729126, + "p90": 514.4960284233093, + "p95": 519.7759866714478, + "p99": 534.0160131454468 + }, + "isolatedSum": { + "p50": 529.0879905223846, + "p90": 547.1360087394714, + "p95": 548.320010304451, + "p99": 561.1839890480042 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 201678848, + "combineLogicalBytes": 201678848, + "fanoutMean": 3.4345703125, + "recvTokensMax": 4094, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + 
"trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 348.86398911476135, + "p90": 353.40800881385803, + "p95": 354.7520041465759, + "p99": 364.22398686408997 + }, + "combine": { + "p50": 582.4000239372253, + "p90": 585.9519839286804, + "p95": 593.0879712104797, + "p99": 594.5919752120972 + }, + "roundtrip": { + "p50": 909.5680117607117, + "p90": 917.2160029411316, + "p95": 918.5600280761719, + "p99": 924.127995967865 + }, + "isolatedSum": { + "p50": 931.2640130519867, + "p90": 939.3599927425385, + "p95": 947.8399753570557, + "p99": 958.8159620761871 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 405035008, + "combineLogicalBytes": 405035008, + "fanoutMean": 3.4488525390625, + "recvTokensMax": 8189, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 641.8560147285461, + "p90": 648.639976978302, + "p95": 655.135989189148, + "p99": 660.256028175354 + }, + "combine": { + "p50": 1062.7520084381104, + "p90": 1072.7039575576782, + "p95": 1073.4080076217651, + "p99": 1076.5119791030884 + }, + "roundtrip": { + "p50": 1693.343997001648, + "p90": 1700.6080150604248, + "p95": 1702.847957611084, + "p99": 1706.6559791564941 + }, + "isolatedSum": { + "p50": 1704.6080231666565, + "p90": 1721.3439345359802, + "p95": 1728.543996810913, + "p99": 1736.7680072784424 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 808822784, + "combineLogicalBytes": 808822784, + "fanoutMean": 3.44354248046875, + "recvTokensMax": 16380, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1252.1920204162598, + "p90": 1262.719988822937, + "p95": 1264.7360563278198, + "p99": 1276.8640518188477 + }, + "combine": { + "p50": 2043.4560775756836, + "p90": 2045.151948928833, + "p95": 2047.1999645233154, + "p99": 
2067.392110824585 + }, + "roundtrip": { + "p50": 3284.6720218658447, + "p90": 3295.1040267944336, + "p95": 3299.0400791168213, + "p99": 3313.3440017700195 + }, + "isolatedSum": { + "p50": 3295.6480979919434, + "p90": 3307.87193775177, + "p95": 3311.9360208511353, + "p99": 3344.2561626434326 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1619795968, + "combineLogicalBytes": 1619795968, + "fanoutMean": 3.4481201171875, + "recvTokensMax": 32761, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-9b7dbfc5", + "identity": "b300|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|prefill|normal|none|none|0|normalized|0.18|2b57a75d27f5b39", + "colorKey": "b300_085c12d4", + "comparisonKey": "afb8d29f702ca3c1", + "schemaVersion": 3, + "generatedAt": "2026-06-26T18:21:45.459593+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "b300-nv_16", + "sku": "b300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "normalized", + "suite": "resource-constrained", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep · bf16 (norm) · zipf+eplb", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf", + "routingLabel": "zipf+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": 0.18, + "achievedFraction": 0.1824, + "configuredUnits": 27, + "deviceUnits": 148, + "resourceClass": "resource-constrained", + "conformanceClass": "resource-conforming", + "fixedKernel": false, + "paretoEligible": true + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + 
"routingConsistent": true, + "traceSignature": "2b57a75d27f5b39", + "workloadId": "set:6:830e36e88869e222", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": 4.895263671875, + "eplbImbalanceAfter": 1.0000902811686199, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28255311146", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28255311146", + "createdAt": "2026-06-26T17:45:43Z", + "sha": "36d3eb6c3c7386d3220c873d305410219c5c0f17" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 93.28000247478485, + "p90": 96.16000205278397, + "p95": 98.78399968147278, + "p99": 129.2479932308197 + }, + "combine": { + "p50": 114.94400352239609, + "p90": 115.55200070142746, + "p95": 115.93600362539291, + "p99": 126.3359934091568 + }, + "roundtrip": { + "p50": 195.6160068511963, + "p90": 199.42399859428406, + "p95": 200.83199441432953, + "p99": 215.16799926757812 + }, + "isolatedSum": { + "p50": 208.22400599718094, + "p90": 211.71200275421143, + "p95": 214.7200033068657, + "p99": 255.5839866399765 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77385728, + "combineLogicalBytes": 77385728, + "fanoutMean": 5.271484375, + "recvTokensMax": 691, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 136.25599443912506, + "p90": 139.00800049304962, + "p95": 141.50400459766388, + "p99": 155.03999590873718 + }, + "combine": { + "p50": 153.72799336910248, + "p90": 163.2319986820221, + "p95": 163.80800306797028, + "p99": 167.67999529838562 + }, + "roundtrip": { + "p50": 269.9199914932251, + "p90": 275.64799785614014, + "p95": 276.92800760269165, + "p99": 291.77600145339966 + }, + "isolatedSum": { + "p50": 289.98398780822754, + "p90": 302.2399991750717, + "p95": 
305.31200766563416, + "p99": 322.7199912071228 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155172864, + "combineLogicalBytes": 155172864, + "fanoutMean": 5.28515625, + "recvTokensMax": 1378, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 192.671999335289, + "p90": 200.095996260643, + "p95": 201.1840045452118, + "p99": 211.99999749660492 + }, + "combine": { + "p50": 264.70398902893066, + "p90": 274.2399871349335, + "p95": 274.9119997024536, + "p99": 286.3999903202057 + }, + "roundtrip": { + "p50": 439.7439956665039, + "p90": 445.279985666275, + "p95": 447.519987821579, + "p99": 459.9039852619171 + }, + "isolatedSum": { + "p50": 457.37598836421967, + "p90": 474.3359833955765, + "p95": 476.0960042476654, + "p99": 498.3999878168106 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 310546432, + "combineLogicalBytes": 310546432, + "fanoutMean": 5.28857421875, + "recvTokensMax": 2745, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 319.90399956703186, + "p90": 325.8560001850128, + "p95": 327.1999955177307, + "p99": 333.44000577926636 + }, + "combine": { + "p50": 450.78399777412415, + "p90": 458.8800072669983, + "p95": 459.77601408958435, + "p99": 482.87999629974365 + }, + "roundtrip": { + "p50": 756.1600208282471, + "p90": 761.5039944648743, + "p95": 763.5840177536011, + "p99": 783.5519909858704 + }, + "isolatedSum": { + "p50": 770.687997341156, + "p90": 784.7360074520111, + "p95": 786.9760096073151, + "p99": 816.32000207901 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 620619776, + "combineLogicalBytes": 620619776, + "fanoutMean": 5.2845458984375, + "recvTokensMax": 5526, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, 
+ "dispatch": { + "p50": 573.0559825897217, + "p90": 581.6959738731384, + "p95": 583.7119817733765, + "p99": 671.4879870414734 + }, + "combine": { + "p50": 827.4880051612854, + "p90": 838.6240005493164, + "p95": 839.9040102958679, + "p99": 863.4560108184814 + }, + "roundtrip": { + "p50": 1382.9760551452637, + "p90": 1392.9920196533203, + "p95": 1396.8960046768188, + "p99": 1428.1599521636963 + }, + "isolatedSum": { + "p50": 1400.543987751007, + "p90": 1420.3199744224548, + "p95": 1423.6159920692444, + "p99": 1534.9439978599548 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1239175168, + "combineLogicalBytes": 1239175168, + "fanoutMean": 5.2757568359375, + "recvTokensMax": 11165, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1061.8879795074463, + "p90": 1068.7040090560913, + "p95": 1075.9040117263794, + "p99": 1094.048023223877 + }, + "combine": { + "p50": 1530.2079916000366, + "p90": 1540.7040119171143, + "p95": 1551.2640476226807, + "p99": 1662.6559495925903 + }, + "roundtrip": { + "p50": 2579.9999237060547, + "p90": 2593.7600135803223, + "p95": 2600.543975830078, + "p99": 2645.440101623535 + }, + "isolatedSum": { + "p50": 2592.095971107483, + "p90": 2609.4080209732056, + "p95": 2627.16805934906, + "p99": 2756.7039728164673 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2481604608, + "combineLogicalBytes": 2481604608, + "fanoutMean": 5.282684326171875, + "recvTokensMax": 22165, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-07a9b9e5", + "identity": "b300|deepep|7168|8|256|bf16|normal|cached-layout-comm-only-v1|uniform|8|prefill|normal|none|none|0|normalized|0.18|64d989e2e2a6b31", + "colorKey": "b300_63f1354f", + "comparisonKey": "e1e888fe005f12d0", + "schemaVersion": 3, + "generatedAt": "2026-06-26T17:43:21.918392+00:00", + "status": "valid", + 
"publicationStatus": "official", + "runner": "b300-nv_01", + "sku": "b300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "normalized", + "suite": "resource-constrained", + "comparisonClass": "standardized", + "measurementContract": "cached-layout-comm-only-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep · bf16 (norm) [cl]", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": 0.18, + "achievedFraction": 0.1824, + "configuredUnits": 27, + "deviceUnits": 148, + "resourceClass": "resource-constrained", + "conformanceClass": "resource-conforming", + "fixedKernel": false, + "paretoEligible": true + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": "set:6:a426d66e479dc893", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28254489726", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254489726", + "createdAt": "2026-06-26T17:30:12Z", + "sha": "60dec7d70f554e252fec87709e2be52752947db1" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 86.43200248479843, + "p90": 88.95999938249588, + "p95": 91.58399701118469, + "p99": 99.55199807882309 + }, + "combine": { + "p50": 115.35999923944473, + "p90": 116.03199690580368, + "p95": 116.38399958610535, + "p99": 
121.56800180673599 + }, + "roundtrip": { + "p50": 186.8479996919632, + "p90": 192.47999787330627, + "p95": 193.31200420856476, + "p99": 215.45599400997162 + }, + "isolatedSum": { + "p50": 201.79200172424316, + "p90": 204.99199628829956, + "p95": 207.96799659729004, + "p99": 221.11999988555908 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 128.60800325870514, + "p90": 131.48799538612366, + "p95": 132.79999792575836, + "p99": 147.20000326633453 + }, + "combine": { + "p50": 156.19200468063354, + "p90": 164.48000073432922, + "p95": 164.76799547672272, + "p99": 167.71200299263 + }, + "roundtrip": { + "p50": 264.8000121116638, + "p90": 271.232008934021, + "p95": 274.6239900588989, + "p99": 307.20001459121704 + }, + "isolatedSum": { + "p50": 284.8000079393387, + "p90": 295.9679961204529, + "p95": 297.5679934024811, + "p99": 314.91200625896454 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155889664, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 186.46399676799774, + "p90": 192.86400079727173, + "p95": 195.360004901886, + "p99": 208.3200067281723 + }, + "combine": { + "p50": 266.6879892349243, + "p90": 274.78399872779846, + "p95": 275.2639949321747, + "p99": 287.1359884738922 + }, + "roundtrip": { + "p50": 437.4080002307892, + "p90": 442.30398535728455, + "p95": 445.6320106983185, + "p99": 468.51199865341187 + }, + "isolatedSum": { + "p50": 453.15198600292206, + "p90": 467.6479995250702, + "p95": 470.62399983406067, + "p99": 495.4559952020645 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 312266752, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 316.0319924354553, + "p90": 319.16800141334534, + "p95": 320.99199295043945, + "p99": 330.01598715782166 + }, + "combine": { + "p50": 458.8479995727539, + "p90": 461.66399121284485, + "p95": 470.20798921585083, + "p99": 483.39200019836426 + }, + "roundtrip": { + "p50": 752.0639896392822, + "p90": 761.3440155982971, + "p95": 763.6799812316895, + "p99": 787.6480221748352 + }, + "isolatedSum": { + "p50": 774.8799920082092, + "p90": 780.8319926261902, + "p95": 791.1999821662903, + "p99": 813.4079873561859 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 623443968, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 557.2800040245056, + "p90": 565.0240182876587, + "p95": 566.3679838180542, + "p99": 600.0319719314575 + }, + "combine": { + "p50": 817.4399733543396, + "p90": 827.8399705886841, + "p95": 832.0639729499817, + "p99": 854.3999791145325 + }, + "roundtrip": { + "p50": 1359.328031539917, + "p90": 1370.911955833435, + "p95": 1380.5760145187378, + "p99": 1444.640040397644 + }, + "isolatedSum": { + "p50": 1374.7199773788452, + "p90": 1392.8639888763428, + "p95": 1398.431956768036, + "p99": 1454.43195104599 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243805696, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1037.4079942703247, + "p90": 
1044.800043106079, + "p95": 1047.4879741668701, + "p99": 1074.3039846420288 + }, + "combine": { + "p50": 1529.6319723129272, + "p90": 1541.375994682312, + "p95": 1552.0639419555664, + "p99": 1577.1199464797974 + }, + "roundtrip": { + "p50": 2550.9119033813477, + "p90": 2564.2240047454834, + "p95": 2571.199893951416, + "p99": 2613.2800579071045 + }, + "isolatedSum": { + "p50": 2567.039966583252, + "p90": 2586.176037788391, + "p95": 2599.5519161224365, + "p99": 2651.423931121826 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487009280, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-c8d1506e", + "identity": "b300|deepep|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|normalized|0.18|64d989e2e2a6b31", + "colorKey": "b300_eee29686", + "comparisonKey": "efab2d3670b24be2", + "schemaVersion": 3, + "generatedAt": "2026-06-26T17:42:54.702578+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "b300-nv_15", + "sku": "b300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "normalized", + "suite": "resource-constrained", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep · fp8 (norm)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": 0.18, + "achievedFraction": 0.1824, + "configuredUnits": 27, + "deviceUnits": 148, + "resourceClass": "resource-constrained", + "conformanceClass": 
"resource-conforming", + "fixedKernel": false, + "paretoEligible": true + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": "set:6:a426d66e479dc893", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28254479346", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254479346", + "createdAt": "2026-06-26T17:30:02Z", + "sha": "60dec7d70f554e252fec87709e2be52752947db1" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 83.45600217580795, + "p90": 86.14400029182434, + "p95": 87.2960016131401, + "p99": 102.08000242710114 + }, + "combine": { + "p50": 108.38399827480316, + "p90": 110.75200140476227, + "p95": 111.61600053310394, + "p99": 114.9120032787323 + }, + "roundtrip": { + "p50": 218.33600103855133, + "p90": 221.6320037841797, + "p95": 222.84799814224243, + "p99": 235.23199558258057 + }, + "isolatedSum": { + "p50": 191.84000045061111, + "p90": 196.8960016965866, + "p95": 198.91200214624405, + "p99": 216.99200570583344 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 116.70400202274323, + "p90": 119.64800208806992, + "p95": 121.15199863910675, + "p99": 135.3600025177002 + }, + "combine": { + "p50": 155.29599785804749, + "p90": 167.4560010433197, + "p95": 176.60799622535706, + "p99": 184.1599941253662 + }, + "roundtrip": { + "p50": 324.47999715805054, + "p90": 
328.19199562072754, + "p95": 330.04799485206604, + "p99": 345.40799260139465 + }, + "isolatedSum": { + "p50": 271.9999998807907, + "p90": 287.1040031313896, + "p95": 297.7599948644638, + "p99": 319.5199966430664 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77944832, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 177.7919977903366, + "p90": 182.27200210094452, + "p95": 183.9040070772171, + "p99": 191.103994846344 + }, + "combine": { + "p50": 267.520010471344, + "p90": 270.81599831581116, + "p95": 272.0640003681183, + "p99": 275.4879891872406 + }, + "roundtrip": { + "p50": 550.8480072021484, + "p90": 556.9599866867065, + "p95": 560.2560043334961, + "p99": 578.3360004425049 + }, + "isolatedSum": { + "p50": 445.3120082616806, + "p90": 453.0880004167557, + "p95": 455.9680074453354, + "p99": 466.5919840335846 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156133376, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 298.0160117149353, + "p90": 302.4959862232208, + "p95": 304.4799864292145, + "p99": 319.07200813293457 + }, + "combine": { + "p50": 452.1920084953308, + "p90": 456.6720128059387, + "p95": 458.624005317688, + "p99": 467.9360091686249 + }, + "roundtrip": { + "p50": 976.5759706497192, + "p90": 983.8719964027405, + "p95": 991.5199875831604, + "p99": 1023.3279466629028 + }, + "isolatedSum": { + "p50": 750.2080202102661, + "p90": 759.1679990291595, + "p95": 763.1039917469025, + "p99": 787.0080173015594 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311721984, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + 
"recvTokensMax": 5505, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 541.4720177650452, + "p90": 546.7519760131836, + "p95": 549.4080185890198, + "p99": 557.7920079231262 + }, + "combine": { + "p50": 814.7199749946594, + "p90": 820.8320140838623, + "p95": 824.0640163421631, + "p99": 847.2959995269775 + }, + "roundtrip": { + "p50": 1818.0160522460938, + "p90": 1827.712059020996, + "p95": 1832.0000171661377, + "p99": 1889.5679712295532 + }, + "isolatedSum": { + "p50": 1356.1919927597046, + "p90": 1367.583990097046, + "p95": 1373.4720349311829, + "p99": 1405.0880074501038 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 621902848, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1019.6160078048706, + "p90": 1027.9040336608887, + "p95": 1031.391978263855, + "p99": 1045.2799797058105 + }, + "combine": { + "p50": 1529.4400453567505, + "p90": 1537.2480154037476, + "p95": 1540.8639907836914, + "p99": 1614.6240234375 + }, + "roundtrip": { + "p50": 3477.3120880126953, + "p90": 3490.272045135498, + "p95": 3495.3598976135254, + "p99": 3531.3920974731445 + }, + "isolatedSum": { + "p50": 2549.056053161621, + "p90": 2565.1520490646362, + "p95": 2572.2559690475464, + "p99": 2659.9040031433105 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243504640, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-9971d342", + "identity": "b300|deepep|7168|8|256|fp8|normal|cached-layout-comm-only-v1|uniform|8|prefill|normal|none|none|0|normalized|0.18|64d989e2e2a6b31", + "colorKey": "b300_84b10b26", 
+ "comparisonKey": "1c850249e23e1e8c", + "schemaVersion": 3, + "generatedAt": "2026-06-26T18:09:25.013454+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "b300-nv_15", + "sku": "b300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "normalized", + "suite": "resource-constrained", + "comparisonClass": "standardized", + "measurementContract": "cached-layout-comm-only-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep · fp8 (norm) [cl]", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": 0.18, + "achievedFraction": 0.1824, + "configuredUnits": 27, + "deviceUnits": 148, + "resourceClass": "resource-constrained", + "conformanceClass": "resource-conforming", + "fixedKernel": false, + "paretoEligible": true + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": "set:6:a426d66e479dc893", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28254499301", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254499301", + "createdAt": "2026-06-26T17:30:22Z", + "sha": "60dec7d70f554e252fec87709e2be52752947db1" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 77.02399790287018, + "p90": 79.48800176382065, + "p95": 80.89599758386612, + "p99": 
85.28000116348267 + }, + "combine": { + "p50": 108.5439994931221, + "p90": 111.29599809646606, + "p95": 112.35199868679047, + "p99": 124.41600114107132 + }, + "roundtrip": { + "p50": 211.74399554729462, + "p90": 214.4320011138916, + "p95": 216.0000056028366, + "p99": 233.15200209617615 + }, + "isolatedSum": { + "p50": 185.56799739599228, + "p90": 190.7839998602867, + "p95": 193.24799627065659, + "p99": 209.69600230455399 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 110.1439967751503, + "p90": 113.53600025177002, + "p95": 115.90400338172913, + "p99": 132.6719969511032 + }, + "combine": { + "p50": 153.3759981393814, + "p90": 157.60000050067902, + "p95": 159.32799875736237, + "p99": 173.69599640369415 + }, + "roundtrip": { + "p50": 318.30400228500366, + "p90": 322.52800464630127, + "p95": 325.408011674881, + "p99": 346.49598598480225 + }, + "isolatedSum": { + "p50": 263.5199949145317, + "p90": 271.13600075244904, + "p95": 275.2320021390915, + "p99": 306.36799335479736 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77944832, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 171.26399278640747, + "p90": 176.15999281406403, + "p95": 178.6240041255951, + "p99": 194.815993309021 + }, + "combine": { + "p50": 268.2879865169525, + "p90": 273.0560004711151, + "p95": 275.64799785614014, + "p99": 283.58399868011475 + }, + "roundtrip": { + "p50": 543.7120199203491, + "p90": 550.6880283355713, + "p95": 554.1120171546936, + "p99": 576.0639905929565 + }, + "isolatedSum": { + "p50": 439.55197930336, + 
"p90": 449.21599328517914, + "p95": 454.27200198173523, + "p99": 478.39999198913574 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156133376, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 288.92800211906433, + "p90": 294.0160036087036, + "p95": 297.791987657547, + "p99": 315.3280019760132 + }, + "combine": { + "p50": 452.09598541259766, + "p90": 457.37600326538086, + "p95": 461.7280066013336, + "p99": 471.74400091171265 + }, + "roundtrip": { + "p50": 967.1040177345276, + "p90": 974.62397813797, + "p95": 977.5360226631165, + "p99": 995.6160187721252 + }, + "isolatedSum": { + "p50": 741.023987531662, + "p90": 751.3920068740845, + "p95": 759.5199942588806, + "p99": 787.0720028877258 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311721984, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 523.3920216560364, + "p90": 529.2800068855286, + "p95": 533.3439707756042, + "p99": 550.1120090484619 + }, + "combine": { + "p50": 816.32000207901, + "p90": 824.9599933624268, + "p95": 831.1359882354736, + "p99": 855.135977268219 + }, + "roundtrip": { + "p50": 1800.096035003662, + "p90": 1811.743974685669, + "p95": 1825.7919549942017, + "p99": 1866.8160438537598 + }, + "isolatedSum": { + "p50": 1339.7120237350464, + "p90": 1354.2400002479553, + "p95": 1364.4799590110779, + "p99": 1405.247986316681 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 621902848, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 984.9280118942261, + "p90": 992.2239780426025, + "p95": 996.5760111808777, + "p99": 1026.9759893417358 + }, + "combine": { + "p50": 1529.312014579773, + "p90": 1539.1039848327637, + "p95": 1548.0320453643799, + "p99": 1564.3839836120605 + }, + "roundtrip": { + "p50": 3440.864086151123, + "p90": 3457.6640129089355, + "p95": 3468.832015991211, + "p99": 3514.2080783843994 + }, + "isolatedSum": { + "p50": 2514.240026473999, + "p90": 2531.327962875366, + "p95": 2544.6080565452576, + "p99": 2591.3599729537964 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243504640, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-1c34e3d1", + "identity": "h100|deepep|4096|8|128|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||75530960a30b452", + "colorKey": "h100_ff7906f8", + "comparisonKey": "ad5ebda2342035d4", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:51:21.600015+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h100-dgxc-slurm_04", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16", + "shape": { + "hidden": 4096, + "topk": 8, + "experts": 128, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 
132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "75530960a30b452", + "workloadId": "set:8:d1b92539bddfb570", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271684428", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271684428", + "createdAt": "2026-06-26T23:50:25Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 98.4639972448349, + "p90": 106.52799904346466, + "p95": 128.12800705432892, + "p99": 158.87999534606934 + }, + "combine": { + "p50": 66.52799993753433, + "p90": 73.34399968385696, + "p95": 81.34400099515915, + "p99": 91.96799993515015 + }, + "roundtrip": { + "p50": 139.42399621009827, + "p90": 146.84799313545227, + "p95": 150.56000649929047, + "p99": 186.81600689888 + }, + "isolatedSum": { + "p50": 164.99199718236923, + "p90": 179.87199872732162, + "p95": 209.47200804948807, + "p99": 250.84799528121948 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 344064, + "combineLogicalBytes": 344064, + "fanoutMean": 5.25, + "recvTokensMax": 6, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 71.58400118350983, + "p90": 103.45599800348282, + "p95": 106.39999806880951, + "p99": 124.67200309038162 + }, + "combine": { + "p50": 64.06400352716446, + "p90": 72.92799651622772, + "p95": 73.31199944019318, + "p99": 74.43200051784515 + }, + "roundtrip": { + "p50": 
117.53600090742111, + "p90": 144.41600441932678, + "p95": 147.71200716495514, + "p99": 173.5360026359558 + }, + "isolatedSum": { + "p50": 135.6480047106743, + "p90": 176.38399451971054, + "p95": 179.71199750900269, + "p99": 199.10400360822678 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 704512, + "combineLogicalBytes": 704512, + "fanoutMean": 5.375, + "recvTokensMax": 12, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 92.70399808883667, + "p90": 100.47999769449234, + "p95": 102.75200009346008, + "p99": 106.23999685049057 + }, + "combine": { + "p50": 66.01600348949432, + "p90": 72.38399982452393, + "p95": 72.86400347948074, + "p99": 75.6160020828247 + }, + "roundtrip": { + "p50": 134.33599472045898, + "p90": 143.77599954605103, + "p95": 146.08000218868256, + "p99": 149.82399344444275 + }, + "isolatedSum": { + "p50": 158.720001578331, + "p90": 172.86399751901627, + "p95": 175.61600357294083, + "p99": 181.85599893331528 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1384448, + "combineLogicalBytes": 1384448, + "fanoutMean": 5.28125, + "recvTokensMax": 26, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 97.75999933481216, + "p90": 105.8880016207695, + "p95": 129.66400384902954, + "p99": 177.44000256061554 + }, + "combine": { + "p50": 71.32799923419952, + "p90": 74.65600222349167, + "p95": 81.53600245714188, + "p99": 92.00000017881393 + }, + "roundtrip": { + "p50": 140.99200069904327, + "p90": 149.6960073709488, + "p95": 159.19999778270721, + "p99": 189.43999707698822 + }, + "isolatedSum": { + "p50": 169.0879985690117, + "p90": 180.54400384426117, + "p95": 211.20000630617142, + "p99": 269.4400027394295 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2744320, + "combineLogicalBytes": 2744320, + "fanoutMean": 
5.234375, + "recvTokensMax": 49, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 97.56799787282944, + "p90": 100.99200159311295, + "p95": 104.3199971318245, + "p99": 107.42399841547012 + }, + "combine": { + "p50": 71.6480016708374, + "p90": 73.7600028514862, + "p95": 75.00799745321274, + "p99": 80.92799782752991 + }, + "roundtrip": { + "p50": 142.68800616264343, + "p90": 150.30400454998016, + "p95": 154.2080044746399, + "p99": 156.09599649906158 + }, + "isolatedSum": { + "p50": 169.21599954366684, + "p90": 174.75200444459915, + "p95": 179.32799458503723, + "p99": 188.35199624300003 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 5464064, + "combineLogicalBytes": 5464064, + "fanoutMean": 5.2109375, + "recvTokensMax": 94, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 79.80799674987793, + "p90": 99.55199807882309, + "p95": 101.27999633550644, + "p99": 106.08000308275223 + }, + "combine": { + "p50": 66.68800115585327, + "p90": 76.03199779987335, + "p95": 80.38400113582611, + "p99": 81.31200075149536 + }, + "roundtrip": { + "p50": 123.87199699878693, + "p90": 150.27199685573578, + "p95": 152.16000378131866, + "p99": 155.4879993200302 + }, + "isolatedSum": { + "p50": 146.4959979057312, + "p90": 175.58399587869644, + "p95": 181.66399747133255, + "p99": 187.3920038342476 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 11124736, + "combineLogicalBytes": 11124736, + "fanoutMean": 5.3046875, + "recvTokensMax": 186, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 100.28800368309021, + "p90": 107.96800255775452, + "p95": 109.47199910879135, + "p99": 119.90399658679962 + }, + "combine": { + "p50": 81.11999928951263, + "p90": 
87.71199733018875, + "p95": 89.1840010881424, + "p99": 90.14400094747543 + }, + "roundtrip": { + "p50": 151.8079936504364, + "p90": 162.59199380874634, + "p95": 164.06400501728058, + "p99": 168.57600212097168 + }, + "isolatedSum": { + "p50": 181.40800297260284, + "p90": 195.67999988794327, + "p95": 198.65600019693375, + "p99": 210.04799753427505 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22192128, + "combineLogicalBytes": 22192128, + "fanoutMean": 5.291015625, + "recvTokensMax": 358, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 105.69600015878677, + "p90": 115.99999666213989, + "p95": 118.1119978427887, + "p99": 124.83199685811996 + }, + "combine": { + "p50": 88.3840024471283, + "p90": 97.4079966545105, + "p95": 97.88800030946732, + "p99": 100.38399696350098 + }, + "roundtrip": { + "p50": 161.72799468040466, + "p90": 177.2480010986328, + "p95": 181.15200102329254, + "p99": 415.48800468444824 + }, + "isolatedSum": { + "p50": 194.08000260591507, + "p90": 213.4079933166504, + "p95": 215.999998152256, + "p99": 225.21599382162094 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 44564480, + "combineLogicalBytes": 44564480, + "fanoutMean": 5.3125, + "recvTokensMax": 699, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-8988cd24", + "identity": "h100|deepep|5120|8|160|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||be1b44a963bd4ef", + "colorKey": "h100_ff7906f8", + "comparisonKey": "c91a22e0dde262e4", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:51:51.137960+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h100-dgxc-slurm_18", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + 
"measurementContract": "runtime-visible-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16", + "shape": { + "hidden": 5120, + "topk": 8, + "experts": 160, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "be1b44a963bd4ef", + "workloadId": "set:8:34e5874082f8ea8f", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271699258", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271699258", + "createdAt": "2026-06-26T23:50:52Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 95.551997423172, + "p90": 107.04000294208527, + "p95": 120.38400024175644, + "p99": 156.00000321865082 + }, + "combine": { + "p50": 71.19999825954437, + "p90": 78.84799689054489, + "p95": 81.15199953317642, + "p99": 97.56799787282944 + }, + "roundtrip": { + "p50": 140.25600254535675, + "p90": 152.319997549057, + "p95": 169.8240041732788, + "p99": 207.68000185489655 + }, + "isolatedSum": { + "p50": 166.75199568271637, + "p90": 185.88799983263016, + "p95": 201.53599977493286, + "p99": 
253.56800109148026 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 430080, + "combineLogicalBytes": 430080, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 68.4799998998642, + "p90": 104.12800312042236, + "p95": 121.69600278139114, + "p99": 155.13600409030914 + }, + "combine": { + "p50": 64.80000168085098, + "p90": 79.00799810886383, + "p95": 88.06400001049042, + "p99": 103.39199751615524 + }, + "roundtrip": { + "p50": 119.6800023317337, + "p90": 147.32800424098969, + "p95": 149.08799529075623, + "p99": 153.888002038002 + }, + "isolatedSum": { + "p50": 133.28000158071518, + "p90": 183.1360012292862, + "p95": 209.76000279188156, + "p99": 258.5280016064644 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 880640, + "combineLogicalBytes": 880640, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 70.01599669456482, + "p90": 98.27200323343277, + "p95": 101.47199779748917, + "p99": 114.33599889278412 + }, + "combine": { + "p50": 65.08799642324448, + "p90": 78.8159966468811, + "p95": 79.23199981451035, + "p99": 85.95199882984161 + }, + "roundtrip": { + "p50": 119.03999745845795, + "p90": 149.98400211334229, + "p95": 151.8079936504364, + "p99": 158.33599865436554 + }, + "isolatedSum": { + "p50": 135.1039931178093, + "p90": 177.08799988031387, + "p95": 180.7039976119995, + "p99": 200.28799772262573 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1740800, + "combineLogicalBytes": 1740800, + "fanoutMean": 5.3125, + "recvTokensMax": 25, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 70.23999840021133, + "p90": 97.79199957847595, + 
"p95": 102.01600193977356, + "p99": 116.67200177907944 + }, + "combine": { + "p50": 65.47199934720993, + "p90": 79.0719985961914, + "p95": 79.64800298213959, + "p99": 87.67999708652496 + }, + "roundtrip": { + "p50": 118.367999792099, + "p90": 150.4639983177185, + "p95": 155.68000078201294, + "p99": 188.25599551200867 + }, + "isolatedSum": { + "p50": 135.71199774742126, + "p90": 176.86399817466736, + "p95": 181.66400492191315, + "p99": 204.3519988656044 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3471360, + "combineLogicalBytes": 3471360, + "fanoutMean": 5.296875, + "recvTokensMax": 50, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 95.10400146245956, + "p90": 101.34399682283401, + "p95": 105.6319996714592, + "p99": 117.11999773979187 + }, + "combine": { + "p50": 69.11999732255936, + "p90": 79.42400127649307, + "p95": 80.03199845552444, + "p99": 86.87999844551086 + }, + "roundtrip": { + "p50": 120.03199756145477, + "p90": 147.039994597435, + "p95": 149.72800016403198, + "p99": 158.55999290943146 + }, + "isolatedSum": { + "p50": 164.22399878501892, + "p90": 180.7679980993271, + "p95": 185.66399812698364, + "p99": 203.99999618530273 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6912000, + "combineLogicalBytes": 6912000, + "fanoutMean": 5.2734375, + "recvTokensMax": 93, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 77.47200131416321, + "p90": 103.16800326108932, + "p95": 109.72800105810165, + "p99": 237.37600445747375 + }, + "combine": { + "p50": 71.99999690055847, + "p90": 87.13600039482117, + "p95": 95.20000219345093, + "p99": 104.16000336408615 + }, + "roundtrip": { + "p50": 146.14400267601013, + "p90": 166.52800142765045, + "p95": 171.1679995059967, + "p99": 366.0160005092621 + }, + "isolatedSum": { + "p50": 
149.47199821472168, + "p90": 190.3040036559105, + "p95": 204.92800325155258, + "p99": 341.5360078215599 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13977600, + "combineLogicalBytes": 13977600, + "fanoutMean": 5.33203125, + "recvTokensMax": 179, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 92.67199784517288, + "p90": 111.04000359773636, + "p95": 113.79200220108032, + "p99": 126.68800354003906 + }, + "combine": { + "p50": 81.08799904584885, + "p90": 88.67199718952179, + "p95": 95.45599669218063, + "p99": 96.28800302743912 + }, + "roundtrip": { + "p50": 147.5840061903, + "p90": 168.96000504493713, + "p95": 170.9440052509308, + "p99": 174.9120056629181 + }, + "isolatedSum": { + "p50": 173.75999689102173, + "p90": 199.71200078725815, + "p95": 209.24799889326096, + "p99": 222.97600656747818 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 27975680, + "combineLogicalBytes": 27975680, + "fanoutMean": 5.3359375, + "recvTokensMax": 355, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 105.15200346708298, + "p90": 120.7680031657219, + "p95": 122.68800288438797, + "p99": 131.29599392414093 + }, + "combine": { + "p50": 95.90400010347366, + "p90": 104.67199981212616, + "p95": 112.60800063610077, + "p99": 267.5839960575104 + }, + "roundtrip": { + "p50": 173.0239987373352, + "p90": 194.17600333690643, + "p95": 195.90400159358978, + "p99": 308.351993560791 + }, + "isolatedSum": { + "p50": 201.05600357055664, + "p90": 225.44000297784805, + "p95": 235.29600352048874, + "p99": 398.8799899816513 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 55674880, + "combineLogicalBytes": 55674880, + "fanoutMean": 5.3095703125, + "recvTokensMax": 699, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] 
+ }, + { + "id": "cx-d5af8f11", + "identity": "h100|deepep|6144|8|256|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h100_ff7906f8", + "comparisonKey": "bb40f1d7fb8ef5bf", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:52:15.657129+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h100-dgxc-slurm_00", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16", + "shape": { + "hidden": 6144, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": "set:8:2e0df6a62cd0143e", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271714089", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271714089", + "createdAt": "2026-06-26T23:51:20Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ 
+ { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 95.51999717950821, + "p90": 104.99200224876404, + "p95": 123.16799908876419, + "p99": 153.05599570274353 + }, + "combine": { + "p50": 74.0479975938797, + "p90": 82.36800134181976, + "p95": 90.65599739551544, + "p99": 115.13599753379822 + }, + "roundtrip": { + "p50": 144.73600685596466, + "p90": 151.7760008573532, + "p95": 153.9199948310852, + "p99": 191.74399971961975 + }, + "isolatedSum": { + "p50": 169.5679947733879, + "p90": 187.3600035905838, + "p95": 213.82399648427963, + "p99": 268.19199323654175 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 540672, + "combineLogicalBytes": 540672, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 71.26399874687195, + "p90": 102.4319976568222, + "p95": 104.47999835014343, + "p99": 143.48800480365753 + }, + "combine": { + "p50": 67.77600198984146, + "p90": 81.15199953317642, + "p95": 81.727996468544, + "p99": 87.71199733018875 + }, + "roundtrip": { + "p50": 124.03199821710587, + "p90": 153.02400290966034, + "p95": 154.94400262832642, + "p99": 158.36800634860992 + }, + "isolatedSum": { + "p50": 139.0400007367134, + "p90": 183.58399718999863, + "p95": 186.20799481868744, + "p99": 231.20000213384628 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1056768, + "combineLogicalBytes": 1056768, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 91.58399701118469, + "p90": 103.20000350475311, + "p95": 104.032002389431, + "p99": 107.58399963378906 + }, + "combine": { + "p50": 74.20799881219864, + "p90": 80.64000308513641, + "p95": 81.31200075149536, + "p99": 82.49600231647491 + }, + "roundtrip": { + "p50": 145.79200744628906, + "p90": 
152.38399803638458, + "p95": 154.55999970436096, + "p99": 172.38399386405945 + }, + "isolatedSum": { + "p50": 165.79199582338333, + "p90": 183.84000658988953, + "p95": 185.34400314092636, + "p99": 190.08000195026398 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2125824, + "combineLogicalBytes": 2125824, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 92.6079973578453, + "p90": 103.00800204277039, + "p95": 114.46399986743927, + "p99": 149.98400211334229 + }, + "combine": { + "p50": 76.1599987745285, + "p90": 82.49600231647491, + "p95": 86.68799698352814, + "p99": 95.77599912881851 + }, + "roundtrip": { + "p50": 146.84799313545227, + "p90": 161.40800714492798, + "p95": 192.09599494934082, + "p99": 203.74399423599243 + }, + "isolatedSum": { + "p50": 168.7679961323738, + "p90": 185.5040043592453, + "p95": 201.1519968509674, + "p99": 245.7600012421608 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4263936, + "combineLogicalBytes": 4263936, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 91.2960022687912, + "p90": 99.16800260543823, + "p95": 101.1200025677681, + "p99": 107.68000036478043 + }, + "combine": { + "p50": 77.37600058317184, + "p90": 81.53600245714188, + "p95": 82.24000036716461, + "p99": 87.13600039482117 + }, + "roundtrip": { + "p50": 150.30400454998016, + "p90": 157.05600380897522, + "p95": 158.9760035276413, + "p99": 162.49600052833557 + }, + "isolatedSum": { + "p50": 168.67200285196304, + "p90": 180.7040050625801, + "p95": 183.3600029349327, + "p99": 194.8160007596016 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 8503296, + "combineLogicalBytes": 8503296, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + 
"stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 96.03200107812881, + "p90": 103.90400141477585, + "p95": 107.68000036478043, + "p99": 194.815993309021 + }, + "combine": { + "p50": 80.51200211048126, + "p90": 87.00799942016602, + "p95": 90.55999666452408, + "p99": 383.7119936943054 + }, + "roundtrip": { + "p50": 134.97599959373474, + "p90": 158.27199816703796, + "p95": 171.36000096797943, + "p99": 204.0960043668747 + }, + "isolatedSum": { + "p50": 176.54400318861008, + "p90": 190.91200083494186, + "p95": 198.2399970293045, + "p99": 578.5279870033264 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 16908288, + "combineLogicalBytes": 16908288, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 107.61599987745285, + "p90": 114.49600011110306, + "p95": 116.35199934244156, + "p99": 122.84799665212631 + }, + "combine": { + "p50": 92.06400066614151, + "p90": 98.2080027461052, + "p95": 98.68799895048141, + "p99": 102.46399790048599 + }, + "roundtrip": { + "p50": 167.84000396728516, + "p90": 173.567995429039, + "p95": 175.90400576591492, + "p99": 179.4240027666092 + }, + "isolatedSum": { + "p50": 199.68000054359436, + "p90": 212.70400285720825, + "p95": 215.03999829292297, + "p99": 225.3119945526123 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 33423360, + "combineLogicalBytes": 33423360, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 123.26399981975555, + "p90": 130.68799674510956, + "p95": 132.83200562000275, + "p99": 148.0959951877594 + }, + "combine": { + "p50": 106.6880002617836, + "p90": 114.23999816179276, + "p95": 
115.23199826478958, + "p99": 137.85600662231445 + }, + "roundtrip": { + "p50": 197.60000705718994, + "p90": 204.8639953136444, + "p95": 207.07200467586517, + "p99": 225.8879989385605 + }, + "isolatedSum": { + "p50": 229.95200008153915, + "p90": 244.9279949069023, + "p95": 248.06400388479233, + "p99": 285.95200181007385 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 66576384, + "combineLogicalBytes": 66576384, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-7171c240", + "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|fp8-saturation|none|none|0|tuned||8c8497a77d9085d", + "colorKey": "h100_42947950", + "comparisonKey": "1fe2184d83233e7e", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:05:32.898956+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h100-dgxc-slurm_03", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "fp8-saturation", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + 
"routingConsistent": true, + "traceSignature": "8c8497a77d9085d", + "workloadId": "set:4:d8d49658059863f2", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28272125238", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272125238", + "createdAt": "2026-06-27T00:04:22Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 94.7519987821579, + "p90": 118.49600076675415, + "p95": 129.60000336170197, + "p99": 144.31999623775482 + }, + "combine": { + "p50": 76.64000242948532, + "p90": 87.2960016131401, + "p95": 90.52799642086029, + "p99": 103.10400277376175 + }, + "roundtrip": { + "p50": 147.2640037536621, + "p90": 170.30400037765503, + "p95": 184.89600718021393, + "p99": 195.6160068511963 + }, + "isolatedSum": { + "p50": 171.39200121164322, + "p90": 205.79200237989426, + "p95": 220.12799978256226, + "p99": 247.42399901151657 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 98.68799895048141, + "p90": 122.17599898576736, + "p95": 138.7840062379837, + "p99": 191.9039934873581 + }, + "combine": { + "p50": 81.31200075149536, + "p90": 89.72799777984619, + "p95": 97.08800166845322, + "p99": 106.62399977445602 + }, + "roundtrip": { + "p50": 152.70400047302246, + "p90": 174.9120056629181, + "p95": 184.03199315071106, + "p99": 195.51999866962433 + }, + "isolatedSum": { + "p50": 179.99999970197678, + "p90": 211.90399676561356, + "p95": 235.87200790643692, + "p99": 298.5279932618141 + }, 
+ "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 101.79200023412704, + "p90": 127.96799838542938, + "p95": 147.42399752140045, + "p99": 195.16800343990326 + }, + "combine": { + "p50": 89.66399729251862, + "p90": 103.4879982471466, + "p95": 113.02399635314941, + "p99": 128.1599998474121 + }, + "roundtrip": { + "p50": 162.88000345230103, + "p90": 193.53599846363068, + "p95": 214.08000588417053, + "p99": 247.71200120449066 + }, + "isolatedSum": { + "p50": 191.45599752664566, + "p90": 231.455996632576, + "p95": 260.44799387454987, + "p99": 323.32800328731537 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 130.49599528312683, + "p90": 143.8719928264618, + "p95": 152.70400047302246, + "p99": 158.9760035276413 + }, + "combine": { + "p50": 114.81600254774094, + "p90": 127.23200023174286, + "p95": 131.071999669075, + "p99": 139.5840048789978 + }, + "roundtrip": { + "p50": 212.70400285720825, + "p90": 226.33600234985352, + "p95": 233.69599878787994, + "p99": 247.8400021791458 + }, + "isolatedSum": { + "p50": 245.31199783086777, + "p90": 271.10399305820465, + "p95": 283.7760001420975, + "p99": 298.5600084066391 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-0a4944c1", + "identity": 
"h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||2baace5eca64609", + "colorKey": "h100_42947950", + "comparisonKey": "fb346b1019e55bb0", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:50:20.307571+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h100-dgxc-slurm_10", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "2baace5eca64609", + "workloadId": "set:2:07d544ac2af401ec", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271533135", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271533135", + "createdAt": "2026-06-26T23:45:44Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 8, + 
"globalTokens": 64, + "dispatch": { + "p50": 99.0080013871193, + "p90": 111.00800335407257, + "p95": 115.1999980211258, + "p99": 124.67200309038162 + }, + "combine": { + "p50": 80.1599994301796, + "p90": 88.03199976682663, + "p95": 88.48000317811966, + "p99": 96.3200032711029 + }, + "roundtrip": { + "p50": 159.04000401496887, + "p90": 169.11999881267548, + "p95": 173.69599640369415, + "p99": 179.61600422859192 + }, + "isolatedSum": { + "p50": 179.1680008172989, + "p90": 199.0400031208992, + "p95": 203.68000119924545, + "p99": 220.99200636148453 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 113.95200341939926, + "p90": 126.56000256538391, + "p95": 130.46400249004364, + "p99": 137.2479945421219 + }, + "combine": { + "p50": 97.85600006580353, + "p90": 105.40799796581268, + "p95": 112.47999966144562, + "p99": 113.50400000810623 + }, + "roundtrip": { + "p50": 177.95200645923615, + "p90": 187.58399784564972, + "p95": 192.73599982261658, + "p99": 206.43199980258942 + }, + "isolatedSum": { + "p50": 211.8080034852028, + "p90": 231.9680005311966, + "p95": 242.94400215148926, + "p99": 250.75199455022812 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-7c169b4e", + "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||8c8497a77d9085d", + "colorKey": "h100_42947950", + "comparisonKey": "fb346b1019e55bb0", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:54:30.292467+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": 
"h100-dgxc-slurm_14", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "8c8497a77d9085d", + "workloadId": "set:4:d8d49658059863f2", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271781761", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271781761", + "createdAt": "2026-06-26T23:53:23Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 98.43199700117111, + "p90": 107.32799768447876, + "p95": 114.20799791812897, + "p99": 163.71199488639832 + }, + "combine": { + "p50": 80.4160013794899, + "p90": 83.00799876451492, + "p95": 87.00799942016602, + "p99": 87.93599903583527 + }, + "roundtrip": { + "p50": 153.43999862670898, + "p90": 
159.93599593639374, + "p95": 161.82400286197662, + "p99": 166.24000668525696 + }, + "isolatedSum": { + "p50": 178.847998380661, + "p90": 190.33599644899368, + "p95": 201.21599733829498, + "p99": 251.64799392223358 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 98.08000177145004, + "p90": 106.62399977445602, + "p95": 110.30399799346924, + "p99": 118.56000125408173 + }, + "combine": { + "p50": 80.79999685287476, + "p90": 87.3280018568039, + "p95": 87.93599903583527, + "p99": 95.39200365543365 + }, + "roundtrip": { + "p50": 156.2879979610443, + "p90": 163.13600540161133, + "p95": 169.21600699424744, + "p99": 271.2959945201874 + }, + "isolatedSum": { + "p50": 178.8799986243248, + "p90": 193.95200163125992, + "p95": 198.2399970293045, + "p99": 213.95200490951538 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 102.27199643850327, + "p90": 111.32799834012985, + "p95": 115.80800265073776, + "p99": 124.03199821710587 + }, + "combine": { + "p50": 88.3840024471283, + "p90": 95.58399766683578, + "p95": 96.19200229644775, + "p99": 103.61599922180176 + }, + "roundtrip": { + "p50": 164.63999450206757, + "p90": 170.97599804401398, + "p95": 174.52800273895264, + "p99": 185.7600063085556 + }, + "isolatedSum": { + "p50": 190.65599888563156, + "p90": 206.91199600696564, + "p95": 212.00000494718552, + "p99": 227.64799743890762 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + 
"stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 130.0799995660782, + "p90": 136.4160031080246, + "p95": 138.20800185203552, + "p99": 144.896000623703 + }, + "combine": { + "p50": 114.88000303506851, + "p90": 120.70400267839432, + "p95": 121.5360015630722, + "p99": 128.28800082206726 + }, + "roundtrip": { + "p50": 213.18399906158447, + "p90": 219.61599588394165, + "p95": 221.11999988555908, + "p99": 227.03999280929565 + }, + "isolatedSum": { + "p50": 244.9600026011467, + "p90": 257.1200057864189, + "p95": 259.7440034151077, + "p99": 273.18400144577026 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-7a284f4e", + "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h100_42947950", + "comparisonKey": "fb346b1019e55bb0", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:51:32.113885+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h100-dgxc-slurm_14", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + 
"requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": "set:8:d8d49658059863f2", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271543513", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271543513", + "createdAt": "2026-06-26T23:46:04Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 71.00799679756165, + "p90": 100.67199915647507, + "p95": 101.6319990158081, + "p99": 103.74400019645691 + }, + "combine": { + "p50": 73.34399968385696, + "p90": 81.79199695587158, + "p95": 117.47200042009354, + "p99": 304.4799864292145 + }, + "roundtrip": { + "p50": 126.52799487113953, + "p90": 130.3360015153885, + "p95": 131.84000551700592, + "p99": 137.95199990272522 + }, + "isolatedSum": { + "p50": 144.3519964814186, + "p90": 182.46399611234665, + "p95": 219.10399943590164, + "p99": 408.2239866256714 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 68.57600063085556, + "p90": 76.31999999284744, + "p95": 79.13599908351898, + "p99": 88.32000195980072 + }, + "combine": { + "p50": 72.54400104284286, + "p90": 73.98399710655212, + 
"p95": 74.36800003051758, + "p99": 78.84799689054489 + }, + "roundtrip": { + "p50": 126.81600451469421, + "p90": 131.1360001564026, + "p95": 134.24000144004822, + "p99": 137.69599795341492 + }, + "isolatedSum": { + "p50": 141.12000167369843, + "p90": 150.30399709939957, + "p95": 153.50399911403656, + "p99": 167.1679988503456 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 73.05599749088287, + "p90": 103.67999970912933, + "p95": 108.51199924945831, + "p99": 261.34398579597473 + }, + "combine": { + "p50": 73.37599992752075, + "p90": 80.03199845552444, + "p95": 87.0399996638298, + "p99": 87.87199854850769 + }, + "roundtrip": { + "p50": 130.52800297737122, + "p90": 157.4079990386963, + "p95": 160.76800227165222, + "p99": 164.22399878501892 + }, + "isolatedSum": { + "p50": 146.43199741840363, + "p90": 183.71199816465378, + "p95": 195.55199891328812, + "p99": 349.2159843444824 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 71.45600020885468, + "p90": 98.88000041246414, + "p95": 103.00800204277039, + "p99": 109.69600081443787 + }, + "combine": { + "p50": 73.7600028514862, + "p90": 82.59200304746628, + "p95": 83.99999886751175, + "p99": 88.41600269079208 + }, + "roundtrip": { + "p50": 131.29599392414093, + "p90": 154.59200739860535, + "p95": 157.05600380897522, + "p99": 165.66400229930878 + }, + "isolatedSum": { + "p50": 145.21600306034088, + "p90": 181.47200345993042, + "p95": 187.00800091028214, + "p99": 198.11200350522995 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 82.97599852085114, + "p90": 100.8640006184578, + "p95": 103.26399654150009, + "p99": 108.44799876213074 + }, + "combine": { + "p50": 74.49600100517273, + "p90": 87.10400015115738, + "p95": 87.74399757385254, + "p99": 88.86399865150452 + }, + "roundtrip": { + "p50": 128.1919926404953, + "p90": 158.720001578331, + "p95": 161.53599321842194, + "p99": 164.09599781036377 + }, + "isolatedSum": { + "p50": 157.47199952602386, + "p90": 187.96800076961517, + "p95": 191.00799411535263, + "p99": 197.31199741363525 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 90.81599861383438, + "p90": 103.04000228643417, + "p95": 107.87200182676315, + "p99": 111.51999980211258 + }, + "combine": { + "p50": 81.50400221347809, + "p90": 89.9519994854927, + "p95": 90.43200314044952, + "p99": 96.19200229644775 + }, + "roundtrip": { + "p50": 140.47999680042267, + "p90": 163.29599916934967, + "p95": 166.87999665737152, + "p99": 171.03999853134155 + }, + "isolatedSum": { + "p50": 172.32000082731247, + "p90": 192.99200177192688, + "p95": 198.30400496721268, + "p99": 207.71200209856033 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 97.31200337409973, + "p90": 119.10399794578552, + "p95": 121.69600278139114, + 
"p99": 131.26400113105774 + }, + "combine": { + "p50": 90.20800143480301, + "p90": 97.15200215578079, + "p95": 103.93600165843964, + "p99": 104.47999835014343 + }, + "roundtrip": { + "p50": 162.1759980916977, + "p90": 181.7920058965683, + "p95": 184.4799965620041, + "p99": 187.74400651454926 + }, + "isolatedSum": { + "p50": 187.52000480890274, + "p90": 216.25600010156631, + "p95": 225.63200443983078, + "p99": 235.74399948120117 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 113.3119985461235, + "p90": 132.7359974384308, + "p95": 134.5919966697693, + "p99": 140.35199582576752 + }, + "combine": { + "p50": 108.41599851846695, + "p90": 120.44800072908401, + "p95": 120.7360029220581, + "p99": 121.47200107574463 + }, + "roundtrip": { + "p50": 198.2080042362213, + "p90": 216.86400473117828, + "p95": 221.24800086021423, + "p99": 223.80800545215607 + }, + "isolatedSum": { + "p50": 221.72799706459045, + "p90": 253.1839981675148, + "p95": 255.3279995918274, + "p99": 261.82399690151215 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-9a231e73", + "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||b029c1a6fded400", + "colorKey": "h100_42947950", + "comparisonKey": "fb346b1019e55bb0", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:13:23.336108+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h100-dgxc-slurm_12", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + 
"resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b029c1a6fded400", + "workloadId": "set:3:07d544ac2af401ec", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28272369133", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272369133", + "createdAt": "2026-06-27T00:12:24Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 98.88000041246414, + "p90": 104.8320010304451, + "p95": 107.96800255775452, + "p99": 118.97599697113037 + }, + "combine": { + "p50": 79.93599772453308, + "p90": 87.5839963555336, + "p95": 87.99999952316284, + "p99": 92.28800237178802 + }, + "roundtrip": { + "p50": 154.11199629306793, + "p90": 159.2639982700348, + "p95": 161.43999993801117, + "p99": 167.29600727558136 + }, + "isolatedSum": { + "p50": 
178.81599813699722, + "p90": 192.4159973859787, + "p95": 195.96800208091736, + "p99": 211.2639993429184 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 103.04000228643417, + "p90": 108.22399705648422, + "p95": 110.43199896812439, + "p99": 116.64000153541565 + }, + "combine": { + "p50": 87.93599903583527, + "p90": 94.94400024414062, + "p95": 96.03200107812881, + "p99": 98.49599748849869 + }, + "roundtrip": { + "p50": 162.4639928340912, + "p90": 170.3999936580658, + "p95": 172.31999337673187, + "p99": 178.9119988679886 + }, + "isolatedSum": { + "p50": 190.97600132226944, + "p90": 203.16799730062485, + "p95": 206.4640000462532, + "p99": 215.13599902391434 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 129.40800189971924, + "p90": 137.7599984407425, + "p95": 139.45600390434265, + "p99": 143.48800480365753 + }, + "combine": { + "p50": 114.88000303506851, + "p90": 119.87199634313583, + "p95": 120.4800009727478, + "p99": 123.48800152540207 + }, + "roundtrip": { + "p50": 213.0880057811737, + "p90": 217.3759937286377, + "p95": 219.10400688648224, + "p99": 223.23200106620789 + }, + "isolatedSum": { + "p50": 244.28800493478775, + "p90": 257.6319947838783, + "p95": 259.93600487709045, + "p99": 266.9760063290596 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + 
{ + "id": "cx-535aa40c", + "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||c774c8e4abb34da", + "colorKey": "h100_42947950", + "comparisonKey": "f31dd87deba90285", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:53:48.998127+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h100-dgxc-slurm_03", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "c774c8e4abb34da", + "workloadId": "set:5:d8d49658059863f2", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28273506790", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28273506790", + "createdAt": "2026-06-27T00:52:45Z", + "sha": "2c15d9415503e9ccb84cd49cf446a122796efc1e" + }, + "rows": [ 
+ { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 94.4959968328476, + "p90": 100.5759984254837, + "p95": 102.81600058078766, + "p99": 107.42399841547012 + }, + "combine": { + "p50": 76.92799717187881, + "p90": 80.89599758386612, + "p95": 81.37600123882294, + "p99": 85.91999858617783 + }, + "roundtrip": { + "p50": 150.65599977970123, + "p90": 155.35999834537506, + "p95": 157.02399611473083, + "p99": 163.5199934244156 + }, + "isolatedSum": { + "p50": 171.4239940047264, + "p90": 181.47199600934982, + "p95": 184.1920018196106, + "p99": 193.34399700164795 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 90.97599983215332, + "p90": 98.52799773216248, + "p95": 101.02400183677673, + "p99": 107.68000036478043 + }, + "combine": { + "p50": 77.11999863386154, + "p90": 81.216000020504, + "p95": 82.71999657154083, + "p99": 87.55200356245041 + }, + "roundtrip": { + "p50": 149.47199821472168, + "p90": 154.91199493408203, + "p95": 157.151997089386, + "p99": 163.80800306797028 + }, + "isolatedSum": { + "p50": 168.09599846601486, + "p90": 179.74399775266647, + "p95": 183.74399840831757, + "p99": 195.23200392723083 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 93.12000125646591, + "p90": 99.64799880981445, + "p95": 102.27199643850327, + "p99": 109.43999886512756 + }, + "combine": { + "p50": 79.3600007891655, + "p90": 83.0719992518425, + "p95": 84.22400057315826, + "p99": 88.54400366544724 + }, + "roundtrip": { + "p50": 151.96800231933594, + "p90": 
158.9439958333969, + "p95": 160.25599837303162, + "p99": 163.07200491428375 + }, + "isolatedSum": { + "p50": 172.4800020456314, + "p90": 182.71999806165695, + "p95": 186.49599701166153, + "p99": 197.9840025305748 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 94.27200257778168, + "p90": 100.80000013113022, + "p95": 102.62399911880493, + "p99": 107.80800133943558 + }, + "combine": { + "p50": 78.68800312280655, + "p90": 83.13599973917007, + "p95": 84.25600081682205, + "p99": 86.65599673986435 + }, + "roundtrip": { + "p50": 151.39199793338776, + "p90": 157.79200196266174, + "p95": 160.25599837303162, + "p99": 164.95999693870544 + }, + "isolatedSum": { + "p50": 172.96000570058823, + "p90": 183.9359998703003, + "p95": 186.87999993562698, + "p99": 194.46399807929993 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 96.16000205278397, + "p90": 100.92800110578537, + "p95": 103.71199995279312, + "p99": 108.06400328874588 + }, + "combine": { + "p50": 81.85599744319916, + "p90": 87.26400136947632, + "p95": 88.8959988951683, + "p99": 90.04800021648407 + }, + "roundtrip": { + "p50": 153.6639928817749, + "p90": 160.35200655460358, + "p95": 161.95200383663177, + "p99": 165.3439998626709 + }, + "isolatedSum": { + "p50": 178.01599949598312, + "p90": 188.1920024752617, + "p95": 192.60799884796143, + "p99": 198.11200350522995 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 
92, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-5a3d925c", + "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|small-amplitude|none|none|0|tuned||8c8497a77d9085d", + "colorKey": "h100_42947950", + "comparisonKey": "da8c4fcc63f5bf6e", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:05:07.028525+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h100-dgxc-slurm_18", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "small-amplitude", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "8c8497a77d9085d", + "workloadId": "set:4:d8d49658059863f2", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28272117855", + "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272117855", + "createdAt": "2026-06-27T00:04:08Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 97.79199957847595, + "p90": 105.02400249242783, + "p95": 107.29599744081497, + "p99": 115.90400338172913 + }, + "combine": { + "p50": 79.77599650621414, + "p90": 82.11199939250946, + "p95": 86.91199868917465, + "p99": 88.79999816417694 + }, + "roundtrip": { + "p50": 152.44799852371216, + "p90": 158.59200060367584, + "p95": 160.44799983501434, + "p99": 165.40800034999847 + }, + "isolatedSum": { + "p50": 177.5679960846901, + "p90": 187.1360018849373, + "p95": 194.20799612998962, + "p99": 204.70400154590607 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 98.01600128412247, + "p90": 103.87200117111206, + "p95": 106.01600259542465, + "p99": 113.11999708414078 + }, + "combine": { + "p50": 81.02399855852127, + "p90": 87.71199733018875, + "p95": 87.96799927949905, + "p99": 89.50400352478027 + }, + "roundtrip": { + "p50": 155.16799688339233, + "p90": 160.38399934768677, + "p95": 162.23999857902527, + "p99": 166.87999665737152 + }, + "isolatedSum": { + "p50": 179.03999984264374, + "p90": 191.5839985013008, + "p95": 193.9840018749237, + "p99": 202.62400060892105 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 101.98400169610977, + "p90": 106.6880002617836, + "p95": 109.95200276374817, + "p99": 120.35199999809265 + }, + 
"combine": { + "p50": 88.22400122880936, + "p90": 95.0080007314682, + "p95": 95.93600034713745, + "p99": 96.83199971914291 + }, + "roundtrip": { + "p50": 162.75200247764587, + "p90": 169.63200271129608, + "p95": 171.58399522304535, + "p99": 176.28799378871918 + }, + "isolatedSum": { + "p50": 190.20800292491913, + "p90": 201.6960009932518, + "p95": 205.88800311088562, + "p99": 217.18399971723557 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 129.66400384902954, + "p90": 137.08800077438354, + "p95": 139.0399932861328, + "p99": 142.752006649971 + }, + "combine": { + "p50": 115.00799655914307, + "p90": 120.7680031657219, + "p95": 121.31199985742569, + "p99": 127.83999741077423 + }, + "roundtrip": { + "p50": 212.89600431919098, + "p90": 218.72000396251678, + "p95": 219.9680060148239, + "p99": 224.06400740146637 + }, + "isolatedSum": { + "p50": 244.6720004081726, + "p90": 257.85600394010544, + "p95": 260.3519931435585, + "p99": 270.59200406074524 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-49497b06", + "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|wide-dynamic-range|none|none|0|tuned||8c8497a77d9085d", + "colorKey": "h100_42947950", + "comparisonKey": "5ec10556693a8c2b", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:05:08.113815+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h100-dgxc-slurm_05", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": 
"backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "wide-dynamic-range", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "8c8497a77d9085d", + "workloadId": "set:4:d8d49658059863f2", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28272121618", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272121618", + "createdAt": "2026-06-27T00:04:15Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 98.24000298976898, + "p90": 105.69600015878677, + "p95": 108.12799632549286, + "p99": 113.37599903345108 + }, + "combine": { + "p50": 79.68000322580338, + "p90": 82.07999914884567, + "p95": 82.97599852085114, + "p99": 87.61599659919739 + }, + "roundtrip": { + "p50": 146.464005112648, + "p90": 152.8320014476776, + "p95": 154.59200739860535, + "p99": 158.84800255298615 + }, + "isolatedSum": { + "p50": 177.92000621557236, + 
"p90": 187.77599930763245, + "p95": 191.103994846344, + "p99": 200.99199563264847 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 100.0640019774437, + "p90": 107.32799768447876, + "p95": 110.27199774980545, + "p99": 160.92799603939056 + }, + "combine": { + "p50": 81.34400099515915, + "p90": 87.16800063848495, + "p95": 87.87199854850769, + "p99": 90.27200192213058 + }, + "roundtrip": { + "p50": 152.92799472808838, + "p90": 160.51200032234192, + "p95": 162.30399906635284, + "p99": 166.24000668525696 + }, + "isolatedSum": { + "p50": 181.40800297260284, + "p90": 194.49599832296371, + "p95": 198.14399629831314, + "p99": 251.19999796152115 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 103.39199751615524, + "p90": 108.2879975438118, + "p95": 110.6560006737709, + "p99": 119.03999745845795 + }, + "combine": { + "p50": 89.75999802350998, + "p90": 95.20000219345093, + "p95": 95.93600034713745, + "p99": 98.68799895048141 + }, + "roundtrip": { + "p50": 161.6320013999939, + "p90": 169.08800601959229, + "p95": 170.68800330162048, + "p99": 175.64800381660461 + }, + "isolatedSum": { + "p50": 193.15199553966522, + "p90": 203.48799973726273, + "p95": 206.59200102090836, + "p99": 217.72799640893936 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 
1024, + "dispatch": { + "p50": 130.46400249004364, + "p90": 136.9280070066452, + "p95": 139.23199474811554, + "p99": 143.5839980840683 + }, + "combine": { + "p50": 114.78400230407715, + "p90": 120.83200365304947, + "p95": 122.11199849843979, + "p99": 122.8799968957901 + }, + "roundtrip": { + "p50": 211.71200275421143, + "p90": 219.35999393463135, + "p95": 221.91999852657318, + "p99": 235.00800132751465 + }, + "isolatedSum": { + "p50": 245.2480047941208, + "p90": 257.7600106596947, + "p95": 261.3439932465553, + "p99": 266.4639949798584 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-3b04d344", + "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|zeros|none|none|0|tuned||8c8497a77d9085d", + "colorKey": "h100_42947950", + "comparisonKey": "8bd0272e65400ebd", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:05:11.747577+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h100-dgxc-slurm_00", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "zeros", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + 
"conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "8c8497a77d9085d", + "workloadId": "set:4:d8d49658059863f2", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28272113941", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272113941", + "createdAt": "2026-06-27T00:04:01Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 97.15200215578079, + "p90": 103.67999970912933, + "p95": 105.85600137710571, + "p99": 108.99200290441513 + }, + "combine": { + "p50": 79.64800298213959, + "p90": 82.33600109815598, + "p95": 86.84799820184708, + "p99": 87.96799927949905 + }, + "roundtrip": { + "p50": 151.8400013446808, + "p90": 158.01599621772766, + "p95": 160.76800227165222, + "p99": 165.3120070695877 + }, + "isolatedSum": { + "p50": 176.80000513792038, + "p90": 186.0160008072853, + "p95": 192.7039995789528, + "p99": 196.96000218391418 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 97.28000313043594, + "p90": 103.71199995279312, + "p95": 106.4319983124733, + "p99": 121.63200229406357 + }, + "combine": { + "p50": 79.93599772453308, + "p90": 87.39200234413147, + "p95": 87.93599903583527, + "p99": 90.04800021648407 + }, + "roundtrip": { + "p50": 153.72799336910248, + "p90": 159.55199301242828, + 
"p95": 160.7999950647354, + "p99": 165.6000018119812 + }, + "isolatedSum": { + "p50": 177.21600085496902, + "p90": 191.1040022969246, + "p95": 194.36799734830856, + "p99": 211.68000251054764 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 104.00000214576721, + "p90": 108.44799876213074, + "p95": 111.68000102043152, + "p99": 126.75200402736664 + }, + "combine": { + "p50": 87.99999952316284, + "p90": 93.44000369310379, + "p95": 95.87199985980988, + "p99": 97.59999811649323 + }, + "roundtrip": { + "p50": 161.8880033493042, + "p90": 168.64000260829926, + "p95": 170.0800061225891, + "p99": 175.99999904632568 + }, + "isolatedSum": { + "p50": 192.00000166893005, + "p90": 201.88800245523453, + "p95": 207.5520008802414, + "p99": 224.35200214385986 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 130.0159990787506, + "p90": 137.7280056476593, + "p95": 138.7840062379837, + "p99": 142.2719955444336 + }, + "combine": { + "p50": 115.167997777462, + "p90": 120.54400146007538, + "p95": 120.95999717712402, + "p99": 123.87199699878693 + }, + "roundtrip": { + "p50": 212.47999370098114, + "p90": 216.63999557495117, + "p95": 218.1439995765686, + "p99": 221.47199511528015 + }, + "isolatedSum": { + "p50": 245.18399685621262, + "p90": 258.2720071077347, + "p95": 259.7440034151077, + "p99": 266.1439925432205 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + 
"stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-d0428a76", + "identity": "h100|deepep|7168|8|256|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h100_ff7906f8", + "comparisonKey": "e3488cf5058170e6", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:47:28.813270+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h100-dgxc-slurm_07", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": "set:8:d8d49658059863f2", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271559607", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271559607", + "createdAt": 
"2026-06-26T23:46:31Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 96.79999947547913, + "p90": 103.39199751615524, + "p95": 104.80000078678131, + "p99": 109.43999886512756 + }, + "combine": { + "p50": 79.13599908351898, + "p90": 81.40800148248672, + "p95": 86.68799698352814, + "p99": 87.90399879217148 + }, + "roundtrip": { + "p50": 152.12799608707428, + "p90": 159.96800363063812, + "p95": 162.36799955368042, + "p99": 177.69600450992584 + }, + "isolatedSum": { + "p50": 175.9359985589981, + "p90": 184.79999899864197, + "p95": 191.48799777030945, + "p99": 197.34399765729904 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 71.23199850320816, + "p90": 101.98400169610977, + "p95": 103.84000092744827, + "p99": 108.35199803113937 + }, + "combine": { + "p50": 72.54400104284286, + "p90": 81.40800148248672, + "p95": 82.62400329113007, + "p99": 87.77599781751633 + }, + "roundtrip": { + "p50": 129.08799946308136, + "p90": 158.2079976797104, + "p95": 159.58400070667267, + "p99": 165.02399742603302 + }, + "isolatedSum": { + "p50": 143.77599954605103, + "p90": 183.3920031785965, + "p95": 186.46400421857834, + "p99": 196.1279958486557 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 70.52800059318542, + "p90": 99.84000027179718, + "p95": 105.72800040245056, + "p99": 115.07199704647064 + }, + "combine": { + "p50": 72.9919970035553, + "p90": 80.99199831485748, + "p95": 86.94399893283844, 
+ "p99": 103.55199873447418 + }, + "roundtrip": { + "p50": 129.43999469280243, + "p90": 156.19200468063354, + "p95": 159.07199680805206, + "p99": 162.56000101566315 + }, + "isolatedSum": { + "p50": 143.51999759674072, + "p90": 180.83199858665466, + "p95": 192.671999335289, + "p99": 218.62399578094482 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 96.54399752616882, + "p90": 101.3759970664978, + "p95": 103.61599922180176, + "p99": 111.26399785280228 + }, + "combine": { + "p50": 79.52000200748444, + "p90": 87.13600039482117, + "p95": 87.64799684286118, + "p99": 88.73599767684937 + }, + "roundtrip": { + "p50": 152.16000378131866, + "p90": 159.39199924468994, + "p95": 161.15200519561768, + "p99": 170.52799463272095 + }, + "isolatedSum": { + "p50": 176.06399953365326, + "p90": 188.51199746131897, + "p95": 191.26399606466293, + "p99": 199.99999552965164 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 96.3520035147667, + "p90": 101.75999999046326, + "p95": 104.89600151777267, + "p99": 110.11199653148651 + }, + "combine": { + "p50": 84.48000252246857, + "p90": 88.03199976682663, + "p95": 89.21600133180618, + "p99": 95.23200243711472 + }, + "roundtrip": { + "p50": 153.05599570274353, + "p90": 160.288006067276, + "p95": 162.432000041008, + "p99": 171.2000072002411 + }, + "isolatedSum": { + "p50": 180.83200603723526, + "p90": 189.7919997572899, + "p95": 194.11200284957886, + "p99": 205.34399896860123 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 
9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 89.9519994854927, + "p90": 104.35199737548828, + "p95": 106.65600001811981, + "p99": 117.85600334405899 + }, + "combine": { + "p50": 81.216000020504, + "p90": 92.19200164079666, + "p95": 95.39200365543365, + "p99": 96.0640013217926 + }, + "roundtrip": { + "p50": 141.05600118637085, + "p90": 168.2880073785782, + "p95": 169.5680022239685, + "p99": 174.40000176429749 + }, + "isolatedSum": { + "p50": 171.1679995059967, + "p90": 196.54399901628494, + "p95": 202.04800367355347, + "p99": 213.9200046658516 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 111.51999980211258, + "p90": 119.00799721479416, + "p95": 121.44000083208084, + "p99": 126.56000256538391 + }, + "combine": { + "p50": 95.0080007314682, + "p90": 103.04000228643417, + "p95": 103.35999727249146, + "p99": 104.92800176143646 + }, + "roundtrip": { + "p50": 164.63999450206757, + "p90": 182.3039948940277, + "p95": 185.12000143527985, + "p99": 188.7039989233017 + }, + "isolatedSum": { + "p50": 206.52800053358078, + "p90": 222.04799950122833, + "p95": 224.7999981045723, + "p99": 231.48800432682037 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 116.99199676513672, + "p90": 133.2480013370514, + "p95": 135.51999628543854, + "p99": 140.6400054693222 + }, 
+ "combine": { + "p50": 106.88000172376633, + "p90": 119.55200135707855, + "p95": 120.2239990234375, + "p99": 127.55200266838074 + }, + "roundtrip": { + "p50": 199.3280053138733, + "p90": 215.45599400997162, + "p95": 217.56799519062042, + "p99": 258.91199707984924 + }, + "isolatedSum": { + "p50": 223.87199848890305, + "p90": 252.80000269412994, + "p95": 255.74399530887604, + "p99": 268.19200813770294 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-e96d722b", + "identity": "h100|deepep|7168|8|384|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||d6c49ae98878760", + "colorKey": "h100_ff7906f8", + "comparisonKey": "c69daa1ab05193b6", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:51:56.132475+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h100-dgxc-slurm_17", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 384, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + 
"gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "d6c49ae98878760", + "workloadId": "set:8:9a27d0df4b17fa09", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271667766", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271667766", + "createdAt": "2026-06-26T23:49:58Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 96.09600156545639, + "p90": 102.81600058078766, + "p95": 104.54399883747101, + "p99": 110.59200018644333 + }, + "combine": { + "p50": 79.03999835252762, + "p90": 81.50400221347809, + "p95": 82.11199939250946, + "p99": 87.90399879217148 + }, + "roundtrip": { + "p50": 145.56799829006195, + "p90": 153.31199765205383, + "p95": 155.71199357509613, + "p99": 159.39199924468994 + }, + "isolatedSum": { + "p50": 175.135999917984, + "p90": 184.32000279426575, + "p95": 186.65599822998047, + "p99": 198.4959989786148 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 71.03999704122543, + "p90": 101.6319990158081, + "p95": 102.65599936246872, + "p99": 106.62399977445602 + }, + "combine": { + "p50": 72.28799909353256, + "p90": 80.54400235414505, + "p95": 81.40800148248672, + "p99": 87.00799942016602 + }, + "roundtrip": { + "p50": 129.18399274349213, + "p90": 152.70400047302246, + "p95": 156.92800283432007, + "p99": 160.76800227165222 + }, + "isolatedSum": { + "p50": 143.327996134758, + "p90": 182.17600136995316, + "p95": 
184.06400084495544, + "p99": 193.63199919462204 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1218560, + "combineLogicalBytes": 1218560, + "fanoutMean": 5.3125, + "recvTokensMax": 14, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 72.57600128650665, + "p90": 101.02400183677673, + "p95": 103.61599922180176, + "p99": 110.81600189208984 + }, + "combine": { + "p50": 72.25599884986877, + "p90": 79.96799796819687, + "p95": 86.71999722719193, + "p99": 87.64799684286118 + }, + "roundtrip": { + "p50": 129.92000579833984, + "p90": 161.3759994506836, + "p95": 162.30399906635284, + "p99": 166.4319932460785 + }, + "isolatedSum": { + "p50": 144.83200013637543, + "p90": 180.9919998049736, + "p95": 190.33599644899368, + "p99": 198.46399873495102 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2408448, + "combineLogicalBytes": 2408448, + "fanoutMean": 5.25, + "recvTokensMax": 26, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 96.3200032711029, + "p90": 101.1200025677681, + "p95": 102.52799838781357, + "p99": 109.11999642848969 + }, + "combine": { + "p50": 79.23199981451035, + "p90": 82.11199939250946, + "p95": 87.00799942016602, + "p99": 87.71199733018875 + }, + "roundtrip": { + "p50": 151.5199989080429, + "p90": 159.2320054769516, + "p95": 160.60799360275269, + "p99": 165.21599888801575 + }, + "isolatedSum": { + "p50": 175.55200308561325, + "p90": 183.23200196027756, + "p95": 189.53599780797958, + "p99": 196.83199375867844 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4831232, + "combineLogicalBytes": 4831232, + "fanoutMean": 5.265625, + "recvTokensMax": 48, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 
96.22400254011154, + "p90": 102.36799716949463, + "p95": 105.05600273609161, + "p99": 110.30399799346924 + }, + "combine": { + "p50": 81.88799768686295, + "p90": 88.28800171613693, + "p95": 89.31200206279755, + "p99": 94.43199634552002 + }, + "roundtrip": { + "p50": 152.48000621795654, + "p90": 160.09600460529327, + "p95": 164.19200599193573, + "p99": 172.83199727535248 + }, + "isolatedSum": { + "p50": 178.1120002269745, + "p90": 190.65599888563156, + "p95": 194.36800479888916, + "p99": 204.73599433898926 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9848832, + "combineLogicalBytes": 9848832, + "fanoutMean": 5.3671875, + "recvTokensMax": 91, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 90.36800265312195, + "p90": 102.59199887514114, + "p95": 104.3199971318245, + "p99": 108.03200304508209 + }, + "combine": { + "p50": 80.92799782752991, + "p90": 90.01599997282028, + "p95": 95.13600170612335, + "p99": 96.41599655151367 + }, + "roundtrip": { + "p50": 142.46399700641632, + "p90": 169.95200514793396, + "p95": 174.55999553203583, + "p99": 181.7920058965683 + }, + "isolatedSum": { + "p50": 171.29600048065186, + "p90": 192.60799884796143, + "p95": 199.45599883794785, + "p99": 204.44799959659576 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 19496960, + "fanoutMean": 5.3125, + "recvTokensMax": 178, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 95.71199864149094, + "p90": 116.54400080442429, + "p95": 118.59200149774551, + "p99": 125.63200294971466 + }, + "combine": { + "p50": 89.72799777984619, + "p90": 103.74400019645691, + "p95": 104.22399640083313, + "p99": 106.04800283908844 + }, + "roundtrip": { + "p50": 165.66400229930878, + "p90": 185.34399569034576, + "p95": 186.97600066661835, + 
"p99": 190.08000195026398 + }, + "isolatedSum": { + "p50": 185.43999642133713, + "p90": 220.2880010008812, + "p95": 222.81599789857864, + "p99": 231.6800057888031 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 38836224, + "fanoutMean": 5.291015625, + "recvTokensMax": 372, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 113.11999708414078, + "p90": 133.82400572299957, + "p95": 137.05599308013916, + "p99": 140.28799533843994 + }, + "combine": { + "p50": 106.46399855613708, + "p90": 120.12799829244614, + "p95": 120.51200121641159, + "p99": 120.99199742078781 + }, + "roundtrip": { + "p50": 196.8960016965866, + "p90": 216.99200570583344, + "p95": 218.9120054244995, + "p99": 220.99199891090393 + }, + "isolatedSum": { + "p50": 219.58399564027786, + "p90": 253.9520040154457, + "p95": 257.56799429655075, + "p99": 261.27999275922775 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77514752, + "combineLogicalBytes": 77514752, + "fanoutMean": 5.2802734375, + "recvTokensMax": 707, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-10aeccec", + "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|alternating-groups|8|decode|normal|none|none|0|tuned||3cd13eac5b27759", + "colorKey": "h100_648ede74", + "comparisonKey": "03a9af950bebf5a9", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:11:55.271848+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h100-dgxc-slurm_07", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 
· deepep · bf16 · alternating-groups", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "alternating-groups", + "routingLabel": "alternating-groups", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "3cd13eac5b27759", + "workloadId": "set:3:24add4cb1eb472b4", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28272328109", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272328109", + "createdAt": "2026-06-27T00:11:02Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 97.28000313043594, + "p90": 104.06400263309479, + "p95": 106.72000050544739, + "p99": 117.34399944543839 + }, + "combine": { + "p50": 78.87999713420868, + "p90": 81.82399719953537, + "p95": 83.29600095748901, + "p99": 88.99199962615967 + }, + "roundtrip": { + "p50": 147.5840061903, + "p90": 155.5519998073578, + "p95": 157.98400342464447, + "p99": 164.0319973230362 + }, + "isolatedSum": { + "p50": 176.16000026464462, + "p90": 185.88799983263016, + "p95": 190.0160014629364, + "p99": 206.33599907159805 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3297280, + "combineLogicalBytes": 3297280, + "fanoutMean": 
3.59375, + "recvTokensMax": 61, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 102.81600058078766, + "p90": 106.78400099277496, + "p95": 110.46399921178818, + "p99": 137.40800321102142 + }, + "combine": { + "p50": 87.13600039482117, + "p90": 87.93599903583527, + "p95": 88.79999816417694, + "p99": 95.48799693584442 + }, + "roundtrip": { + "p50": 158.6879938840866, + "p90": 165.69599509239197, + "p95": 167.64800250530243, + "p99": 171.80800437927246 + }, + "isolatedSum": { + "p50": 189.95200097560883, + "p90": 194.72000002861023, + "p95": 199.26399737596512, + "p99": 232.89600014686584 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13275136, + "combineLogicalBytes": 13275136, + "fanoutMean": 3.6171875, + "recvTokensMax": 236, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 139.615997672081, + "p90": 146.464005112648, + "p95": 149.47199821472168, + "p99": 156.6080003976822 + }, + "combine": { + "p50": 119.87199634313583, + "p90": 121.31199985742569, + "p95": 127.58399546146393, + "p99": 128.92800569534302 + }, + "roundtrip": { + "p50": 225.92000663280487, + "p90": 230.27199506759644, + "p95": 232.06399381160736, + "p99": 238.0480021238327 + }, + "isolatedSum": { + "p50": 259.4879940152168, + "p90": 267.7760049700737, + "p95": 277.0559936761856, + "p99": 285.5360060930252 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 53172224, + "combineLogicalBytes": 53172224, + "fanoutMean": 3.6220703125, + "recvTokensMax": 934, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-62470199", + "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|alternating-groups@s1|8|decode|normal|none|none|1|tuned||f8662de0b3559f9", + "colorKey": "h100_b681a3a4", + 
"comparisonKey": "03a9af950bebf5a9", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:12:00.195927+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_17", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 · alternating-groups@s1", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "alternating-groups", + "routingLabel": "alternating-groups@s1", + "routingStep": 1, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "f8662de0b3559f9", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28272331593", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272331593", + "createdAt": "2026-06-27T00:11:09Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 95.20000219345093, + "p90": 101.24800354242325, + "p95": 103.42399775981903, + 
"p99": 115.84000289440155 + }, + "combine": { + "p50": 79.29600030183792, + "p90": 80.92799782752991, + "p95": 81.79199695587158, + "p99": 88.03199976682663 + }, + "roundtrip": { + "p50": 148.03199470043182, + "p90": 153.24799716472626, + "p95": 156.41599893569946, + "p99": 176.06399953365326 + }, + "isolatedSum": { + "p50": 174.49600249528885, + "p90": 182.17600136995316, + "p95": 185.2159947156906, + "p99": 203.87200266122818 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3297280, + "combineLogicalBytes": 3297280, + "fanoutMean": 3.59375, + "recvTokensMax": 61, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 99.5199978351593, + "p90": 107.39199817180634, + "p95": 118.72000247240067, + "p99": 229.95199263095856 + }, + "combine": { + "p50": 87.52000331878662, + "p90": 89.34400230646133, + "p95": 92.3520028591156, + "p99": 96.44799679517746 + }, + "roundtrip": { + "p50": 155.5519998073578, + "p90": 160.70400178432465, + "p95": 164.76799547672272, + "p99": 175.07199943065643 + }, + "isolatedSum": { + "p50": 187.04000115394592, + "p90": 196.73600047826767, + "p95": 211.07200533151627, + "p99": 326.399989426136 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13275136, + "combineLogicalBytes": 13275136, + "fanoutMean": 3.6171875, + "recvTokensMax": 236, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 133.82400572299957, + "p90": 141.08799397945404, + "p95": 142.62400567531586, + "p99": 146.40000462532043 + }, + "combine": { + "p50": 120.28799951076508, + "p90": 122.56000190973282, + "p95": 127.10399925708771, + "p99": 136.00000739097595 + }, + "roundtrip": { + "p50": 221.88800573349, + "p90": 225.79200565814972, + "p95": 227.26400196552277, + "p99": 233.024001121521 + }, + "isolatedSum": { + "p50": 254.11200523376465, + 
"p90": 263.64799588918686, + "p95": 269.72800493240356, + "p99": 282.4000120162964 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 53172224, + "combineLogicalBytes": 53172224, + "fanoutMean": 3.6220703125, + "recvTokensMax": 934, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-62dda1f3", + "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|alternating-groups@s2|8|decode|normal|none|none|2|tuned||3cd13eac5b27759", + "colorKey": "h100_b981a85d", + "comparisonKey": "03a9af950bebf5a9", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:12:08.462042+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_04", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 · alternating-groups@s2", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "alternating-groups", + "routingLabel": "alternating-groups@s2", + "routingStep": 2, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "3cd13eac5b27759", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + 
"backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28272335347", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272335347", + "createdAt": "2026-06-27T00:11:16Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 91.96799993515015, + "p90": 101.85600072145462, + "p95": 102.88000106811523, + "p99": 111.00800335407257 + }, + "combine": { + "p50": 76.60800218582153, + "p90": 81.60000294446945, + "p95": 82.17599987983704, + "p99": 85.21600067615509 + }, + "roundtrip": { + "p50": 146.7839926481247, + "p90": 152.6080071926117, + "p95": 154.27200496196747, + "p99": 160.99199652671814 + }, + "isolatedSum": { + "p50": 168.57600212097168, + "p90": 183.45600366592407, + "p95": 185.05600094795227, + "p99": 196.22400403022766 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3297280, + "combineLogicalBytes": 3297280, + "fanoutMean": 3.59375, + "recvTokensMax": 61, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 98.88000041246414, + "p90": 104.06400263309479, + "p95": 106.30399733781815, + "p99": 139.42399621009827 + }, + "combine": { + "p50": 84.60800349712372, + "p90": 86.30400151014328, + "p95": 86.81599795818329, + "p99": 92.51199662685394 + }, + "roundtrip": { + "p50": 154.65599298477173, + "p90": 160.64000129699707, + "p95": 162.59199380874634, + "p99": 168.09600591659546 + }, + "isolatedSum": { + "p50": 183.48800390958786, + "p90": 190.36800414323807, + "p95": 193.11999529600143, + "p99": 231.9359928369522 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13275136, + "combineLogicalBytes": 13275136, + "fanoutMean": 3.6171875, + "recvTokensMax": 236, + "stragglerRank": 0, + "correct": true, + 
"samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 141.50400459766388, + "p90": 146.68799936771393, + "p95": 147.67999947071075, + "p99": 152.41600573062897 + }, + "combine": { + "p50": 118.17599833011627, + "p90": 122.56000190973282, + "p95": 123.58400225639343, + "p99": 125.82400441169739 + }, + "roundtrip": { + "p50": 227.13600099086761, + "p90": 231.23200237751007, + "p95": 232.92799293994904, + "p99": 237.05600202083588 + }, + "isolatedSum": { + "p50": 259.68000292778015, + "p90": 269.24800127744675, + "p95": 271.2640017271042, + "p99": 278.24001014232635 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 53172224, + "combineLogicalBytes": 53172224, + "fanoutMean": 3.6220703125, + "recvTokensMax": 934, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-f337d9a1", + "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|alternating-groups@s3|8|decode|normal|none|none|3|tuned||f8662de0b3559f9", + "colorKey": "h100_b881a6ca", + "comparisonKey": "03a9af950bebf5a9", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:12:29.724404+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_15", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 · alternating-groups@s3", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "alternating-groups", + "routingLabel": "alternating-groups@s3", + "routingStep": 3, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + 
}, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "f8662de0b3559f9", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28272338723", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272338723", + "createdAt": "2026-06-27T00:11:23Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 94.84799951314926, + "p90": 121.37600034475327, + "p95": 148.8959938287735, + "p99": 189.56799805164337 + }, + "combine": { + "p50": 79.58400249481201, + "p90": 96.6079980134964, + "p95": 113.0559965968132, + "p99": 123.77600371837616 + }, + "roundtrip": { + "p50": 148.44800531864166, + "p90": 183.20000171661377, + "p95": 218.78400444984436, + "p99": 249.79199469089508 + }, + "isolatedSum": { + "p50": 174.43200200796127, + "p90": 217.98399835824966, + "p95": 261.9519904255867, + "p99": 313.34400177001953 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3297280, + "combineLogicalBytes": 3297280, + "fanoutMean": 3.59375, + "recvTokensMax": 61, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 100.41599720716476, + "p90": 127.55200266838074, + "p95": 156.5759927034378, + "p99": 182.81599879264832 + }, + "combine": { + "p50": 87.8399983048439, + "p90": 
103.93600165843964, + "p95": 120.38400024175644, + "p99": 128.89599800109863 + }, + "roundtrip": { + "p50": 156.99200332164764, + "p90": 193.7599927186966, + "p95": 223.7119972705841, + "p99": 247.23200500011444 + }, + "isolatedSum": { + "p50": 188.25599551200867, + "p90": 231.48800432682037, + "p95": 276.95999294519424, + "p99": 311.71199679374695 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13275136, + "combineLogicalBytes": 13275136, + "fanoutMean": 3.6171875, + "recvTokensMax": 236, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 134.2719942331314, + "p90": 147.39200472831726, + "p95": 173.567995429039, + "p99": 188.1919950246811 + }, + "combine": { + "p50": 120.44800072908401, + "p90": 138.62399756908417, + "p95": 152.38399803638458, + "p99": 160.96000373363495 + }, + "roundtrip": { + "p50": 222.6880043745041, + "p90": 247.80799448490143, + "p95": 264.6079957485199, + "p99": 279.35999631881714 + }, + "isolatedSum": { + "p50": 254.71999496221542, + "p90": 286.0160022974014, + "p95": 325.9519934654236, + "p99": 349.15199875831604 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 53172224, + "combineLogicalBytes": 53172224, + "fanoutMean": 3.6220703125, + "recvTokensMax": 934, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-cf5bc26b", + "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|decode|normal|none|none|0|tuned||2279937619f3971", + "colorKey": "h100_16047c28", + "comparisonKey": "64192d9d479bdd44", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:54:33.118563+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h100-dgxc-slurm_12", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", 
+ "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 · balanced", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced", + "routingLabel": "balanced", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "2279937619f3971", + "workloadId": "set:4:7af12818400d6348", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271788376", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271788376", + "createdAt": "2026-06-26T23:53:36Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 94.68799829483032, + "p90": 101.1200025677681, + "p95": 104.41599786281586, + "p99": 111.10399663448334 + }, + "combine": { + "p50": 80.99199831485748, + "p90": 86.84799820184708, + "p95": 87.8399983048439, + "p99": 89.9519994854927 + }, + "roundtrip": { + "p50": 150.30400454998016, + "p90": 156.95999562740326, + "p95": 159.67999398708344, + "p99": 164.15999829769135 + }, + "isolatedSum": { + "p50": 175.6799966096878, + "p90": 187.96800076961517, + "p95": 192.25599616765976, + 
"p99": 201.05599611997604 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 8, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 95.0080007314682, + "p90": 100.00000149011612, + "p95": 102.68799960613251, + "p99": 108.57599973678589 + }, + "combine": { + "p50": 81.727996468544, + "p90": 88.51200342178345, + "p95": 89.37600255012512, + "p99": 90.59199690818787 + }, + "roundtrip": { + "p50": 150.65599977970123, + "p90": 159.58400070667267, + "p95": 161.50400042533875, + "p99": 167.42399334907532 + }, + "isolatedSum": { + "p50": 176.7359972000122, + "p90": 188.51200491189957, + "p95": 192.06400215625763, + "p99": 199.16799664497375 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 7340032, + "combineLogicalBytes": 7340032, + "fanoutMean": 8, + "recvTokensMax": 64, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 104.63999956846237, + "p90": 112.28799819946289, + "p95": 114.14399743080139, + "p99": 119.84000355005264 + }, + "combine": { + "p50": 92.25600212812424, + "p90": 97.69599884748459, + "p95": 98.39999675750732, + "p99": 104.47999835014343 + }, + "roundtrip": { + "p50": 164.000004529953, + "p90": 171.64799571037292, + "p95": 175.4560023546219, + "p99": 228.4799963235855 + }, + "isolatedSum": { + "p50": 196.8960016965866, + "p90": 209.98399704694748, + "p95": 212.54399418830872, + "p99": 224.32000190019608 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 8, + "recvTokensMax": 256, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 143.93599331378937, + "p90": 
148.00000190734863, + "p95": 149.79200065135956, + "p99": 155.68000078201294 + }, + "combine": { + "p50": 132.06399977207184, + "p90": 138.75199854373932, + "p95": 139.29599523544312, + "p99": 145.6959992647171 + }, + "roundtrip": { + "p50": 241.2479966878891, + "p90": 247.6480007171631, + "p95": 249.15200471878052, + "p99": 252.76800990104675 + }, + "isolatedSum": { + "p50": 275.9999930858612, + "p90": 286.75200045108795, + "p95": 289.0879958868027, + "p99": 301.37600004673004 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 8, + "recvTokensMax": 1024, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-4d49fd79", + "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|decode|normal|none|none|0|tuned||ffa946582edb500", + "colorKey": "h100_16047c28", + "comparisonKey": "64192d9d479bdd44", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:59:13.030328+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h100-dgxc-slurm_03", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 · balanced", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced", + "routingLabel": "balanced", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + 
"fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ffa946582edb500", + "workloadId": "set:8:7af12818400d6348", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271931349", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271931349", + "createdAt": "2026-06-26T23:58:18Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 95.93600034713745, + "p90": 101.56799852848053, + "p95": 103.13600301742554, + "p99": 107.744000852108 + }, + "combine": { + "p50": 80.89599758386612, + "p90": 87.07199990749359, + "p95": 87.8399983048439, + "p99": 89.40800279378891 + }, + "roundtrip": { + "p50": 151.42400562763214, + "p90": 160.12799739837646, + "p95": 172.86400496959686, + "p99": 232.12799429893494 + }, + "isolatedSum": { + "p50": 176.83199793100357, + "p90": 188.63999843597412, + "p95": 190.97600132226944, + "p99": 197.1520036458969 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 8, + "recvTokensMax": 8, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 96.47999703884125, + "p90": 103.42399775981903, + "p95": 107.71200060844421, + "p99": 161.40800714492798 + }, + "combine": { + "p50": 81.11999928951263, + "p90": 87.61599659919739, + "p95": 89.1840010881424, + "p99": 185.5359971523285 + }, + "roundtrip": { + "p50": 153.43999862670898, + "p90": 159.4880074262619, + "p95": 163.71199488639832, + "p99": 
313.1200075149536 + }, + "isolatedSum": { + "p50": 177.59999632835388, + "p90": 191.03999435901642, + "p95": 196.8960016965866, + "p99": 346.94400429725647 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1835008, + "combineLogicalBytes": 1835008, + "fanoutMean": 8, + "recvTokensMax": 16, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 95.32800316810608, + "p90": 100.3199964761734, + "p95": 102.1760031580925, + "p99": 106.84800148010254 + }, + "combine": { + "p50": 80.32000064849854, + "p90": 84.22400057315826, + "p95": 88.41600269079208, + "p99": 90.14400094747543 + }, + "roundtrip": { + "p50": 150.94399452209473, + "p90": 158.4639996290207, + "p95": 159.90400314331055, + "p99": 163.32800686359406 + }, + "isolatedSum": { + "p50": 175.64800381660461, + "p90": 184.54399704933167, + "p95": 190.59200584888458, + "p99": 196.99200242757797 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 8, + "recvTokensMax": 32, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 94.87999975681305, + "p90": 98.43199700117111, + "p95": 100.3199964761734, + "p99": 105.3759977221489 + }, + "combine": { + "p50": 80.54400235414505, + "p90": 87.20000088214874, + "p95": 88.73599767684937, + "p99": 89.82399851083755 + }, + "roundtrip": { + "p50": 152.0960032939911, + "p90": 158.65600109100342, + "p95": 160.16000509262085, + "p99": 166.97600483894348 + }, + "isolatedSum": { + "p50": 175.4240021109581, + "p90": 185.63199788331985, + "p95": 189.05599415302277, + "p99": 195.19999623298645 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 7340032, + "combineLogicalBytes": 7340032, + "fanoutMean": 8, + "recvTokensMax": 64, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 
+ }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 95.93600034713745, + "p90": 103.20000350475311, + "p95": 106.20799660682678, + "p99": 168.57600212097168 + }, + "combine": { + "p50": 84.3840017914772, + "p90": 89.40800279378891, + "p95": 89.75999802350998, + "p99": 94.84799951314926 + }, + "roundtrip": { + "p50": 154.84799444675446, + "p90": 161.02400422096252, + "p95": 163.7440025806427, + "p99": 497.50399589538574 + }, + "isolatedSum": { + "p50": 180.32000213861465, + "p90": 192.60800629854202, + "p95": 195.96799463033676, + "p99": 263.42400163412094 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 8, + "recvTokensMax": 128, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 103.16800326108932, + "p90": 109.37599837779999, + "p95": 110.75200140476227, + "p99": 113.43999952077866 + }, + "combine": { + "p50": 88.79999816417694, + "p90": 95.74399888515472, + "p95": 97.120001912117, + "p99": 97.95200079679489 + }, + "roundtrip": { + "p50": 161.6639941930771, + "p90": 167.1999990940094, + "p95": 168.73599588871002, + "p99": 172.89599776268005 + }, + "isolatedSum": { + "p50": 191.96800142526627, + "p90": 205.1199972629547, + "p95": 207.87200331687927, + "p99": 211.39200031757355 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 8, + "recvTokensMax": 256, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 119.6800023317337, + "p90": 128.00000607967377, + "p95": 129.05600666999817, + "p99": 133.91999900341034 + }, + "combine": { + "p50": 103.16800326108932, + "p90": 106.55999928712845, + "p95": 107.90400207042694, + "p99": 113.63200098276138 + }, + "roundtrip": { + "p50": 
186.71999871730804, + "p90": 194.65599954128265, + "p95": 196.31999731063843, + "p99": 199.48799908161163 + }, + "isolatedSum": { + "p50": 222.84800559282303, + "p90": 234.56000536680222, + "p95": 236.9600087404251, + "p99": 247.55199998617172 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 8, + "recvTokensMax": 512, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 137.66400516033173, + "p90": 146.7200070619583, + "p95": 147.8080004453659, + "p99": 151.10400319099426 + }, + "combine": { + "p50": 131.1360001564026, + "p90": 137.82399892807007, + "p95": 138.46400380134583, + "p99": 145.28000354766846 + }, + "roundtrip": { + "p50": 241.40800535678864, + "p90": 248.60799312591553, + "p95": 250.59199333190918, + "p99": 258.5600018501282 + }, + "isolatedSum": { + "p50": 268.8000053167343, + "p90": 284.5440059900284, + "p95": 286.27200424671173, + "p99": 296.3840067386627 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 8, + "recvTokensMax": 1024, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-38b8b0c2", + "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|8|decode|normal|none|none|0|tuned||d02a66236b524b8", + "colorKey": "h100_0c515f8b", + "comparisonKey": "47e8e48c891afabb", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:54:43.774495+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h100-dgxc-slurm_09", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": 
"nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 · balanced-rank-local", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced-rank-local", + "routingLabel": "balanced-rank-local", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "d02a66236b524b8", + "workloadId": "set:4:2eebbed158fe1320", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271795429", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271795429", + "createdAt": "2026-06-26T23:53:50Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 96.03200107812881, + "p90": 102.49599814414978, + "p95": 105.66399991512299, + "p99": 117.88800358772278 + }, + "combine": { + "p50": 71.45600020885468, + "p90": 73.98399710655212, + "p95": 77.18399912118912, + "p99": 81.56800270080566 + }, + "roundtrip": { + "p50": 142.04800128936768, + "p90": 149.98400211334229, + "p95": 151.45599842071533, + "p99": 159.07199680805206 + }, + "isolatedSum": { + "p50": 167.4880012869835, + "p90": 176.4799952507019, + "p95": 182.8479990363121, + "p99": 199.45600628852844 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 114688, + "combineLogicalBytes": 114688, + "fanoutMean": 1, + "recvTokensMax": 4, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 98.9760011434555, + "p90": 106.62399977445602, + "p95": 110.07999628782272, + "p99": 123.00799787044525 + }, + "combine": { + "p50": 71.32799923419952, + "p90": 73.69600236415863, + "p95": 78.52800190448761, + "p99": 80.22399991750717 + }, + "roundtrip": { + "p50": 143.26399564743042, + "p90": 150.14399588108063, + "p95": 153.1520038843155, + "p99": 162.88000345230103 + }, + "isolatedSum": { + "p50": 170.30400037765503, + "p90": 180.32000213861465, + "p95": 188.60799819231033, + "p99": 203.23199778795242 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 1, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 100.09600222110748, + "p90": 107.61599987745285, + "p95": 112.31999844312668, + "p99": 163.16799819469452 + }, + "combine": { + "p50": 79.71200346946716, + "p90": 87.16800063848495, + "p95": 87.74399757385254, + "p99": 95.8079993724823 + }, + "roundtrip": { + "p50": 154.01600301265717, + "p90": 161.47199273109436, + "p95": 164.5440012216568, + "p99": 176.83200538158417 + }, + "isolatedSum": { + "p50": 179.80800569057465, + "p90": 194.7840005159378, + "p95": 200.06399601697922, + "p99": 258.9759975671768 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 1, + "recvTokensMax": 32, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 104.38399761915207, + "p90": 108.44799876213074, + "p95": 111.455999314785, + "p99": 119.74400281906128 + }, + 
"combine": { + "p50": 83.26400071382523, + "p90": 88.03199976682663, + "p95": 88.22400122880936, + "p99": 92.83199906349182 + }, + "roundtrip": { + "p50": 154.9759954214096, + "p90": 161.18399798870087, + "p95": 165.0879979133606, + "p99": 170.01600563526154 + }, + "isolatedSum": { + "p50": 187.6479983329773, + "p90": 196.47999852895737, + "p95": 199.68000054359436, + "p99": 212.5760018825531 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 1, + "recvTokensMax": 128, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-94696c7b", + "identity": "h100|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|balanced+eplb|8|decode|normal|none|none|0|tuned||f0e66a15078595b", + "colorKey": "h100_c0c0ad86", + "comparisonKey": "00faf19eae8c1230", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:00:00.906485+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h100-dgxc-slurm_19", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 · balanced+eplb", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "balanced", + "routingLabel": "balanced+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": 
"packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "f0e66a15078595b", + "workloadId": "set:8:7af12818400d6348", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": 1, + "eplbImbalanceAfter": 1, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271935069", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271935069", + "createdAt": "2026-06-26T23:58:25Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 97.63199836015701, + "p90": 106.36799782514572, + "p95": 109.63200032711029, + "p99": 118.65600198507309 + }, + "combine": { + "p50": 71.45600020885468, + "p90": 78.94399762153625, + "p95": 79.42400127649307, + "p99": 82.24000036716461 + }, + "roundtrip": { + "p50": 145.4080045223236, + "p90": 154.23999726772308, + "p95": 155.64799308776855, + "p99": 157.98400342464447 + }, + "isolatedSum": { + "p50": 169.0879985690117, + "p90": 185.31199544668198, + "p95": 189.05600160360336, + "p99": 200.8960023522377 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 229376, + "combineLogicalBytes": 229376, + "fanoutMean": 2, + "recvTokensMax": 3, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 77.08799839019775, + "p90": 104.06400263309479, + "p95": 105.66399991512299, + "p99": 111.1999973654747 + }, + "combine": { + "p50": 65.05600363016129, + "p90": 74.5600014925003, + "p95": 79.00799810886383, + "p99": 82.33600109815598 + }, + "roundtrip": { + "p50": 122.8799968957901, + "p90": 151.64799988269806, + "p95": 153.24799716472626, + "p99": 161.50400042533875 + }, + "isolatedSum": { + "p50": 142.14400202035904, + "p90": 178.6240041255951, 
+ "p95": 184.67199802398682, + "p99": 193.53599846363068 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 458752, + "combineLogicalBytes": 458752, + "fanoutMean": 2, + "recvTokensMax": 6, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 97.56799787282944, + "p90": 107.80800133943558, + "p95": 114.04799669981003, + "p99": 120.44800072908401 + }, + "combine": { + "p50": 65.69600105285645, + "p90": 78.87999713420868, + "p95": 79.32800054550171, + "p99": 87.13600039482117 + }, + "roundtrip": { + "p50": 123.99999797344208, + "p90": 158.720001578331, + "p95": 165.3439998626709, + "p99": 176.28799378871918 + }, + "isolatedSum": { + "p50": 163.26399892568588, + "p90": 186.68799847364426, + "p95": 193.37599724531174, + "p99": 207.58400112390518 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 2, + "recvTokensMax": 12, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 98.11200201511383, + "p90": 105.76000064611435, + "p95": 108.64000022411346, + "p99": 122.30399996042252 + }, + "combine": { + "p50": 72.22399860620499, + "p90": 79.1039988398552, + "p95": 80.38400113582611, + "p99": 87.0399996638298 + }, + "roundtrip": { + "p50": 145.28000354766846, + "p90": 152.54400670528412, + "p95": 155.39200603961945, + "p99": 160.38399934768677 + }, + "isolatedSum": { + "p50": 170.33600062131882, + "p90": 184.86399948596954, + "p95": 189.02400135993958, + "p99": 209.34399962425232 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1835008, + "combineLogicalBytes": 1835008, + "fanoutMean": 2, + "recvTokensMax": 24, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 98.52799773216248, + 
"p90": 109.69600081443787, + "p95": 117.34399944543839, + "p99": 131.45600259304047 + }, + "combine": { + "p50": 78.59200239181519, + "p90": 81.53600245714188, + "p95": 86.91199868917465, + "p99": 88.32000195980072 + }, + "roundtrip": { + "p50": 146.97599411010742, + "p90": 156.47999942302704, + "p95": 161.56800091266632, + "p99": 173.18400740623474 + }, + "isolatedSum": { + "p50": 177.12000012397766, + "p90": 191.23200327157974, + "p95": 204.25599813461304, + "p99": 219.7760045528412 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 2, + "recvTokensMax": 48, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 84.54400300979614, + "p90": 107.07200318574905, + "p95": 113.40799927711487, + "p99": 126.08000636100769 + }, + "combine": { + "p50": 71.10399752855301, + "p90": 80.57600259780884, + "p95": 87.13600039482117, + "p99": 95.51999717950821 + }, + "roundtrip": { + "p50": 127.93600559234619, + "p90": 151.7760008573532, + "p95": 154.40000593662262, + "p99": 161.56800091266632 + }, + "isolatedSum": { + "p50": 155.64800053834915, + "p90": 187.6480057835579, + "p95": 200.54399967193604, + "p99": 221.6000035405159 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 7340032, + "combineLogicalBytes": 7340032, + "fanoutMean": 2, + "recvTokensMax": 96, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 100.35199671983719, + "p90": 113.37599903345108, + "p95": 126.49600207805634, + "p99": 162.1759980916977 + }, + "combine": { + "p50": 79.58400249481201, + "p90": 87.16800063848495, + "p95": 87.71199733018875, + "p99": 95.45599669218063 + }, + "roundtrip": { + "p50": 154.62400019168854, + "p90": 165.18400609493256, + "p95": 170.27199268341064, + "p99": 184.7359985113144 + }, + 
"isolatedSum": { + "p50": 179.9359992146492, + "p90": 200.54399967193604, + "p95": 214.2079994082451, + "p99": 257.6319947838783 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 2, + "recvTokensMax": 192, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 98.1760025024414, + "p90": 120.80000340938568, + "p95": 125.56800246238708, + "p99": 134.49600338935852 + }, + "combine": { + "p50": 87.77599781751633, + "p90": 96.0640013217926, + "p95": 97.69599884748459, + "p99": 107.35999792814255 + }, + "roundtrip": { + "p50": 160.70400178432465, + "p90": 178.3680021762848, + "p95": 184.1920018196106, + "p99": 190.62399864196777 + }, + "isolatedSum": { + "p50": 185.95200031995773, + "p90": 216.86400473117828, + "p95": 223.26400130987167, + "p99": 241.85600131750107 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 2, + "recvTokensMax": 384, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-b4d89049", + "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-moving|8|decode|normal|none|none|0|tuned||90042e0db6a8297", + "colorKey": "h100_1c83c0b0", + "comparisonKey": "b84a29c0643a5455", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:11:39.736162+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h100-dgxc-slurm_09", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 · hotspot-moving", + "shape": { + "hidden": 
7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-moving", + "routingLabel": "hotspot-moving", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "90042e0db6a8297", + "workloadId": "set:3:8fd05d9ebee41064", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28272315381", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272315381", + "createdAt": "2026-06-27T00:10:35Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 98.1760025024414, + "p90": 105.12000322341919, + "p95": 107.4879989027977, + "p99": 114.43199962377548 + }, + "combine": { + "p50": 81.216000020504, + "p90": 87.8399983048439, + "p95": 88.19200098514557, + "p99": 89.08800035715103 + }, + "roundtrip": { + "p50": 154.4959992170334, + "p90": 160.99199652671814, + "p95": 162.59199380874634, + "p99": 167.35999286174774 + }, + "isolatedSum": { + "p50": 179.3920025229454, + "p90": 192.9600015282631, + "p95": 195.67999988794327, + "p99": 203.5199999809265 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 64, + "stragglerRank": 7, + "correct": true, 
+ "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 104.3199971318245, + "p90": 109.98400300741196, + "p95": 111.77600175142288, + "p99": 118.81600320339203 + }, + "combine": { + "p50": 89.1840010881424, + "p90": 95.58399766683578, + "p95": 96.09600156545639, + "p99": 97.18400239944458 + }, + "roundtrip": { + "p50": 164.2560064792633, + "p90": 169.69600319862366, + "p95": 171.64799571037292, + "p99": 176.64000391960144 + }, + "isolatedSum": { + "p50": 193.5039982199669, + "p90": 205.56800067424774, + "p95": 207.87200331687927, + "p99": 216.0000056028366 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19525632, + "combineLogicalBytes": 19525632, + "fanoutMean": 5.3203125, + "recvTokensMax": 256, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 137.28000223636627, + "p90": 146.11199498176575, + "p95": 149.6639996767044, + "p99": 152.19199657440186 + }, + "combine": { + "p50": 128.48000228405, + "p90": 130.14400005340576, + "p95": 130.65600395202637, + "p99": 136.57599687576294 + }, + "roundtrip": { + "p50": 231.10400140285492, + "p90": 236.4799976348877, + "p95": 238.11200261116028, + "p99": 242.88000166416168 + }, + "isolatedSum": { + "p50": 265.76000452041626, + "p90": 276.2559950351715, + "p95": 280.3200036287308, + "p99": 288.7679934501648 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1024, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-c41b3617", + "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-moving@s1|8|decode|normal|none|none|1|tuned||6288a1aa76c20e7", + "colorKey": "h100_52b1e978", + "comparisonKey": "b84a29c0643a5455", + "schemaVersion": 3, + "generatedAt": 
"2026-06-27T00:11:37.049465+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_06", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 · hotspot-moving@s1", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-moving", + "routingLabel": "hotspot-moving@s1", + "routingStep": 1, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "6288a1aa76c20e7", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28272318481", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272318481", + "createdAt": "2026-06-27T00:10:41Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 100.28800368309021, + "p90": 105.0880029797554, + "p95": 107.32799768447876, + "p99": 112.67200112342834 + }, + "combine": { + "p50": 80.22399991750717, + "p90": 
87.10400015115738, + "p95": 87.42400258779526, + "p99": 89.40800279378891 + }, + "roundtrip": { + "p50": 155.5200070142746, + "p90": 160.70400178432465, + "p95": 162.04799711704254, + "p99": 166.30400717258453 + }, + "isolatedSum": { + "p50": 180.51200360059738, + "p90": 192.19200313091278, + "p95": 194.75200027227402, + "p99": 202.08000391721725 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 64, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 105.24799674749374, + "p90": 198.68800044059753, + "p95": 200.80000162124634, + "p99": 207.68000185489655 + }, + "combine": { + "p50": 89.56799656152725, + "p90": 95.58399766683578, + "p95": 126.39999389648438, + "p99": 150.91200172901154 + }, + "roundtrip": { + "p50": 164.44799304008484, + "p90": 169.53599452972412, + "p95": 170.8800047636032, + "p99": 176.92799866199493 + }, + "isolatedSum": { + "p50": 194.815993309021, + "p90": 294.2719981074333, + "p95": 327.1999955177307, + "p99": 358.5920035839081 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19525632, + "combineLogicalBytes": 19525632, + "fanoutMean": 5.3203125, + "recvTokensMax": 256, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 137.82399892807007, + "p90": 144.99199390411377, + "p95": 146.55999839305878, + "p99": 148.41599762439728 + }, + "combine": { + "p50": 128.00000607967377, + "p90": 131.9359987974167, + "p95": 133.31200182437897, + "p99": 136.51199638843536 + }, + "roundtrip": { + "p50": 235.07200181484222, + "p90": 240.86399376392365, + "p95": 242.71999299526215, + "p99": 245.27999758720398 + }, + "isolatedSum": { + "p50": 265.82400500774384, + "p90": 276.92799270153046, + "p95": 279.87200021743774, + "p99": 
284.92799401283264 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1024, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-595b6f36", + "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-moving@s2|8|decode|normal|none|none|2|tuned||675e15b52e37958", + "colorKey": "h100_55b1ee31", + "comparisonKey": "b84a29c0643a5455", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:11:41.163804+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_02", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 · hotspot-moving@s2", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-moving", + "routingLabel": "hotspot-moving@s2", + "routingStep": 2, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "675e15b52e37958", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": 
"sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28272321917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272321917", + "createdAt": "2026-06-27T00:10:49Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 98.24000298976898, + "p90": 103.96800190210342, + "p95": 106.30399733781815, + "p99": 111.07199639081955 + }, + "combine": { + "p50": 79.52000200748444, + "p90": 86.87999844551086, + "p95": 87.52000331878662, + "p99": 88.0960002541542 + }, + "roundtrip": { + "p50": 153.28000485897064, + "p90": 161.3759994506836, + "p95": 163.4880006313324, + "p99": 455.80801367759705 + }, + "isolatedSum": { + "p50": 177.76000499725342, + "p90": 190.8480003476143, + "p95": 193.82400065660477, + "p99": 199.16799664497375 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 64, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 103.84000092744827, + "p90": 109.0560033917427, + "p95": 110.1439967751503, + "p99": 113.88800293207169 + }, + "combine": { + "p50": 87.87199854850769, + "p90": 95.32800316810608, + "p95": 95.90400010347366, + "p99": 96.25600278377533 + }, + "roundtrip": { + "p50": 161.98399662971497, + "p90": 168.99199783802032, + "p95": 170.56000232696533, + "p99": 175.80799758434296 + }, + "isolatedSum": { + "p50": 191.71199947595596, + "p90": 204.38400655984879, + "p95": 206.04799687862396, + "p99": 210.14400571584702 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19525632, + "combineLogicalBytes": 19525632, + "fanoutMean": 5.3203125, + "recvTokensMax": 256, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 136.80000603199005, + "p90": 145.4399973154068, + "p95": 146.68799936771393, + "p99": 149.4079977273941 + }, + "combine": { + "p50": 123.99999797344208, + "p90": 129.05600666999817, + "p95": 130.36799430847168, + "p99": 136.00000739097595 + }, + "roundtrip": { + "p50": 228.7999987602234, + "p90": 236.12800240516663, + "p95": 237.98400163650513, + "p99": 241.5039986371994 + }, + "isolatedSum": { + "p50": 260.80000400543213, + "p90": 274.49600398540497, + "p95": 277.0559936761856, + "p99": 285.40800511837006 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1024, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-f5ba95c3", + "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-moving@s3|8|decode|normal|none|none|3|tuned||82b2963fc322419", + "colorKey": "h100_54b1ec9e", + "comparisonKey": "b84a29c0643a5455", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:12:09.752348+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_14", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 · hotspot-moving@s3", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-moving", + "routingLabel": "hotspot-moving@s3", + "routingStep": 3, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + 
"achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "82b2963fc322419", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28272325031", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272325031", + "createdAt": "2026-06-27T00:10:55Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 97.98400104045868, + "p90": 104.44799810647964, + "p95": 107.84000158309937, + "p99": 116.06399714946747 + }, + "combine": { + "p50": 81.02399855852127, + "p90": 87.64799684286118, + "p95": 88.06400001049042, + "p99": 96.00000083446503 + }, + "roundtrip": { + "p50": 156.41599893569946, + "p90": 162.62400150299072, + "p95": 165.75999557971954, + "p99": 176.7359972000122 + }, + "isolatedSum": { + "p50": 179.00799959897995, + "p90": 192.09599494934082, + "p95": 195.90400159358978, + "p99": 212.0639979839325 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 64, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 103.20000350475311, + "p90": 107.39199817180634, + "p95": 111.51999980211258, + "p99": 119.00799721479416 + }, + "combine": { + "p50": 88.16000074148178, + "p90": 95.8079993724823, + "p95": 96.16000205278397, + "p99": 
98.11200201511383 + }, + "roundtrip": { + "p50": 162.78399527072906, + "p90": 168.73599588871002, + "p95": 170.9440052509308, + "p99": 176.57600343227386 + }, + "isolatedSum": { + "p50": 191.3600042462349, + "p90": 203.19999754428864, + "p95": 207.68000185489655, + "p99": 217.119999229908 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19525632, + "combineLogicalBytes": 19525632, + "fanoutMean": 5.3203125, + "recvTokensMax": 256, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 137.28000223636627, + "p90": 149.1200029850006, + "p95": 151.0079950094223, + "p99": 153.18399667739868 + }, + "combine": { + "p50": 128.86400520801544, + "p90": 131.1360001564026, + "p95": 135.71199774742126, + "p99": 138.3039951324463 + }, + "roundtrip": { + "p50": 234.49599742889404, + "p90": 241.4720058441162, + "p95": 242.65600740909576, + "p99": 247.9040026664734 + }, + "isolatedSum": { + "p50": 266.1440074443817, + "p90": 280.2560031414032, + "p95": 286.71999275684357, + "p99": 291.48799180984497 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1024, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-fb3ea9d7", + "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|decode|normal|none|none|0|tuned||2ad5ef98d328fa1", + "colorKey": "h100_b654f9b2", + "comparisonKey": "10b5062b8e23fcad", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:55:39.087780+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h100-dgxc-slurm_00", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + 
"topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 · hotspot-single", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-single", + "routingLabel": "hotspot-single", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "2ad5ef98d328fa1", + "workloadId": "set:4:286be993cd819ed9", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271817166", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271817166", + "createdAt": "2026-06-26T23:54:31Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 97.82399982213974, + "p90": 105.95200210809708, + "p95": 109.95200276374817, + "p99": 121.50400131940842 + }, + "combine": { + "p50": 80.25600016117096, + "p90": 81.88799768686295, + "p95": 83.3280012011528, + "p99": 89.37600255012512 + }, + "roundtrip": { + "p50": 152.12799608707428, + "p90": 158.78400206565857, + "p95": 160.64000129699707, + "p99": 166.81599617004395 + }, + "isolatedSum": { + "p50": 178.0799999833107, + "p90": 187.83999979496002, + "p95": 193.28000396490097, + "p99": 210.88000386953354 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 97.08800166845322, + "p90": 103.39199751615524, + "p95": 107.51999914646149, + "p99": 115.93600362539291 + }, + "combine": { + "p50": 80.89599758386612, + "p90": 84.03199911117554, + "p95": 87.42400258779526, + "p99": 89.47200328111649 + }, + "roundtrip": { + "p50": 153.60000729560852, + "p90": 161.15200519561768, + "p95": 163.83999586105347, + "p99": 171.55200242996216 + }, + "isolatedSum": { + "p50": 177.98399925231934, + "p90": 187.42399662733078, + "p95": 194.94400173425674, + "p99": 205.4080069065094 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 64, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 103.39199751615524, + "p90": 108.47999900579453, + "p95": 110.55999994277954, + "p99": 117.18399822711945 + }, + "combine": { + "p50": 89.34400230646133, + "p90": 95.551997423172, + "p95": 97.34400361776352, + "p99": 99.93600100278854 + }, + "roundtrip": { + "p50": 162.75200247764587, + "p90": 170.43200135231018, + "p95": 172.83199727535248, + "p99": 179.61600422859192 + }, + "isolatedSum": { + "p50": 192.73599982261658, + "p90": 204.03199642896652, + "p95": 207.90400356054306, + "p99": 217.119999229908 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19525632, + "combineLogicalBytes": 19525632, + "fanoutMean": 5.3203125, + "recvTokensMax": 256, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 137.85600662231445, + "p90": 144.41600441932678, + 
"p95": 147.0080018043518, + "p99": 151.16800367832184 + }, + "combine": { + "p50": 128.83199751377106, + "p90": 131.23199343681335, + "p95": 131.99999928474426, + "p99": 137.95199990272522 + }, + "roundtrip": { + "p50": 233.75999927520752, + "p90": 239.3919974565506, + "p95": 240.92799425125122, + "p99": 245.1840043067932 + }, + "isolatedSum": { + "p50": 266.6880041360855, + "p90": 275.64799785614014, + "p95": 279.00800108909607, + "p99": 289.12000358104706 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1024, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-e0ce741a", + "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|decode|normal|none|none|0|tuned||b6caf944f6bb621", + "colorKey": "h100_b654f9b2", + "comparisonKey": "10b5062b8e23fcad", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:01:31.374180+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h100-dgxc-slurm_19", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 · hotspot-single", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-single", + "routingLabel": "hotspot-single", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + 
"fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b6caf944f6bb621", + "workloadId": "set:8:286be993cd819ed9", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28272004392", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272004392", + "createdAt": "2026-06-27T00:00:35Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 96.67199850082397, + "p90": 104.60799932479858, + "p95": 106.11200332641602, + "p99": 113.56800049543381 + }, + "combine": { + "p50": 79.00799810886383, + "p90": 82.0159986615181, + "p95": 82.36800134181976, + "p99": 87.67999708652496 + }, + "roundtrip": { + "p50": 147.2640037536621, + "p90": 154.59200739860535, + "p95": 157.3439985513687, + "p99": 161.5999937057495 + }, + "isolatedSum": { + "p50": 175.6799966096878, + "p90": 186.62399798631668, + "p95": 188.48000466823578, + "p99": 201.24799758195877 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 99.67999905347824, + "p90": 105.0880029797554, + "p95": 107.16799646615982, + "p99": 112.99200356006622 + }, + "combine": { + "p50": 81.11999928951263, + "p90": 82.49600231647491, + "p95": 83.03999900817871, + "p99": 87.2960016131401 + }, + "roundtrip": { + "p50": 147.0080018043518, + "p90": 153.6639928817749, + "p95": 155.71199357509613, + "p99": 
159.10400450229645 + }, + "isolatedSum": { + "p50": 180.79999834299088, + "p90": 187.58400529623032, + "p95": 190.20799547433853, + "p99": 200.28800517320633 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1247232, + "combineLogicalBytes": 1247232, + "fanoutMean": 5.4375, + "recvTokensMax": 16, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 97.18400239944458, + "p90": 103.93600165843964, + "p95": 106.30399733781815, + "p99": 122.04799801111221 + }, + "combine": { + "p50": 78.94399762153625, + "p90": 82.43200182914734, + "p95": 86.40000224113464, + "p99": 103.45599800348282 + }, + "roundtrip": { + "p50": 148.15999567508698, + "p90": 158.55999290943146, + "p95": 160.3199988603592, + "p99": 164.09599781036377 + }, + "isolatedSum": { + "p50": 176.12800002098083, + "p90": 186.36800348758698, + "p95": 192.7039995789528, + "p99": 225.50399601459503 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2451456, + "combineLogicalBytes": 2451456, + "fanoutMean": 5.34375, + "recvTokensMax": 32, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 98.91200065612793, + "p90": 104.35199737548828, + "p95": 106.65600001811981, + "p99": 112.47999966144562 + }, + "combine": { + "p50": 81.24800026416779, + "p90": 83.3280012011528, + "p95": 87.0399996638298, + "p99": 87.93599903583527 + }, + "roundtrip": { + "p50": 153.4080058336258, + "p90": 159.61599349975586, + "p95": 161.47199273109436, + "p99": 165.21599888801575 + }, + "isolatedSum": { + "p50": 180.16000092029572, + "p90": 187.67999857664108, + "p95": 193.69599968194962, + "p99": 200.41599869728088 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 64, + "stragglerRank": 7, + "correct": true, + 
"samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 96.92800045013428, + "p90": 104.3199971318245, + "p95": 110.55999994277954, + "p99": 161.9199961423874 + }, + "combine": { + "p50": 81.4720019698143, + "p90": 87.2960016131401, + "p95": 87.8399983048439, + "p99": 90.27200192213058 + }, + "roundtrip": { + "p50": 153.43999862670898, + "p90": 160.19199788570404, + "p95": 162.78399527072906, + "p99": 169.98399794101715 + }, + "isolatedSum": { + "p50": 178.40000241994858, + "p90": 191.6159987449646, + "p95": 198.39999824762344, + "p99": 252.19199806451797 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9748480, + "combineLogicalBytes": 9748480, + "fanoutMean": 5.3125, + "recvTokensMax": 128, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 103.64799946546555, + "p90": 108.31999778747559, + "p95": 110.62400043010712, + "p99": 114.84800279140472 + }, + "combine": { + "p50": 87.5839963555336, + "p90": 91.839998960495, + "p95": 95.39200365543365, + "p99": 96.38399630784988 + }, + "roundtrip": { + "p50": 155.96799552440643, + "p90": 165.50399363040924, + "p95": 168.41599345207214, + "p99": 175.64800381660461 + }, + "isolatedSum": { + "p50": 191.23199582099915, + "p90": 200.15999674797058, + "p95": 206.01600408554077, + "p99": 211.2319990992546 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19525632, + "combineLogicalBytes": 19525632, + "fanoutMean": 5.3203125, + "recvTokensMax": 256, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 114.88000303506851, + "p90": 126.11199915409088, + "p95": 127.6479959487915, + "p99": 133.56800377368927 + }, + "combine": { + "p50": 98.43199700117111, + "p90": 103.96800190210342, + "p95": 105.8880016207695, + "p99": 119.71200257539749 + }, 
+ "roundtrip": { + "p50": 180.38399517536163, + "p90": 191.39200448989868, + "p95": 194.39999759197235, + "p99": 201.9840031862259 + }, + "isolatedSum": { + "p50": 213.31200003623962, + "p90": 230.0800010561943, + "p95": 233.535997569561, + "p99": 253.28000634908676 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38621184, + "combineLogicalBytes": 38621184, + "fanoutMean": 5.26171875, + "recvTokensMax": 512, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 141.79199934005737, + "p90": 147.2959965467453, + "p95": 149.82399344444275, + "p99": 153.3759981393814 + }, + "combine": { + "p50": 122.36800044775009, + "p90": 128.4160017967224, + "p95": 129.02399897575378, + "p99": 136.1600011587143 + }, + "roundtrip": { + "p50": 231.77599906921387, + "p90": 241.85599386692047, + "p95": 244.9280023574829, + "p99": 248.76800179481506 + }, + "isolatedSum": { + "p50": 264.15999978780746, + "p90": 275.7119983434677, + "p95": 278.84799242019653, + "p99": 289.5359992980957 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1024, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-73951147", + "identity": "h100|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|hotspot-single+eplb|8|decode|normal|none|none|0|tuned||e41f5099a9733ac", + "colorKey": "h100_456a963c", + "comparisonKey": "12dbc31e8daf0a44", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:01:37.187210+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h100-dgxc-slurm_01", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": 
"h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 · hotspot-single+eplb", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "hotspot-single", + "routingLabel": "hotspot-single+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "e41f5099a9733ac", + "workloadId": "set:8:286be993cd819ed9", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": 1.830078125, + "eplbImbalanceAfter": 1.0007595486111112, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28272008867", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272008867", + "createdAt": "2026-06-27T00:00:42Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 68.41599941253662, + "p90": 76.1599987745285, + "p95": 77.69600301980972, + "p99": 84.83199775218964 + }, + "combine": { + "p50": 71.07199728488922, + "p90": 73.11999797821045, + "p95": 73.7600028514862, + "p99": 79.74400371313095 + }, + "roundtrip": { + "p50": 126.46399438381195, + "p90": 130.62399625778198, + "p95": 131.55199587345123, + "p99": 136.4479959011078 + }, + "isolatedSum": { + "p50": 139.48799669742584, + "p90": 149.27999675273895, + "p95": 151.45600587129593, + "p99": 164.5760014653206 
+ }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 68.70400160551071, + "p90": 76.9599974155426, + "p95": 81.727996468544, + "p99": 107.10400342941284 + }, + "combine": { + "p50": 71.48800045251846, + "p90": 73.15199822187424, + "p95": 73.56800138950348, + "p99": 79.55200225114822 + }, + "roundtrip": { + "p50": 127.77599692344666, + "p90": 131.23199343681335, + "p95": 132.60799646377563, + "p99": 138.7840062379837 + }, + "isolatedSum": { + "p50": 140.19200205802917, + "p90": 150.11199563741684, + "p95": 155.29599785804749, + "p99": 186.65600568056107 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1175552, + "combineLogicalBytes": 1175552, + "fanoutMean": 5.125, + "recvTokensMax": 12, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 70.8480030298233, + "p90": 77.79199630022049, + "p95": 80.09599894285202, + "p99": 87.0399996638298 + }, + "combine": { + "p50": 72.4480003118515, + "p90": 73.56800138950348, + "p95": 74.27199929952621, + "p99": 79.80799674987793 + }, + "roundtrip": { + "p50": 126.94400548934937, + "p90": 131.77600502967834, + "p95": 133.4719955921173, + "p99": 137.2479945421219 + }, + "isolatedSum": { + "p50": 143.2960033416748, + "p90": 151.35999768972397, + "p95": 154.36799824237823, + "p99": 166.84799641370773 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2451456, + "combineLogicalBytes": 2451456, + "fanoutMean": 5.34375, + "recvTokensMax": 23, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 70.11199742555618, + "p90": 76.9599974155426, + "p95": 79.3600007891655, + 
"p99": 86.14400029182434 + }, + "combine": { + "p50": 72.64000177383423, + "p90": 73.82400333881378, + "p95": 74.94399696588516, + "p99": 81.08799904584885 + }, + "roundtrip": { + "p50": 125.47199428081512, + "p90": 131.6480040550232, + "p95": 133.66399705410004, + "p99": 139.29599523544312 + }, + "isolatedSum": { + "p50": 142.7519991993904, + "p90": 150.78400075435638, + "p95": 154.30399775505066, + "p99": 167.2319993376732 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4730880, + "combineLogicalBytes": 4730880, + "fanoutMean": 5.15625, + "recvTokensMax": 44, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 81.40800148248672, + "p90": 83.99999886751175, + "p95": 86.33600175380707, + "p99": 91.36000275611877 + }, + "combine": { + "p50": 73.37599992752075, + "p90": 78.75200361013412, + "p95": 79.6160027384758, + "p99": 81.34400099515915 + }, + "roundtrip": { + "p50": 125.95200538635254, + "p90": 133.15199315547943, + "p95": 134.5919966697693, + "p99": 140.32000303268433 + }, + "isolatedSum": { + "p50": 154.78400141000748, + "p90": 162.75200247764587, + "p95": 165.95200449228287, + "p99": 172.70400375127792 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9691136, + "combineLogicalBytes": 9691136, + "fanoutMean": 5.28125, + "recvTokensMax": 88, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 90.08000046014786, + "p90": 92.54399687051773, + "p95": 94.4959968328476, + "p99": 98.52799773216248 + }, + "combine": { + "p50": 80.09599894285202, + "p90": 81.56800270080566, + "p95": 82.07999914884567, + "p99": 87.2960016131401 + }, + "roundtrip": { + "p50": 141.08799397945404, + "p90": 144.96000111103058, + "p95": 146.30399644374847, + "p99": 150.33599734306335 + }, + "isolatedSum": { + "p50": 170.17599940299988, + "p90": 
174.1119995713234, + "p95": 176.57599598169327, + "p99": 185.82399934530258 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19568640, + "combineLogicalBytes": 19568640, + "fanoutMean": 5.33203125, + "recvTokensMax": 179, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 95.23200243711472, + "p90": 113.24799805879593, + "p95": 114.59200084209442, + "p99": 119.10399794578552 + }, + "combine": { + "p50": 89.85599875450134, + "p90": 98.2080027461052, + "p95": 114.3679991364479, + "p99": 130.49599528312683 + }, + "roundtrip": { + "p50": 159.39199924468994, + "p90": 165.53600132465363, + "p95": 167.87199676036835, + "p99": 179.51999604701996 + }, + "isolatedSum": { + "p50": 185.08800119161606, + "p90": 211.45600080490112, + "p95": 228.95999997854233, + "p99": 249.59999322891235 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38750208, + "combineLogicalBytes": 38750208, + "fanoutMean": 5.279296875, + "recvTokensMax": 348, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 111.29599809646606, + "p90": 117.21599847078323, + "p95": 118.43200027942657, + "p99": 122.72000312805176 + }, + "combine": { + "p50": 106.39999806880951, + "p90": 112.28799819946289, + "p95": 113.11999708414078, + "p99": 114.33599889278412 + }, + "roundtrip": { + "p50": 197.63199985027313, + "p90": 202.11200416088104, + "p95": 203.39199900627136, + "p99": 206.9759964942932 + }, + "isolatedSum": { + "p50": 217.69599616527557, + "p90": 229.50399667024612, + "p95": 231.55199736356735, + "p99": 237.05600202083588 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77342720, + "combineLogicalBytes": 77342720, + "fanoutMean": 5.2685546875, + "recvTokensMax": 687, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + 
"id": "cx-fc133662", + "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform·empty-rank|8|decode|normal|none|empty-rank|0|tuned||5621f0d4899ad7a", + "colorKey": "h100_d54acd03", + "comparisonKey": "fb346b1019e55bb0", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:13:31.132134+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_01", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 · uniform·empty-rank", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform·empty-rank", + "routingStep": 0, + "unevenTokens": "empty-rank", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "5621f0d4899ad7a", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28272375977", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272375977", + "createdAt": "2026-06-27T00:12:38Z", + "sha": 
"45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 63, + "dispatch": { + "p50": 98.01600128412247, + "p90": 108.03200304508209, + "p95": 124.22399967908859, + "p99": 164.000004529953 + }, + "combine": { + "p50": 80.73599636554718, + "p90": 89.63199704885483, + "p95": 104.63999956846237, + "p99": 112.5440001487732 + }, + "roundtrip": { + "p50": 154.1759967803955, + "p90": 160.35200655460358, + "p95": 162.08000481128693, + "p99": 175.3920018672943 + }, + "isolatedSum": { + "p50": 178.75199764966965, + "p90": 197.66400009393692, + "p95": 228.86399924755096, + "p99": 276.5440046787262 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4888576, + "combineLogicalBytes": 4888576, + "fanoutMean": 5.412698268890381, + "recvTokensMax": 46, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 252, + "dispatch": { + "p50": 104.76800054311752, + "p90": 134.0479999780655, + "p95": 136.1279934644699, + "p99": 144.41600441932678 + }, + "combine": { + "p50": 89.02399986982346, + "p90": 104.12800312042236, + "p95": 104.41599786281586, + "p99": 107.90400207042694 + }, + "roundtrip": { + "p50": 166.59200191497803, + "p90": 189.95200097560883, + "p95": 191.96799397468567, + "p99": 199.5840072631836 + }, + "isolatedSum": { + "p50": 193.79200041294098, + "p90": 238.17600309848785, + "p95": 240.54399132728577, + "p99": 252.32000648975372 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19396608, + "combineLogicalBytes": 19396608, + "fanoutMean": 5.36904764175415, + "recvTokensMax": 180, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1022, + "dispatch": { + "p50": 130.52800297737122, + "p90": 139.90400731563568, + "p95": 151.61600708961487, + "p99": 458.5599899291992 + }, + "combine": { + "p50": 120.7680031657219, + "p90": 127.93600559234619, + "p95": 
128.54400277137756, + "p99": 129.50399518013 + }, + "roundtrip": { + "p50": 216.35200083255768, + "p90": 221.98399901390076, + "p95": 224.7679978609085, + "p99": 229.5359969139099 + }, + "isolatedSum": { + "p50": 251.2960061430931, + "p90": 267.8400129079819, + "p95": 280.16000986099243, + "p99": 588.0639851093292 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77529088, + "combineLogicalBytes": 77529088, + "fanoutMean": 5.2915849685668945, + "recvTokensMax": 722, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-e7e5caec", + "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform·linear|8|decode|normal|none|linear|0|tuned||b029c1a6fded400", + "colorKey": "h100_f70758a0", + "comparisonKey": "fb346b1019e55bb0", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:13:24.801629+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_00", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 · uniform·linear", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform·linear", + "routingStep": 0, + "unevenTokens": "linear", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + 
"scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b029c1a6fded400", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28272372388", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272372388", + "createdAt": "2026-06-27T00:12:31Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 98.24000298976898, + "p90": 103.64799946546555, + "p95": 106.4319983124733, + "p99": 112.5119999051094 + }, + "combine": { + "p50": 80.73599636554718, + "p90": 87.55200356245041, + "p95": 88.03199976682663, + "p99": 90.08000046014786 + }, + "roundtrip": { + "p50": 154.33600544929504, + "p90": 159.45599973201752, + "p95": 161.6639941930771, + "p99": 166.75199568271637 + }, + "isolatedSum": { + "p50": 178.97599935531616, + "p90": 191.20000302791595, + "p95": 194.46399807929993, + "p99": 202.59200036525726 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 100.09600222110748, + "p90": 105.27999699115753, + "p95": 106.91200196743011, + "p99": 113.37599903345108 + }, + "combine": { + "p50": 89.53599631786346, + "p90": 96.16000205278397, + "p95": 96.73599898815155, + "p99": 98.43199700117111 + }, + "roundtrip": { + "p50": 163.39200735092163, + "p90": 168.99199783802032, + "p95": 170.43200135231018, + "p99": 174.81599748134613 + }, + "isolatedSum": { + "p50": 189.63199853897095, + "p90": 201.4399990439415, + "p95": 203.64800095558167, + "p99": 
211.8079960346222 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 130.75199723243713, + "p90": 136.99199259281158, + "p95": 138.7840062379837, + "p99": 143.42400431632996 + }, + "combine": { + "p50": 128.1599998474121, + "p90": 130.40000200271606, + "p95": 135.8720064163208, + "p99": 278.6880135536194 + }, + "roundtrip": { + "p50": 225.75999796390533, + "p90": 231.74400627613068, + "p95": 232.80000686645508, + "p99": 235.6480062007904 + }, + "isolatedSum": { + "p50": 258.91199707984924, + "p90": 267.39199459552765, + "p95": 274.6560126543045, + "p99": 422.11201786994934 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-5fad8218", + "identity": "h100|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|uniform+eplb|8|decode|normal|none|none|0|tuned||73351bbcd4d02de", + "colorKey": "h100_fb5b86de", + "comparisonKey": "bba2bec66db838b4", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:59:15.450287+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h100-dgxc-slurm_17", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 · uniform+eplb", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "uniform", + "routingLabel": "uniform+eplb", + 
"routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "73351bbcd4d02de", + "workloadId": "set:8:d8d49658059863f2", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": 1.078125, + "eplbImbalanceAfter": 1.00048828125, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271923814", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271923814", + "createdAt": "2026-06-26T23:58:04Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 96.99200093746185, + "p90": 104.89600151777267, + "p95": 107.04000294208527, + "p99": 111.68000102043152 + }, + "combine": { + "p50": 75.29599964618683, + "p90": 81.28000050783157, + "p95": 81.69600367546082, + "p99": 83.20000022649765 + }, + "roundtrip": { + "p50": 146.27200365066528, + "p90": 154.11199629306793, + "p95": 156.031996011734, + "p99": 158.6879938840866 + }, + "isolatedSum": { + "p50": 172.28800058364868, + "p90": 186.17600202560425, + "p95": 188.73600661754608, + "p99": 194.88000124692917 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 516096, + "combineLogicalBytes": 516096, + "fanoutMean": 4.5, + "recvTokensMax": 6, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + 
"dispatch": { + "p50": 72.09599763154984, + "p90": 103.87200117111206, + "p95": 106.4319983124733, + "p99": 113.76000195741653 + }, + "combine": { + "p50": 72.67200201749802, + "p90": 81.18399977684021, + "p95": 81.82399719953537, + "p99": 84.28800106048584 + }, + "roundtrip": { + "p50": 127.48800218105316, + "p90": 153.76000106334686, + "p95": 156.3200056552887, + "p99": 158.720001578331 + }, + "isolatedSum": { + "p50": 144.76799964904785, + "p90": 185.05600094795227, + "p95": 188.25599551200867, + "p99": 198.04800301790237 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1089536, + "combineLogicalBytes": 1089536, + "fanoutMean": 4.75, + "recvTokensMax": 11, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 71.26399874687195, + "p90": 100.89600086212158, + "p95": 104.19200360774994, + "p99": 112.96000331640244 + }, + "combine": { + "p50": 72.7040022611618, + "p90": 80.4160013794899, + "p95": 80.6720033288002, + "p99": 87.80799806118011 + }, + "roundtrip": { + "p50": 130.0159990787506, + "p90": 154.78399395942688, + "p95": 158.81599485874176, + "p99": 165.53600132465363 + }, + "isolatedSum": { + "p50": 143.96800100803375, + "p90": 181.31200224161148, + "p95": 184.86400693655014, + "p99": 200.76800137758255 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2207744, + "combineLogicalBytes": 2207744, + "fanoutMean": 4.8125, + "recvTokensMax": 23, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 96.79999947547913, + "p90": 103.16800326108932, + "p95": 105.79200088977814, + "p99": 110.46399921178818 + }, + "combine": { + "p50": 80.73599636554718, + "p90": 81.98399841785431, + "p95": 82.36800134181976, + "p99": 89.75999802350998 + }, + "roundtrip": { + "p50": 150.2400040626526, + "p90": 156.47999942302704, + "p95": 158.91200304031372, + 
"p99": 168.2240068912506 + }, + "isolatedSum": { + "p50": 177.5359958410263, + "p90": 185.15200167894363, + "p95": 188.1600022315979, + "p99": 200.22399723529816 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4558848, + "combineLogicalBytes": 4558848, + "fanoutMean": 4.96875, + "recvTokensMax": 46, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 96.83199971914291, + "p90": 102.24000364542007, + "p95": 104.44799810647964, + "p99": 107.77600109577179 + }, + "combine": { + "p50": 81.05599880218506, + "p90": 87.80799806118011, + "p95": 88.70399743318558, + "p99": 89.75999802350998 + }, + "roundtrip": { + "p50": 152.73599326610565, + "p90": 160.73599457740784, + "p95": 162.75200247764587, + "p99": 167.55199432373047 + }, + "isolatedSum": { + "p50": 177.88799852132797, + "p90": 190.0480017066002, + "p95": 193.15199553966522, + "p99": 197.53599911928177 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9347072, + "combineLogicalBytes": 9347072, + "fanoutMean": 5.09375, + "recvTokensMax": 86, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 91.32800251245499, + "p90": 101.9200012087822, + "p95": 104.19200360774994, + "p99": 108.57599973678589 + }, + "combine": { + "p50": 81.216000020504, + "p90": 90.01599997282028, + "p95": 90.40000289678574, + "p99": 97.88800030946732 + }, + "roundtrip": { + "p50": 142.2400027513504, + "p90": 161.8880033493042, + "p95": 163.96799683570862, + "p99": 168.67199540138245 + }, + "isolatedSum": { + "p50": 172.54400253295898, + "p90": 191.93600118160248, + "p95": 194.59200650453568, + "p99": 206.4640000462532 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 18995200, + "combineLogicalBytes": 18995200, + "fanoutMean": 5.17578125, + "recvTokensMax": 178, + "stragglerRank": 1, + "correct": true, + 
"samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 108.25599730014801, + "p90": 114.9120032787323, + "p95": 117.08799749612808, + "p99": 121.72800302505493 + }, + "combine": { + "p50": 96.0640013217926, + "p90": 97.85600006580353, + "p95": 102.11200267076492, + "p99": 108.96000266075134 + }, + "roundtrip": { + "p50": 166.46400094032288, + "p90": 181.63199722766876, + "p95": 186.0159933567047, + "p99": 189.91999328136444 + }, + "isolatedSum": { + "p50": 204.3199986219406, + "p90": 212.76800334453583, + "p95": 219.200000166893, + "p99": 230.68800568580627 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38291456, + "combineLogicalBytes": 38291456, + "fanoutMean": 5.216796875, + "recvTokensMax": 348, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 112.12799698114395, + "p90": 131.26400113105774, + "p95": 135.6479972600937, + "p99": 141.05600118637085 + }, + "combine": { + "p50": 106.36799782514572, + "p90": 117.37599968910217, + "p95": 120.80000340938568, + "p99": 121.8239963054657 + }, + "roundtrip": { + "p50": 195.68000733852386, + "p90": 214.59199488162994, + "p95": 216.60800278186798, + "p99": 221.91999852657318 + }, + "isolatedSum": { + "p50": 218.49599480628967, + "p90": 248.6400008201599, + "p95": 256.44800066947937, + "p99": 262.87999749183655 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77113344, + "combineLogicalBytes": 77113344, + "fanoutMean": 5.2529296875, + "recvTokensMax": 685, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-7f743bfe", + "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|decode|normal|none|none|0|tuned||14ded8461f2636c", + "colorKey": "h100_aa268d13", + "comparisonKey": "791af0af2f802328", + "schemaVersion": 3, + "generatedAt": 
"2026-06-26T23:59:41.322977+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h100-dgxc-slurm_18", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 · zipf", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf", + "routingLabel": "zipf", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "14ded8461f2636c", + "workloadId": "set:8:f5576e2b712d38c3", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271945409", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271945409", + "createdAt": "2026-06-26T23:58:46Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 93.37600320577621, + "p90": 101.59999877214432, + "p95": 103.16800326108932, + "p99": 108.15999656915665 + }, + "combine": { + "p50": 73.69600236415863, + "p90": 78.17599922418594, + "p95": 
79.99999821186066, + "p99": 82.59200304746628 + }, + "roundtrip": { + "p50": 142.59199798107147, + "p90": 150.62400698661804, + "p95": 152.54400670528412, + "p99": 159.5200002193451 + }, + "isolatedSum": { + "p50": 167.07200556993484, + "p90": 179.77599799633026, + "p95": 183.16800147294998, + "p99": 190.75199961662292 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 444416, + "combineLogicalBytes": 444416, + "fanoutMean": 3.875, + "recvTokensMax": 8, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 96.0640013217926, + "p90": 100.89600086212158, + "p95": 101.82400047779083, + "p99": 107.07200318574905 + }, + "combine": { + "p50": 74.43200051784515, + "p90": 80.48000186681747, + "p95": 81.216000020504, + "p99": 82.11199939250946 + }, + "roundtrip": { + "p50": 143.39199662208557, + "p90": 147.87200093269348, + "p95": 153.31199765205383, + "p99": 168.60799491405487 + }, + "isolatedSum": { + "p50": 170.49600183963776, + "p90": 181.37600272893906, + "p95": 183.04000049829483, + "p99": 189.18400257825851 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 845824, + "combineLogicalBytes": 845824, + "fanoutMean": 3.6875, + "recvTokensMax": 16, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 95.87199985980988, + "p90": 100.73599964380264, + "p95": 102.81600058078766, + "p99": 109.95200276374817 + }, + "combine": { + "p50": 74.30399954319, + "p90": 80.89599758386612, + "p95": 81.4720019698143, + "p99": 84.19200032949448 + }, + "roundtrip": { + "p50": 142.752006649971, + "p90": 153.02400290966034, + "p95": 154.9759954214096, + "p99": 160.0639969110489 + }, + "isolatedSum": { + "p50": 170.17599940299988, + "p90": 181.63199722766876, + "p95": 184.28800255060196, + "p99": 194.14400309324265 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 1691648, + "combineLogicalBytes": 1691648, + "fanoutMean": 3.6875, + "recvTokensMax": 32, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 97.88800030946732, + "p90": 101.82400047779083, + "p95": 103.96800190210342, + "p99": 111.42399907112122 + }, + "combine": { + "p50": 75.6160020828247, + "p90": 81.4720019698143, + "p95": 82.04799890518188, + "p99": 84.03199911117554 + }, + "roundtrip": { + "p50": 146.7519998550415, + "p90": 153.47200632095337, + "p95": 154.9759954214096, + "p99": 167.9680049419403 + }, + "isolatedSum": { + "p50": 173.50400239229202, + "p90": 183.29600244760513, + "p95": 186.0160008072853, + "p99": 195.45599818229675 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3354624, + "combineLogicalBytes": 3354624, + "fanoutMean": 3.65625, + "recvTokensMax": 64, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 97.08800166845322, + "p90": 100.67199915647507, + "p95": 104.25599664449692, + "p99": 110.6560006737709 + }, + "combine": { + "p50": 78.94399762153625, + "p90": 82.04799890518188, + "p95": 82.78399705886841, + "p99": 89.40800279378891 + }, + "roundtrip": { + "p50": 150.7200002670288, + "p90": 159.10400450229645, + "p95": 161.69600188732147, + "p99": 167.07199811935425 + }, + "isolatedSum": { + "p50": 176.03199928998947, + "p90": 182.71999806165695, + "p95": 187.03999370336533, + "p99": 200.06400346755981 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6537216, + "combineLogicalBytes": 6537216, + "fanoutMean": 3.5625, + "recvTokensMax": 127, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 96.47999703884125, + "p90": 101.31199657917023, + "p95": 104.5759990811348, + "p99": 
110.62400043010712 + }, + "combine": { + "p50": 86.46400272846222, + "p90": 90.11200070381165, + "p95": 90.62399715185165, + "p99": 93.18400174379349 + }, + "roundtrip": { + "p50": 158.75199437141418, + "p90": 163.55200111865997, + "p95": 164.89599645137787, + "p99": 169.21600699424744 + }, + "isolatedSum": { + "p50": 182.94399976730347, + "p90": 191.42399728298187, + "p95": 195.19999623298645, + "p99": 203.8080021739006 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 12859392, + "combineLogicalBytes": 12859392, + "fanoutMean": 3.50390625, + "recvTokensMax": 255, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 106.9440022110939, + "p90": 138.36799561977386, + "p95": 143.0400013923645, + "p99": 250.2720057964325 + }, + "combine": { + "p50": 95.0080007314682, + "p90": 98.39999675750732, + "p95": 98.91200065612793, + "p99": 105.59999942779541 + }, + "roundtrip": { + "p50": 176.67199671268463, + "p90": 184.03199315071106, + "p95": 187.3600035905838, + "p99": 190.5599981546402 + }, + "isolatedSum": { + "p50": 201.9520029425621, + "p90": 236.7679923772812, + "p95": 241.95200204849243, + "p99": 355.8720052242279 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 25145344, + "combineLogicalBytes": 25145344, + "fanoutMean": 3.42578125, + "recvTokensMax": 510, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 128.4160017967224, + "p90": 145.9520012140274, + "p95": 148.83199334144592, + "p99": 151.99999511241913 + }, + "combine": { + "p50": 119.74400281906128, + "p90": 122.56000190973282, + "p95": 123.80799651145935, + "p99": 129.7920048236847 + }, + "roundtrip": { + "p50": 228.2560020685196, + "p90": 233.88800024986267, + "p95": 236.12800240516663, + "p99": 240.28800427913666 + }, + "isolatedSum": { + "p50": 248.1600046157837, + "p90": 
268.5120031237602, + "p95": 272.6399898529053, + "p99": 281.7919999361038 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-456ed1f6", + "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|decode|normal|none|none|0|tuned||1fa7fe74d0e30a3", + "colorKey": "h100_aa268d13", + "comparisonKey": "791af0af2f802328", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:55:00.953910+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h100-dgxc-slurm_16", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 · zipf", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf", + "routingLabel": "zipf", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "1fa7fe74d0e30a3", + "workloadId": "set:4:f5576e2b712d38c3", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": 
"sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271802749", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271802749", + "createdAt": "2026-06-26T23:54:05Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 97.34400361776352, + "p90": 106.33599758148193, + "p95": 108.99200290441513, + "p99": 118.14399808645248 + }, + "combine": { + "p50": 78.72000336647034, + "p90": 81.11999928951263, + "p95": 82.14399963617325, + "p99": 87.42400258779526 + }, + "roundtrip": { + "p50": 148.76799285411835, + "p90": 160.5439931154251, + "p95": 164.73600268363953, + "p99": 172.44799435138702 + }, + "isolatedSum": { + "p50": 176.06400698423386, + "p90": 187.45599687099457, + "p95": 191.13600254058838, + "p99": 205.56800067424774 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 444416, + "combineLogicalBytes": 444416, + "fanoutMean": 3.875, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 97.50399738550186, + "p90": 104.38399761915207, + "p95": 108.99200290441513, + "p99": 137.2479945421219 + }, + "combine": { + "p50": 79.39200103282928, + "p90": 86.68799698352814, + "p95": 87.52000331878662, + "p99": 103.90400141477585 + }, + "roundtrip": { + "p50": 152.99199521541595, + "p90": 162.9759967327118, + "p95": 165.69599509239197, + "p99": 171.55200242996216 + }, + "isolatedSum": { + "p50": 176.89599841833115, + "p90": 191.0719946026802, + "p95": 196.51200622320175, + "p99": 241.15199595689774 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3354624, + "combineLogicalBytes": 3354624, + "fanoutMean": 3.65625, + "recvTokensMax": 64, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 
32, + "globalTokens": 256, + "dispatch": { + "p50": 102.33599692583084, + "p90": 111.68000102043152, + "p95": 115.68000167608261, + "p99": 123.74400347471237 + }, + "combine": { + "p50": 87.45600283145905, + "p90": 94.81599926948547, + "p95": 95.32800316810608, + "p99": 96.3200032711029 + }, + "roundtrip": { + "p50": 160.7999950647354, + "p90": 168.67199540138245, + "p95": 171.29600048065186, + "p99": 178.52799594402313 + }, + "isolatedSum": { + "p50": 189.7919997572899, + "p90": 206.496000289917, + "p95": 211.0080048441887, + "p99": 220.06400674581528 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 12859392, + "combineLogicalBytes": 12859392, + "fanoutMean": 3.50390625, + "recvTokensMax": 255, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 139.39200341701508, + "p90": 145.34400403499603, + "p95": 147.5200057029724, + "p99": 163.71199488639832 + }, + "combine": { + "p50": 120.15999853610992, + "p90": 128.1599998474121, + "p95": 128.86400520801544, + "p99": 129.88799810409546 + }, + "roundtrip": { + "p50": 227.87199914455414, + "p90": 232.7360063791275, + "p95": 235.32800376415253, + "p99": 255.13601303100586 + }, + "isolatedSum": { + "p50": 259.552001953125, + "p90": 273.50400388240814, + "p95": 276.38401091098785, + "p99": 293.5999929904938 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-db353ddd", + "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|decode|normal|none|none|0|tuned||22da8b58646609c", + "colorKey": "h100_002beb29", + "comparisonKey": "d83561aeea03cdbc", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:01:11.693533+00:00", + "status": "valid", + "publicationStatus": 
"official", + "runner": "h100-dgxc-slurm_12", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 · zipf-heavy", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "22da8b58646609c", + "workloadId": "set:8:6b84350720aa8233", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271987393", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271987393", + "createdAt": "2026-06-27T00:00:08Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 94.14400160312653, + "p90": 104.41599786281586, + "p95": 109.8560020327568, + "p99": 133.69600474834442 + }, + "combine": { + "p50": 71.32799923419952, + "p90": 75.03999769687653, + "p95": 80.86399734020233, + "p99": 237.34399676322937 + }, + "roundtrip": 
{ + "p50": 141.2159949541092, + "p90": 150.39999783039093, + "p95": 151.8079936504364, + "p99": 244.73600089550018 + }, + "isolatedSum": { + "p50": 165.47200083732605, + "p90": 179.45599555969238, + "p95": 190.71999937295914, + "p99": 371.0400015115738 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 172032, + "combineLogicalBytes": 172032, + "fanoutMean": 1.5, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 94.43199634552002, + "p90": 101.50399804115295, + "p95": 103.04000228643417, + "p99": 105.85600137710571 + }, + "combine": { + "p50": 72.03199714422226, + "p90": 73.95199686288834, + "p95": 74.5600014925003, + "p99": 79.80799674987793 + }, + "roundtrip": { + "p50": 141.02399349212646, + "p90": 147.77599275112152, + "p95": 150.176003575325, + "p99": 175.6799966096878 + }, + "isolatedSum": { + "p50": 166.46399348974228, + "p90": 175.4559949040413, + "p95": 177.60000377893448, + "p99": 185.66399812698364 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 315392, + "fanoutMean": 1.375, + "recvTokensMax": 16, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 95.20000219345093, + "p90": 101.47199779748917, + "p95": 103.13600301742554, + "p99": 108.12799632549286 + }, + "combine": { + "p50": 70.8480030298233, + "p90": 78.65600287914276, + "p95": 79.0719985961914, + "p99": 81.53600245714188 + }, + "roundtrip": { + "p50": 143.93599331378937, + "p90": 152.41600573062897, + "p95": 155.61600029468536, + "p99": 564.3519759178162 + }, + "isolatedSum": { + "p50": 166.04800522327423, + "p90": 180.12800067663193, + "p95": 182.20800161361694, + "p99": 189.66399878263474 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 1.34375, 
+ "recvTokensMax": 32, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 96.25600278377533, + "p90": 103.58399897813797, + "p95": 107.58399963378906, + "p99": 168.09600591659546 + }, + "combine": { + "p50": 75.71200281381607, + "p90": 80.1599994301796, + "p95": 80.83199709653854, + "p99": 82.30400085449219 + }, + "roundtrip": { + "p50": 144.73600685596466, + "p90": 150.81599354743958, + "p95": 152.79999375343323, + "p99": 157.95199573040009 + }, + "isolatedSum": { + "p50": 171.9680055975914, + "p90": 183.74399840831757, + "p95": 188.4159967303276, + "p99": 250.40000677108765 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1376256, + "combineLogicalBytes": 1376256, + "fanoutMean": 1.5, + "recvTokensMax": 64, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 96.3200032711029, + "p90": 102.39999741315842, + "p95": 104.51199859380722, + "p99": 110.27199774980545 + }, + "combine": { + "p50": 78.65600287914276, + "p90": 81.37600123882294, + "p95": 81.82399719953537, + "p99": 87.0399996638298 + }, + "roundtrip": { + "p50": 146.33600413799286, + "p90": 152.38399803638458, + "p95": 153.76000106334686, + "p99": 157.82399475574493 + }, + "isolatedSum": { + "p50": 174.97600615024567, + "p90": 183.77599865198135, + "p95": 186.3359957933426, + "p99": 197.31199741363525 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2781184, + "combineLogicalBytes": 2781184, + "fanoutMean": 1.515625, + "recvTokensMax": 128, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 96.67199850082397, + "p90": 101.95200145244598, + "p95": 103.87200117111206, + "p99": 109.56799983978271 + }, + "combine": { + "p50": 83.20000022649765, + "p90": 88.639996945858, + 
"p95": 89.28000181913376, + "p99": 90.27200192213058 + }, + "roundtrip": { + "p50": 154.27200496196747, + "p90": 159.90400314331055, + "p95": 161.8880033493042, + "p99": 171.64799571037292 + }, + "isolatedSum": { + "p50": 179.87199872732162, + "p90": 190.59199839830399, + "p95": 193.15200299024582, + "p99": 199.8400017619133 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 5533696, + "combineLogicalBytes": 5533696, + "fanoutMean": 1.5078125, + "recvTokensMax": 256, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 104.80000078678131, + "p90": 112.5440001487732, + "p95": 115.35999923944473, + "p99": 119.64800208806992 + }, + "combine": { + "p50": 95.32800316810608, + "p90": 97.6639986038208, + "p95": 98.14400225877762, + "p99": 103.45599800348282 + }, + "roundtrip": { + "p50": 173.21600019931793, + "p90": 177.47199535369873, + "p95": 178.97599935531616, + "p99": 184.09599363803864 + }, + "isolatedSum": { + "p50": 200.1280039548874, + "p90": 210.207998752594, + "p95": 213.50400149822235, + "p99": 223.10400009155273 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 11210752, + "combineLogicalBytes": 11210752, + "fanoutMean": 1.52734375, + "recvTokensMax": 512, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 120.64000219106674, + "p90": 141.9840008020401, + "p95": 143.23200285434723, + "p99": 148.54399859905243 + }, + "combine": { + "p50": 119.48800086975098, + "p90": 122.04799801111221, + "p95": 122.56000190973282, + "p99": 123.58400225639343 + }, + "roundtrip": { + "p50": 219.84000504016876, + "p90": 226.17599368095398, + "p95": 227.29599475860596, + "p99": 232.16000199317932 + }, + "isolatedSum": { + "p50": 240.12800306081772, + "p90": 264.0319988131523, + "p95": 265.79200476408005, + "p99": 272.12800085544586 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 22650880, + "combineLogicalBytes": 22650880, + "fanoutMean": 1.54296875, + "recvTokensMax": 1024, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-acf36978", + "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|decode|normal|none|none|0|tuned||47fddabb3277bec", + "colorKey": "h100_002beb29", + "comparisonKey": "d83561aeea03cdbc", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:55:11.297271+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h100-dgxc-slurm_18", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 · zipf-heavy", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "47fddabb3277bec", + "workloadId": "set:4:6b84350720aa8233", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + 
"repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271810135", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271810135", + "createdAt": "2026-06-26T23:54:18Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 95.83999961614609, + "p90": 101.27999633550644, + "p95": 104.86400127410889, + "p99": 111.51999980211258 + }, + "combine": { + "p50": 71.74400240182877, + "p90": 73.95199686288834, + "p95": 79.03999835252762, + "p99": 81.08799904584885 + }, + "roundtrip": { + "p50": 142.5279974937439, + "p90": 149.79200065135956, + "p95": 151.71200037002563, + "p99": 156.73600137233734 + }, + "isolatedSum": { + "p50": 167.58400201797485, + "p90": 175.23199319839478, + "p95": 183.9039996266365, + "p99": 192.60799884796143 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 172032, + "combineLogicalBytes": 172032, + "fanoutMean": 1.5, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 98.1760025024414, + "p90": 104.96000200510025, + "p95": 106.91200196743011, + "p99": 112.44799941778183 + }, + "combine": { + "p50": 73.34399968385696, + "p90": 79.99999821186066, + "p95": 80.48000186681747, + "p99": 85.08799970149994 + }, + "roundtrip": { + "p50": 146.14400267601013, + "p90": 152.6080071926117, + "p95": 154.7520011663437, + "p99": 160.73599457740784 + }, + "isolatedSum": { + "p50": 171.52000218629837, + "p90": 184.9600002169609, + "p95": 187.3920038342476, + "p99": 197.53599911928177 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1376256, + "combineLogicalBytes": 1376256, + "fanoutMean": 1.5, + "recvTokensMax": 64, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 98.91200065612793, + "p90": 
105.92000186443329, + "p95": 108.47999900579453, + "p99": 115.93600362539291 + }, + "combine": { + "p50": 82.87999778985977, + "p90": 88.54400366544724, + "p95": 88.92799913883209, + "p99": 90.27200192213058 + }, + "roundtrip": { + "p50": 156.19200468063354, + "p90": 162.84799575805664, + "p95": 165.56799411773682, + "p99": 169.72799599170685 + }, + "isolatedSum": { + "p50": 181.7919984459877, + "p90": 194.46400552988052, + "p95": 197.40799814462662, + "p99": 206.2080055475235 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 5533696, + "combineLogicalBytes": 5533696, + "fanoutMean": 1.5078125, + "recvTokensMax": 256, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 121.88799679279327, + "p90": 129.88799810409546, + "p95": 131.16799294948578, + "p99": 136.1279934644699 + }, + "combine": { + "p50": 114.68800157308578, + "p90": 121.18399888277054, + "p95": 122.079998254776, + "p99": 129.2160004377365 + }, + "roundtrip": { + "p50": 219.90400552749634, + "p90": 224.73600506782532, + "p95": 226.623997092247, + "p99": 230.30400276184082 + }, + "isolatedSum": { + "p50": 236.57599836587906, + "p90": 251.071996986866, + "p95": 253.24799120426178, + "p99": 265.3439939022064 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22650880, + "combineLogicalBytes": 22650880, + "fanoutMean": 1.54296875, + "recvTokensMax": 1024, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-18fdfbeb", + "identity": "h100|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-heavy+eplb|8|decode|normal|none|none|0|tuned||5a3054422534366", + "colorKey": "h100_c44978e5", + "comparisonKey": "26b5ab23f62d3389", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:01:10.918377+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h100-dgxc-slurm_11", + "sku": "h100", + "backend": "deepep", 
+ "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 · zipf-heavy+eplb", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "5a3054422534366", + "workloadId": "set:8:6b84350720aa8233", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": 7.40625, + "eplbImbalanceAfter": 1.0004417782738093, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271992225", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271992225", + "createdAt": "2026-06-27T00:00:15Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 94.01600062847137, + "p90": 101.59999877214432, + "p95": 102.68799960613251, + "p99": 107.96800255775452 + }, + "combine": { + "p50": 71.87200337648392, + "p90": 78.87999713420868, + "p95": 79.48800176382065, + "p99": 80.99199831485748 + }, + "roundtrip": { + "p50": 138.72000575065613, + "p90": 147.2640037536621, + 
"p95": 148.76799285411835, + "p99": 153.08800339698792 + }, + "isolatedSum": { + "p50": 165.8880040049553, + "p90": 180.479995906353, + "p95": 182.17600136995316, + "p99": 188.960000872612 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 559104, + "combineLogicalBytes": 559104, + "fanoutMean": 4.875, + "recvTokensMax": 6, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 69.92000341415405, + "p90": 99.64799880981445, + "p95": 101.43999755382538, + "p99": 106.84800148010254 + }, + "combine": { + "p50": 71.6480016708374, + "p90": 79.71200346946716, + "p95": 80.64000308513641, + "p99": 81.91999793052673 + }, + "roundtrip": { + "p50": 129.34400141239166, + "p90": 143.71199905872345, + "p95": 146.08000218868256, + "p99": 150.39999783039093 + }, + "isolatedSum": { + "p50": 141.56800508499146, + "p90": 179.36000227928162, + "p95": 182.0800006389618, + "p99": 188.76799941062927 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1175552, + "combineLogicalBytes": 1175552, + "fanoutMean": 5.125, + "recvTokensMax": 12, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 71.74400240182877, + "p90": 99.80800002813339, + "p95": 101.79200023412704, + "p99": 107.96800255775452 + }, + "combine": { + "p50": 72.67200201749802, + "p90": 81.56800270080566, + "p95": 86.43200248479843, + "p99": 88.73599767684937 + }, + "roundtrip": { + "p50": 129.50399518013, + "p90": 156.47999942302704, + "p95": 159.13599729537964, + "p99": 162.6880019903183 + }, + "isolatedSum": { + "p50": 144.41600441932678, + "p90": 181.37600272893906, + "p95": 188.22400271892548, + "p99": 196.70400023460388 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2465792, + "combineLogicalBytes": 2465792, + "fanoutMean": 5.375, + "recvTokensMax": 25, + "stragglerRank": 4, + "correct": 
true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 72.12799787521362, + "p90": 96.16000205278397, + "p95": 98.30400347709656, + "p99": 103.64799946546555 + }, + "combine": { + "p50": 72.9919970035553, + "p90": 81.08799904584885, + "p95": 81.60000294446945, + "p99": 87.13600039482117 + }, + "roundtrip": { + "p50": 127.9039978981018, + "p90": 152.16000378131866, + "p95": 155.90399503707886, + "p99": 157.24800527095795 + }, + "isolatedSum": { + "p50": 145.11999487876892, + "p90": 177.2480010986328, + "p95": 179.904006421566, + "p99": 190.7839998602867 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4988928, + "combineLogicalBytes": 4988928, + "fanoutMean": 5.4375, + "recvTokensMax": 47, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 82.0159986615181, + "p90": 98.55999797582626, + "p95": 101.50399804115295, + "p99": 106.33599758148193 + }, + "combine": { + "p50": 73.56800138950348, + "p90": 87.87199854850769, + "p95": 88.8959988951683, + "p99": 89.88799899816513 + }, + "roundtrip": { + "p50": 127.71199643611908, + "p90": 159.32799875736237, + "p95": 160.99199652671814, + "p99": 163.90399634838104 + }, + "isolatedSum": { + "p50": 155.58400005102158, + "p90": 186.43199652433395, + "p95": 190.39999693632126, + "p99": 196.22399657964706 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9791488, + "combineLogicalBytes": 9791488, + "fanoutMean": 5.3359375, + "recvTokensMax": 94, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 89.82399851083755, + "p90": 101.27999633550644, + "p95": 102.65599936246872, + "p99": 107.29599744081497 + }, + "combine": { + "p50": 80.73599636554718, + "p90": 89.4400030374527, + "p95": 89.85599875450134, + "p99": 95.42399644851685 + }, + 
"roundtrip": { + "p50": 141.59999787807465, + "p90": 158.9439958333969, + "p95": 161.18399798870087, + "p99": 167.32800006866455 + }, + "isolatedSum": { + "p50": 170.55999487638474, + "p90": 190.71999937295914, + "p95": 192.51199811697006, + "p99": 202.71999388933182 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19410944, + "combineLogicalBytes": 19410944, + "fanoutMean": 5.2890625, + "recvTokensMax": 178, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 96.09600156545639, + "p90": 118.1119978427887, + "p95": 120.57600170373917, + "p99": 127.83999741077423 + }, + "combine": { + "p50": 89.82399851083755, + "p90": 103.20000350475311, + "p95": 103.80800068378448, + "p99": 104.70400005578995 + }, + "roundtrip": { + "p50": 160.288006067276, + "p90": 180.95999956130981, + "p95": 185.18400192260742, + "p99": 188.60800564289093 + }, + "isolatedSum": { + "p50": 185.92000007629395, + "p90": 221.3120013475418, + "p95": 224.38400238752365, + "p99": 232.54399746656418 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38678528, + "combineLogicalBytes": 38678528, + "fanoutMean": 5.26953125, + "recvTokensMax": 360, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 114.52800035476685, + "p90": 135.0719928741455, + "p95": 136.6720050573349, + "p99": 140.00000059604645 + }, + "combine": { + "p50": 106.01600259542465, + "p90": 119.71200257539749, + "p95": 120.35199999809265, + "p99": 122.14399874210358 + }, + "roundtrip": { + "p50": 195.96800208091736, + "p90": 214.33599293231964, + "p95": 216.86400473117828, + "p99": 220.44800221920013 + }, + "isolatedSum": { + "p50": 220.5440029501915, + "p90": 254.783995449543, + "p95": 257.02400505542755, + "p99": 262.14399933815 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77285376, + 
"combineLogicalBytes": 77285376, + "fanoutMean": 5.2646484375, + "recvTokensMax": 704, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-efff3174", + "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-mild|8|decode|normal|none|none|0|tuned||f3df51be7d5c32b", + "colorKey": "h100_9aa30544", + "comparisonKey": "c4aa2e0da9446ced", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:00:21.116102+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h100-dgxc-slurm_13", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 · zipf-mild", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-mild", + "routingLabel": "zipf-mild", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "f3df51be7d5c32b", + "workloadId": "set:8:289b7f9c14292e96", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": 
"28271958693", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271958693", + "createdAt": "2026-06-26T23:59:13Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 97.28000313043594, + "p90": 104.70400005578995, + "p95": 106.11200332641602, + "p99": 112.73600161075592 + }, + "combine": { + "p50": 79.71200346946716, + "p90": 82.65600353479385, + "p95": 99.13600236177444, + "p99": 275.4560112953186 + }, + "roundtrip": { + "p50": 147.61599898338318, + "p90": 155.32800555229187, + "p95": 156.73600137233734, + "p99": 162.91199624538422 + }, + "isolatedSum": { + "p50": 176.9920065999031, + "p90": 187.3600035905838, + "p95": 205.24800568819046, + "p99": 388.1920129060745 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 587776, + "combineLogicalBytes": 587776, + "fanoutMean": 5.125, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 72.80000299215317, + "p90": 102.7199998497963, + "p95": 104.89600151777267, + "p99": 109.66400057077408 + }, + "combine": { + "p50": 73.15199822187424, + "p90": 81.44000172615051, + "p95": 81.88799768686295, + "p99": 82.91199803352356 + }, + "roundtrip": { + "p50": 129.4720023870468, + "p90": 153.3759981393814, + "p95": 156.15999698638916, + "p99": 164.92800414562225 + }, + "isolatedSum": { + "p50": 145.9520012140274, + "p90": 184.1600015759468, + "p95": 186.78399920463562, + "p99": 192.57599860429764 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1103872, + "combineLogicalBytes": 1103872, + "fanoutMean": 4.8125, + "recvTokensMax": 16, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 76.25599950551987, + "p90": 102.62399911880493, + "p95": 105.24799674749374, + "p99": 
109.47199910879135 + }, + "combine": { + "p50": 73.31199944019318, + "p90": 81.4720019698143, + "p95": 86.20800077915192, + "p99": 89.34400230646133 + }, + "roundtrip": { + "p50": 129.56799566745758, + "p90": 157.9200029373169, + "p95": 160.35200655460358, + "p99": 166.04800522327423 + }, + "isolatedSum": { + "p50": 149.56799894571304, + "p90": 184.09600108861923, + "p95": 191.45599752664566, + "p99": 198.81600141525269 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2250752, + "combineLogicalBytes": 2250752, + "fanoutMean": 4.90625, + "recvTokensMax": 31, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 76.83199644088745, + "p90": 101.79200023412704, + "p95": 105.02400249242783, + "p99": 109.31199789047241 + }, + "combine": { + "p50": 73.5040009021759, + "p90": 82.04799890518188, + "p95": 86.40000224113464, + "p99": 88.54400366544724 + }, + "roundtrip": { + "p50": 130.23999333381653, + "p90": 159.39199924468994, + "p95": 161.82400286197662, + "p99": 165.98400473594666 + }, + "isolatedSum": { + "p50": 150.33599734306335, + "p90": 183.83999913930893, + "p95": 191.42400473356247, + "p99": 197.85600155591965 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4472832, + "combineLogicalBytes": 4472832, + "fanoutMean": 4.875, + "recvTokensMax": 62, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 96.00000083446503, + "p90": 104.73600029945374, + "p95": 108.51199924945831, + "p99": 115.74400216341019 + }, + "combine": { + "p50": 80.03199845552444, + "p90": 87.23200112581253, + "p95": 88.51200342178345, + "p99": 90.01599997282028 + }, + "roundtrip": { + "p50": 135.1040005683899, + "p90": 161.40800714492798, + "p95": 164.5440012216568, + "p99": 169.50400173664093 + }, + "isolatedSum": { + "p50": 176.03199928998947, + "p90": 
191.96800142526627, + "p95": 197.02400267124176, + "p99": 205.76000213623047 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 8888320, + "combineLogicalBytes": 8888320, + "fanoutMean": 4.84375, + "recvTokensMax": 124, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 90.65599739551544, + "p90": 102.75200009346008, + "p95": 105.69600015878677, + "p99": 109.37599837779999 + }, + "combine": { + "p50": 81.60000294446945, + "p90": 90.59199690818787, + "p95": 95.32800316810608, + "p99": 97.47199714183807 + }, + "roundtrip": { + "p50": 145.1839953660965, + "p90": 165.56799411773682, + "p95": 168.5439944267273, + "p99": 174.68799650669098 + }, + "isolatedSum": { + "p50": 172.2560003399849, + "p90": 193.34399700164795, + "p95": 201.02400332689285, + "p99": 206.84799551963806 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 17733632, + "combineLogicalBytes": 17733632, + "fanoutMean": 4.83203125, + "recvTokensMax": 248, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 101.43999755382538, + "p90": 116.89600348472595, + "p95": 119.77600306272507, + "p99": 138.7840062379837 + }, + "combine": { + "p50": 90.59199690818787, + "p90": 103.35999727249146, + "p95": 104.3199971318245, + "p99": 105.92000186443329 + }, + "roundtrip": { + "p50": 168.7680035829544, + "p90": 185.88800728321075, + "p95": 188.6720061302185, + "p99": 193.37600469589233 + }, + "isolatedSum": { + "p50": 192.03199446201324, + "p90": 220.2560007572174, + "p95": 224.09600019454956, + "p99": 244.704008102417 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 35424256, + "combineLogicalBytes": 35424256, + "fanoutMean": 4.826171875, + "recvTokensMax": 492, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + 
"globalTokens": 1024, + "dispatch": { + "p50": 122.5920021533966, + "p90": 134.91199910640717, + "p95": 136.9280070066452, + "p99": 143.64799857139587 + }, + "combine": { + "p50": 115.07199704647064, + "p90": 128.63999605178833, + "p95": 130.40000200271606, + "p99": 139.71200585365295 + }, + "roundtrip": { + "p50": 215.5199944972992, + "p90": 233.66400599479675, + "p95": 235.35999655723572, + "p99": 240.12799561023712 + }, + "isolatedSum": { + "p50": 237.66399919986725, + "p90": 263.5519951581955, + "p95": 267.32800900936127, + "p99": 283.3600044250488 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 70160384, + "combineLogicalBytes": 70160384, + "fanoutMean": 4.779296875, + "recvTokensMax": 987, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-6d1780ec", + "identity": "h100|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-mild+eplb|8|decode|normal|none|none|0|tuned||16babcaf4204243", + "colorKey": "h100_e8b903ea", + "comparisonKey": "0d93a7b7a0fcf6d0", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:00:17.527263+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h100-dgxc-slurm_01", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 · zipf-mild+eplb", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-mild", + "routingLabel": "zipf-mild+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + 
"deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "16babcaf4204243", + "workloadId": "set:8:289b7f9c14292e96", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": 2.61328125, + "eplbImbalanceAfter": 1.0009114583333334, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271962037", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271962037", + "createdAt": "2026-06-26T23:59:20Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 98.55999797582626, + "p90": 106.33599758148193, + "p95": 108.51199924945831, + "p99": 113.21599781513214 + }, + "combine": { + "p50": 79.39200103282928, + "p90": 81.85599744319916, + "p95": 82.56000280380249, + "p99": 87.10400015115738 + }, + "roundtrip": { + "p50": 145.50399780273438, + "p90": 154.7199934720993, + "p95": 156.8640023469925, + "p99": 160.7999950647354 + }, + "isolatedSum": { + "p50": 177.95199900865555, + "p90": 188.1919950246811, + "p95": 191.0720020532608, + "p99": 200.31999796628952 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 7, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 73.60000163316727, + "p90": 108.31999778747559, + "p95": 109.66400057077408, + "p99": 115.13599753379822 + }, + "combine": { + "p50": 72.51200079917908, + "p90": 81.60000294446945, + "p95": 82.36800134181976, + "p99": 87.20000088214874 + }, + 
"roundtrip": { + "p50": 129.05600666999817, + "p90": 156.47999942302704, + "p95": 160.0639969110489, + "p99": 162.1759980916977 + }, + "isolatedSum": { + "p50": 146.11200243234634, + "p90": 189.92000073194504, + "p95": 192.03200191259384, + "p99": 202.33599841594696 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1189888, + "combineLogicalBytes": 1189888, + "fanoutMean": 5.1875, + "recvTokensMax": 12, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 97.120001912117, + "p90": 103.87200117111206, + "p95": 105.66399991512299, + "p99": 110.68800091743469 + }, + "combine": { + "p50": 79.55200225114822, + "p90": 82.20800012350082, + "p95": 86.30400151014328, + "p99": 88.3840024471283 + }, + "roundtrip": { + "p50": 151.32799744606018, + "p90": 159.61599349975586, + "p95": 161.15200519561768, + "p99": 167.71200299263 + }, + "isolatedSum": { + "p50": 176.67200416326523, + "p90": 186.08000129461288, + "p95": 191.96800142526627, + "p99": 199.072003364563 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2408448, + "combineLogicalBytes": 2408448, + "fanoutMean": 5.25, + "recvTokensMax": 23, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 96.83199971914291, + "p90": 103.07200253009796, + "p95": 104.47999835014343, + "p99": 111.48799955844879 + }, + "combine": { + "p50": 79.48800176382065, + "p90": 82.49600231647491, + "p95": 87.0399996638298, + "p99": 88.76799792051315 + }, + "roundtrip": { + "p50": 152.38399803638458, + "p90": 159.96800363063812, + "p95": 162.20800578594208, + "p99": 166.59200191497803 + }, + "isolatedSum": { + "p50": 176.32000148296356, + "p90": 185.56800484657288, + "p95": 191.51999801397324, + "p99": 200.25599747896194 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + 
"fanoutMean": 5.296875, + "recvTokensMax": 47, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 96.92800045013428, + "p90": 102.01600193977356, + "p95": 104.76800054311752, + "p99": 113.02399635314941 + }, + "combine": { + "p50": 80.86399734020233, + "p90": 88.3840024471283, + "p95": 89.63199704885483, + "p99": 94.65599805116653 + }, + "roundtrip": { + "p50": 153.21600437164307, + "p90": 159.39199924468994, + "p95": 160.8320027589798, + "p99": 165.3759926557541 + }, + "isolatedSum": { + "p50": 177.7919977903366, + "p90": 190.40000438690186, + "p95": 194.39999759197235, + "p99": 207.67999440431595 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9605120, + "combineLogicalBytes": 9605120, + "fanoutMean": 5.234375, + "recvTokensMax": 93, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 91.5519967675209, + "p90": 105.27999699115753, + "p95": 106.52799904346466, + "p99": 110.55999994277954 + }, + "combine": { + "p50": 81.216000020504, + "p90": 90.17600119113922, + "p95": 94.33600306510925, + "p99": 96.79999947547913 + }, + "roundtrip": { + "p50": 144.1279947757721, + "p90": 167.52000153064728, + "p95": 168.99199783802032, + "p99": 173.567995429039 + }, + "isolatedSum": { + "p50": 172.7679967880249, + "p90": 195.45599818229675, + "p95": 200.8640021085739, + "p99": 207.35999941825867 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19367936, + "combineLogicalBytes": 19367936, + "fanoutMean": 5.27734375, + "recvTokensMax": 182, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 104.80000078678131, + "p90": 116.35199934244156, + "p95": 118.81600320339203, + "p99": 122.97599762678146 + }, + "combine": { + "p50": 96.38399630784988, 
+ "p90": 104.00000214576721, + "p95": 104.5759990811348, + "p99": 106.4319983124733 + }, + "roundtrip": { + "p50": 177.76000499725342, + "p90": 185.44000387191772, + "p95": 187.16800212860107, + "p99": 190.3039962053299 + }, + "isolatedSum": { + "p50": 201.1839970946312, + "p90": 220.35200148820877, + "p95": 223.39200228452682, + "p99": 229.40799593925476 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38535168, + "combineLogicalBytes": 38535168, + "fanoutMean": 5.25, + "recvTokensMax": 358, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 113.56800049543381, + "p90": 131.58400356769562, + "p95": 133.66399705410004, + "p99": 139.96799290180206 + }, + "combine": { + "p50": 106.55999928712845, + "p90": 119.55200135707855, + "p95": 120.09599804878235, + "p99": 121.05599790811539 + }, + "roundtrip": { + "p50": 198.46400618553162, + "p90": 217.6000028848648, + "p95": 218.75199675559998, + "p99": 224.2880016565323 + }, + "isolatedSum": { + "p50": 220.12799978256226, + "p90": 251.13600492477417, + "p95": 253.75999510288239, + "p99": 261.02399080991745 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 76869632, + "combineLogicalBytes": 76869632, + "fanoutMean": 5.236328125, + "recvTokensMax": 688, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-9d829c00", + "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-moderate|8|decode|normal|none|none|0|tuned||14ded8461f2636c", + "colorKey": "h100_552a4b73", + "comparisonKey": "95c165fc74bc43c0", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:00:35.674306+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h100-dgxc-slurm_17", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": 
"standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 · zipf-moderate", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "14ded8461f2636c", + "workloadId": "set:8:120a8dc1dba92ca9", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271971983", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271971983", + "createdAt": "2026-06-26T23:59:40Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 95.8079993724823, + "p90": 103.10400277376175, + "p95": 104.16000336408615, + "p99": 110.01600325107574 + }, + "combine": { + "p50": 74.33599978685379, + "p90": 81.56800270080566, + "p95": 81.98399841785431, + "p99": 83.29600095748901 + }, + "roundtrip": { + "p50": 142.2719955444336, + "p90": 148.67199957370758, + "p95": 150.4639983177185, + "p99": 154.11199629306793 + }, + "isolatedSum": { + "p50": 170.1439991593361, + "p90": 184.6720054745674, + 
"p95": 186.14400178194046, + "p99": 193.31200420856476 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 444416, + "combineLogicalBytes": 444416, + "fanoutMean": 3.875, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 70.88000327348709, + "p90": 101.98400169610977, + "p95": 102.94400155544281, + "p99": 106.01600259542465 + }, + "combine": { + "p50": 72.4480003118515, + "p90": 81.40800148248672, + "p95": 81.95199817419052, + "p99": 85.7279971241951 + }, + "roundtrip": { + "p50": 128.7039965391159, + "p90": 147.71200716495514, + "p95": 149.59999918937683, + "p99": 152.79999375343323 + }, + "isolatedSum": { + "p50": 143.3280035853386, + "p90": 183.3920031785965, + "p95": 184.89599972963333, + "p99": 191.74399971961975 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 845824, + "combineLogicalBytes": 845824, + "fanoutMean": 3.6875, + "recvTokensMax": 16, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 73.18399846553802, + "p90": 93.82399916648865, + "p95": 96.41599655151367, + "p99": 104.99200224876404 + }, + "combine": { + "p50": 70.8480030298233, + "p90": 77.82399654388428, + "p95": 78.59200239181519, + "p99": 83.45600217580795 + }, + "roundtrip": { + "p50": 125.44000148773193, + "p90": 151.74399316310883, + "p95": 154.1759967803955, + "p99": 160.09600460529327 + }, + "isolatedSum": { + "p50": 144.03200149536133, + "p90": 171.64799571037292, + "p95": 175.00799894332886, + "p99": 188.448004424572 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1691648, + "combineLogicalBytes": 1691648, + "fanoutMean": 3.6875, + "recvTokensMax": 32, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 
75.83999633789062, + "p90": 100.22400319576263, + "p95": 102.39999741315842, + "p99": 107.4879989027977 + }, + "combine": { + "p50": 73.18399846553802, + "p90": 81.44000172615051, + "p95": 82.24000036716461, + "p99": 87.23200112581253 + }, + "roundtrip": { + "p50": 126.27199292182922, + "p90": 154.88000214099884, + "p95": 157.47199952602386, + "p99": 159.4880074262619 + }, + "isolatedSum": { + "p50": 149.02399480342865, + "p90": 181.66400492191315, + "p95": 184.63999778032303, + "p99": 194.72000002861023 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3354624, + "combineLogicalBytes": 3354624, + "fanoutMean": 3.65625, + "recvTokensMax": 64, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 79.3600007891655, + "p90": 100.0640019774437, + "p95": 123.80799651145935, + "p99": 229.76000607013702 + }, + "combine": { + "p50": 73.88799637556076, + "p90": 82.2720006108284, + "p95": 83.36000144481659, + "p99": 89.28000181913376 + }, + "roundtrip": { + "p50": 130.17599284648895, + "p90": 154.62400019168854, + "p95": 157.3760062456131, + "p99": 162.7199947834015 + }, + "isolatedSum": { + "p50": 153.24799716472626, + "p90": 182.3360025882721, + "p95": 207.16799795627594, + "p99": 319.0400078892708 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6537216, + "combineLogicalBytes": 6537216, + "fanoutMean": 3.5625, + "recvTokensMax": 127, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 88.44800293445587, + "p90": 103.71199995279312, + "p95": 105.76000064611435, + "p99": 110.1439967751503 + }, + "combine": { + "p50": 81.60000294446945, + "p90": 89.6959975361824, + "p95": 90.27200192213058, + "p99": 91.80799871683121 + }, + "roundtrip": { + "p50": 141.34399592876434, + "p90": 161.98399662971497, + "p95": 163.455992937088, + "p99": 
169.24799978733063 + }, + "isolatedSum": { + "p50": 170.04800587892532, + "p90": 193.40799748897552, + "p95": 196.03200256824493, + "p99": 201.9519954919815 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 12859392, + "combineLogicalBytes": 12859392, + "fanoutMean": 3.50390625, + "recvTokensMax": 255, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 102.36799716949463, + "p90": 119.6800023317337, + "p95": 121.31199985742569, + "p99": 123.77600371837616 + }, + "combine": { + "p50": 89.9839997291565, + "p90": 96.03200107812881, + "p95": 99.48799759149551, + "p99": 102.04800218343735 + }, + "roundtrip": { + "p50": 165.69599509239197, + "p90": 182.43199586868286, + "p95": 184.1599941253662, + "p99": 187.51999735832214 + }, + "isolatedSum": { + "p50": 192.35199689865112, + "p90": 215.71200340986252, + "p95": 220.7999974489212, + "p99": 225.8240059018135 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 25145344, + "combineLogicalBytes": 25145344, + "fanoutMean": 3.42578125, + "recvTokensMax": 510, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 125.91999769210815, + "p90": 144.70399916172028, + "p95": 145.9520012140274, + "p99": 148.00000190734863 + }, + "combine": { + "p50": 114.56000059843063, + "p90": 119.99999731779099, + "p95": 122.30399996042252, + "p99": 126.91199779510498 + }, + "roundtrip": { + "p50": 218.9760059118271, + "p90": 233.63199830055237, + "p95": 235.1360023021698, + "p99": 238.304004073143 + }, + "isolatedSum": { + "p50": 240.4799982905388, + "p90": 264.70399647951126, + "p95": 268.2560011744499, + "p99": 274.9119997024536 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 6, + 
"correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-c61b6088", + "identity": "h100|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-moderate+eplb|8|decode|normal|none|none|0|tuned||a8f501af7004836", + "colorKey": "h100_106a51ab", + "comparisonKey": "6643ae5a97d68820", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:00:43.354862+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h100-dgxc-slurm_07", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 · zipf-moderate+eplb", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "a8f501af7004836", + "workloadId": "set:8:120a8dc1dba92ca9", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": 4.927734375, + "eplbImbalanceAfter": 1.0006103515625, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271975554", + "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271975554", + "createdAt": "2026-06-26T23:59:47Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 69.72800195217133, + "p90": 76.7040029168129, + "p95": 82.24000036716461, + "p99": 100.09600222110748 + }, + "combine": { + "p50": 70.78400254249573, + "p90": 73.11999797821045, + "p95": 73.53600114583969, + "p99": 78.3040001988411 + }, + "roundtrip": { + "p50": 124.35200065374374, + "p90": 129.88799810409546, + "p95": 131.20000064373016, + "p99": 137.40800321102142 + }, + "isolatedSum": { + "p50": 140.51200449466705, + "p90": 149.82400089502335, + "p95": 155.7760015130043, + "p99": 178.40000241994858 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 5.375, + "recvTokensMax": 7, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 69.92000341415405, + "p90": 77.79199630022049, + "p95": 80.19199967384338, + "p99": 96.19200229644775 + }, + "combine": { + "p50": 71.16799801588058, + "p90": 73.27999919652939, + "p95": 73.85600358247757, + "p99": 78.94399762153625 + }, + "roundtrip": { + "p50": 126.94400548934937, + "p90": 130.91200590133667, + "p95": 132.1280002593994, + "p99": 138.33600282669067 + }, + "isolatedSum": { + "p50": 141.08800143003464, + "p90": 151.07199549674988, + "p95": 154.04800325632095, + "p99": 175.135999917984 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1204224, + "combineLogicalBytes": 1204224, + "fanoutMean": 5.25, + "recvTokensMax": 14, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 74.07999783754349, + "p90": 101.34399682283401, + "p95": 103.13600301742554, + "p99": 111.39199882745743 + }, + "combine": { 
+ "p50": 72.9919970035553, + "p90": 82.0159986615181, + "p95": 87.00799942016602, + "p99": 89.31200206279755 + }, + "roundtrip": { + "p50": 131.32800161838531, + "p90": 158.59200060367584, + "p95": 163.13600540161133, + "p99": 169.69600319862366 + }, + "isolatedSum": { + "p50": 147.07199484109879, + "p90": 183.3599954843521, + "p95": 190.14400243759155, + "p99": 200.70400089025497 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2394112, + "combineLogicalBytes": 2394112, + "fanoutMean": 5.21875, + "recvTokensMax": 24, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 75.6480023264885, + "p90": 100.76799988746643, + "p95": 102.01600193977356, + "p99": 105.95200210809708 + }, + "combine": { + "p50": 72.9919970035553, + "p90": 79.68000322580338, + "p95": 80.6720033288002, + "p99": 85.88799834251404 + }, + "roundtrip": { + "p50": 129.63199615478516, + "p90": 154.91199493408203, + "p95": 156.47999942302704, + "p99": 159.96800363063812 + }, + "isolatedSum": { + "p50": 148.6399993300438, + "p90": 180.4480031132698, + "p95": 182.68800526857376, + "p99": 191.84000045061111 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4630528, + "combineLogicalBytes": 4630528, + "fanoutMean": 5.046875, + "recvTokensMax": 45, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 82.49600231647491, + "p90": 100.73599964380264, + "p95": 103.04000228643417, + "p99": 106.81600123643875 + }, + "combine": { + "p50": 74.36800003051758, + "p90": 87.0399996638298, + "p95": 87.90399879217148, + "p99": 89.63199704885483 + }, + "roundtrip": { + "p50": 132.38400220870972, + "p90": 161.02400422096252, + "p95": 162.81600296497345, + "p99": 166.72000288963318 + }, + "isolatedSum": { + "p50": 156.8640023469925, + "p90": 187.77599930763245, + "p95": 190.94400107860565, + 
"p99": 196.44799828529358 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9447424, + "combineLogicalBytes": 9447424, + "fanoutMean": 5.1484375, + "recvTokensMax": 91, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 90.30400216579437, + "p90": 103.32799702882767, + "p95": 104.35199737548828, + "p99": 109.6000000834465 + }, + "combine": { + "p50": 81.31200075149536, + "p90": 89.75999802350998, + "p95": 90.43200314044952, + "p99": 91.61599725484848 + }, + "roundtrip": { + "p50": 142.20799505710602, + "p90": 158.65600109100342, + "p95": 161.50400042533875, + "p99": 167.39200055599213 + }, + "isolatedSum": { + "p50": 171.61600291728973, + "p90": 193.08799505233765, + "p95": 194.7840005159378, + "p99": 201.21599733829498 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19023872, + "combineLogicalBytes": 19023872, + "fanoutMean": 5.18359375, + "recvTokensMax": 178, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 96.89600020647049, + "p90": 116.60800129175186, + "p95": 118.43200027942657, + "p99": 124.32000041007996 + }, + "combine": { + "p50": 90.30400216579437, + "p90": 103.32799702882767, + "p95": 103.74400019645691, + "p99": 104.25599664449692 + }, + "roundtrip": { + "p50": 162.08000481128693, + "p90": 178.8800060749054, + "p95": 181.85600638389587, + "p99": 186.49600446224213 + }, + "isolatedSum": { + "p50": 187.20000237226486, + "p90": 219.93599832057953, + "p95": 222.17600047588348, + "p99": 228.57599705457687 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38148096, + "combineLogicalBytes": 38148096, + "fanoutMean": 5.197265625, + "recvTokensMax": 350, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 
118.07999759912491, + "p90": 135.3279948234558, + "p95": 138.2399946451187, + "p99": 140.57600498199463 + }, + "combine": { + "p50": 106.84800148010254, + "p90": 119.45600062608719, + "p95": 119.74400281906128, + "p99": 120.54400146007538 + }, + "roundtrip": { + "p50": 198.84799420833588, + "p90": 216.2880003452301, + "p95": 219.67999637126923, + "p99": 221.47199511528015 + }, + "isolatedSum": { + "p50": 224.92799907922745, + "p90": 254.783995449543, + "p95": 257.98399746418, + "p99": 261.12000644207 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 76955648, + "combineLogicalBytes": 76955648, + "fanoutMean": 5.2421875, + "recvTokensMax": 687, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-a38d13e8", + "identity": "h100|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|decode|normal|none|none|0|tuned||a8f501af7004836", + "colorKey": "h100_769b9c4b", + "comparisonKey": "115d84ad1ee38d09", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:00:11.807854+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h100-dgxc-slurm_02", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 · zipf+eplb", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf", + "routingLabel": "zipf+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": 
"backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "a8f501af7004836", + "workloadId": "set:8:f5576e2b712d38c3", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": 4.927734375, + "eplbImbalanceAfter": 1.0006103515625, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271948775", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271948775", + "createdAt": "2026-06-26T23:58:53Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 70.39999961853027, + "p90": 100.832000374794, + "p95": 105.56799918413162, + "p99": 192.73599982261658 + }, + "combine": { + "p50": 73.18399846553802, + "p90": 88.44800293445587, + "p95": 188.38399648666382, + "p99": 344.2560136318207 + }, + "roundtrip": { + "p50": 123.77600371837616, + "p90": 133.08799266815186, + "p95": 149.4400054216385, + "p99": 156.12800419330597 + }, + "isolatedSum": { + "p50": 143.5839980840683, + "p90": 189.28000330924988, + "p95": 293.95199567079544, + "p99": 536.9920134544373 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 5.375, + "recvTokensMax": 7, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 68.38399916887283, + "p90": 75.71200281381607, + "p95": 77.11999863386154, + "p99": 95.61599791049957 + }, + "combine": { + "p50": 71.29599899053574, + "p90": 73.44000041484833, + "p95": 74.36800003051758, + "p99": 82.2720006108284 + }, + "roundtrip": { + "p50": 126.68800354003906, + "p90": 130.87999820709229, + "p95": 
133.56800377368927, + "p99": 142.59199798107147 + }, + "isolatedSum": { + "p50": 139.67999815940857, + "p90": 149.1520032286644, + "p95": 151.48799866437912, + "p99": 177.88799852132797 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1204224, + "combineLogicalBytes": 1204224, + "fanoutMean": 5.25, + "recvTokensMax": 14, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 72.54400104284286, + "p90": 99.2640033364296, + "p95": 102.08000242710114, + "p99": 107.39199817180634 + }, + "combine": { + "p50": 72.9919970035553, + "p90": 79.71200346946716, + "p95": 84.22400057315826, + "p99": 87.39200234413147 + }, + "roundtrip": { + "p50": 130.23999333381653, + "p90": 156.41599893569946, + "p95": 160.22400557994843, + "p99": 165.53600132465363 + }, + "isolatedSum": { + "p50": 145.53599804639816, + "p90": 178.97600680589676, + "p95": 186.3040030002594, + "p99": 194.7840005159378 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2394112, + "combineLogicalBytes": 2394112, + "fanoutMean": 5.21875, + "recvTokensMax": 24, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 96.19200229644775, + "p90": 109.56799983978271, + "p95": 112.73600161075592, + "p99": 155.87200224399567 + }, + "combine": { + "p50": 75.45600086450577, + "p90": 88.06400001049042, + "p95": 89.4400030374527, + "p99": 97.37599641084671 + }, + "roundtrip": { + "p50": 130.94399869441986, + "p90": 154.4319987297058, + "p95": 156.44800662994385, + "p99": 176.67199671268463 + }, + "isolatedSum": { + "p50": 171.64800316095352, + "p90": 197.63199985027313, + "p95": 202.17600464820862, + "p99": 253.24799865484238 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4630528, + "combineLogicalBytes": 4630528, + "fanoutMean": 5.046875, + "recvTokensMax": 45, + "stragglerRank": 4, + 
"correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 82.97599852085114, + "p90": 100.16000270843506, + "p95": 103.55199873447418, + "p99": 106.72000050544739 + }, + "combine": { + "p50": 74.14399832487106, + "p90": 87.3280018568039, + "p95": 88.95999938249588, + "p99": 89.82399851083755 + }, + "roundtrip": { + "p50": 131.6480040550232, + "p90": 158.9760035276413, + "p95": 161.31199896335602, + "p99": 166.78400337696075 + }, + "isolatedSum": { + "p50": 157.1199968457222, + "p90": 187.48800456523895, + "p95": 192.51199811697006, + "p99": 196.54399901628494 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9447424, + "combineLogicalBytes": 9447424, + "fanoutMean": 5.1484375, + "recvTokensMax": 91, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 90.30400216579437, + "p90": 105.6319996714592, + "p95": 106.6880002617836, + "p99": 111.04000359773636 + }, + "combine": { + "p50": 80.99199831485748, + "p90": 89.15200084447861, + "p95": 89.88799899816513, + "p99": 90.91199934482574 + }, + "roundtrip": { + "p50": 142.17600226402283, + "p90": 157.6640009880066, + "p95": 160.44799983501434, + "p99": 164.8319959640503 + }, + "isolatedSum": { + "p50": 171.29600048065186, + "p90": 194.7840005159378, + "p95": 196.57599925994873, + "p99": 201.9520029425621 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19023872, + "combineLogicalBytes": 19023872, + "fanoutMean": 5.18359375, + "recvTokensMax": 178, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 95.74399888515472, + "p90": 116.2559986114502, + "p95": 121.98399752378464, + "p99": 398.6560106277466 + }, + "combine": { + "p50": 90.20800143480301, + "p90": 101.1200025677681, + "p95": 104.25599664449692, + "p99": 
111.55200004577637 + }, + "roundtrip": { + "p50": 160.76800227165222, + "p90": 181.536003947258, + "p95": 185.37600338459015, + "p99": 188.35200369358063 + }, + "isolatedSum": { + "p50": 185.95200031995773, + "p90": 217.3760011792183, + "p95": 226.23999416828156, + "p99": 510.20801067352295 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38148096, + "combineLogicalBytes": 38148096, + "fanoutMean": 5.197265625, + "recvTokensMax": 350, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 114.3679991364479, + "p90": 133.44000279903412, + "p95": 137.31199502944946, + "p99": 142.7839994430542 + }, + "combine": { + "p50": 108.15999656915665, + "p90": 120.2239990234375, + "p95": 121.24799937009811, + "p99": 123.99999797344208 + }, + "roundtrip": { + "p50": 199.35999810695648, + "p90": 217.31199324131012, + "p95": 220.15999257564545, + "p99": 380.8319866657257 + }, + "isolatedSum": { + "p50": 222.52799570560455, + "p90": 253.66400182247162, + "p95": 258.5599943995476, + "p99": 266.7839974164963 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 76955648, + "combineLogicalBytes": 76955648, + "fanoutMean": 5.2421875, + "recvTokensMax": 687, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-4ad32f1a", + "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|fp8-saturation|none|none|0|normalized|0.18|8c8497a77d9085d", + "colorKey": "h100_7b3247bf", + "comparisonKey": "2a087c80bac58077", + "schemaVersion": 3, + "generatedAt": "2026-06-26T15:27:59.966964+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h100-dgxc-slurm_12", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "normalized", + "suite": "resource-constrained", + "comparisonClass": "standardized", + "measurementContract": 
"layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 (norm)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "fp8-saturation", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": 0.18, + "achievedFraction": 0.1818, + "configuredUnits": 24, + "deviceUnits": 132, + "resourceClass": "unknown", + "conformanceClass": "resource-conforming", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "8c8497a77d9085d", + "workloadId": "set:4:d8d49658059863f2", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28247603308", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28247603308", + "createdAt": "2026-06-26T15:22:55Z", + "sha": "fd23d02b65dba6f1ed963342b188022fc27263d1" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 96.73599898815155, + "p90": 102.49599814414978, + "p95": 104.12800312042236, + "p99": 112.19199746847153 + }, + "combine": { + "p50": 79.42400127649307, + "p90": 81.4720019698143, + "p95": 82.14399963617325, + "p99": 87.93599903583527 + }, + "roundtrip": { + "p50": 146.84799313545227, + "p90": 156.15999698638916, + "p95": 159.13599729537964, + "p99": 164.000004529953 + }, + "isolatedSum": { + "p50": 176.16000026464462, + "p90": 183.96800011396408, + "p95": 186.2720027565956, + "p99": 200.1279965043068 + 
}, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 98.33600372076035, + "p90": 103.93600165843964, + "p95": 106.52799904346466, + "p99": 111.58400028944016 + }, + "combine": { + "p50": 80.03199845552444, + "p90": 86.84799820184708, + "p95": 87.61599659919739, + "p99": 88.06400001049042 + }, + "roundtrip": { + "p50": 151.64799988269806, + "p90": 159.16800498962402, + "p95": 160.35200655460358, + "p99": 165.50399363040924 + }, + "isolatedSum": { + "p50": 178.3680021762848, + "p90": 190.7839998602867, + "p95": 194.14399564266205, + "p99": 199.64800029993057 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 99.90400075912476, + "p90": 105.76000064611435, + "p95": 108.15999656915665, + "p99": 116.60800129175186 + }, + "combine": { + "p50": 87.90399879217148, + "p90": 90.55999666452408, + "p95": 95.23200243711472, + "p99": 96.57599776983261 + }, + "roundtrip": { + "p50": 157.82399475574493, + "p90": 163.7759953737259, + "p95": 166.78400337696075, + "p99": 169.95200514793396 + }, + "isolatedSum": { + "p50": 187.80799955129623, + "p90": 196.31999731063843, + "p95": 203.39199900627136, + "p99": 213.18399906158447 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 128.60800325870514, + "p90": 133.53599607944489, + 
"p95": 135.51999628543854, + "p99": 138.49599659442902 + }, + "combine": { + "p50": 112.57600039243698, + "p90": 120.4800009727478, + "p95": 120.7680031657219, + "p99": 122.40000069141388 + }, + "roundtrip": { + "p50": 208.3519995212555, + "p90": 215.71199595928192, + "p95": 217.56799519062042, + "p99": 220.5439954996109 + }, + "isolatedSum": { + "p50": 241.18400365114212, + "p90": 254.0159970521927, + "p95": 256.28799945116043, + "p99": 260.8959972858429 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-b5d97134", + "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|normalized|0.1|8c8497a77d9085d", + "colorKey": "h100_7b3247bf", + "comparisonKey": "b51e047646ec8fac", + "schemaVersion": 3, + "generatedAt": "2026-06-26T17:27:16.815311+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h100-dgxc-slurm_07", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "normalized", + "suite": "resource-constrained", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 (norm)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": 0.1, + "achievedFraction": 0.0985, + "configuredUnits": 13, + "deviceUnits": 132, + "resourceClass": "resource-constrained", + "conformanceClass": "resource-conforming", + 
"fixedKernel": false, + "paretoEligible": true + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "8c8497a77d9085d", + "workloadId": "set:4:d8d49658059863f2", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28254271442", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254271442", + "createdAt": "2026-06-26T17:26:00Z", + "sha": "60dec7d70f554e252fec87709e2be52752947db1" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 97.24800288677216, + "p90": 103.39199751615524, + "p95": 105.8880016207695, + "p99": 111.13599687814713 + }, + "combine": { + "p50": 78.84799689054489, + "p90": 81.727996468544, + "p95": 85.11999994516373, + "p99": 89.02399986982346 + }, + "roundtrip": { + "p50": 151.36000514030457, + "p90": 157.53600001335144, + "p95": 159.67999398708344, + "p99": 164.63999450206757 + }, + "isolatedSum": { + "p50": 176.09599977731705, + "p90": 185.11999398469925, + "p95": 191.00800156593323, + "p99": 200.15999674797058 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 99.29600358009338, + "p90": 104.70400005578995, + "p95": 106.72000050544739, + "p99": 113.53600025177002 + }, + "combine": { + "p50": 79.58400249481201, + "p90": 86.97599917650223, + "p95": 87.39200234413147, + "p99": 91.5519967675209 + }, + "roundtrip": { + "p50": 153.85599434375763, + "p90": 161.28000617027283, + "p95": 162.432000041008, + "p99": 
166.07999801635742 + }, + "isolatedSum": { + "p50": 178.8800060749054, + "p90": 191.67999923229218, + "p95": 194.11200284957886, + "p99": 205.08799701929092 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 103.29599678516388, + "p90": 107.64800012111664, + "p95": 109.98400300741196, + "p99": 121.40800058841705 + }, + "combine": { + "p50": 87.74399757385254, + "p90": 95.20000219345093, + "p95": 95.48799693584442, + "p99": 97.18400239944458 + }, + "roundtrip": { + "p50": 161.6639941930771, + "p90": 169.50400173664093, + "p95": 170.9440052509308, + "p99": 175.52000284194946 + }, + "isolatedSum": { + "p50": 191.03999435901642, + "p90": 202.84800231456757, + "p95": 205.47199994325638, + "p99": 218.59200298786163 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 129.66400384902954, + "p90": 137.79200613498688, + "p95": 139.55199718475342, + "p99": 143.93599331378937 + }, + "combine": { + "p50": 113.72800171375275, + "p90": 120.15999853610992, + "p95": 120.83200365304947, + "p99": 123.55200201272964 + }, + "roundtrip": { + "p50": 211.776003241539, + "p90": 217.21599996089935, + "p95": 218.9439982175827, + "p99": 222.75200486183167 + }, + "isolatedSum": { + "p50": 243.3920055627823, + "p90": 257.9520046710968, + "p95": 260.3840008378029, + "p99": 267.487995326519 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 5, + "correct": true, 
+ "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-2f9f6948", + "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|ac583971f94b176", + "colorKey": "h100_7b3247bf", + "comparisonKey": "b51e047646ec8fac", + "schemaVersion": 3, + "generatedAt": "2026-06-26T17:29:02.253264+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h100-dgxc-slurm_00", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "normalized", + "suite": "resource-constrained", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 (norm)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": 0.18, + "achievedFraction": 0.1818, + "configuredUnits": 24, + "deviceUnits": 132, + "resourceClass": "resource-constrained", + "conformanceClass": "resource-conforming", + "fixedKernel": false, + "paretoEligible": true + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": "set:8:d8d49658059863f2", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28254315809", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254315809", + "createdAt": 
"2026-06-26T17:26:52Z", + "sha": "60dec7d70f554e252fec87709e2be52752947db1" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 95.74399888515472, + "p90": 102.78400033712387, + "p95": 104.99200224876404, + "p99": 109.37599837779999 + }, + "combine": { + "p50": 79.32800054550171, + "p90": 82.07999914884567, + "p95": 82.87999778985977, + "p99": 88.03199976682663 + }, + "roundtrip": { + "p50": 147.74399995803833, + "p90": 154.6880006790161, + "p95": 157.44000673294067, + "p99": 171.9360053539276 + }, + "isolatedSum": { + "p50": 175.07199943065643, + "p90": 184.86399948596954, + "p95": 187.8720000386238, + "p99": 197.40799814462662 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 71.23199850320816, + "p90": 101.27999633550644, + "p95": 102.52799838781357, + "p99": 107.87200182676315 + }, + "combine": { + "p50": 72.22399860620499, + "p90": 80.92799782752991, + "p95": 81.44000172615051, + "p99": 84.76799726486206 + }, + "roundtrip": { + "p50": 127.45599448680878, + "p90": 153.02400290966034, + "p95": 155.64799308776855, + "p99": 159.4880074262619 + }, + "isolatedSum": { + "p50": 143.45599710941315, + "p90": 182.20799416303635, + "p95": 183.96800011396408, + "p99": 192.6399990916252 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 95.23200243711472, + "p90": 102.36799716949463, + "p95": 107.84000158309937, + "p99": 439.64800238609314 + }, + "combine": { + "p50": 72.95999675989151, + "p90": 81.66400343179703, + "p95": 86.81599795818329, 
+ "p99": 88.92799913883209 + }, + "roundtrip": { + "p50": 128.7360042333603, + "p90": 159.19999778270721, + "p95": 161.31199896335602, + "p99": 167.1680063009262 + }, + "isolatedSum": { + "p50": 168.19199919700623, + "p90": 184.03200060129166, + "p95": 194.65599954128265, + "p99": 528.5760015249252 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 95.42399644851685, + "p90": 102.52799838781357, + "p95": 104.89600151777267, + "p99": 113.53600025177002 + }, + "combine": { + "p50": 79.58400249481201, + "p90": 82.91199803352356, + "p95": 87.07199990749359, + "p99": 87.96799927949905 + }, + "roundtrip": { + "p50": 151.48800611495972, + "p90": 159.90400314331055, + "p95": 162.20800578594208, + "p99": 169.47199404239655 + }, + "isolatedSum": { + "p50": 175.00799894332886, + "p90": 185.43999642133713, + "p95": 191.96800142526627, + "p99": 201.50399953126907 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 95.71199864149094, + "p90": 100.8640006184578, + "p95": 102.68799960613251, + "p99": 106.49599879980087 + }, + "combine": { + "p50": 80.64000308513641, + "p90": 87.90399879217148, + "p95": 89.24800157546997, + "p99": 95.23200243711472 + }, + "roundtrip": { + "p50": 152.319997549057, + "p90": 160.19199788570404, + "p95": 162.23999857902527, + "p99": 168.92799735069275 + }, + "isolatedSum": { + "p50": 176.35200172662735, + "p90": 188.76799941062927, + "p95": 191.93600118160248, + "p99": 201.7280012369156 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 
9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 80.86399734020233, + "p90": 103.26399654150009, + "p95": 105.47199845314026, + "p99": 113.18399757146835 + }, + "combine": { + "p50": 80.35200089216232, + "p90": 89.31200206279755, + "p95": 90.04800021648407, + "p99": 95.74399888515472 + }, + "roundtrip": { + "p50": 136.48000359535217, + "p90": 164.60800170898438, + "p95": 167.10400581359863, + "p99": 175.10400712490082 + }, + "isolatedSum": { + "p50": 161.21599823236465, + "p90": 192.57599860429764, + "p95": 195.51999866962433, + "p99": 208.92799645662308 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 103.4879982471466, + "p90": 112.8000020980835, + "p95": 114.3679991364479, + "p99": 125.72799623012543 + }, + "combine": { + "p50": 96.83199971914291, + "p90": 104.12800312042236, + "p95": 104.99200224876404, + "p99": 106.33599758148193 + }, + "roundtrip": { + "p50": 170.71999609470367, + "p90": 181.21600151062012, + "p95": 182.91200697422028, + "p99": 186.81600689888 + }, + "isolatedSum": { + "p50": 200.31999796628952, + "p90": 216.92800521850586, + "p95": 219.36000138521194, + "p99": 232.06399381160736 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 111.29599809646606, + "p90": 130.87999820709229, + "p95": 133.5040032863617, + "p99": 
139.93600010871887 + }, + "combine": { + "p50": 106.27199709415436, + "p90": 119.58400160074234, + "p95": 119.99999731779099, + "p99": 122.3360002040863 + }, + "roundtrip": { + "p50": 197.56799936294556, + "p90": 215.80800414085388, + "p95": 217.92000532150269, + "p99": 219.80799734592438 + }, + "isolatedSum": { + "p50": 217.56799519062042, + "p90": 250.46399980783463, + "p95": 253.50400060415268, + "p99": 262.2720003128052 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-4d84166e", + "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|c774c8e4abb34da", + "colorKey": "h100_7b3247bf", + "comparisonKey": "b51e047646ec8fac", + "schemaVersion": 3, + "generatedAt": "2026-06-26T15:26:50.881953+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h100-dgxc-slurm_12", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "normalized", + "suite": "resource-constrained", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 (norm)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": 0.18, + "achievedFraction": 0.1818, + "configuredUnits": 24, + "deviceUnits": 132, + "resourceClass": "unknown", + "conformanceClass": "resource-conforming", + "fixedKernel": false, + "paretoEligible": false + 
}, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "c774c8e4abb34da", + "workloadId": "set:5:d8d49658059863f2", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28247565431", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28247565431", + "createdAt": "2026-06-26T15:22:16Z", + "sha": "fd23d02b65dba6f1ed963342b188022fc27263d1" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 97.4079966545105, + "p90": 298.2720136642456, + "p95": 307.0400059223175, + "p99": 323.61599802970886 + }, + "combine": { + "p50": 81.08799904584885, + "p90": 171.9360053539276, + "p95": 204.96000349521637, + "p99": 212.8639966249466 + }, + "roundtrip": { + "p50": 150.62400698661804, + "p90": 249.439999461174, + "p95": 253.53598594665527, + "p99": 263.0079984664917 + }, + "isolatedSum": { + "p50": 178.49599570035934, + "p90": 470.2080190181732, + "p95": 512.0000094175339, + "p99": 536.4799946546555 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 97.02400118112564, + "p90": 167.93599724769592, + "p95": 188.51199746131897, + "p99": 195.42400538921356 + }, + "combine": { + "p50": 82.0159986615181, + "p90": 120.12799829244614, + "p95": 129.37599420547485, + "p99": 155.008003115654 + }, + "roundtrip": { + "p50": 151.93599462509155, + "p90": 232.2559952735901, + "p95": 258.9440047740936, + "p99": 280.8319926261902 + }, + "isolatedSum": { + "p50": 
179.03999984264374, + "p90": 288.06399554014206, + "p95": 317.8879916667938, + "p99": 350.43200850486755 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 102.39999741315842, + "p90": 305.4080009460449, + "p95": 323.3279883861542, + "p99": 333.6640000343323 + }, + "combine": { + "p50": 82.07999914884567, + "p90": 138.7840062379837, + "p95": 148.67199957370758, + "p99": 171.77599668502808 + }, + "roundtrip": { + "p50": 155.61600029468536, + "p90": 289.8240089416504, + "p95": 331.6799998283386, + "p99": 397.3439931869507 + }, + "isolatedSum": { + "p50": 184.4799965620041, + "p90": 444.1920071840286, + "p95": 471.99998795986176, + "p99": 505.43999671936035 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 97.37599641084671, + "p90": 105.12000322341919, + "p95": 109.69600081443787, + "p99": 118.17599833011627 + }, + "combine": { + "p50": 81.727996468544, + "p90": 83.36000144481659, + "p95": 87.45600283145905, + "p99": 90.30400216579437 + }, + "roundtrip": { + "p50": 154.81600165367126, + "p90": 297.88801074028015, + "p95": 379.2319893836975, + "p99": 438.4320080280304 + }, + "isolatedSum": { + "p50": 179.10399287939072, + "p90": 188.48000466823578, + "p95": 197.1520036458969, + "p99": 208.48000049591064 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + 
"globalTokens": 128, + "dispatch": { + "p50": 98.33600372076035, + "p90": 176.70400440692902, + "p95": 189.37599658966064, + "p99": 203.8400024175644 + }, + "combine": { + "p50": 83.10399949550629, + "p90": 112.64000087976456, + "p95": 115.29599875211716, + "p99": 132.51200318336487 + }, + "roundtrip": { + "p50": 157.98400342464447, + "p90": 187.16800212860107, + "p95": 204.8639953136444, + "p99": 235.9360009431839 + }, + "isolatedSum": { + "p50": 181.44000321626663, + "p90": 289.3440052866936, + "p95": 304.6719953417778, + "p99": 336.35200560092926 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-85608159", + "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|normalized|0.35|8c8497a77d9085d", + "colorKey": "h100_7b3247bf", + "comparisonKey": "b51e047646ec8fac", + "schemaVersion": 3, + "generatedAt": "2026-06-26T17:27:03.132747+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h100-dgxc-slurm_05", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "normalized", + "suite": "resource-constrained", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 (norm)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": 0.35, + "achievedFraction": 0.3485, + "configuredUnits": 46, + "deviceUnits": 132, + 
"resourceClass": "resource-constrained", + "conformanceClass": "resource-conforming", + "fixedKernel": false, + "paretoEligible": true + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "8c8497a77d9085d", + "workloadId": "set:4:d8d49658059863f2", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28254279368", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254279368", + "createdAt": "2026-06-26T17:26:09Z", + "sha": "60dec7d70f554e252fec87709e2be52752947db1" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 96.63999825716019, + "p90": 102.84800082445145, + "p95": 105.18400371074677, + "p99": 111.13599687814713 + }, + "combine": { + "p50": 79.16799932718277, + "p90": 81.37600123882294, + "p95": 81.85599744319916, + "p99": 88.44800293445587 + }, + "roundtrip": { + "p50": 146.464005112648, + "p90": 155.35999834537506, + "p95": 157.60000050067902, + "p99": 163.35999965667725 + }, + "isolatedSum": { + "p50": 175.80799758434296, + "p90": 184.22400206327438, + "p95": 187.04000115394592, + "p99": 199.583999812603 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 100.16000270843506, + "p90": 136.63999736309052, + "p95": 139.77600634098053, + "p99": 245.728000998497 + }, + "combine": { + "p50": 80.70400357246399, + "p90": 82.62400329113007, + "p95": 86.62399649620056, + "p99": 89.37600255012512 + }, + "roundtrip": { + "p50": 
151.71200037002563, + "p90": 158.27199816703796, + "p95": 160.09600460529327, + "p99": 165.47200083732605 + }, + "isolatedSum": { + "p50": 180.86400628089905, + "p90": 219.26400065422058, + "p95": 226.4000028371811, + "p99": 335.10400354862213 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 101.85600072145462, + "p90": 138.72000575065613, + "p95": 140.99200069904327, + "p99": 146.7839926481247 + }, + "combine": { + "p50": 88.99199962615967, + "p90": 104.06400263309479, + "p95": 104.25599664449692, + "p99": 111.455999314785 + }, + "roundtrip": { + "p50": 158.49600732326508, + "p90": 194.84800100326538, + "p95": 196.99199497699738, + "p99": 201.37600600719452 + }, + "isolatedSum": { + "p50": 190.8480003476143, + "p90": 242.78400838375092, + "p95": 245.2479973435402, + "p99": 258.2399919629097 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 119.74400281906128, + "p90": 127.3919939994812, + "p95": 128.89599800109863, + "p99": 132.9279989004135 + }, + "combine": { + "p50": 113.18399757146835, + "p90": 116.19199812412262, + "p95": 117.44000017642975, + "p99": 120.67200243473053 + }, + "roundtrip": { + "p50": 202.27199792861938, + "p90": 208.54400098323822, + "p95": 233.95200073719025, + "p99": 249.79199469089508 + }, + "isolatedSum": { + "p50": 232.92800039052963, + "p90": 243.58399212360382, + "p95": 246.33599817752838, + "p99": 253.60000133514404 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + 
"fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-3752524d", + "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|normalized|0.6|8c8497a77d9085d", + "colorKey": "h100_7b3247bf", + "comparisonKey": "b51e047646ec8fac", + "schemaVersion": 3, + "generatedAt": "2026-06-26T17:30:39.045176+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h100-dgxc-slurm_13", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "normalized", + "suite": "resource-constrained", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 (norm)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": 0.6, + "achievedFraction": 0.5985, + "configuredUnits": 79, + "deviceUnits": 132, + "resourceClass": "resource-constrained", + "conformanceClass": "resource-conforming", + "fixedKernel": false, + "paretoEligible": true + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "8c8497a77d9085d", + "workloadId": "set:4:d8d49658059863f2", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28254286950", + "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254286950", + "createdAt": "2026-06-26T17:26:18Z", + "sha": "60dec7d70f554e252fec87709e2be52752947db1" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 96.28800302743912, + "p90": 103.55199873447418, + "p95": 105.66399991512299, + "p99": 108.51199924945831 + }, + "combine": { + "p50": 79.1039988398552, + "p90": 81.37600123882294, + "p95": 84.89599823951721, + "p99": 89.91999924182892 + }, + "roundtrip": { + "p50": 146.27200365066528, + "p90": 156.38400614261627, + "p95": 161.82400286197662, + "p99": 219.2319929599762 + }, + "isolatedSum": { + "p50": 175.3920018672943, + "p90": 184.92799997329712, + "p95": 190.5599981546402, + "p99": 198.43199849128723 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 96.70399874448776, + "p90": 102.30399668216705, + "p95": 104.51199859380722, + "p99": 112.22399771213531 + }, + "combine": { + "p50": 79.58400249481201, + "p90": 87.3280018568039, + "p95": 87.80799806118011, + "p99": 89.9519994854927 + }, + "roundtrip": { + "p50": 153.3759981393814, + "p90": 161.21600568294525, + "p95": 162.56000101566315, + "p99": 166.72000288963318 + }, + "isolatedSum": { + "p50": 176.28800123929977, + "p90": 189.63199853897095, + "p95": 192.31999665498734, + "p99": 202.17599719762802 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 102.88000106811523, + "p90": 106.81600123643875, + "p95": 109.0560033917427, + "p99": 114.3679991364479 + }, + 
"combine": { + "p50": 87.99999952316284, + "p90": 95.48799693584442, + "p95": 96.22400254011154, + "p99": 119.1679984331131 + }, + "roundtrip": { + "p50": 161.95200383663177, + "p90": 170.0800061225891, + "p95": 172.5119948387146, + "p99": 460.7999920845032 + }, + "isolatedSum": { + "p50": 190.88000059127808, + "p90": 202.30399817228317, + "p95": 205.28000593185425, + "p99": 233.535997569561 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 129.08799946308136, + "p90": 135.80800592899323, + "p95": 137.56799697875977, + "p99": 142.14399456977844 + }, + "combine": { + "p50": 113.27999830245972, + "p90": 120.44800072908401, + "p95": 120.67200243473053, + "p99": 123.74400347471237 + }, + "roundtrip": { + "p50": 211.5200012922287, + "p90": 218.176007270813, + "p95": 219.64800357818604, + "p99": 223.68000447750092 + }, + "isolatedSum": { + "p50": 242.36799776554108, + "p90": 256.25600665807724, + "p95": 258.2399994134903, + "p99": 265.8879980444908 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-7db267e7", + "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|decode|normal|none|none|0|normalized|0.18|ffa946582edb500", + "colorKey": "h100_716e65b9", + "comparisonKey": "259b0e9f1092ac0e", + "schemaVersion": 3, + "generatedAt": "2026-06-26T17:32:00.320566+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h100-dgxc-slurm_15", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "normalized", + "suite": 
"resource-constrained", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 (norm) · balanced", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced", + "routingLabel": "balanced", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": 0.18, + "achievedFraction": 0.1818, + "configuredUnits": 24, + "deviceUnits": 132, + "resourceClass": "resource-constrained", + "conformanceClass": "resource-conforming", + "fixedKernel": false, + "paretoEligible": true + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ffa946582edb500", + "workloadId": "set:8:7af12818400d6348", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28254367516", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254367516", + "createdAt": "2026-06-26T17:27:52Z", + "sha": "60dec7d70f554e252fec87709e2be52752947db1" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 95.93600034713745, + "p90": 103.00800204277039, + "p95": 104.38399761915207, + "p99": 107.64800012111664 + }, + "combine": { + "p50": 81.08799904584885, + "p90": 87.93599903583527, + "p95": 88.60799670219421, + "p99": 90.36800265312195 + }, + "roundtrip": { + "p50": 151.2639969587326, + "p90": 158.9760035276413, + "p95": 160.73599457740784, + "p99": 164.06400501728058 + }, + "isolatedSum": { + "p50": 
177.0239993929863, + "p90": 190.94400107860565, + "p95": 192.99199432134628, + "p99": 198.0160027742386 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 8, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 74.23999905586243, + "p90": 96.79999947547913, + "p95": 100.00000149011612, + "p99": 103.7760004401207 + }, + "combine": { + "p50": 73.98399710655212, + "p90": 87.64799684286118, + "p95": 88.54400366544724, + "p99": 89.66399729251862 + }, + "roundtrip": { + "p50": 127.32799351215363, + "p90": 158.1439971923828, + "p95": 159.32799875736237, + "p99": 162.52799332141876 + }, + "isolatedSum": { + "p50": 148.22399616241455, + "p90": 184.4479963183403, + "p95": 188.54400515556335, + "p99": 193.4399977326393 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1835008, + "combineLogicalBytes": 1835008, + "fanoutMean": 8, + "recvTokensMax": 16, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 74.87999647855759, + "p90": 99.5199978351593, + "p95": 103.20000350475311, + "p99": 106.62399977445602 + }, + "combine": { + "p50": 73.95199686288834, + "p90": 87.74399757385254, + "p95": 88.06400001049042, + "p99": 88.76799792051315 + }, + "roundtrip": { + "p50": 127.80800461769104, + "p90": 156.3519984483719, + "p95": 158.81599485874176, + "p99": 162.33600676059723 + }, + "isolatedSum": { + "p50": 148.83199334144592, + "p90": 187.26399540901184, + "p95": 191.26400351524353, + "p99": 195.39199769496918 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 8, + "recvTokensMax": 32, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + 
"dispatch": { + "p50": 94.36800330877304, + "p90": 100.09600222110748, + "p95": 101.95200145244598, + "p99": 107.4879989027977 + }, + "combine": { + "p50": 80.92799782752991, + "p90": 88.03199976682663, + "p95": 88.86399865150452, + "p99": 89.79199826717377 + }, + "roundtrip": { + "p50": 149.85600113868713, + "p90": 156.95999562740326, + "p95": 158.1760048866272, + "p99": 161.98399662971497 + }, + "isolatedSum": { + "p50": 175.29600113630295, + "p90": 188.1280019879341, + "p95": 190.8160001039505, + "p99": 197.27999716997147 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 7340032, + "combineLogicalBytes": 7340032, + "fanoutMean": 8, + "recvTokensMax": 64, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 94.36800330877304, + "p90": 104.80000078678131, + "p95": 106.78400099277496, + "p99": 115.00799655914307 + }, + "combine": { + "p50": 86.59200370311737, + "p90": 88.76799792051315, + "p95": 89.56799656152725, + "p99": 96.83199971914291 + }, + "roundtrip": { + "p50": 150.11200308799744, + "p90": 161.50400042533875, + "p95": 166.24000668525696, + "p99": 490.62401056289673 + }, + "isolatedSum": { + "p50": 180.9600070118904, + "p90": 193.56799870729446, + "p95": 196.35199755430222, + "p99": 211.83999627828598 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 8, + "recvTokensMax": 128, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 87.0399996638298, + "p90": 106.04800283908844, + "p95": 110.1439967751503, + "p99": 123.83999675512314 + }, + "combine": { + "p50": 82.5280025601387, + "p90": 96.3200032711029, + "p95": 96.73599898815155, + "p99": 97.56799787282944 + }, + "roundtrip": { + "p50": 143.5839980840683, + "p90": 166.55999422073364, + "p95": 168.7680035829544, + 
"p99": 175.55199563503265 + }, + "isolatedSum": { + "p50": 169.5680022239685, + "p90": 202.36800611019135, + "p95": 206.87999576330185, + "p99": 221.40799462795258 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 8, + "recvTokensMax": 256, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 116.92799627780914, + "p90": 126.3359934091568, + "p95": 128.63999605178833, + "p99": 132.6719969511032 + }, + "combine": { + "p50": 104.19200360774994, + "p90": 112.06399649381638, + "p95": 112.99200356006622, + "p99": 113.76000195741653 + }, + "roundtrip": { + "p50": 190.49599766731262, + "p90": 199.74400103092194, + "p95": 202.36800611019135, + "p99": 204.76800203323364 + }, + "isolatedSum": { + "p50": 221.11999988555908, + "p90": 238.39998990297318, + "p95": 241.63199961185455, + "p99": 246.43199890851974 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 8, + "recvTokensMax": 512, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 129.85600531101227, + "p90": 152.96000242233276, + "p95": 154.78399395942688, + "p99": 158.87999534606934 + }, + "combine": { + "p50": 121.2799996137619, + "p90": 129.43999469280243, + "p95": 130.3360015153885, + "p99": 145.34400403499603 + }, + "roundtrip": { + "p50": 226.8799990415573, + "p90": 240.31999707221985, + "p95": 242.01600253582, + "p99": 245.02399563789368 + }, + "isolatedSum": { + "p50": 251.13600492477417, + "p90": 282.3999971151352, + "p95": 285.11999547481537, + "p99": 304.22399938106537 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 8, + "recvTokensMax": 1024, + "stragglerRank": 4, + "correct": true, + 
"samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-c5b168ae", + "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|decode|normal|none|none|0|normalized|0.18|14ded8461f2636c", + "colorKey": "h100_f7ec28aa", + "comparisonKey": "9896b8e4d81bc6a5", + "schemaVersion": 3, + "generatedAt": "2026-06-26T17:32:03.917674+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h100-dgxc-slurm_11", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "normalized", + "suite": "resource-constrained", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 (norm) · zipf", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf", + "routingLabel": "zipf", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": 0.18, + "achievedFraction": 0.1818, + "configuredUnits": 24, + "deviceUnits": 132, + "resourceClass": "resource-constrained", + "conformanceClass": "resource-conforming", + "fixedKernel": false, + "paretoEligible": true + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "14ded8461f2636c", + "workloadId": "set:8:f5576e2b712d38c3", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28254376151", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254376151", + "createdAt": 
"2026-06-26T17:28:02Z", + "sha": "60dec7d70f554e252fec87709e2be52752947db1" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 96.89600020647049, + "p90": 104.032002389431, + "p95": 106.04800283908844, + "p99": 111.04000359773636 + }, + "combine": { + "p50": 74.36800003051758, + "p90": 80.03199845552444, + "p95": 81.31200075149536, + "p99": 82.68799632787704 + }, + "roundtrip": { + "p50": 145.82400023937225, + "p90": 153.76000106334686, + "p95": 160.0639969110489, + "p99": 226.30399465560913 + }, + "isolatedSum": { + "p50": 171.26400023698807, + "p90": 184.06400084495544, + "p95": 187.3600035905838, + "p99": 193.7279999256134 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 444416, + "combineLogicalBytes": 444416, + "fanoutMean": 3.875, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 70.72000205516815, + "p90": 103.93600165843964, + "p95": 105.18400371074677, + "p99": 113.63200098276138 + }, + "combine": { + "p50": 71.35999947786331, + "p90": 80.32000064849854, + "p95": 81.18399977684021, + "p99": 88.16000074148178 + }, + "roundtrip": { + "p50": 126.68800354003906, + "p90": 152.5759994983673, + "p95": 155.32800555229187, + "p99": 159.29600596427917 + }, + "isolatedSum": { + "p50": 142.08000153303146, + "p90": 184.25600230693817, + "p95": 186.36800348758698, + "p99": 201.79200172424316 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 845824, + "combineLogicalBytes": 845824, + "fanoutMean": 3.6875, + "recvTokensMax": 16, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 70.14399766921997, + "p90": 100.28800368309021, + "p95": 102.55999863147736, + "p99": 131.71200454235077 + }, + "combine": { + "p50": 71.61600142717361, + "p90": 79.55200225114822, + "p95": 79.74400371313095, 
+ "p99": 84.22400057315826 + }, + "roundtrip": { + "p50": 127.77599692344666, + "p90": 153.50399911403656, + "p95": 155.2640050649643, + "p99": 160.73599457740784 + }, + "isolatedSum": { + "p50": 141.75999909639359, + "p90": 179.84000593423843, + "p95": 182.3040023446083, + "p99": 215.93600511550903 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1691648, + "combineLogicalBytes": 1691648, + "fanoutMean": 3.6875, + "recvTokensMax": 32, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 94.97600048780441, + "p90": 100.832000374794, + "p95": 102.30399668216705, + "p99": 114.3999993801117 + }, + "combine": { + "p50": 71.52000069618225, + "p90": 81.18399977684021, + "p95": 81.7599967122078, + "p99": 86.94399893283844 + }, + "roundtrip": { + "p50": 125.31200051307678, + "p90": 153.05599570274353, + "p95": 156.0640037059784, + "p99": 159.42400693893433 + }, + "isolatedSum": { + "p50": 166.49600118398666, + "p90": 182.01600015163422, + "p95": 184.06399339437485, + "p99": 201.34399831295013 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3354624, + "combineLogicalBytes": 3354624, + "fanoutMean": 3.65625, + "recvTokensMax": 64, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 95.551997423172, + "p90": 100.89600086212158, + "p95": 103.26399654150009, + "p99": 112.31999844312668 + }, + "combine": { + "p50": 79.48800176382065, + "p90": 86.87999844551086, + "p95": 87.71199733018875, + "p99": 88.22400122880936 + }, + "roundtrip": { + "p50": 149.79200065135956, + "p90": 158.24000537395477, + "p95": 160.0320041179657, + "p99": 165.69599509239197 + }, + "isolatedSum": { + "p50": 175.03999918699265, + "p90": 187.77599930763245, + "p95": 190.97599387168884, + "p99": 200.54399967193604 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 
6537216, + "combineLogicalBytes": 6537216, + "fanoutMean": 3.5625, + "recvTokensMax": 127, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 83.16799998283386, + "p90": 99.96800124645233, + "p95": 104.96000200510025, + "p99": 109.11999642848969 + }, + "combine": { + "p50": 79.8719972372055, + "p90": 87.93599903583527, + "p95": 89.28000181913376, + "p99": 95.39200365543365 + }, + "roundtrip": { + "p50": 135.26399433612823, + "p90": 159.19999778270721, + "p95": 161.72799468040466, + "p99": 166.6560024023056 + }, + "isolatedSum": { + "p50": 163.03999722003937, + "p90": 187.9040002822876, + "p95": 194.240003824234, + "p99": 204.51200008392334 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 12859392, + "combineLogicalBytes": 12859392, + "fanoutMean": 3.50390625, + "recvTokensMax": 255, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 100.832000374794, + "p90": 114.68800157308578, + "p95": 116.67200177907944, + "p99": 134.91199910640717 + }, + "combine": { + "p50": 90.27200192213058, + "p90": 103.32799702882767, + "p95": 104.16000336408615, + "p99": 152.12799608707428 + }, + "roundtrip": { + "p50": 164.70399498939514, + "p90": 182.8480064868927, + "p95": 186.49600446224213, + "p99": 189.40800428390503 + }, + "isolatedSum": { + "p50": 191.1040022969246, + "p90": 218.01599860191345, + "p95": 220.8320051431656, + "p99": 287.03999519348145 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 25145344, + "combineLogicalBytes": 25145344, + "fanoutMean": 3.42578125, + "recvTokensMax": 510, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 121.31199985742569, + "p90": 139.67999815940857, + "p95": 144.57599818706512, + "p99": 
150.87999403476715 + }, + "combine": { + "p50": 112.99200356006622, + "p90": 120.64000219106674, + "p95": 120.80000340938568, + "p99": 128.51199507713318 + }, + "roundtrip": { + "p50": 212.67199516296387, + "p90": 228.4799963235855, + "p95": 230.0799936056137, + "p99": 235.74399948120117 + }, + "isolatedSum": { + "p50": 234.3040034174919, + "p90": 260.3200003504753, + "p95": 265.3760015964508, + "p99": 279.39198911190033 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-cf899bce", + "identity": "h100|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|decode|normal|none|none|0|normalized|0.18|a8f501af7004836", + "colorKey": "h100_93503624", + "comparisonKey": "74d307ed048ea3b5", + "schemaVersion": 3, + "generatedAt": "2026-06-26T17:46:24.194442+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h100-dgxc-slurm_02", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "normalized", + "suite": "resource-constrained", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 (norm) · zipf+eplb", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf", + "routingLabel": "zipf+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": 0.18, + "achievedFraction": 0.1818, + "configuredUnits": 24, + "deviceUnits": 132, + "resourceClass": "resource-constrained", + "conformanceClass": "resource-conforming", + "fixedKernel": false, + 
"paretoEligible": true + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "a8f501af7004836", + "workloadId": "set:8:f5576e2b712d38c3", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": 4.927734375, + "eplbImbalanceAfter": 1.0006103515625, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28255296001", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28255296001", + "createdAt": "2026-06-26T17:45:26Z", + "sha": "36d3eb6c3c7386d3220c873d305410219c5c0f17" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 69.72800195217133, + "p90": 75.83999633789062, + "p95": 77.85599678754807, + "p99": 83.39200168848038 + }, + "combine": { + "p50": 71.26399874687195, + "p90": 73.40800017118454, + "p95": 74.0479975938797, + "p99": 78.87999713420868 + }, + "roundtrip": { + "p50": 121.85599654912949, + "p90": 128.12800705432892, + "p95": 130.3039938211441, + "p99": 134.71999764442444 + }, + "isolatedSum": { + "p50": 140.99200069904327, + "p90": 149.24799650907516, + "p95": 151.90399438142776, + "p99": 162.27199882268906 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 5.375, + "recvTokensMax": 7, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 70.3359991312027, + "p90": 76.25599950551987, + "p95": 78.59200239181519, + "p99": 84.6719965338707 + }, + "combine": { + "p50": 71.16799801588058, + "p90": 73.53600114583969, + "p95": 74.27199929952621, + "p99": 79.80799674987793 + }, + "roundtrip": { + "p50": 127.20000743865967, + "p90": 131.00799918174744, + "p95": 133.27999413013458, + "p99": 138.08000087738037 + 
}, + "isolatedSum": { + "p50": 141.50399714708328, + "p90": 149.79200065135956, + "p95": 152.8640016913414, + "p99": 164.47999328374863 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1204224, + "combineLogicalBytes": 1204224, + "fanoutMean": 5.25, + "recvTokensMax": 14, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 73.18399846553802, + "p90": 102.14400291442871, + "p95": 105.50399869680405, + "p99": 108.44799876213074 + }, + "combine": { + "p50": 73.40800017118454, + "p90": 81.82399719953537, + "p95": 87.10400015115738, + "p99": 88.95999938249588 + }, + "roundtrip": { + "p50": 131.8719983100891, + "p90": 160.3199988603592, + "p95": 162.88000345230103, + "p99": 167.1680063009262 + }, + "isolatedSum": { + "p50": 146.59199863672256, + "p90": 183.96800011396408, + "p95": 192.60799884796143, + "p99": 197.40799814462662 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2394112, + "combineLogicalBytes": 2394112, + "fanoutMean": 5.21875, + "recvTokensMax": 24, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 70.30399888753891, + "p90": 78.20799946784973, + "p95": 81.02399855852127, + "p99": 89.4400030374527 + }, + "combine": { + "p50": 72.7040022611618, + "p90": 73.91999661922455, + "p95": 74.27199929952621, + "p99": 79.58400249481201 + }, + "roundtrip": { + "p50": 128.67200374603271, + "p90": 132.83200562000275, + "p95": 135.0719928741455, + "p99": 140.22399485111237 + }, + "isolatedSum": { + "p50": 143.0080011487007, + "p90": 152.12799608707428, + "p95": 155.29599785804749, + "p99": 169.0240055322647 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4630528, + "combineLogicalBytes": 4630528, + "fanoutMean": 5.046875, + "recvTokensMax": 45, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + 
{ + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 82.97599852085114, + "p90": 102.14400291442871, + "p95": 104.70400005578995, + "p99": 109.56799983978271 + }, + "combine": { + "p50": 74.30399954319, + "p90": 87.87199854850769, + "p95": 89.12000060081482, + "p99": 89.9519994854927 + }, + "roundtrip": { + "p50": 132.4480026960373, + "p90": 161.47199273109436, + "p95": 163.26400637626648, + "p99": 166.9120043516159 + }, + "isolatedSum": { + "p50": 157.27999806404114, + "p90": 190.0160014629364, + "p95": 193.82400065660477, + "p99": 199.51999932527542 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9447424, + "combineLogicalBytes": 9447424, + "fanoutMean": 5.1484375, + "recvTokensMax": 91, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 81.40800148248672, + "p90": 103.80800068378448, + "p95": 105.3759977221489, + "p99": 108.0000028014183 + }, + "combine": { + "p50": 79.77599650621414, + "p90": 90.08000046014786, + "p95": 90.71999788284302, + "p99": 247.67999351024628 + }, + "roundtrip": { + "p50": 138.17599415779114, + "p90": 156.3519984483719, + "p95": 159.7760021686554, + "p99": 163.83999586105347 + }, + "isolatedSum": { + "p50": 161.18399798870087, + "p90": 193.88800114393234, + "p95": 196.0959956049919, + "p99": 355.6799963116646 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19023872, + "combineLogicalBytes": 19023872, + "fanoutMean": 5.18359375, + "recvTokensMax": 178, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 99.74399954080582, + "p90": 115.35999923944473, + "p95": 117.37599968910217, + "p99": 125.2799928188324 + }, + "combine": { + "p50": 90.55999666452408, + "p90": 103.61599922180176, + "p95": 104.19200360774994, + "p99": 104.8320010304451 + }, + "roundtrip": { + "p50": 
163.87200355529785, + "p90": 178.0479997396469, + "p95": 180.2240014076233, + "p99": 185.47199666500092 + }, + "isolatedSum": { + "p50": 190.3039962053299, + "p90": 218.9759984612465, + "p95": 221.5680032968521, + "p99": 230.1119938492775 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38148096, + "combineLogicalBytes": 38148096, + "fanoutMean": 5.197265625, + "recvTokensMax": 350, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 113.66400122642517, + "p90": 132.22399353981018, + "p95": 133.88800621032715, + "p99": 139.64800536632538 + }, + "combine": { + "p50": 106.59199953079224, + "p90": 114.75200206041336, + "p95": 119.99999731779099, + "p99": 121.91999703645706 + }, + "roundtrip": { + "p50": 198.91199469566345, + "p90": 213.69600296020508, + "p95": 216.0319983959198, + "p99": 220.60799598693848 + }, + "isolatedSum": { + "p50": 220.2560007572174, + "p90": 246.97599560022354, + "p95": 253.88800352811813, + "p99": 261.56800240278244 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 76955648, + "combineLogicalBytes": 76955648, + "fanoutMean": 5.2421875, + "recvTokensMax": 687, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-4eb12954", + "identity": "h100|deepep|7168|8|256|bf16|normal|cached-layout-comm-only-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|ac583971f94b176", + "colorKey": "h100_5df912ff", + "comparisonKey": "5074d4febd922e2d", + "schemaVersion": 3, + "generatedAt": "2026-06-26T17:28:11.272284+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h100-dgxc-slurm_10", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "normalized", + "suite": "resource-constrained", + "comparisonClass": "standardized", + "measurementContract": "cached-layout-comm-only-v1", + "topologyClass": 
"h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 (norm) [cl]", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": 0.18, + "achievedFraction": 0.1818, + "configuredUnits": 24, + "deviceUnits": 132, + "resourceClass": "resource-constrained", + "conformanceClass": "resource-conforming", + "fixedKernel": false, + "paretoEligible": true + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": "set:8:d8d49658059863f2", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28254332840", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254332840", + "createdAt": "2026-06-26T17:27:12Z", + "sha": "60dec7d70f554e252fec87709e2be52752947db1" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 78.91199737787247, + "p90": 85.21600067615509, + "p95": 87.20000088214874, + "p99": 93.34400296211243 + }, + "combine": { + "p50": 79.68000322580338, + "p90": 81.60000294446945, + "p95": 86.91199868917465, + "p99": 88.54400366544724 + }, + "roundtrip": { + "p50": 133.69600474834442, + "p90": 141.184002161026, + "p95": 143.2960033416748, + "p99": 151.48800611495972 + }, + "isolatedSum": { + "p50": 158.59200060367584, + "p90": 166.81600362062454, + "p95": 174.1119995713234, + "p99": 181.88800662755966 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 62.39999830722809, + "p90": 84.35200154781342, + "p95": 87.00799942016602, + "p99": 96.57599776983261 + }, + "combine": { + "p50": 71.99999690055847, + "p90": 81.02399855852127, + "p95": 81.44000172615051, + "p99": 87.80799806118011 + }, + "roundtrip": { + "p50": 116.7680025100708, + "p90": 140.00000059604645, + "p95": 141.6960060596466, + "p99": 143.96800100803375 + }, + "isolatedSum": { + "p50": 134.39999520778656, + "p90": 165.3760001063347, + "p95": 168.44800114631653, + "p99": 184.38399583101273 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 62.01599910855293, + "p90": 82.56000280380249, + "p95": 84.76799726486206, + "p99": 91.90399944782257 + }, + "combine": { + "p50": 72.89600372314453, + "p90": 86.94399893283844, + "p95": 87.61599659919739, + "p99": 88.22400122880936 + }, + "roundtrip": { + "p50": 116.57600104808807, + "p90": 143.13599467277527, + "p95": 144.96000111103058, + "p99": 189.40800428390503 + }, + "isolatedSum": { + "p50": 134.91200283169746, + "p90": 169.50400173664093, + "p95": 172.38399386405945, + "p99": 180.12800067663193 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 66.78400188684464, + "p90": 82.46400207281113, + "p95": 85.1840004324913, + "p99": 90.65599739551544 + }, 
+ "combine": { + "p50": 73.02399724721909, + "p90": 86.87999844551086, + "p95": 87.55200356245041, + "p99": 88.57599645853043 + }, + "roundtrip": { + "p50": 116.67200177907944, + "p90": 142.4960047006607, + "p95": 143.64799857139587, + "p99": 149.1200029850006 + }, + "isolatedSum": { + "p50": 139.80799913406372, + "p90": 169.344000518322, + "p95": 172.7360039949417, + "p99": 179.23199385404587 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 78.97599786520004, + "p90": 84.83199775218964, + "p95": 86.94399893283844, + "p99": 90.87999910116196 + }, + "combine": { + "p50": 80.4160013794899, + "p90": 87.99999952316284, + "p95": 88.25600147247314, + "p99": 89.75999802350998 + }, + "roundtrip": { + "p50": 116.73600226640701, + "p90": 140.00000059604645, + "p95": 143.23200285434723, + "p99": 146.94400131702423 + }, + "isolatedSum": { + "p50": 159.39199924468994, + "p90": 172.83199727535248, + "p95": 175.20000040531158, + "p99": 180.63999712467194 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 75.58400183916092, + "p90": 84.6719965338707, + "p95": 86.20800077915192, + "p99": 90.97599983215332 + }, + "combine": { + "p50": 80.19199967384338, + "p90": 88.51200342178345, + "p95": 95.10400146245956, + "p99": 111.77600175142288 + }, + "roundtrip": { + "p50": 143.16800236701965, + "p90": 153.28000485897064, + "p95": 154.7520011663437, + "p99": 170.6240028142929 + }, + "isolatedSum": { + "p50": 155.7760015130043, + "p90": 173.18399995565414, + "p95": 
181.31200224161148, + "p99": 202.7520015835762 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 85.15200018882751, + "p90": 96.67199850082397, + "p95": 98.30400347709656, + "p99": 158.65600109100342 + }, + "combine": { + "p50": 91.20000153779984, + "p90": 105.02400249242783, + "p95": 106.04800283908844, + "p99": 127.87200510501862 + }, + "roundtrip": { + "p50": 151.8079936504364, + "p90": 167.67999529838562, + "p95": 172.06400632858276, + "p99": 198.2399970293045 + }, + "isolatedSum": { + "p50": 176.35200172662735, + "p90": 201.6960009932518, + "p95": 204.352006316185, + "p99": 286.52800619602203 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 104.73600029945374, + "p90": 119.64800208806992, + "p95": 128.03199887275696, + "p99": 401.43999457359314 + }, + "combine": { + "p50": 106.49599879980087, + "p90": 120.83200365304947, + "p95": 121.47200107574463, + "p99": 128.00000607967377 + }, + "roundtrip": { + "p50": 187.45599687099457, + "p90": 201.34399831295013, + "p95": 202.55999267101288, + "p99": 206.68800175189972 + }, + "isolatedSum": { + "p50": 211.2319990992546, + "p90": 240.48000574111938, + "p95": 249.5039999485016, + "p99": 529.4400006532669 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-76b84ec2", + "identity": 
"h100|deepep|7168|8|256|bf16|normal|cached-layout-comm-only-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h100_17694d2c", + "comparisonKey": "d31efe4aa43e0223", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:47:16.080205+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h100-dgxc-slurm_11", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "cached-layout-comm-only-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 [cl]", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": "set:8:d8d49658059863f2", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271551406", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271551406", + "createdAt": "2026-06-26T23:46:18Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, 
+ "globalTokens": 8, + "dispatch": { + "p50": 77.7600035071373, + "p90": 84.25600081682205, + "p95": 86.496002972126, + "p99": 92.57599711418152 + }, + "combine": { + "p50": 75.9039968252182, + "p90": 81.95199817419052, + "p95": 82.40000158548355, + "p99": 87.2960016131401 + }, + "roundtrip": { + "p50": 131.45600259304047, + "p90": 136.25599443912506, + "p95": 138.59200477600098, + "p99": 142.68800616264343 + }, + "isolatedSum": { + "p50": 153.6640003323555, + "p90": 166.20799899101257, + "p95": 168.89600455760956, + "p99": 179.87199872732162 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 66.23999774456024, + "p90": 80.99199831485748, + "p95": 83.13599973917007, + "p99": 87.52000331878662 + }, + "combine": { + "p50": 72.06399738788605, + "p90": 81.85599744319916, + "p95": 82.11199939250946, + "p99": 85.91999858617783 + }, + "roundtrip": { + "p50": 115.55200070142746, + "p90": 136.06399297714233, + "p95": 137.9839926958084, + "p99": 142.4960047006607 + }, + "isolatedSum": { + "p50": 138.3039951324463, + "p90": 162.84799575805664, + "p95": 165.24799913167953, + "p99": 173.44000190496445 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 77.60000228881836, + "p90": 81.69600367546082, + "p95": 83.93599838018417, + "p99": 89.02399986982346 + }, + "combine": { + "p50": 79.52000200748444, + "p90": 82.20800012350082, + "p95": 83.16799998283386, + "p99": 87.2960016131401 + }, + "roundtrip": { + "p50": 133.82400572299957, + "p90": 140.86399972438812, + "p95": 
143.10400187969208, + "p99": 149.72800016403198 + }, + "isolatedSum": { + "p50": 157.1200042963028, + "p90": 163.90400379896164, + "p95": 167.10399836301804, + "p99": 176.32000148296356 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 77.66400277614594, + "p90": 83.13599973917007, + "p95": 87.8399983048439, + "p99": 131.67999684810638 + }, + "combine": { + "p50": 81.216000020504, + "p90": 82.71999657154083, + "p95": 84.03199911117554, + "p99": 90.20800143480301 + }, + "roundtrip": { + "p50": 134.68800485134125, + "p90": 139.55199718475342, + "p95": 142.752006649971, + "p99": 145.56799829006195 + }, + "isolatedSum": { + "p50": 158.88000279664993, + "p90": 165.8559963107109, + "p95": 171.87199741601944, + "p99": 221.8879982829094 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 77.79199630022049, + "p90": 81.66400343179703, + "p95": 84.73599702119827, + "p99": 87.23200112581253 + }, + "combine": { + "p50": 81.69600367546082, + "p90": 84.79999750852585, + "p95": 88.95999938249588, + "p99": 90.27200192213058 + }, + "roundtrip": { + "p50": 135.29600203037262, + "p90": 143.5839980840683, + "p95": 144.96000111103058, + "p99": 150.30400454998016 + }, + "isolatedSum": { + "p50": 159.4879999756813, + "p90": 166.46400094032288, + "p95": 173.69599640369415, + "p99": 177.50400304794312 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 6, + 
"correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 83.42400193214417, + "p90": 88.3840024471283, + "p95": 89.28000181913376, + "p99": 95.20000219345093 + }, + "combine": { + "p50": 81.44000172615051, + "p90": 89.9839997291565, + "p95": 90.27200192213058, + "p99": 92.47999638319016 + }, + "roundtrip": { + "p50": 129.18399274349213, + "p90": 144.51199769973755, + "p95": 147.0080018043518, + "p99": 152.73599326610565 + }, + "isolatedSum": { + "p50": 164.86400365829468, + "p90": 178.3680021762848, + "p95": 179.55200374126434, + "p99": 187.67999857664108 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 93.56799721717834, + "p90": 101.40799731016159, + "p95": 102.36799716949463, + "p99": 109.47199910879135 + }, + "combine": { + "p50": 94.81599926948547, + "p90": 99.61599856615067, + "p95": 102.33599692583084, + "p99": 105.82400113344193 + }, + "roundtrip": { + "p50": 158.78400206565857, + "p90": 165.72800278663635, + "p95": 167.04000532627106, + "p99": 170.01600563526154 + }, + "isolatedSum": { + "p50": 188.38399648666382, + "p90": 201.02399587631226, + "p95": 204.70399409532547, + "p99": 215.29600024223328 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 112.41599917411804, + "p90": 120.4800009727478, + "p95": 123.48800152540207, + "p99": 303.6800026893616 + }, + "combine": { + "p50": 111.90400272607803, + "p90": 117.34399944543839, + "p95": 120.03199756145477, + "p99": 
125.08800625801086 + }, + "roundtrip": { + "p50": 192.80000030994415, + "p90": 199.74400103092194, + "p95": 201.9519954919815, + "p99": 206.9759964942932 + }, + "isolatedSum": { + "p50": 224.32000190019608, + "p90": 237.8240004181862, + "p95": 243.51999908685684, + "p99": 428.76800894737244 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-6f4d88a5", + "identity": "h100|deepep|7168|8|256|bf16|ll|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h100_8abde1a9", + "comparisonKey": "a63125ec759ccc03", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:48:24.132792+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h100-dgxc-slurm_00", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "ll", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 LL", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", 
+ "workloadId": "set:8:d8d49658059863f2", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271587010", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271587010", + "createdAt": "2026-06-26T23:47:25Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 47.29599878191948, + "p90": 48.70399832725525, + "p95": 49.02400076389313, + "p99": 54.75199967622757 + }, + "combine": { + "p50": 36.57599911093712, + "p90": 37.408001720905304, + "p95": 38.59199956059456, + "p99": 44.60800066590309 + }, + "roundtrip": { + "p50": 58.97599831223488, + "p90": 66.6240006685257, + "p95": 67.1359971165657, + "p99": 67.6800012588501 + }, + "isolatedSum": { + "p50": 83.8719978928566, + "p90": 86.11200004816055, + "p95": 87.61600032448769, + "p99": 99.36000034213066 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 14, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 40.32000154256821, + "p90": 48.51200059056282, + "p95": 48.73599857091904, + "p99": 53.82400006055832 + }, + "combine": { + "p50": 35.77600046992302, + "p90": 37.02399879693985, + "p95": 38.94399851560593, + "p99": 44.47999969124794 + }, + "roundtrip": { + "p50": 56.57599866390228, + "p90": 65.05600363016129, + "p95": 66.27199798822403, + "p99": 67.07199662923813 + }, + "isolatedSum": { + "p50": 76.09600201249123, + "p90": 85.53599938750267, + "p95": 87.67999708652496, + "p99": 98.30399975180626 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + 
"combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 21, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 42.27200150489807, + "p90": 48.70399832725525, + "p95": 49.056001007556915, + "p99": 55.39200082421303 + }, + "combine": { + "p50": 36.70400008559227, + "p90": 37.50399872660637, + "p95": 43.07200014591217, + "p99": 45.05600035190582 + }, + "roundtrip": { + "p50": 59.167999774217606, + "p90": 66.880002617836, + "p95": 67.45599955320358, + "p99": 68.57600063085556 + }, + "isolatedSum": { + "p50": 78.97600159049034, + "p90": 86.20799705386162, + "p95": 92.12800115346909, + "p99": 100.44800117611885 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 39, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 47.359999269247055, + "p90": 48.70399832725525, + "p95": 48.895999789237976, + "p99": 55.26399984955788 + }, + "combine": { + "p50": 36.57599911093712, + "p90": 43.2640016078949, + "p95": 43.776001781225204, + "p99": 45.024000108242035 + }, + "roundtrip": { + "p50": 64.67200070619583, + "p90": 67.10399687290192, + "p95": 67.29599833488464, + "p99": 69.47200000286102 + }, + "isolatedSum": { + "p50": 83.93599838018417, + "p90": 91.96799993515015, + "p95": 92.67200157046318, + "p99": 100.28799995779991 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 74, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 48.448000103235245, + "p90": 55.64799904823303, + "p95": 56.2559999525547, + "p99": 56.89600110054016 + }, + "combine": { + "p50": 
43.776001781225204, + "p90": 44.73600164055824, + "p95": 44.89599913358688, + "p99": 48.22399839758873 + }, + "roundtrip": { + "p50": 66.880002617836, + "p90": 73.82400333881378, + "p95": 74.68800246715546, + "p99": 75.29599964618683 + }, + "isolatedSum": { + "p50": 92.22400188446045, + "p90": 100.38400068879128, + "p95": 101.15199908614159, + "p99": 105.11999949812889 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 145, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 49.60000142455101, + "p90": 56.8000003695488, + "p95": 57.08799883723259, + "p99": 59.167999774217606 + }, + "combine": { + "p50": 51.00800096988678, + "p90": 52.86400020122528, + "p95": 53.0879981815815, + "p99": 53.98400127887726 + }, + "roundtrip": { + "p50": 75.39200037717819, + "p90": 83.26400071382523, + "p95": 83.74399691820145, + "p99": 84.63999629020691 + }, + "isolatedSum": { + "p50": 100.60800239443779, + "p90": 109.66400057077408, + "p95": 110.17599701881409, + "p99": 113.15200105309486 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 287, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 64.92800265550613, + "p90": 67.45599955320358, + "p95": 72.41600006818771, + "p99": 74.0479975938797 + }, + "combine": { + "p50": 61.055999249219894, + "p90": 63.1679967045784, + "p95": 68.54400038719177, + "p99": 77.18399912118912 + }, + "roundtrip": { + "p50": 105.76000064611435, + "p90": 108.67200046777725, + "p95": 109.18399691581726, + "p99": 113.69600147008896 + }, + "isolatedSum": { + "p50": 125.98400190472603, + "p90": 130.62399625778198, + "p95": 140.9600004553795, + "p99": 
151.23199671506882 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 564, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 86.04799956083298, + "p90": 91.71199798583984, + "p95": 92.83199906349182, + "p99": 94.62399780750275 + }, + "combine": { + "p50": 94.36800330877304, + "p90": 96.79999947547913, + "p95": 97.82399982213974, + "p99": 218.78400444984436 + }, + "roundtrip": { + "p50": 152.8960019350052, + "p90": 158.91200304031372, + "p95": 159.67999398708344, + "p99": 163.2000058889389 + }, + "isolatedSum": { + "p50": 180.41600286960602, + "p90": 188.51199746131897, + "p95": 190.65599888563156, + "p99": 313.4080022573471 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 1104, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-fecf5035", + "identity": "h100|deepep|7168|8|256|bf16|ll|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h100_45e1ef29", + "comparisonKey": "b17b52153b29fbde", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:48:28.951078+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h100-dgxc-slurm_11", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "ll", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 LL", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", 
+ "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": "set:8:d8d49658059863f2", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271590306", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271590306", + "createdAt": "2026-06-26T23:47:32Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 42.17600077390671, + "p90": 48.928000032901764, + "p95": 49.8879998922348, + "p99": 51.77599936723709 + }, + "combine": { + "p50": 36.99199855327606, + "p90": 38.176000118255615, + "p95": 38.40000182390213, + "p99": 44.03200000524521 + }, + "roundtrip": { + "p50": 59.42400172352791, + "p90": 61.216000467538834, + "p95": 61.63199990987778, + "p99": 69.31199878454208 + }, + "isolatedSum": { + "p50": 79.16799932718277, + "p90": 87.10400015115738, + "p95": 88.28800171613693, + "p99": 95.8079993724823 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 14, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 42.30400174856186, + "p90": 
49.375999718904495, + "p95": 49.95200037956238, + "p99": 51.80799961090088 + }, + "combine": { + "p50": 38.11199963092804, + "p90": 39.0079990029335, + "p95": 39.84000161290169, + "p99": 45.9199994802475 + }, + "roundtrip": { + "p50": 60.47999858856201, + "p90": 61.69600039720535, + "p95": 63.90400230884552, + "p99": 69.21599805355072 + }, + "isolatedSum": { + "p50": 80.4160013794899, + "p90": 88.383998721838, + "p95": 89.79200199246407, + "p99": 97.72799909114838 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 21, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 42.367998510599136, + "p90": 49.855999648571014, + "p95": 50.20799860358238, + "p99": 57.95200169086456 + }, + "combine": { + "p50": 37.47199848294258, + "p90": 38.7520007789135, + "p95": 39.03999924659729, + "p99": 46.30399867892265 + }, + "roundtrip": { + "p50": 59.26400050520897, + "p90": 61.983998864889145, + "p95": 63.19999694824219, + "p99": 69.50400024652481 + }, + "isolatedSum": { + "p50": 79.83999699354172, + "p90": 88.60800042748451, + "p95": 89.24799785017967, + "p99": 104.25600036978722 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 39, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 42.81599819660187, + "p90": 49.247998744249344, + "p95": 49.855999648571014, + "p99": 51.42400041222572 + }, + "combine": { + "p50": 37.9519984126091, + "p90": 38.784001022577286, + "p95": 40.352001786231995, + "p99": 46.39999940991402 + }, + "roundtrip": { + "p50": 60.63999980688095, + "p90": 68.35199892520905, + "p95": 68.80000233650208, + "p99": 69.88800317049026 + }, + "isolatedSum": { + "p50": 
80.76799660921097, + "p90": 88.03199976682663, + "p95": 90.20800143480301, + "p99": 97.82399982213974 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 74, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 43.74400153756142, + "p90": 50.23999884724617, + "p95": 50.84799975156784, + "p99": 57.18399956822395 + }, + "combine": { + "p50": 38.2080003619194, + "p90": 45.791998505592346, + "p95": 46.08000069856644, + "p99": 49.056001007556915 + }, + "roundtrip": { + "p50": 66.91200286149979, + "p90": 69.15199756622314, + "p95": 69.98399645090103, + "p99": 76.7040029168129 + }, + "isolatedSum": { + "p50": 81.95200189948082, + "p90": 96.03199735283852, + "p95": 96.92800045013428, + "p99": 106.24000057578087 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 145, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 50.464000552892685, + "p90": 52.352000027894974, + "p95": 57.023998349905014, + "p99": 59.90400165319443 + }, + "combine": { + "p50": 46.68800160288811, + "p90": 48.128001391887665, + "p95": 49.056001007556915, + "p99": 54.84800040721893 + }, + "roundtrip": { + "p50": 76.76800340414047, + "p90": 84.44800227880478, + "p95": 85.21600067615509, + "p99": 86.30400151014328 + }, + "isolatedSum": { + "p50": 97.15200215578079, + "p90": 100.48000141978264, + "p95": 106.07999935746193, + "p99": 114.75200206041336 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 287, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + 
"globalTokens": 512, + "dispatch": { + "p50": 61.69600039720535, + "p90": 66.6240006685257, + "p95": 67.55200028419495, + "p99": 73.7600028514862 + }, + "combine": { + "p50": 62.17600032687187, + "p90": 63.551999628543854, + "p95": 64.06400352716446, + "p99": 70.49600034952164 + }, + "roundtrip": { + "p50": 102.11200267076492, + "p90": 109.8560020327568, + "p95": 110.27199774980545, + "p99": 111.39199882745743 + }, + "isolatedSum": { + "p50": 123.87200072407722, + "p90": 130.17600029706955, + "p95": 131.6160038113594, + "p99": 144.25600320100784 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 564, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 85.56800335645676, + "p90": 89.50400352478027, + "p95": 90.14400094747543, + "p99": 95.45599669218063 + }, + "combine": { + "p50": 91.45600348711014, + "p90": 99.16800260543823, + "p95": 99.80800002813339, + "p99": 101.05600208044052 + }, + "roundtrip": { + "p50": 158.52800011634827, + "p90": 164.60800170898438, + "p95": 166.52800142765045, + "p99": 168.38400065898895 + }, + "isolatedSum": { + "p50": 177.0240068435669, + "p90": 188.6720061302185, + "p95": 189.95200097560883, + "p99": 196.51199877262115 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 1104, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-f1655975", + "identity": "h100|deepep|7168|8|256|bf16|ll|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|ac583971f94b176", + "colorKey": "h100_81ce2214", + "comparisonKey": "16f06985ac4d7bde", + "schemaVersion": 3, + "generatedAt": "2026-06-26T17:31:24.570568+00:00", + "status": "valid", + "publicationStatus": "official", + 
"runner": "h100-dgxc-slurm_02", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "ll", + "resourceMode": "normalized", + "suite": "resource-constrained", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 LL (norm)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": 0.18, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": "set:8:d8d49658059863f2", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28254350430", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254350430", + "createdAt": "2026-06-26T17:27:32Z", + "sha": "60dec7d70f554e252fec87709e2be52752947db1" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 42.49599948525429, + "p90": 50.27199909090996, + "p95": 50.87999999523163, + "p99": 57.920001447200775 + }, + "combine": { + "p50": 37.98399865627289, + "p90": 39.135999977588654, + "p95": 45.3759990632534, + "p99": 46.911999583244324 + }, + "roundtrip": { + "p50": 
60.83200126886368, + "p90": 62.272001057863235, + "p95": 67.90400296449661, + "p99": 69.88800317049026 + }, + "isolatedSum": { + "p50": 80.47999814152718, + "p90": 89.40799906849861, + "p95": 96.25599905848503, + "p99": 104.8320010304451 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 14, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 49.02400076389313, + "p90": 50.40000006556511, + "p95": 50.87999999523163, + "p99": 57.11999908089638 + }, + "combine": { + "p50": 38.2080003619194, + "p90": 38.84800150990486, + "p95": 39.64800015091896, + "p99": 45.85599899291992 + }, + "roundtrip": { + "p50": 61.216000467538834, + "p90": 67.84000247716904, + "p95": 68.9919963479042, + "p99": 69.88800317049026 + }, + "isolatedSum": { + "p50": 87.23200112581253, + "p90": 89.24800157546997, + "p95": 90.52800014615059, + "p99": 102.9759980738163 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 21, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 42.75200143456459, + "p90": 50.04800111055374, + "p95": 50.52800104022026, + "p99": 57.88800120353699 + }, + "combine": { + "p50": 37.9519984126091, + "p90": 38.84800150990486, + "p95": 40.44799879193306, + "p99": 46.52800038456917 + }, + "roundtrip": { + "p50": 60.736000537872314, + "p90": 62.431998550891876, + "p95": 67.9360032081604, + "p99": 70.0799971818924 + }, + "isolatedSum": { + "p50": 80.70399984717369, + "p90": 88.8960026204586, + "p95": 90.97599983215332, + "p99": 104.41600158810616 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 39, + 
"stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 49.12000149488449, + "p90": 50.36799982190132, + "p95": 50.783999264240265, + "p99": 56.44800141453743 + }, + "combine": { + "p50": 38.2080003619194, + "p90": 39.8080013692379, + "p95": 44.89599913358688, + "p99": 46.23999819159508 + }, + "roundtrip": { + "p50": 61.08799949288368, + "p90": 68.54400038719177, + "p95": 69.023996591568, + "p99": 70.01599669456482 + }, + "isolatedSum": { + "p50": 87.3280018568039, + "p90": 90.17600119113922, + "p95": 95.67999839782715, + "p99": 102.68799960613251 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 74, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 49.536000937223434, + "p90": 50.783999264240265, + "p95": 52.73599922657013, + "p99": 58.079998940229416 + }, + "combine": { + "p50": 45.24800181388855, + "p90": 46.431999653577805, + "p95": 46.68800160288811, + "p99": 48.48000034689903 + }, + "roundtrip": { + "p50": 68.67200136184692, + "p90": 70.30399888753891, + "p95": 75.42400062084198, + "p99": 77.504001557827 + }, + "isolatedSum": { + "p50": 94.78400275111198, + "p90": 97.21599891781807, + "p95": 99.42400082945824, + "p99": 106.55999928712845 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 145, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 50.52800104022026, + "p90": 57.5999990105629, + "p95": 58.079998940229416, + "p99": 58.97599831223488 + }, + "combine": { + "p50": 46.592000871896744, + "p90": 53.568001836538315, + "p95": 54.207999259233475, + "p99": 
55.10399863123894 + }, + "roundtrip": { + "p50": 77.56800204515457, + "p90": 85.34400165081024, + "p95": 85.79199761152267, + "p99": 86.496002972126 + }, + "isolatedSum": { + "p50": 97.120001912117, + "p90": 111.16800084710121, + "p95": 112.28799819946289, + "p99": 114.07999694347382 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 287, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 66.01600348949432, + "p90": 107.35999792814255, + "p95": 108.06400328874588, + "p99": 109.40799862146378 + }, + "combine": { + "p50": 62.52799928188324, + "p90": 63.93600255250931, + "p95": 65.85600227117538, + "p99": 79.29600030183792 + }, + "roundtrip": { + "p50": 102.39999741315842, + "p90": 110.1439967751503, + "p95": 110.68800091743469, + "p99": 112.89600282907486 + }, + "isolatedSum": { + "p50": 128.54400277137756, + "p90": 171.29600048065186, + "p95": 173.92000555992126, + "p99": 188.7039989233017 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 564, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 87.2960016131401, + "p90": 90.91199934482574, + "p95": 94.08000111579895, + "p99": 95.51999717950821 + }, + "combine": { + "p50": 88.86399865150452, + "p90": 95.64799815416336, + "p95": 96.3520035147667, + "p99": 97.43999689817429 + }, + "roundtrip": { + "p50": 153.21600437164307, + "p90": 159.90400314331055, + "p95": 160.67199409008026, + "p99": 161.95200383663177 + }, + "isolatedSum": { + "p50": 176.16000026464462, + "p90": 186.5599974989891, + "p95": 190.43200463056564, + "p99": 192.9599940776825 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + 
"combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 1104, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-1bb82fc0", + "identity": "h100|deepep|4096|8|128|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||75530960a30b452", + "colorKey": "h100_97196257", + "comparisonKey": "efcc4c7d487df84c", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:51:08.338542+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h100-dgxc-slurm_02", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · fp8", + "shape": { + "hidden": 4096, + "topk": 8, + "experts": 128, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "75530960a30b452", + "workloadId": "set:8:d1b92539bddfb570", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271676478", + "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271676478", + "createdAt": "2026-06-26T23:50:12Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 184.7359985113144, + "p90": 193.08799505233765, + "p95": 196.86399400234222, + "p99": 204.25599813461304 + }, + "combine": { + "p50": 49.79199916124344, + "p90": 51.96800082921982, + "p95": 53.79199981689453, + "p99": 56.86400085687637 + }, + "roundtrip": { + "p50": 218.9760059118271, + "p90": 226.52800381183624, + "p95": 230.0799936056137, + "p99": 235.6480062007904 + }, + "isolatedSum": { + "p50": 234.52799767255783, + "p90": 245.05599588155746, + "p95": 250.65599381923676, + "p99": 261.1199989914894 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 172032, + "combineLogicalBytes": 344064, + "fanoutMean": 5.25, + "recvTokensMax": 6, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 183.87199938297272, + "p90": 192.19200313091278, + "p95": 195.16800343990326, + "p99": 201.56799256801605 + }, + "combine": { + "p50": 50.87999999523163, + "p90": 54.17599901556969, + "p95": 55.67999929189682, + "p99": 59.328000992536545 + }, + "roundtrip": { + "p50": 220.12799978256226, + "p90": 227.87199914455414, + "p95": 230.43200373649597, + "p99": 237.31200397014618 + }, + "isolatedSum": { + "p50": 234.75199937820435, + "p90": 246.36800214648247, + "p95": 250.84800273180008, + "p99": 260.8959935605526 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 352256, + "combineLogicalBytes": 704512, + "fanoutMean": 5.375, + "recvTokensMax": 12, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 187.77599930763245, + "p90": 268.0320143699646, + "p95": 271.36000990867615, + "p99": 282.49600529670715 + }, + 
"combine": { + "p50": 52.44800075888634, + "p90": 63.90400230884552, + "p95": 64.86400216817856, + "p99": 69.76000219583511 + }, + "roundtrip": { + "p50": 225.3440022468567, + "p90": 308.9280128479004, + "p95": 312.48000264167786, + "p99": 320.5440044403076 + }, + "isolatedSum": { + "p50": 240.22400006651878, + "p90": 331.9360166788101, + "p95": 336.2240120768547, + "p99": 352.25600749254227 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 692224, + "combineLogicalBytes": 1384448, + "fanoutMean": 5.28125, + "recvTokensMax": 26, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 184.03199315071106, + "p90": 193.31200420856476, + "p95": 197.79199361801147, + "p99": 205.9839963912964 + }, + "combine": { + "p50": 51.7439991235733, + "p90": 55.296000093221664, + "p95": 57.312000542879105, + "p99": 63.19999694824219 + }, + "roundtrip": { + "p50": 220.8320051431656, + "p90": 228.7680059671402, + "p95": 231.455996632576, + "p99": 239.55200612545013 + }, + "isolatedSum": { + "p50": 235.77599227428436, + "p90": 248.60800430178642, + "p95": 255.10399416089058, + "p99": 269.1839933395386 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1372160, + "combineLogicalBytes": 2744320, + "fanoutMean": 5.234375, + "recvTokensMax": 49, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 187.96800076961517, + "p90": 273.24798703193665, + "p95": 286.6879999637604, + "p99": 400.06399154663086 + }, + "combine": { + "p50": 53.75999957323074, + "p90": 65.15199691057205, + "p95": 67.45599955320358, + "p99": 75.23199915885925 + }, + "roundtrip": { + "p50": 225.600004196167, + "p90": 310.8479976654053, + "p95": 322.6880133152008, + "p99": 449.7919976711273 + }, + "isolatedSum": { + "p50": 241.72800034284592, + "p90": 338.3999839425087, + "p95": 354.14399951696396, 
+ "p99": 475.2959907054901 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2732032, + "combineLogicalBytes": 5464064, + "fanoutMean": 5.2109375, + "recvTokensMax": 94, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 189.11999464035034, + "p90": 271.36000990867615, + "p95": 286.9440019130707, + "p99": 324.0959942340851 + }, + "combine": { + "p50": 56.44800141453743, + "p90": 68.57600063085556, + "p95": 69.11999732255936, + "p99": 73.56800138950348 + }, + "roundtrip": { + "p50": 226.27200186252594, + "p90": 234.14400219917297, + "p95": 238.68800699710846, + "p99": 254.27201390266418 + }, + "isolatedSum": { + "p50": 245.56799605488777, + "p90": 339.9360105395317, + "p95": 356.06399923563004, + "p99": 397.66399562358856 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 5562368, + "combineLogicalBytes": 11124736, + "fanoutMean": 5.3046875, + "recvTokensMax": 186, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 189.34400379657745, + "p90": 270.08000016212463, + "p95": 275.2639949321747, + "p99": 289.98398780822754 + }, + "combine": { + "p50": 64.60800021886826, + "p90": 76.89599692821503, + "p95": 78.23999971151352, + "p99": 82.2720006108284 + }, + "roundtrip": { + "p50": 238.3359968662262, + "p90": 318.015992641449, + "p95": 321.4719891548157, + "p99": 329.72800731658936 + }, + "isolatedSum": { + "p50": 253.9520040154457, + "p90": 346.97599709033966, + "p95": 353.5039946436882, + "p99": 372.25598841905594 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 11096064, + "combineLogicalBytes": 22192128, + "fanoutMean": 5.291015625, + "recvTokensMax": 358, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 
192.19200313091278, + "p90": 272.15999364852905, + "p95": 275.7120132446289, + "p99": 291.29600524902344 + }, + "combine": { + "p50": 78.17599922418594, + "p90": 87.93599903583527, + "p95": 89.15200084447861, + "p99": 95.20000219345093 + }, + "roundtrip": { + "p50": 255.3279995918274, + "p90": 335.6480002403259, + "p95": 343.9359962940216, + "p99": 380.0320029258728 + }, + "isolatedSum": { + "p50": 270.3680023550987, + "p90": 360.0959926843643, + "p95": 364.8640140891075, + "p99": 386.49600744247437 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22282240, + "combineLogicalBytes": 44564480, + "fanoutMean": 5.3125, + "recvTokensMax": 699, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-c961a187", + "identity": "h100|deepep|5120|8|160|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||be1b44a963bd4ef", + "colorKey": "h100_97196257", + "comparisonKey": "994b6e44326c8d14", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:51:36.382828+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h100-dgxc-slurm_11", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · fp8", + "shape": { + "hidden": 5120, + "topk": 8, + "experts": 160, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + 
"fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "be1b44a963bd4ef", + "workloadId": "set:8:34e5874082f8ea8f", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271691858", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271691858", + "createdAt": "2026-06-26T23:50:38Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 196.03200256824493, + "p90": 203.48800718784332, + "p95": 207.32800662517548, + "p99": 214.9759978055954 + }, + "combine": { + "p50": 53.727999329566956, + "p90": 55.48800155520439, + "p95": 57.760000228881836, + "p99": 60.80000102519989 + }, + "roundtrip": { + "p50": 231.26399517059326, + "p90": 238.91200125217438, + "p95": 242.36799776554108, + "p99": 250.0160038471222 + }, + "isolatedSum": { + "p50": 249.7600018978119, + "p90": 258.9760087430477, + "p95": 265.0880068540573, + "p99": 275.7759988307953 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 215040, + "combineLogicalBytes": 430080, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 195.80799341201782, + "p90": 202.78400182724, + "p95": 205.1199972629547, + "p99": 212.12799847126007 + }, + "combine": { + "p50": 55.93600124120712, + "p90": 57.53599852323532, + "p95": 59.93599817156792, + "p99": 62.880001962184906 + }, + "roundtrip": { + "p50": 233.60000550746918, + "p90": 240.9600019454956, + "p95": 243.13600361347198, + "p99": 
255.10400533676147 + }, + "isolatedSum": { + "p50": 251.74399465322495, + "p90": 260.3200003504753, + "p95": 265.05599543452263, + "p99": 275.008000433445 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 440320, + "combineLogicalBytes": 880640, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 200.15999674797058, + "p90": 287.48801350593567, + "p95": 290.2719974517822, + "p99": 298.17599058151245 + }, + "combine": { + "p50": 57.11999908089638, + "p90": 68.67200136184692, + "p95": 69.56800073385239, + "p99": 75.3600001335144 + }, + "roundtrip": { + "p50": 238.01599442958832, + "p90": 328.5120129585266, + "p95": 332.73598551750183, + "p99": 340.1600122451782 + }, + "isolatedSum": { + "p50": 257.27999582886696, + "p90": 356.1600148677826, + "p95": 359.8399981856346, + "p99": 373.53599071502686 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 870400, + "combineLogicalBytes": 1740800, + "fanoutMean": 5.3125, + "recvTokensMax": 25, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 199.072003364563, + "p90": 282.1120023727417, + "p95": 285.8240008354187, + "p99": 292.7359938621521 + }, + "combine": { + "p50": 57.5999990105629, + "p90": 66.14399701356888, + "p95": 66.72000139951706, + "p99": 71.48800045251846 + }, + "roundtrip": { + "p50": 236.32000386714935, + "p90": 315.3280019760132, + "p95": 318.91199946403503, + "p99": 326.2079954147339 + }, + "isolatedSum": { + "p50": 256.6720023751259, + "p90": 348.2559993863106, + "p95": 352.54400223493576, + "p99": 364.22399431467056 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1735680, + "combineLogicalBytes": 3471360, + "fanoutMean": 5.296875, + "recvTokensMax": 50, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + 
"trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 199.71199333667755, + "p90": 288.86398673057556, + "p95": 291.23198986053467, + "p99": 296.4160144329071 + }, + "combine": { + "p50": 58.62399935722351, + "p90": 70.14399766921997, + "p95": 71.03999704122543, + "p99": 74.11199808120728 + }, + "roundtrip": { + "p50": 239.19999599456787, + "p90": 329.75998520851135, + "p95": 332.5439989566803, + "p99": 338.3359909057617 + }, + "isolatedSum": { + "p50": 258.33599269390106, + "p90": 359.00798439979553, + "p95": 362.2719869017601, + "p99": 370.5280125141144 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3456000, + "combineLogicalBytes": 6912000, + "fanoutMean": 5.2734375, + "recvTokensMax": 93, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 200.3519982099533, + "p90": 288.2559895515442, + "p95": 290.49599170684814, + "p99": 295.1360046863556 + }, + "combine": { + "p50": 63.040003180503845, + "p90": 73.44000041484833, + "p95": 73.95199686288834, + "p99": 79.45600152015686 + }, + "roundtrip": { + "p50": 244.25600469112396, + "p90": 330.7200074195862, + "p95": 333.24798941612244, + "p99": 339.35999870300293 + }, + "isolatedSum": { + "p50": 263.39200139045715, + "p90": 361.6959899663925, + "p95": 364.4479885697365, + "p99": 374.59200620651245 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6988800, + "combineLogicalBytes": 13977600, + "fanoutMean": 5.33203125, + "recvTokensMax": 179, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 199.5519995689392, + "p90": 287.55199909210205, + "p95": 291.6480004787445, + "p99": 305.5360019207001 + }, + "combine": { + "p50": 73.34399968385696, + "p90": 85.02399921417236, + "p95": 86.5280032157898, + "p99": 89.72799777984619 + }, + "roundtrip": { + "p50": 
254.72000241279602, + "p90": 339.83999490737915, + "p95": 342.97600388526917, + "p99": 349.5680093765259 + }, + "isolatedSum": { + "p50": 272.8959992527962, + "p90": 372.5759983062744, + "p95": 378.1760036945343, + "p99": 395.26399970054626 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13987840, + "combineLogicalBytes": 27975680, + "fanoutMean": 5.3359375, + "recvTokensMax": 355, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 206.33600652217865, + "p90": 288.32000494003296, + "p95": 292.4480140209198, + "p99": 296.671986579895 + }, + "combine": { + "p50": 86.87999844551086, + "p90": 100.19200295209885, + "p95": 104.63999956846237, + "p99": 326.24000310897827 + }, + "roundtrip": { + "p50": 274.944007396698, + "p90": 355.0719916820526, + "p95": 358.8480055332184, + "p99": 364.8959994316101 + }, + "isolatedSum": { + "p50": 293.2160049676895, + "p90": 388.5120078921318, + "p95": 397.0880135893822, + "p99": 622.9119896888733 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 27837440, + "combineLogicalBytes": 55674880, + "fanoutMean": 5.3095703125, + "recvTokensMax": 699, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-55a4c230", + "identity": "h100|deepep|6144|8|256|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h100_97196257", + "comparisonKey": "8ab5124e24ec36ab", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:52:02.860609+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h100-dgxc-slurm_19", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + 
"worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · fp8", + "shape": { + "hidden": 6144, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": "set:8:2e0df6a62cd0143e", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271706435", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271706435", + "createdAt": "2026-06-26T23:51:06Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 196.8960016965866, + "p90": 227.77600586414337, + "p95": 297.40801453590393, + "p99": 503.32802534103394 + }, + "combine": { + "p50": 57.920001447200775, + "p90": 62.144000083208084, + "p95": 67.10399687290192, + "p99": 282.0799946784973 + }, + "roundtrip": { + "p50": 237.40799725055695, + "p90": 243.77599358558655, + "p95": 245.31200528144836, + "p99": 250.0160038471222 + }, + "isolatedSum": { + "p50": 254.81600314378738, + "p90": 289.92000594735146, + "p95": 364.51201140880585, + "p99": 785.4080200195312 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 270336, + "combineLogicalBytes": 540672, + 
"fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 197.1839964389801, + "p90": 204.92799580097198, + "p95": 207.45599269866943, + "p99": 214.6880030632019 + }, + "combine": { + "p50": 58.49599838256836, + "p90": 60.92799827456474, + "p95": 63.26399743556976, + "p99": 70.65600156784058 + }, + "roundtrip": { + "p50": 237.56800591945648, + "p90": 243.96799504756927, + "p95": 247.29600548744202, + "p99": 255.61600923538208 + }, + "isolatedSum": { + "p50": 255.67999482154846, + "p90": 265.8559940755367, + "p95": 270.7199901342392, + "p99": 285.3440046310425 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 528384, + "combineLogicalBytes": 1056768, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 200.47999918460846, + "p90": 282.71999955177307, + "p95": 291.20001196861267, + "p99": 401.2480080127716 + }, + "combine": { + "p50": 59.90400165319443, + "p90": 66.84800237417221, + "p95": 69.5360004901886, + "p99": 75.68000257015228 + }, + "roundtrip": { + "p50": 243.20000410079956, + "p90": 321.9839930534363, + "p95": 326.7199993133545, + "p99": 334.75199341773987 + }, + "isolatedSum": { + "p50": 260.3840008378029, + "p90": 349.5680019259453, + "p95": 360.73601245880127, + "p99": 476.9280105829239 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1062912, + "combineLogicalBytes": 2125824, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 200.6399929523468, + "p90": 261.9200050830841, + "p95": 265.6959891319275, + "p99": 275.1680016517639 + }, + "combine": { + "p50": 60.99199876189232, + "p90": 
69.2799985408783, + "p95": 69.88800317049026, + "p99": 75.32799988985062 + }, + "roundtrip": { + "p50": 239.9040013551712, + "p90": 296.9599962234497, + "p95": 299.8400032520294, + "p99": 307.5200021266937 + }, + "isolatedSum": { + "p50": 261.6319917142391, + "p90": 331.2000036239624, + "p95": 335.58399230241776, + "p99": 350.49600154161453 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2131968, + "combineLogicalBytes": 4263936, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 201.75999402999878, + "p90": 280.3199887275696, + "p95": 284.89598631858826, + "p99": 351.48799419403076 + }, + "combine": { + "p50": 61.76000088453293, + "p90": 69.72800195217133, + "p95": 72.92799651622772, + "p99": 133.82400572299957 + }, + "roundtrip": { + "p50": 245.82399427890778, + "p90": 325.53601264953613, + "p95": 328.8959860801697, + "p99": 600.3199815750122 + }, + "isolatedSum": { + "p50": 263.5199949145317, + "p90": 350.0479906797409, + "p95": 357.823982834816, + "p99": 485.31199991703033 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4251648, + "combineLogicalBytes": 8503296, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 200.73600113391876, + "p90": 285.0559949874878, + "p95": 287.9680097103119, + "p99": 303.42400074005127 + }, + "combine": { + "p50": 66.78400188684464, + "p90": 78.20799946784973, + "p95": 79.93599772453308, + "p99": 83.8719978928566 + }, + "roundtrip": { + "p50": 249.9839961528778, + "p90": 319.487988948822, + "p95": 328.8959860801697, + "p99": 336.35199069976807 + }, + "isolatedSum": { + "p50": 267.5200030207634, + "p90": 363.2639944553375, + "p95": 367.90400743484497, + "p99": 387.29599863290787 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 8454144, + "combineLogicalBytes": 16908288, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 200.73600113391876, + "p90": 281.2480032444, + "p95": 289.11998867988586, + "p99": 304.9919903278351 + }, + "combine": { + "p50": 77.11999863386154, + "p90": 84.1279998421669, + "p95": 86.40000224113464, + "p99": 95.77599912881851 + }, + "roundtrip": { + "p50": 259.5840096473694, + "p90": 337.8559947013855, + "p95": 341.3439989089966, + "p99": 350.5280017852783 + }, + "isolatedSum": { + "p50": 277.8559997677803, + "p90": 365.3760030865669, + "p95": 375.5199909210205, + "p99": 400.7679894566536 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 16711680, + "combineLogicalBytes": 33423360, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 212.5760018825531, + "p90": 282.1759879589081, + "p95": 286.5920066833496, + "p99": 307.96799063682556 + }, + "combine": { + "p50": 92.06400066614151, + "p90": 98.11200201511383, + "p95": 99.48799759149551, + "p99": 103.74400019645691 + }, + "roundtrip": { + "p50": 289.44000601768494, + "p90": 355.3279936313629, + "p95": 359.71200466156006, + "p99": 366.91200733184814 + }, + "isolatedSum": { + "p50": 304.6400025486946, + "p90": 380.2879899740219, + "p95": 386.0800042748451, + "p99": 411.71199083328247 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 33288192, + "combineLogicalBytes": 66576384, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-416fcf7d", + "identity": 
"h100|deepep|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h100_a96c99f3", + "comparisonKey": "2a90693171512d11", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:47:58.225003+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h100-dgxc-slurm_19", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · fp8", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": "set:8:d8d49658059863f2", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271567087", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271567087", + "createdAt": "2026-06-26T23:46:45Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + 
"globalTokens": 8, + "dispatch": { + "p50": 98.9760011434555, + "p90": 106.65600001811981, + "p95": 116.06399714946747, + "p99": 156.5759927034378 + }, + "combine": { + "p50": 72.03199714422226, + "p90": 74.65600222349167, + "p95": 77.60000228881836, + "p99": 90.04800021648407 + }, + "roundtrip": { + "p50": 190.8160001039505, + "p90": 195.2960044145584, + "p95": 198.04799556732178, + "p99": 214.08000588417053 + }, + "isolatedSum": { + "p50": 171.00799828767776, + "p90": 181.31200224161148, + "p95": 193.66399943828583, + "p99": 246.62399291992188 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 70.39999961853027, + "p90": 100.3199964761734, + "p95": 102.20800340175629, + "p99": 106.72000050544739 + }, + "combine": { + "p50": 63.07200342416763, + "p90": 74.87999647855759, + "p95": 75.55200159549713, + "p99": 79.13599908351898 + }, + "roundtrip": { + "p50": 151.296004652977, + "p90": 195.5839991569519, + "p95": 197.79199361801147, + "p99": 202.17600464820862 + }, + "isolatedSum": { + "p50": 133.4720030426979, + "p90": 175.199992954731, + "p95": 177.76000499725342, + "p99": 185.85599958896637 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 71.1359977722168, + "p90": 100.73599964380264, + "p95": 103.58399897813797, + "p99": 112.0000034570694 + }, + "combine": { + "p50": 63.58399987220764, + "p90": 75.13599842786789, + "p95": 76.31999999284744, + "p99": 80.1599994301796 + }, + "roundtrip": { + "p50": 152.96000242233276, + "p90": 197.2160041332245, + "p95": 
200.28799772262573, + "p99": 206.7199945449829 + }, + "isolatedSum": { + "p50": 134.71999764442444, + "p90": 175.87199807167053, + "p95": 179.9039989709854, + "p99": 192.160002887249 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 99.39199686050415, + "p90": 103.61599922180176, + "p95": 106.46399855613708, + "p99": 123.3920007944107 + }, + "combine": { + "p50": 74.14399832487106, + "p90": 75.9039968252182, + "p95": 78.59200239181519, + "p99": 99.48799759149551 + }, + "roundtrip": { + "p50": 194.68800723552704, + "p90": 199.48799908161163, + "p95": 203.0079960823059, + "p99": 231.80800676345825 + }, + "isolatedSum": { + "p50": 173.5359951853752, + "p90": 179.51999604701996, + "p95": 185.05600094795227, + "p99": 222.87999838590622 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487296, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 98.39999675750732, + "p90": 103.16800326108932, + "p95": 105.31199723482132, + "p99": 109.98400300741196 + }, + "combine": { + "p50": 76.64000242948532, + "p90": 78.23999971151352, + "p95": 79.45600152015686, + "p99": 81.88799768686295 + }, + "roundtrip": { + "p50": 195.96800208091736, + "p90": 201.6959935426712, + "p95": 204.57600057125092, + "p99": 208.3519995212555 + }, + "isolatedSum": { + "p50": 175.03999918699265, + "p90": 181.40800297260284, + "p95": 184.76799875497818, + "p99": 191.8720006942749 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 6, + 
"correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 72.57600128650665, + "p90": 100.63999891281128, + "p95": 102.62399911880493, + "p99": 107.2319969534874 + }, + "combine": { + "p50": 71.42399996519089, + "p90": 83.0719992518425, + "p95": 84.35200154781342, + "p99": 88.73599767684937 + }, + "roundtrip": { + "p50": 160.16000509262085, + "p90": 205.05599677562714, + "p95": 208.3519995212555, + "p99": 231.1680018901825 + }, + "isolatedSum": { + "p50": 144.00000125169754, + "p90": 183.71199816465378, + "p95": 186.97600066661835, + "p99": 195.96799463033676 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 81.28000050783157, + "p90": 103.13600301742554, + "p95": 105.6319996714592, + "p99": 113.92000317573547 + }, + "combine": { + "p50": 81.11999928951263, + "p90": 92.73599833250046, + "p95": 93.6959981918335, + "p99": 98.91200065612793 + }, + "roundtrip": { + "p50": 172.5119948387146, + "p90": 216.60800278186798, + "p95": 219.87199783325195, + "p99": 227.48799622058868 + }, + "isolatedSum": { + "p50": 162.3999997973442, + "p90": 195.872001349926, + "p95": 199.3279978632927, + "p99": 212.8320038318634 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 88.86399865150452, + "p90": 108.96000266075134, + "p95": 111.23199760913849, + "p99": 116.28799885511398 + }, + "combine": { + "p50": 98.14400225877762, + "p90": 108.96000266075134, + "p95": 110.49599945545197, + "p99": 
114.68800157308578 + }, + "roundtrip": { + "p50": 216.67200326919556, + "p90": 236.00000143051147, + "p95": 238.49600553512573, + "p99": 242.46400594711304 + }, + "isolatedSum": { + "p50": 187.00800091028214, + "p90": 217.92000532150269, + "p95": 221.72799706459045, + "p99": 230.97600042819977 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-d4dbb29d", + "identity": "h100|deepep|7168|8|256|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h100_97196257", + "comparisonKey": "9687217877b9ce9c", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:48:10.138934+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h100-dgxc-slurm_03", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · fp8", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", 
+ "workloadId": "set:8:d8d49658059863f2", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271579958", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271579958", + "createdAt": "2026-06-26T23:47:12Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 193.05600225925446, + "p90": 204.3839991092682, + "p95": 210.52800118923187, + "p99": 277.9200077056885 + }, + "combine": { + "p50": 60.95999851822853, + "p90": 63.29599767923355, + "p95": 65.31199812889099, + "p99": 68.76800209283829 + }, + "roundtrip": { + "p50": 237.63200640678406, + "p90": 244.25600469112396, + "p95": 246.14399671554565, + "p99": 269.4079875946045 + }, + "isolatedSum": { + "p50": 254.016000777483, + "p90": 267.67999678850174, + "p95": 275.83999931812286, + "p99": 346.68800979852676 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 192.9280012845993, + "p90": 200.6720006465912, + "p95": 204.79999482631683, + "p99": 264.5759880542755 + }, + "combine": { + "p50": 62.272001057863235, + "p90": 64.7680014371872, + "p95": 67.391999065876, + "p99": 73.08799773454666 + }, + "roundtrip": { + "p50": 235.6480062007904, + "p90": 243.0720031261444, + "p95": 245.60000002384186, + "p99": 259.71201062202454 + }, + "isolatedSum": { + "p50": 255.20000234246254, + "p90": 265.4400020837784, + "p95": 272.19199389219284, + "p99": 337.6639857888222 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + 
"combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 197.24799692630768, + "p90": 286.080002784729, + "p95": 290.71998596191406, + "p99": 302.2400140762329 + }, + "combine": { + "p50": 63.32799792289734, + "p90": 71.32799923419952, + "p95": 75.45600086450577, + "p99": 82.62400329113007 + }, + "roundtrip": { + "p50": 242.94400215148926, + "p90": 349.40800070762634, + "p95": 354.4960021972656, + "p99": 367.13600158691406 + }, + "isolatedSum": { + "p50": 260.575994849205, + "p90": 357.4080020189285, + "p95": 366.17598682641983, + "p99": 384.864017367363 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 196.383997797966, + "p90": 251.583993434906, + "p95": 254.8159956932068, + "p99": 268.15998554229736 + }, + "combine": { + "p50": 63.87200206518173, + "p90": 72.73600250482559, + "p95": 73.5040009021759, + "p99": 77.95199751853943 + }, + "roundtrip": { + "p50": 242.11199581623077, + "p90": 299.3920147418976, + "p95": 304.1599988937378, + "p99": 410.8160138130188 + }, + "isolatedSum": { + "p50": 260.25599986314774, + "p90": 324.3199959397316, + "p95": 328.3199965953827, + "p99": 346.1119830608368 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487296, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 197.63199985027313, + "p90": 288.35201263427734, + "p95": 294.048011302948, + "p99": 322.04800844192505 + }, + "combine": { + "p50": 
66.46399945020676, + "p90": 79.9039974808693, + "p95": 106.33599758148193, + "p99": 204.25599813461304 + }, + "roundtrip": { + "p50": 246.62399291992188, + "p90": 330.24001121520996, + "p95": 333.5359990596771, + "p99": 341.18399024009705 + }, + "isolatedSum": { + "p50": 264.0959993004799, + "p90": 368.25601011514664, + "p95": 400.38400888442993, + "p99": 526.3040065765381 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 198.40000569820404, + "p90": 284.35200452804565, + "p95": 288.06400299072266, + "p99": 295.9040105342865 + }, + "combine": { + "p50": 70.97599655389786, + "p90": 79.96799796819687, + "p95": 80.70400357246399, + "p99": 83.52000266313553 + }, + "roundtrip": { + "p50": 250.36799907684326, + "p90": 306.5919876098633, + "p95": 310.2079927921295, + "p99": 368.8639998435974 + }, + "isolatedSum": { + "p50": 269.3760022521019, + "p90": 364.3200024962425, + "p95": 368.76800656318665, + "p99": 379.424013197422 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 198.65599274635315, + "p90": 284.8320007324219, + "p95": 289.69600796699524, + "p99": 304.4480085372925 + }, + "combine": { + "p50": 80.48000186681747, + "p90": 88.83199840784073, + "p95": 90.52799642086029, + "p99": 101.31199657917023 + }, + "roundtrip": { + "p50": 260.96001267433167, + "p90": 351.80801153182983, + "p95": 355.55198788642883, + "p99": 367.0400083065033 + }, + "isolatedSum": { + "p50": 279.1359946131706, + "p90": 373.6639991402626, + "p95": 380.22400438785553, + "p99": 
405.7600051164627 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 216.8319970369339, + "p90": 312.8640055656433, + "p95": 320.73599100112915, + "p99": 336.41600608825684 + }, + "combine": { + "p50": 98.94400089979172, + "p90": 112.83200234174728, + "p95": 113.79200220108032, + "p99": 119.13599818944931 + }, + "roundtrip": { + "p50": 303.2959997653961, + "p90": 388.0000114440918, + "p95": 392.2879993915558, + "p99": 401.2480080127716 + }, + "isolatedSum": { + "p50": 315.7759979367256, + "p90": 425.6960079073906, + "p95": 434.5279932022095, + "p99": 455.55200427770615 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-8e5c4d34", + "identity": "h100|deepep|7168|8|384|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||d6c49ae98878760", + "colorKey": "h100_97196257", + "comparisonKey": "969c3964291e1270", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:50:43.012530+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h100-dgxc-slurm_19", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · fp8", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 384, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", 
+ "eplbEnabled": false, + "dispatchDtype": "fp8", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "d6c49ae98878760", + "workloadId": "set:8:9a27d0df4b17fa09", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271660154", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271660154", + "createdAt": "2026-06-26T23:49:43Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 198.7520009279251, + "p90": 206.2399983406067, + "p95": 209.56799387931824, + "p99": 221.69600427150726 + }, + "combine": { + "p50": 60.83200126886368, + "p90": 64.31999802589417, + "p95": 65.98400324583054, + "p99": 69.05599683523178 + }, + "roundtrip": { + "p50": 242.71999299526215, + "p90": 250.07998943328857, + "p95": 254.5279860496521, + "p99": 290.0159955024719 + }, + "isolatedSum": { + "p50": 259.5840021967888, + "p90": 270.55999636650085, + "p95": 275.5519971251488, + "p99": 290.75200110673904 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 301056, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 205.53599298000336, + "p90": 
313.6320114135742, + "p95": 323.8399922847748, + "p99": 375.5840063095093 + }, + "combine": { + "p50": 62.81600147485733, + "p90": 76.1599987745285, + "p95": 79.19999957084656, + "p99": 83.0719992518425 + }, + "roundtrip": { + "p50": 242.49599874019623, + "p90": 250.43201446533203, + "p95": 253.08799743652344, + "p99": 294.1119968891144 + }, + "isolatedSum": { + "p50": 268.3519944548607, + "p90": 389.7920101881027, + "p95": 403.03999185562134, + "p99": 458.6560055613518 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 609280, + "combineLogicalBytes": 1218560, + "fanoutMean": 5.3125, + "recvTokensMax": 14, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 203.5519927740097, + "p90": 291.55200719833374, + "p95": 296.09599709510803, + "p99": 303.6159873008728 + }, + "combine": { + "p50": 63.26399743556976, + "p90": 73.98399710655212, + "p95": 75.83999633789062, + "p99": 80.09599894285202 + }, + "roundtrip": { + "p50": 247.42400646209717, + "p90": 336.67200803756714, + "p95": 339.4559919834137, + "p99": 346.20800614356995 + }, + "isolatedSum": { + "p50": 266.81599020957947, + "p90": 365.53600430488586, + "p95": 371.93599343299866, + "p99": 383.7119862437248 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1204224, + "combineLogicalBytes": 2408448, + "fanoutMean": 5.25, + "recvTokensMax": 26, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 199.45600628852844, + "p90": 207.8080028295517, + "p95": 213.02400529384613, + "p99": 235.29599606990814 + }, + "combine": { + "p50": 62.72000074386597, + "p90": 67.16799736022949, + "p95": 68.64000111818314, + "p99": 73.60000163316727 + }, + "roundtrip": { + "p50": 245.85600197315216, + "p90": 253.1839907169342, + "p95": 256.9279968738556, + "p99": 269.3119943141937 + }, + "isolatedSum": { + "p50": 
262.1760070323944, + "p90": 274.9760001897812, + "p95": 281.66400641202927, + "p99": 308.8959977030754 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2415616, + "combineLogicalBytes": 4831232, + "fanoutMean": 5.265625, + "recvTokensMax": 48, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 204.22400534152985, + "p90": 292.60799288749695, + "p95": 296.3840067386627, + "p99": 434.30399894714355 + }, + "combine": { + "p50": 66.14399701356888, + "p90": 75.55200159549713, + "p95": 76.1599987745285, + "p99": 79.8719972372055 + }, + "roundtrip": { + "p50": 250.59199333190918, + "p90": 335.32801270484924, + "p95": 340.2239978313446, + "p99": 366.5919899940491 + }, + "isolatedSum": { + "p50": 270.3680023550987, + "p90": 368.1599944829941, + "p95": 372.5440055131912, + "p99": 514.1759961843491 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4924416, + "combineLogicalBytes": 9848832, + "fanoutMean": 5.3671875, + "recvTokensMax": 91, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 200.99200308322906, + "p90": 286.3039970397949, + "p95": 293.3120131492615, + "p99": 305.11999130249023 + }, + "combine": { + "p50": 70.88000327348709, + "p90": 75.83999633789062, + "p95": 78.11199873685837, + "p99": 86.84799820184708 + }, + "roundtrip": { + "p50": 253.31199169158936, + "p90": 259.71201062202454, + "p95": 262.4959945678711, + "p99": 270.9439992904663 + }, + "isolatedSum": { + "p50": 271.87200635671616, + "p90": 362.14399337768555, + "p95": 371.42401188611984, + "p99": 391.9679895043373 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9748480, + "combineLogicalBytes": 19496960, + "fanoutMean": 5.3125, + "recvTokensMax": 178, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 
64, + "globalTokens": 512, + "dispatch": { + "p50": 204.22400534152985, + "p90": 293.8239872455597, + "p95": 299.74400997161865, + "p99": 323.4559893608093 + }, + "combine": { + "p50": 81.82399719953537, + "p90": 93.40800344944, + "p95": 96.63999825716019, + "p99": 99.64799880981445 + }, + "roundtrip": { + "p50": 268.73600482940674, + "p90": 351.6159951686859, + "p95": 354.4960021972656, + "p99": 361.6639971733093 + }, + "isolatedSum": { + "p50": 286.0480025410652, + "p90": 387.2319906949997, + "p95": 396.38400822877884, + "p99": 423.1039881706238 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19418112, + "combineLogicalBytes": 38836224, + "fanoutMean": 5.291015625, + "recvTokensMax": 372, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 224.2240011692047, + "p90": 294.5919930934906, + "p95": 298.4960079193115, + "p99": 310.8159899711609 + }, + "combine": { + "p50": 99.90400075912476, + "p90": 110.33599823713303, + "p95": 111.35999858379364, + "p99": 114.68800157308578 + }, + "roundtrip": { + "p50": 310.88000535964966, + "p90": 375.2320110797882, + "p95": 378.04800271987915, + "p99": 386.46399974823 + }, + "isolatedSum": { + "p50": 324.12800192832947, + "p90": 404.9279913306236, + "p95": 409.85600650310516, + "p99": 425.5039915442467 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38757376, + "combineLogicalBytes": 77514752, + "fanoutMean": 5.2802734375, + "recvTokensMax": 707, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-4e4a7f2d", + "identity": "h100|deepep|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|ac583971f94b176", + "colorKey": "h100_91aa6e56", + "comparisonKey": "511cf861d6b2e142", + "schemaVersion": 3, + "generatedAt": "2026-06-26T17:28:00.849157+00:00", + "status": "valid", + "publicationStatus": "official", 
+ "runner": "h100-dgxc-slurm_18", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "normalized", + "suite": "resource-constrained", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · fp8 (norm)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": 0.18, + "achievedFraction": 0.1818, + "configuredUnits": 24, + "deviceUnits": 132, + "resourceClass": "resource-constrained", + "conformanceClass": "resource-conforming", + "fixedKernel": false, + "paretoEligible": true + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": "set:8:d8d49658059863f2", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28254323956", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254323956", + "createdAt": "2026-06-26T17:27:01Z", + "sha": "60dec7d70f554e252fec87709e2be52752947db1" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 97.98400104045868, + "p90": 102.88000106811523, + "p95": 104.38399761915207, + "p99": 110.20799726247787 + }, + "combine": { + "p50": 72.28799909353256, + "p90": 74.14399832487106, + "p95": 75.29599964618683, + "p99": 78.65600287914276 + }, + "roundtrip": { + 
"p50": 190.65600633621216, + "p90": 195.90400159358978, + "p95": 198.30399751663208, + "p99": 202.72000133991241 + }, + "isolatedSum": { + "p50": 170.27200013399124, + "p90": 177.0239993929863, + "p95": 179.6799972653389, + "p99": 188.86400014162064 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 72.15999811887741, + "p90": 99.90400075912476, + "p95": 102.52799838781357, + "p99": 105.0880029797554 + }, + "combine": { + "p50": 63.35999816656113, + "p90": 73.18399846553802, + "p95": 73.98399710655212, + "p99": 78.46400141716003 + }, + "roundtrip": { + "p50": 153.82400155067444, + "p90": 194.43200528621674, + "p95": 196.28800451755524, + "p99": 201.05600357055664 + }, + "isolatedSum": { + "p50": 135.51999628543854, + "p90": 173.08799922466278, + "p95": 176.5119954943657, + "p99": 183.55200439691544 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 72.31999933719635, + "p90": 103.4879982471466, + "p95": 107.26399719715118, + "p99": 115.48800021409988 + }, + "combine": { + "p50": 64.03200328350067, + "p90": 76.28799974918365, + "p95": 77.82399654388428, + "p99": 81.98399841785431 + }, + "roundtrip": { + "p50": 156.09599649906158, + "p90": 202.36800611019135, + "p95": 205.63200116157532, + "p99": 212.51200139522552 + }, + "isolatedSum": { + "p50": 136.35200262069702, + "p90": 179.77599799633026, + "p95": 185.08799374103546, + "p99": 197.4719986319542 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 
5.40625, + "recvTokensMax": 29, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 97.50399738550186, + "p90": 102.30399668216705, + "p95": 105.85600137710571, + "p99": 113.40799927711487 + }, + "combine": { + "p50": 63.80800157785416, + "p90": 74.94399696588516, + "p95": 76.28799974918365, + "p99": 80.89599758386612 + }, + "roundtrip": { + "p50": 154.6880006790161, + "p90": 194.7840005159378, + "p95": 199.0399956703186, + "p99": 203.87199521064758 + }, + "isolatedSum": { + "p50": 161.31199896335602, + "p90": 177.24799364805222, + "p95": 182.14400112628937, + "p99": 194.303996860981 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487296, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 97.08800166845322, + "p90": 104.3199971318245, + "p95": 107.39199817180634, + "p99": 113.43999952077866 + }, + "combine": { + "p50": 75.74400305747986, + "p90": 78.49600166082382, + "p95": 80.06399869918823, + "p99": 83.36000144481659 + }, + "roundtrip": { + "p50": 195.2960044145584, + "p90": 205.85599541664124, + "p95": 209.85600352287292, + "p99": 223.83999824523926 + }, + "isolatedSum": { + "p50": 172.83200472593307, + "p90": 182.81599879264832, + "p95": 187.45599687099457, + "p99": 196.80000096559525 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 73.11999797821045, + "p90": 104.16000336408615, + "p95": 106.84800148010254, + "p99": 112.09599673748016 + }, + "combine": { + "p50": 69.2799985408783, + "p90": 
81.88799768686295, + "p95": 82.87999778985977, + "p99": 88.28800171613693 + }, + "roundtrip": { + "p50": 161.21600568294525, + "p90": 206.65599405765533, + "p95": 210.84800362586975, + "p99": 216.22399985790253 + }, + "isolatedSum": { + "p50": 142.39999651908875, + "p90": 186.0480010509491, + "p95": 189.7279992699623, + "p99": 200.3839984536171 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 78.65600287914276, + "p90": 106.9440022110939, + "p95": 110.55999994277954, + "p99": 125.44000148773193 + }, + "combine": { + "p50": 83.64800363779068, + "p90": 96.38399630784988, + "p95": 97.69599884748459, + "p99": 100.00000149011612 + }, + "roundtrip": { + "p50": 175.7120043039322, + "p90": 222.6880043745041, + "p95": 225.24799406528473, + "p99": 231.74400627613068 + }, + "isolatedSum": { + "p50": 162.30400651693344, + "p90": 203.3279985189438, + "p95": 208.25599879026413, + "p99": 225.44000297784805 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 90.33600240945816, + "p90": 110.84800213575363, + "p95": 113.82400244474411, + "p99": 117.11999773979187 + }, + "combine": { + "p50": 98.78399968147278, + "p90": 111.00800335407257, + "p95": 112.0000034570694, + "p99": 117.21599847078323 + }, + "roundtrip": { + "p50": 216.12800657749176, + "p90": 240.60800671577454, + "p95": 244.25600469112396, + "p99": 250.2720057964325 + }, + "isolatedSum": { + "p50": 189.12000209093094, + "p90": 221.8560054898262, + "p95": 225.8240059018135, + "p99": 234.3359962105751 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-750e874d", + "identity": "h100|deepep|7168|8|256|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|ac583971f94b176", + "colorKey": "h100_7f10961a", + "comparisonKey": "f145cb161a39591f", + "schemaVersion": 3, + "generatedAt": "2026-06-26T15:23:35.919985+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h100-dgxc-slurm_05", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "normalized", + "suite": "resource-constrained", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · fp8 (norm)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": 0.18, + "achievedFraction": 0.1818, + "configuredUnits": 24, + "deviceUnits": 132, + "resourceClass": "unknown", + "conformanceClass": "resource-conforming", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": "set:8:d8d49658059863f2", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + 
"repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28247584217", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28247584217", + "createdAt": "2026-06-26T15:22:36Z", + "sha": "fd23d02b65dba6f1ed963342b188022fc27263d1" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 251.93598866462708, + "p90": 260.3839933872223, + "p95": 263.10399174690247, + "p99": 268.5759961605072 + }, + "combine": { + "p50": 68.41599941253662, + "p90": 69.88800317049026, + "p95": 70.8480030298233, + "p99": 76.03199779987335 + }, + "roundtrip": { + "p50": 296.51200771331787, + "p90": 304.1279911994934, + "p95": 306.40000104904175, + "p99": 349.15199875831604 + }, + "isolatedSum": { + "p50": 320.3519880771637, + "p90": 330.27199655771255, + "p95": 333.95199477672577, + "p99": 344.60799396038055 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 200.51200687885284, + "p90": 256.8320035934448, + "p95": 259.99999046325684, + "p99": 268.0000066757202 + }, + "combine": { + "p50": 63.00800293684006, + "p90": 71.00799679756165, + "p95": 71.84000313282013, + "p99": 74.68800246715546 + }, + "roundtrip": { + "p50": 243.1039959192276, + "p90": 300.1919984817505, + "p95": 303.5840094089508, + "p99": 308.9919984340668 + }, + "isolatedSum": { + "p50": 263.5200098156929, + "p90": 327.84000039100647, + "p95": 331.83999359607697, + "p99": 342.68800914287567 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 199.13600385189056, + "p90": 
287.9680097103119, + "p95": 291.1359965801239, + "p99": 298.2720136642456 + }, + "combine": { + "p50": 63.519999384880066, + "p90": 75.1039981842041, + "p95": 76.73600316047668, + "p99": 81.40800148248672 + }, + "roundtrip": { + "p50": 246.17600440979004, + "p90": 330.84800839424133, + "p95": 333.9200019836426, + "p99": 343.6479866504669 + }, + "isolatedSum": { + "p50": 262.65600323677063, + "p90": 363.072007894516, + "p95": 367.8719997406006, + "p99": 379.68001514673233 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 199.16799664497375, + "p90": 258.14399123191833, + "p95": 261.4080011844635, + "p99": 267.16798543930054 + }, + "combine": { + "p50": 63.4239986538887, + "p90": 72.57600128650665, + "p95": 73.18399846553802, + "p99": 76.28799974918365 + }, + "roundtrip": { + "p50": 244.83199417591095, + "p90": 302.3039996623993, + "p95": 305.759996175766, + "p99": 310.94399094581604 + }, + "isolatedSum": { + "p50": 262.59199529886246, + "p90": 330.719992518425, + "p95": 334.5919996500015, + "p99": 343.4559851884842 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487296, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 200.28799772262573, + "p90": 286.5599989891052, + "p95": 290.0800108909607, + "p99": 296.57599329948425 + }, + "combine": { + "p50": 65.5359998345375, + "p90": 76.86399668455124, + "p95": 77.66400277614594, + "p99": 80.76799660921097 + }, + "roundtrip": { + "p50": 248.57600033283234, + "p90": 330.4640054702759, + "p95": 333.6319923400879, + "p99": 344.7360098361969 + }, + "isolatedSum": { + "p50": 
265.82399755716324, + "p90": 363.42399567365646, + "p95": 367.7440136671066, + "p99": 377.3439899086952 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 198.88000190258026, + "p90": 284.4800055027008, + "p95": 288.12798857688904, + "p99": 293.0240035057068 + }, + "combine": { + "p50": 69.18399780988693, + "p90": 80.54400235414505, + "p95": 81.4720019698143, + "p99": 84.63999629020691 + }, + "roundtrip": { + "p50": 253.12000513076782, + "p90": 334.01599526405334, + "p95": 336.89600229263306, + "p99": 340.31999111175537 + }, + "isolatedSum": { + "p50": 268.0639997124672, + "p90": 365.02400785684586, + "p95": 369.59999054670334, + "p99": 377.6639997959137 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 202.07999646663666, + "p90": 355.00800609588623, + "p95": 361.7280125617981, + "p99": 423.007994890213 + }, + "combine": { + "p50": 82.65600353479385, + "p90": 94.11200135946274, + "p95": 95.8079993724823, + "p99": 99.45599734783173 + }, + "roundtrip": { + "p50": 266.88000559806824, + "p90": 352.03200578689575, + "p95": 355.3600013256073, + "p99": 361.4720106124878 + }, + "isolatedSum": { + "p50": 284.7360000014305, + "p90": 449.12000745534897, + "p95": 457.5360119342804, + "p99": 522.4639922380447 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 
128, + "globalTokens": 1024, + "dispatch": { + "p50": 221.79199755191803, + "p90": 289.72798585891724, + "p95": 293.08798909187317, + "p99": 300.9600043296814 + }, + "combine": { + "p50": 98.27200323343277, + "p90": 108.8000014424324, + "p95": 110.1439967751503, + "p99": 113.88800293207169 + }, + "roundtrip": { + "p50": 303.74398827552795, + "p90": 364.8639917373657, + "p95": 367.45598912239075, + "p99": 371.5519905090332 + }, + "isolatedSum": { + "p50": 320.0640007853508, + "p90": 398.52798730134964, + "p95": 403.23198586702347, + "p99": 414.8480072617531 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-b83230a1", + "identity": "h100|deepep|7168|8|256|fp8|normal|cached-layout-comm-only-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|ac583971f94b176", + "colorKey": "h100_eddc3af6", + "comparisonKey": "f291497d6f9ce0d1", + "schemaVersion": 3, + "generatedAt": "2026-06-26T17:31:42.999710+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h100-dgxc-slurm_12", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "normalized", + "suite": "resource-constrained", + "comparisonClass": "standardized", + "measurementContract": "cached-layout-comm-only-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · fp8 (norm) [cl]", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": 0.18, + "achievedFraction": 0.1818, + "configuredUnits": 24, + 
"deviceUnits": 132, + "resourceClass": "resource-constrained", + "conformanceClass": "resource-conforming", + "fixedKernel": false, + "paretoEligible": true + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": "set:8:d8d49658059863f2", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28254341346", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254341346", + "createdAt": "2026-06-26T17:27:22Z", + "sha": "60dec7d70f554e252fec87709e2be52752947db1" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 81.34400099515915, + "p90": 84.927998483181, + "p95": 86.496002972126, + "p99": 90.14400094747543 + }, + "combine": { + "p50": 71.3919997215271, + "p90": 73.91999661922455, + "p95": 74.87999647855759, + "p99": 77.98399776220322 + }, + "roundtrip": { + "p50": 173.15199971199036, + "p90": 178.6240041255951, + "p95": 180.92800676822662, + "p99": 186.5600049495697 + }, + "isolatedSum": { + "p50": 152.73600071668625, + "p90": 158.84799510240555, + "p95": 161.3759994506836, + "p99": 168.12799870967865 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 58.49599838256836, + "p90": 82.78399705886841, + "p95": 84.3840017914772, + "p99": 90.01599997282028 + }, + "combine": { + "p50": 63.07200342416763, + "p90": 74.0479975938797, + "p95": 74.8480036854744, + "p99": 77.44000107049942 + }, + "roundtrip": { + "p50": 
141.12000167369843, + "p90": 176.54399573802948, + "p95": 178.81600558757782, + "p99": 181.92000687122345 + }, + "isolatedSum": { + "p50": 121.56800180673599, + "p90": 156.8319946527481, + "p95": 159.2320054769516, + "p99": 167.4560010433197 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 59.13599953055382, + "p90": 82.68799632787704, + "p95": 85.37600189447403, + "p99": 91.61599725484848 + }, + "combine": { + "p50": 63.64800035953522, + "p90": 74.14399832487106, + "p95": 75.19999891519547, + "p99": 79.32800054550171 + }, + "roundtrip": { + "p50": 140.83200693130493, + "p90": 178.49600315093994, + "p95": 180.92800676822662, + "p99": 187.45599687099457 + }, + "isolatedSum": { + "p50": 122.78399989008904, + "p90": 156.8319946527481, + "p95": 160.5760008096695, + "p99": 170.9439978003502 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 61.792001128196716, + "p90": 83.20000022649765, + "p95": 86.07999980449677, + "p99": 96.00000083446503 + }, + "combine": { + "p50": 65.43999910354614, + "p90": 75.93599706888199, + "p95": 78.14399898052216, + "p99": 83.74399691820145 + }, + "roundtrip": { + "p50": 144.44799721240997, + "p90": 181.15200102329254, + "p95": 184.25600230693817, + "p99": 199.8080015182495 + }, + "isolatedSum": { + "p50": 127.23200023174286, + "p90": 159.13599729537964, + "p95": 164.22399878501892, + "p99": 179.74399775266647 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487296, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, 
+ "recvTokensMax": 47, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 80.38400113582611, + "p90": 83.52000266313553, + "p95": 85.08799970149994, + "p99": 92.38400310277939 + }, + "combine": { + "p50": 75.80800354480743, + "p90": 77.85599678754807, + "p95": 79.03999835252762, + "p99": 80.83199709653854 + }, + "roundtrip": { + "p50": 150.59199929237366, + "p90": 182.49599635601044, + "p95": 184.60799753665924, + "p99": 194.815993309021 + }, + "isolatedSum": { + "p50": 156.19200468063354, + "p90": 161.3759994506836, + "p95": 164.12799805402756, + "p99": 173.21600019931793 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 61.792001128196716, + "p90": 81.727996468544, + "p95": 84.28800106048584, + "p99": 89.88799899816513 + }, + "combine": { + "p50": 69.34399902820587, + "p90": 79.96799796819687, + "p95": 81.24800026416779, + "p99": 83.99999886751175 + }, + "roundtrip": { + "p50": 146.11199498176575, + "p90": 184.32000279426575, + "p95": 186.52799725532532, + "p99": 192.44800508022308 + }, + "isolatedSum": { + "p50": 131.1360001564026, + "p90": 161.69599443674088, + "p95": 165.53600132465363, + "p99": 173.88799786567688 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 72.80000299215317, + "p90": 86.43200248479843, + "p95": 92.54399687051773, + "p99": 99.7759997844696 + }, + "combine": { + "p50": 85.08799970149994, + "p90": 95.0080007314682, + "p95": 
96.41599655151367, + "p99": 101.21600329875946 + }, + "roundtrip": { + "p50": 182.8799992799759, + "p90": 202.94399559497833, + "p95": 208.3200067281723, + "p99": 218.176007270813 + }, + "isolatedSum": { + "p50": 157.8880026936531, + "p90": 181.44000321626663, + "p95": 188.9599934220314, + "p99": 200.99200308322906 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 77.79199630022049, + "p90": 92.12800115346909, + "p95": 93.72799843549728, + "p99": 98.24000298976898 + }, + "combine": { + "p50": 99.55199807882309, + "p90": 109.72800105810165, + "p95": 110.91200262308121, + "p99": 114.46399986743927 + }, + "roundtrip": { + "p50": 205.1520049571991, + "p90": 219.200000166893, + "p95": 220.89600563049316, + "p99": 223.4880030155182 + }, + "isolatedSum": { + "p50": 177.34399437904358, + "p90": 201.85600221157074, + "p95": 204.6400010585785, + "p99": 212.70400285720825 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-d8e58489", + "identity": "h100|deepep|7168|8|256|fp8|normal|cached-layout-comm-only-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h100_ec72792b", + "comparisonKey": "2bfd4913feb2a935", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:47:54.320638+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h100-dgxc-slurm_02", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": 
"cached-layout-comm-only-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · fp8 [cl]", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": "set:8:d8d49658059863f2", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271573150", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271573150", + "createdAt": "2026-06-26T23:46:58Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 78.3040001988411, + "p90": 82.07999914884567, + "p95": 84.44800227880478, + "p99": 88.03199976682663 + }, + "combine": { + "p50": 71.1359977722168, + "p90": 72.86400347948074, + "p95": 73.82400333881378, + "p99": 77.88799703121185 + }, + "roundtrip": { + "p50": 136.63999736309052, + "p90": 174.75199699401855, + "p95": 177.15199291706085, + "p99": 181.08800053596497 + }, + "isolatedSum": { + "p50": 149.4399979710579, + "p90": 154.94400262832642, + "p95": 158.27200561761856, + "p99": 165.91999679803848 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 56.832000613212585, + "p90": 79.74400371313095, + "p95": 81.11999928951263, + "p99": 85.69599688053131 + }, + "combine": { + "p50": 62.3680017888546, + "p90": 71.58400118350983, + "p95": 72.25599884986877, + "p99": 75.9039968252182 + }, + "roundtrip": { + "p50": 138.0160003900528, + "p90": 172.95999825000763, + "p95": 174.30399358272552, + "p99": 179.61600422859192 + }, + "isolatedSum": { + "p50": 119.20000240206718, + "p90": 151.32800489664078, + "p95": 153.3759981393814, + "p99": 161.5999937057495 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 56.92800134420395, + "p90": 82.0159986615181, + "p95": 85.02399921417236, + "p99": 87.77599781751633 + }, + "combine": { + "p50": 63.07200342416763, + "p90": 74.94399696588516, + "p95": 76.28799974918365, + "p99": 79.99999821186066 + }, + "roundtrip": { + "p50": 138.7840062379837, + "p90": 179.51999604701996, + "p95": 182.01600015163422, + "p99": 187.42400407791138 + }, + "isolatedSum": { + "p50": 120.00000476837158, + "p90": 156.95999562740326, + "p95": 161.31199896335602, + "p99": 167.77599602937698 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 56.832000613212585, + "p90": 80.99199831485748, + "p95": 82.94399827718735, + 
"p99": 87.99999952316284 + }, + "combine": { + "p50": 63.71200084686279, + "p90": 74.43200051784515, + "p95": 75.19999891519547, + "p99": 79.52000200748444 + }, + "roundtrip": { + "p50": 139.93600010871887, + "p90": 178.5919964313507, + "p95": 181.98400735855103, + "p99": 185.47199666500092 + }, + "isolatedSum": { + "p50": 120.54400146007538, + "p90": 155.42399883270264, + "p95": 158.1439971923828, + "p99": 167.52000153064728 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487296, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 57.472001761198044, + "p90": 81.79199695587158, + "p95": 84.28800106048584, + "p99": 87.87199854850769 + }, + "combine": { + "p50": 65.5359998345375, + "p90": 77.37600058317184, + "p95": 79.3600007891655, + "p99": 82.46400207281113 + }, + "roundtrip": { + "p50": 141.184002161026, + "p90": 181.7920058965683, + "p95": 184.9599927663803, + "p99": 191.93600118160248 + }, + "isolatedSum": { + "p50": 123.00800159573555, + "p90": 159.16799753904343, + "p95": 163.64800184965134, + "p99": 170.33600062131882 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 60.32000109553337, + "p90": 82.0159986615181, + "p95": 84.63999629020691, + "p99": 91.0400003194809 + }, + "combine": { + "p50": 70.97599655389786, + "p90": 82.14399963617325, + "p95": 83.20000022649765, + "p99": 88.60799670219421 + }, + "roundtrip": { + "p50": 147.0080018043518, + "p90": 185.7919991016388, + "p95": 188.06399405002594, + "p99": 192.25600361824036 + }, + "isolatedSum": { + "p50": 131.29599764943123, + "p90": 
164.15999829769135, + "p95": 167.83999651670456, + "p99": 179.6479970216751 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 69.60000097751617, + "p90": 85.69599688053131, + "p95": 87.99999952316284, + "p99": 100.8640006184578 + }, + "combine": { + "p50": 80.6720033288002, + "p90": 92.70399808883667, + "p95": 93.66399794816971, + "p99": 97.4079966545105 + }, + "roundtrip": { + "p50": 160.70400178432465, + "p90": 200.83199441432953, + "p95": 203.19999754428864, + "p99": 211.5200012922287 + }, + "isolatedSum": { + "p50": 150.27200430631638, + "p90": 178.39999496936798, + "p95": 181.66399747133255, + "p99": 198.2719972729683 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 77.05599814653397, + "p90": 91.96799993515015, + "p95": 94.43199634552002, + "p99": 99.32799637317657 + }, + "combine": { + "p50": 97.53599762916565, + "p90": 109.37599837779999, + "p95": 110.68800091743469, + "p99": 115.7120019197464 + }, + "roundtrip": { + "p50": 203.80799472332, + "p90": 219.9999988079071, + "p95": 222.59199619293213, + "p99": 236.4799976348877 + }, + "isolatedSum": { + "p50": 174.59199577569962, + "p90": 201.34399831295013, + "p95": 205.1199972629547, + "p99": 215.03999829292297 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-f1a3625a", + 
"identity": "h100|deepep|7168|8|256|fp8|ll|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h100_7720baf2", + "comparisonKey": "800e526f613bc59d", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:49:09.827299+00:00", + "status": "valid", + "publicationStatus": "diagnostic", + "runner": "h100-dgxc-slurm_04", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "ll", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · fp8 LL", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": "set:8:d8d49658059863f2", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271594334", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271594334", + "createdAt": "2026-06-26T23:47:39Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + 
"globalTokens": 8, + "dispatch": { + "p50": 30.81599995493889, + "p90": 33.824000507593155, + "p95": 36.67199984192848, + "p99": 41.760001331567764 + }, + "combine": { + "p50": 33.535998314619064, + "p90": 36.06399893760681, + "p95": 38.656000047922134, + "p99": 94.62399780750275 + }, + "roundtrip": { + "p50": 2063.647985458374, + "p90": 2066.3039684295654, + "p95": 2067.5199031829834, + "p99": 2072.1280574798584 + }, + "isolatedSum": { + "p50": 64.35199826955795, + "p90": 69.88799944519997, + "p95": 75.32799988985062, + "p99": 136.3839991390705 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 14, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 30.688000842928886, + "p90": 33.440001308918, + "p95": 35.32800078392029, + "p99": 41.85599833726883 + }, + "combine": { + "p50": 35.10399907827377, + "p90": 39.135999977588654, + "p95": 60.99199876189232, + "p99": 184.2239946126938 + }, + "roundtrip": { + "p50": 2065.023899078369, + "p90": 2067.647933959961, + "p95": 2069.279909133911, + "p99": 2082.5600624084473 + }, + "isolatedSum": { + "p50": 65.79199992120266, + "p90": 72.57600128650665, + "p95": 96.3199995458126, + "p99": 226.07999294996262 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 21, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 30.527999624609947, + "p90": 32.70399942994118, + "p95": 34.33600068092346, + "p99": 38.72000053524971 + }, + "combine": { + "p50": 34.71999987959862, + "p90": 36.896001547575, + "p95": 37.82400116324425, + "p99": 40.672000497579575 + }, + "roundtrip": { + "p50": 2065.7920837402344, + "p90": 2069.4079399108887, + "p95": 
2074.079990386963, + "p99": 2120.703935623169 + }, + "isolatedSum": { + "p50": 65.24799950420856, + "p90": 69.60000097751617, + "p95": 72.16000184416771, + "p99": 79.39200103282928 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 39, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 31.007999554276466, + "p90": 33.24799984693527, + "p95": 35.45600175857544, + "p99": 42.11200028657913 + }, + "combine": { + "p50": 35.74400022625923, + "p90": 38.62399980425835, + "p95": 39.903998374938965, + "p99": 44.12800073623657 + }, + "roundtrip": { + "p50": 2066.240072250366, + "p90": 2069.6959495544434, + "p95": 2070.784091949463, + "p99": 2073.9200115203857 + }, + "isolatedSum": { + "p50": 66.7519997805357, + "p90": 71.87199965119362, + "p95": 75.3600001335144, + "p99": 86.2400010228157 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487296, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 74, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 32.32000023126602, + "p90": 39.103999733924866, + "p95": 52.799999713897705, + "p99": 55.36000058054924 + }, + "combine": { + "p50": 38.656000047922134, + "p90": 41.79200157523155, + "p95": 42.97599941492081, + "p99": 47.520000487565994 + }, + "roundtrip": { + "p50": 2071.9680786132812, + "p90": 2074.592113494873, + "p95": 2075.615882873535, + "p99": 2079.7760486602783 + }, + "isolatedSum": { + "p50": 70.97600027918816, + "p90": 80.89600130915642, + "p95": 95.77599912881851, + "p99": 102.88000106811523 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 145, + "stragglerRank": 3, + "correct": 
true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 36.3520011305809, + "p90": 38.11199963092804, + "p95": 40.22400081157684, + "p99": 45.951999723911285 + }, + "combine": { + "p50": 47.968000173568726, + "p90": 50.87999999523163, + "p95": 51.83999985456467, + "p99": 58.04799869656563 + }, + "roundtrip": { + "p50": 2082.7200412750244, + "p90": 2085.2479934692383, + "p95": 2086.2081050872803, + "p99": 2089.1199111938477 + }, + "isolatedSum": { + "p50": 84.32000130414963, + "p90": 88.99199962615967, + "p95": 92.06400066614151, + "p99": 103.99999842047691 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 287, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 41.600000113248825, + "p90": 51.00800096988678, + "p95": 52.12799832224846, + "p99": 55.1999993622303 + }, + "combine": { + "p50": 60.67200005054474, + "p90": 68.67200136184692, + "p95": 71.68000191450119, + "p99": 97.08800166845322 + }, + "roundtrip": { + "p50": 2101.8240451812744, + "p90": 2108.736038208008, + "p95": 2111.936092376709, + "p99": 2120.1279163360596 + }, + "isolatedSum": { + "p50": 102.27200016379356, + "p90": 119.6800023317337, + "p95": 123.80800023674965, + "p99": 152.28800103068352 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 564, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 54.016001522541046, + "p90": 56.223999708890915, + "p95": 57.312000542879105, + "p99": 60.575999319553375 + }, + "combine": { + "p50": 88.54400366544724, + "p90": 91.93599969148636, + "p95": 92.70399808883667, + "p99": 114.81600254774094 
+ }, + "roundtrip": { + "p50": 2143.0718898773193, + "p90": 2146.7199325561523, + "p95": 2147.455930709839, + "p99": 2153.791904449463 + }, + "isolatedSum": { + "p50": 142.56000518798828, + "p90": 148.15999940037727, + "p95": 150.01599863171577, + "p99": 175.3920018672943 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 1104, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-73d1725a", + "identity": "h100|deepep|7168|8|256|fp8|ll|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h100_3a77ee8e", + "comparisonKey": "93509525aa3f27c6", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:49:16.484836+00:00", + "status": "valid", + "publicationStatus": "diagnostic", + "runner": "h100-dgxc-slurm_13", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "ll", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · fp8 LL", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": 
"set:8:d8d49658059863f2", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271598000", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271598000", + "createdAt": "2026-06-26T23:47:46Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 31.10400028526783, + "p90": 33.376000821590424, + "p95": 34.88000109791756, + "p99": 39.264000952243805 + }, + "combine": { + "p50": 32.575998455286026, + "p90": 35.32800078392029, + "p95": 36.928001791238785, + "p99": 40.41599854826927 + }, + "roundtrip": { + "p50": 2062.4639987945557, + "p90": 2065.1841163635254, + "p95": 2067.9678916931152, + "p99": 2091.871976852417 + }, + "isolatedSum": { + "p50": 63.679998740553856, + "p90": 68.70400160551071, + "p95": 71.80800288915634, + "p99": 79.67999950051308 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 14, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 30.719999223947525, + "p90": 32.99200162291527, + "p95": 35.551998764276505, + "p99": 40.64000025391579 + }, + "combine": { + "p50": 32.735999673604965, + "p90": 35.00799834728241, + "p95": 36.3520011305809, + "p99": 43.807998299598694 + }, + "roundtrip": { + "p50": 2063.136100769043, + "p90": 2065.376043319702, + "p95": 2067.296028137207, + "p99": 2071.039915084839 + }, + "isolatedSum": { + "p50": 63.45599889755249, + "p90": 67.99999997019768, + "p95": 71.9039998948574, + "p99": 84.44799855351448 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 
1232896, + "fanoutMean": 5.375, + "recvTokensMax": 21, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 32.35200047492981, + "p90": 46.65600135922432, + "p95": 47.42399975657463, + "p99": 53.279999643564224 + }, + "combine": { + "p50": 33.824000507593155, + "p90": 36.768000572919846, + "p95": 39.07199949026108, + "p99": 50.783999264240265 + }, + "roundtrip": { + "p50": 2064.095973968506, + "p90": 2066.9119358062744, + "p95": 2069.567918777466, + "p99": 2080.512046813965 + }, + "isolatedSum": { + "p50": 66.17600098252296, + "p90": 83.42400193214417, + "p95": 86.49599924683571, + "p99": 104.06399890780449 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 39, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 31.90400078892708, + "p90": 34.04799848794937, + "p95": 35.74400022625923, + "p99": 39.77600112557411 + }, + "combine": { + "p50": 34.17599946260452, + "p90": 36.22400015592575, + "p95": 37.53599897027016, + "p99": 42.208001017570496 + }, + "roundtrip": { + "p50": 2065.279960632324, + "p90": 2068.416118621826, + "p95": 2070.6560611724854, + "p99": 2080.8000564575195 + }, + "isolatedSum": { + "p50": 66.0800002515316, + "p90": 70.27199864387512, + "p95": 73.27999919652939, + "p99": 81.98400214314461 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487296, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 74, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 32.86400064826012, + "p90": 34.432001411914825, + "p95": 36.25600039958954, + "p99": 40.73600098490715 + }, + "combine": { + "p50": 37.88800165057182, + 
"p90": 44.67200115323067, + "p95": 46.30399867892265, + "p99": 69.24799829721451 + }, + "roundtrip": { + "p50": 2071.1679458618164, + "p90": 2079.5199871063232, + "p95": 2080.4800987243652, + "p99": 2085.439920425415 + }, + "isolatedSum": { + "p50": 70.75200229883194, + "p90": 79.10400256514549, + "p95": 82.55999907851219, + "p99": 109.98399928212166 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 145, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 35.00799834728241, + "p90": 36.928001791238785, + "p95": 39.07199949026108, + "p99": 41.98399931192398 + }, + "combine": { + "p50": 43.68000105023384, + "p90": 45.72800174355507, + "p95": 46.879999339580536, + "p99": 52.480001002550125 + }, + "roundtrip": { + "p50": 2079.263925552368, + "p90": 2081.279993057251, + "p95": 2082.5281143188477, + "p99": 2086.1759185791016 + }, + "isolatedSum": { + "p50": 78.68799939751625, + "p90": 82.65600353479385, + "p95": 85.95199882984161, + "p99": 94.4640003144741 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 287, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 42.11200028657913, + "p90": 65.15199691057205, + "p95": 74.36800003051758, + "p99": 88.99199962615967 + }, + "combine": { + "p50": 58.9120015501976, + "p90": 63.87200206518173, + "p95": 64.80000168085098, + "p99": 71.45600020885468 + }, + "roundtrip": { + "p50": 2100.9280681610107, + "p90": 2110.1760864257812, + "p95": 2111.2639904022217, + "p99": 2114.367961883545 + }, + "isolatedSum": { + "p50": 101.02400183677673, + "p90": 129.02399897575378, + "p95": 139.16800171136856, + "p99": 160.44799983501434 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 564, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 53.18399891257286, + "p90": 54.78399991989136, + "p95": 56.60799890756607, + "p99": 61.535999178886414 + }, + "combine": { + "p50": 85.75999736785889, + "p90": 88.03199976682663, + "p95": 89.12000060081482, + "p99": 95.29600292444229 + }, + "roundtrip": { + "p50": 2140.671968460083, + "p90": 2143.5201168060303, + "p95": 2145.632028579712, + "p99": 2288.991928100586 + }, + "isolatedSum": { + "p50": 138.94399628043175, + "p90": 142.815999686718, + "p95": 145.7279995083809, + "p99": 156.8320021033287 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 1104, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-1d30dd2c", + "identity": "h100|deepep|7168|8|256|fp8|ll|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|ac583971f94b176", + "colorKey": "h100_ac25b0a1", + "comparisonKey": "405d06288635d74f", + "schemaVersion": 3, + "generatedAt": "2026-06-26T17:32:59.549027+00:00", + "status": "valid", + "publicationStatus": "diagnostic", + "runner": "h100-dgxc-slurm_00", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "ll", + "resourceMode": "normalized", + "suite": "resource-constrained", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · fp8 LL (norm)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": 
"none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": 0.18, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": "set:8:d8d49658059863f2", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28254359089", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254359089", + "createdAt": "2026-06-26T17:27:42Z", + "sha": "60dec7d70f554e252fec87709e2be52752947db1" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 30.432000756263733, + "p90": 32.32000023126602, + "p95": 34.143999218940735, + "p99": 38.015998899936676 + }, + "combine": { + "p50": 32.287999987602234, + "p90": 34.78400036692619, + "p95": 35.87200120091438, + "p99": 40.383998304605484 + }, + "roundtrip": { + "p50": 2063.9359951019287, + "p90": 2065.632104873657, + "p95": 2066.9760704040527, + "p99": 2069.6001052856445 + }, + "isolatedSum": { + "p50": 62.72000074386597, + "p90": 67.10400059819221, + "p95": 70.01600041985512, + "p99": 78.39999720454216 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 14, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 30.368000268936157, + "p90": 
32.09599852561951, + "p95": 34.01599824428558, + "p99": 37.248000502586365 + }, + "combine": { + "p50": 32.22399950027466, + "p90": 34.46400165557861, + "p95": 35.711999982595444, + "p99": 45.88799923658371 + }, + "roundtrip": { + "p50": 2064.768075942993, + "p90": 2067.13604927063, + "p95": 2069.024085998535, + "p99": 2083.7440490722656 + }, + "isolatedSum": { + "p50": 62.591999769210815, + "p90": 66.56000018119812, + "p95": 69.72799822688103, + "p99": 83.13599973917007 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 21, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 30.527999624609947, + "p90": 32.54399821162224, + "p95": 35.26400029659271, + "p99": 40.063999593257904 + }, + "combine": { + "p50": 34.2399999499321, + "p90": 37.53599897027016, + "p95": 38.24000060558319, + "p99": 40.031999349594116 + }, + "roundtrip": { + "p50": 2065.376043319702, + "p90": 2067.3279762268066, + "p95": 2068.3200359344482, + "p99": 2070.5599784851074 + }, + "isolatedSum": { + "p50": 64.76799957454205, + "p90": 70.0799971818924, + "p95": 73.5040009021759, + "p99": 80.09599894285202 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 39, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 31.231999397277832, + "p90": 33.055998384952545, + "p95": 35.61599925160408, + "p99": 38.94399851560593 + }, + "combine": { + "p50": 33.76000002026558, + "p90": 35.999998450279236, + "p95": 37.76000067591667, + "p99": 53.888000547885895 + }, + "roundtrip": { + "p50": 2066.528081893921, + "p90": 2068.511962890625, + "p95": 2069.6959495544434, + "p99": 2078.07993888855 + }, + "isolatedSum": { + "p50": 
64.99199941754341, + "p90": 69.05599683523178, + "p95": 73.37599992752075, + "p99": 92.83199906349182 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487296, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 74, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 32.51200169324875, + "p90": 34.20799970626831, + "p95": 36.86400130391121, + "p99": 40.09599983692169 + }, + "combine": { + "p50": 37.21600025892258, + "p90": 39.45599868893623, + "p95": 40.41599854826927, + "p99": 42.399998754262924 + }, + "roundtrip": { + "p50": 2071.392059326172, + "p90": 2074.687957763672, + "p95": 2078.7200927734375, + "p99": 2156.5120220184326 + }, + "isolatedSum": { + "p50": 69.72800195217133, + "p90": 73.66399839520454, + "p95": 77.27999985218048, + "p99": 82.49599859118462 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 145, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 35.10399907827377, + "p90": 36.38400137424469, + "p95": 38.43199834227562, + "p99": 42.208001017570496 + }, + "combine": { + "p50": 42.7200011909008, + "p90": 44.89599913358688, + "p95": 45.66400125622749, + "p99": 48.70399832725525 + }, + "roundtrip": { + "p50": 2080.22403717041, + "p90": 2081.9520950317383, + "p95": 2083.359956741333, + "p99": 2118.4639930725098 + }, + "isolatedSum": { + "p50": 77.82400026917458, + "p90": 81.28000050783157, + "p95": 84.09599959850311, + "p99": 90.91199934482574 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 287, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + 
"globalTokens": 512, + "dispatch": { + "p50": 42.047999799251556, + "p90": 47.90399968624115, + "p95": 48.8319993019104, + "p99": 53.119998425245285 + }, + "combine": { + "p50": 57.40800127387047, + "p90": 62.68800050020218, + "p95": 64.51199948787689, + "p99": 67.03999638557434 + }, + "roundtrip": { + "p50": 2100.5120277404785, + "p90": 2108.383893966675, + "p95": 2109.503984451294, + "p99": 2111.9039058685303 + }, + "isolatedSum": { + "p50": 99.45600107312202, + "p90": 110.59200018644333, + "p95": 113.34399878978729, + "p99": 120.15999481081963 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 564, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 53.599998354911804, + "p90": 55.39200082421303, + "p95": 56.41600117087364, + "p99": 61.08799949288368 + }, + "combine": { + "p50": 83.5840031504631, + "p90": 86.11200004816055, + "p95": 87.2960016131401, + "p99": 91.51999652385712 + }, + "roundtrip": { + "p50": 2139.967918395996, + "p90": 2142.303943634033, + "p95": 2142.911911010742, + "p99": 2144.831895828247 + }, + "isolatedSum": { + "p50": 137.1840015053749, + "p90": 141.50400087237358, + "p95": 143.71200278401375, + "p99": 152.6079960167408 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 1104, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-8265fe0e", + "identity": "h100|deepep|4096|8|128|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||dc27c5e0894e569", + "colorKey": "h100_ff7906f8", + "comparisonKey": "d0edce95a580d060", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:52:06.777183+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": 
"h100-dgxc-slurm_13", + "sku": "h100", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16", + "shape": { + "hidden": 4096, + "topk": 8, + "experts": 128, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "dc27c5e0894e569", + "workloadId": "set:6:76d8142d69406335", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271688175", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271688175", + "createdAt": "2026-06-26T23:50:32Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 92.32000261545181, + "p90": 96.41599655151367, + "p95": 98.39999675750732, + "p99": 104.22399640083313 + }, + "combine": { + "p50": 86.97599917650223, + "p90": 88.41600269079208, + "p95": 89.50400352478027, + "p99": 93.31200271844864 + }, + "roundtrip": { + "p50": 156.73600137233734, + "p90": 
160.70400178432465, + "p95": 161.6639941930771, + "p99": 166.04800522327423 + }, + "isolatedSum": { + "p50": 179.29600179195404, + "p90": 184.83199924230576, + "p95": 187.9040002822876, + "p99": 197.53599911928177 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 44564480, + "combineLogicalBytes": 44564480, + "fanoutMean": 5.3125, + "recvTokensMax": 699, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 111.10399663448334, + "p90": 133.98399949073792, + "p95": 135.96799969673157, + "p99": 139.96799290180206 + }, + "combine": { + "p50": 112.99200356006622, + "p90": 121.47200107574463, + "p95": 122.01599776744843, + "p99": 128.35200130939484 + }, + "roundtrip": { + "p50": 202.72000133991241, + "p90": 217.6000028848648, + "p95": 219.39200162887573, + "p99": 223.7440049648285 + }, + "isolatedSum": { + "p50": 224.09600019454956, + "p90": 255.45600056648254, + "p95": 257.98399746418, + "p99": 268.3199942111969 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 89726976, + "combineLogicalBytes": 89726976, + "fanoutMean": 5.34814453125, + "recvTokensMax": 1385, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 153.08800339698792, + "p90": 166.9439971446991, + "p95": 168.67199540138245, + "p99": 175.55199563503265 + }, + "combine": { + "p50": 168.92799735069275, + "p90": 181.15200102329254, + "p95": 183.07200074195862, + "p99": 186.0480010509491 + }, + "roundtrip": { + "p50": 291.29600524902344, + "p90": 307.45598673820496, + "p95": 309.6959888935089, + "p99": 313.9199912548065 + }, + "isolatedSum": { + "p50": 322.01600074768066, + "p90": 348.09599816799164, + "p95": 351.74399614334106, + "p99": 361.59999668598175 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 179503104, + "combineLogicalBytes": 179503104, + "fanoutMean": 
5.349609375, + "recvTokensMax": 2772, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 219.26400065422058, + "p90": 230.71999847888947, + "p95": 234.9119931459427, + "p99": 238.62400650978088 + }, + "combine": { + "p50": 274.04800057411194, + "p90": 280.5440127849579, + "p95": 281.69599175453186, + "p99": 284.1919958591461 + }, + "roundtrip": { + "p50": 467.4240052700043, + "p90": 473.2159972190857, + "p95": 475.8079946041107, + "p99": 479.2639911174774 + }, + "isolatedSum": { + "p50": 493.3120012283325, + "p90": 511.26401126384735, + "p95": 516.6079849004745, + "p99": 522.816002368927 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 359022592, + "combineLogicalBytes": 359022592, + "fanoutMean": 5.349853515625, + "recvTokensMax": 5558, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 360.79999804496765, + "p90": 374.36801195144653, + "p95": 376.5760064125061, + "p99": 380.2880048751831 + }, + "combine": { + "p50": 465.88799357414246, + "p90": 475.77598690986633, + "p95": 478.4319996833801, + "p99": 481.53600096702576 + }, + "roundtrip": { + "p50": 799.1999983787537, + "p90": 816.6720271110535, + "p95": 819.8080062866211, + "p99": 824.7680068016052 + }, + "isolatedSum": { + "p50": 826.6879916191101, + "p90": 850.1439988613129, + "p95": 855.0080060958862, + "p99": 861.8240058422089 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 716111872, + "combineLogicalBytes": 716111872, + "fanoutMean": 5.33544921875, + "recvTokensMax": 10982, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 638.975977897644, + "p90": 648.1279730796814, + "p95": 652.7040004730225, + "p99": 661.1520051956177 + }, + "combine": { + "p50": 
848.4799861907959, + "p90": 856.8000197410583, + "p95": 859.5520257949829, + "p99": 898.5919952392578 + }, + "roundtrip": { + "p50": 1462.623953819275, + "p90": 1474.079966545105, + "p95": 1478.4959554672241, + "p99": 1489.3120527267456 + }, + "isolatedSum": { + "p50": 1487.45596408844, + "p90": 1504.9279928207397, + "p95": 1512.2560262680054, + "p99": 1559.7440004348755 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1432395776, + "combineLogicalBytes": 1432395776, + "fanoutMean": 5.336090087890625, + "recvTokensMax": 21939, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-2dcc1e5c", + "identity": "h100|deepep|5120|8|160|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||0c022a63bbcbf42", + "colorKey": "h100_ff7906f8", + "comparisonKey": "69b861c40f88be42", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:51:59.492832+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h100-dgxc-slurm_06", + "sku": "h100", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16", + "shape": { + "hidden": 5120, + "topk": 8, + "experts": 160, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 
8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "0c022a63bbcbf42", + "workloadId": "set:6:28c0c09b13ff0acf", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271702702", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271702702", + "createdAt": "2026-06-26T23:50:59Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 99.45599734783173, + "p90": 105.05600273609161, + "p95": 106.04800283908844, + "p99": 110.23999750614166 + }, + "combine": { + "p50": 95.58399766683578, + "p90": 97.47199714183807, + "p95": 98.39999675750732, + "p99": 102.9760017991066 + }, + "roundtrip": { + "p50": 170.33599317073822, + "p90": 175.10400712490082, + "p95": 177.85599827766418, + "p99": 179.58399653434753 + }, + "isolatedSum": { + "p50": 195.0399950146675, + "p90": 202.5279998779297, + "p95": 204.44799959659576, + "p99": 213.21599930524826 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 55674880, + "combineLogicalBytes": 55674880, + "fanoutMean": 5.3095703125, + "recvTokensMax": 699, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 119.29599940776825, + "p90": 124.22399967908859, + "p95": 126.30400061607361, + "p99": 130.5599957704544 + }, + "combine": { + "p50": 122.079998254776, + "p90": 127.80800461769104, + "p95": 128.67200374603271, + "p99": 132.9919993877411 + }, + "roundtrip": { + "p50": 219.32800114154816, + "p90": 223.1680005788803, + "p95": 224.5440036058426, + "p99": 228.7359982728958 + }, + "isolatedSum": { + "p50": 241.37599766254425, + "p90": 252.03200429677963, + 
"p95": 254.97600436210632, + "p99": 263.5519951581955 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 111104000, + "combineLogicalBytes": 111104000, + "fanoutMean": 5.2978515625, + "recvTokensMax": 1387, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 165.53600132465363, + "p90": 178.1120002269745, + "p95": 180.12799322605133, + "p99": 184.25600230693817 + }, + "combine": { + "p50": 190.46400487422943, + "p90": 198.71999323368073, + "p95": 200.9280025959015, + "p99": 213.79199624061584 + }, + "roundtrip": { + "p50": 325.76000690460205, + "p90": 331.07200264930725, + "p95": 332.73598551750183, + "p99": 336.1920118331909 + }, + "isolatedSum": { + "p50": 356.00000619888306, + "p90": 376.8319934606552, + "p95": 381.0559958219528, + "p99": 398.047998547554 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 223098880, + "combineLogicalBytes": 223098880, + "fanoutMean": 5.319091796875, + "recvTokensMax": 2762, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 244.57600712776184, + "p90": 249.439999461174, + "p95": 253.56799364089966, + "p99": 409.56801176071167 + }, + "combine": { + "p50": 299.1040050983429, + "p90": 303.9360046386719, + "p95": 305.759996175766, + "p99": 311.0719919204712 + }, + "roundtrip": { + "p50": 515.7759785652161, + "p90": 522.2399830818176, + "p95": 524.1600275039673, + "p99": 528.8959741592407 + }, + "isolatedSum": { + "p50": 543.6800122261047, + "p90": 553.3760040998459, + "p95": 559.3279898166656, + "p99": 720.6400036811829 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 446730240, + "combineLogicalBytes": 446730240, + "fanoutMean": 5.325439453125, + "recvTokensMax": 5518, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + 
"globalTokens": 16384, + "dispatch": { + "p50": 413.1520092487335, + "p90": 423.0720102787018, + "p95": 426.2399971485138, + "p99": 432.5760006904602 + }, + "combine": { + "p50": 515.7439708709717, + "p90": 523.7119793891907, + "p95": 526.4319777488708, + "p99": 530.3360223770142 + }, + "roundtrip": { + "p50": 898.2080221176147, + "p90": 911.0400080680847, + "p95": 915.2960181236267, + "p99": 921.6639995574951 + }, + "isolatedSum": { + "p50": 928.8959801197052, + "p90": 946.7839896678925, + "p95": 952.6719748973846, + "p99": 962.9120230674744 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 893634560, + "combineLogicalBytes": 893634560, + "fanoutMean": 5.32647705078125, + "recvTokensMax": 11032, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 745.0559735298157, + "p90": 758.2719922065735, + "p95": 762.112021446228, + "p99": 772.4159955978394 + }, + "combine": { + "p50": 933.247983455658, + "p90": 941.9839978218079, + "p95": 945.1839923858643, + "p99": 951.3279795646667 + }, + "roundtrip": { + "p50": 1646.2719440460205, + "p90": 1661.9199514389038, + "p95": 1667.3599481582642, + "p99": 1685.7600212097168 + }, + "isolatedSum": { + "p50": 1678.3039569854736, + "p90": 1700.2559900283813, + "p95": 1707.2960138320923, + "p99": 1723.743975162506 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1786265600, + "combineLogicalBytes": 1786265600, + "fanoutMean": 5.323486328125, + "recvTokensMax": 21895, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-29bbdbee", + "identity": "h100|deepep|6144|8|256|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "h100_ff7906f8", + "comparisonKey": "4401899311d5e08c", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:52:30.177352+00:00", + "status": "valid", + "publicationStatus": 
"official", + "runner": "h100-dgxc-slurm_05", + "sku": "h100", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16", + "shape": { + "hidden": 6144, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": "set:6:9f5e1e005a35e937", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271717621", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271717621", + "createdAt": "2026-06-26T23:51:27Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 111.42399907112122, + "p90": 114.94400352239609, + "p95": 116.03199690580368, + "p99": 119.61600184440613 + }, + "combine": { + "p50": 98.33600372076035, + "p90": 103.71199995279312, + "p95": 104.67199981212616, + "p99": 106.4319983124733 + }, + "roundtrip": { + "p50": 
184.9599927663803, + "p90": 188.63999843597412, + "p95": 189.66400623321533, + "p99": 194.11200284957886 + }, + "isolatedSum": { + "p50": 209.76000279188156, + "p90": 218.6560034751892, + "p95": 220.70399671792984, + "p99": 226.04800015687943 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 66576384, + "combineLogicalBytes": 66576384, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 132.86399841308594, + "p90": 137.7599984407425, + "p95": 139.3280029296875, + "p99": 142.4960047006607 + }, + "combine": { + "p50": 137.69599795341492, + "p90": 140.4159963130951, + "p95": 141.37600362300873, + "p99": 145.53600549697876 + }, + "roundtrip": { + "p50": 237.2480034828186, + "p90": 242.08000302314758, + "p95": 243.1039959192276, + "p99": 246.24000489711761 + }, + "isolatedSum": { + "p50": 270.55999636650085, + "p90": 278.1759947538376, + "p95": 280.7040065526962, + "p99": 288.03201019763947 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 133619712, + "combineLogicalBytes": 133619712, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 184.00000035762787, + "p90": 197.31199741363525, + "p95": 200.15999674797058, + "p99": 204.12799715995789 + }, + "combine": { + "p50": 209.6959948539734, + "p90": 216.86400473117828, + "p95": 217.92000532150269, + "p99": 221.95200622081757 + }, + "roundtrip": { + "p50": 365.02400040626526, + "p90": 377.21601128578186, + "p95": 380.5760145187378, + "p99": 388.12801241874695 + }, + "isolatedSum": { + "p50": 393.69599521160126, + "p90": 414.17600214481354, + "p95": 418.08000206947327, + "p99": 426.08000338077545 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 267657216, + 
"combineLogicalBytes": 267657216, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 273.21600914001465, + "p90": 277.44001150131226, + "p95": 279.87200021743774, + "p99": 289.3120050430298 + }, + "combine": { + "p50": 332.41599798202515, + "p90": 337.119996547699, + "p95": 338.20798993110657, + "p99": 341.66398644447327 + }, + "roundtrip": { + "p50": 577.6320099830627, + "p90": 582.751989364624, + "p95": 584.7679972648621, + "p99": 588.7680053710938 + }, + "isolatedSum": { + "p50": 605.6320071220398, + "p90": 614.5600080490112, + "p95": 618.0799901485443, + "p99": 630.975991487503 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 534380544, + "combineLogicalBytes": 534380544, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 464.32000398635864, + "p90": 473.60000014305115, + "p95": 477.3760139942169, + "p99": 648.8320231437683 + }, + "combine": { + "p50": 584.384024143219, + "p90": 590.9119844436646, + "p95": 593.0560231208801, + "p99": 596.8000292778015 + }, + "roundtrip": { + "p50": 1019.2320346832275, + "p90": 1029.6640396118164, + "p95": 1033.7599515914917, + "p99": 1037.984013557434 + }, + "isolatedSum": { + "p50": 1048.7040281295776, + "p90": 1064.5119845867157, + "p95": 1070.432037115097, + "p99": 1245.6320524215698 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1066119168, + "combineLogicalBytes": 1066119168, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 879.423975944519, + "p90": 904.6720266342163, + "p95": 913.2480025291443, + 
"p99": 928.991973400116 + }, + "combine": { + "p50": 1065.6960010528564, + "p90": 1075.3920078277588, + "p95": 1078.3040523529053, + "p99": 1084.2560529708862 + }, + "roundtrip": { + "p50": 1901.9520282745361, + "p90": 1920.7359552383423, + "p95": 1926.5919923782349, + "p99": 1940.1600360870361 + }, + "isolatedSum": { + "p50": 1945.1199769973755, + "p90": 1980.064034461975, + "p95": 1991.5520548820496, + "p99": 2013.2480263710022 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2131722240, + "combineLogicalBytes": 2131722240, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-d524fd7e", + "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||157ca81687ddb63", + "colorKey": "h100_42947950", + "comparisonKey": "4c920ba7523ac63b", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:54:28.917588+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h100-dgxc-slurm_08", + "sku": "h100", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + 
"placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "157ca81687ddb63", + "workloadId": "set:3:a426d66e479dc893", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271785174", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271785174", + "createdAt": "2026-06-26T23:53:30Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 130.52800297737122, + "p90": 135.55200397968292, + "p95": 138.43199610710144, + "p99": 176.79999768733978 + }, + "combine": { + "p50": 113.8560026884079, + "p90": 120.86399644613266, + "p95": 122.11199849843979, + "p99": 145.50399780273438 + }, + "roundtrip": { + "p50": 209.05600488185883, + "p90": 217.56799519062042, + "p95": 219.200000166893, + "p99": 275.04000067710876 + }, + "isolatedSum": { + "p50": 244.3840056657791, + "p90": 256.4160004258156, + "p95": 260.54399460554123, + "p99": 322.30399549007416 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 210.27199923992157, + "p90": 217.056006193161, + "p95": 220.22399306297302, + "p99": 256.99201226234436 + }, + "combine": { + "p50": 234.9119931459427, + "p90": 241.40800535678864, + "p95": 244.9920028448105, + "p99": 262.9759907722473 + }, + "roundtrip": { + "p50": 412.54401206970215, + "p90": 420.9280014038086, + "p95": 423.0720102787018, + "p99": 427.35999822616577 + }, + 
"isolatedSum": { + "p50": 445.18399238586426, + "p90": 458.46401154994965, + "p95": 465.2159959077835, + "p99": 519.9680030345917 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 312266752, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 526.5920162200928, + "p90": 541.4720177650452, + "p95": 545.9200143814087, + "p99": 552.3520112037659 + }, + "combine": { + "p50": 637.5679969787598, + "p90": 649.6959924697876, + "p95": 652.6079773902893, + "p99": 661.0879898071289 + }, + "roundtrip": { + "p50": 1134.6240043640137, + "p90": 1146.880030632019, + "p95": 1151.2320041656494, + "p99": 1158.5919857025146 + }, + "isolatedSum": { + "p50": 1164.1600131988525, + "p90": 1191.1680102348328, + "p95": 1198.527991771698, + "p99": 1213.4400010108948 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243805696, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-efe3a643", + "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "h100_42947950", + "comparisonKey": "4c920ba7523ac63b", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:47:28.966623+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h100-dgxc-slurm_08", + "sku": "h100", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16", + "shape": { 
+ "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": "set:6:a426d66e479dc893", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271547494", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271547494", + "createdAt": "2026-06-26T23:46:11Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 111.84000223875046, + "p90": 124.15999919176102, + "p95": 131.1360001564026, + "p99": 137.66400516033173 + }, + "combine": { + "p50": 106.6880002617836, + "p90": 114.30399864912033, + "p95": 120.09599804878235, + "p99": 123.03999811410904 + }, + "roundtrip": { + "p50": 199.0399956703186, + "p90": 207.58399367332458, + "p95": 216.3199931383133, + "p99": 222.1119999885559 + }, + "isolatedSum": { + "p50": 218.52800250053406, + "p90": 238.46399784088135, + "p95": 251.23199820518494, + "p99": 260.70400327444077 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 7, + 
"correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 142.97600090503693, + "p90": 152.3520052433014, + "p95": 161.28000617027283, + "p99": 169.21600699424744 + }, + "combine": { + "p50": 150.176003575325, + "p90": 155.68000078201294, + "p95": 162.36799955368042, + "p99": 171.26399278640747 + }, + "roundtrip": { + "p50": 263.2319927215576, + "p90": 269.72800493240356, + "p95": 276.0320007801056, + "p99": 290.5920147895813 + }, + "isolatedSum": { + "p50": 293.15200448036194, + "p90": 308.03200602531433, + "p95": 323.64800572395325, + "p99": 340.4799997806549 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155889664, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 200.8640021085739, + "p90": 211.39200031757355, + "p95": 214.27200734615326, + "p99": 220.96000611782074 + }, + "combine": { + "p50": 229.72799837589264, + "p90": 236.67199909687042, + "p95": 238.71999979019165, + "p99": 246.2719976902008 + }, + "roundtrip": { + "p50": 400.86400508880615, + "p90": 413.5040044784546, + "p95": 418.94400119781494, + "p99": 428.51200699806213 + }, + "isolatedSum": { + "p50": 430.59200048446655, + "p90": 448.06399941444397, + "p95": 452.9920071363449, + "p99": 467.23200380802155 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 312266752, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 303.1040132045746, + "p90": 308.9280128479004, + "p95": 311.2959861755371, + "p99": 318.015992641449 + }, + "combine": { + "p50": 365.9839928150177, + "p90": 372.8959858417511, + "p95": 
375.39198994636536, + "p99": 382.4320137500763 + }, + "roundtrip": { + "p50": 644.8000073432922, + "p90": 654.528021812439, + "p95": 657.8879952430725, + "p99": 668.4799790382385 + }, + "isolatedSum": { + "p50": 669.0880060195923, + "p90": 681.8239986896515, + "p95": 686.6879761219025, + "p99": 700.4480063915253 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 623443968, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 526.8800258636475, + "p90": 540.5759811401367, + "p95": 545.0239777565002, + "p99": 551.6160130500793 + }, + "combine": { + "p50": 638.0159854888916, + "p90": 650.2400040626526, + "p95": 653.1519889831543, + "p99": 660.1920127868652 + }, + "roundtrip": { + "p50": 1135.424017906189, + "p90": 1147.7760076522827, + "p95": 1151.0720252990723, + "p99": 1157.5039625167847 + }, + "isolatedSum": { + "p50": 1164.896011352539, + "p90": 1190.8159852027893, + "p95": 1198.1759667396545, + "p99": 1211.8080258369446 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243805696, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1005.2160024642944, + "p90": 1027.2639989852905, + "p95": 1033.5359573364258, + "p99": 1050.271987915039 + }, + "combine": { + "p50": 1168.511986732483, + "p90": 1181.7599534988403, + "p95": 1189.1520023345947, + "p99": 1202.015995979309 + }, + "roundtrip": { + "p50": 2131.455898284912, + "p90": 2150.815963745117, + "p95": 2158.112049102783, + "p99": 2167.3600673675537 + }, + "isolatedSum": { + "p50": 2173.7279891967773, + "p90": 2209.023952484131, + "p95": 2222.6879596710205, + "p99": 2252.287983894348 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 2487009280, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-8a96205b", + "identity": "h100|deepep|7168|8|256|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "h100_ff7906f8", + "comparisonKey": "6a625438eb544ee8", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:48:12.079136+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h100-dgxc-slurm_06", + "sku": "h100", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": "set:6:a426d66e479dc893", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": 
"SemiAnalysisAI/InferenceX", + "run": { + "id": "28271563151", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271563151", + "createdAt": "2026-06-26T23:46:38Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 111.61600053310394, + "p90": 117.3119992017746, + "p95": 118.81600320339203, + "p99": 123.74400347471237 + }, + "combine": { + "p50": 105.85600137710571, + "p90": 107.07200318574905, + "p95": 111.16799712181091, + "p99": 113.8560026884079 + }, + "roundtrip": { + "p50": 193.02399456501007, + "p90": 199.52000677585602, + "p95": 200.9280025959015, + "p99": 204.96000349521637 + }, + "isolatedSum": { + "p50": 217.47200191020966, + "p90": 224.38400238752365, + "p95": 229.98400032520294, + "p99": 237.60000616312027 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 143.23200285434723, + "p90": 147.5200057029724, + "p95": 148.6400067806244, + "p99": 152.28800475597382 + }, + "combine": { + "p50": 148.76799285411835, + "p90": 154.4640064239502, + "p95": 155.29599785804749, + "p99": 156.76799416542053 + }, + "roundtrip": { + "p50": 262.33598589897156, + "p90": 266.431987285614, + "p95": 268.12800765037537, + "p99": 271.1679935455322 + }, + "isolatedSum": { + "p50": 291.9999957084656, + "p90": 301.9840121269226, + "p95": 303.9360046386719, + "p99": 309.05599892139435 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155889664, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 
196.25599682331085, + "p90": 201.1840045452118, + "p95": 202.72000133991241, + "p99": 214.84799683094025 + }, + "combine": { + "p50": 230.49600422382355, + "p90": 236.12800240516663, + "p95": 237.2799962759018, + "p99": 241.15200340747833 + }, + "roundtrip": { + "p50": 403.0719995498657, + "p90": 408.3839952945709, + "p95": 410.14400124549866, + "p99": 412.76800632476807 + }, + "isolatedSum": { + "p50": 426.7520010471344, + "p90": 437.3120069503784, + "p95": 439.9999976158142, + "p99": 456.0000002384186 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 312266752, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 301.6960024833679, + "p90": 306.43200874328613, + "p95": 307.9040050506592, + "p99": 312.1280074119568 + }, + "combine": { + "p50": 364.1279935836792, + "p90": 369.4399893283844, + "p95": 372.0319867134094, + "p99": 374.9760091304779 + }, + "roundtrip": { + "p50": 640.064001083374, + "p90": 646.8160152435303, + "p95": 648.5120058059692, + "p99": 653.6960005760193 + }, + "isolatedSum": { + "p50": 665.8239960670471, + "p90": 675.8719980716705, + "p95": 679.9359917640686, + "p99": 687.1040165424347 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 623443968, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 532.1599841117859, + "p90": 540.7040119171143, + "p95": 544.1280007362366, + "p99": 549.2799878120422 + }, + "combine": { + "p50": 637.503981590271, + "p90": 645.5039978027344, + "p95": 647.7760076522827, + "p99": 653.9520025253296 + }, + "roundtrip": { + "p50": 1141.9199705123901, + "p90": 1154.4320583343506, + "p95": 1160.1920127868652, 
+ "p99": 1180.9600591659546 + }, + "isolatedSum": { + "p50": 1169.6639657020569, + "p90": 1186.2080097198486, + "p95": 1191.9040083885193, + "p99": 1203.2319903373718 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243805696, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 993.9200282096863, + "p90": 1017.2799825668335, + "p95": 1023.4240293502808, + "p99": 1036.8319749832153 + }, + "combine": { + "p50": 1165.0559902191162, + "p90": 1175.3599643707275, + "p95": 1177.9520511627197, + "p99": 1283.2640409469604 + }, + "roundtrip": { + "p50": 2117.6319122314453, + "p90": 2134.848117828369, + "p95": 2139.6799087524414, + "p99": 2151.5839099884033 + }, + "isolatedSum": { + "p50": 2158.9760184288025, + "p90": 2192.639946937561, + "p95": 2201.3760805130005, + "p99": 2320.096015930176 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487009280, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-32c90de8", + "identity": "h100|deepep|7168|8|384|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||cd50548525dafdf", + "colorKey": "h100_ff7906f8", + "comparisonKey": "db866d0065c2a509", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:51:05.825406+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h100-dgxc-slurm_05", + "sku": "h100", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": 
"H100 EP8 · deepep · bf16", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 384, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "cd50548525dafdf", + "workloadId": "set:6:b23bc0c4b6402c69", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271671786", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271671786", + "createdAt": "2026-06-26T23:50:04Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 112.96000331640244, + "p90": 118.78400295972824, + "p95": 120.28799951076508, + "p99": 130.40000200271606 + }, + "combine": { + "p50": 106.1440035700798, + "p90": 109.15199667215347, + "p95": 110.30399799346924, + "p99": 114.49600011110306 + }, + "roundtrip": { + "p50": 196.99199497699738, + "p90": 201.34399831295013, + "p95": 202.94399559497833, + "p99": 206.04799687862396 + }, + "isolatedSum": { + "p50": 219.10400688648224, + "p90": 227.9359996318817, + "p95": 230.5919975042343, + "p99": 244.89600211381912 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77514752, + "combineLogicalBytes": 77514752, + "fanoutMean": 5.2802734375, + 
"recvTokensMax": 707, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 149.21599626541138, + "p90": 155.03999590873718, + "p95": 157.05600380897522, + "p99": 159.4880074262619 + }, + "combine": { + "p50": 153.50399911403656, + "p90": 158.62399339675903, + "p95": 160.25599837303162, + "p99": 165.15199840068817 + }, + "roundtrip": { + "p50": 270.3999876976013, + "p90": 284.0000092983246, + "p95": 285.69599986076355, + "p99": 288.9600098133087 + }, + "isolatedSum": { + "p50": 302.71999537944794, + "p90": 313.6639893054962, + "p95": 317.31200218200684, + "p99": 324.6400058269501 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 154570752, + "combineLogicalBytes": 154570752, + "fanoutMean": 5.2646484375, + "recvTokensMax": 1391, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 201.92000269889832, + "p90": 212.5760018825531, + "p95": 214.59199488162994, + "p99": 217.8560048341751 + }, + "combine": { + "p50": 229.5999974012375, + "p90": 237.92000114917755, + "p95": 241.2479966878891, + "p99": 245.2159970998764 + }, + "roundtrip": { + "p50": 404.2240083217621, + "p90": 417.5359904766083, + "p95": 419.3919897079468, + "p99": 424.1600036621094 + }, + "isolatedSum": { + "p50": 431.5200001001358, + "p90": 450.49600303173065, + "p95": 455.83999156951904, + "p99": 463.0720019340515 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 309772288, + "combineLogicalBytes": 309772288, + "fanoutMean": 5.275390625, + "recvTokensMax": 2754, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 304.83201146125793, + "p90": 315.39198756217957, + "p95": 317.6319897174835, + "p99": 320.51199674606323 + }, + "combine": { + "p50": 367.48799681663513, 
+ "p90": 376.96000933647156, + "p95": 381.9200098514557, + "p99": 392.192006111145 + }, + "roundtrip": { + "p50": 644.7039842605591, + "p90": 655.456006526947, + "p95": 677.951991558075, + "p99": 919.8399782180786 + }, + "isolatedSum": { + "p50": 672.3200082778931, + "p90": 692.3519968986511, + "p95": 699.5519995689392, + "p99": 712.7040028572083 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 619501568, + "combineLogicalBytes": 619501568, + "fanoutMean": 5.2750244140625, + "recvTokensMax": 5469, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 521.2799906730652, + "p90": 536.4800095558167, + "p95": 540.224015712738, + "p99": 549.3119955062866 + }, + "combine": { + "p50": 632.4160099029541, + "p90": 640.7679915428162, + "p95": 643.3600187301636, + "p99": 651.4559984207153 + }, + "roundtrip": { + "p50": 1126.431941986084, + "p90": 1137.8240585327148, + "p95": 1141.5679454803467, + "p99": 1157.6000452041626 + }, + "isolatedSum": { + "p50": 1153.6960005760193, + "p90": 1177.2480010986328, + "p95": 1183.5840344429016, + "p99": 1200.767993927002 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1239375872, + "combineLogicalBytes": 1239375872, + "fanoutMean": 5.276611328125, + "recvTokensMax": 10883, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1011.2960338592529, + "p90": 1036.895990371704, + "p95": 1044.3840026855469, + "p99": 1057.088017463684 + }, + "combine": { + "p50": 1154.8160314559937, + "p90": 1163.9360189437866, + "p95": 1166.5279865264893, + "p99": 1172.160029411316 + }, + "roundtrip": { + "p50": 2122.7200031280518, + "p90": 2144.9921131134033, + "p95": 2150.559902191162, + "p99": 2167.6158905029297 + }, + "isolatedSum": { + "p50": 2166.1120653152466, + "p90": 2200.8320093154907, + "p95": 2210.911989212036, + 
"p99": 2229.248046875 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2479669248, + "combineLogicalBytes": 2479669248, + "fanoutMean": 5.278564453125, + "recvTokensMax": 21730, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-3c52549e", + "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|prefill|normal|none|none|0|tuned||0a3064a2af0dd39", + "colorKey": "h100_16047c28", + "comparisonKey": "987d0ef30063bb5c", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:59:36.290170+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h100-dgxc-slurm_11", + "sku": "h100", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 · balanced", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced", + "routingLabel": "balanced", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "0a3064a2af0dd39", + "workloadId": "set:6:2dad1a73ff872905", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": 
"sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271938768", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271938768", + "createdAt": "2026-06-26T23:58:32Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 127.23200023174286, + "p90": 131.52000308036804, + "p95": 133.08799266815186, + "p99": 136.3839954137802 + }, + "combine": { + "p50": 126.11199915409088, + "p90": 130.62399625778198, + "p95": 131.48799538612366, + "p99": 133.98399949073792 + }, + "roundtrip": { + "p50": 233.43999683856964, + "p90": 236.76800727844238, + "p95": 237.40799725055695, + "p99": 240.4160052537918 + }, + "isolatedSum": { + "p50": 253.34399938583374, + "p90": 262.14399933815, + "p95": 264.5759880542755, + "p99": 270.3679949045181 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 8, + "recvTokensMax": 1024, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 180.7039976119995, + "p90": 191.3280040025711, + "p95": 193.08799505233765, + "p99": 197.28000462055206 + }, + "combine": { + "p50": 183.26400220394135, + "p90": 190.97599387168884, + "p95": 192.3840045928955, + "p99": 197.66399264335632 + }, + "roundtrip": { + "p50": 332.15999603271484, + "p90": 344.35200691223145, + "p95": 346.3680148124695, + "p99": 348.83201122283936 + }, + "isolatedSum": { + "p50": 363.96799981594086, + "p90": 382.30399787425995, + "p95": 385.47199964523315, + "p99": 394.9439972639084 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 8, + "recvTokensMax": 2048, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 272.41599559783936, + "p90": 284.0000092983246, + "p95": 286.46400570869446, + "p99": 290.1439964771271 + }, + "combine": { + "p50": 276.2239873409271, + "p90": 285.0880026817322, + "p95": 286.8799865245819, + "p99": 294.624000787735 + }, + "roundtrip": { + "p50": 519.648015499115, + "p90": 533.2159996032715, + "p95": 535.1999998092651, + "p99": 538.0480289459229 + }, + "isolatedSum": { + "p50": 548.6399829387665, + "p90": 569.0880119800568, + "p95": 573.3439922332764, + "p99": 584.7679972648621 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 469762048, + "combineLogicalBytes": 469762048, + "fanoutMean": 8, + "recvTokensMax": 4096, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 450.3679871559143, + "p90": 462.14398741722107, + "p95": 464.2559885978699, + "p99": 469.34399008750916 + }, + "combine": { + "p50": 469.11999583244324, + "p90": 477.53599286079407, + "p95": 479.0720045566559, + "p99": 484.0959906578064 + }, + "roundtrip": { + "p50": 892.3839926719666, + "p90": 904.3520092964172, + "p95": 909.0560078620911, + "p99": 1079.967975616455 + }, + "isolatedSum": { + "p50": 919.4879829883575, + "p90": 939.6799802780151, + "p95": 943.3279931545258, + "p99": 953.4399807453156 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 939524096, + "combineLogicalBytes": 939524096, + "fanoutMean": 8, + "recvTokensMax": 8192, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 810.7200264930725, + "p90": 828.607976436615, + "p95": 831.3599824905396, + "p99": 837.2480273246765 + }, + "combine": { + "p50": 854.8160195350647, + "p90": 863.6159896850586, + "p95": 865.9840226173401, + "p99": 870.3359961509705 + }, + "roundtrip": { + "p50": 1635.583996772766, + "p90": 
1645.0239419937134, + "p95": 1648.095965385437, + "p99": 1656.7679643630981 + }, + "isolatedSum": { + "p50": 1665.5360460281372, + "p90": 1692.2239661216736, + "p95": 1697.3440051078796, + "p99": 1707.584023475647 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1879048192, + "combineLogicalBytes": 1879048192, + "fanoutMean": 8, + "recvTokensMax": 16384, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1546.623945236206, + "p90": 1554.0159940719604, + "p95": 1556.3839673995972, + "p99": 1562.559962272644 + }, + "combine": { + "p50": 1599.552035331726, + "p90": 1609.2480421066284, + "p95": 1612.4800443649292, + "p99": 1621.6000318527222 + }, + "roundtrip": { + "p50": 3122.015953063965, + "p90": 3132.4799060821533, + "p95": 3136.352062225342, + "p99": 3144.4480419158936 + }, + "isolatedSum": { + "p50": 3146.175980567932, + "p90": 3163.264036178589, + "p95": 3168.8640117645264, + "p99": 3184.159994125366 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3758096384, + "combineLogicalBytes": 3758096384, + "fanoutMean": 8, + "recvTokensMax": 32768, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-05271e8a", + "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|prefill|normal|none|none|0|tuned||9e6ac678a09f7f8", + "colorKey": "h100_16047c28", + "comparisonKey": "987d0ef30063bb5c", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:55:32.762651+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h100-dgxc-slurm_15", + "sku": "h100", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + 
"epSize": 8, + "label": "H100 EP8 · deepep · bf16 · balanced", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced", + "routingLabel": "balanced", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "9e6ac678a09f7f8", + "workloadId": "set:3:2dad1a73ff872905", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271791847", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271791847", + "createdAt": "2026-06-26T23:53:43Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 141.9840008020401, + "p90": 148.15999567508698, + "p95": 150.43200552463531, + "p99": 159.71200168132782 + }, + "combine": { + "p50": 131.77600502967834, + "p90": 138.7840062379837, + "p95": 139.80799913406372, + "p99": 147.07200229167938 + }, + "roundtrip": { + "p50": 243.1039959192276, + "p90": 250.71999430656433, + "p95": 252.03201174736023, + "p99": 257.9840123653412 + }, + "isolatedSum": { + "p50": 273.76000583171844, + "p90": 286.9440019130707, + "p95": 290.24000465869904, + "p99": 306.7840039730072 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 
117440512, + "fanoutMean": 8, + "recvTokensMax": 1024, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 282.20799565315247, + "p90": 291.04000329971313, + "p95": 293.3439910411835, + "p99": 299.3920147418976 + }, + "combine": { + "p50": 282.71999955177307, + "p90": 287.4560058116913, + "p95": 288.9600098133087, + "p99": 297.5040078163147 + }, + "roundtrip": { + "p50": 530.239999294281, + "p90": 536.9600057601929, + "p95": 540.0320291519165, + "p99": 549.3119955062866 + }, + "isolatedSum": { + "p50": 564.9279952049255, + "p90": 578.4960091114044, + "p95": 582.3040008544922, + "p99": 596.8960225582123 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 469762048, + "combineLogicalBytes": 469762048, + "fanoutMean": 8, + "recvTokensMax": 4096, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 815.7439827919006, + "p90": 825.2800107002258, + "p95": 828.5760283470154, + "p99": 835.0080251693726 + }, + "combine": { + "p50": 857.9840064048767, + "p90": 866.27197265625, + "p95": 869.6320056915283, + "p99": 877.8560161590576 + }, + "roundtrip": { + "p50": 1642.5280570983887, + "p90": 1654.5920372009277, + "p95": 1658.944010734558, + "p99": 1692.7039623260498 + }, + "isolatedSum": { + "p50": 1673.7279891967773, + "p90": 1691.5519833564758, + "p95": 1698.2080340385437, + "p99": 1712.8640413284302 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1879048192, + "combineLogicalBytes": 1879048192, + "fanoutMean": 8, + "recvTokensMax": 16384, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-06b4b084", + "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|8|prefill|normal|none|none|0|tuned||7aa44c7b86748b9", + "colorKey": "h100_0c515f8b", + 
"comparisonKey": "e2c5b47e428e10b6", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:54:50.950252+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h100-dgxc-slurm_04", + "sku": "h100", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 · balanced-rank-local", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced-rank-local", + "routingLabel": "balanced-rank-local", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "7aa44c7b86748b9", + "workloadId": "set:3:388ff74baef05c72", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271798809", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271798809", + "createdAt": "2026-06-26T23:53:57Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 102.52799838781357, + "p90": 106.52799904346466, + "p95": 
108.31999778747559, + "p99": 112.44799941778183 + }, + "combine": { + "p50": 81.31200075149536, + "p90": 88.128000497818, + "p95": 88.48000317811966, + "p99": 90.4960036277771 + }, + "roundtrip": { + "p50": 155.32800555229187, + "p90": 160.92799603939056, + "p95": 161.79199516773224, + "p99": 165.40800034999847 + }, + "isolatedSum": { + "p50": 183.83999913930893, + "p90": 194.65599954128265, + "p95": 196.80000096559525, + "p99": 202.94400304555893 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 1, + "recvTokensMax": 128, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 124.95999783277512, + "p90": 130.36799430847168, + "p95": 131.9040060043335, + "p99": 142.17600226402283 + }, + "combine": { + "p50": 128.7039965391159, + "p90": 130.43199479579926, + "p95": 136.80000603199005, + "p99": 147.67999947071075 + }, + "roundtrip": { + "p50": 216.25599265098572, + "p90": 220.57600319385529, + "p95": 223.4880030155182, + "p99": 267.8399980068207 + }, + "isolatedSum": { + "p50": 253.66399437189102, + "p90": 260.79998910427094, + "p95": 268.70401203632355, + "p99": 289.8560017347336 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 1, + "recvTokensMax": 512, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 205.6639939546585, + "p90": 211.13599836826324, + "p95": 216.48000180721283, + "p99": 269.1200077533722 + }, + "combine": { + "p50": 295.80798745155334, + "p90": 300.54399371147156, + "p95": 305.2160143852234, + "p99": 337.3439908027649 + }, + "roundtrip": { + "p50": 464.4800126552582, + "p90": 471.45599126815796, + "p95": 474.047988653183, + "p99": 503.35997343063354 + }, + "isolatedSum": { + "p50": 
501.47198140621185, + "p90": 511.6799920797348, + "p95": 521.6960161924362, + "p99": 606.4639985561371 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 1, + "recvTokensMax": 2048, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-4058f6f5", + "identity": "h100|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|balanced+eplb|8|prefill|normal|none|none|0|tuned||df54a9510825f71", + "colorKey": "h100_c0c0ad86", + "comparisonKey": "252e0af9287be53d", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:59:35.979250+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h100-dgxc-slurm_07", + "sku": "h100", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 · balanced+eplb", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "balanced", + "routingLabel": "balanced+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "df54a9510825f71", + "workloadId": "set:6:2dad1a73ff872905", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": 1, + "eplbImbalanceAfter": 1, + 
"backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271942138", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271942138", + "createdAt": "2026-06-26T23:58:39Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 94.14400160312653, + "p90": 98.01600128412247, + "p95": 99.74399954080582, + "p99": 103.29599678516388 + }, + "combine": { + "p50": 83.03999900817871, + "p90": 88.22400122880936, + "p95": 89.15200084447861, + "p99": 90.81599861383438 + }, + "roundtrip": { + "p50": 157.79200196266174, + "p90": 161.9199961423874, + "p95": 163.5199934244156, + "p99": 167.67999529838562 + }, + "isolatedSum": { + "p50": 177.18400061130524, + "p90": 186.24000251293182, + "p95": 188.89600038528442, + "p99": 194.11199539899826 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 2, + "recvTokensMax": 384, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 122.40000069141388, + "p90": 129.95199859142303, + "p95": 143.10400187969208, + "p99": 173.95199835300446 + }, + "combine": { + "p50": 104.41599786281586, + "p90": 106.65600001811981, + "p95": 120.51200121641159, + "p99": 144.28800344467163 + }, + "roundtrip": { + "p50": 198.43199849128723, + "p90": 202.36800611019135, + "p95": 205.1839977502823, + "p99": 235.32800376415253 + }, + "isolatedSum": { + "p50": 226.81599855422974, + "p90": 236.60799860954285, + "p95": 263.61600309610367, + "p99": 318.2400017976761 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 2, + "recvTokensMax": 768, + "stragglerRank": 4, + "correct": true, + 
"samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 150.43200552463531, + "p90": 154.7199934720993, + "p95": 158.4320068359375, + "p99": 386.1120045185089 + }, + "combine": { + "p50": 141.15199446678162, + "p90": 145.91999351978302, + "p95": 146.55999839305878, + "p99": 147.5200057029724 + }, + "roundtrip": { + "p50": 266.1440074443817, + "p90": 274.9119997024536, + "p95": 278.3679962158203, + "p99": 286.9440019130707 + }, + "isolatedSum": { + "p50": 291.58399999141693, + "p90": 300.6399869918823, + "p95": 304.9920052289963, + "p99": 533.6320102214813 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 2, + "recvTokensMax": 1536, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 207.61600136756897, + "p90": 213.44000101089478, + "p95": 217.98400580883026, + "p99": 245.5040067434311 + }, + "combine": { + "p50": 219.93599832057953, + "p90": 225.0880002975464, + "p95": 227.2000014781952, + "p99": 244.86400187015533 + }, + "roundtrip": { + "p50": 405.023992061615, + "p90": 410.0480079650879, + "p95": 412.31998801231384, + "p99": 437.6640021800995 + }, + "isolatedSum": { + "p50": 427.5519996881485, + "p90": 438.52800130844116, + "p95": 445.18400728702545, + "p99": 490.3680086135864 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 2, + "recvTokensMax": 3072, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 329.47200536727905, + "p90": 336.60799264907837, + "p95": 339.04001116752625, + "p99": 460.4159891605377 + }, + "combine": { + "p50": 368.3199882507324, + "p90": 375.2639889717102, + "p95": 377.6960074901581, + "p99": 383.07198882102966 + 
}, + "roundtrip": { + "p50": 670.0159907341003, + "p90": 675.8400201797485, + "p95": 678.3360242843628, + "p99": 682.3359727859497 + }, + "isolatedSum": { + "p50": 697.7919936180115, + "p90": 711.8719816207886, + "p95": 716.7360186576843, + "p99": 843.4879779815674 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 469762048, + "combineLogicalBytes": 469762048, + "fanoutMean": 2, + "recvTokensMax": 6144, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 580.8960199356079, + "p90": 590.1119709014893, + "p95": 592.6079750061035, + "p99": 597.5040197372437 + }, + "combine": { + "p50": 647.9039788246155, + "p90": 655.0719738006592, + "p95": 657.2480201721191, + "p99": 660.863995552063 + }, + "roundtrip": { + "p50": 1207.4559926986694, + "p90": 1217.087984085083, + "p95": 1224.0639925003052, + "p99": 1241.312026977539 + }, + "isolatedSum": { + "p50": 1228.7999987602234, + "p90": 1245.1839447021484, + "p95": 1249.8559951782227, + "p99": 1258.3680152893066 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 939524096, + "combineLogicalBytes": 939524096, + "fanoutMean": 2, + "recvTokensMax": 12288, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-b89c63a5", + "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|prefill|normal|none|none|0|tuned||38fd0bcf7109c32", + "colorKey": "h100_b654f9b2", + "comparisonKey": "37db9a5137981152", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:55:36.358305+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h100-dgxc-slurm_13", + "sku": "h100", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": 
"h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 · hotspot-single", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-single", + "routingLabel": "hotspot-single", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "38fd0bcf7109c32", + "workloadId": "set:3:b952d4a43d688b50", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271820121", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271820121", + "createdAt": "2026-06-26T23:54:38Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 123.61600250005722, + "p90": 127.48800218105316, + "p95": 131.1040073633194, + "p99": 136.19199395179749 + }, + "combine": { + "p50": 116.95999652147293, + "p90": 122.46400117874146, + "p95": 124.95999783277512, + "p99": 131.26400113105774 + }, + "roundtrip": { + "p50": 217.72800385951996, + "p90": 224.89599883556366, + "p95": 229.24800217151642, + "p99": 245.37600576877594 + }, + "isolatedSum": { + "p50": 240.57599902153015, + "p90": 249.95200335979462, + "p95": 256.0640051960945, + "p99": 267.4559950828552 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1024, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 238.91200125217438, + "p90": 257.24801421165466, + "p95": 259.2960000038147, + "p99": 261.9520127773285 + }, + "combine": { + "p50": 271.93599939346313, + "p90": 282.1759879589081, + "p95": 284.8320007324219, + "p99": 288.5119915008545 + }, + "roundtrip": { + "p50": 486.04801297187805, + "p90": 500.8959770202637, + "p95": 503.55201959609985, + "p99": 509.2160105705261 + }, + "isolatedSum": { + "p50": 510.8480006456375, + "p90": 539.4240021705627, + "p95": 544.1280007362366, + "p99": 550.464004278183 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311091200, + "combineLogicalBytes": 311091200, + "fanoutMean": 5.2978515625, + "recvTokensMax": 4096, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 718.3039784431458, + "p90": 732.3840260505676, + "p95": 736.3520264625549, + "p99": 740.4159903526306 + }, + "combine": { + "p50": 829.9520015716553, + "p90": 838.047981262207, + "p95": 840.2559757232666, + "p99": 846.6879725456238 + }, + "roundtrip": { + "p50": 1516.2559747695923, + "p90": 1525.3759622573853, + "p95": 1528.223991394043, + "p99": 1535.2319478988647 + }, + "isolatedSum": { + "p50": 1548.255980014801, + "p90": 1570.4320073127747, + "p95": 1576.6080021858215, + "p99": 1587.1039628982544 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1241511936, + "combineLogicalBytes": 1241511936, + "fanoutMean": 5.28570556640625, + "recvTokensMax": 16384, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-fa73d33e", + "identity": 
"h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|prefill|normal|none|none|0|tuned||bfbb64a166e9f1c", + "colorKey": "h100_b654f9b2", + "comparisonKey": "37db9a5137981152", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:01:55.460957+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h100-dgxc-slurm_13", + "sku": "h100", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 · hotspot-single", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-single", + "routingLabel": "hotspot-single", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "bfbb64a166e9f1c", + "workloadId": "set:6:b952d4a43d688b50", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28272012738", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272012738", + "createdAt": "2026-06-27T00:00:49Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": 
[ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 129.08799946308136, + "p90": 131.42399489879608, + "p95": 132.03200697898865, + "p99": 135.903999209404 + }, + "combine": { + "p50": 119.87199634313583, + "p90": 121.98399752378464, + "p95": 122.36800044775009, + "p99": 125.72799623012543 + }, + "roundtrip": { + "p50": 219.200000166893, + "p90": 223.80800545215607, + "p95": 224.7679978609085, + "p99": 228.0000001192093 + }, + "isolatedSum": { + "p50": 248.9599958062172, + "p90": 253.40799242258072, + "p95": 254.40000742673874, + "p99": 261.6319954395294 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1024, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 163.96799683570862, + "p90": 168.19199919700623, + "p95": 170.43200135231018, + "p99": 173.12000691890717 + }, + "combine": { + "p50": 171.55200242996216, + "p90": 176.83200538158417, + "p95": 178.3680021762848, + "p99": 180.60800433158875 + }, + "roundtrip": { + "p50": 306.7840039730072, + "p90": 310.94399094581604, + "p95": 312.3199939727783, + "p99": 314.7839903831482 + }, + "isolatedSum": { + "p50": 335.5199992656708, + "p90": 345.0240045785904, + "p95": 348.80000352859497, + "p99": 353.7280112504959 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156090368, + "combineLogicalBytes": 156090368, + "fanoutMean": 5.31640625, + "recvTokensMax": 2048, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 237.34399676322937, + "p90": 242.11199581623077, + "p95": 244.1920042037964, + "p99": 248.28800559043884 + }, + "combine": { + "p50": 268.22400093078613, + "p90": 273.53599667549133, + "p95": 274.84801411628723, + "p99": 277.69601345062256 + }, + "roundtrip": { 
+ "p50": 482.7519953250885, + "p90": 488.44799399375916, + "p95": 490.4319941997528, + "p99": 495.07200717926025 + }, + "isolatedSum": { + "p50": 505.5679976940155, + "p90": 515.6479924917221, + "p95": 519.0400183200836, + "p99": 525.9840190410614 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311091200, + "combineLogicalBytes": 311091200, + "fanoutMean": 5.2978515625, + "recvTokensMax": 4096, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 391.61598682403564, + "p90": 397.5679874420166, + "p95": 399.9040126800537, + "p99": 407.1039855480194 + }, + "combine": { + "p50": 455.6480050086975, + "p90": 461.5359902381897, + "p95": 463.0720019340515, + "p99": 466.5600061416626 + }, + "roundtrip": { + "p50": 823.2960104942322, + "p90": 829.5040130615234, + "p95": 831.5839767456055, + "p99": 835.4560136795044 + }, + "isolatedSum": { + "p50": 847.2639918327332, + "p90": 859.1039776802063, + "p95": 862.9760146141052, + "p99": 873.663991689682 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 620648448, + "combineLogicalBytes": 620648448, + "fanoutMean": 5.2847900390625, + "recvTokensMax": 8192, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 720.6720113754272, + "p90": 733.6320281028748, + "p95": 737.5680208206177, + "p99": 744.9280023574829 + }, + "combine": { + "p50": 825.7279992103577, + "p90": 834.559977054596, + "p95": 837.3759984970093, + "p99": 841.2479758262634 + }, + "roundtrip": { + "p50": 1514.240026473999, + "p90": 1523.7120389938354, + "p95": 1526.6239643096924, + "p99": 1534.3999862670898 + }, + "isolatedSum": { + "p50": 1546.400010585785, + "p90": 1568.1920051574707, + "p95": 1574.944019317627, + "p99": 1586.1759781837463 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1241511936, + 
"combineLogicalBytes": 1241511936, + "fanoutMean": 5.28570556640625, + "recvTokensMax": 16384, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1379.1359663009644, + "p90": 1390.1439905166626, + "p95": 1393.280029296875, + "p99": 1400.480031967163 + }, + "combine": { + "p50": 1540.5759811401367, + "p90": 1547.4879741668701, + "p95": 1549.7599840164185, + "p99": 1553.1519651412964 + }, + "roundtrip": { + "p50": 2893.3119773864746, + "p90": 2902.30393409729, + "p95": 2905.695915222168, + "p99": 2912.480115890503 + }, + "isolatedSum": { + "p50": 2919.711947441101, + "p90": 2937.6319646835327, + "p95": 2943.0400133132935, + "p99": 2953.6319971084595 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2484242432, + "combineLogicalBytes": 2484242432, + "fanoutMean": 5.288299560546875, + "recvTokensMax": 32768, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-e91dfe75", + "identity": "h100|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|hotspot-single+eplb|8|prefill|normal|none|none|0|tuned||29ae5ace13636f8", + "colorKey": "h100_456a963c", + "comparisonKey": "54b53207b090a644", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:01:57.841646+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h100-dgxc-slurm_17", + "sku": "h100", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 · hotspot-single+eplb", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "hotspot-single", + "routingLabel": "hotspot-single+eplb", + "routingStep": 0, + "unevenTokens": 
"none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "29ae5ace13636f8", + "workloadId": "set:6:b952d4a43d688b50", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": 1.8466796875, + "eplbImbalanceAfter": 1.0002700343276514, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28272016505", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272016505", + "createdAt": "2026-06-27T00:00:56Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 109.69600081443787, + "p90": 113.98400366306305, + "p95": 115.77600240707397, + "p99": 122.43200093507767 + }, + "combine": { + "p50": 105.50399869680405, + "p90": 111.10399663448334, + "p95": 112.31999844312668, + "p99": 114.27199840545654 + }, + "roundtrip": { + "p50": 196.6720074415207, + "p90": 203.2960057258606, + "p95": 204.0960043668747, + "p99": 207.64799416065216 + }, + "isolatedSum": { + "p50": 215.1999995112419, + "p90": 225.0880002975464, + "p95": 228.09600085020065, + "p99": 236.7039993405342 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77701120, + "combineLogicalBytes": 77701120, + "fanoutMean": 5.29296875, + "recvTokensMax": 697, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + 
"dispatch": { + "p50": 144.44799721240997, + "p90": 149.63200688362122, + "p95": 151.2320041656494, + "p99": 155.83999454975128 + }, + "combine": { + "p50": 152.0639955997467, + "p90": 153.60000729560852, + "p95": 154.4640064239502, + "p99": 158.52800011634827 + }, + "roundtrip": { + "p50": 265.0560140609741, + "p90": 268.92799139022827, + "p95": 270.687997341156, + "p99": 273.21600914001465 + }, + "isolatedSum": { + "p50": 296.5119928121567, + "p90": 303.23201417922974, + "p95": 305.6960105895996, + "p99": 314.36799466609955 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155187200, + "combineLogicalBytes": 155187200, + "fanoutMean": 5.28564453125, + "recvTokensMax": 1372, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 199.64799284934998, + "p90": 203.45599949359894, + "p95": 204.79999482631683, + "p99": 208.76799523830414 + }, + "combine": { + "p50": 228.5120040178299, + "p90": 234.23999547958374, + "p95": 235.167995095253, + "p99": 236.95999383926392 + }, + "roundtrip": { + "p50": 403.80799770355225, + "p90": 408.35198760032654, + "p95": 410.0799858570099, + "p99": 413.88800740242004 + }, + "isolatedSum": { + "p50": 428.15999686717987, + "p90": 437.6959949731827, + "p95": 439.9679899215698, + "p99": 445.72798907756805 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311162880, + "combineLogicalBytes": 311162880, + "fanoutMean": 5.299072265625, + "recvTokensMax": 2761, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 305.4080009460449, + "p90": 310.016006231308, + "p95": 311.7760121822357, + "p99": 316.76799058914185 + }, + "combine": { + "p50": 367.19998717308044, + "p90": 374.0159869194031, + "p95": 375.5199909210205, + "p99": 379.2960047721863 + }, + "roundtrip": { + "p50": 649.1199731826782, + "p90": 
655.6479930877686, + "p95": 658.4640145301819, + "p99": 661.9840264320374 + }, + "isolatedSum": { + "p50": 672.6079881191254, + "p90": 684.0319931507111, + "p95": 687.2960031032562, + "p99": 696.0639953613281 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 619974656, + "combineLogicalBytes": 619974656, + "fanoutMean": 5.279052734375, + "recvTokensMax": 5481, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 528.8640260696411, + "p90": 539.3919944763184, + "p95": 543.8079833984375, + "p99": 805.9520125389099 + }, + "combine": { + "p50": 633.184015750885, + "p90": 640.9919857978821, + "p95": 643.9039707183838, + "p99": 648.5440135002136 + }, + "roundtrip": { + "p50": 1132.032036781311, + "p90": 1143.8720226287842, + "p95": 1147.3920345306396, + "p99": 1154.8160314559937 + }, + "isolatedSum": { + "p50": 1162.0480418205261, + "p90": 1180.3839802742004, + "p95": 1187.7119541168213, + "p99": 1454.4960260391235 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240020992, + "combineLogicalBytes": 1240020992, + "fanoutMean": 5.27935791015625, + "recvTokensMax": 10883, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 985.8880043029785, + "p90": 1005.5680274963379, + "p95": 1010.9119415283203, + "p99": 1020.5440521240234 + }, + "combine": { + "p50": 1144.1919803619385, + "p90": 1153.92005443573, + "p95": 1157.439947128296, + "p99": 1163.6799573898315 + }, + "roundtrip": { + "p50": 2094.464063644409, + "p90": 2109.8880767822266, + "p95": 2115.295886993408, + "p99": 2124.5760917663574 + }, + "isolatedSum": { + "p50": 2130.079984664917, + "p90": 2159.488081932068, + "p95": 2168.351888656616, + "p99": 2184.224009513855 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480414720, + "combineLogicalBytes": 2480414720, + 
"fanoutMean": 5.2801513671875, + "recvTokensMax": 21702, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-f8095d72", + "identity": "h100|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|uniform+eplb|8|prefill|normal|none|none|0|tuned||2225dbbdab9bf2d", + "colorKey": "h100_fb5b86de", + "comparisonKey": "cd6da73322e03923", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:59:17.404659+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h100-dgxc-slurm_04", + "sku": "h100", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 · uniform+eplb", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "uniform", + "routingLabel": "uniform+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "2225dbbdab9bf2d", + "workloadId": "set:6:a426d66e479dc893", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": 1.006072998046875, + "eplbImbalanceAfter": 1.0000152587890625, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": 
"28271927356", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271927356", + "createdAt": "2026-06-26T23:58:11Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 113.02399635314941, + "p90": 118.40000003576279, + "p95": 120.12799829244614, + "p99": 126.0479986667633 + }, + "combine": { + "p50": 105.66399991512299, + "p90": 108.89600217342377, + "p95": 112.06399649381638, + "p99": 115.9679964184761 + }, + "roundtrip": { + "p50": 195.8719938993454, + "p90": 201.24800503253937, + "p95": 202.62399315834045, + "p99": 207.39200711250305 + }, + "isolatedSum": { + "p50": 218.6879962682724, + "p90": 227.29600220918655, + "p95": 232.1919947862625, + "p99": 242.0159950852394 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77041664, + "combineLogicalBytes": 77041664, + "fanoutMean": 5.248046875, + "recvTokensMax": 686, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 146.97599411010742, + "p90": 150.91200172901154, + "p95": 151.7760008573532, + "p99": 155.39200603961945 + }, + "combine": { + "p50": 148.3519971370697, + "p90": 153.82400155067444, + "p95": 154.4959992170334, + "p99": 156.67200088500977 + }, + "roundtrip": { + "p50": 265.9200131893158, + "p90": 270.9760069847107, + "p95": 273.1199860572815, + "p99": 278.4000039100647 + }, + "isolatedSum": { + "p50": 295.3279912471771, + "p90": 304.736003279686, + "p95": 306.2720000743866, + "p99": 312.0640069246292 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 154542080, + "combineLogicalBytes": 154542080, + "fanoutMean": 5.263671875, + "recvTokensMax": 1365, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 200.32000541687012, + "p90": 203.3279985189438, + "p95": 
204.57600057125092, + "p99": 208.28799903392792 + }, + "combine": { + "p50": 229.8559993505478, + "p90": 235.4239970445633, + "p95": 236.4480048418045, + "p99": 237.98400163650513 + }, + "roundtrip": { + "p50": 402.46400237083435, + "p90": 407.9360067844391, + "p95": 410.0480079650879, + "p99": 413.1839871406555 + }, + "isolatedSum": { + "p50": 430.1760047674179, + "p90": 438.7519955635071, + "p95": 441.0240054130554, + "p99": 446.27200067043304 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 310589440, + "combineLogicalBytes": 310589440, + "fanoutMean": 5.289306640625, + "recvTokensMax": 2746, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 303.51999402046204, + "p90": 308.6720108985901, + "p95": 310.2720081806183, + "p99": 315.8400058746338 + }, + "combine": { + "p50": 366.3040101528168, + "p90": 374.33600425720215, + "p95": 375.99998712539673, + "p99": 380.0320029258728 + }, + "roundtrip": { + "p50": 643.9679861068726, + "p90": 650.9119868278503, + "p95": 653.4720063209534, + "p99": 656.9280028343201 + }, + "isolatedSum": { + "p50": 669.8240041732788, + "p90": 683.0080151557922, + "p95": 686.271995306015, + "p99": 695.8720088005066 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 619171840, + "combineLogicalBytes": 619171840, + "fanoutMean": 5.272216796875, + "recvTokensMax": 5467, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 526.6559720039368, + "p90": 533.8879823684692, + "p95": 536.0000133514404, + "p99": 542.4000024795532 + }, + "combine": { + "p50": 628.607988357544, + "p90": 636.5759968757629, + "p95": 639.3600106239319, + "p99": 643.455982208252 + }, + "roundtrip": { + "p50": 1128.5760402679443, + "p90": 1137.984037399292, + "p95": 1141.5679454803467, + "p99": 1146.1759805679321 + }, + "isolatedSum": { + 
"p50": 1155.2639603614807, + "p90": 1170.4639792442322, + "p95": 1175.3600239753723, + "p99": 1185.8559846878052 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1238945792, + "combineLogicalBytes": 1238945792, + "fanoutMean": 5.2747802734375, + "recvTokensMax": 10913, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1018.4320211410522, + "p90": 1046.496033668518, + "p95": 1056.1920404434204, + "p99": 1073.5039710998535 + }, + "combine": { + "p50": 1148.5120058059692, + "p90": 1156.3199758529663, + "p95": 1158.784031867981, + "p99": 1164.031982421875 + }, + "roundtrip": { + "p50": 2113.408088684082, + "p90": 2138.5281085968018, + "p95": 2143.807888031006, + "p99": 2155.679941177368 + }, + "isolatedSum": { + "p50": 2166.9440269470215, + "p90": 2202.8160095214844, + "p95": 2214.9760723114014, + "p99": 2237.5359535217285 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2481747968, + "combineLogicalBytes": 2481747968, + "fanoutMean": 5.282989501953125, + "recvTokensMax": 21789, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-ff5c49bb", + "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|prefill|normal|none|none|0|tuned||4caecd33bedf786", + "colorKey": "h100_aa268d13", + "comparisonKey": "927a6d7282665742", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:55:17.079494+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h100-dgxc-slurm_02", + "sku": "h100", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 · zipf", + "shape": { + 
"hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf", + "routingLabel": "zipf", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "4caecd33bedf786", + "workloadId": "set:3:830e36e88869e222", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271806404", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271806404", + "createdAt": "2026-06-26T23:54:11Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 125.37600100040436, + "p90": 131.9040060043335, + "p95": 132.7359974384308, + "p99": 137.08800077438354 + }, + "combine": { + "p50": 113.0559965968132, + "p90": 114.04799669981003, + "p95": 114.56000059843063, + "p99": 120.67200243473053 + }, + "roundtrip": { + "p50": 216.2880003452301, + "p90": 219.67999637126923, + "p95": 221.15199267864227, + "p99": 226.17599368095398 + }, + "isolatedSum": { + "p50": 238.43199759721756, + "p90": 245.95200270414352, + "p95": 247.29599803686142, + "p99": 257.7600032091141 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 4, + 
"correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 239.42400515079498, + "p90": 255.5519938468933, + "p95": 258.14399123191833, + "p99": 261.9200050830841 + }, + "combine": { + "p50": 267.07199215888977, + "p90": 276.63999795913696, + "p95": 277.536004781723, + "p99": 279.90400791168213 + }, + "roundtrip": { + "p50": 476.22400522232056, + "p90": 492.3520088195801, + "p95": 495.03999948501587, + "p99": 499.55201148986816 + }, + "isolatedSum": { + "p50": 506.49599730968475, + "p90": 532.1919918060303, + "p95": 535.6799960136414, + "p99": 541.8240129947662 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 201678848, + "combineLogicalBytes": 201678848, + "fanoutMean": 3.4345703125, + "recvTokensMax": 4094, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 677.183985710144, + "p90": 691.3599967956543, + "p95": 694.8800086975098, + "p99": 701.2479901313782 + }, + "combine": { + "p50": 816.2879943847656, + "p90": 828.607976436615, + "p95": 832.5759768486023, + "p99": 837.8239870071411 + }, + "roundtrip": { + "p50": 1460.4159593582153, + "p90": 1474.176049232483, + "p95": 1478.4640073776245, + "p99": 1485.8880043029785 + }, + "isolatedSum": { + "p50": 1493.4719800949097, + "p90": 1519.9679732322693, + "p95": 1527.455985546112, + "p99": 1539.0719771385193 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 808822784, + "combineLogicalBytes": 808822784, + "fanoutMean": 3.44354248046875, + "recvTokensMax": 16380, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-f5264491", + "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|prefill|normal|none|none|0|tuned||b5217e990b95f86", + "colorKey": "h100_aa268d13", + "comparisonKey": "927a6d7282665742", + "schemaVersion": 3, + 
"generatedAt": "2026-06-27T00:00:04.176924+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h100-dgxc-slurm_08", + "sku": "h100", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 · zipf", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf", + "routingLabel": "zipf", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b5217e990b95f86", + "workloadId": "set:6:830e36e88869e222", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271951888", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271951888", + "createdAt": "2026-06-26T23:59:00Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 124.70400333404541, + "p90": 128.86400520801544, + "p95": 131.071999669075, + "p99": 132.9600065946579 + }, + "combine": { + "p50": 112.5119999051094, + "p90": 
114.01599645614624, + "p95": 114.3679991364479, + "p99": 116.5120005607605 + }, + "roundtrip": { + "p50": 216.22399985790253, + "p90": 219.90400552749634, + "p95": 221.02400660514832, + "p99": 223.90399873256683 + }, + "isolatedSum": { + "p50": 237.21600323915482, + "p90": 242.88000166416168, + "p95": 245.43999880552292, + "p99": 249.4720071554184 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 164.57599401474, + "p90": 167.93599724769592, + "p95": 169.5680022239685, + "p99": 229.15199398994446 + }, + "combine": { + "p50": 162.6559942960739, + "p90": 168.64000260829926, + "p95": 169.98399794101715, + "p99": 171.29600048065186 + }, + "roundtrip": { + "p50": 299.80799555778503, + "p90": 305.11999130249023, + "p95": 306.71998858451843, + "p99": 308.9919984340668 + }, + "isolatedSum": { + "p50": 327.2319883108139, + "p90": 336.5759998559952, + "p95": 339.55200016498566, + "p99": 400.4479944705963 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 100509696, + "combineLogicalBytes": 100509696, + "fanoutMean": 3.42333984375, + "recvTokensMax": 2046, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 237.92000114917755, + "p90": 242.3039972782135, + "p95": 244.4159984588623, + "p99": 250.14400482177734 + }, + "combine": { + "p50": 260.9280049800873, + "p90": 265.6640112400055, + "p95": 267.67998933792114, + "p99": 272.7360129356384 + }, + "roundtrip": { + "p50": 471.77600860595703, + "p90": 476.8959879875183, + "p95": 479.2639911174774, + "p99": 495.2000081539154 + }, + "isolatedSum": { + "p50": 498.84800612926483, + "p90": 507.968008518219, + "p95": 512.0959877967834, + "p99": 
522.8800177574158 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 201678848, + "combineLogicalBytes": 201678848, + "fanoutMean": 3.4345703125, + "recvTokensMax": 4094, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 377.85598635673523, + "p90": 384.38400626182556, + "p95": 385.8239948749542, + "p99": 390.6880021095276 + }, + "combine": { + "p50": 442.1760141849518, + "p90": 447.80799746513367, + "p95": 449.3120014667511, + "p99": 452.86399126052856 + }, + "roundtrip": { + "p50": 795.6799864768982, + "p90": 803.167998790741, + "p95": 806.3039779663086, + "p99": 813.0559921264648 + }, + "isolatedSum": { + "p50": 820.032000541687, + "p90": 832.1920037269592, + "p95": 835.1359963417053, + "p99": 843.5519933700562 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 405035008, + "combineLogicalBytes": 405035008, + "fanoutMean": 3.4488525390625, + "recvTokensMax": 8189, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 675.3919720649719, + "p90": 690.4320120811462, + "p95": 694.1120028495789, + "p99": 700.8320093154907 + }, + "combine": { + "p50": 806.1439990997314, + "p90": 816.5119886398315, + "p95": 818.5279965400696, + "p99": 824.5440125465393 + }, + "roundtrip": { + "p50": 1447.1999406814575, + "p90": 1458.143949508667, + "p95": 1462.5600576400757, + "p99": 1468.991994857788 + }, + "isolatedSum": { + "p50": 1481.5359711647034, + "p90": 1506.9440007209778, + "p95": 1512.6399993896484, + "p99": 1525.37602186203 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 808822784, + "combineLogicalBytes": 808822784, + "fanoutMean": 3.44354248046875, + "recvTokensMax": 16380, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { 
+ "p50": 1273.7280130386353, + "p90": 1286.1759662628174, + "p95": 1290.2400493621826, + "p99": 1300.3519773483276 + }, + "combine": { + "p50": 1515.6480073928833, + "p90": 1529.1199684143066, + "p95": 1554.6239614486694, + "p99": 1575.2639770507812 + }, + "roundtrip": { + "p50": 2763.0081176757812, + "p90": 2772.9599475860596, + "p95": 2776.3519287109375, + "p99": 2782.464027404785 + }, + "isolatedSum": { + "p50": 2789.3760204315186, + "p90": 2815.295934677124, + "p95": 2844.864010810852, + "p99": 2875.615954399109 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1619795968, + "combineLogicalBytes": 1619795968, + "fanoutMean": 3.4481201171875, + "recvTokensMax": 32761, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-f680673f", + "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|prefill|normal|none|none|0|tuned||3dd868cb33839a3", + "colorKey": "h100_002beb29", + "comparisonKey": "3715210183d38757", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:55:20.108988+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h100-dgxc-slurm_06", + "sku": "h100", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 · zipf-heavy", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": 
"backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "3dd868cb33839a3", + "workloadId": "set:3:1ca614e23cc66be1", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271813470", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271813470", + "createdAt": "2026-06-26T23:54:25Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 117.63200163841248, + "p90": 122.43200093507767, + "p95": 123.55200201272964, + "p99": 126.5919953584671 + }, + "combine": { + "p50": 106.62399977445602, + "p90": 112.31999844312668, + "p95": 113.27999830245972, + "p99": 115.9679964184761 + }, + "roundtrip": { + "p50": 207.58399367332458, + "p90": 211.84000372886658, + "p95": 213.18399906158447, + "p99": 216.35200083255768 + }, + "isolatedSum": { + "p50": 224.2560014128685, + "p90": 234.75199937820435, + "p95": 236.83200031518936, + "p99": 242.5599917769432 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22650880, + "combineLogicalBytes": 22650880, + "fanoutMean": 1.54296875, + "recvTokensMax": 1024, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 236.35199666023254, + "p90": 249.82400238513947, + "p95": 253.88801097869873, + "p99": 257.02399015426636 + }, + "combine": { + "p50": 251.583993434906, + "p90": 259.7759962081909, + "p95": 260.47998666763306, + "p99": 262.2080147266388 + }, + "roundtrip": { + "p50": 
459.29598808288574, + "p90": 472.1919894218445, + "p95": 474.88000988960266, + "p99": 478.5279929637909 + }, + "isolatedSum": { + "p50": 487.93599009513855, + "p90": 509.5999985933304, + "p95": 514.3679976463318, + "p99": 519.2320048809052 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 91521024, + "combineLogicalBytes": 91521024, + "fanoutMean": 1.55859375, + "recvTokensMax": 4096, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 659.3279838562012, + "p90": 669.0239906311035, + "p95": 672.0960140228271, + "p99": 678.4319877624512 + }, + "combine": { + "p50": 783.456027507782, + "p90": 794.6239709854126, + "p95": 799.0720272064209, + "p99": 807.6800107955933 + }, + "roundtrip": { + "p50": 1412.6399755477905, + "p90": 1421.8239784240723, + "p95": 1426.0480403900146, + "p99": 1434.0159893035889 + }, + "isolatedSum": { + "p50": 1442.7840113639832, + "p90": 1463.647961616516, + "p95": 1471.168041229248, + "p99": 1486.1119985580444 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 368062464, + "combineLogicalBytes": 368062464, + "fanoutMean": 1.5670166015625, + "recvTokensMax": 16384, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-329395ff", + "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|prefill|normal|none|none|0|tuned||bbcd1d9d8d1e4fe", + "colorKey": "h100_002beb29", + "comparisonKey": "3715210183d38757", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:01:29.454209+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h100-dgxc-slurm_15", + "sku": "h100", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + 
"transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 · zipf-heavy", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "bbcd1d9d8d1e4fe", + "workloadId": "set:6:1ca614e23cc66be1", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271996602", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271996602", + "createdAt": "2026-06-27T00:00:22Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 115.87200313806534, + "p90": 122.14399874210358, + "p95": 123.10399860143661, + "p99": 127.16799974441528 + }, + "combine": { + "p50": 106.72000050544739, + "p90": 111.7120012640953, + "p95": 112.57600039243698, + "p99": 114.46399986743927 + }, + "roundtrip": { + "p50": 207.07200467586517, + "p90": 210.91200411319733, + "p95": 212.54399418830872, + "p99": 243.52000653743744 + }, + "isolatedSum": { + "p50": 222.59200364351273, + "p90": 233.85600000619888, + "p95": 235.6799989938736, + "p99": 241.63199961185455 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 22650880, + "combineLogicalBytes": 22650880, + "fanoutMean": 1.54296875, + "recvTokensMax": 1024, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 159.29600596427917, + "p90": 166.62399470806122, + "p95": 167.4560010433197, + "p99": 169.21600699424744 + }, + "combine": { + "p50": 154.65599298477173, + "p90": 163.10399770736694, + "p95": 163.7759953737259, + "p99": 165.0560051202774 + }, + "roundtrip": { + "p50": 289.44000601768494, + "p90": 301.66399478912354, + "p95": 303.5840094089508, + "p99": 308.03200602531433 + }, + "isolatedSum": { + "p50": 313.9519989490509, + "p90": 329.72799241542816, + "p95": 331.2319964170456, + "p99": 334.27201211452484 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 45688832, + "combineLogicalBytes": 45688832, + "fanoutMean": 1.55615234375, + "recvTokensMax": 2048, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 231.1680018901825, + "p90": 236.4799976348877, + "p95": 237.40799725055695, + "p99": 240.7039999961853 + }, + "combine": { + "p50": 252.73600220680237, + "p90": 260.8639895915985, + "p95": 261.8879973888397, + "p99": 263.64800333976746 + }, + "roundtrip": { + "p50": 461.34400367736816, + "p90": 475.39201378822327, + "p95": 476.639986038208, + "p99": 479.45600748062134 + }, + "isolatedSum": { + "p50": 483.90400409698486, + "p90": 497.3439872264862, + "p95": 499.29599463939667, + "p99": 504.35200333595276 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 91521024, + "combineLogicalBytes": 91521024, + "fanoutMean": 1.55859375, + "recvTokensMax": 4096, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 374.2400109767914, + "p90": 379.040002822876, + "p95": 
381.98399543762207, + "p99": 387.4559998512268 + }, + "combine": { + "p50": 431.2640130519867, + "p90": 439.8399889469147, + "p95": 443.07199120521545, + "p99": 446.78398966789246 + }, + "roundtrip": { + "p50": 779.2320251464844, + "p90": 791.3600206375122, + "p95": 794.0160036087036, + "p99": 801.0240197181702 + }, + "isolatedSum": { + "p50": 805.5040240287781, + "p90": 818.8799917697906, + "p95": 825.0559866428375, + "p99": 834.2399895191193 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 183916544, + "combineLogicalBytes": 183916544, + "fanoutMean": 1.5660400390625, + "recvTokensMax": 8192, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 661.1520051956177, + "p90": 676.1919856071472, + "p95": 679.6479821205139, + "p99": 685.9520077705383 + }, + "combine": { + "p50": 789.9519801139832, + "p90": 800.0959753990173, + "p95": 803.1359910964966, + "p99": 808.7360262870789 + }, + "roundtrip": { + "p50": 1422.271966934204, + "p90": 1435.1680278778076, + "p95": 1439.1039609909058, + "p99": 1454.367995262146 + }, + "isolatedSum": { + "p50": 1451.1039853096008, + "p90": 1476.2879610061646, + "p95": 1482.7839732170105, + "p99": 1494.6880340576172 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 368062464, + "combineLogicalBytes": 368062464, + "fanoutMean": 1.5670166015625, + "recvTokensMax": 16384, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1241.8559789657593, + "p90": 1251.871943473816, + "p95": 1256.4799785614014, + "p99": 1264.0639543533325 + }, + "combine": { + "p50": 1471.4560508728027, + "p90": 1480.1599979400635, + "p95": 1482.6240539550781, + "p99": 1489.8879528045654 + }, + "roundtrip": { + "p50": 2687.9680156707764, + "p90": 2698.848009109497, + "p95": 2703.104019165039, + "p99": 2708.928108215332 + }, + 
"isolatedSum": { + "p50": 2713.312029838562, + "p90": 2732.0319414138794, + "p95": 2739.1040325164795, + "p99": 2753.951907157898 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 734720000, + "combineLogicalBytes": 734720000, + "fanoutMean": 1.56402587890625, + "recvTokensMax": 32768, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-c90a67e2", + "identity": "h100|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-heavy+eplb|8|prefill|normal|none|none|0|tuned||46855e7fa6754eb", + "colorKey": "h100_c44978e5", + "comparisonKey": "6c5c69e3474ec552", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:01:29.771027+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h100-dgxc-slurm_05", + "sku": "h100", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 · zipf-heavy+eplb", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "46855e7fa6754eb", + "workloadId": "set:6:1ca614e23cc66be1", + "workloadSource": "canonical-serialized", + 
"eplbImbalanceBefore": 7.38995361328125, + "eplbImbalanceAfter": 1.0000210716610862, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28272000459", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272000459", + "createdAt": "2026-06-27T00:00:28Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 110.75200140476227, + "p90": 114.97599631547928, + "p95": 116.95999652147293, + "p99": 122.01599776744843 + }, + "combine": { + "p50": 105.92000186443329, + "p90": 109.56799983978271, + "p95": 111.23199760913849, + "p99": 114.14399743080139 + }, + "roundtrip": { + "p50": 193.1840032339096, + "p90": 198.7520009279251, + "p95": 200.19200444221497, + "p99": 204.44799959659576 + }, + "isolatedSum": { + "p50": 216.67200326919556, + "p90": 224.543996155262, + "p95": 228.19199413061142, + "p99": 236.15999519824982 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 79206400, + "combineLogicalBytes": 79206400, + "fanoutMean": 5.3955078125, + "recvTokensMax": 713, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 146.81600034236908, + "p90": 151.48800611495972, + "p95": 152.44799852371216, + "p99": 156.80000185966492 + }, + "combine": { + "p50": 150.62400698661804, + "p90": 154.7520011663437, + "p95": 155.39200603961945, + "p99": 161.31199896335602 + }, + "roundtrip": { + "p50": 266.59199595451355, + "p90": 270.4640030860901, + "p95": 271.64798974990845, + "p99": 274.84801411628723 + }, + "isolatedSum": { + "p50": 297.4400073289871, + "p90": 306.2400072813034, + "p95": 307.8400045633316, + "p99": 318.11200082302094 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 159330304, + "combineLogicalBytes": 
159330304, + "fanoutMean": 5.4267578125, + "recvTokensMax": 1436, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 201.05600357055664, + "p90": 204.70400154590607, + "p95": 205.63200116157532, + "p99": 209.1200053691864 + }, + "combine": { + "p50": 227.64800488948822, + "p90": 231.99999332427979, + "p95": 234.17599499225616, + "p99": 235.83999276161194 + }, + "roundtrip": { + "p50": 403.55199575424194, + "p90": 408.160001039505, + "p95": 409.15200114250183, + "p99": 411.77600622177124 + }, + "isolatedSum": { + "p50": 428.70400846004486, + "p90": 436.70399487018585, + "p95": 439.8079961538315, + "p99": 444.95999813079834 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 319535104, + "combineLogicalBytes": 319535104, + "fanoutMean": 5.441650390625, + "recvTokensMax": 2897, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 309.1840147972107, + "p90": 313.2160007953644, + "p95": 314.62401151657104, + "p99": 317.79199838638306 + }, + "combine": { + "p50": 368.5440123081207, + "p90": 374.9440014362335, + "p95": 376.22401118278503, + "p99": 380.7680010795593 + }, + "roundtrip": { + "p50": 652.2560119628906, + "p90": 658.9760184288025, + "p95": 661.3759994506836, + "p99": 665.2479767799377 + }, + "isolatedSum": { + "p50": 677.7280271053314, + "p90": 688.1600022315979, + "p95": 690.8480226993561, + "p99": 698.5599994659424 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 638410752, + "combineLogicalBytes": 638410752, + "fanoutMean": 5.43603515625, + "recvTokensMax": 5815, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 532.6079726219177, + "p90": 546.5599894523621, + "p95": 550.495982170105, + "p99": 557.7600002288818 + 
}, + "combine": { + "p50": 642.5279974937439, + "p90": 649.9519944190979, + "p95": 652.2560119628906, + "p99": 658.8159799575806 + }, + "roundtrip": { + "p50": 1146.399974822998, + "p90": 1156.9600105285645, + "p95": 1160.9920263290405, + "p99": 1168.511986732483 + }, + "isolatedSum": { + "p50": 1175.1359701156616, + "p90": 1196.51198387146, + "p95": 1202.7519941329956, + "p99": 1216.5759801864624 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1275144192, + "combineLogicalBytes": 1275144192, + "fanoutMean": 5.42889404296875, + "recvTokensMax": 11606, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1024.351954460144, + "p90": 1048.5440492630005, + "p95": 1056.9599866867065, + "p99": 1069.3119764328003 + }, + "combine": { + "p50": 1185.9840154647827, + "p90": 1194.1759586334229, + "p95": 1196.5759992599487, + "p99": 1201.5680074691772 + }, + "roundtrip": { + "p50": 2167.520046234131, + "p90": 2183.3600997924805, + "p95": 2188.8959407806396, + "p99": 2197.727918624878 + }, + "isolatedSum": { + "p50": 2210.3359699249268, + "p90": 2242.7200078964233, + "p95": 2253.5359859466553, + "p99": 2270.8799839019775 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2546374656, + "combineLogicalBytes": 2546374656, + "fanoutMean": 5.420562744140625, + "recvTokensMax": 23170, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-fe520015", + "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-mild|8|prefill|normal|none|none|0|tuned||cf93f8f6b52e428", + "colorKey": "h100_9aa30544", + "comparisonKey": "212a6f0661f5d2d6", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:00:29.937355+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h100-dgxc-slurm_09", + "sku": "h100", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + 
"resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 · zipf-mild", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-mild", + "routingLabel": "zipf-mild", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "cf93f8f6b52e428", + "workloadId": "set:6:a224603e5a1640b8", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271965088", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271965088", + "createdAt": "2026-06-26T23:59:26Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 123.71200323104858, + "p90": 127.6479959487915, + "p95": 131.20000064373016, + "p99": 133.7279975414276 + }, + "combine": { + "p50": 113.76000195741653, + "p90": 115.13599753379822, + "p95": 119.48800086975098, + "p99": 121.56800180673599 + }, + "roundtrip": { + "p50": 214.65599536895752, + "p90": 219.29599344730377, + "p95": 220.12799978256226, + "p99": 223.61600399017334 + }, + 
"isolatedSum": { + "p50": 237.47200518846512, + "p90": 242.78399348258972, + "p95": 250.68800151348114, + "p99": 255.2959993481636 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 70160384, + "combineLogicalBytes": 70160384, + "fanoutMean": 4.779296875, + "recvTokensMax": 987, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 160.19199788570404, + "p90": 166.4000004529953, + "p95": 167.61599481105804, + "p99": 170.43200135231018 + }, + "combine": { + "p50": 169.37600076198578, + "p90": 172.5119948387146, + "p95": 173.40800166130066, + "p99": 177.50400304794312 + }, + "roundtrip": { + "p50": 299.5840013027191, + "p90": 303.42400074005127, + "p95": 305.1519989967346, + "p99": 310.8479976654053 + }, + "isolatedSum": { + "p50": 329.5679986476898, + "p90": 338.9119952917099, + "p95": 341.0239964723587, + "p99": 347.9360044002533 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 140879872, + "combineLogicalBytes": 140879872, + "fanoutMean": 4.79833984375, + "recvTokensMax": 1972, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 233.18399488925934, + "p90": 239.26399648189545, + "p95": 240.28800427913666, + "p99": 242.94400215148926 + }, + "combine": { + "p50": 263.5839879512787, + "p90": 268.70399713516235, + "p95": 270.27198672294617, + "p99": 274.1760015487671 + }, + "roundtrip": { + "p50": 471.71199321746826, + "p90": 476.639986038208, + "p95": 478.5600006580353, + "p99": 481.3440144062042 + }, + "isolatedSum": { + "p50": 496.767982840538, + "p90": 507.9679936170578, + "p95": 510.5599910020828, + "p99": 517.1200037002563 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 282333184, + "combineLogicalBytes": 282333184, + "fanoutMean": 4.80810546875, + "recvTokensMax": 3936, + "stragglerRank": 1, + "correct": true, + 
"samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 377.27999687194824, + "p90": 383.35999846458435, + "p95": 385.18399000167847, + "p99": 387.84000277519226 + }, + "combine": { + "p50": 446.30399346351624, + "p90": 453.44001054763794, + "p95": 455.52000403404236, + "p99": 460.89598536491394 + }, + "roundtrip": { + "p50": 797.0240116119385, + "p90": 804.4800162315369, + "p95": 807.1039915084839, + "p99": 811.6480112075806 + }, + "isolatedSum": { + "p50": 823.5839903354645, + "p90": 836.8000090122223, + "p95": 840.7039940357208, + "p99": 848.7359881401062 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 566716416, + "combineLogicalBytes": 566716416, + "fanoutMean": 4.8255615234375, + "recvTokensMax": 7855, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 676.4479875564575, + "p90": 686.8799924850464, + "p95": 690.5279755592346, + "p99": 791.9679880142212 + }, + "combine": { + "p50": 796.3520288467407, + "p90": 808.4160089492798, + "p95": 811.3920092582703, + "p99": 820.5440044403076 + }, + "roundtrip": { + "p50": 1445.5360174179077, + "p90": 1457.311987876892, + "p95": 1460.6399536132812, + "p99": 1468.2879447937012 + }, + "isolatedSum": { + "p50": 1472.8000164031982, + "p90": 1495.2960014343262, + "p95": 1501.9199848175049, + "p99": 1612.5119924545288 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1132285952, + "combineLogicalBytes": 1132285952, + "fanoutMean": 4.8206787109375, + "recvTokensMax": 15694, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1284.8639488220215, + "p90": 1296.3199615478516, + "p95": 1299.7759580612183, + "p99": 1306.5279722213745 + }, + "combine": { + "p50": 1503.5840272903442, + "p90": 1517.2799825668335, + "p95": 
1524.2880582809448, + "p99": 1540.0960445404053 + }, + "roundtrip": { + "p50": 2760.960102081299, + "p90": 2775.10404586792, + "p95": 2783.936023712158, + "p99": 2810.0481033325195 + }, + "isolatedSum": { + "p50": 2788.4479761123657, + "p90": 2813.599944114685, + "p95": 2824.064016342163, + "p99": 2846.62401676178 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2267840512, + "combineLogicalBytes": 2267840512, + "fanoutMean": 4.82763671875, + "recvTokensMax": 31357, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-2b98c773", + "identity": "h100|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-mild+eplb|8|prefill|normal|none|none|0|tuned||27ddc85ded0add9", + "colorKey": "h100_e8b903ea", + "comparisonKey": "5961b4bc09451ca4", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:00:35.470349+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h100-dgxc-slurm_16", + "sku": "h100", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 · zipf-mild+eplb", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-mild", + "routingLabel": "zipf-mild+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 
+ }, + "routingConsistent": true, + "traceSignature": "27ddc85ded0add9", + "workloadId": "set:6:a224603e5a1640b8", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": 2.545684814453125, + "eplbImbalanceAfter": 1.0001495361328125, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271968791", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271968791", + "createdAt": "2026-06-26T23:59:34Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 112.41599917411804, + "p90": 117.18399822711945, + "p95": 118.9119964838028, + "p99": 122.91199713945389 + }, + "combine": { + "p50": 106.33599758148193, + "p90": 112.12799698114395, + "p95": 113.0559965968132, + "p99": 114.43199962377548 + }, + "roundtrip": { + "p50": 198.81600141525269, + "p90": 204.03200387954712, + "p95": 205.4080069065094, + "p99": 207.58399367332458 + }, + "isolatedSum": { + "p50": 218.75199675559998, + "p90": 229.3119952082634, + "p95": 231.967993080616, + "p99": 237.34399676322937 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78159872, + "combineLogicalBytes": 78159872, + "fanoutMean": 5.32421875, + "recvTokensMax": 702, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 148.44800531864166, + "p90": 151.99999511241913, + "p95": 153.3759981393814, + "p99": 156.3519984483719 + }, + "combine": { + "p50": 149.47199821472168, + "p90": 155.39200603961945, + "p95": 159.39199924468994, + "p99": 164.06400501728058 + }, + "roundtrip": { + "p50": 267.4880027770996, + "p90": 272.2879946231842, + "p95": 274.04800057411194, + "p99": 279.4879972934723 + }, + "isolatedSum": { + "p50": 297.92000353336334, + "p90": 307.3920011520386, + 
"p95": 312.76799738407135, + "p99": 320.41600346565247 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156563456, + "combineLogicalBytes": 156563456, + "fanoutMean": 5.33251953125, + "recvTokensMax": 1393, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 201.12000405788422, + "p90": 204.48000729084015, + "p95": 206.04799687862396, + "p99": 212.22400665283203 + }, + "combine": { + "p50": 229.0239930152893, + "p90": 233.95200073719025, + "p95": 236.4480048418045, + "p99": 238.52799832820892 + }, + "roundtrip": { + "p50": 404.06399965286255, + "p90": 408.86399149894714, + "p95": 411.0719859600067, + "p99": 431.5840005874634 + }, + "isolatedSum": { + "p50": 430.1439970731735, + "p90": 438.4320080280304, + "p95": 442.49600172042847, + "p99": 450.75200498104095 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 312410112, + "combineLogicalBytes": 312410112, + "fanoutMean": 5.3203125, + "recvTokensMax": 2773, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 304.0960133075714, + "p90": 309.28000807762146, + "p95": 311.64801120758057, + "p99": 479.5520007610321 + }, + "combine": { + "p50": 366.11199378967285, + "p90": 372.8959858417511, + "p95": 374.55999851226807, + "p99": 383.4559917449951 + }, + "roundtrip": { + "p50": 644.0640091896057, + "p90": 650.1439809799194, + "p95": 652.1919965744019, + "p99": 656.5120220184326 + }, + "isolatedSum": { + "p50": 670.2080070972443, + "p90": 682.1759939193726, + "p95": 686.2080097198486, + "p99": 863.0079925060272 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 622712832, + "combineLogicalBytes": 622712832, + "fanoutMean": 5.3023681640625, + "recvTokensMax": 5498, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + 
"globalTokens": 16384, + "dispatch": { + "p50": 521.5039849281311, + "p90": 530.1120281219482, + "p95": 533.3759784698486, + "p99": 540.5120253562927 + }, + "combine": { + "p50": 632.1920156478882, + "p90": 639.3280029296875, + "p95": 640.9599781036377, + "p99": 647.2960114479065 + }, + "roundtrip": { + "p50": 1123.9999532699585, + "p90": 1132.8959465026855, + "p95": 1135.807991027832, + "p99": 1143.5840129852295 + }, + "isolatedSum": { + "p50": 1153.6960005760193, + "p90": 1169.4400310516357, + "p95": 1174.3359565734863, + "p99": 1187.8080368041992 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1245038592, + "combineLogicalBytes": 1245038592, + "fanoutMean": 5.30072021484375, + "recvTokensMax": 10955, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 990.2399778366089, + "p90": 1009.4720125198364, + "p95": 1016.1279439926147, + "p99": 1026.8160104751587 + }, + "combine": { + "p50": 1164.736032485962, + "p90": 1174.015998840332, + "p95": 1177.2799491882324, + "p99": 1183.9359998703003 + }, + "roundtrip": { + "p50": 2116.895914077759, + "p90": 2137.7599239349365, + "p95": 2143.712043762207, + "p99": 2157.8240394592285 + }, + "isolatedSum": { + "p50": 2154.976010322571, + "p90": 2183.4880113601685, + "p95": 2193.407893180847, + "p99": 2210.752010345459 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2489460736, + "combineLogicalBytes": 2489460736, + "fanoutMean": 5.299407958984375, + "recvTokensMax": 21864, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-0a66c8a3", + "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-moderate|8|prefill|normal|none|none|0|tuned||b5217e990b95f86", + "colorKey": "h100_552a4b73", + "comparisonKey": "44cbfb11e1668dc5", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:01:00.044863+00:00", + "status": "valid", + 
"publicationStatus": "official", + "runner": "h100-dgxc-slurm_03", + "sku": "h100", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 · zipf-moderate", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b5217e990b95f86", + "workloadId": "set:6:6709a02c31933a9f", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271978834", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271978834", + "createdAt": "2026-06-26T23:59:54Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 124.7360035777092, + "p90": 130.68799674510956, + "p95": 132.03200697898865, + "p99": 136.4479959011078 + }, + "combine": { + "p50": 112.5119999051094, + "p90": 114.17599767446518, + "p95": 115.07199704647064, + "p99": 
120.67200243473053 + }, + "roundtrip": { + "p50": 215.16799926757812, + "p90": 219.35999393463135, + "p95": 221.11999988555908, + "p99": 229.18400168418884 + }, + "isolatedSum": { + "p50": 237.2480034828186, + "p90": 244.86399441957474, + "p95": 247.1040040254593, + "p99": 257.1199983358383 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 163.7440025806427, + "p90": 167.26399958133698, + "p95": 168.44800114631653, + "p99": 174.6560037136078 + }, + "combine": { + "p50": 164.51199352741241, + "p90": 169.50400173664093, + "p95": 170.1440066099167, + "p99": 174.14399981498718 + }, + "roundtrip": { + "p50": 297.91998863220215, + "p90": 302.72001028060913, + "p95": 304.32000756263733, + "p99": 306.5600097179413 + }, + "isolatedSum": { + "p50": 328.2559961080551, + "p90": 336.7680013179779, + "p95": 338.5920077562332, + "p99": 348.80000352859497 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 100509696, + "combineLogicalBytes": 100509696, + "fanoutMean": 3.42333984375, + "recvTokensMax": 2046, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 237.44000494480133, + "p90": 241.82400107383728, + "p95": 243.0720031261444, + "p99": 247.74399399757385 + }, + "combine": { + "p50": 264.51200246810913, + "p90": 268.41598749160767, + "p95": 271.5519964694977, + "p99": 281.6320061683655 + }, + "roundtrip": { + "p50": 475.5840003490448, + "p90": 482.59198665618896, + "p95": 490.30399322509766, + "p99": 504.96000051498413 + }, + "isolatedSum": { + "p50": 501.95200741291046, + "p90": 510.23998856544495, + "p95": 514.6239995956421, + "p99": 529.3760001659393 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 201678848, + "combineLogicalBytes": 201678848, + "fanoutMean": 3.4345703125, + "recvTokensMax": 4094, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 379.040002822876, + "p90": 385.72800159454346, + "p95": 388.2240056991577, + "p99": 414.3359959125519 + }, + "combine": { + "p50": 447.00801372528076, + "p90": 452.4799883365631, + "p95": 453.5039961338043, + "p99": 456.89600706100464 + }, + "roundtrip": { + "p50": 800.2240061759949, + "p90": 805.791974067688, + "p95": 807.744026184082, + "p99": 811.680018901825 + }, + "isolatedSum": { + "p50": 826.0480165481567, + "p90": 838.2079899311066, + "p95": 841.728001832962, + "p99": 871.2320029735565 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 405035008, + "combineLogicalBytes": 405035008, + "fanoutMean": 3.4488525390625, + "recvTokensMax": 8189, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 675.3919720649719, + "p90": 695.6800222396851, + "p95": 707.8400254249573, + "p99": 910.8160138130188 + }, + "combine": { + "p50": 819.2319869995117, + "p90": 829.6639919281006, + "p95": 833.2160115242004, + "p99": 841.3439989089966 + }, + "roundtrip": { + "p50": 1459.9679708480835, + "p90": 1476.9599437713623, + "p95": 1481.8559885025024, + "p99": 1501.2799501419067 + }, + "isolatedSum": { + "p50": 1494.6239590644836, + "p90": 1525.3440141677856, + "p95": 1541.0560369491577, + "p99": 1752.1600127220154 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 808822784, + "combineLogicalBytes": 808822784, + "fanoutMean": 3.44354248046875, + "recvTokensMax": 16380, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1275.10404586792, + "p90": 
1287.5200510025024, + "p95": 1291.8399572372437, + "p99": 1346.0479974746704 + }, + "combine": { + "p50": 1538.7200117111206, + "p90": 1550.3679513931274, + "p95": 1555.232048034668, + "p99": 1607.9360246658325 + }, + "roundtrip": { + "p50": 2787.168025970459, + "p90": 2798.784017562866, + "p95": 2802.9439449310303, + "p99": 2818.4640407562256 + }, + "isolatedSum": { + "p50": 2813.8240575790405, + "p90": 2837.88800239563, + "p95": 2847.0720052719116, + "p99": 2953.984022140503 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1619795968, + "combineLogicalBytes": 1619795968, + "fanoutMean": 3.4481201171875, + "recvTokensMax": 32761, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-7114a01f", + "identity": "h100|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-moderate+eplb|8|prefill|normal|none|none|0|tuned||2b57a75d27f5b39", + "colorKey": "h100_106a51ab", + "comparisonKey": "80b7db884aaf5a8c", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:01:17.822701+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h100-dgxc-slurm_10", + "sku": "h100", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 · zipf-moderate+eplb", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + 
"conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "2b57a75d27f5b39", + "workloadId": "set:6:6709a02c31933a9f", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": 4.895263671875, + "eplbImbalanceAfter": 1.0000902811686199, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271982260", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271982260", + "createdAt": "2026-06-27T00:00:01Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 114.30399864912033, + "p90": 120.31999975442886, + "p95": 121.56800180673599, + "p99": 125.02400577068329 + }, + "combine": { + "p50": 106.27199709415436, + "p90": 111.48799955844879, + "p95": 111.77600175142288, + "p99": 114.1119971871376 + }, + "roundtrip": { + "p50": 198.0160027742386, + "p90": 201.82399451732635, + "p95": 203.36000621318817, + "p99": 207.35999941825867 + }, + "isolatedSum": { + "p50": 220.5759957432747, + "p90": 231.80799931287766, + "p95": 233.34400355815887, + "p99": 239.1360029578209 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77385728, + "combineLogicalBytes": 77385728, + "fanoutMean": 5.271484375, + "recvTokensMax": 691, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 144.9279934167862, + "p90": 149.85600113868713, + "p95": 151.45599842071533, + "p99": 155.87200224399567 + }, + "combine": { + "p50": 151.19999647140503, + "p90": 154.84799444675446, + "p95": 156.63999319076538, + "p99": 160.73599457740784 + }, + "roundtrip": { + "p50": 
266.11199975013733, + "p90": 271.5519964694977, + "p95": 273.6000120639801, + "p99": 277.1199941635132 + }, + "isolatedSum": { + "p50": 296.1279898881912, + "p90": 304.7039955854416, + "p95": 308.0959916114807, + "p99": 316.6079968214035 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155172864, + "combineLogicalBytes": 155172864, + "fanoutMean": 5.28515625, + "recvTokensMax": 1378, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 200.32000541687012, + "p90": 204.12799715995789, + "p95": 205.4399996995926, + "p99": 208.38400721549988 + }, + "combine": { + "p50": 227.58400440216064, + "p90": 233.75999927520752, + "p95": 234.55999791622162, + "p99": 238.3359968662262 + }, + "roundtrip": { + "p50": 402.0479917526245, + "p90": 407.1039855480194, + "p95": 408.735990524292, + "p99": 412.06398606300354 + }, + "isolatedSum": { + "p50": 427.90400981903076, + "p90": 437.8879964351654, + "p95": 439.9999976158142, + "p99": 446.7200040817261 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 310546432, + "combineLogicalBytes": 310546432, + "fanoutMean": 5.28857421875, + "recvTokensMax": 2745, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 303.16799879074097, + "p90": 307.3920011520386, + "p95": 308.76800417900085, + "p99": 313.27998638153076 + }, + "combine": { + "p50": 362.2399866580963, + "p90": 368.76800656318665, + "p95": 370.3039884567261, + "p99": 372.70399928092957 + }, + "roundtrip": { + "p50": 641.1839723587036, + "p90": 647.9359865188599, + "p95": 650.7520079612732, + "p99": 656.6399931907654 + }, + "isolatedSum": { + "p50": 665.4079854488373, + "p90": 676.1600077152252, + "p95": 679.0719926357269, + "p99": 685.9839856624603 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 620619776, + "combineLogicalBytes": 
620619776, + "fanoutMean": 5.2845458984375, + "recvTokensMax": 5526, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 520.9919810295105, + "p90": 531.4239859580994, + "p95": 534.4640016555786, + "p99": 541.1840081214905 + }, + "combine": { + "p50": 639.3600106239319, + "p90": 650.592029094696, + "p95": 654.5600295066833, + "p99": 660.4800224304199 + }, + "roundtrip": { + "p50": 1128.864049911499, + "p90": 1138.2720470428467, + "p95": 1141.2479877471924, + "p99": 1146.3040113449097 + }, + "isolatedSum": { + "p50": 1160.3519916534424, + "p90": 1182.0160150527954, + "p95": 1189.024031162262, + "p99": 1201.6640305519104 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1239175168, + "combineLogicalBytes": 1239175168, + "fanoutMean": 5.2757568359375, + "recvTokensMax": 11165, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1005.5999755859375, + "p90": 1031.7120552062988, + "p95": 1038.3360385894775, + "p99": 1051.103949546814 + }, + "combine": { + "p50": 1158.9759588241577, + "p90": 1167.8719520568848, + "p95": 1169.9199676513672, + "p99": 1174.6560335159302 + }, + "roundtrip": { + "p50": 2121.5360164642334, + "p90": 2138.2720470428467, + "p95": 2142.6239013671875, + "p99": 2150.0160694122314 + }, + "isolatedSum": { + "p50": 2164.575934410095, + "p90": 2199.5840072631836, + "p95": 2208.2560062408447, + "p99": 2225.759983062744 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2481604608, + "combineLogicalBytes": 2481604608, + "fanoutMean": 5.282684326171875, + "recvTokensMax": 22165, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-71b6107f", + "identity": 
"h100|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|prefill|normal|none|none|0|tuned||2b57a75d27f5b39", + "colorKey": "h100_769b9c4b", + "comparisonKey": "24fc2cc385891299", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:00:08.090138+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h100-dgxc-slurm_05", + "sku": "h100", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 · zipf+eplb", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf", + "routingLabel": "zipf+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "2b57a75d27f5b39", + "workloadId": "set:6:830e36e88869e222", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": 4.895263671875, + "eplbImbalanceAfter": 1.0000902811686199, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271955196", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271955196", + "createdAt": "2026-06-26T23:59:06Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ 
+ { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 111.07199639081955, + "p90": 115.93600362539291, + "p95": 118.14399808645248, + "p99": 121.08799815177917 + }, + "combine": { + "p50": 106.08000308275223, + "p90": 111.26399785280228, + "p95": 112.38399893045425, + "p99": 114.14399743080139 + }, + "roundtrip": { + "p50": 195.68000733852386, + "p90": 201.1840045452118, + "p95": 202.39999890327454, + "p99": 204.96000349521637 + }, + "isolatedSum": { + "p50": 217.15199947357178, + "p90": 227.2000014781952, + "p95": 230.52799701690674, + "p99": 235.23199558258057 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77385728, + "combineLogicalBytes": 77385728, + "fanoutMean": 5.271484375, + "recvTokensMax": 691, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 144.48000490665436, + "p90": 148.0640023946762, + "p95": 149.6960073709488, + "p99": 153.60000729560852 + }, + "combine": { + "p50": 148.92800152301788, + "p90": 154.33600544929504, + "p95": 155.008003115654, + "p99": 157.8879952430725 + }, + "roundtrip": { + "p50": 262.81601190567017, + "p90": 266.975998878479, + "p95": 268.3199942111969, + "p99": 272.44800329208374 + }, + "isolatedSum": { + "p50": 293.40800642967224, + "p90": 302.40000784397125, + "p95": 304.7040104866028, + "p99": 311.48800253868103 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155172864, + "combineLogicalBytes": 155172864, + "fanoutMean": 5.28515625, + "recvTokensMax": 1378, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 199.68000054359436, + "p90": 203.42400670051575, + "p95": 205.47200739383698, + "p99": 222.52799570560455 + }, + "combine": { + "p50": 227.80799865722656, + "p90": 232.9919934272766, + "p95": 234.3679964542389, + "p99": 237.34399676322937 + }, + "roundtrip": 
{ + "p50": 399.83999729156494, + "p90": 405.023992061615, + "p95": 406.3040018081665, + "p99": 414.43198919296265 + }, + "isolatedSum": { + "p50": 427.4879992008209, + "p90": 436.41600012779236, + "p95": 439.84000384807587, + "p99": 459.8719924688339 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 310546432, + "combineLogicalBytes": 310546432, + "fanoutMean": 5.28857421875, + "recvTokensMax": 2745, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 303.5840094089508, + "p90": 309.471994638443, + "p95": 310.4960024356842, + "p99": 313.82399797439575 + }, + "combine": { + "p50": 362.8480136394501, + "p90": 367.74399876594543, + "p95": 369.6320056915283, + "p99": 523.7119793891907 + }, + "roundtrip": { + "p50": 640.8320069313049, + "p90": 648.576021194458, + "p95": 651.2960195541382, + "p99": 733.4399819374084 + }, + "isolatedSum": { + "p50": 666.4320230484009, + "p90": 677.2159934043884, + "p95": 680.1280081272125, + "p99": 837.5359773635864 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 620619776, + "combineLogicalBytes": 620619776, + "fanoutMean": 5.2845458984375, + "recvTokensMax": 5526, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 524.3200063705444, + "p90": 533.5680246353149, + "p95": 536.191999912262, + "p99": 542.2080159187317 + }, + "combine": { + "p50": 643.9039707183838, + "p90": 653.1839966773987, + "p95": 655.8719873428345, + "p99": 661.1520051956177 + }, + "roundtrip": { + "p50": 1135.2959871292114, + "p90": 1144.8320150375366, + "p95": 1148.4800577163696, + "p99": 1153.92005443573 + }, + "isolatedSum": { + "p50": 1168.2239770889282, + "p90": 1186.7520213127136, + "p95": 1192.0639872550964, + "p99": 1203.3600211143494 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1239175168, + 
"combineLogicalBytes": 1239175168, + "fanoutMean": 5.2757568359375, + "recvTokensMax": 11165, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1020.4800367355347, + "p90": 1048.8959550857544, + "p95": 1056.2560558319092, + "p99": 1071.4880228042603 + }, + "combine": { + "p50": 1164.6720170974731, + "p90": 1173.375964164734, + "p95": 1177.024006843567, + "p99": 1183.135986328125 + }, + "roundtrip": { + "p50": 2140.575885772705, + "p90": 2157.248020172119, + "p95": 2164.031982421875, + "p99": 2171.4560985565186 + }, + "isolatedSum": { + "p50": 2185.152053833008, + "p90": 2222.2719192504883, + "p95": 2233.280062675476, + "p99": 2254.6240091323853 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2481604608, + "combineLogicalBytes": 2481604608, + "fanoutMean": 5.282684326171875, + "recvTokensMax": 22165, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-19a8d159", + "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|normalized|0.18|64d989e2e2a6b31", + "colorKey": "h100_7b3247bf", + "comparisonKey": "0ac8f8817cb63abb", + "schemaVersion": 3, + "generatedAt": "2026-06-26T17:30:47.651979+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h100-dgxc-slurm_17", + "sku": "h100", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "normalized", + "suite": "resource-constrained", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 (norm)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": 
false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": 0.18, + "achievedFraction": 0.1818, + "configuredUnits": 24, + "deviceUnits": 132, + "resourceClass": "resource-constrained", + "conformanceClass": "resource-conforming", + "fixedKernel": false, + "paretoEligible": true + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": "set:6:a426d66e479dc893", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28254315809", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254315809", + "createdAt": "2026-06-26T17:26:52Z", + "sha": "60dec7d70f554e252fec87709e2be52752947db1" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 110.46399921178818, + "p90": 116.35199934244156, + "p95": 117.8240031003952, + "p99": 166.01599752902985 + }, + "combine": { + "p50": 106.1440035700798, + "p90": 111.51999980211258, + "p95": 112.06399649381638, + "p99": 114.07999694347382 + }, + "roundtrip": { + "p50": 197.40800559520721, + "p90": 200.9280025959015, + "p95": 203.0400037765503, + "p99": 206.01600408554077 + }, + "isolatedSum": { + "p50": 216.60800278186798, + "p90": 227.87199914455414, + "p95": 229.88799959421158, + "p99": 280.09599447250366 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 
147.39200472831726, + "p90": 150.68799257278442, + "p95": 151.7760008573532, + "p99": 154.33600544929504 + }, + "combine": { + "p50": 145.1839953660965, + "p90": 149.88799393177032, + "p95": 151.67999267578125, + "p99": 154.7199934720993 + }, + "roundtrip": { + "p50": 262.4000012874603, + "p90": 267.2640085220337, + "p95": 269.27998661994934, + "p99": 357.34400153160095 + }, + "isolatedSum": { + "p50": 292.57600009441376, + "p90": 300.57598650455475, + "p95": 303.45599353313446, + "p99": 309.05599892139435 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155889664, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 204.92799580097198, + "p90": 219.39200162887573, + "p95": 221.76000475883484, + "p99": 226.4000028371811 + }, + "combine": { + "p50": 217.15199947357178, + "p90": 221.3120013475418, + "p95": 224.57599639892578, + "p99": 227.743998169899 + }, + "roundtrip": { + "p50": 392.60798692703247, + "p90": 397.47199416160583, + "p95": 400.09599924087524, + "p99": 421.37598991394043 + }, + "isolatedSum": { + "p50": 422.07999527454376, + "p90": 440.70400297641754, + "p95": 446.3360011577606, + "p99": 454.1440010070801 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 312266752, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 319.93600726127625, + "p90": 324.8960077762604, + "p95": 327.1679878234863, + "p99": 330.55999875068665 + }, + "combine": { + "p50": 330.01598715782166, + "p90": 335.1680040359497, + "p95": 336.64000034332275, + "p99": 340.2239978313446 + }, + "roundtrip": { + "p50": 624.064028263092, + "p90": 629.2480230331421, + "p95": 
631.6159963607788, + "p99": 638.2399797439575 + }, + "isolatedSum": { + "p50": 649.9519944190979, + "p90": 660.0640118122101, + "p95": 663.8079881668091, + "p99": 670.7839965820312 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 623443968, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 570.9440112113953, + "p90": 584.5119953155518, + "p95": 589.1519784927368, + "p99": 593.9199924468994 + }, + "combine": { + "p50": 564.9920105934143, + "p90": 574.3039846420288, + "p95": 576.7999887466431, + "p99": 583.5199952125549 + }, + "roundtrip": { + "p50": 1105.5680513381958, + "p90": 1120.1599836349487, + "p95": 1124.7680187225342, + "p99": 1134.719967842102 + }, + "isolatedSum": { + "p50": 1135.9360218048096, + "p90": 1158.8159799575806, + "p95": 1165.9519672393799, + "p99": 1177.4399876594543 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243805696, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1075.8719444274902, + "p90": 1088.703989982605, + "p95": 1093.5360193252563, + "p99": 1102.463960647583 + }, + "combine": { + "p50": 1031.872034072876, + "p90": 1041.3119792938232, + "p95": 1044.4799661636353, + "p99": 1055.359959602356 + }, + "roundtrip": { + "p50": 2082.304000854492, + "p90": 2096.640110015869, + "p95": 2100.895881652832, + "p99": 2108.031988143921 + }, + "isolatedSum": { + "p50": 2107.743978500366, + "p90": 2130.015969276428, + "p95": 2138.0159854888916, + "p99": 2157.823920249939 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487009280, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + 
"recvTokensMax": 21781, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-107dd39c", + "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|prefill|normal|none|none|0|normalized|0.18|0a3064a2af0dd39", + "colorKey": "h100_716e65b9", + "comparisonKey": "ea5a5b6f1b74dc9d", + "schemaVersion": 3, + "generatedAt": "2026-06-26T17:31:48.643579+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h100-dgxc-slurm_04", + "sku": "h100", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "normalized", + "suite": "resource-constrained", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 (norm) · balanced", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced", + "routingLabel": "balanced", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": 0.18, + "achievedFraction": 0.1818, + "configuredUnits": 24, + "deviceUnits": 132, + "resourceClass": "resource-constrained", + "conformanceClass": "resource-conforming", + "fixedKernel": false, + "paretoEligible": true + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "0a3064a2af0dd39", + "workloadId": "set:6:2dad1a73ff872905", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28254367516", + "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254367516", + "createdAt": "2026-06-26T17:27:52Z", + "sha": "60dec7d70f554e252fec87709e2be52752947db1" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 126.65599584579468, + "p90": 131.74399733543396, + "p95": 132.83200562000275, + "p99": 139.80799913406372 + }, + "combine": { + "p50": 120.4800009727478, + "p90": 122.40000069141388, + "p95": 124.28800016641617, + "p99": 129.12000715732574 + }, + "roundtrip": { + "p50": 221.40799462795258, + "p90": 226.49599611759186, + "p95": 227.77600586414337, + "p99": 232.16000199317932 + }, + "isolatedSum": { + "p50": 247.13599681854248, + "p90": 254.14399802684784, + "p95": 257.1200057864189, + "p99": 268.92800629138947 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 8, + "recvTokensMax": 1024, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 174.04800653457642, + "p90": 177.5359958410263, + "p95": 179.29600179195404, + "p99": 190.0160014629364 + }, + "combine": { + "p50": 172.67200350761414, + "p90": 174.52800273895264, + "p95": 175.4239946603775, + "p99": 180.28800189495087 + }, + "roundtrip": { + "p50": 317.05600023269653, + "p90": 321.3759958744049, + "p95": 322.4320113658905, + "p99": 326.04798674583435 + }, + "isolatedSum": { + "p50": 346.72001004219055, + "p90": 352.06399857997894, + "p95": 354.71999645233154, + "p99": 370.30400335788727 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 8, + "recvTokensMax": 2048, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 260.70401072502136, + "p90": 264.41600918769836, + "p95": 265.76000452041626, + "p99": 
269.6639895439148 + }, + "combine": { + "p50": 255.13601303100586, + "p90": 258.2080066204071, + "p95": 259.5840096473694, + "p99": 263.5520100593567 + }, + "roundtrip": { + "p50": 489.3760085105896, + "p90": 493.696004152298, + "p95": 495.0079917907715, + "p99": 498.9120066165924 + }, + "isolatedSum": { + "p50": 515.8400237560272, + "p90": 522.6240158081055, + "p95": 525.3440141677856, + "p99": 533.2159996032715 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 469762048, + "combineLogicalBytes": 469762048, + "fanoutMean": 8, + "recvTokensMax": 4096, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 437.6640021800995, + "p90": 443.7119960784912, + "p95": 445.248007774353, + "p99": 449.50398802757263 + }, + "combine": { + "p50": 422.14399576187134, + "p90": 426.07998847961426, + "p95": 427.90400981903076, + "p99": 431.0399889945984 + }, + "roundtrip": { + "p50": 834.0799808502197, + "p90": 840.3199911117554, + "p95": 842.8159952163696, + "p99": 852.512001991272 + }, + "isolatedSum": { + "p50": 859.8079979419708, + "p90": 869.7919845581055, + "p95": 873.1520175933838, + "p99": 880.543977022171 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 939524096, + "combineLogicalBytes": 939524096, + "fanoutMean": 8, + "recvTokensMax": 8192, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 802.623987197876, + "p90": 819.7439908981323, + "p95": 822.3680257797241, + "p99": 830.3359746932983 + }, + "combine": { + "p50": 751.9360184669495, + "p90": 759.6160173416138, + "p95": 762.0480060577393, + "p99": 765.5680179595947 + }, + "roundtrip": { + "p50": 1521.9520330429077, + "p90": 1534.208059310913, + "p95": 1541.4400100708008, + "p99": 1552.5120496749878 + }, + "isolatedSum": { + "p50": 1554.5600056648254, + "p90": 1579.360008239746, + 
"p95": 1584.4160318374634, + "p99": 1595.903992652893 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1879048192, + "combineLogicalBytes": 1879048192, + "fanoutMean": 8, + "recvTokensMax": 16384, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1529.0240049362183, + "p90": 1539.5519733428955, + "p95": 1543.4880256652832, + "p99": 1549.504041671753 + }, + "combine": { + "p50": 1399.6479511260986, + "p90": 1406.7840576171875, + "p95": 1409.440040588379, + "p99": 1416.767954826355 + }, + "roundtrip": { + "p50": 2903.520107269287, + "p90": 2916.3520336151123, + "p95": 2920.2558994293213, + "p99": 2930.016040802002 + }, + "isolatedSum": { + "p50": 2928.671956062317, + "p90": 2946.336030960083, + "p95": 2952.928066253662, + "p99": 2966.271996498108 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3758096384, + "combineLogicalBytes": 3758096384, + "fanoutMean": 8, + "recvTokensMax": 32768, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-a1762095", + "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|prefill|normal|none|none|0|normalized|0.18|b5217e990b95f86", + "colorKey": "h100_f7ec28aa", + "comparisonKey": "18d3cab3936a264e", + "schemaVersion": 3, + "generatedAt": "2026-06-26T17:29:07.856119+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h100-dgxc-slurm_14", + "sku": "h100", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "normalized", + "suite": "resource-constrained", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 (norm) · zipf", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf", 
+ "routingLabel": "zipf", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": 0.18, + "achievedFraction": 0.1818, + "configuredUnits": 24, + "deviceUnits": 132, + "resourceClass": "resource-constrained", + "conformanceClass": "resource-conforming", + "fixedKernel": false, + "paretoEligible": true + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b5217e990b95f86", + "workloadId": "set:6:830e36e88869e222", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28254376151", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254376151", + "createdAt": "2026-06-26T17:28:02Z", + "sha": "60dec7d70f554e252fec87709e2be52752947db1" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 119.03999745845795, + "p90": 125.44000148773193, + "p95": 126.01600587368011, + "p99": 130.68799674510956 + }, + "combine": { + "p50": 111.32799834012985, + "p90": 113.92000317573547, + "p95": 114.33599889278412, + "p99": 119.77600306272507 + }, + "roundtrip": { + "p50": 207.42399990558624, + "p90": 212.351992726326, + "p95": 214.56000208854675, + "p99": 233.3119958639145 + }, + "isolatedSum": { + "p50": 230.3679957985878, + "p90": 239.3600046634674, + "p95": 240.35200476646423, + "p99": 250.46399980783463 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 159.42400693893433, + "p90": 165.8879965543747, + "p95": 166.6879951953888, + "p99": 169.69600319862366 + }, + "combine": { + "p50": 156.19200468063354, + "p90": 162.49600052833557, + "p95": 163.26400637626648, + "p99": 168.83200407028198 + }, + "roundtrip": { + "p50": 290.336012840271, + "p90": 296.4160144329071, + "p95": 298.43199253082275, + "p99": 313.4399950504303 + }, + "isolatedSum": { + "p50": 315.61601161956787, + "p90": 328.38399708271027, + "p95": 329.9520015716553, + "p99": 338.52800726890564 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 100509696, + "combineLogicalBytes": 100509696, + "fanoutMean": 3.42333984375, + "recvTokensMax": 2046, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 234.78400707244873, + "p90": 240.22400379180908, + "p95": 242.20800399780273, + "p99": 246.2719976902008 + }, + "combine": { + "p50": 244.47999894618988, + "p90": 252.16001272201538, + "p95": 254.8159956932068, + "p99": 262.4959945678711 + }, + "roundtrip": { + "p50": 450.81600546836853, + "p90": 456.83199167251587, + "p95": 458.624005317688, + "p99": 499.1680085659027 + }, + "isolatedSum": { + "p50": 479.2640060186386, + "p90": 492.38401651382446, + "p95": 497.0239996910095, + "p99": 508.7679922580719 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 201678848, + "combineLogicalBytes": 201678848, + "fanoutMean": 3.4345703125, + "recvTokensMax": 4094, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 379.8399865627289, + "p90": 387.58400082588196, + "p95": 389.60000872612, + "p99": 392.9600119590759 + }, + "combine": { + "p50": 402.72000432014465, + "p90": 408.35198760032654, + "p95": 410.5280041694641, + "p99": 414.2400026321411 + }, + "roundtrip": { + 
"p50": 753.600001335144, + "p90": 759.8080039024353, + "p95": 761.5039944648743, + "p99": 764.959990978241 + }, + "isolatedSum": { + "p50": 782.5599908828735, + "p90": 795.9359884262085, + "p95": 800.1280128955841, + "p99": 807.200014591217 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 405035008, + "combineLogicalBytes": 405035008, + "fanoutMean": 3.4488525390625, + "recvTokensMax": 8189, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 663.7120246887207, + "p90": 672.1919775009155, + "p95": 675.9359836578369, + "p99": 683.0080151557922 + }, + "combine": { + "p50": 711.5839719772339, + "p90": 725.5359888076782, + "p95": 729.8880219459534, + "p99": 740.0320172309875 + }, + "roundtrip": { + "p50": 1344.383955001831, + "p90": 1357.5999736785889, + "p95": 1361.0880374908447, + "p99": 1368.6399459838867 + }, + "isolatedSum": { + "p50": 1375.2959966659546, + "p90": 1397.7279663085938, + "p95": 1405.8240056037903, + "p99": 1423.0400323867798 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 808822784, + "combineLogicalBytes": 808822784, + "fanoutMean": 3.44354248046875, + "recvTokensMax": 16380, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1251.1359453201294, + "p90": 1264.8320198059082, + "p95": 1269.6640491485596, + "p99": 1279.0080308914185 + }, + "combine": { + "p50": 1326.9120454788208, + "p90": 1337.3440504074097, + "p95": 1343.008041381836, + "p99": 1352.5439500808716 + }, + "roundtrip": { + "p50": 2547.0080375671387, + "p90": 2561.2800121307373, + "p95": 2564.863920211792, + "p99": 2581.696033477783 + }, + "isolatedSum": { + "p50": 2578.04799079895, + "p90": 2602.176070213318, + "p95": 2612.6720905303955, + "p99": 2631.55198097229 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1619795968, + 
"combineLogicalBytes": 1619795968, + "fanoutMean": 3.4481201171875, + "recvTokensMax": 32761, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-6339c695", + "identity": "h100|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|prefill|normal|none|none|0|normalized|0.18|2b57a75d27f5b39", + "colorKey": "h100_93503624", + "comparisonKey": "99696dfafd6d026a", + "schemaVersion": 3, + "generatedAt": "2026-06-26T17:46:27.794881+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h100-dgxc-slurm_03", + "sku": "h100", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "normalized", + "suite": "resource-constrained", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 (norm) · zipf+eplb", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf", + "routingLabel": "zipf+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": 0.18, + "achievedFraction": 0.1818, + "configuredUnits": 24, + "deviceUnits": 132, + "resourceClass": "resource-constrained", + "conformanceClass": "resource-conforming", + "fixedKernel": false, + "paretoEligible": true + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "2b57a75d27f5b39", + "workloadId": "set:6:830e36e88869e222", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": 4.895263671875, + "eplbImbalanceAfter": 1.0000902811686199, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + 
"repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28255296001", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28255296001", + "createdAt": "2026-06-26T17:45:26Z", + "sha": "36d3eb6c3c7386d3220c873d305410219c5c0f17" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 110.72000116109848, + "p90": 114.78400230407715, + "p95": 116.57600104808807, + "p99": 121.0239976644516 + }, + "combine": { + "p50": 105.8880016207695, + "p90": 111.35999858379364, + "p95": 112.0000034570694, + "p99": 114.56000059843063 + }, + "roundtrip": { + "p50": 195.99999487400055, + "p90": 200.00000298023224, + "p95": 201.24800503253937, + "p99": 205.59999346733093 + }, + "isolatedSum": { + "p50": 216.60800278186798, + "p90": 226.1440008878708, + "p95": 228.57600450515747, + "p99": 235.58399826288223 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77385728, + "combineLogicalBytes": 77385728, + "fanoutMean": 5.271484375, + "recvTokensMax": 691, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 144.31999623775482, + "p90": 148.0640023946762, + "p95": 149.24800395965576, + "p99": 152.0960032939911 + }, + "combine": { + "p50": 146.62399888038635, + "p90": 151.10400319099426, + "p95": 152.51199901103973, + "p99": 155.32800555229187 + }, + "roundtrip": { + "p50": 260.8959972858429, + "p90": 265.3760015964508, + "p95": 266.400009393692, + "p99": 270.7520127296448 + }, + "isolatedSum": { + "p50": 290.9439951181412, + "p90": 299.16800558567047, + "p95": 301.7600029706955, + "p99": 307.42400884628296 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155172864, + "combineLogicalBytes": 155172864, + "fanoutMean": 5.28515625, + "recvTokensMax": 1378, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + 
"p50": 205.56800067424774, + "p90": 210.36800742149353, + "p95": 212.09600567817688, + "p99": 214.6880030632019 + }, + "combine": { + "p50": 214.78399634361267, + "p90": 219.13599967956543, + "p95": 220.70400416851044, + "p99": 225.2800017595291 + }, + "roundtrip": { + "p50": 394.8799967765808, + "p90": 400.2879858016968, + "p95": 401.88801288604736, + "p99": 407.9680144786835 + }, + "isolatedSum": { + "p50": 420.3519970178604, + "p90": 429.50400710105896, + "p95": 432.8000098466873, + "p99": 439.968004822731 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 310546432, + "combineLogicalBytes": 310546432, + "fanoutMean": 5.28857421875, + "recvTokensMax": 2745, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 326.9760012626648, + "p90": 332.35201239585876, + "p95": 334.46401357650757, + "p99": 337.98399567604065 + }, + "combine": { + "p50": 338.75200152397156, + "p90": 346.0479974746704, + "p95": 347.4240005016327, + "p99": 379.5199990272522 + }, + "roundtrip": { + "p50": 642.8160071372986, + "p90": 650.6879925727844, + "p95": 652.895987033844, + "p99": 658.7520241737366 + }, + "isolatedSum": { + "p50": 665.7280027866364, + "p90": 678.4000098705292, + "p95": 681.8880140781403, + "p99": 717.5039947032928 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 620619776, + "combineLogicalBytes": 620619776, + "fanoutMean": 5.2845458984375, + "recvTokensMax": 5526, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 584.0640068054199, + "p90": 592.6719903945923, + "p95": 595.5520272254944, + "p99": 601.2160181999207 + }, + "combine": { + "p50": 568.8639879226685, + "p90": 576.9280195236206, + "p95": 579.3920159339905, + "p99": 584.5119953155518 + }, + "roundtrip": { + "p50": 1122.3679780960083, + "p90": 1133.8560581207275, + "p95": 
1138.6239528656006, + "p99": 1146.783947944641 + }, + "isolatedSum": { + "p50": 1152.9279947280884, + "p90": 1169.600009918213, + "p95": 1174.9440431594849, + "p99": 1185.7280135154724 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1239175168, + "combineLogicalBytes": 1239175168, + "fanoutMean": 5.2757568359375, + "recvTokensMax": 11165, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1107.200026512146, + "p90": 1119.0400123596191, + "p95": 1124.384045600891, + "p99": 1133.344054222107 + }, + "combine": { + "p50": 1020.6719636917114, + "p90": 1029.1839838027954, + "p95": 1032.1919918060303, + "p99": 1037.8559827804565 + }, + "roundtrip": { + "p50": 2098.4959602355957, + "p90": 2110.1760864257812, + "p95": 2113.856077194214, + "p99": 2120.60809135437 + }, + "isolatedSum": { + "p50": 2127.8719902038574, + "p90": 2148.2239961624146, + "p95": 2156.5760374069214, + "p99": 2171.2000370025635 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2481604608, + "combineLogicalBytes": 2481604608, + "fanoutMean": 5.282684326171875, + "recvTokensMax": 22165, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-96b1ca55", + "identity": "h100|deepep|7168|8|256|bf16|normal|cached-layout-comm-only-v1|uniform|8|prefill|normal|none|none|0|normalized|0.18|64d989e2e2a6b31", + "colorKey": "h100_5df912ff", + "comparisonKey": "9fdbd6763ea7346a", + "schemaVersion": 3, + "generatedAt": "2026-06-26T17:28:17.076570+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h100-dgxc-slurm_08", + "sku": "h100", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "normalized", + "suite": "resource-constrained", + "comparisonClass": "standardized", + "measurementContract": "cached-layout-comm-only-v1", + "topologyClass": "h100-nvlink-island", + "transport": 
"nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 (norm) [cl]", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": 0.18, + "achievedFraction": 0.1818, + "configuredUnits": 24, + "deviceUnits": 132, + "resourceClass": "resource-constrained", + "conformanceClass": "resource-conforming", + "fixedKernel": false, + "paretoEligible": true + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": "set:6:a426d66e479dc893", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28254332840", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254332840", + "createdAt": "2026-06-26T17:27:12Z", + "sha": "60dec7d70f554e252fec87709e2be52752947db1" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 101.31199657917023, + "p90": 105.69600015878677, + "p95": 107.55199939012527, + "p99": 110.84800213575363 + }, + "combine": { + "p50": 105.82400113344193, + "p90": 107.42399841547012, + "p95": 108.60799998044968, + "p99": 112.64000087976456 + }, + "roundtrip": { + "p50": 183.1360012292862, + "p90": 188.03200125694275, + "p95": 188.960000872612, + "p99": 195.13599574565887 + }, + "isolatedSum": { + "p50": 207.13599771261215, + "p90": 213.1199985742569, + "p95": 216.15999937057495, + "p99": 223.4880030155182 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 
77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 132.89600610733032, + "p90": 137.08800077438354, + "p95": 138.2399946451187, + "p99": 140.70400595664978 + }, + "combine": { + "p50": 144.96000111103058, + "p90": 147.5840061903, + "p95": 148.28799664974213, + "p99": 152.63999998569489 + }, + "roundtrip": { + "p50": 249.56800043582916, + "p90": 253.53598594665527, + "p95": 254.59200143814087, + "p99": 256.73601031303406 + }, + "isolatedSum": { + "p50": 277.8560072183609, + "p90": 284.67200696468353, + "p95": 286.52799129486084, + "p99": 293.34400594234467 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155889664, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 193.4400051832199, + "p90": 202.68799364566803, + "p95": 203.87199521064758, + "p99": 209.9519968032837 + }, + "combine": { + "p50": 216.8319970369339, + "p90": 220.92799842357635, + "p95": 223.55200350284576, + "p99": 226.04799270629883 + }, + "roundtrip": { + "p50": 382.4959993362427, + "p90": 387.7759873867035, + "p95": 388.7679874897003, + "p99": 392.767995595932 + }, + "isolatedSum": { + "p50": 410.2720022201538, + "p90": 423.6159920692444, + "p95": 427.42399871349335, + "p99": 435.9999895095825 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 312266752, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 315.0720000267029, + "p90": 320.1279938220978, + "p95": 322.04800844192505, 
+ "p99": 324.5759904384613 + }, + "combine": { + "p50": 329.27998900413513, + "p90": 333.3759903907776, + "p95": 335.61599254608154, + "p99": 338.9120101928711 + }, + "roundtrip": { + "p50": 619.0720200538635, + "p90": 625.2480149269104, + "p95": 627.839982509613, + "p99": 630.7839751243591 + }, + "isolatedSum": { + "p50": 644.351989030838, + "p90": 653.5039842128754, + "p95": 657.6640009880066, + "p99": 663.4880006313324 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 623443968, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 560.8959794044495, + "p90": 569.8879957199097, + "p95": 572.1920132637024, + "p99": 577.2799849510193 + }, + "combine": { + "p50": 563.3599758148193, + "p90": 573.248028755188, + "p95": 576.3840079307556, + "p99": 580.672025680542 + }, + "roundtrip": { + "p50": 1093.727946281433, + "p90": 1102.6240587234497, + "p95": 1105.5999994277954, + "p99": 1112.0959520339966 + }, + "isolatedSum": { + "p50": 1124.2559552192688, + "p90": 1143.1360244750977, + "p95": 1148.576021194458, + "p99": 1157.9520106315613 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243805696, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1059.0720176696777, + "p90": 1071.7439651489258, + "p95": 1074.8480558395386, + "p99": 1091.2959575653076 + }, + "combine": { + "p50": 1026.8800258636475, + "p90": 1036.2880229949951, + "p95": 1038.7840270996094, + "p99": 1047.4879741668701 + }, + "roundtrip": { + "p50": 2055.1679134368896, + "p90": 2067.13604927063, + "p95": 2069.823980331421, + "p99": 2075.5200386047363 + }, + "isolatedSum": { + "p50": 
2085.952043533325, + "p90": 2108.031988143921, + "p95": 2113.632082939148, + "p99": 2138.7839317321777 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487009280, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-1ed69eb7", + "identity": "h100|deepep|7168|8|256|bf16|normal|cached-layout-comm-only-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "h100_17694d2c", + "comparisonKey": "379c3371e525c0fb", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:48:34.870060+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h100-dgxc-slurm_15", + "sku": "h100", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "cached-layout-comm-only-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 [cl]", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": "set:6:a426d66e479dc893", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + 
"backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271555838", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271555838", + "createdAt": "2026-06-26T23:46:24Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 105.27999699115753, + "p90": 110.36799848079681, + "p95": 112.12799698114395, + "p99": 115.23199826478958 + }, + "combine": { + "p50": 106.175996363163, + "p90": 108.0000028014183, + "p95": 111.1999973654747, + "p99": 113.72800171375275 + }, + "roundtrip": { + "p50": 183.3599954843521, + "p90": 188.48000466823578, + "p95": 190.17599523067474, + "p99": 193.56800615787506 + }, + "isolatedSum": { + "p50": 211.45599335432053, + "p90": 218.36800128221512, + "p95": 223.32799434661865, + "p99": 228.95999997854233 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 134.5919966697693, + "p90": 140.06400108337402, + "p95": 142.46399700641632, + "p99": 146.88000082969666 + }, + "combine": { + "p50": 152.12799608707428, + "p90": 158.36800634860992, + "p95": 161.0880047082901, + "p99": 162.81600296497345 + }, + "roundtrip": { + "p50": 254.46400046348572, + "p90": 259.93600487709045, + "p95": 262.4639868736267, + "p99": 268.2560086250305 + }, + "isolatedSum": { + "p50": 286.71999275684357, + "p90": 298.43200743198395, + "p95": 303.5520017147064, + "p99": 309.6960037946701 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155889664, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 5, + 
"correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 188.960000872612, + "p90": 194.97600197792053, + "p95": 198.11199605464935, + "p99": 202.5279998779297 + }, + "combine": { + "p50": 228.67199778556824, + "p90": 236.09599471092224, + "p95": 237.05600202083588, + "p99": 241.08800292015076 + }, + "roundtrip": { + "p50": 391.90399646759033, + "p90": 399.80798959732056, + "p95": 402.3999869823456, + "p99": 424.0959882736206 + }, + "isolatedSum": { + "p50": 417.63199865818024, + "p90": 431.0719966888428, + "p95": 435.16799807548523, + "p99": 443.61600279808044 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 312266752, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 294.0160036087036, + "p90": 311.3279938697815, + "p95": 315.20000100135803, + "p99": 326.07999444007874 + }, + "combine": { + "p50": 366.1760091781616, + "p90": 382.9120099544525, + "p95": 391.32800698280334, + "p99": 407.039999961853 + }, + "roundtrip": { + "p50": 632.9600214958191, + "p90": 674.3680238723755, + "p95": 687.3279809951782, + "p99": 835.3919982910156 + }, + "isolatedSum": { + "p50": 660.1920127868652, + "p90": 694.240003824234, + "p95": 706.5280079841614, + "p99": 733.1199944019318 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 623443968, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 509.15199518203735, + "p90": 521.8560099601746, + "p95": 526.1120200157166, + "p99": 533.0560207366943 + }, + "combine": { + "p50": 635.2319717407227, + "p90": 645.5680131912231, + "p95": 
649.4719982147217, + "p99": 656.3839912414551 + }, + "roundtrip": { + "p50": 1114.9760484695435, + "p90": 1128.0319690704346, + "p95": 1131.9680213928223, + "p99": 1147.711992263794 + }, + "isolatedSum": { + "p50": 1144.38396692276, + "p90": 1167.4240231513977, + "p95": 1175.5840182304382, + "p99": 1189.4400119781494 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243805696, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 970.848023891449, + "p90": 995.136022567749, + "p95": 1001.7919540405273, + "p99": 1016.1279439926147 + }, + "combine": { + "p50": 1156.3199758529663, + "p90": 1167.1040058135986, + "p95": 1172.287940979004, + "p99": 1184.928059577942 + }, + "roundtrip": { + "p50": 2089.279890060425, + "p90": 2105.664014816284, + "p95": 2110.431909561157, + "p99": 2118.0479526519775 + }, + "isolatedSum": { + "p50": 2127.1679997444153, + "p90": 2162.2400283813477, + "p95": 2174.0798950195312, + "p99": 2201.0560035705566 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487009280, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-8fb1cb65", + "identity": "h100|deepep|5120|8|160|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||0c022a63bbcbf42", + "colorKey": "h100_97196257", + "comparisonKey": "d361c128552b2ee8", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:51:51.842450+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h100-dgxc-slurm_10", + "sku": "h100", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + 
"measurementContract": "runtime-visible-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · fp8", + "shape": { + "hidden": 5120, + "topk": 8, + "experts": 160, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "0c022a63bbcbf42", + "workloadId": "set:6:28c0c09b13ff0acf", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271695735", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271695735", + "createdAt": "2026-06-26T23:50:45Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 196.28800451755524, + "p90": 202.33599841594696, + "p95": 203.96800339221954, + "p99": 210.07999777793884 + }, + "combine": { + "p50": 85.11999994516373, + "p90": 87.5839963555336, + "p95": 89.72799777984619, + "p99": 93.24800223112106 + }, + "roundtrip": { + "p50": 266.7520046234131, + "p90": 273.824006319046, + "p95": 277.5680124759674, + "p99": 291.83998703956604 + }, + "isolatedSum": { + "p50": 281.40800446271896, + "p90": 289.91999477148056, + "p95": 293.69600117206573, + "p99": 
303.3280000090599 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 27837440, + "combineLogicalBytes": 55674880, + "fanoutMean": 5.3095703125, + "recvTokensMax": 699, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 248.3839988708496, + "p90": 296.7680096626282, + "p95": 299.80799555778503, + "p99": 321.28000259399414 + }, + "combine": { + "p50": 118.81600320339203, + "p90": 125.15200674533844, + "p95": 126.17599964141846, + "p99": 128.06400656700134 + }, + "roundtrip": { + "p50": 353.85599732398987, + "p90": 407.9360067844391, + "p95": 410.3040099143982, + "p99": 414.40001130104065 + }, + "isolatedSum": { + "p50": 367.20000207424164, + "p90": 421.9200164079666, + "p95": 425.9839951992035, + "p99": 449.3440091609955 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 55552000, + "combineLogicalBytes": 111104000, + "fanoutMean": 5.2978515625, + "recvTokensMax": 1387, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 341.43999218940735, + "p90": 394.0480053424835, + "p95": 396.5440094470978, + "p99": 400.41598677635193 + }, + "combine": { + "p50": 185.12000143527985, + "p90": 191.3280040025711, + "p95": 193.05600225925446, + "p99": 195.74399292469025 + }, + "roundtrip": { + "p50": 510.0160241127014, + "p90": 567.3919916152954, + "p95": 570.8479881286621, + "p99": 574.176013469696 + }, + "isolatedSum": { + "p50": 526.5599936246872, + "p90": 585.3760093450546, + "p95": 589.6000117063522, + "p99": 596.1599797010422 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 111549440, + "combineLogicalBytes": 223098880, + "fanoutMean": 5.319091796875, + "recvTokensMax": 2762, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + 
"p50": 517.9839730262756, + "p90": 568.6720013618469, + "p95": 574.0159749984741, + "p99": 579.8079967498779 + }, + "combine": { + "p50": 291.26399755477905, + "p90": 295.80798745155334, + "p95": 297.08799719810486, + "p99": 299.96800422668457 + }, + "roundtrip": { + "p50": 794.2079901695251, + "p90": 801.3120293617249, + "p95": 804.095983505249, + "p99": 814.4959807395935 + }, + "isolatedSum": { + "p50": 809.2479705810547, + "p90": 864.4799888134003, + "p95": 871.103972196579, + "p99": 879.7760009765625 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 223365120, + "combineLogicalBytes": 446730240, + "fanoutMean": 5.325439453125, + "recvTokensMax": 5518, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 962.5599980354309, + "p90": 971.2640047073364, + "p95": 973.0560183525085, + "p99": 978.3999919891357 + }, + "combine": { + "p50": 513.1199955940247, + "p90": 523.5520005226135, + "p95": 526.0800123214722, + "p99": 531.9039821624756 + }, + "roundtrip": { + "p50": 1460.576057434082, + "p90": 1472.4160432815552, + "p95": 1476.6080379486084, + "p99": 1773.3759880065918 + }, + "isolatedSum": { + "p50": 1475.6799936294556, + "p90": 1494.81600522995, + "p95": 1499.1360306739807, + "p99": 1510.3039741516113 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 446817280, + "combineLogicalBytes": 893634560, + "fanoutMean": 5.32647705078125, + "recvTokensMax": 11032, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1818.5919523239136, + "p90": 1826.9439935684204, + "p95": 1829.7280073165894, + "p99": 1833.8559865951538 + }, + "combine": { + "p50": 930.3359985351562, + "p90": 939.7119879722595, + "p95": 942.8160190582275, + "p99": 948.0640292167664 + }, + "roundtrip": { + "p50": 2736.9279861450195, + "p90": 2750.3039836883545, + "p95": 
2755.199909210205, + "p99": 2763.64803314209 + }, + "isolatedSum": { + "p50": 2748.92795085907, + "p90": 2766.65598154068, + "p95": 2772.544026374817, + "p99": 2781.92001581192 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 893132800, + "combineLogicalBytes": 1786265600, + "fanoutMean": 5.323486328125, + "recvTokensMax": 21895, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-db3c52ad", + "identity": "h100|deepep|6144|8|256|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "h100_97196257", + "comparisonKey": "d4fd66af6f4726f6", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:52:17.424978+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h100-dgxc-slurm_09", + "sku": "h100", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · fp8", + "shape": { + "hidden": 6144, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": "set:6:9f5e1e005a35e937", + "workloadSource": "canonical-serialized", + 
"eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271710412", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271710412", + "createdAt": "2026-06-26T23:51:13Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 208.8640034198761, + "p90": 215.68000316619873, + "p95": 218.75199675559998, + "p99": 469.56801414489746 + }, + "combine": { + "p50": 90.33600240945816, + "p90": 93.21600198745728, + "p95": 95.551997423172, + "p99": 98.1760025024414 + }, + "roundtrip": { + "p50": 286.72000765800476, + "p90": 290.75199365615845, + "p95": 293.0240035057068, + "p99": 295.52000761032104 + }, + "isolatedSum": { + "p50": 299.20000582933426, + "p90": 308.896005153656, + "p95": 314.303994178772, + "p99": 567.7440166473389 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 33288192, + "combineLogicalBytes": 66576384, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 267.2320008277893, + "p90": 273.4079957008362, + "p95": 275.64799785614014, + "p99": 286.8480086326599 + }, + "combine": { + "p50": 127.23200023174286, + "p90": 130.40000200271606, + "p95": 131.52000308036804, + "p99": 134.0479999780655 + }, + "roundtrip": { + "p50": 387.3920142650604, + "p90": 392.2879993915558, + "p95": 394.9440121650696, + "p99": 403.328001499176 + }, + "isolatedSum": { + "p50": 394.46400105953217, + "p90": 403.80799770355225, + "p95": 407.1680009365082, + "p99": 420.8960086107254 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 66809856, + "combineLogicalBytes": 133619712, + "fanoutMean": 5.3095703125, + 
"recvTokensMax": 1422, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 377.6960074901581, + "p90": 423.93600940704346, + "p95": 426.4320135116577, + "p99": 432.6080083847046 + }, + "combine": { + "p50": 203.42400670051575, + "p90": 213.95200490951538, + "p95": 215.03999829292297, + "p99": 218.6879962682724 + }, + "roundtrip": { + "p50": 564.4479990005493, + "p90": 604.8960089683533, + "p95": 608.1920266151428, + "p99": 615.1360273361206 + }, + "isolatedSum": { + "p50": 581.1200141906738, + "p90": 637.8880143165588, + "p95": 641.4720118045807, + "p99": 651.296004652977 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 133828608, + "combineLogicalBytes": 267657216, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 593.504011631012, + "p90": 597.9200005531311, + "p95": 599.295973777771, + "p99": 620.9279894828796 + }, + "combine": { + "p50": 322.59199023246765, + "p90": 326.78401470184326, + "p95": 328.7999927997589, + "p99": 331.36001229286194 + }, + "roundtrip": { + "p50": 899.1680145263672, + "p90": 904.416024684906, + "p95": 906.6240191459656, + "p99": 913.0560159683228 + }, + "isolatedSum": { + "p50": 916.0960018634796, + "p90": 924.7040152549744, + "p95": 928.0959665775299, + "p99": 952.2880017757416 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 267190272, + "combineLogicalBytes": 534380544, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 1106.943964958191, + "p90": 1117.9519891738892, + "p95": 1120.7040548324585, + "p99": 1126.2719631195068 + }, + "combine": { + "p50": 574.4640231132507, + "p90": 
583.0720067024231, + "p95": 584.991991519928, + "p99": 590.719997882843 + }, + "roundtrip": { + "p50": 1684.0640306472778, + "p90": 1699.5840072631836, + "p95": 1705.1520347595215, + "p99": 1751.9680261611938 + }, + "isolatedSum": { + "p50": 1681.4079880714417, + "p90": 1701.0239958763123, + "p95": 1705.6960463523865, + "p99": 1716.9919610023499 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 533059584, + "combineLogicalBytes": 1066119168, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 2105.151891708374, + "p90": 2114.687919616699, + "p95": 2120.1279163360596, + "p99": 2217.0560359954834 + }, + "combine": { + "p50": 1052.9279708862305, + "p90": 1061.0560178756714, + "p95": 1063.264012336731, + "p99": 1068.0320262908936 + }, + "roundtrip": { + "p50": 3201.6959190368652, + "p90": 3233.1199645996094, + "p95": 3240.8320903778076, + "p99": 3259.615898132324 + }, + "isolatedSum": { + "p50": 3158.0798625946045, + "p90": 3175.7439374923706, + "p95": 3183.3919286727905, + "p99": 3285.088062286377 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1065861120, + "combineLogicalBytes": 2131722240, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-bf310e7a", + "identity": "h100|deepep|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "h100_a96c99f3", + "comparisonKey": "0fd91e8522237192", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:50:27.259424+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h100-dgxc-slurm_01", + "sku": "h100", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + 
"comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · fp8", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": "set:6:a426d66e479dc893", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271570100", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271570100", + "createdAt": "2026-06-26T23:46:51Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 87.93599903583527, + "p90": 92.0960009098053, + "p95": 94.55999732017517, + "p99": 100.63999891281128 + }, + "combine": { + "p50": 97.02400118112564, + "p90": 99.96800124645233, + "p95": 102.39999741315842, + "p99": 105.98400235176086 + }, + "roundtrip": { + "p50": 216.09599888324738, + "p90": 219.4560021162033, + "p95": 220.7999974489212, + "p99": 224.5119959115982 + }, + "isolatedSum": { + "p50": 184.9600002169609, + "p90": 192.06400215625763, + "p95": 
196.9599947333336, + "p99": 206.62400126457214 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 107.29599744081497, + "p90": 124.28800016641617, + "p95": 125.63200294971466, + "p99": 128.9920061826706 + }, + "combine": { + "p50": 144.03200149536133, + "p90": 154.36799824237823, + "p95": 155.58399260044098, + "p99": 158.78400206565857 + }, + "roundtrip": { + "p50": 332.60801434516907, + "p90": 353.05601358413696, + "p95": 354.8479974269867, + "p99": 358.36800932884216 + }, + "isolatedSum": { + "p50": 251.3279989361763, + "p90": 278.6559984087944, + "p95": 281.21599555015564, + "p99": 287.77600824832916 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77944832, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 143.93599331378937, + "p90": 162.36799955368042, + "p95": 164.8319959640503, + "p99": 172.95999825000763 + }, + "combine": { + "p50": 225.21600127220154, + "p90": 234.01600122451782, + "p95": 235.3920042514801, + "p99": 237.8239929676056 + }, + "roundtrip": { + "p50": 529.375970363617, + "p90": 547.9679703712463, + "p95": 550.2399802207947, + "p99": 553.0239939689636 + }, + "isolatedSum": { + "p50": 369.1519945859909, + "p90": 396.38400077819824, + "p95": 400.2240002155304, + "p99": 410.7839912176132 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156133376, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + 
"globalTokens": 8192, + "dispatch": { + "p50": 217.56799519062042, + "p90": 234.047994017601, + "p95": 235.87200045585632, + "p99": 239.55200612545013 + }, + "combine": { + "p50": 362.527996301651, + "p90": 370.7520067691803, + "p95": 373.1519877910614, + "p99": 375.90399384498596 + }, + "roundtrip": { + "p50": 898.6240029335022, + "p90": 918.2080030441284, + "p95": 922.7200150489807, + "p99": 931.5199851989746 + }, + "isolatedSum": { + "p50": 580.0959914922714, + "p90": 604.8000007867813, + "p95": 609.0239882469177, + "p99": 615.4559999704361 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311721984, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 356.25600814819336, + "p90": 370.33599615097046, + "p95": 372.73600697517395, + "p99": 376.8959939479828 + }, + "combine": { + "p50": 631.8399906158447, + "p90": 639.0079855918884, + "p95": 641.6640281677246, + "p99": 645.0560092926025 + }, + "roundtrip": { + "p50": 1633.247971534729, + "p90": 1644.7360515594482, + "p95": 1647.3599672317505, + "p99": 1654.4640064239502 + }, + "isolatedSum": { + "p50": 988.0959987640381, + "p90": 1009.3439817428589, + "p95": 1014.4000351428986, + "p99": 1021.9520032405853 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 621902848, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 645.5039978027344, + "p90": 653.2800197601318, + "p95": 656.1599969863892, + "p99": 660.7040166854858 + }, + "combine": { + "p50": 1158.911943435669, + "p90": 1167.9999828338623, + "p95": 1170.4319715499878, + "p99": 1178.2399415969849 + }, + "roundtrip": { + "p50": 3097.759962081909, 
+ "p90": 3109.312057495117, + "p95": 3113.3439540863037, + "p99": 3129.1520595550537 + }, + "isolatedSum": { + "p50": 1804.4159412384033, + "p90": 1821.2800025939941, + "p95": 1826.591968536377, + "p99": 1838.9439582824707 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243504640, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-9440251a", + "identity": "h100|deepep|7168|8|256|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "h100_97196257", + "comparisonKey": "2b50b361430bc4f6", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:48:40.278594+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h100-dgxc-slurm_14", + "sku": "h100", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · fp8", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": "set:6:a426d66e479dc893", + "workloadSource": 
"canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271583505", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271583505", + "createdAt": "2026-06-26T23:47:19Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 211.2639993429184, + "p90": 217.28000044822693, + "p95": 219.32800114154816, + "p99": 226.78400576114655 + }, + "combine": { + "p50": 97.15200215578079, + "p90": 100.41599720716476, + "p95": 102.27199643850327, + "p99": 105.59999942779541 + }, + "roundtrip": { + "p50": 296.640008687973, + "p90": 303.26399207115173, + "p95": 305.82401156425476, + "p99": 313.9199912548065 + }, + "isolatedSum": { + "p50": 308.4160014986992, + "p90": 317.6959976553917, + "p95": 321.5999975800514, + "p99": 332.38400518894196 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 281.98400139808655, + "p90": 324.6400058269501, + "p95": 327.7760148048401, + "p99": 332.99198746681213 + }, + "combine": { + "p50": 141.53599739074707, + "p90": 147.87200093269348, + "p95": 149.9519944190979, + "p99": 152.70400047302246 + }, + "roundtrip": { + "p50": 409.7920060157776, + "p90": 415.45599699020386, + "p95": 417.7280068397522, + "p99": 423.39199781417847 + }, + "isolatedSum": { + "p50": 423.5199987888336, + "p90": 472.51200675964355, + "p95": 477.728009223938, + "p99": 485.6959879398346 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77944832, + "combineLogicalBytes": 155889664, + 
"fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 402.24000811576843, + "p90": 444.64001059532166, + "p95": 447.3919868469238, + "p99": 454.1440010070801 + }, + "combine": { + "p50": 224.16000068187714, + "p90": 233.0559939146042, + "p95": 235.23199558258057, + "p99": 239.29600417613983 + }, + "roundtrip": { + "p50": 613.9839887619019, + "p90": 657.7600240707397, + "p95": 661.9200110435486, + "p99": 734.7840070724487 + }, + "isolatedSum": { + "p50": 626.4000087976456, + "p90": 677.6960045099258, + "p95": 682.6239824295044, + "p99": 693.4400051832199 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156133376, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 664.7359728813171, + "p90": 669.5680022239685, + "p95": 671.1360216140747, + "p99": 674.7519969940186 + }, + "combine": { + "p50": 358.0799996852875, + "p90": 363.23198676109314, + "p95": 364.47998881340027, + "p99": 369.9199855327606 + }, + "roundtrip": { + "p50": 1005.2160024642944, + "p90": 1010.2720260620117, + "p95": 1012.287974357605, + "p99": 1019.9999809265137 + }, + "isolatedSum": { + "p50": 1022.8159725666046, + "p90": 1032.7999889850616, + "p95": 1035.616010427475, + "p99": 1044.6719825267792 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311721984, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 1237.8560304641724, + "p90": 1245.919942855835, + "p95": 1249.0559816360474, + "p99": 1253.6319494247437 + }, + "combine": 
{ + "p50": 632.0639848709106, + "p90": 639.0720009803772, + "p95": 641.5359973907471, + "p99": 646.9119787216187 + }, + "roundtrip": { + "p50": 1845.0239896774292, + "p90": 1854.3039560317993, + "p95": 1857.983946800232, + "p99": 1862.720012664795 + }, + "isolatedSum": { + "p50": 1869.920015335083, + "p90": 1884.9919438362122, + "p95": 1890.5919790267944, + "p99": 1900.5439281463623 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 621902848, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 2360.80002784729, + "p90": 2371.2639808654785, + "p95": 2375.3280639648438, + "p99": 2383.3279609680176 + }, + "combine": { + "p50": 1150.8159637451172, + "p90": 1160.032033920288, + "p95": 1162.9120111465454, + "p99": 1171.6159582138062 + }, + "roundtrip": { + "p50": 3508.7039470672607, + "p90": 3525.631904602051, + "p95": 3531.615972518921, + "p99": 3547.4560260772705 + }, + "isolatedSum": { + "p50": 3511.615991592407, + "p90": 3531.2960147857666, + "p95": 3538.240075111389, + "p99": 3554.9439191818237 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243504640, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-0ee3ca7d", + "identity": "h100|deepep|7168|8|384|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||cd50548525dafdf", + "colorKey": "h100_97196257", + "comparisonKey": "7f26f72cd9fff78c", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:50:56.826066+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h100-dgxc-slurm_15", + "sku": "h100", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": 
"backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · fp8", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 384, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "cd50548525dafdf", + "workloadId": "set:6:b23bc0c4b6402c69", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271663775", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271663775", + "createdAt": "2026-06-26T23:49:50Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 211.93599700927734, + "p90": 218.33600103855133, + "p95": 220.5120027065277, + "p99": 225.055992603302 + }, + "combine": { + "p50": 97.59999811649323, + "p90": 100.0640019774437, + "p95": 101.85600072145462, + "p99": 104.5759990811348 + }, + "roundtrip": { + "p50": 297.91998863220215, + "p90": 303.9360046386719, + "p95": 306.5600097179413, + "p99": 328.000009059906 + }, + "isolatedSum": { + "p50": 309.53599512577057, + "p90": 
318.400003015995, + "p95": 322.36800342798233, + "p99": 329.6319916844368 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38757376, + "combineLogicalBytes": 77514752, + "fanoutMean": 5.2802734375, + "recvTokensMax": 707, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 278.3359885215759, + "p90": 284.5759987831116, + "p95": 285.8560085296631, + "p99": 292.03200340270996 + }, + "combine": { + "p50": 141.88799262046814, + "p90": 145.1520025730133, + "p95": 146.88000082969666, + "p99": 151.39199793338776 + }, + "roundtrip": { + "p50": 404.4800102710724, + "p90": 410.7840061187744, + "p95": 413.9519929885864, + "p99": 420.51199078559875 + }, + "isolatedSum": { + "p50": 420.22398114204407, + "p90": 429.7280013561249, + "p95": 432.73600935935974, + "p99": 443.4240013360977 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77285376, + "combineLogicalBytes": 154570752, + "fanoutMean": 5.2646484375, + "recvTokensMax": 1391, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 397.2800076007843, + "p90": 402.8480052947998, + "p95": 405.44000267982483, + "p99": 410.71999073028564 + }, + "combine": { + "p50": 221.02400660514832, + "p90": 225.0880002975464, + "p95": 226.01599991321564, + "p99": 229.50400412082672 + }, + "roundtrip": { + "p50": 601.4400124549866, + "p90": 608.1600189208984, + "p95": 610.4000210762024, + "p99": 616.8000102043152 + }, + "isolatedSum": { + "p50": 618.3040142059326, + "p90": 627.9360055923462, + "p95": 631.4560025930405, + "p99": 640.2239948511124 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 154886144, + "combineLogicalBytes": 309772288, + "fanoutMean": 5.275390625, + "recvTokensMax": 2754, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 
1024, + "globalTokens": 8192, + "dispatch": { + "p50": 659.5199704170227, + "p90": 663.2959842681885, + "p95": 665.0239825248718, + "p99": 667.2319769859314 + }, + "combine": { + "p50": 360.22400856018066, + "p90": 364.9280071258545, + "p95": 366.3040101528168, + "p99": 369.85599994659424 + }, + "roundtrip": { + "p50": 1002.9439926147461, + "p90": 1008.3839893341064, + "p95": 1010.0159645080566, + "p99": 1013.856053352356 + }, + "isolatedSum": { + "p50": 1019.7439789772034, + "p90": 1028.223991394043, + "p95": 1031.3279926776886, + "p99": 1037.0879769325256 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 309750784, + "combineLogicalBytes": 619501568, + "fanoutMean": 5.2750244140625, + "recvTokensMax": 5469, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 1226.9760370254517, + "p90": 1235.1679801940918, + "p95": 1236.8320226669312, + "p99": 1242.143988609314 + }, + "combine": { + "p50": 624.5120167732239, + "p90": 631.8399906158447, + "p95": 634.1120004653931, + "p99": 675.8400201797485 + }, + "roundtrip": { + "p50": 1831.455945968628, + "p90": 1840.831995010376, + "p95": 1843.775987625122, + "p99": 1848.2880592346191 + }, + "isolatedSum": { + "p50": 1851.4880537986755, + "p90": 1867.0079708099365, + "p95": 1870.9440231323242, + "p99": 1917.9840087890625 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 619687936, + "combineLogicalBytes": 1239375872, + "fanoutMean": 5.276611328125, + "recvTokensMax": 10883, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 2344.1600799560547, + "p90": 2350.719928741455, + "p95": 2352.9601097106934, + "p99": 2358.0799102783203 + }, + "combine": { + "p50": 1141.4719820022583, + "p90": 1150.9439945220947, + "p95": 1153.7920236587524, + "p99": 1162.592053413391 + }, + "roundtrip": { + "p50": 
3469.856023788452, + "p90": 3481.6958904266357, + "p95": 3484.3521118164062, + "p99": 3490.528106689453 + }, + "isolatedSum": { + "p50": 3485.632061958313, + "p90": 3501.66392326355, + "p95": 3506.752133369446, + "p99": 3520.6719636917114 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1239834624, + "combineLogicalBytes": 2479669248, + "fanoutMean": 5.278564453125, + "recvTokensMax": 21730, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-560e55e7", + "identity": "h100|deepep|4096|8|128|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|normalized|0.18|dc27c5e0894e569", + "colorKey": "h100_7f10961a", + "comparisonKey": "6a3a9660e48371b3", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:45:34.307375+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h100-dgxc-slurm_08", + "sku": "h100", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "normalized", + "suite": "resource-constrained", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · fp8 (norm)", + "shape": { + "hidden": 4096, + "topk": 8, + "experts": 128, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": 0.18, + "achievedFraction": 0.1818, + "configuredUnits": 24, + "deviceUnits": 132, + "resourceClass": "resource-constrained", + "conformanceClass": "resource-conforming", + "fixedKernel": false, + "paretoEligible": true + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "dc27c5e0894e569", + 
"workloadId": "set:6:76d8142d69406335", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28273218274", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28273218274", + "createdAt": "2026-06-27T00:41:54Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 196.79999351501465, + "p90": 203.80799472332, + "p95": 205.79199492931366, + "p99": 214.11199867725372 + }, + "combine": { + "p50": 75.71200281381607, + "p90": 78.5600021481514, + "p95": 80.54400235414505, + "p99": 84.6719965338707 + }, + "roundtrip": { + "p50": 255.64798712730408, + "p90": 264.41600918769836, + "p95": 274.1119861602783, + "p99": 321.9519853591919 + }, + "isolatedSum": { + "p50": 272.5119963288307, + "p90": 282.3679968714714, + "p95": 286.3359972834587, + "p99": 298.7839952111244 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22282240, + "combineLogicalBytes": 44564480, + "fanoutMean": 5.3125, + "recvTokensMax": 699, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 231.26399517059326, + "p90": 269.6639895439148, + "p95": 272.19200134277344, + "p99": 278.01600098609924 + }, + "combine": { + "p50": 100.99200159311295, + "p90": 109.82400178909302, + "p95": 110.81600189208984, + "p99": 113.3119985461235 + }, + "roundtrip": { + "p50": 315.8720135688782, + "p90": 327.39201188087463, + "p95": 355.679988861084, + "p99": 369.53601241111755 + }, + "isolatedSum": { + "p50": 332.2559967637062, + "p90": 379.4879913330078, + "p95": 383.0080032348633, + "p99": 391.32799953222275 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 
44863488, + "combineLogicalBytes": 89726976, + "fanoutMean": 5.34814453125, + "recvTokensMax": 1385, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 309.59999561309814, + "p90": 352.1279990673065, + "p95": 355.00800609588623, + "p99": 361.1519932746887 + }, + "combine": { + "p50": 147.90399372577667, + "p90": 156.99200332164764, + "p95": 158.24000537395477, + "p99": 162.08000481128693 + }, + "roundtrip": { + "p50": 442.4000084400177, + "p90": 483.3280146121979, + "p95": 487.8399968147278, + "p99": 518.4000134468079 + }, + "isolatedSum": { + "p50": 457.5039893388748, + "p90": 509.12000238895416, + "p95": 513.248011469841, + "p99": 523.2319980859756 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 89751552, + "combineLogicalBytes": 179503104, + "fanoutMean": 5.349609375, + "recvTokensMax": 2772, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 453.98399233818054, + "p90": 459.3600034713745, + "p95": 461.88798546791077, + "p99": 466.623991727829 + }, + "combine": { + "p50": 235.29599606990814, + "p90": 239.00799453258514, + "p95": 240.51199853420258, + "p99": 242.46400594711304 + }, + "roundtrip": { + "p50": 673.3120083808899, + "p90": 678.8480281829834, + "p95": 680.6079745292664, + "p99": 684.544026851654 + }, + "isolatedSum": { + "p50": 689.2799884080887, + "p90": 698.3679980039597, + "p95": 702.3999840021133, + "p99": 709.087997674942 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 179511296, + "combineLogicalBytes": 359022592, + "fanoutMean": 5.349853515625, + "recvTokensMax": 5558, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 784.928023815155, + "p90": 799.8719811439514, + "p95": 803.2000064849854, + 
"p99": 809.0239763259888 + }, + "combine": { + "p50": 405.4720103740692, + "p90": 416.06399416923523, + "p95": 418.3039963245392, + "p99": 422.4959909915924 + }, + "roundtrip": { + "p50": 1170.1120138168335, + "p90": 1179.58402633667, + "p95": 1183.6479902267456, + "p99": 1192.7679777145386 + }, + "isolatedSum": { + "p50": 1190.4000341892242, + "p90": 1215.9359753131866, + "p95": 1221.5040028095245, + "p99": 1231.5199673175812 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 358055936, + "combineLogicalBytes": 716111872, + "fanoutMean": 5.33544921875, + "recvTokensMax": 10982, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1483.3279848098755, + "p90": 1490.496039390564, + "p95": 1493.6319589614868, + "p99": 1501.5679597854614 + }, + "combine": { + "p50": 732.2880029678345, + "p90": 738.8160228729248, + "p95": 740.8000230789185, + "p99": 745.9840178489685 + }, + "roundtrip": { + "p50": 2199.039936065674, + "p90": 2209.439992904663, + "p95": 2212.5439643859863, + "p99": 2217.087984085083 + }, + "isolatedSum": { + "p50": 2215.61598777771, + "p90": 2229.3120622634888, + "p95": 2234.4319820404053, + "p99": 2247.55197763443 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 716197888, + "combineLogicalBytes": 1432395776, + "fanoutMean": 5.336090087890625, + "recvTokensMax": 21939, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-de081cfe", + "identity": "h100|deepep|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|normalized|0.18|64d989e2e2a6b31", + "colorKey": "h100_91aa6e56", + "comparisonKey": "e439d265ee12c9f2", + "schemaVersion": 3, + "generatedAt": "2026-06-26T17:30:20.983875+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h100-dgxc-slurm_03", + "sku": "h100", + "backend": "deepep", + "phase": "prefill", + 
"mode": "normal", + "resourceMode": "normalized", + "suite": "resource-constrained", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · fp8 (norm)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": 0.18, + "achievedFraction": 0.1818, + "configuredUnits": 24, + "deviceUnits": 132, + "resourceClass": "resource-constrained", + "conformanceClass": "resource-conforming", + "fixedKernel": false, + "paretoEligible": true + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": "set:6:a426d66e479dc893", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28254323956", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254323956", + "createdAt": "2026-06-26T17:27:01Z", + "sha": "60dec7d70f554e252fec87709e2be52752947db1" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 89.59999680519104, + "p90": 93.72799843549728, + "p95": 95.36000341176987, + "p99": 100.832000374794 + }, + "combine": { + "p50": 98.14400225877762, + "p90": 100.60799866914749, + "p95": 102.11200267076492, + "p99": 105.0880029797554 + }, + "roundtrip": { + "p50": 215.13600647449493, + "p90": 218.55999529361725, + "p95": 220.12799978256226, + "p99": 
228.06400060653687 + }, + "isolatedSum": { + "p50": 187.74399906396866, + "p90": 194.33599710464478, + "p95": 197.4720060825348, + "p99": 205.9200033545494 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 106.175996363163, + "p90": 121.47200107574463, + "p95": 122.52800166606903, + "p99": 125.91999769210815 + }, + "combine": { + "p50": 139.48799669742584, + "p90": 146.17599546909332, + "p95": 147.61599898338318, + "p99": 149.82399344444275 + }, + "roundtrip": { + "p50": 320.92800736427307, + "p90": 336.41600608825684, + "p95": 337.92001008987427, + "p99": 341.2800133228302 + }, + "isolatedSum": { + "p50": 245.66399306058884, + "p90": 267.64799654483795, + "p95": 270.1440006494522, + "p99": 275.7439911365509 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77944832, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 138.46400380134583, + "p90": 182.8799992799759, + "p95": 190.97599387168884, + "p99": 197.28000462055206 + }, + "combine": { + "p50": 208.3200067281723, + "p90": 223.00800681114197, + "p95": 231.83999955654144, + "p99": 242.01600253582 + }, + "roundtrip": { + "p50": 509.69600677490234, + "p90": 521.5680003166199, + "p95": 523.4879851341248, + "p99": 528.9599895477295 + }, + "isolatedSum": { + "p50": 346.7840105295181, + "p90": 405.88800609111786, + "p95": 422.8159934282303, + "p99": 439.29600715637207 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156133376, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 
4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 197.4399983882904, + "p90": 210.87999641895294, + "p95": 213.31200003623962, + "p99": 216.2880003452301 + }, + "combine": { + "p50": 325.82399249076843, + "p90": 330.1120102405548, + "p95": 331.6799998283386, + "p99": 335.80800890922546 + }, + "roundtrip": { + "p50": 847.4879860877991, + "p90": 858.0160140991211, + "p95": 861.0879778862, + "p99": 869.2799806594849 + }, + "isolatedSum": { + "p50": 523.2639908790588, + "p90": 540.9920066595078, + "p95": 544.9919998645782, + "p99": 552.0960092544556 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311721984, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 318.65599751472473, + "p90": 335.29600501060486, + "p95": 338.0799889564514, + "p99": 347.29599952697754 + }, + "combine": { + "p50": 559.7760081291199, + "p90": 566.815972328186, + "p95": 569.5040225982666, + "p99": 573.311984539032 + }, + "roundtrip": { + "p50": 1524.0000486373901, + "p90": 1544.0640449523926, + "p95": 1550.7839918136597, + "p99": 1576.7359733581543 + }, + "isolatedSum": { + "p50": 878.4320056438446, + "p90": 902.1119773387909, + "p95": 907.584011554718, + "p99": 920.6079840660095 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 621902848, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 574.7519731521606, + "p90": 593.1839942932129, + "p95": 598.1760025024414, + "p99": 604.7999858856201 + }, + "combine": { + "p50": 1025.056004524231, + "p90": 1033.5359573364258, + "p95": 
1036.1920595169067, + "p99": 1042.847990989685 + }, + "roundtrip": { + "p50": 2880.863904953003, + "p90": 2894.5279121398926, + "p95": 2899.9040126800537, + "p99": 2908.3518981933594 + }, + "isolatedSum": { + "p50": 1599.8079776763916, + "p90": 1626.7199516296387, + "p95": 1634.3680620193481, + "p99": 1647.6479768753052 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243504640, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-e8c2a4d2", + "identity": "h100|deepep|7168|8|256|fp8|normal|cached-layout-comm-only-v1|uniform|8|prefill|normal|none|none|0|normalized|0.18|64d989e2e2a6b31", + "colorKey": "h100_eddc3af6", + "comparisonKey": "fd73340f2af530d5", + "schemaVersion": 3, + "generatedAt": "2026-06-26T17:30:48.926445+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h100-dgxc-slurm_19", + "sku": "h100", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "normalized", + "suite": "resource-constrained", + "comparisonClass": "standardized", + "measurementContract": "cached-layout-comm-only-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · fp8 (norm) [cl]", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": 0.18, + "achievedFraction": 0.1818, + "configuredUnits": 24, + "deviceUnits": 132, + "resourceClass": "resource-constrained", + "conformanceClass": "resource-conforming", + "fixedKernel": false, + "paretoEligible": true + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, 
+ "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": "set:6:a426d66e479dc893", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28254341346", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254341346", + "createdAt": "2026-06-26T17:27:22Z", + "sha": "60dec7d70f554e252fec87709e2be52752947db1" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 77.60000228881836, + "p90": 81.4720019698143, + "p95": 83.52000266313553, + "p99": 102.7199998497963 + }, + "combine": { + "p50": 98.08000177145004, + "p90": 102.01600193977356, + "p95": 115.35999923944473, + "p99": 344.0319895744324 + }, + "roundtrip": { + "p50": 205.1520049571991, + "p90": 208.19200575351715, + "p95": 209.85600352287292, + "p99": 214.9440050125122 + }, + "isolatedSum": { + "p50": 175.6800040602684, + "p90": 183.48800390958786, + "p95": 198.88000190258026, + "p99": 446.75198942422867 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 95.74399888515472, + "p90": 110.07999628782272, + "p95": 111.13599687814713, + "p99": 114.81600254774094 + }, + "combine": { + "p50": 141.7279988527298, + "p90": 148.8959938287735, + "p95": 150.4960060119629, + "p99": 153.02400290966034 + }, + "roundtrip": { + "p50": 311.45599484443665, + "p90": 319.5840120315552, + "p95": 321.696013212204, + "p99": 324.67201352119446 + }, + "isolatedSum": { + "p50": 237.47199773788452, + "p90": 258.9759901165962, + "p95": 
261.63200289011, + "p99": 267.8400054574013 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77944832, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 129.56799566745758, + "p90": 144.57599818706512, + "p95": 146.14400267601013, + "p99": 148.8959938287735 + }, + "combine": { + "p50": 213.4079933166504, + "p90": 218.36799383163452, + "p95": 219.7760045528412, + "p99": 224.2240011692047 + }, + "roundtrip": { + "p50": 500.70399045944214, + "p90": 508.1599950790405, + "p95": 510.81597805023193, + "p99": 514.8159861564636 + }, + "isolatedSum": { + "p50": 342.97598898410797, + "p90": 362.94399201869965, + "p95": 365.9200072288513, + "p99": 373.1199949979782 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156133376, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 186.49600446224213, + "p90": 196.0960030555725, + "p95": 197.50399887561798, + "p99": 202.55999267101288 + }, + "combine": { + "p50": 327.7760148048401, + "p90": 333.18400382995605, + "p95": 334.3679904937744, + "p99": 337.72799372673035 + }, + "roundtrip": { + "p50": 835.2640271186829, + "p90": 841.69602394104, + "p95": 844.0639972686768, + "p99": 848.2879996299744 + }, + "isolatedSum": { + "p50": 514.2720192670822, + "p90": 529.2800068855286, + "p95": 531.8719893693924, + "p99": 540.2879863977432 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311721984, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 
16384, + "dispatch": { + "p50": 306.62399530410767, + "p90": 320.19200921058655, + "p95": 322.7519989013672, + "p99": 327.1679878234863 + }, + "combine": { + "p50": 559.6479773521423, + "p90": 567.296028137207, + "p95": 570.1119899749756, + "p99": 574.5919942855835 + }, + "roundtrip": { + "p50": 1509.6960067749023, + "p90": 1522.7199792861938, + "p95": 1525.6320238113403, + "p99": 1585.9839916229248 + }, + "isolatedSum": { + "p50": 866.27197265625, + "p90": 887.4880373477936, + "p95": 892.8639888763428, + "p99": 901.7599821090698 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 621902848, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 539.8719906806946, + "p90": 550.7839918136597, + "p95": 555.7119846343994, + "p99": 564.7040009498596 + }, + "combine": { + "p50": 1024.9279737472534, + "p90": 1034.3040227890015, + "p95": 1037.11998462677, + "p99": 1047.0720529556274 + }, + "roundtrip": { + "p50": 2850.719928741455, + "p90": 2861.407995223999, + "p95": 2864.9280071258545, + "p99": 2870.176076889038 + }, + "isolatedSum": { + "p50": 1564.799964427948, + "p90": 1585.0880146026611, + "p95": 1592.8319692611694, + "p99": 1611.776053905487 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243504640, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-f6d2d196", + "identity": "h100|deepep|7168|8|256|fp8|normal|cached-layout-comm-only-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "h100_ec72792b", + "comparisonKey": "39b4bc74c45641cb", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:48:09.793091+00:00", + "status": "valid", + "publicationStatus": "official", + 
"runner": "h100-dgxc-slurm_09", + "sku": "h100", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "cached-layout-comm-only-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · fp8 [cl]", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": "set:6:a426d66e479dc893", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271576503", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271576503", + "createdAt": "2026-06-26T23:47:05Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 76.73600316047668, + "p90": 80.19199967384338, + "p95": 82.17599987983704, + "p99": 85.4720026254654 + }, + "combine": { + "p50": 98.68799895048141, + "p90": 100.8640006184578, + "p95": 102.84800082445145, + "p99": 113.27999830245972 + }, + "roundtrip": { + "p50": 
204.25599813461304, + "p90": 206.84799551963806, + "p95": 208.0959975719452, + "p99": 211.32799983024597 + }, + "isolatedSum": { + "p50": 175.4240021109581, + "p90": 181.05600029230118, + "p95": 185.02400070428848, + "p99": 198.7520009279251 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 95.551997423172, + "p90": 98.65599870681763, + "p95": 100.44799745082855, + "p99": 104.63999956846237 + }, + "combine": { + "p50": 143.51999759674072, + "p90": 146.04799449443817, + "p95": 147.2640037536621, + "p99": 150.07999539375305 + }, + "roundtrip": { + "p50": 317.05600023269653, + "p90": 320.67200541496277, + "p95": 322.07998633384705, + "p99": 325.56799054145813 + }, + "isolatedSum": { + "p50": 239.07199501991272, + "p90": 244.7039932012558, + "p95": 247.71200120449066, + "p99": 254.71999496221542 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77944832, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 132.4159950017929, + "p90": 136.09600067138672, + "p95": 137.40800321102142, + "p99": 140.19200205802917 + }, + "combine": { + "p50": 224.16000068187714, + "p90": 228.2239943742752, + "p95": 229.312002658844, + "p99": 232.03200101852417 + }, + "roundtrip": { + "p50": 517.5039768218994, + "p90": 522.5920081138611, + "p95": 523.8400101661682, + "p99": 534.1759920120239 + }, + "isolatedSum": { + "p50": 356.57599568367004, + "p90": 364.3199950456619, + "p95": 366.7200058698654, + "p99": 372.22400307655334 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156133376, + "combineLogicalBytes": 
312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 203.5199999809265, + "p90": 207.39200711250305, + "p95": 208.95999670028687, + "p99": 213.1199985742569 + }, + "combine": { + "p50": 359.0719997882843, + "p90": 364.25599455833435, + "p95": 365.4080033302307, + "p99": 367.35999584198 + }, + "roundtrip": { + "p50": 883.679986000061, + "p90": 889.6960020065308, + "p95": 891.5839791297913, + "p99": 897.7599740028381 + }, + "isolatedSum": { + "p50": 562.5919997692108, + "p90": 571.6480016708374, + "p95": 574.3680000305176, + "p99": 580.4799944162369 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311721984, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 341.0240113735199, + "p90": 352.9280126094818, + "p95": 354.7840118408203, + "p99": 361.31200194358826 + }, + "combine": { + "p50": 631.2000155448914, + "p90": 639.136016368866, + "p95": 641.5359973907471, + "p99": 644.1599726676941 + }, + "roundtrip": { + "p50": 1616.5440082550049, + "p90": 1624.9920129776, + "p95": 1627.3599863052368, + "p99": 1631.9680213928223 + }, + "isolatedSum": { + "p50": 972.2240269184113, + "p90": 992.0640289783478, + "p95": 996.3200092315674, + "p99": 1005.4719746112823 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 621902848, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 611.6160154342651, + "p90": 621.0240125656128, + "p95": 624.0959763526917, + "p99": 790.3040051460266 + }, + 
"combine": { + "p50": 1165.503978729248, + "p90": 1175.487995147705, + "p95": 1177.664041519165, + "p99": 1188.9280080795288 + }, + "roundtrip": { + "p50": 3078.4640312194824, + "p90": 3095.8399772644043, + "p95": 3103.071928024292, + "p99": 3115.9679889678955 + }, + "isolatedSum": { + "p50": 1777.1199941635132, + "p90": 1796.5120077133179, + "p95": 1801.7600178718567, + "p99": 1979.2320132255554 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243504640, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-ab8f0534", + "identity": "h200|deepep|4096|8|128|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||75530960a30b452", + "colorKey": "h200_3a47b6c9", + "comparisonKey": "40ee6d196d286895", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:53:38.574880+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_6", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16", + "shape": { + "hidden": 4096, + "topk": 8, + "experts": 128, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 
1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "75530960a30b452", + "workloadId": "set:8:d1b92539bddfb570", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271743900", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271743900", + "createdAt": "2026-06-26T23:52:15Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 68.64000111818314, + "p90": 89.56799656152725, + "p95": 96.41599655151367, + "p99": 126.36800110340118 + }, + "combine": { + "p50": 58.04799869656563, + "p90": 69.60000097751617, + "p95": 74.52800124883652, + "p99": 91.80799871683121 + }, + "roundtrip": { + "p50": 112.73600161075592, + "p90": 135.93600690364838, + "p95": 145.7280069589615, + "p99": 215.26400744915009 + }, + "isolatedSum": { + "p50": 126.68799981474876, + "p90": 159.16799753904343, + "p95": 170.9439978003502, + "p99": 218.1759998202324 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 344064, + "combineLogicalBytes": 344064, + "fanoutMean": 5.25, + "recvTokensMax": 6, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 69.34399902820587, + "p90": 88.22400122880936, + "p95": 94.68799829483032, + "p99": 116.15999788045883 + }, + "combine": { + "p50": 58.94400179386139, + "p90": 68.70400160551071, + "p95": 72.03199714422226, + "p99": 83.52000266313553 + }, + "roundtrip": { + "p50": 112.89600282907486, + "p90": 138.3039951324463, + "p95": 150.52799880504608, + "p99": 196.51199877262115 + }, + "isolatedSum": { + "p50": 128.28800082206726, + "p90": 156.92800283432007, + "p95": 
166.71999543905258, + "p99": 199.68000054359436 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 704512, + "combineLogicalBytes": 704512, + "fanoutMean": 5.375, + "recvTokensMax": 12, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 70.46400010585785, + "p90": 84.63999629020691, + "p95": 92.0960009098053, + "p99": 110.78400164842606 + }, + "combine": { + "p50": 60.28800085186958, + "p90": 70.91200351715088, + "p95": 75.16799867153168, + "p99": 87.5839963555336 + }, + "roundtrip": { + "p50": 114.20799791812897, + "p90": 135.68000495433807, + "p95": 147.64800667762756, + "p99": 195.5520063638687 + }, + "isolatedSum": { + "p50": 130.75200095772743, + "p90": 155.5519998073578, + "p95": 167.26399958133698, + "p99": 198.36799800395966 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1384448, + "combineLogicalBytes": 1384448, + "fanoutMean": 5.28125, + "recvTokensMax": 26, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 72.38399982452393, + "p90": 103.71199995279312, + "p95": 118.72000247240067, + "p99": 215.61600267887115 + }, + "combine": { + "p50": 61.055999249219894, + "p90": 76.03199779987335, + "p95": 81.7599967122078, + "p99": 112.57600039243698 + }, + "roundtrip": { + "p50": 115.84000289440155, + "p90": 143.51999759674072, + "p95": 151.67999267578125, + "p99": 190.46400487422943 + }, + "isolatedSum": { + "p50": 133.43999907374382, + "p90": 179.74399775266647, + "p95": 200.47999918460846, + "p99": 328.19200307130814 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2744320, + "combineLogicalBytes": 2744320, + "fanoutMean": 5.234375, + "recvTokensMax": 49, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 
75.23199915885925, + "p90": 102.04800218343735, + "p95": 107.87200182676315, + "p99": 130.20800054073334 + }, + "combine": { + "p50": 61.792001128196716, + "p90": 71.16799801588058, + "p95": 76.64000242948532, + "p99": 86.84799820184708 + }, + "roundtrip": { + "p50": 116.92799627780914, + "p90": 138.2399946451187, + "p95": 147.96799421310425, + "p99": 179.967999458313 + }, + "isolatedSum": { + "p50": 137.02400028705597, + "p90": 173.21600019931793, + "p95": 184.51200425624847, + "p99": 217.0559987425804 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 5464064, + "combineLogicalBytes": 5464064, + "fanoutMean": 5.2109375, + "recvTokensMax": 94, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 74.75200295448303, + "p90": 90.52799642086029, + "p95": 98.75199943780899, + "p99": 135.48800349235535 + }, + "combine": { + "p50": 63.74400109052658, + "p90": 71.71200215816498, + "p95": 78.78399640321732, + "p99": 91.07200056314468 + }, + "roundtrip": { + "p50": 119.9679970741272, + "p90": 145.47200500965118, + "p95": 149.50400590896606, + "p99": 165.8879965543747 + }, + "isolatedSum": { + "p50": 138.4960040450096, + "p90": 162.23999857902527, + "p95": 177.5359958410263, + "p99": 226.56000405550003 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 11124736, + "combineLogicalBytes": 11124736, + "fanoutMean": 5.3046875, + "recvTokensMax": 186, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 84.60800349712372, + "p90": 103.13600301742554, + "p95": 112.22399771213531, + "p99": 138.11199367046356 + }, + "combine": { + "p50": 72.03199714422226, + "p90": 82.78399705886841, + "p95": 89.56799656152725, + "p99": 104.92800176143646 + }, + "roundtrip": { + "p50": 131.48799538612366, + "p90": 145.50399780273438, + "p95": 155.8080017566681, + "p99": 
189.66400623321533 + }, + "isolatedSum": { + "p50": 156.64000064134598, + "p90": 185.92000007629395, + "p95": 201.79199427366257, + "p99": 243.03999543190002 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22192128, + "combineLogicalBytes": 22192128, + "fanoutMean": 5.291015625, + "recvTokensMax": 358, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 94.2080020904541, + "p90": 120.2239990234375, + "p95": 133.82400572299957, + "p99": 215.68000316619873 + }, + "combine": { + "p50": 82.8159973025322, + "p90": 92.70399808883667, + "p95": 96.12800180912018, + "p99": 107.04000294208527 + }, + "roundtrip": { + "p50": 152.22400426864624, + "p90": 168.32000017166138, + "p95": 176.2239933013916, + "p99": 196.03200256824493 + }, + "isolatedSum": { + "p50": 177.0239993929863, + "p90": 212.92799711227417, + "p95": 229.95200753211975, + "p99": 322.720006108284 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 44564480, + "combineLogicalBytes": 44564480, + "fanoutMean": 5.3125, + "recvTokensMax": 699, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-3d690e39", + "identity": "h200|deepep|5120|8|160|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||be1b44a963bd4ef", + "colorKey": "h200_3a47b6c9", + "comparisonKey": "540c08b08c068f8c", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:54:06.885074+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_4", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16", + "shape": { + 
"hidden": 5120, + "topk": 8, + "experts": 160, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "be1b44a963bd4ef", + "workloadId": "set:8:34e5874082f8ea8f", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271759919", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271759919", + "createdAt": "2026-06-26T23:52:42Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 70.49600034952164, + "p90": 102.1760031580925, + "p95": 111.90400272607803, + "p99": 133.34399461746216 + }, + "combine": { + "p50": 60.5119988322258, + "p90": 72.9919970035553, + "p95": 79.55200225114822, + "p99": 90.55999666452408 + }, + "roundtrip": { + "p50": 113.8560026884079, + "p90": 143.5839980840683, + "p95": 150.94399452209473, + "p99": 190.14400243759155 + }, + "isolatedSum": { + "p50": 131.00799918174744, + "p90": 175.1680001616478, + "p95": 191.45600497722626, + "p99": 223.90399128198624 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 430080, + "combineLogicalBytes": 430080, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 0, + "correct": true, + 
"samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 73.88799637556076, + "p90": 109.43999886512756, + "p95": 123.74400347471237, + "p99": 176.2239933013916 + }, + "combine": { + "p50": 62.463998794555664, + "p90": 76.4480009675026, + "p95": 81.37600123882294, + "p99": 89.6959975361824 + }, + "roundtrip": { + "p50": 118.40000003576279, + "p90": 146.7839926481247, + "p95": 154.88000214099884, + "p99": 198.0160027742386 + }, + "isolatedSum": { + "p50": 136.35199517011642, + "p90": 185.88799983263016, + "p95": 205.1200047135353, + "p99": 265.919990837574 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 880640, + "combineLogicalBytes": 880640, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 72.12799787521362, + "p90": 100.8640006184578, + "p95": 107.84000158309937, + "p99": 182.5920045375824 + }, + "combine": { + "p50": 62.24000081419945, + "p90": 77.504001557827, + "p95": 82.36800134181976, + "p99": 100.22400319576263 + }, + "roundtrip": { + "p50": 116.64000153541565, + "p90": 148.3840048313141, + "p95": 158.49600732326508, + "p99": 193.34399700164795 + }, + "isolatedSum": { + "p50": 134.36799868941307, + "p90": 178.3680021762848, + "p95": 190.20800292491913, + "p99": 282.81600773334503 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1740800, + "combineLogicalBytes": 1740800, + "fanoutMean": 5.3125, + "recvTokensMax": 25, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 72.60800153017044, + "p90": 101.1200025677681, + "p95": 114.1119971871376, + "p99": 128.06400656700134 + }, + "combine": { + "p50": 63.74400109052658, + "p90": 79.26400005817413, + "p95": 85.50400286912918, + "p99": 120.03199756145477 + }, + "roundtrip": { + 
"p50": 117.53600090742111, + "p90": 147.74399995803833, + "p95": 156.8319946527481, + "p99": 184.54399704933167 + }, + "isolatedSum": { + "p50": 136.35200262069702, + "p90": 180.38400262594223, + "p95": 199.61600005626678, + "p99": 248.09600412845612 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3471360, + "combineLogicalBytes": 3471360, + "fanoutMean": 5.296875, + "recvTokensMax": 50, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 75.9039968252182, + "p90": 101.79200023412704, + "p95": 111.77600175142288, + "p99": 127.9039978981018 + }, + "combine": { + "p50": 64.41599875688553, + "p90": 79.68000322580338, + "p95": 84.06399935483932, + "p99": 103.61599922180176 + }, + "roundtrip": { + "p50": 124.09599870443344, + "p90": 154.91199493408203, + "p95": 167.35999286174774, + "p99": 218.6560034751892 + }, + "isolatedSum": { + "p50": 140.31999558210373, + "p90": 181.47200345993042, + "p95": 195.8400011062622, + "p99": 231.51999711990356 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6912000, + "combineLogicalBytes": 6912000, + "fanoutMean": 5.2734375, + "recvTokensMax": 93, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 77.63200253248215, + "p90": 102.08000242710114, + "p95": 110.1439967751503, + "p99": 138.5280042886734 + }, + "combine": { + "p50": 68.4799998998642, + "p90": 83.45600217580795, + "p95": 89.50400352478027, + "p99": 97.82399982213974 + }, + "roundtrip": { + "p50": 122.81599640846252, + "p90": 153.50399911403656, + "p95": 163.13600540161133, + "p99": 190.5599981546402 + }, + "isolatedSum": { + "p50": 146.11200243234634, + "p90": 185.5360046029091, + "p95": 199.64800029993057, + "p99": 236.35200411081314 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13977600, + "combineLogicalBytes": 13977600, + 
"fanoutMean": 5.33203125, + "recvTokensMax": 179, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 91.90399944782257, + "p90": 113.08799684047699, + "p95": 123.52000176906586, + "p99": 162.9759967327118 + }, + "combine": { + "p50": 77.15199887752533, + "p90": 91.13600105047226, + "p95": 97.59999811649323, + "p99": 112.06399649381638 + }, + "roundtrip": { + "p50": 140.47999680042267, + "p90": 166.75199568271637, + "p95": 175.9359985589981, + "p99": 250.20799040794373 + }, + "isolatedSum": { + "p50": 169.0559983253479, + "p90": 204.22399789094925, + "p95": 221.11999988555908, + "p99": 275.03999322652817 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 27975680, + "combineLogicalBytes": 27975680, + "fanoutMean": 5.3359375, + "recvTokensMax": 355, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 99.07200187444687, + "p90": 122.27199971675873, + "p95": 127.42400169372559, + "p99": 146.7519998550415 + }, + "combine": { + "p50": 90.87999910116196, + "p90": 105.3759977221489, + "p95": 109.37599837779999, + "p99": 125.37600100040436 + }, + "roundtrip": { + "p50": 166.4319932460785, + "p90": 186.5919977426529, + "p95": 193.12000274658203, + "p99": 222.01600670814514 + }, + "isolatedSum": { + "p50": 189.95200097560883, + "p90": 227.64799743890762, + "p95": 236.80000007152557, + "p99": 272.12800085544586 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 55674880, + "combineLogicalBytes": 55674880, + "fanoutMean": 5.3095703125, + "recvTokensMax": 699, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-e3311b84", + "identity": "h200|deepep|6144|8|256|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h200_3a47b6c9", + 
"comparisonKey": "fc31c0a33afa32cc", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:54:56.726240+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_7", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16", + "shape": { + "hidden": 6144, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": "set:8:2e0df6a62cd0143e", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271775418", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271775418", + "createdAt": "2026-06-26T23:53:10Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 73.05599749088287, + "p90": 102.7199998497963, + "p95": 111.35999858379364, + "p99": 123.00799787044525 + }, + "combine": { 
+ "p50": 65.92000275850296, + "p90": 79.77599650621414, + "p95": 88.44800293445587, + "p99": 126.30400061607361 + }, + "roundtrip": { + "p50": 118.78400295972824, + "p90": 148.28799664974213, + "p95": 155.8080017566681, + "p99": 184.64000523090363 + }, + "isolatedSum": { + "p50": 138.97600024938583, + "p90": 182.49599635601044, + "p95": 199.8080015182495, + "p99": 249.31199848651886 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 540672, + "combineLogicalBytes": 540672, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 74.5920017361641, + "p90": 107.07200318574905, + "p95": 120.51200121641159, + "p99": 142.87999272346497 + }, + "combine": { + "p50": 67.03999638557434, + "p90": 84.73599702119827, + "p95": 92.12800115346909, + "p99": 114.07999694347382 + }, + "roundtrip": { + "p50": 120.38400024175644, + "p90": 157.18400478363037, + "p95": 169.24799978733063, + "p99": 195.68000733852386 + }, + "isolatedSum": { + "p50": 141.63199812173843, + "p90": 191.80800020694733, + "p95": 212.64000236988068, + "p99": 256.9599896669388 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1056768, + "combineLogicalBytes": 1056768, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 72.95999675989151, + "p90": 101.05600208044052, + "p95": 114.68800157308578, + "p99": 137.472003698349 + }, + "combine": { + "p50": 66.14399701356888, + "p90": 79.23199981451035, + "p95": 84.06399935483932, + "p99": 93.50399672985077 + }, + "roundtrip": { + "p50": 120.99199742078781, + "p90": 154.81600165367126, + "p95": 165.95199704170227, + "p99": 220.41599452495575 + }, + "isolatedSum": { + "p50": 139.1039937734604, + "p90": 180.28800189495087, + "p95": 198.7520009279251, + "p99": 
230.97600042819977 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2125824, + "combineLogicalBytes": 2125824, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 73.08799773454666, + "p90": 102.78400033712387, + "p95": 110.88000237941742, + "p99": 142.17600226402283 + }, + "combine": { + "p50": 67.90400296449661, + "p90": 83.29600095748901, + "p95": 89.31200206279755, + "p99": 102.30399668216705 + }, + "roundtrip": { + "p50": 120.95999717712402, + "p90": 156.73600137233734, + "p95": 165.56799411773682, + "p99": 189.43999707698822 + }, + "isolatedSum": { + "p50": 140.99200069904327, + "p90": 186.08000129461288, + "p95": 200.19200444221497, + "p99": 244.47999894618988 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4263936, + "combineLogicalBytes": 4263936, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 74.81600344181061, + "p90": 102.65599936246872, + "p95": 110.68800091743469, + "p99": 122.49600142240524 + }, + "combine": { + "p50": 68.2239979505539, + "p90": 86.14400029182434, + "p95": 90.4960036277771, + "p99": 105.95200210809708 + }, + "roundtrip": { + "p50": 121.44000083208084, + "p90": 152.25599706172943, + "p95": 161.40800714492798, + "p99": 200.9280025959015 + }, + "isolatedSum": { + "p50": 143.0400013923645, + "p90": 188.79999965429306, + "p95": 201.1840045452118, + "p99": 228.44800353050232 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 8503296, + "combineLogicalBytes": 8503296, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 82.49600231647491, + "p90": 
114.01599645614624, + "p95": 123.74400347471237, + "p99": 148.3519971370697 + }, + "combine": { + "p50": 74.14399832487106, + "p90": 88.60799670219421, + "p95": 94.11200135946274, + "p99": 106.81600123643875 + }, + "roundtrip": { + "p50": 128.54400277137756, + "p90": 162.33600676059723, + "p95": 178.20799350738525, + "p99": 222.30400145053864 + }, + "isolatedSum": { + "p50": 156.64000064134598, + "p90": 202.62399315834045, + "p95": 217.8560048341751, + "p99": 255.16799837350845 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 16908288, + "combineLogicalBytes": 16908288, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 94.36800330877304, + "p90": 133.02400708198547, + "p95": 138.49599659442902, + "p99": 182.20800161361694 + }, + "combine": { + "p50": 81.44000172615051, + "p90": 95.42399644851685, + "p95": 100.5759984254837, + "p99": 123.74400347471237 + }, + "roundtrip": { + "p50": 151.2320041656494, + "p90": 172.03199863433838, + "p95": 182.17599391937256, + "p99": 404.1599929332733 + }, + "isolatedSum": { + "p50": 175.80800503492355, + "p90": 228.44800353050232, + "p95": 239.07199501991272, + "p99": 305.9520050883293 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 33423360, + "combineLogicalBytes": 33423360, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 109.0880036354065, + "p90": 134.68800485134125, + "p95": 142.752006649971, + "p99": 173.3119934797287 + }, + "combine": { + "p50": 97.43999689817429, + "p90": 114.97599631547928, + "p95": 121.08799815177917, + "p99": 138.75199854373932 + }, + "roundtrip": { + "p50": 180.1919937133789, + "p90": 205.56800067424774, + "p95": 210.07999777793884, + "p99": 237.7600073814392 + }, + 
"isolatedSum": { + "p50": 206.52800053358078, + "p90": 249.66400116682053, + "p95": 263.8400048017502, + "p99": 312.063992023468 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 66576384, + "combineLogicalBytes": 66576384, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-a3bb3bd5", + "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|fp8-saturation|none|none|0|tuned||8c8497a77d9085d", + "colorKey": "h200_d982b749", + "comparisonKey": "1e550a8055ce0039", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:06:16.783949+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_12", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "fp8-saturation", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "8c8497a77d9085d", + "workloadId": "set:4:d8d49658059863f2", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + 
"eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28272139795", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272139795", + "createdAt": "2026-06-27T00:04:50Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 82.65600353479385, + "p90": 133.59999656677246, + "p95": 142.59199798107147, + "p99": 158.4320068359375 + }, + "combine": { + "p50": 76.38400048017502, + "p90": 99.61599856615067, + "p95": 103.84000092744827, + "p99": 158.1760048866272 + }, + "roundtrip": { + "p50": 128.35200130939484, + "p90": 157.21599757671356, + "p95": 169.63200271129608, + "p99": 325.6959915161133 + }, + "isolatedSum": { + "p50": 159.04000401496887, + "p90": 233.21599513292313, + "p95": 246.43199890851974, + "p99": 316.6080117225647 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 73.60000163316727, + "p90": 94.81599926948547, + "p95": 101.82400047779083, + "p99": 127.32799351215363 + }, + "combine": { + "p50": 70.23999840021133, + "p90": 99.16800260543823, + "p95": 101.34399682283401, + "p99": 121.34400010108948 + }, + "roundtrip": { + "p50": 130.5599957704544, + "p90": 186.46399676799774, + "p95": 191.3280040025711, + "p99": 227.48799622058868 + }, + "isolatedSum": { + "p50": 143.8400000333786, + "p90": 193.9840018749237, + "p95": 203.16799730062485, + "p99": 248.6719936132431 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 3, + "correct": 
true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 84.70399677753448, + "p90": 100.44799745082855, + "p95": 106.55999928712845, + "p99": 121.18399888277054 + }, + "combine": { + "p50": 77.47200131416321, + "p90": 89.47200328111649, + "p95": 95.32800316810608, + "p99": 106.1440035700798 + }, + "roundtrip": { + "p50": 137.37599551677704, + "p90": 158.49600732326508, + "p95": 167.42399334907532, + "p99": 188.54400515556335 + }, + "isolatedSum": { + "p50": 162.1759980916977, + "p90": 189.92000073194504, + "p95": 201.88800245523453, + "p99": 227.32800245285034 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 117.69600212574005, + "p90": 133.15199315547943, + "p95": 140.25600254535675, + "p99": 154.7199934720993 + }, + "combine": { + "p50": 105.82400113344193, + "p90": 123.55200201272964, + "p95": 129.50399518013, + "p99": 141.85599982738495 + }, + "roundtrip": { + "p50": 196.83200120925903, + "p90": 213.69600296020508, + "p95": 222.04799950122833, + "p99": 265.8880054950714 + }, + "isolatedSum": { + "p50": 223.52000325918198, + "p90": 256.7039951682091, + "p95": 269.75999772548676, + "p99": 296.57599329948425 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-0688a10c", + "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||2baace5eca64609", + "colorKey": "h200_d982b749", + "comparisonKey": "4dde4e46080a91eb", + "schemaVersion": 3, + "generatedAt": 
"2026-06-26T23:47:12.411729+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_8", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "2baace5eca64609", + "workloadId": "set:2:07d544ac2af401ec", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271536417", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271536417", + "createdAt": "2026-06-26T23:45:51Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 74.14399832487106, + "p90": 99.20000284910202, + "p95": 104.86400127410889, + "p99": 119.13599818944931 + }, + "combine": { + "p50": 70.0799971818924, + "p90": 87.13600039482117, + "p95": 
91.839998960495, + "p99": 107.87200182676315 + }, + "roundtrip": { + "p50": 126.94400548934937, + "p90": 155.5519998073578, + "p95": 165.95199704170227, + "p99": 190.40000438690186 + }, + "isolatedSum": { + "p50": 144.22399550676346, + "p90": 186.3360032439232, + "p95": 196.70400023460388, + "p99": 227.00800001621246 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 104.51199859380722, + "p90": 139.48799669742584, + "p95": 145.56799829006195, + "p99": 155.10399639606476 + }, + "combine": { + "p50": 87.45600283145905, + "p90": 101.40799731016159, + "p95": 109.24799740314484, + "p99": 129.2479932308197 + }, + "roundtrip": { + "p50": 163.35999965667725, + "p90": 187.8719925880432, + "p95": 195.10400295257568, + "p99": 370.5599904060364 + }, + "isolatedSum": { + "p50": 191.96800142526627, + "p90": 240.89599400758743, + "p95": 254.8159956932068, + "p99": 284.35198962688446 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-d576fec7", + "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||8c8497a77d9085d", + "colorKey": "h200_d982b749", + "comparisonKey": "4dde4e46080a91eb", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:56:04.752374+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_6", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": 
"layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "8c8497a77d9085d", + "workloadId": "set:4:d8d49658059863f2", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271823274", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271823274", + "createdAt": "2026-06-26T23:54:45Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 73.7600028514862, + "p90": 85.53600311279297, + "p95": 96.28800302743912, + "p99": 124.67200309038162 + }, + "combine": { + "p50": 68.38399916887283, + "p90": 78.59200239181519, + "p95": 84.927998483181, + "p99": 101.3759970664978 + }, + "roundtrip": { + "p50": 124.4800016283989, + "p90": 147.32800424098969, + "p95": 163.58399391174316, + "p99": 201.88799500465393 + }, + "isolatedSum": { + "p50": 142.14400202035904, + "p90": 164.12800550460815, + "p95": 181.21600151062012, + "p99": 226.04800015687943 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 74.49600100517273, + "p90": 89.15200084447861, + "p95": 96.92800045013428, + "p99": 135.6479972600937 + }, + "combine": { + "p50": 70.11199742555618, + "p90": 78.14399898052216, + "p95": 83.74399691820145, + "p99": 94.94400024414062 + }, + "roundtrip": { + "p50": 124.38400089740753, + "p90": 143.77599954605103, + "p95": 155.2319973707199, + "p99": 184.38400328159332 + }, + "isolatedSum": { + "p50": 144.6079984307289, + "p90": 167.29599982500076, + "p95": 180.67199736833572, + "p99": 230.5919975042343 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 85.60000360012054, + "p90": 97.21600264310837, + "p95": 104.2879968881607, + "p99": 122.81599640846252 + }, + "combine": { + "p50": 78.40000092983246, + "p90": 89.79199826717377, + "p95": 95.2640026807785, + "p99": 107.19999670982361 + }, + "roundtrip": { + "p50": 136.19199395179749, + "p90": 152.6080071926117, + "p95": 160.67199409008026, + "p99": 188.57599794864655 + }, + "isolatedSum": { + "p50": 164.000004529953, + "p90": 187.00800091028214, + "p95": 199.5519995689392, + "p99": 230.01599311828613 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 116.73600226640701, + "p90": 128.48000228405, + "p95": 
133.12000036239624, + "p99": 152.92799472808838 + }, + "combine": { + "p50": 106.04800283908844, + "p90": 113.76000195741653, + "p95": 119.45600062608719, + "p99": 131.8719983100891 + }, + "roundtrip": { + "p50": 197.91999459266663, + "p90": 210.62399446964264, + "p95": 217.6000028848648, + "p99": 242.01600253582 + }, + "isolatedSum": { + "p50": 222.78400510549545, + "p90": 242.24000424146652, + "p95": 252.57600098848343, + "p99": 284.7999930381775 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-9ca51f4f", + "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h200_d982b749", + "comparisonKey": "4dde4e46080a91eb", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:49:18.590174+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_4", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + 
}, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": "set:8:d8d49658059863f2", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271601584", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271601584", + "createdAt": "2026-06-26T23:47:53Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 105.0880029797554, + "p90": 132.7040046453476, + "p95": 145.21600306034088, + "p99": 190.11199474334717 + }, + "combine": { + "p50": 71.3919997215271, + "p90": 93.37600320577621, + "p95": 98.01600128412247, + "p99": 108.51199924945831 + }, + "roundtrip": { + "p50": 123.45600128173828, + "p90": 180.60800433158875, + "p95": 190.7840073108673, + "p99": 233.2800030708313 + }, + "isolatedSum": { + "p50": 176.4800027012825, + "p90": 226.0800078511238, + "p95": 243.23200434446335, + "p99": 298.6239939928055 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 87.10400015115738, + "p90": 130.0799995660782, + "p95": 139.96799290180206, + "p99": 167.1999990940094 + }, + "combine": { + "p50": 75.58400183916092, + "p90": 97.50399738550186, + "p95": 105.31199723482132, + "p99": 143.61600577831268 + }, + "roundtrip": { + "p50": 144.83200013637543, + "p90": 179.1040003299713, + "p95": 191.96799397468567, + "p99": 229.5680046081543 + }, + "isolatedSum": { + "p50": 
162.6880019903183, + "p90": 227.58399695158005, + "p95": 245.27999013662338, + "p99": 310.8160048723221 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 72.06399738788605, + "p90": 86.14400029182434, + "p95": 95.51999717950821, + "p99": 111.87200248241425 + }, + "combine": { + "p50": 68.67200136184692, + "p90": 80.06399869918823, + "p95": 85.66399663686752, + "p99": 102.52799838781357 + }, + "roundtrip": { + "p50": 121.95199728012085, + "p90": 146.43199741840363, + "p95": 154.7199934720993, + "p99": 173.47200214862823 + }, + "isolatedSum": { + "p50": 140.73599874973297, + "p90": 166.20799899101257, + "p95": 181.18399381637573, + "p99": 214.4000008702278 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 72.9919970035553, + "p90": 93.12000125646591, + "p95": 103.2319962978363, + "p99": 120.7360029220581 + }, + "combine": { + "p50": 69.24799829721451, + "p90": 82.07999914884567, + "p95": 88.41600269079208, + "p99": 100.67199915647507 + }, + "roundtrip": { + "p50": 124.1919994354248, + "p90": 152.8639942407608, + "p95": 164.09599781036377, + "p99": 197.85599410533905 + }, + "isolatedSum": { + "p50": 142.2399953007698, + "p90": 175.20000040531158, + "p95": 191.6479989886284, + "p99": 221.40800207853317 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + 
"globalTokens": 128, + "dispatch": { + "p50": 69.72800195217133, + "p90": 91.36000275611877, + "p95": 105.66399991512299, + "p99": 141.56800508499146 + }, + "combine": { + "p50": 70.592001080513, + "p90": 82.04799890518188, + "p95": 87.3280018568039, + "p99": 99.45599734783173 + }, + "roundtrip": { + "p50": 123.96799772977829, + "p90": 151.32799744606018, + "p95": 162.23999857902527, + "p99": 186.46399676799774 + }, + "isolatedSum": { + "p50": 140.32000303268433, + "p90": 173.40800166130066, + "p95": 192.99200177192688, + "p99": 241.02400243282318 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 81.85599744319916, + "p90": 108.89600217342377, + "p95": 117.95199662446976, + "p99": 130.5599957704544 + }, + "combine": { + "p50": 77.56800204515457, + "p90": 96.25600278377533, + "p95": 99.7759997844696, + "p99": 110.43199896812439 + }, + "roundtrip": { + "p50": 136.19199395179749, + "p90": 168.19199919700623, + "p95": 180.25599420070648, + "p99": 210.01599729061127 + }, + "isolatedSum": { + "p50": 159.42399948835373, + "p90": 205.1520049571991, + "p95": 217.72799640893936, + "p99": 240.9919947385788 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 94.81599926948547, + "p90": 107.16799646615982, + "p95": 116.99199676513672, + "p99": 140.6719982624054 + }, + "combine": { + "p50": 85.75999736785889, + "p90": 97.79199957847595, + "p95": 106.04800283908844, + "p99": 131.04000687599182 + }, + "roundtrip": { + "p50": 156.5759927034378, + "p90": 172.19200730323792, 
+ "p95": 179.00800704956055, + "p99": 190.49599766731262 + }, + "isolatedSum": { + "p50": 180.57599663734436, + "p90": 204.95999604463577, + "p95": 223.03999960422516, + "p99": 271.7120051383972 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 115.90400338172913, + "p90": 139.23199474811554, + "p95": 145.47200500965118, + "p99": 182.65600502490997 + }, + "combine": { + "p50": 103.84000092744827, + "p90": 120.25599926710129, + "p95": 126.56000256538391, + "p99": 146.68799936771393 + }, + "roundtrip": { + "p50": 196.19199633598328, + "p90": 217.15199947357178, + "p95": 223.68000447750092, + "p99": 249.2160052061081 + }, + "isolatedSum": { + "p50": 219.7440043091774, + "p90": 259.4879940152168, + "p95": 272.0320075750351, + "p99": 329.3440043926239 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-b7604172", + "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||b029c1a6fded400", + "colorKey": "h200_d982b749", + "comparisonKey": "4dde4e46080a91eb", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:14:07.082435+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_2", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + 
"label": "H200 EP8 · deepep · bf16", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b029c1a6fded400", + "workloadId": "set:3:07d544ac2af401ec", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28272379468", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272379468", + "createdAt": "2026-06-27T00:12:44Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 72.86400347948074, + "p90": 82.24000036716461, + "p95": 88.73599767684937, + "p99": 117.66400188207626 + }, + "combine": { + "p50": 70.01599669456482, + "p90": 75.39200037717819, + "p95": 80.6720033288002, + "p99": 96.0640013217926 + }, + "roundtrip": { + "p50": 123.90399724245071, + "p90": 139.74399864673615, + "p95": 148.47999811172485, + "p99": 178.75200510025024 + }, + "isolatedSum": { + "p50": 142.88000017404556, + "p90": 157.6320007443428, + "p95": 169.40800100564957, + "p99": 213.72800320386887 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + 
"recvTokensMax": 47, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 81.66400343179703, + "p90": 93.53599697351456, + "p95": 100.70399940013885, + "p99": 128.09599936008453 + }, + "combine": { + "p50": 78.11199873685837, + "p90": 84.51200276613235, + "p95": 89.02399986982346, + "p99": 123.6800029873848 + }, + "roundtrip": { + "p50": 135.13599336147308, + "p90": 146.7200070619583, + "p95": 153.9199948310852, + "p99": 176.89600586891174 + }, + "isolatedSum": { + "p50": 159.7760021686554, + "p90": 178.0479997396469, + "p95": 189.7279992699623, + "p99": 251.77600234746933 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 117.63200163841248, + "p90": 127.3919939994812, + "p95": 134.0479999780655, + "p99": 154.94400262832642 + }, + "combine": { + "p50": 104.67199981212616, + "p90": 115.42399972677231, + "p95": 121.98399752378464, + "p99": 159.93599593639374 + }, + "roundtrip": { + "p50": 196.25599682331085, + "p90": 206.08000457286835, + "p95": 214.08000588417053, + "p99": 245.27999758720398 + }, + "isolatedSum": { + "p50": 222.30400145053864, + "p90": 242.8159937262535, + "p95": 256.0319975018501, + "p99": 314.87999856472015 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-875c4f49", + "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||c774c8e4abb34da", + "colorKey": "h200_d982b749", + "comparisonKey": "c8b8b28ca3d145bb", + 
"schemaVersion": 3, + "generatedAt": "2026-06-27T00:54:14.463003+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_0", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "c774c8e4abb34da", + "workloadId": "set:5:d8d49658059863f2", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28273509838", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28273509838", + "createdAt": "2026-06-27T00:52:52Z", + "sha": "2c15d9415503e9ccb84cd49cf446a122796efc1e" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 71.3919997215271, + "p90": 87.5839963555336, + "p95": 96.3520035147667, + "p99": 139.55199718475342 + }, + "combine": { + "p50": 68.09599697589874, + "p90": 
79.55200225114822, + "p95": 84.95999872684479, + "p99": 111.32799834012985 + }, + "roundtrip": { + "p50": 119.55200135707855, + "p90": 147.20000326633453, + "p95": 157.18400478363037, + "p99": 204.6079933643341 + }, + "isolatedSum": { + "p50": 139.48799669742584, + "p90": 167.13599860668182, + "p95": 181.31200224161148, + "p99": 250.87999552488327 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 72.9919970035553, + "p90": 94.36800330877304, + "p95": 103.13600301742554, + "p99": 130.68799674510956 + }, + "combine": { + "p50": 68.12799721956253, + "p90": 80.9599980711937, + "p95": 88.19200098514557, + "p99": 105.15200346708298 + }, + "roundtrip": { + "p50": 121.5360015630722, + "p90": 147.16799557209015, + "p95": 157.98400342464447, + "p99": 185.92000007629395 + }, + "isolatedSum": { + "p50": 141.11999422311783, + "p90": 175.32800137996674, + "p95": 191.3280040025711, + "p99": 235.84000021219254 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 73.02399724721909, + "p90": 95.48799693584442, + "p95": 104.86400127410889, + "p99": 133.08799266815186 + }, + "combine": { + "p50": 68.76800209283829, + "p90": 80.57600259780884, + "p95": 86.30400151014328, + "p99": 105.92000186443329 + }, + "roundtrip": { + "p50": 120.12799829244614, + "p90": 145.56799829006195, + "p95": 155.64799308776855, + "p99": 182.68799781799316 + }, + "isolatedSum": { + "p50": 141.79199934005737, + "p90": 176.06399953365326, + "p95": 191.16800278425217, + "p99": 239.00799453258514 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 73.66400212049484, + "p90": 93.75999867916107, + "p95": 98.65599870681763, + "p99": 113.18399757146835 + }, + "combine": { + "p50": 68.76800209283829, + "p90": 80.54400235414505, + "p95": 82.49600231647491, + "p99": 91.77599847316742 + }, + "roundtrip": { + "p50": 121.08799815177917, + "p90": 145.9839940071106, + "p95": 156.99200332164764, + "p99": 216.35200083255768 + }, + "isolatedSum": { + "p50": 142.43200421333313, + "p90": 174.30400103330612, + "p95": 181.15200102329254, + "p99": 204.95999604463577 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 73.91999661922455, + "p90": 96.67199850082397, + "p95": 103.2319962978363, + "p99": 125.34399330615997 + }, + "combine": { + "p50": 70.75200229883194, + "p90": 84.03199911117554, + "p95": 89.59999680519104, + "p99": 103.87200117111206 + }, + "roundtrip": { + "p50": 123.9359974861145, + "p90": 155.8080017566681, + "p95": 170.49600183963776, + "p99": 205.6960016489029 + }, + "isolatedSum": { + "p50": 144.6719989180565, + "p90": 180.7039976119995, + "p95": 192.83199310302734, + "p99": 229.21599447727203 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-19b41153", + "identity": 
"h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|small-amplitude|none|none|0|tuned||8c8497a77d9085d", + "colorKey": "h200_d982b749", + "comparisonKey": "fb9666d12f9a34f8", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:05:55.021886+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_0", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "small-amplitude", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "8c8497a77d9085d", + "workloadId": "set:4:d8d49658059863f2", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28272132556", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272132556", + "createdAt": "2026-06-27T00:04:36Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + 
"tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 72.31999933719635, + "p90": 95.0080007314682, + "p95": 99.93600100278854, + "p99": 117.69600212574005 + }, + "combine": { + "p50": 68.00000369548798, + "p90": 79.55200225114822, + "p95": 85.79199761152267, + "p99": 114.04799669981003 + }, + "roundtrip": { + "p50": 120.70400267839432, + "p90": 148.60799908638, + "p95": 156.54399991035461, + "p99": 199.0399956703186 + }, + "isolatedSum": { + "p50": 140.32000303268433, + "p90": 174.56000298261642, + "p95": 185.72799861431122, + "p99": 231.74399882555008 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 73.02399724721909, + "p90": 95.87199985980988, + "p95": 102.91200131177902, + "p99": 124.35200065374374 + }, + "combine": { + "p50": 68.67200136184692, + "p90": 82.75199681520462, + "p95": 89.53599631786346, + "p99": 112.96000331640244 + }, + "roundtrip": { + "p50": 123.10399860143661, + "p90": 151.39199793338776, + "p95": 160.19199788570404, + "p99": 189.69599902629852 + }, + "isolatedSum": { + "p50": 141.695998609066, + "p90": 178.6239966750145, + "p95": 192.4479976296425, + "p99": 237.31200397014618 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 87.52000331878662, + "p90": 135.23200154304504, + "p95": 142.04800128936768, + "p99": 161.21600568294525 + }, + "combine": { + "p50": 77.504001557827, + "p90": 92.38400310277939, + "p95": 97.120001912117, + "p99": 111.77600175142288 + }, + "roundtrip": { + "p50": 135.77599823474884, + "p90": 
158.81599485874176, + "p95": 168.92799735069275, + "p99": 212.67199516296387 + }, + "isolatedSum": { + "p50": 165.02400487661362, + "p90": 227.61600464582443, + "p95": 239.16800320148468, + "p99": 272.99200743436813 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 116.67200177907944, + "p90": 136.83199882507324, + "p95": 145.79200744628906, + "p99": 161.6320013999939 + }, + "combine": { + "p50": 105.76000064611435, + "p90": 121.63200229406357, + "p95": 128.06400656700134, + "p99": 140.60799777507782 + }, + "roundtrip": { + "p50": 195.93599438667297, + "p90": 217.3759937286377, + "p95": 223.4240025281906, + "p99": 252.9279887676239 + }, + "isolatedSum": { + "p50": 222.4320024251938, + "p90": 258.4640011191368, + "p95": 273.8560140132904, + "p99": 302.2399991750717 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-6b3584db", + "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|wide-dynamic-range|none|none|0|tuned||8c8497a77d9085d", + "colorKey": "h200_d982b749", + "comparisonKey": "0dade16dc8be5c94", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:06:19.346761+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_11", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + 
"worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "wide-dynamic-range", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "8c8497a77d9085d", + "workloadId": "set:4:d8d49658059863f2", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28272136313", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272136313", + "createdAt": "2026-06-27T00:04:43Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 75.13599842786789, + "p90": 99.84000027179718, + "p95": 112.38399893045425, + "p99": 175.48799514770508 + }, + "combine": { + "p50": 69.95200365781784, + "p90": 81.95199817419052, + "p95": 87.3280018568039, + "p99": 117.95199662446976 + }, + "roundtrip": { + "p50": 127.51999497413635, + "p90": 157.9200029373169, + "p95": 171.7119961977005, + "p99": 223.26399385929108 + }, + "isolatedSum": { + "p50": 145.08800208568573, + "p90": 181.7919984459877, + "p95": 199.71200078725815, + "p99": 293.43999177217484 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 
630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 76.03199779987335, + "p90": 112.73600161075592, + "p95": 125.37600100040436, + "p99": 209.4080001115799 + }, + "combine": { + "p50": 70.91200351715088, + "p90": 86.30400151014328, + "p95": 95.13600170612335, + "p99": 123.16799908876419 + }, + "roundtrip": { + "p50": 125.11999905109406, + "p90": 156.99200332164764, + "p95": 177.47199535369873, + "p99": 251.64800882339478 + }, + "isolatedSum": { + "p50": 146.94400131702423, + "p90": 199.0400031208992, + "p95": 220.5120027065277, + "p99": 332.5759992003441 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 84.28800106048584, + "p90": 108.15999656915665, + "p95": 115.29599875211716, + "p99": 152.70400047302246 + }, + "combine": { + "p50": 78.36800068616867, + "p90": 91.87199920415878, + "p95": 98.55999797582626, + "p99": 110.17599701881409 + }, + "roundtrip": { + "p50": 138.46400380134583, + "p90": 167.23200678825378, + "p95": 179.45599555969238, + "p99": 238.91200125217438 + }, + "isolatedSum": { + "p50": 162.6560017466545, + "p90": 200.03199577331543, + "p95": 213.85599672794342, + "p99": 262.87999749183655 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 116.09599739313126, + "p90": 137.56799697875977, + "p95": 140.76800644397736, + "p99": 157.47199952602386 + }, + "combine": { + "p50": 
104.35199737548828, + "p90": 122.97599762678146, + "p95": 125.50400197505951, + "p99": 148.5760062932968 + }, + "roundtrip": { + "p50": 198.7520009279251, + "p90": 219.2319929599762, + "p95": 227.58400440216064, + "p99": 269.3440020084381 + }, + "isolatedSum": { + "p50": 220.44799476861954, + "p90": 260.54399460554123, + "p95": 266.27200841903687, + "p99": 306.0480058193207 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-f4f3e72f", + "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|zeros|none|none|0|tuned||8c8497a77d9085d", + "colorKey": "h200_d982b749", + "comparisonKey": "c5d592397744e4a1", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:05:52.426268+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_2", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "zeros", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + 
"scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "8c8497a77d9085d", + "workloadId": "set:4:d8d49658059863f2", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28272129001", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272129001", + "createdAt": "2026-06-27T00:04:29Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 73.7600028514862, + "p90": 98.59199821949005, + "p95": 107.77600109577179, + "p99": 133.31200182437897 + }, + "combine": { + "p50": 70.592001080513, + "p90": 84.54400300979614, + "p95": 90.43200314044952, + "p99": 139.26400244235992 + }, + "roundtrip": { + "p50": 125.59999525547028, + "p90": 159.87199544906616, + "p95": 172.57599532604218, + "p99": 367.2960102558136 + }, + "isolatedSum": { + "p50": 144.3520039319992, + "p90": 183.1360012292862, + "p95": 198.2080042362213, + "p99": 272.5760042667389 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 74.87999647855759, + "p90": 107.04000294208527, + "p95": 128.80000472068787, + "p99": 359.391987323761 + }, + "combine": { + "p50": 70.49600034952164, + "p90": 84.06399935483932, + "p95": 89.88799899816513, + "p99": 102.9760017991066 + }, + "roundtrip": { + "p50": 124.70400333404541, + "p90": 155.10399639606476, + "p95": 165.72800278663635, + "p99": 202.7519941329956 + }, + "isolatedSum": { + "p50": 145.37599682807922, + "p90": 191.1040022969246, + "p95": 218.688003718853, + "p99": 
462.3679891228676 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 83.90399813652039, + "p90": 107.07200318574905, + "p95": 115.9679964184761, + "p99": 136.51199638843536 + }, + "combine": { + "p50": 78.33600044250488, + "p90": 91.93599969148636, + "p95": 97.69599884748459, + "p99": 108.83200168609619 + }, + "roundtrip": { + "p50": 137.2160017490387, + "p90": 170.23999989032745, + "p95": 181.37599527835846, + "p99": 215.36000072956085 + }, + "isolatedSum": { + "p50": 162.23999857902527, + "p90": 199.0080028772354, + "p95": 213.6639952659607, + "p99": 245.34399807453156 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 115.99999666213989, + "p90": 135.71199774742126, + "p95": 143.8400000333786, + "p99": 168.67199540138245 + }, + "combine": { + "p50": 104.73600029945374, + "p90": 121.47200107574463, + "p95": 125.47199428081512, + "p99": 163.00800442695618 + }, + "roundtrip": { + "p50": 196.6720074415207, + "p90": 216.19200706481934, + "p95": 220.5120027065277, + "p99": 240.1919960975647 + }, + "isolatedSum": { + "p50": 220.73599696159363, + "p90": 257.1839988231659, + "p95": 269.3119943141937, + "p99": 331.6799998283386 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-eb6d6f9b", + "identity": 
"h200|deepep|7168|8|256|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h200_3a47b6c9", + "comparisonKey": "4a72e21e2f542236", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:49:45.031759+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_7", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": "set:8:d8d49658059863f2", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271615137", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271615137", + "createdAt": "2026-06-26T23:48:21Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, 
+ "dispatch": { + "p50": 70.97599655389786, + "p90": 85.50400286912918, + "p95": 95.36000341176987, + "p99": 316.79999828338623 + }, + "combine": { + "p50": 68.70400160551071, + "p90": 74.72000271081924, + "p95": 78.72000336647034, + "p99": 94.2080020904541 + }, + "roundtrip": { + "p50": 122.56000190973282, + "p90": 143.26399564743042, + "p95": 153.1199961900711, + "p99": 172.2240000963211 + }, + "isolatedSum": { + "p50": 139.67999815940857, + "p90": 160.22400557994843, + "p95": 174.0800067782402, + "p99": 411.00800037384033 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 72.12799787521362, + "p90": 85.24800091981888, + "p95": 91.93599969148636, + "p99": 119.48800086975098 + }, + "combine": { + "p50": 68.57600063085556, + "p90": 72.83200323581696, + "p95": 77.15199887752533, + "p99": 83.45600217580795 + }, + "roundtrip": { + "p50": 120.83200365304947, + "p90": 129.2160004377365, + "p95": 133.215993642807, + "p99": 145.75999975204468 + }, + "isolatedSum": { + "p50": 140.70399850606918, + "p90": 158.08000415563583, + "p95": 169.0879985690117, + "p99": 202.94400304555893 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 73.63200187683105, + "p90": 90.84799885749817, + "p95": 103.64799946546555, + "p99": 133.02400708198547 + }, + "combine": { + "p50": 70.52800059318542, + "p90": 80.86399734020233, + "p95": 87.74399757385254, + "p99": 105.6319996714592 + }, + "roundtrip": { + "p50": 123.64800274372101, + "p90": 149.59999918937683, + "p95": 158.33599865436554, + "p99": 
186.0480010509491 + }, + "isolatedSum": { + "p50": 144.16000247001648, + "p90": 171.7119961977005, + "p95": 191.39199703931808, + "p99": 238.65600675344467 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 73.18399846553802, + "p90": 83.03999900817871, + "p95": 94.91200000047684, + "p99": 104.09600287675858 + }, + "combine": { + "p50": 69.2799985408783, + "p90": 77.82399654388428, + "p95": 83.10399949550629, + "p99": 110.04800349473953 + }, + "roundtrip": { + "p50": 123.52000176906586, + "p90": 143.19999516010284, + "p95": 152.0960032939911, + "p99": 205.08800446987152 + }, + "isolatedSum": { + "p50": 142.46399700641632, + "p90": 160.863995552063, + "p95": 178.01599949598312, + "p99": 214.1440063714981 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 71.16799801588058, + "p90": 83.36000144481659, + "p95": 94.11200135946274, + "p99": 106.46399855613708 + }, + "combine": { + "p50": 70.04799693822861, + "p90": 78.07999849319458, + "p95": 83.20000022649765, + "p99": 95.71199864149094 + }, + "roundtrip": { + "p50": 124.54400211572647, + "p90": 144.0960019826889, + "p95": 155.008003115654, + "p99": 204.3839991092682 + }, + "isolatedSum": { + "p50": 141.2159949541092, + "p90": 161.43999993801117, + "p95": 177.3120015859604, + "p99": 202.17599719762802 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 
600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 80.60800284147263, + "p90": 89.59999680519104, + "p95": 94.81599926948547, + "p99": 117.53600090742111 + }, + "combine": { + "p50": 77.08799839019775, + "p90": 81.95199817419052, + "p95": 87.3280018568039, + "p99": 95.0080007314682 + }, + "roundtrip": { + "p50": 135.19999384880066, + "p90": 148.47999811172485, + "p95": 156.63999319076538, + "p99": 188.57599794864655 + }, + "isolatedSum": { + "p50": 157.69600123167038, + "p90": 171.55199497938156, + "p95": 182.14400112628937, + "p99": 212.5440016388893 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 94.68799829483032, + "p90": 114.97599631547928, + "p95": 122.36800044775009, + "p99": 148.03199470043182 + }, + "combine": { + "p50": 87.39200234413147, + "p90": 97.59999811649323, + "p95": 102.9760017991066, + "p99": 113.95200341939926 + }, + "roundtrip": { + "p50": 158.87999534606934, + "p90": 176.15999281406403, + "p95": 185.2159947156906, + "p99": 225.600004196167 + }, + "isolatedSum": { + "p50": 182.0800006389618, + "p90": 212.5759944319725, + "p95": 225.3440022468567, + "p99": 261.9839981198311 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 116.22399836778641, + "p90": 125.18399953842163, + "p95": 132.32000172138214, + "p99": 140.83200693130493 + }, + "combine": { + "p50": 105.34399747848511, + "p90": 111.32799834012985, + "p95": 116.28799885511398, + "p99": 123.83999675512314 + }, + "roundtrip": { 
+ "p50": 197.60000705718994, + "p90": 207.2640061378479, + "p95": 214.81600403785706, + "p99": 241.05599522590637 + }, + "isolatedSum": { + "p50": 221.56799584627151, + "p90": 236.51199787855148, + "p95": 248.60800057649612, + "p99": 264.67200368642807 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-dea4952a", + "identity": "h200|deepep|7168|8|384|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||d6c49ae98878760", + "colorKey": "h200_3a47b6c9", + "comparisonKey": "f2cda8ef40003c42", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:53:13.205485+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_7", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 384, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "d6c49ae98878760", + "workloadId": 
"set:8:9a27d0df4b17fa09", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271728983", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271728983", + "createdAt": "2026-06-26T23:51:48Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 73.53600114583969, + "p90": 98.14400225877762, + "p95": 107.45599865913391, + "p99": 121.63200229406357 + }, + "combine": { + "p50": 68.2239979505539, + "p90": 82.24000036716461, + "p95": 87.26400136947632, + "p99": 110.07999628782272 + }, + "roundtrip": { + "p50": 125.59999525547028, + "p90": 155.39200603961945, + "p95": 163.68000209331512, + "p99": 201.6959935426712 + }, + "isolatedSum": { + "p50": 141.75999909639359, + "p90": 180.38400262594223, + "p95": 194.72000002861023, + "p99": 231.7119985818863 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 75.87199658155441, + "p90": 103.2319962978363, + "p95": 112.15999722480774, + "p99": 193.05600225925446 + }, + "combine": { + "p50": 68.60800087451935, + "p90": 83.5840031504631, + "p95": 90.30400216579437, + "p99": 129.60000336170197 + }, + "roundtrip": { + "p50": 123.23199957609177, + "p90": 153.31199765205383, + "p95": 164.38399255275726, + "p99": 185.37600338459015 + }, + "isolatedSum": { + "p50": 144.47999745607376, + "p90": 186.8159994482994, + "p95": 202.4639993906021, + "p99": 322.6560056209564 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1218560, + 
"combineLogicalBytes": 1218560, + "fanoutMean": 5.3125, + "recvTokensMax": 14, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 74.87999647855759, + "p90": 96.92800045013428, + "p95": 105.85600137710571, + "p99": 121.15199863910675 + }, + "combine": { + "p50": 69.2799985408783, + "p90": 82.56000280380249, + "p95": 90.30400216579437, + "p99": 102.04800218343735 + }, + "roundtrip": { + "p50": 125.2799928188324, + "p90": 152.28800475597382, + "p95": 160.8320027589798, + "p99": 174.55999553203583 + }, + "isolatedSum": { + "p50": 144.15999501943588, + "p90": 179.48800325393677, + "p95": 196.16000354290009, + "p99": 223.2000008225441 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2408448, + "combineLogicalBytes": 2408448, + "fanoutMean": 5.25, + "recvTokensMax": 26, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 76.54400169849396, + "p90": 104.2879968881607, + "p95": 111.42399907112122, + "p99": 138.5599970817566 + }, + "combine": { + "p50": 70.52800059318542, + "p90": 85.66399663686752, + "p95": 91.67999774217606, + "p99": 102.59199887514114 + }, + "roundtrip": { + "p50": 126.39999389648438, + "p90": 154.55999970436096, + "p95": 166.97600483894348, + "p99": 208.67200195789337 + }, + "isolatedSum": { + "p50": 147.07200229167938, + "p90": 189.95199352502823, + "p95": 203.10399681329727, + "p99": 241.15199595689774 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4831232, + "combineLogicalBytes": 4831232, + "fanoutMean": 5.265625, + "recvTokensMax": 48, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 78.40000092983246, + "p90": 99.07200187444687, + "p95": 105.98400235176086, + "p99": 126.3359934091568 + }, + "combine": { + 
"p50": 71.61600142717361, + "p90": 85.40800213813782, + "p95": 90.27200192213058, + "p99": 109.40799862146378 + }, + "roundtrip": { + "p50": 129.02399897575378, + "p90": 156.2879979610443, + "p95": 166.143998503685, + "p99": 196.51199877262115 + }, + "isolatedSum": { + "p50": 150.01600235700607, + "p90": 184.4800040125847, + "p95": 196.25600427389145, + "p99": 235.74399203062057 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9848832, + "combineLogicalBytes": 9848832, + "fanoutMean": 5.3671875, + "recvTokensMax": 91, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 85.40800213813782, + "p90": 105.12000322341919, + "p95": 114.04799669981003, + "p99": 131.71200454235077 + }, + "combine": { + "p50": 77.91999727487564, + "p90": 90.59199690818787, + "p95": 96.63999825716019, + "p99": 105.18400371074677 + }, + "roundtrip": { + "p50": 137.2160017490387, + "p90": 163.07200491428375, + "p95": 172.35200107097626, + "p99": 208.064004778862 + }, + "isolatedSum": { + "p50": 163.32799941301346, + "p90": 195.71200013160706, + "p95": 210.68799495697021, + "p99": 236.89600825309753 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 19496960, + "fanoutMean": 5.3125, + "recvTokensMax": 178, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 99.55199807882309, + "p90": 126.65599584579468, + "p95": 135.00800728797913, + "p99": 167.10400581359863 + }, + "combine": { + "p50": 89.24800157546997, + "p90": 106.1440035700798, + "p95": 111.23199760913849, + "p99": 126.65599584579468 + }, + "roundtrip": { + "p50": 162.9759967327118, + "p90": 185.88800728321075, + "p95": 193.6960071325302, + "p99": 255.87201118469238 + }, + "isolatedSum": { + "p50": 188.79999965429306, + "p90": 232.79999941587448, + "p95": 246.24000489711761, 
+ "p99": 293.7600016593933 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 38836224, + "fanoutMean": 5.291015625, + "recvTokensMax": 372, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 118.367999792099, + "p90": 135.42400300502777, + "p95": 143.5520052909851, + "p99": 181.88799917697906 + }, + "combine": { + "p50": 105.34399747848511, + "p90": 119.99999731779099, + "p95": 126.78399682044983, + "p99": 139.0399932861328 + }, + "roundtrip": { + "p50": 197.53600656986237, + "p90": 215.83999693393707, + "p95": 224.48000311851501, + "p99": 253.1839907169342 + }, + "isolatedSum": { + "p50": 223.7119972705841, + "p90": 255.42400032281876, + "p95": 270.33600211143494, + "p99": 320.9279924631119 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77514752, + "combineLogicalBytes": 77514752, + "fanoutMean": 5.2802734375, + "recvTokensMax": 707, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-99defb8b", + "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|alternating-groups|8|decode|normal|none|none|0|tuned||3cd13eac5b27759", + "colorKey": "h200_26ff284b", + "comparisonKey": "7784b2ab75c0721c", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:13:19.040813+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_0", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · alternating-groups", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "alternating-groups", + 
"routingLabel": "alternating-groups", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "3cd13eac5b27759", + "workloadId": "set:3:24add4cb1eb472b4", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28272355894", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272355894", + "createdAt": "2026-06-27T00:11:56Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 72.51200079917908, + "p90": 88.95999938249588, + "p95": 100.96000134944916, + "p99": 118.23999881744385 + }, + "combine": { + "p50": 66.880002617836, + "p90": 74.81600344181061, + "p95": 79.83999699354172, + "p99": 99.55199807882309 + }, + "roundtrip": { + "p50": 121.24799937009811, + "p90": 136.89599931240082, + "p95": 147.74399995803833, + "p99": 232.92799293994904 + }, + "isolatedSum": { + "p50": 139.39200341701508, + "p90": 163.7760028243065, + "p95": 180.79999834299088, + "p99": 217.79199689626694 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3297280, + "combineLogicalBytes": 3297280, + "fanoutMean": 3.59375, + "recvTokensMax": 61, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 83.90399813652039, + "p90": 99.84000027179718, + "p95": 112.44799941778183, + "p99": 135.93600690364838 + }, + "combine": { + "p50": 75.19999891519547, + "p90": 84.927998483181, + "p95": 90.81599861383438, + "p99": 107.45599865913391 + }, + "roundtrip": { + "p50": 137.40800321102142, + "p90": 155.35999834537506, + "p95": 164.92800414562225, + "p99": 303.6159873008728 + }, + "isolatedSum": { + "p50": 159.10399705171585, + "p90": 184.76799875497818, + "p95": 203.2639980316162, + "p99": 243.3920055627823 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13275136, + "combineLogicalBytes": 13275136, + "fanoutMean": 3.6171875, + "recvTokensMax": 236, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 126.01600587368011, + "p90": 135.45599579811096, + "p95": 139.8400068283081, + "p99": 168.57600212097168 + }, + "combine": { + "p50": 111.58400028944016, + "p90": 120.64000219106674, + "p95": 128.31999361515045, + "p99": 143.0719941854477 + }, + "roundtrip": { + "p50": 210.81599593162537, + "p90": 224.0000069141388, + "p95": 234.49599742889404, + "p99": 253.76001000404358 + }, + "isolatedSum": { + "p50": 237.60000616312027, + "p90": 256.0959979891777, + "p95": 268.16000044345856, + "p99": 311.6479963064194 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 53172224, + "combineLogicalBytes": 53172224, + "fanoutMean": 3.6220703125, + "recvTokensMax": 934, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-14a4cdc0", + "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|alternating-groups@s1|8|decode|normal|none|none|1|tuned||f8662de0b3559f9", + "colorKey": "h200_b02e4015", + "comparisonKey": "7784b2ab75c0721c", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:13:31.348412+00:00", + "status": "valid", + 
"publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_7", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · alternating-groups@s1", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "alternating-groups", + "routingLabel": "alternating-groups@s1", + "routingStep": 1, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "f8662de0b3559f9", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28272358996", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272358996", + "createdAt": "2026-06-27T00:12:03Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 70.75200229883194, + "p90": 98.39999675750732, + "p95": 108.57599973678589, + "p99": 136.03200018405914 + }, + "combine": { + "p50": 67.52000004053116, + "p90": 79.83999699354172, + "p95": 84.09599959850311, + "p99": 
104.09600287675858 + }, + "roundtrip": { + "p50": 122.8799968957901, + "p90": 146.62399888038635, + "p95": 155.32800555229187, + "p99": 178.3359944820404 + }, + "isolatedSum": { + "p50": 138.2720023393631, + "p90": 178.23999375104904, + "p95": 192.671999335289, + "p99": 240.12800306081772 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3297280, + "combineLogicalBytes": 3297280, + "fanoutMean": 3.59375, + "recvTokensMax": 61, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 84.44800227880478, + "p90": 110.30399799346924, + "p95": 138.20800185203552, + "p99": 196.22400403022766 + }, + "combine": { + "p50": 75.16799867153168, + "p90": 85.34400165081024, + "p95": 91.00800007581711, + "p99": 101.02400183677673 + }, + "roundtrip": { + "p50": 135.3919953107834, + "p90": 156.3200056552887, + "p95": 166.4000004529953, + "p99": 198.36799800395966 + }, + "isolatedSum": { + "p50": 159.61600095033646, + "p90": 195.64799964427948, + "p95": 229.21600192785263, + "p99": 297.2480058670044 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13275136, + "combineLogicalBytes": 13275136, + "fanoutMean": 3.6171875, + "recvTokensMax": 236, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 128.48000228405, + "p90": 142.91200041770935, + "p95": 151.36000514030457, + "p99": 290.0159955024719 + }, + "combine": { + "p50": 111.455999314785, + "p90": 123.6800029873848, + "p95": 127.93600559234619, + "p99": 143.71199905872345 + }, + "roundtrip": { + "p50": 210.81599593162537, + "p90": 223.26399385929108, + "p95": 229.34399545192719, + "p99": 257.79199600219727 + }, + "isolatedSum": { + "p50": 239.936001598835, + "p90": 266.59200340509415, + "p95": 279.29601073265076, + "p99": 433.7279945611954 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 
53172224, + "combineLogicalBytes": 53172224, + "fanoutMean": 3.6220703125, + "recvTokensMax": 934, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-4bdc0b92", + "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|alternating-groups@s2|8|decode|normal|none|none|2|tuned||3cd13eac5b27759", + "colorKey": "h200_ad2e3b5c", + "comparisonKey": "7784b2ab75c0721c", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:13:31.907403+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_4", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · alternating-groups@s2", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "alternating-groups", + "routingLabel": "alternating-groups@s2", + "routingStep": 2, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "3cd13eac5b27759", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": 
"SemiAnalysisAI/InferenceX", + "run": { + "id": "28272362308", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272362308", + "createdAt": "2026-06-27T00:12:10Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 71.32799923419952, + "p90": 93.82399916648865, + "p95": 109.72800105810165, + "p99": 145.1520025730133 + }, + "combine": { + "p50": 66.880002617836, + "p90": 72.25599884986877, + "p95": 80.32000064849854, + "p99": 91.39200299978256 + }, + "roundtrip": { + "p50": 123.48800152540207, + "p90": 140.51200449466705, + "p95": 156.8319946527481, + "p99": 195.64799964427948 + }, + "isolatedSum": { + "p50": 138.20800185203552, + "p90": 166.07999801635742, + "p95": 190.0480017066002, + "p99": 236.54400557279587 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3297280, + "combineLogicalBytes": 3297280, + "fanoutMean": 3.59375, + "recvTokensMax": 61, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 81.4720019698143, + "p90": 136.48000359535217, + "p95": 151.13599598407745, + "p99": 198.04799556732178 + }, + "combine": { + "p50": 75.80800354480743, + "p90": 89.47200328111649, + "p95": 102.91200131177902, + "p99": 122.36800044775009 + }, + "roundtrip": { + "p50": 134.5279961824417, + "p90": 149.31200444698334, + "p95": 162.9440039396286, + "p99": 204.73599433898926 + }, + "isolatedSum": { + "p50": 157.28000551462173, + "p90": 225.95200687646866, + "p95": 254.04799729585648, + "p99": 320.41599601507187 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13275136, + "combineLogicalBytes": 13275136, + "fanoutMean": 3.6171875, + "recvTokensMax": 236, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 124.60800260305405, + "p90": 
131.80799782276154, + "p95": 137.2160017490387, + "p99": 164.35199975967407 + }, + "combine": { + "p50": 111.00800335407257, + "p90": 119.39200013875961, + "p95": 125.5359947681427, + "p99": 155.03999590873718 + }, + "roundtrip": { + "p50": 208.41600000858307, + "p90": 218.6560034751892, + "p95": 229.72799837589264, + "p99": 263.3279860019684 + }, + "isolatedSum": { + "p50": 235.61600595712662, + "p90": 251.19999796152115, + "p95": 262.7519965171814, + "p99": 319.39199566841125 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 53172224, + "combineLogicalBytes": 53172224, + "fanoutMean": 3.6220703125, + "recvTokensMax": 934, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-fcadbf18", + "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|alternating-groups@s3|8|decode|normal|none|none|3|tuned||f8662de0b3559f9", + "colorKey": "h200_ae2e3cef", + "comparisonKey": "7784b2ab75c0721c", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:13:36.495887+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_1", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · alternating-groups@s3", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "alternating-groups", + "routingLabel": "alternating-groups@s3", + "routingStep": 3, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": 
"backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "f8662de0b3559f9", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28272365812", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272365812", + "createdAt": "2026-06-27T00:12:17Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 71.48800045251846, + "p90": 94.46399658918381, + "p95": 99.29600358009338, + "p99": 110.23999750614166 + }, + "combine": { + "p50": 67.10399687290192, + "p90": 80.09599894285202, + "p95": 84.54400300979614, + "p99": 108.31999778747559 + }, + "roundtrip": { + "p50": 119.61600184440613, + "p90": 148.83199334144592, + "p95": 158.01599621772766, + "p99": 279.9359858036041 + }, + "isolatedSum": { + "p50": 138.59199732542038, + "p90": 174.55999553203583, + "p95": 183.84000658988953, + "p99": 218.55999529361725 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3297280, + "combineLogicalBytes": 3297280, + "fanoutMean": 3.59375, + "recvTokensMax": 61, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 80.83199709653854, + "p90": 99.45599734783173, + "p95": 105.05600273609161, + "p99": 118.07999759912491 + }, + "combine": { + "p50": 75.23199915885925, + "p90": 87.52000331878662, + "p95": 92.0960009098053, + "p99": 108.51199924945831 + }, + "roundtrip": { + "p50": 133.91999900341034, + "p90": 154.78399395942688, + 
"p95": 162.04799711704254, + "p99": 176.1920005083084 + }, + "isolatedSum": { + "p50": 156.0639962553978, + "p90": 186.97600066661835, + "p95": 197.1520036458969, + "p99": 226.59199684858322 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13275136, + "combineLogicalBytes": 13275136, + "fanoutMean": 3.6171875, + "recvTokensMax": 236, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 125.15200674533844, + "p90": 135.96799969673157, + "p95": 141.79199934005737, + "p99": 205.34400641918182 + }, + "combine": { + "p50": 109.72800105810165, + "p90": 120.15999853610992, + "p95": 123.36000055074692, + "p99": 136.7039978504181 + }, + "roundtrip": { + "p50": 207.96799659729004, + "p90": 225.50399601459503, + "p95": 231.77599906921387, + "p99": 246.20799720287323 + }, + "isolatedSum": { + "p50": 234.8800078034401, + "p90": 256.1279982328415, + "p95": 265.1519998908043, + "p99": 342.0480042695999 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 53172224, + "combineLogicalBytes": 53172224, + "fanoutMean": 3.6220703125, + "recvTokensMax": 934, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-f361a9a4", + "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|decode|normal|none|none|0|tuned||2279937619f3971", + "colorKey": "h200_b5c683eb", + "comparisonKey": "d82096ba4baa0cd5", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:56:27.284944+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_8", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + 
"label": "H200 EP8 · deepep · bf16 · balanced", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced", + "routingLabel": "balanced", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "2279937619f3971", + "workloadId": "set:4:7af12818400d6348", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271830346", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271830346", + "createdAt": "2026-06-26T23:54:59Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 76.73600316047668, + "p90": 126.24000012874603, + "p95": 134.46399569511414, + "p99": 156.63999319076538 + }, + "combine": { + "p50": 72.41600006818771, + "p90": 83.71199667453766, + "p95": 87.07199990749359, + "p99": 99.13600236177444 + }, + "roundtrip": { + "p50": 128.38399410247803, + "p90": 148.03199470043182, + "p95": 154.62400019168854, + "p99": 179.6479970216751 + }, + "isolatedSum": { + "p50": 149.1520032286644, + "p90": 209.9519968032837, + "p95": 221.53599560260773, + "p99": 255.77599555253983 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 8, + 
"recvTokensMax": 8, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 75.83999633789062, + "p90": 87.99999952316284, + "p95": 98.11200201511383, + "p99": 113.02399635314941 + }, + "combine": { + "p50": 71.84000313282013, + "p90": 79.6160027384758, + "p95": 85.56800335645676, + "p99": 95.87199985980988 + }, + "roundtrip": { + "p50": 126.81600451469421, + "p90": 139.67999815940857, + "p95": 149.63200688362122, + "p99": 170.20800709724426 + }, + "isolatedSum": { + "p50": 147.67999947071075, + "p90": 167.61600226163864, + "p95": 183.6800053715706, + "p99": 208.8959962129593 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 7340032, + "combineLogicalBytes": 7340032, + "fanoutMean": 8, + "recvTokensMax": 64, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 92.73599833250046, + "p90": 105.50399869680405, + "p95": 112.41599917411804, + "p99": 132.60799646377563 + }, + "combine": { + "p50": 81.98399841785431, + "p90": 93.56799721717834, + "p95": 99.58399832248688, + "p99": 112.57600039243698 + }, + "roundtrip": { + "p50": 148.70400726795197, + "p90": 168.7999963760376, + "p95": 180.7679980993271, + "p99": 196.6720074415207 + }, + "isolatedSum": { + "p50": 174.71999675035477, + "p90": 199.0719959139824, + "p95": 211.99999749660492, + "p99": 245.18399685621262 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 8, + "recvTokensMax": 256, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 139.00800049304962, + "p90": 175.99999904632568, + "p95": 181.34400248527527, + "p99": 197.91999459266663 + }, + "combine": { + "p50": 127.20000743865967, + "p90": 150.68799257278442, + "p95": 
153.6639928817749, + "p99": 160.5439931154251 + }, + "roundtrip": { + "p50": 232.92799293994904, + "p90": 266.04801416397095, + "p95": 271.5199887752533, + "p99": 294.20799016952515 + }, + "isolatedSum": { + "p50": 266.2080079317093, + "p90": 326.6879916191101, + "p95": 335.00799536705017, + "p99": 358.46398770809174 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 8, + "recvTokensMax": 1024, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-d65f5a76", + "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|decode|normal|none|none|0|tuned||ffa946582edb500", + "colorKey": "h200_b5c683eb", + "comparisonKey": "d82096ba4baa0cd5", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:02:47.642624+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_6", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · balanced", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced", + "routingLabel": "balanced", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": 
true, + "traceSignature": "ffa946582edb500", + "workloadId": "set:8:7af12818400d6348", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28272028751", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272028751", + "createdAt": "2026-06-27T00:01:16Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 75.99999755620956, + "p90": 106.175996363163, + "p95": 117.60000139474869, + "p99": 352.512001991272 + }, + "combine": { + "p50": 70.68800181150436, + "p90": 85.9839990735054, + "p95": 90.52799642086029, + "p99": 104.12800312042236 + }, + "roundtrip": { + "p50": 124.60800260305405, + "p90": 158.62399339675903, + "p95": 166.46400094032288, + "p99": 186.27199530601501 + }, + "isolatedSum": { + "p50": 146.68799936771393, + "p90": 192.1599954366684, + "p95": 208.12799781560898, + "p99": 456.64000511169434 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 8, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 74.65600222349167, + "p90": 98.91200065612793, + "p95": 104.09600287675858, + "p99": 114.84800279140472 + }, + "combine": { + "p50": 70.65600156784058, + "p90": 87.20000088214874, + "p95": 91.32800251245499, + "p99": 106.46399855613708 + }, + "roundtrip": { + "p50": 125.59999525547028, + "p90": 161.02400422096252, + "p95": 170.78399658203125, + "p99": 197.05599546432495 + }, + "isolatedSum": { + "p50": 145.31200379133224, + "p90": 186.11200153827667, + "p95": 195.42400538921356, + "p99": 221.3120013475418 + }, + "roundtripMeasured": 
true, + "dispatchLogicalBytes": 1835008, + "combineLogicalBytes": 1835008, + "fanoutMean": 8, + "recvTokensMax": 16, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 74.20799881219864, + "p90": 102.24000364542007, + "p95": 111.35999858379364, + "p99": 129.63199615478516 + }, + "combine": { + "p50": 71.87200337648392, + "p90": 88.22400122880936, + "p95": 94.52799707651138, + "p99": 115.26399850845337 + }, + "roundtrip": { + "p50": 125.08800625801086, + "p90": 153.53600680828094, + "p95": 163.87200355529785, + "p99": 176.86399817466736 + }, + "isolatedSum": { + "p50": 146.08000218868256, + "p90": 190.46400487422943, + "p95": 205.88799566030502, + "p99": 244.89599466323853 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 8, + "recvTokensMax": 32, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 74.30399954319, + "p90": 98.49599748849869, + "p95": 106.59199953079224, + "p99": 120.19199877977371 + }, + "combine": { + "p50": 71.16799801588058, + "p90": 86.36800199747086, + "p95": 90.52799642086029, + "p99": 109.40799862146378 + }, + "roundtrip": { + "p50": 124.64000284671783, + "p90": 156.73600137233734, + "p95": 164.48000073432922, + "p99": 189.15200233459473 + }, + "isolatedSum": { + "p50": 145.4719975590706, + "p90": 184.86399948596954, + "p95": 197.11999595165253, + "p99": 229.5999974012375 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 7340032, + "combineLogicalBytes": 7340032, + "fanoutMean": 8, + "recvTokensMax": 64, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 75.26399940252304, + "p90": 102.59199887514114, + "p95": 109.76000130176544, + "p99": 
125.59999525547028 + }, + "combine": { + "p50": 76.92799717187881, + "p90": 91.23200178146362, + "p95": 94.94400024414062, + "p99": 105.82400113344193 + }, + "roundtrip": { + "p50": 128.7039965391159, + "p90": 160.51200032234192, + "p95": 171.07200622558594, + "p99": 223.13599288463593 + }, + "isolatedSum": { + "p50": 152.19199657440186, + "p90": 193.82400065660477, + "p95": 204.70400154590607, + "p99": 231.4239963889122 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 8, + "recvTokensMax": 128, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 90.59199690818787, + "p90": 110.52799969911575, + "p95": 119.00799721479416, + "p99": 143.39199662208557 + }, + "combine": { + "p50": 81.53600245714188, + "p90": 98.11200201511383, + "p95": 105.79200088977814, + "p99": 123.4240010380745 + }, + "roundtrip": { + "p50": 145.4080045223236, + "p90": 173.0239987373352, + "p95": 180.4479956626892, + "p99": 203.45599949359894 + }, + "isolatedSum": { + "p50": 172.12799936532974, + "p90": 208.64000171422958, + "p95": 224.7999981045723, + "p99": 266.81599766016006 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 8, + "recvTokensMax": 256, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 110.84800213575363, + "p90": 130.17599284648895, + "p95": 137.56799697875977, + "p99": 212.12799847126007 + }, + "combine": { + "p50": 95.13600170612335, + "p90": 114.20799791812897, + "p95": 124.57600235939026, + "p99": 243.42399835586548 + }, + "roundtrip": { + "p50": 178.14399302005768, + "p90": 205.24799823760986, + "p95": 233.40800404548645, + "p99": 432.2560131549835 + }, + "isolatedSum": { + "p50": 205.98400384187698, + "p90": 
244.38399076461792, + "p95": 262.14399933815, + "p99": 455.55199682712555 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 8, + "recvTokensMax": 512, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 133.44000279903412, + "p90": 152.48000621795654, + "p95": 157.95199573040009, + "p99": 172.2240000963211 + }, + "combine": { + "p50": 125.72799623012543, + "p90": 140.60799777507782, + "p95": 145.31199634075165, + "p99": 176.7359972000122 + }, + "roundtrip": { + "p50": 237.2480034828186, + "p90": 255.51998615264893, + "p95": 262.65600323677063, + "p99": 295.9040105342865 + }, + "isolatedSum": { + "p50": 259.16799902915955, + "p90": 293.08800399303436, + "p95": 303.26399207115173, + "p99": 348.9599972963333 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 8, + "recvTokensMax": 1024, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-26bc6c27", + "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|8|decode|normal|none|none|0|tuned||d02a66236b524b8", + "colorKey": "h200_d0dfa19a", + "comparisonKey": "5d5c9be2dc9b5f1f", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:56:33.428125+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_10", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · balanced-rank-local", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + 
"routing": "balanced-rank-local", + "routingLabel": "balanced-rank-local", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "d02a66236b524b8", + "workloadId": "set:4:2eebbed158fe1320", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271837870", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271837870", + "createdAt": "2026-06-26T23:55:13Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 65.95200300216675, + "p90": 102.7199998497963, + "p95": 115.55200070142746, + "p99": 166.6560024023056 + }, + "combine": { + "p50": 58.6559996008873, + "p90": 72.4480003118515, + "p95": 78.59200239181519, + "p99": 95.64799815416336 + }, + "roundtrip": { + "p50": 112.44799941778183, + "p90": 152.70400047302246, + "p95": 159.2320054769516, + "p99": 181.2479943037033 + }, + "isolatedSum": { + "p50": 124.60800260305405, + "p90": 175.1680001616478, + "p95": 194.14400309324265, + "p99": 262.30400055646896 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 114688, + "combineLogicalBytes": 114688, + "fanoutMean": 1, + "recvTokensMax": 4, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 
+ }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 67.9360032081604, + "p90": 85.66399663686752, + "p95": 95.0080007314682, + "p99": 111.00800335407257 + }, + "combine": { + "p50": 59.93599817156792, + "p90": 70.88000327348709, + "p95": 77.18399912118912, + "p99": 92.03200042247772 + }, + "roundtrip": { + "p50": 112.2559979557991, + "p90": 138.11199367046356, + "p95": 150.2400040626526, + "p99": 209.6319943666458 + }, + "isolatedSum": { + "p50": 127.87200137972832, + "p90": 156.54399991035461, + "p95": 172.19199985265732, + "p99": 203.0400037765503 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 1, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 82.97599852085114, + "p90": 103.71199995279312, + "p95": 112.15999722480774, + "p99": 135.23200154304504 + }, + "combine": { + "p50": 69.76000219583511, + "p90": 85.05599945783615, + "p95": 93.88799965381622, + "p99": 128.60800325870514 + }, + "roundtrip": { + "p50": 125.56800246238708, + "p90": 148.70400726795197, + "p95": 165.92000424861908, + "p99": 200.3519982099533 + }, + "isolatedSum": { + "p50": 152.73600071668625, + "p90": 188.76799941062927, + "p95": 206.04799687862396, + "p99": 263.8400048017502 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 1, + "recvTokensMax": 32, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 83.39200168848038, + "p90": 109.27999764680862, + "p95": 120.28799951076508, + "p99": 153.18399667739868 + }, + "combine": { + "p50": 69.50400024652481, + "p90": 82.87999778985977, + "p95": 90.27200192213058, + "p99": 100.89600086212158 + }, + "roundtrip": { + "p50": 128.67200374603271, + "p90": 
153.53600680828094, + "p95": 162.62400150299072, + "p99": 190.65600633621216 + }, + "isolatedSum": { + "p50": 152.8960019350052, + "p90": 192.1599954366684, + "p95": 210.56000143289566, + "p99": 254.07999753952026 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 1, + "recvTokensMax": 128, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-b2e52442", + "identity": "h200|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|balanced+eplb|8|decode|normal|none|none|0|tuned||f0e66a15078595b", + "colorKey": "h200_06544e53", + "comparisonKey": "57040e121807e028", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:02:47.649756+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_10", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · balanced+eplb", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "balanced", + "routingLabel": "balanced+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "f0e66a15078595b", + "workloadId": "set:8:7af12818400d6348", + 
"workloadSource": "canonical-serialized", + "eplbImbalanceBefore": 1, + "eplbImbalanceAfter": 1, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28272031884", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272031884", + "createdAt": "2026-06-27T00:01:23Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 68.15999746322632, + "p90": 100.60799866914749, + "p95": 110.72000116109848, + "p99": 138.75199854373932 + }, + "combine": { + "p50": 60.70400029420853, + "p90": 72.86400347948074, + "p95": 79.3600007891655, + "p99": 86.11200004816055 + }, + "roundtrip": { + "p50": 116.92799627780914, + "p90": 150.2079963684082, + "p95": 158.6879938840866, + "p99": 184.83200669288635 + }, + "isolatedSum": { + "p50": 128.86399775743484, + "p90": 173.47200214862823, + "p95": 190.08000195026398, + "p99": 224.86399859189987 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 229376, + "combineLogicalBytes": 229376, + "fanoutMean": 2, + "recvTokensMax": 3, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 71.29599899053574, + "p90": 104.54399883747101, + "p95": 113.8560026884079, + "p99": 152.99199521541595 + }, + "combine": { + "p50": 61.983998864889145, + "p90": 78.97599786520004, + "p95": 83.5840031504631, + "p99": 98.49599748849869 + }, + "roundtrip": { + "p50": 116.5120005607605, + "p90": 149.88799393177032, + "p95": 163.71199488639832, + "p99": 195.45599818229675 + }, + "isolatedSum": { + "p50": 133.27999785542488, + "p90": 183.51999670267105, + "p95": 197.440005838871, + "p99": 251.48799270391464 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 458752, + "combineLogicalBytes": 458752, + "fanoutMean": 2, + 
"recvTokensMax": 6, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 72.4480003118515, + "p90": 95.04000097513199, + "p95": 104.63999956846237, + "p99": 125.40799379348755 + }, + "combine": { + "p50": 61.664000153541565, + "p90": 73.02399724721909, + "p95": 81.82399719953537, + "p99": 99.58399832248688 + }, + "roundtrip": { + "p50": 117.08799749612808, + "p90": 144.41600441932678, + "p95": 157.72800147533417, + "p99": 314.88001346588135 + }, + "isolatedSum": { + "p50": 134.11200046539307, + "p90": 168.06399822235107, + "p95": 186.46399676799774, + "p99": 224.99199211597443 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 2, + "recvTokensMax": 12, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 73.95199686288834, + "p90": 99.2640033364296, + "p95": 105.8880016207695, + "p99": 122.27199971675873 + }, + "combine": { + "p50": 62.6240000128746, + "p90": 84.25600081682205, + "p95": 90.11200070381165, + "p99": 102.78400033712387 + }, + "roundtrip": { + "p50": 116.15999788045883, + "p90": 150.36800503730774, + "p95": 161.69600188732147, + "p99": 189.08800184726715 + }, + "isolatedSum": { + "p50": 136.57599687576294, + "p90": 183.52000415325165, + "p95": 196.00000232458115, + "p99": 225.0560000538826 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1835008, + "combineLogicalBytes": 1835008, + "fanoutMean": 2, + "recvTokensMax": 24, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 75.87199658155441, + "p90": 96.70399874448776, + "p95": 106.01600259542465, + "p99": 122.94399738311768 + }, + "combine": { + "p50": 62.94400244951248, + "p90": 78.65600287914276, + "p95": 
84.73599702119827, + "p99": 96.6079980134964 + }, + "roundtrip": { + "p50": 117.15199798345566, + "p90": 145.11999487876892, + "p95": 153.47200632095337, + "p99": 190.75199961662292 + }, + "isolatedSum": { + "p50": 138.8159990310669, + "p90": 175.36000162363052, + "p95": 190.75199961662292, + "p99": 219.55199539661407 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 2, + "recvTokensMax": 48, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 77.82399654388428, + "p90": 99.32799637317657, + "p95": 108.22399705648422, + "p99": 131.52000308036804 + }, + "combine": { + "p50": 66.3359984755516, + "p90": 80.35200089216232, + "p95": 87.74399757385254, + "p99": 170.23999989032745 + }, + "roundtrip": { + "p50": 119.90399658679962, + "p90": 146.7519998550415, + "p95": 154.4959992170334, + "p99": 167.4879938364029 + }, + "isolatedSum": { + "p50": 144.15999501943588, + "p90": 179.6799972653389, + "p95": 195.96799463033676, + "p99": 301.7600029706955 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 7340032, + "combineLogicalBytes": 7340032, + "fanoutMean": 2, + "recvTokensMax": 96, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 83.39200168848038, + "p90": 107.93600231409073, + "p95": 117.47200042009354, + "p99": 157.82399475574493 + }, + "combine": { + "p50": 70.17599791288376, + "p90": 82.36800134181976, + "p95": 89.59999680519104, + "p99": 102.7199998497963 + }, + "roundtrip": { + "p50": 127.51999497413635, + "p90": 154.7199934720993, + "p95": 170.04799842834473, + "p99": 201.27999782562256 + }, + "isolatedSum": { + "p50": 153.56799960136414, + "p90": 190.3040036559105, + "p95": 207.07199722528458, + "p99": 260.54399460554123 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 2, + "recvTokensMax": 192, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 96.19200229644775, + "p90": 114.04799669981003, + "p95": 123.83999675512314, + "p99": 167.4560010433197 + }, + "combine": { + "p50": 84.48000252246857, + "p90": 95.87199985980988, + "p95": 99.93600100278854, + "p99": 113.92000317573547 + }, + "roundtrip": { + "p50": 156.3200056552887, + "p90": 175.64800381660461, + "p95": 185.56800484657288, + "p99": 221.15199267864227 + }, + "isolatedSum": { + "p50": 180.67200481891632, + "p90": 209.9199965596199, + "p95": 223.77599775791168, + "p99": 281.3760042190552 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 2, + "recvTokensMax": 384, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-f4d9691e", + "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-moving|8|decode|normal|none|none|0|tuned||90042e0db6a8297", + "colorKey": "h200_ca3ee133", + "comparisonKey": "65013819dd1ccf9e", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:12:51.846779+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_8", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · hotspot-moving", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-moving", + "routingLabel": "hotspot-moving", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": 
false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "90042e0db6a8297", + "workloadId": "set:3:8fd05d9ebee41064", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28272342148", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272342148", + "createdAt": "2026-06-27T00:11:29Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 74.0479975938797, + "p90": 96.41599655151367, + "p95": 105.8880016207695, + "p99": 125.5359947681427 + }, + "combine": { + "p50": 69.69600170850754, + "p90": 81.91999793052673, + "p95": 88.95999938249588, + "p99": 114.88000303506851 + }, + "roundtrip": { + "p50": 124.83199685811996, + "p90": 153.82400155067444, + "p95": 160.67199409008026, + "p99": 180.95999956130981 + }, + "isolatedSum": { + "p50": 143.74399930238724, + "p90": 178.3359944820404, + "p95": 194.84800100326538, + "p99": 240.4159978032112 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 64, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 83.93599838018417, + "p90": 
104.54399883747101, + "p95": 111.35999858379364, + "p99": 132.9919993877411 + }, + "combine": { + "p50": 76.99199765920639, + "p90": 89.37600255012512, + "p95": 96.41599655151367, + "p99": 108.64000022411346 + }, + "roundtrip": { + "p50": 138.20800185203552, + "p90": 157.69599378108978, + "p95": 164.38399255275726, + "p99": 188.54400515556335 + }, + "isolatedSum": { + "p50": 160.92799603939056, + "p90": 193.92000138759613, + "p95": 207.7759951353073, + "p99": 241.63199961185455 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19525632, + "combineLogicalBytes": 19525632, + "fanoutMean": 5.3203125, + "recvTokensMax": 256, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 123.29600006341934, + "p90": 140.00000059604645, + "p95": 145.75999975204468, + "p99": 161.6639941930771 + }, + "combine": { + "p50": 117.85600334405899, + "p90": 129.2800009250641, + "p95": 136.28800213336945, + "p99": 148.80000054836273 + }, + "roundtrip": { + "p50": 222.30400145053864, + "p90": 243.45600605010986, + "p95": 248.99199604988098, + "p99": 268.7999904155731 + }, + "isolatedSum": { + "p50": 241.15200340747833, + "p90": 269.28000152111053, + "p95": 282.0480018854141, + "p99": 310.4639947414398 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1024, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-9febd1e2", + "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-moving@s1|8|decode|normal|none|none|1|tuned||6288a1aa76c20e7", + "colorKey": "h200_9779cb2d", + "comparisonKey": "65013819dd1ccf9e", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:12:58.540972+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_5", + "sku": "h200", + 
"backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · hotspot-moving@s1", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-moving", + "routingLabel": "hotspot-moving@s1", + "routingStep": 1, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "6288a1aa76c20e7", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28272345418", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272345418", + "createdAt": "2026-06-27T00:11:36Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 74.97599720954895, + "p90": 90.91199934482574, + "p95": 99.32799637317657, + "p99": 128.83199751377106 + }, + "combine": { + "p50": 70.27199864387512, + "p90": 80.1599994301796, + "p95": 89.21600133180618, + "p99": 107.07200318574905 + }, + "roundtrip": { + "p50": 125.47199428081512, + "p90": 145.6959992647171, + "p95": 
153.31199765205383, + "p99": 184.54399704933167 + }, + "isolatedSum": { + "p50": 145.24799585342407, + "p90": 171.07199877500534, + "p95": 188.54399770498276, + "p99": 235.9040006995201 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 64, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 83.26400071382523, + "p90": 95.93600034713745, + "p95": 101.05600208044052, + "p99": 118.65600198507309 + }, + "combine": { + "p50": 78.8159966468811, + "p90": 86.75199747085571, + "p95": 92.03200042247772, + "p99": 111.84000223875046 + }, + "roundtrip": { + "p50": 139.13600146770477, + "p90": 150.68799257278442, + "p95": 155.20000457763672, + "p99": 181.05599284172058 + }, + "isolatedSum": { + "p50": 162.07999736070633, + "p90": 182.68799781799316, + "p95": 193.08800250291824, + "p99": 230.49600422382355 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19525632, + "combineLogicalBytes": 19525632, + "fanoutMean": 5.3203125, + "recvTokensMax": 256, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 124.95999783277512, + "p90": 135.51999628543854, + "p95": 140.54399728775024, + "p99": 153.3759981393814 + }, + "combine": { + "p50": 118.30399930477142, + "p90": 126.0479986667633, + "p95": 131.00799918174744, + "p99": 152.5759994983673 + }, + "roundtrip": { + "p50": 222.27199375629425, + "p90": 233.5679978132248, + "p95": 239.3600046634674, + "p99": 254.55999374389648 + }, + "isolatedSum": { + "p50": 243.26399713754654, + "p90": 261.56799495220184, + "p95": 271.5519964694977, + "p99": 305.9519976377487 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1024, + 
"stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-f5a9f57f", + "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-moving@s2|8|decode|normal|none|none|2|tuned||675e15b52e37958", + "colorKey": "h200_9479c674", + "comparisonKey": "65013819dd1ccf9e", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:13:12.398873+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_13", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · hotspot-moving@s2", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-moving", + "routingLabel": "hotspot-moving@s2", + "routingStep": 2, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "675e15b52e37958", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28272348704", + "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272348704", + "createdAt": "2026-06-27T00:11:43Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 75.87199658155441, + "p90": 93.59999746084213, + "p95": 100.19200295209885, + "p99": 114.56000059843063 + }, + "combine": { + "p50": 71.35999947786331, + "p90": 79.64800298213959, + "p95": 85.63199639320374, + "p99": 97.79199957847595 + }, + "roundtrip": { + "p50": 129.2160004377365, + "p90": 148.5760062932968, + "p95": 158.84800255298615, + "p99": 188.22400271892548 + }, + "isolatedSum": { + "p50": 147.23199605941772, + "p90": 173.24800044298172, + "p95": 185.82399934530258, + "p99": 212.35200017690659 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 64, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 88.0960002541542, + "p90": 110.78400164842606, + "p95": 121.72800302505493, + "p99": 175.61599612236023 + }, + "combine": { + "p50": 80.70400357246399, + "p90": 92.3520028591156, + "p95": 98.88000041246414, + "p99": 121.34400010108948 + }, + "roundtrip": { + "p50": 141.37600362300873, + "p90": 164.19200599193573, + "p95": 172.95999825000763, + "p99": 193.7599927186966 + }, + "isolatedSum": { + "p50": 168.8000038266182, + "p90": 203.13600450754166, + "p95": 220.60800343751907, + "p99": 296.9599962234497 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19525632, + "combineLogicalBytes": 19525632, + "fanoutMean": 5.3203125, + "recvTokensMax": 256, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 128.92800569534302, + "p90": 140.3840035200119, + "p95": 146.65600657463074, + "p99": 
171.10399901866913 + }, + "combine": { + "p50": 120.28799951076508, + "p90": 132.38400220870972, + "p95": 136.76799833774567, + "p99": 159.36000645160675 + }, + "roundtrip": { + "p50": 224.2880016565323, + "p90": 240.1919960975647, + "p95": 248.1279969215393, + "p99": 276.8320143222809 + }, + "isolatedSum": { + "p50": 249.2160052061081, + "p90": 272.7680057287216, + "p95": 283.4240049123764, + "p99": 330.4640054702759 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1024, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-13ab64c2", + "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-moving@s3|8|decode|normal|none|none|3|tuned||82b2963fc322419", + "colorKey": "h200_9579c807", + "comparisonKey": "65013819dd1ccf9e", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:13:19.903361+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_6", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · hotspot-moving@s3", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-moving", + "routingLabel": "hotspot-moving@s3", + "routingStep": 3, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": 
false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "82b2963fc322419", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28272352256", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272352256", + "createdAt": "2026-06-27T00:11:49Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 74.40000027418137, + "p90": 94.7519987821579, + "p95": 101.9200012087822, + "p99": 123.36000055074692 + }, + "combine": { + "p50": 70.20799815654755, + "p90": 82.17599987983704, + "p95": 89.37600255012512, + "p99": 105.56799918413162 + }, + "roundtrip": { + "p50": 125.34399330615997, + "p90": 150.04800260066986, + "p95": 162.6559942960739, + "p99": 177.88800597190857 + }, + "isolatedSum": { + "p50": 144.6079984307289, + "p90": 176.92799866199493, + "p95": 191.29600375890732, + "p99": 228.92799973487854 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 64, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 86.46400272846222, + "p90": 103.00800204277039, + "p95": 111.7440015077591, + "p99": 129.95199859142303 + }, + "combine": { + "p50": 79.26400005817413, + "p90": 90.97599983215332, + "p95": 96.47999703884125, + "p99": 115.9679964184761 + }, + "roundtrip": { + "p50": 139.8400068283081, + "p90": 156.6080003976822, + "p95": 163.96799683570862, + "p99": 176.35199427604675 + }, + "isolatedSum": { 
+ "p50": 165.72800278663635, + "p90": 193.9840018749237, + "p95": 208.22399854660034, + "p99": 245.91999500989914 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19525632, + "combineLogicalBytes": 19525632, + "fanoutMean": 5.3203125, + "recvTokensMax": 256, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 124.83199685811996, + "p90": 138.59200477600098, + "p95": 144.44799721240997, + "p99": 233.88800024986267 + }, + "combine": { + "p50": 119.07199770212173, + "p90": 130.8159977197647, + "p95": 139.71200585365295, + "p99": 152.5759994983673 + }, + "roundtrip": { + "p50": 222.24000096321106, + "p90": 239.84000086784363, + "p95": 250.65600872039795, + "p99": 283.4239900112152 + }, + "isolatedSum": { + "p50": 243.9039945602417, + "p90": 269.4080024957657, + "p95": 284.1600030660629, + "p99": 386.46399974823 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1024, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-7c6f809c", + "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|decode|normal|none|none|0|tuned||2ad5ef98d328fa1", + "colorKey": "h200_189562cd", + "comparisonKey": "6b812f29e2dcdef6", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:57:16.217396+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_1", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · hotspot-single", + "shape": { + "hidden": 
7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-single", + "routingLabel": "hotspot-single", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "2ad5ef98d328fa1", + "workloadId": "set:4:286be993cd819ed9", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271859196", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271859196", + "createdAt": "2026-06-26T23:55:54Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 71.71200215816498, + "p90": 98.30400347709656, + "p95": 109.69600081443787, + "p99": 295.48799991607666 + }, + "combine": { + "p50": 67.6800012588501, + "p90": 82.07999914884567, + "p95": 88.16000074148178, + "p99": 110.04800349473953 + }, + "roundtrip": { + "p50": 121.95199728012085, + "p90": 153.24799716472626, + "p95": 161.53599321842194, + "p99": 211.16800606250763 + }, + "isolatedSum": { + "p50": 139.39200341701508, + "p90": 180.38400262594223, + "p95": 197.85600155591965, + "p99": 405.5360034108162 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, 
+ "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 74.17599856853485, + "p90": 98.08000177145004, + "p95": 108.0000028014183, + "p99": 146.14400267601013 + }, + "combine": { + "p50": 69.63200122117996, + "p90": 83.13599973917007, + "p95": 89.02399986982346, + "p99": 103.20000350475311 + }, + "roundtrip": { + "p50": 125.40799379348755, + "p90": 153.50399911403656, + "p95": 165.12000560760498, + "p99": 192.83199310302734 + }, + "isolatedSum": { + "p50": 143.8079997897148, + "p90": 181.21600151062012, + "p95": 197.02400267124176, + "p99": 249.34400618076324 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 64, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 81.02399855852127, + "p90": 105.76000064611435, + "p95": 114.46399986743927, + "p99": 129.72800433635712 + }, + "combine": { + "p50": 77.2159993648529, + "p90": 89.34400230646133, + "p95": 95.8079993724823, + "p99": 114.97599631547928 + }, + "roundtrip": { + "p50": 137.472003698349, + "p90": 158.91200304031372, + "p95": 166.20799899101257, + "p99": 185.08799374103546 + }, + "isolatedSum": { + "p50": 158.23999792337418, + "p90": 195.10400295257568, + "p95": 210.27199923992157, + "p99": 244.7040006518364 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19525632, + "combineLogicalBytes": 19525632, + "fanoutMean": 5.3203125, + "recvTokensMax": 256, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 124.25599992275238, + "p90": 137.02400028705597, + "p95": 144.51199769973755, + "p99": 166.6879951953888 + }, + "combine": { + "p50": 118.30399930477142, + "p90": 130.14400005340576, + "p95": 135.71199774742126, + "p99": 
157.6319932937622 + }, + "roundtrip": { + "p50": 220.06399929523468, + "p90": 239.42400515079498, + "p95": 246.17600440979004, + "p99": 313.6639893054962 + }, + "isolatedSum": { + "p50": 242.5599992275238, + "p90": 267.16800034046173, + "p95": 280.2239954471588, + "p99": 324.319988489151 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1024, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-13c27f2d", + "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|decode|normal|none|none|0|tuned||b6caf944f6bb621", + "colorKey": "h200_189562cd", + "comparisonKey": "6b812f29e2dcdef6", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:05:10.730241+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_9", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · hotspot-single", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-single", + "routingLabel": "hotspot-single", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": 
true, + "traceSignature": "b6caf944f6bb621", + "workloadId": "set:8:286be993cd819ed9", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28272100552", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272100552", + "createdAt": "2026-06-27T00:03:34Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 74.14399832487106, + "p90": 98.84800016880035, + "p95": 106.36799782514572, + "p99": 130.46400249004364 + }, + "combine": { + "p50": 68.15999746322632, + "p90": 80.19199967384338, + "p95": 86.30400151014328, + "p99": 99.16800260543823 + }, + "roundtrip": { + "p50": 122.17599898576736, + "p90": 154.4319987297058, + "p95": 165.98400473594666, + "p99": 216.44799411296844 + }, + "isolatedSum": { + "p50": 142.30399578809738, + "p90": 179.03999984264374, + "p95": 192.671999335289, + "p99": 229.63200509548187 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 76.19199901819229, + "p90": 103.5199984908104, + "p95": 114.3679991364479, + "p99": 145.9520012140274 + }, + "combine": { + "p50": 69.2799985408783, + "p90": 83.96799862384796, + "p95": 90.11200070381165, + "p99": 99.7759997844696 + }, + "roundtrip": { + "p50": 125.02400577068329, + "p90": 152.3520052433014, + "p95": 163.58399391174316, + "p99": 191.16799533367157 + }, + "isolatedSum": { + "p50": 145.4719975590706, + "p90": 187.48799711465836, + "p95": 204.47999984025955, + "p99": 245.728000998497 + }, + "roundtripMeasured": 
true, + "dispatchLogicalBytes": 1247232, + "combineLogicalBytes": 1247232, + "fanoutMean": 5.4375, + "recvTokensMax": 16, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 79.16799932718277, + "p90": 122.56000190973282, + "p95": 143.8719928264618, + "p99": 228.03199291229248 + }, + "combine": { + "p50": 70.04799693822861, + "p90": 85.1840004324913, + "p95": 89.9519994854927, + "p99": 98.4639972448349 + }, + "roundtrip": { + "p50": 130.0159990787506, + "p90": 166.17600619792938, + "p95": 180.80000579357147, + "p99": 225.63199698925018 + }, + "isolatedSum": { + "p50": 149.21599626541138, + "p90": 207.74400234222412, + "p95": 233.8239923119545, + "p99": 326.4959901571274 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2451456, + "combineLogicalBytes": 2451456, + "fanoutMean": 5.34375, + "recvTokensMax": 32, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 75.52000135183334, + "p90": 99.71199929714203, + "p95": 106.62399977445602, + "p99": 121.24799937009811 + }, + "combine": { + "p50": 70.592001080513, + "p90": 88.19200098514557, + "p95": 93.31200271844864, + "p99": 122.49600142240524 + }, + "roundtrip": { + "p50": 127.29600071907043, + "p90": 156.44800662994385, + "p95": 164.2879992723465, + "p99": 200.76799392700195 + }, + "isolatedSum": { + "p50": 146.11200243234634, + "p90": 187.9040002822876, + "p95": 199.93600249290466, + "p99": 243.74400079250336 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 64, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 75.42400062084198, + "p90": 99.32799637317657, + "p95": 107.16799646615982, + "p99": 
116.44800007343292 + }, + "combine": { + "p50": 72.7040022611618, + "p90": 89.59999680519104, + "p95": 95.551997423172, + "p99": 149.1200029850006 + }, + "roundtrip": { + "p50": 129.5360028743744, + "p90": 163.42400014400482, + "p95": 173.18400740623474, + "p99": 210.36800742149353 + }, + "isolatedSum": { + "p50": 148.12800288200378, + "p90": 188.92799317836761, + "p95": 202.71999388933182, + "p99": 265.56800305843353 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9748480, + "combineLogicalBytes": 9748480, + "fanoutMean": 5.3125, + "recvTokensMax": 128, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 84.89599823951721, + "p90": 109.31199789047241, + "p95": 117.15199798345566, + "p99": 152.92799472808838 + }, + "combine": { + "p50": 78.75200361013412, + "p90": 95.36000341176987, + "p95": 99.10400211811066, + "p99": 120.06399780511856 + }, + "roundtrip": { + "p50": 140.73599874973297, + "p90": 167.29600727558136, + "p95": 174.01599884033203, + "p99": 211.07199788093567 + }, + "isolatedSum": { + "p50": 163.64800184965134, + "p90": 204.67200130224228, + "p95": 216.25600010156631, + "p99": 272.99199253320694 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19525632, + "combineLogicalBytes": 19525632, + "fanoutMean": 5.3203125, + "recvTokensMax": 256, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 98.91200065612793, + "p90": 116.19199812412262, + "p95": 121.31199985742569, + "p99": 146.84799313545227 + }, + "combine": { + "p50": 91.36000275611877, + "p90": 105.50399869680405, + "p95": 109.92000252008438, + "p99": 130.65600395202637 + }, + "roundtrip": { + "p50": 168.7999963760376, + "p90": 190.8479928970337, + "p95": 195.23200392723083, + "p99": 233.69599878787994 + }, + "isolatedSum": { + "p50": 190.2720034122467, + "p90": 
221.69599682092667, + "p95": 231.23200237751007, + "p99": 277.50399708747864 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38621184, + "combineLogicalBytes": 38621184, + "fanoutMean": 5.26171875, + "recvTokensMax": 512, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 125.72799623012543, + "p90": 143.16800236701965, + "p95": 147.90399372577667, + "p99": 170.71999609470367 + }, + "combine": { + "p50": 120.06399780511856, + "p90": 136.48000359535217, + "p95": 141.9840008020401, + "p99": 148.44800531864166 + }, + "roundtrip": { + "p50": 224.09600019454956, + "p90": 247.8400021791458, + "p95": 254.68799471855164, + "p99": 276.38399600982666 + }, + "isolatedSum": { + "p50": 245.791994035244, + "p90": 279.6480059623718, + "p95": 289.8879945278168, + "p99": 319.16800141334534 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1024, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-c4fd916e", + "identity": "h200|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|hotspot-single+eplb|8|decode|normal|none|none|0|tuned||e41f5099a9733ac", + "colorKey": "h200_80a72891", + "comparisonKey": "abe9d0af26c5a0c0", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:05:13.797855+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_1", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · hotspot-single+eplb", + "shape": { + "hidden": 7168, + "topk": 8, + 
"experts": 288, + "routing": "hotspot-single", + "routingLabel": "hotspot-single+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "e41f5099a9733ac", + "workloadId": "set:8:286be993cd819ed9", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": 1.830078125, + "eplbImbalanceAfter": 1.0007595486111112, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28272103776", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272103776", + "createdAt": "2026-06-27T00:03:41Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 72.86400347948074, + "p90": 99.10400211811066, + "p95": 107.35999792814255, + "p99": 136.48000359535217 + }, + "combine": { + "p50": 67.87200272083282, + "p90": 82.30400085449219, + "p95": 87.55200356245041, + "p99": 92.12800115346909 + }, + "roundtrip": { + "p50": 121.31199985742569, + "p90": 150.62400698661804, + "p95": 160.76800227165222, + "p99": 204.8639953136444 + }, + "isolatedSum": { + "p50": 140.73600620031357, + "p90": 181.40800297260284, + "p95": 194.91200149059296, + "p99": 228.60800474882126 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 4, + "correct": 
true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 74.43200051784515, + "p90": 101.34399682283401, + "p95": 109.66400057077408, + "p99": 138.43199610710144 + }, + "combine": { + "p50": 67.90400296449661, + "p90": 80.76799660921097, + "p95": 85.37600189447403, + "p99": 95.13600170612335 + }, + "roundtrip": { + "p50": 121.56800180673599, + "p90": 151.67999267578125, + "p95": 162.23999857902527, + "p99": 191.64800643920898 + }, + "isolatedSum": { + "p50": 142.33600348234177, + "p90": 182.11199343204498, + "p95": 195.0400024652481, + "p99": 233.5679978132248 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1175552, + "combineLogicalBytes": 1175552, + "fanoutMean": 5.125, + "recvTokensMax": 12, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 74.36800003051758, + "p90": 106.30399733781815, + "p95": 112.8000020980835, + "p99": 133.34399461746216 + }, + "combine": { + "p50": 69.31199878454208, + "p90": 85.75999736785889, + "p95": 93.05600076913834, + "p99": 108.41599851846695 + }, + "roundtrip": { + "p50": 123.16799908876419, + "p90": 152.16000378131866, + "p95": 162.33600676059723, + "p99": 187.80800700187683 + }, + "isolatedSum": { + "p50": 143.67999881505966, + "p90": 192.06399470567703, + "p95": 205.85600286722183, + "p99": 241.7599931359291 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2451456, + "combineLogicalBytes": 2451456, + "fanoutMean": 5.34375, + "recvTokensMax": 23, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 73.72800260782242, + "p90": 94.94400024414062, + "p95": 102.30399668216705, + "p99": 121.2799996137619 + }, + "combine": { + "p50": 68.44799965620041, + "p90": 81.91999793052673, + "p95": 88.03199976682663, + "p99": 102.52799838781357 + }, + 
"roundtrip": { + "p50": 124.22399967908859, + "p90": 154.14400398731232, + "p95": 164.60800170898438, + "p99": 177.44000256061554 + }, + "isolatedSum": { + "p50": 142.17600226402283, + "p90": 176.86399817466736, + "p95": 190.33599644899368, + "p99": 223.80799800157547 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4730880, + "combineLogicalBytes": 4730880, + "fanoutMean": 5.15625, + "recvTokensMax": 44, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 72.12799787521362, + "p90": 98.55999797582626, + "p95": 106.01600259542465, + "p99": 130.62399625778198 + }, + "combine": { + "p50": 69.92000341415405, + "p90": 83.29600095748901, + "p95": 89.28000181913376, + "p99": 106.75200074911118 + }, + "roundtrip": { + "p50": 123.77600371837616, + "p90": 149.63200688362122, + "p95": 158.4639996290207, + "p99": 176.54399573802948 + }, + "isolatedSum": { + "p50": 142.04800128936768, + "p90": 181.85599893331528, + "p95": 195.2960044145584, + "p99": 237.37599700689316 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9691136, + "combineLogicalBytes": 9691136, + "fanoutMean": 5.28125, + "recvTokensMax": 88, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 84.48000252246857, + "p90": 110.75200140476227, + "p95": 119.61600184440613, + "p99": 152.41600573062897 + }, + "combine": { + "p50": 77.2479996085167, + "p90": 91.07200056314468, + "p95": 98.36799651384354, + "p99": 130.17599284648895 + }, + "roundtrip": { + "p50": 134.783998131752, + "p90": 159.04000401496887, + "p95": 166.97600483894348, + "p99": 194.36800479888916 + }, + "isolatedSum": { + "p50": 161.72800213098526, + "p90": 201.82400196790695, + "p95": 217.98399835824966, + "p99": 282.5919985771179 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19568640, + 
"combineLogicalBytes": 19568640, + "fanoutMean": 5.33203125, + "recvTokensMax": 179, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 97.02400118112564, + "p90": 121.2799996137619, + "p95": 137.95199990272522, + "p99": 238.87999355793 + }, + "combine": { + "p50": 90.94399958848953, + "p90": 106.97600245475769, + "p95": 113.98400366306305, + "p99": 139.3280029296875 + }, + "roundtrip": { + "p50": 161.05599701404572, + "p90": 182.17599391937256, + "p95": 191.23199582099915, + "p99": 230.27199506759644 + }, + "isolatedSum": { + "p50": 187.96800076961517, + "p90": 228.2560020685196, + "p95": 251.93600356578827, + "p99": 378.2079964876175 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38750208, + "combineLogicalBytes": 38750208, + "fanoutMean": 5.279296875, + "recvTokensMax": 348, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 116.03199690580368, + "p90": 134.39999520778656, + "p95": 142.5279974937439, + "p99": 206.11199736595154 + }, + "combine": { + "p50": 103.04000228643417, + "p90": 118.23999881744385, + "p95": 122.079998254776, + "p99": 137.69599795341492 + }, + "roundtrip": { + "p50": 195.99999487400055, + "p90": 214.33599293231964, + "p95": 224.5440036058426, + "p99": 265.02400636672974 + }, + "isolatedSum": { + "p50": 219.07199919223785, + "p90": 252.6399940252304, + "p95": 264.6079957485199, + "p99": 343.80799531936646 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77342720, + "combineLogicalBytes": 77342720, + "fanoutMean": 5.2685546875, + "recvTokensMax": 687, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-34b2b051", + "identity": 
"h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform·empty-rank|8|decode|normal|none|empty-rank|0|tuned||5621f0d4899ad7a", + "colorKey": "h200_2a7f12a0", + "comparisonKey": "4dde4e46080a91eb", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:14:22.620116+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_3", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · uniform·empty-rank", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform·empty-rank", + "routingStep": 0, + "unevenTokens": "empty-rank", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "5621f0d4899ad7a", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28272386143", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272386143", + "createdAt": "2026-06-27T00:12:58Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + 
"rows": [ + { + "tokensPerRank": 8, + "globalTokens": 63, + "dispatch": { + "p50": 73.15199822187424, + "p90": 92.76799857616425, + "p95": 100.28800368309021, + "p99": 131.58400356769562 + }, + "combine": { + "p50": 68.96000355482101, + "p90": 83.64800363779068, + "p95": 88.92799913883209, + "p99": 102.11200267076492 + }, + "roundtrip": { + "p50": 121.66400253772736, + "p90": 145.37599682807922, + "p95": 157.18400478363037, + "p99": 189.56799805164337 + }, + "isolatedSum": { + "p50": 142.11200177669525, + "p90": 176.41600221395493, + "p95": 189.2160028219223, + "p99": 233.69600623846054 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4888576, + "combineLogicalBytes": 4888576, + "fanoutMean": 5.412698268890381, + "recvTokensMax": 46, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 252, + "dispatch": { + "p50": 82.43200182914734, + "p90": 96.28800302743912, + "p95": 103.84000092744827, + "p99": 123.07199835777283 + }, + "combine": { + "p50": 76.60800218582153, + "p90": 86.65599673986435, + "p95": 92.28800237178802, + "p99": 107.84000158309937 + }, + "roundtrip": { + "p50": 134.49600338935852, + "p90": 156.031996011734, + "p95": 167.4879938364029, + "p99": 228.12800109386444 + }, + "isolatedSum": { + "p50": 159.04000401496887, + "p90": 182.94399976730347, + "p95": 196.1280032992363, + "p99": 230.9119999408722 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19396608, + "combineLogicalBytes": 19396608, + "fanoutMean": 5.36904764175415, + "recvTokensMax": 180, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1022, + "dispatch": { + "p50": 115.90400338172913, + "p90": 130.49599528312683, + "p95": 136.86400651931763, + "p99": 152.319997549057 + }, + "combine": { + "p50": 108.92800241708755, + "p90": 121.31199985742569, + "p95": 126.8479973077774, + "p99": 144.06399428844452 + }, + 
"roundtrip": { + "p50": 201.08799636363983, + "p90": 216.5759950876236, + "p95": 222.33599424362183, + "p99": 238.5919988155365 + }, + "isolatedSum": { + "p50": 224.83200579881668, + "p90": 251.80799514055252, + "p95": 263.71200382709503, + "p99": 296.3839918375015 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77529088, + "combineLogicalBytes": 77529088, + "fanoutMean": 5.2915849685668945, + "recvTokensMax": 722, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-2de6a2af", + "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform·linear|8|decode|normal|none|linear|0|tuned||b029c1a6fded400", + "colorKey": "h200_58b5650b", + "comparisonKey": "4dde4e46080a91eb", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:14:22.294115+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_9", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · uniform·linear", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform·linear", + "routingStep": 0, + "unevenTokens": "linear", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + 
"traceSignature": "b029c1a6fded400", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28272382939", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272382939", + "createdAt": "2026-06-27T00:12:51Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 75.19999891519547, + "p90": 97.18400239944458, + "p95": 107.84000158309937, + "p99": 136.1279934644699 + }, + "combine": { + "p50": 68.9919963479042, + "p90": 80.48000186681747, + "p95": 86.62399649620056, + "p99": 96.47999703884125 + }, + "roundtrip": { + "p50": 122.27199971675873, + "p90": 154.6880006790161, + "p95": 166.97600483894348, + "p99": 202.78400182724 + }, + "isolatedSum": { + "p50": 144.19199526309967, + "p90": 177.66400426626205, + "p95": 194.46399807929993, + "p99": 232.60799050331116 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 85.69599688053131, + "p90": 105.8880016207695, + "p95": 113.63200098276138, + "p99": 147.2959965467453 + }, + "combine": { + "p50": 78.40000092983246, + "p90": 89.85599875450134, + "p95": 95.93600034713745, + "p99": 106.84800148010254 + }, + "roundtrip": { + "p50": 134.62400436401367, + "p90": 154.81600165367126, + "p95": 166.1120057106018, + "p99": 190.0160014629364 + }, + "isolatedSum": { + "p50": 164.09599781036377, + "p90": 195.74400037527084, + "p95": 209.56800132989883, + "p99": 254.14399802684784 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 117.76000261306763, + "p90": 139.13600146770477, + "p95": 149.9200016260147, + "p99": 190.94400107860565 + }, + "combine": { + "p50": 114.88000303506851, + "p90": 121.88799679279327, + "p95": 128.1599998474121, + "p99": 155.61600029468536 + }, + "roundtrip": { + "p50": 208.25600624084473, + "p90": 228.57600450515747, + "p95": 237.37600445747375, + "p99": 271.64798974990845 + }, + "isolatedSum": { + "p50": 232.64000564813614, + "p90": 261.02399826049805, + "p95": 278.0800014734268, + "p99": 346.560001373291 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-6ff3844b", + "identity": "h200|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|uniform+eplb|8|decode|normal|none|none|0|tuned||73351bbcd4d02de", + "colorKey": "h200_580d7b05", + "comparisonKey": "46ecc7ff5ccb7c5d", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:02:26.011362+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_11", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · uniform+eplb", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "uniform", + "routingLabel": "uniform+eplb", + "routingStep": 0, + "unevenTokens": "none", + 
"eplbEnabled": true, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "73351bbcd4d02de", + "workloadId": "set:8:d8d49658059863f2", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": 1.078125, + "eplbImbalanceAfter": 1.00048828125, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28272020269", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272020269", + "createdAt": "2026-06-27T00:01:03Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 76.76800340414047, + "p90": 99.23200309276581, + "p95": 110.59200018644333, + "p99": 139.71200585365295 + }, + "combine": { + "p50": 68.1919977068901, + "p90": 80.09599894285202, + "p95": 84.06399935483932, + "p99": 98.65599870681763 + }, + "roundtrip": { + "p50": 123.16799908876419, + "p90": 143.90400052070618, + "p95": 155.8080017566681, + "p99": 181.5679967403412 + }, + "isolatedSum": { + "p50": 144.96000111103058, + "p90": 179.32800203561783, + "p95": 194.65599954128265, + "p99": 238.36800456047058 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 516096, + "combineLogicalBytes": 516096, + "fanoutMean": 4.5, + "recvTokensMax": 6, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 76.09599828720093, + "p90": 
102.55999863147736, + "p95": 112.70400136709213, + "p99": 138.5599970817566 + }, + "combine": { + "p50": 69.95200365781784, + "p90": 79.83999699354172, + "p95": 83.39200168848038, + "p99": 91.93599969148636 + }, + "roundtrip": { + "p50": 125.791996717453, + "p90": 143.96800100803375, + "p95": 156.67200088500977, + "p99": 176.5120029449463 + }, + "isolatedSum": { + "p50": 146.04800194501877, + "p90": 182.39999562501907, + "p95": 196.0960030555725, + "p99": 230.49599677324295 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1089536, + "combineLogicalBytes": 1089536, + "fanoutMean": 4.75, + "recvTokensMax": 11, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 77.05599814653397, + "p90": 99.0080013871193, + "p95": 106.6880002617836, + "p99": 139.77600634098053 + }, + "combine": { + "p50": 70.04799693822861, + "p90": 82.49600231647491, + "p95": 85.56800335645676, + "p99": 100.09600222110748 + }, + "roundtrip": { + "p50": 130.17599284648895, + "p90": 161.6320013999939, + "p95": 169.24799978733063, + "p99": 194.43200528621674 + }, + "isolatedSum": { + "p50": 147.10399508476257, + "p90": 181.5040037035942, + "p95": 192.25600361824036, + "p99": 239.872008562088 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2207744, + "combineLogicalBytes": 2207744, + "fanoutMean": 4.8125, + "recvTokensMax": 23, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 77.34400033950806, + "p90": 90.94399958848953, + "p95": 97.9200005531311, + "p99": 113.18399757146835 + }, + "combine": { + "p50": 71.19999825954437, + "p90": 79.9039974808693, + "p95": 84.06399935483932, + "p99": 113.02399635314941 + }, + "roundtrip": { + "p50": 130.0159990787506, + "p90": 153.08800339698792, + "p95": 165.24800658226013, + "p99": 195.3279972076416 + }, + "isolatedSum": { + "p50": 
148.54399859905243, + "p90": 170.84799706935883, + "p95": 181.98399990797043, + "p99": 226.20799392461777 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4558848, + "combineLogicalBytes": 4558848, + "fanoutMean": 4.96875, + "recvTokensMax": 46, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 83.48800241947174, + "p90": 106.20799660682678, + "p95": 114.78400230407715, + "p99": 256.0960054397583 + }, + "combine": { + "p50": 72.9919970035553, + "p90": 86.17600053548813, + "p95": 91.51999652385712, + "p99": 108.83200168609619 + }, + "roundtrip": { + "p50": 132.9919993877411, + "p90": 166.24000668525696, + "p95": 176.35199427604675, + "p99": 203.5519927740097 + }, + "isolatedSum": { + "p50": 156.47999942302704, + "p90": 192.3839971423149, + "p95": 206.30399882793427, + "p99": 364.9280071258545 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9347072, + "combineLogicalBytes": 9347072, + "fanoutMean": 5.09375, + "recvTokensMax": 86, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 89.1840010881424, + "p90": 104.51199859380722, + "p95": 112.44799941778183, + "p99": 135.5839967727661 + }, + "combine": { + "p50": 79.3600007891655, + "p90": 87.26400136947632, + "p95": 92.73599833250046, + "p99": 111.32799834012985 + }, + "roundtrip": { + "p50": 139.90400731563568, + "p90": 159.2639982700348, + "p95": 169.3439930677414, + "p99": 189.02400135993958 + }, + "isolatedSum": { + "p50": 168.5440018773079, + "p90": 191.77599996328354, + "p95": 205.1839977502823, + "p99": 246.91199511289597 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 18995200, + "combineLogicalBytes": 18995200, + "fanoutMean": 5.17578125, + "recvTokensMax": 178, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 98.2080027461052, + "p90": 113.40799927711487, + "p95": 119.99999731779099, + "p99": 140.19200205802917 + }, + "combine": { + "p50": 89.12000060081482, + "p90": 98.7199991941452, + "p95": 102.7199998497963, + "p99": 111.455999314785 + }, + "roundtrip": { + "p50": 162.7199947834015, + "p90": 182.0800006389618, + "p95": 189.60000574588776, + "p99": 210.4640007019043 + }, + "isolatedSum": { + "p50": 187.32800334692, + "p90": 212.12799847126007, + "p95": 222.71999716758728, + "p99": 251.64800137281418 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38291456, + "combineLogicalBytes": 38291456, + "fanoutMean": 5.216796875, + "recvTokensMax": 348, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 122.46400117874146, + "p90": 136.51199638843536, + "p95": 143.64799857139587, + "p99": 156.41599893569946 + }, + "combine": { + "p50": 106.33599758148193, + "p90": 117.91999638080597, + "p95": 122.079998254776, + "p99": 132.09599256515503 + }, + "roundtrip": { + "p50": 200.15999674797058, + "p90": 217.72800385951996, + "p95": 223.29600155353546, + "p99": 246.87999486923218 + }, + "isolatedSum": { + "p50": 228.7999987602234, + "p90": 254.43199276924133, + "p95": 265.7279968261719, + "p99": 288.5119915008545 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77113344, + "combineLogicalBytes": 77113344, + "fanoutMean": 5.2529296875, + "recvTokensMax": 685, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-f68ea439", + "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|decode|normal|none|none|0|tuned||14ded8461f2636c", + "colorKey": "h200_b6aa6110", + "comparisonKey": "5971fba5c9d29fa7", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:03:10.278228+00:00", + "status": "valid", + 
"publicationStatus": "official", + "runner": "h200-dgxc-slurm_5", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · zipf", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf", + "routingLabel": "zipf", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "14ded8461f2636c", + "workloadId": "set:8:f5576e2b712d38c3", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28272042133", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272042133", + "createdAt": "2026-06-27T00:01:43Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 71.68000191450119, + "p90": 93.44000369310379, + "p95": 102.68799960613251, + "p99": 140.1599943637848 + }, + "combine": { + "p50": 67.4239993095398, + "p90": 79.45600152015686, + "p95": 86.496002972126, + "p99": 106.01600259542465 + }, + "roundtrip": { 
+ "p50": 119.4240003824234, + "p90": 146.59200608730316, + "p95": 155.07200360298157, + "p99": 181.34400248527527 + }, + "isolatedSum": { + "p50": 139.10400122404099, + "p90": 172.89600521326065, + "p95": 189.18400257825851, + "p99": 246.17599695920944 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 444416, + "combineLogicalBytes": 444416, + "fanoutMean": 3.875, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 73.69600236415863, + "p90": 100.92800110578537, + "p95": 109.66400057077408, + "p99": 146.04799449443817 + }, + "combine": { + "p50": 68.28799843788147, + "p90": 80.76799660921097, + "p95": 85.69599688053131, + "p99": 152.8320014476776 + }, + "roundtrip": { + "p50": 121.15199863910675, + "p90": 147.77599275112152, + "p95": 155.71199357509613, + "p99": 193.7599927186966 + }, + "isolatedSum": { + "p50": 141.9840008020401, + "p90": 181.69599771499634, + "p95": 195.3599974513054, + "p99": 298.8799959421158 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 845824, + "combineLogicalBytes": 845824, + "fanoutMean": 3.6875, + "recvTokensMax": 16, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 72.73600250482559, + "p90": 96.12800180912018, + "p95": 103.90400141477585, + "p99": 168.06399822235107 + }, + "combine": { + "p50": 66.91200286149979, + "p90": 78.65600287914276, + "p95": 82.2720006108284, + "p99": 94.71999853849411 + }, + "roundtrip": { + "p50": 118.9119964838028, + "p90": 143.8080072402954, + "p95": 155.71199357509613, + "p99": 209.6959948539734 + }, + "isolatedSum": { + "p50": 139.64800536632538, + "p90": 174.78400468826294, + "p95": 186.17600202560425, + "p99": 262.7839967608452 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1691648, + "combineLogicalBytes": 1691648, + "fanoutMean": 
3.6875, + "recvTokensMax": 32, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 73.56800138950348, + "p90": 93.82399916648865, + "p95": 101.47199779748917, + "p99": 132.7359974384308 + }, + "combine": { + "p50": 67.6800012588501, + "p90": 79.6160027384758, + "p95": 83.23200047016144, + "p99": 101.21600329875946 + }, + "roundtrip": { + "p50": 119.26399916410446, + "p90": 145.24799585342407, + "p95": 154.4959992170334, + "p99": 191.71200692653656 + }, + "isolatedSum": { + "p50": 141.24800264835358, + "p90": 173.44000190496445, + "p95": 184.7039982676506, + "p99": 233.95200073719025 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3354624, + "combineLogicalBytes": 3354624, + "fanoutMean": 3.65625, + "recvTokensMax": 64, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 77.66400277614594, + "p90": 104.12800312042236, + "p95": 114.30399864912033, + "p99": 140.6400054693222 + }, + "combine": { + "p50": 70.8480030298233, + "p90": 84.32000130414963, + "p95": 90.7519981265068, + "p99": 122.27199971675873 + }, + "roundtrip": { + "p50": 125.95200538635254, + "p90": 157.151997089386, + "p95": 166.81599617004395, + "p99": 207.23199844360352 + }, + "isolatedSum": { + "p50": 148.51200580596924, + "p90": 188.448004424572, + "p95": 205.05599677562714, + "p99": 262.91200518608093 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6537216, + "combineLogicalBytes": 6537216, + "fanoutMean": 3.5625, + "recvTokensMax": 127, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 82.94399827718735, + "p90": 101.56799852848053, + "p95": 110.88000237941742, + "p99": 162.11199760437012 + }, + "combine": { + "p50": 76.31999999284744, + "p90": 
87.67999708652496, + "p95": 90.68799763917923, + "p99": 98.33600372076035 + }, + "roundtrip": { + "p50": 135.71199774742126, + "p90": 155.20000457763672, + "p95": 165.6000018119812, + "p99": 222.27199375629425 + }, + "isolatedSum": { + "p50": 159.2639982700348, + "p90": 189.2479956150055, + "p95": 201.56800001859665, + "p99": 260.44800132513046 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 12859392, + "combineLogicalBytes": 12859392, + "fanoutMean": 3.50390625, + "recvTokensMax": 255, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 96.76799923181534, + "p90": 109.11999642848969, + "p95": 116.5120005607605, + "p99": 174.01599884033203 + }, + "combine": { + "p50": 86.17600053548813, + "p90": 97.31200337409973, + "p95": 103.07200253009796, + "p99": 120.64000219106674 + }, + "roundtrip": { + "p50": 160.67199409008026, + "p90": 175.61599612236023, + "p95": 181.40800297260284, + "p99": 218.9439982175827 + }, + "isolatedSum": { + "p50": 182.94399976730347, + "p90": 206.43199980258942, + "p95": 219.58400309085846, + "p99": 294.6560010313988 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 25145344, + "combineLogicalBytes": 25145344, + "fanoutMean": 3.42578125, + "recvTokensMax": 510, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 122.3360002040863, + "p90": 132.47999548912048, + "p95": 135.51999628543854, + "p99": 155.90399503707886 + }, + "combine": { + "p50": 112.86400258541107, + "p90": 121.8239963054657, + "p95": 126.62400305271149, + "p99": 136.76799833774567 + }, + "roundtrip": { + "p50": 214.52799439430237, + "p90": 232.92799293994904, + "p95": 243.42399835586548, + "p99": 306.97599053382874 + }, + "isolatedSum": { + "p50": 235.20000278949738, + "p90": 254.30399179458618, + "p95": 262.14399933815, + "p99": 
292.6719933748245 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-9e42f709", + "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|decode|normal|none|none|0|tuned||1fa7fe74d0e30a3", + "colorKey": "h200_b6aa6110", + "comparisonKey": "5971fba5c9d29fa7", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:56:48.444120+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_5", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · zipf", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf", + "routingLabel": "zipf", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "1fa7fe74d0e30a3", + "workloadId": "set:4:f5576e2b712d38c3", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + 
"repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271844665", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271844665", + "createdAt": "2026-06-26T23:55:26Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 71.3919997215271, + "p90": 101.1200025677681, + "p95": 115.1999980211258, + "p99": 144.44799721240997 + }, + "combine": { + "p50": 64.4799992442131, + "p90": 82.78399705886841, + "p95": 91.48799628019333, + "p99": 104.67199981212616 + }, + "roundtrip": { + "p50": 117.98399686813354, + "p90": 156.22399747371674, + "p95": 165.3120070695877, + "p99": 193.12000274658203 + }, + "isolatedSum": { + "p50": 135.8719989657402, + "p90": 183.9039996266365, + "p95": 206.68799430131912, + "p99": 249.11999702453613 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 444416, + "combineLogicalBytes": 444416, + "fanoutMean": 3.875, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 73.08799773454666, + "p90": 95.77599912881851, + "p95": 105.66399991512299, + "p99": 147.32800424098969 + }, + "combine": { + "p50": 67.6800012588501, + "p90": 82.59200304746628, + "p95": 89.02399986982346, + "p99": 108.64000022411346 + }, + "roundtrip": { + "p50": 121.2799996137619, + "p90": 152.63999998569489, + "p95": 167.4560010433197, + "p99": 201.7280012369156 + }, + "isolatedSum": { + "p50": 140.76799899339676, + "p90": 178.3680021762848, + "p95": 194.68799978494644, + "p99": 255.96800446510315 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3354624, + "combineLogicalBytes": 3354624, + "fanoutMean": 3.65625, + "recvTokensMax": 64, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 81.66400343179703, + "p90": 
98.30400347709656, + "p95": 109.98400300741196, + "p99": 134.14399325847626 + }, + "combine": { + "p50": 76.31999999284744, + "p90": 89.21600133180618, + "p95": 95.90400010347366, + "p99": 118.6240017414093 + }, + "roundtrip": { + "p50": 136.00000739097595, + "p90": 157.53600001335144, + "p95": 172.7360039949417, + "p99": 212.25599944591522 + }, + "isolatedSum": { + "p50": 157.98400342464447, + "p90": 187.52000480890274, + "p95": 205.88800311088562, + "p99": 252.76799499988556 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 12859392, + "combineLogicalBytes": 12859392, + "fanoutMean": 3.50390625, + "recvTokensMax": 255, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 122.52800166606903, + "p90": 136.6720050573349, + "p95": 148.15999567508698, + "p99": 160.89600324630737 + }, + "combine": { + "p50": 112.03200370073318, + "p90": 125.21600723266602, + "p95": 132.4480026960373, + "p99": 149.02399480342865 + }, + "roundtrip": { + "p50": 211.58400177955627, + "p90": 233.2800030708313, + "p95": 244.159996509552, + "p99": 292.03200340270996 + }, + "isolatedSum": { + "p50": 234.56000536680222, + "p90": 261.8880122900009, + "p95": 280.60799837112427, + "p99": 309.919998049736 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-b1823392", + "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|decode|normal|none|none|0|tuned||22da8b58646609c", + "colorKey": "h200_c5b3365a", + "comparisonKey": "73e84f1c938d90c0", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:04:44.997855+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_13", + "sku": "h200", + "backend": "deepep", + 
"phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · zipf-heavy", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "22da8b58646609c", + "workloadId": "set:8:6b84350720aa8233", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28272086516", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272086516", + "createdAt": "2026-06-27T00:03:05Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 64.92800265550613, + "p90": 98.62399846315384, + "p95": 109.27999764680862, + "p99": 182.23999440670013 + }, + "combine": { + "p50": 60.92799827456474, + "p90": 75.42400062084198, + "p95": 80.6720033288002, + "p99": 96.54399752616882 + }, + "roundtrip": { + "p50": 116.57600104808807, + "p90": 152.44799852371216, + "p95": 162.81600296497345, + 
"p99": 179.51999604701996 + }, + "isolatedSum": { + "p50": 125.85600093007088, + "p90": 174.04799908399582, + "p95": 189.95200097560883, + "p99": 278.78399193286896 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 172032, + "combineLogicalBytes": 172032, + "fanoutMean": 1.5, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 65.50399959087372, + "p90": 96.47999703884125, + "p95": 104.86400127410889, + "p99": 137.56799697875977 + }, + "combine": { + "p50": 59.808000922203064, + "p90": 72.83200323581696, + "p95": 78.84799689054489, + "p99": 92.19200164079666 + }, + "roundtrip": { + "p50": 110.97600311040878, + "p90": 140.00000059604645, + "p95": 150.87999403476715, + "p99": 177.72799730300903 + }, + "isolatedSum": { + "p50": 125.31200051307678, + "p90": 169.3120002746582, + "p95": 183.71199816465378, + "p99": 229.75999861955643 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 315392, + "fanoutMean": 1.375, + "recvTokensMax": 16, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 73.18399846553802, + "p90": 93.63199770450592, + "p95": 108.67200046777725, + "p99": 126.01600587368011 + }, + "combine": { + "p50": 62.20800057053566, + "p90": 70.52800059318542, + "p95": 78.07999849319458, + "p99": 100.51199793815613 + }, + "roundtrip": { + "p50": 116.67200177907944, + "p90": 144.1279947757721, + "p95": 158.91200304031372, + "p99": 186.17600202560425 + }, + "isolatedSum": { + "p50": 135.39199903607368, + "p90": 164.15999829769135, + "p95": 186.75199896097183, + "p99": 226.52800381183624 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 1.34375, + "recvTokensMax": 32, + "stragglerRank": 5, + "correct": true, + 
"samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 72.64000177383423, + "p90": 98.39999675750732, + "p95": 103.93600165843964, + "p99": 132.28799402713776 + }, + "combine": { + "p50": 60.99199876189232, + "p90": 72.06399738788605, + "p95": 79.52000200748444, + "p99": 91.5519967675209 + }, + "roundtrip": { + "p50": 118.94399672746658, + "p90": 150.30400454998016, + "p95": 160.3199988603592, + "p99": 178.78399789333344 + }, + "isolatedSum": { + "p50": 133.63200053572655, + "p90": 170.46399414539337, + "p95": 183.45600366592407, + "p99": 223.83999079465866 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1376256, + "combineLogicalBytes": 1376256, + "fanoutMean": 1.5, + "recvTokensMax": 64, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 72.4480003118515, + "p90": 85.9839990735054, + "p95": 96.99200093746185, + "p99": 122.17599898576736 + }, + "combine": { + "p50": 67.10399687290192, + "p90": 77.11999863386154, + "p95": 83.74399691820145, + "p99": 104.16000336408615 + }, + "roundtrip": { + "p50": 118.40000003576279, + "p90": 138.11199367046356, + "p95": 145.11999487876892, + "p99": 157.18400478363037 + }, + "isolatedSum": { + "p50": 139.55199718475342, + "p90": 163.10399770736694, + "p95": 180.7359978556633, + "p99": 226.33600234985352 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2781184, + "combineLogicalBytes": 2781184, + "fanoutMean": 1.515625, + "recvTokensMax": 128, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 76.86399668455124, + "p90": 97.31200337409973, + "p95": 106.36799782514572, + "p99": 120.25599926710129 + }, + "combine": { + "p50": 69.47200000286102, + "p90": 82.78399705886841, + "p95": 87.80799806118011, + "p99": 102.9760017991066 + }, + 
"roundtrip": { + "p50": 128.25599312782288, + "p90": 152.63999998569489, + "p95": 163.10399770736694, + "p99": 197.37599790096283 + }, + "isolatedSum": { + "p50": 146.33599668741226, + "p90": 180.09600043296814, + "p95": 194.17599588632584, + "p99": 223.23200106620789 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 5533696, + "combineLogicalBytes": 5533696, + "fanoutMean": 1.5078125, + "recvTokensMax": 256, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 95.8079993724823, + "p90": 111.68000102043152, + "p95": 120.99199742078781, + "p99": 207.61600136756897 + }, + "combine": { + "p50": 81.53600245714188, + "p90": 93.75999867916107, + "p95": 102.24000364542007, + "p99": 131.1360001564026 + }, + "roundtrip": { + "p50": 155.96799552440643, + "p90": 171.23199999332428, + "p95": 179.9360066652298, + "p99": 195.93599438667297 + }, + "isolatedSum": { + "p50": 177.34400182962418, + "p90": 205.4399996995926, + "p95": 223.23200106620789, + "p99": 338.75200152397156 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 11210752, + "combineLogicalBytes": 11210752, + "fanoutMean": 1.52734375, + "recvTokensMax": 512, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 118.49600076675415, + "p90": 133.40799510478973, + "p95": 137.472003698349, + "p99": 168.09600591659546 + }, + "combine": { + "p50": 108.51199924945831, + "p90": 121.37600034475327, + "p95": 125.18399953842163, + "p99": 135.74400544166565 + }, + "roundtrip": { + "p50": 205.76000213623047, + "p90": 222.78399765491486, + "p95": 227.84000635147095, + "p99": 288.2879972457886 + }, + "isolatedSum": { + "p50": 227.00800001621246, + "p90": 254.783995449543, + "p95": 262.65600323677063, + "p99": 303.8400113582611 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22650880, + 
"combineLogicalBytes": 22650880, + "fanoutMean": 1.54296875, + "recvTokensMax": 1024, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-1cebdc77", + "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|decode|normal|none|none|0|tuned||47fddabb3277bec", + "colorKey": "h200_c5b3365a", + "comparisonKey": "73e84f1c938d90c0", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:57:04.169845+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_4", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · zipf-heavy", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "47fddabb3277bec", + "workloadId": "set:4:6b84350720aa8233", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": 
"28271852422", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271852422", + "createdAt": "2026-06-26T23:55:40Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 70.27199864387512, + "p90": 103.64799946546555, + "p95": 125.791996717453, + "p99": 208.15999805927277 + }, + "combine": { + "p50": 61.95199862122536, + "p90": 75.45600086450577, + "p95": 80.6720033288002, + "p99": 99.07200187444687 + }, + "roundtrip": { + "p50": 117.37599968910217, + "p90": 144.83200013637543, + "p95": 152.73599326610565, + "p99": 179.58399653434753 + }, + "isolatedSum": { + "p50": 132.22399726510048, + "p90": 179.1040003299713, + "p95": 206.4640000462532, + "p99": 307.23199993371964 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 172032, + "combineLogicalBytes": 172032, + "fanoutMean": 1.5, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 75.26399940252304, + "p90": 98.11200201511383, + "p95": 106.175996363163, + "p99": 138.3039951324463 + }, + "combine": { + "p50": 63.90400230884552, + "p90": 78.43200117349625, + "p95": 83.99999886751175, + "p99": 94.11200135946274 + }, + "roundtrip": { + "p50": 119.48800086975098, + "p90": 151.16800367832184, + "p95": 161.53599321842194, + "p99": 214.4320011138916 + }, + "isolatedSum": { + "p50": 139.16800171136856, + "p90": 176.54400318861008, + "p95": 190.17599523067474, + "p99": 232.41599649190903 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1376256, + "combineLogicalBytes": 1376256, + "fanoutMean": 1.5, + "recvTokensMax": 64, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 82.17599987983704, + "p90": 105.98400235176086, + "p95": 113.11999708414078, + "p99": 
133.18400084972382 + }, + "combine": { + "p50": 72.15999811887741, + "p90": 88.76799792051315, + "p95": 93.28000247478485, + "p99": 116.57600104808807 + }, + "roundtrip": { + "p50": 134.49600338935852, + "p90": 162.432000041008, + "p95": 173.47200214862823, + "p99": 268.8640058040619 + }, + "isolatedSum": { + "p50": 154.33599799871445, + "p90": 194.75200027227402, + "p95": 206.39999955892563, + "p99": 249.7600018978119 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 5533696, + "combineLogicalBytes": 5533696, + "fanoutMean": 1.5078125, + "recvTokensMax": 256, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 119.58400160074234, + "p90": 131.96800649166107, + "p95": 136.3839954137802, + "p99": 154.59200739860535 + }, + "combine": { + "p50": 109.31199789047241, + "p90": 120.67200243473053, + "p95": 125.69600343704224, + "p99": 135.3919953107834 + }, + "roundtrip": { + "p50": 207.58399367332458, + "p90": 222.91199862957, + "p95": 232.86400735378265, + "p99": 284.89598631858826 + }, + "isolatedSum": { + "p50": 228.89599949121475, + "p90": 252.6400089263916, + "p95": 262.07999885082245, + "p99": 289.98400270938873 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22650880, + "combineLogicalBytes": 22650880, + "fanoutMean": 1.54296875, + "recvTokensMax": 1024, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-78ae7872", + "identity": "h200|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-heavy+eplb|8|decode|normal|none|none|0|tuned||5a3054422534366", + "colorKey": "h200_06aa1194", + "comparisonKey": "85dbd46cb77d1362", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:04:54.232728+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_5", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + 
"resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · zipf-heavy+eplb", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "5a3054422534366", + "workloadId": "set:8:6b84350720aa8233", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": 7.40625, + "eplbImbalanceAfter": 1.0004417782738093, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28272090308", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272090308", + "createdAt": "2026-06-27T00:03:13Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 69.21599805355072, + "p90": 83.55200290679932, + "p95": 92.83199906349182, + "p99": 110.75200140476227 + }, + "combine": { + "p50": 67.45599955320358, + "p90": 76.12799853086472, + "p95": 81.53600245714188, + "p99": 88.54400366544724 + }, + "roundtrip": { + "p50": 122.079998254776, + "p90": 140.4159963130951, + "p95": 148.25600385665894, + "p99": 
178.3680021762848 + }, + "isolatedSum": { + "p50": 136.6719976067543, + "p90": 159.68000143766403, + "p95": 174.3680015206337, + "p99": 199.2960050702095 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 559104, + "combineLogicalBytes": 559104, + "fanoutMean": 4.875, + "recvTokensMax": 6, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 72.03199714422226, + "p90": 80.57600259780884, + "p95": 86.40000224113464, + "p99": 97.34400361776352 + }, + "combine": { + "p50": 67.61600077152252, + "p90": 75.13599842786789, + "p95": 79.0719985961914, + "p99": 86.40000224113464 + }, + "roundtrip": { + "p50": 120.7360029220581, + "p90": 138.49599659442902, + "p95": 162.01600432395935, + "p99": 265.21599292755127 + }, + "isolatedSum": { + "p50": 139.64799791574478, + "p90": 155.71200102567673, + "p95": 165.47200083732605, + "p99": 183.74400585889816 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1175552, + "combineLogicalBytes": 1175552, + "fanoutMean": 5.125, + "recvTokensMax": 12, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 74.30399954319, + "p90": 86.91199868917465, + "p95": 100.12800246477127, + "p99": 123.48800152540207 + }, + "combine": { + "p50": 68.51200014352798, + "p90": 77.85599678754807, + "p95": 84.70399677753448, + "p99": 112.15999722480774 + }, + "roundtrip": { + "p50": 121.31199985742569, + "p90": 140.25600254535675, + "p95": 151.64799988269806, + "p99": 177.66399681568146 + }, + "isolatedSum": { + "p50": 142.815999686718, + "p90": 164.76799547672272, + "p95": 184.83199924230576, + "p99": 235.6479987502098 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2465792, + "combineLogicalBytes": 2465792, + "fanoutMean": 5.375, + "recvTokensMax": 25, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + 
"trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 74.65600222349167, + "p90": 88.28800171613693, + "p95": 97.98400104045868, + "p99": 121.2799996137619 + }, + "combine": { + "p50": 69.56800073385239, + "p90": 78.87999713420868, + "p95": 83.16799998283386, + "p99": 94.84799951314926 + }, + "roundtrip": { + "p50": 126.36800110340118, + "p90": 164.57599401474, + "p95": 172.44799435138702, + "p99": 196.22400403022766 + }, + "isolatedSum": { + "p50": 144.22400295734406, + "p90": 167.1679988503456, + "p95": 181.15200102329254, + "p99": 216.12799912691116 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4988928, + "combineLogicalBytes": 4988928, + "fanoutMean": 5.4375, + "recvTokensMax": 47, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 73.53600114583969, + "p90": 88.0960002541542, + "p95": 96.03200107812881, + "p99": 121.2799996137619 + }, + "combine": { + "p50": 70.39999961853027, + "p90": 78.91199737787247, + "p95": 86.36800199747086, + "p99": 98.9760011434555 + }, + "roundtrip": { + "p50": 125.47199428081512, + "p90": 143.96800100803375, + "p95": 153.6960005760193, + "p99": 172.8000044822693 + }, + "isolatedSum": { + "p50": 143.93600076436996, + "p90": 167.00799763202667, + "p95": 182.40000307559967, + "p99": 220.2560007572174 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9791488, + "combineLogicalBytes": 9791488, + "fanoutMean": 5.3359375, + "recvTokensMax": 94, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 81.02399855852127, + "p90": 94.71999853849411, + "p95": 106.11200332641602, + "p99": 144.6399986743927 + }, + "combine": { + "p50": 76.7040029168129, + "p90": 88.54400366544724, + "p95": 96.76799923181534, + "p99": 107.00800269842148 + }, + "roundtrip": { + "p50": 
135.29600203037262, + "p90": 158.78400206565857, + "p95": 170.84799706935883, + "p99": 241.43999814987183 + }, + "isolatedSum": { + "p50": 157.72800147533417, + "p90": 183.26400220394135, + "p95": 202.88000255823135, + "p99": 251.64800137281418 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19410944, + "combineLogicalBytes": 19410944, + "fanoutMean": 5.2890625, + "recvTokensMax": 178, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 92.96000003814697, + "p90": 105.47199845314026, + "p95": 115.80800265073776, + "p99": 153.56799960136414 + }, + "combine": { + "p50": 86.87999844551086, + "p90": 96.03200107812881, + "p95": 102.33599692583084, + "p99": 112.67200112342834 + }, + "roundtrip": { + "p50": 158.4320068359375, + "p90": 171.26399278640747, + "p95": 179.967999458313, + "p99": 206.43199980258942 + }, + "isolatedSum": { + "p50": 179.83999848365784, + "p90": 201.50399953126907, + "p95": 218.1439995765686, + "p99": 266.2400007247925 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38678528, + "combineLogicalBytes": 38678528, + "fanoutMean": 5.26953125, + "recvTokensMax": 360, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 117.18399822711945, + "p90": 127.68000364303589, + "p95": 131.3599944114685, + "p99": 140.44800400733948 + }, + "combine": { + "p50": 104.3199971318245, + "p90": 113.76000195741653, + "p95": 121.98399752378464, + "p99": 137.28000223636627 + }, + "roundtrip": { + "p50": 196.28800451755524, + "p90": 208.95999670028687, + "p95": 216.5759950876236, + "p99": 241.56799912452698 + }, + "isolatedSum": { + "p50": 221.50399535894394, + "p90": 241.44000560045242, + "p95": 253.34399193525314, + "p99": 277.72800624370575 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77285376, + "combineLogicalBytes": 
77285376, + "fanoutMean": 5.2646484375, + "recvTokensMax": 704, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-4fa5aaad", + "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-mild|8|decode|normal|none|none|0|tuned||f3df51be7d5c32b", + "colorKey": "h200_6a794fcd", + "comparisonKey": "50f5858697d33730", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:03:36.902996+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_1", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · zipf-mild", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-mild", + "routingLabel": "zipf-mild", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "f3df51be7d5c32b", + "workloadId": "set:8:289b7f9c14292e96", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28272056705", + "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272056705", + "createdAt": "2026-06-27T00:02:10Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 73.98399710655212, + "p90": 98.84800016880035, + "p95": 105.98400235176086, + "p99": 125.21600723266602 + }, + "combine": { + "p50": 68.96000355482101, + "p90": 81.66400343179703, + "p95": 86.496002972126, + "p99": 102.88000106811523 + }, + "roundtrip": { + "p50": 119.93599683046341, + "p90": 147.93600142002106, + "p95": 157.53600001335144, + "p99": 168.09600591659546 + }, + "isolatedSum": { + "p50": 142.94400066137314, + "p90": 180.51200360059738, + "p95": 192.48000532388687, + "p99": 228.09600830078125 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 587776, + "combineLogicalBytes": 587776, + "fanoutMean": 5.125, + "recvTokensMax": 8, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 73.08799773454666, + "p90": 88.73599767684937, + "p95": 100.09600222110748, + "p99": 118.20799857378006 + }, + "combine": { + "p50": 68.35199892520905, + "p90": 77.08799839019775, + "p95": 82.84799754619598, + "p99": 91.61599725484848 + }, + "roundtrip": { + "p50": 123.3920007944107, + "p90": 151.296004652977, + "p95": 158.84800255298615, + "p99": 186.27199530601501 + }, + "isolatedSum": { + "p50": 141.4399966597557, + "p90": 165.82399606704712, + "p95": 182.94399976730347, + "p99": 209.82399582862854 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1103872, + "combineLogicalBytes": 1103872, + "fanoutMean": 4.8125, + "recvTokensMax": 16, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 76.51200145483017, + "p90": 102.88000106811523, + "p95": 109.11999642848969, + "p99": 128.31999361515045 + }, + 
"combine": { + "p50": 69.82400268316269, + "p90": 81.44000172615051, + "p95": 86.75199747085571, + "p99": 98.04800152778625 + }, + "roundtrip": { + "p50": 126.14400684833527, + "p90": 157.6640009880066, + "p95": 167.84000396728516, + "p99": 190.88000059127808 + }, + "isolatedSum": { + "p50": 146.33600413799286, + "p90": 184.32000279426575, + "p95": 195.8719938993454, + "p99": 226.3679951429367 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2250752, + "combineLogicalBytes": 2250752, + "fanoutMean": 4.90625, + "recvTokensMax": 31, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 75.9039968252182, + "p90": 97.4079966545105, + "p95": 101.88800096511841, + "p99": 117.60000139474869 + }, + "combine": { + "p50": 70.62400132417679, + "p90": 84.73599702119827, + "p95": 90.11200070381165, + "p99": 107.42399841547012 + }, + "roundtrip": { + "p50": 125.69600343704224, + "p90": 150.751993060112, + "p95": 158.30400586128235, + "p99": 175.4239946603775 + }, + "isolatedSum": { + "p50": 146.527998149395, + "p90": 182.14399367570877, + "p95": 192.00000166893005, + "p99": 225.0239998102188 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4472832, + "combineLogicalBytes": 4472832, + "fanoutMean": 4.875, + "recvTokensMax": 62, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 76.60800218582153, + "p90": 93.47199648618698, + "p95": 101.9200012087822, + "p99": 109.82400178909302 + }, + "combine": { + "p50": 71.26399874687195, + "p90": 84.09599959850311, + "p95": 88.32000195980072, + "p99": 100.89600086212158 + }, + "roundtrip": { + "p50": 128.25599312782288, + "p90": 152.96000242233276, + "p95": 160.76800227165222, + "p99": 201.92000269889832 + }, + "isolatedSum": { + "p50": 147.87200093269348, + "p90": 177.5679960846901, + "p95": 190.24000316858292, 
+ "p99": 210.7200026512146 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 8888320, + "combineLogicalBytes": 8888320, + "fanoutMean": 4.84375, + "recvTokensMax": 124, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 83.8719978928566, + "p90": 102.55999863147736, + "p95": 108.92800241708755, + "p99": 121.76000326871872 + }, + "combine": { + "p50": 78.43200117349625, + "p90": 91.839998960495, + "p95": 96.57599776983261, + "p99": 108.12799632549286 + }, + "roundtrip": { + "p50": 138.46400380134583, + "p90": 160.19199788570404, + "p95": 168.09600591659546, + "p99": 186.14399433135986 + }, + "isolatedSum": { + "p50": 162.30399906635284, + "p90": 194.39999759197235, + "p95": 205.50400018692017, + "p99": 229.88799959421158 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 17733632, + "combineLogicalBytes": 17733632, + "fanoutMean": 4.83203125, + "recvTokensMax": 248, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 99.29600358009338, + "p90": 117.79200285673141, + "p95": 125.44000148773193, + "p99": 154.01600301265717 + }, + "combine": { + "p50": 90.14400094747543, + "p90": 102.91200131177902, + "p95": 110.17599701881409, + "p99": 119.35999989509583 + }, + "roundtrip": { + "p50": 166.75199568271637, + "p90": 185.7600063085556, + "p95": 193.02399456501007, + "p99": 220.60799598693848 + }, + "isolatedSum": { + "p50": 189.44000452756882, + "p90": 220.70400416851044, + "p95": 235.61599850654602, + "p99": 273.376002907753 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 35424256, + "combineLogicalBytes": 35424256, + "fanoutMean": 4.826171875, + "recvTokensMax": 492, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 
124.4800016283989, + "p90": 135.93600690364838, + "p95": 141.184002161026, + "p99": 167.23200678825378 + }, + "combine": { + "p50": 115.68000167608261, + "p90": 127.29600071907043, + "p95": 131.99999928474426, + "p99": 150.78400075435638 + }, + "roundtrip": { + "p50": 216.95999801158905, + "p90": 232.80000686645508, + "p95": 238.27199637889862, + "p99": 261.02399826049805 + }, + "isolatedSum": { + "p50": 240.1600033044815, + "p90": 263.2320076227188, + "p95": 273.18400144577026, + "p99": 318.01600754261017 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 70160384, + "combineLogicalBytes": 70160384, + "fanoutMean": 4.779296875, + "recvTokensMax": 987, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-ffad9f17", + "identity": "h200|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-mild+eplb|8|decode|normal|none|none|0|tuned||16babcaf4204243", + "colorKey": "h200_b2ffaf91", + "comparisonKey": "b3b8e5cc27948267", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:03:43.326778+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_7", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · zipf-mild+eplb", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-mild", + "routingLabel": "zipf-mild+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + 
"conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "16babcaf4204243", + "workloadId": "set:8:289b7f9c14292e96", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": 2.61328125, + "eplbImbalanceAfter": 1.0009114583333334, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28272060649", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272060649", + "createdAt": "2026-06-27T00:02:17Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 72.9919970035553, + "p90": 96.73599898815155, + "p95": 102.7199998497963, + "p99": 128.83199751377106 + }, + "combine": { + "p50": 68.15999746322632, + "p90": 81.05599880218506, + "p95": 86.40000224113464, + "p99": 94.91200000047684 + }, + "roundtrip": { + "p50": 122.30399996042252, + "p90": 153.85599434375763, + "p95": 167.23200678825378, + "p99": 196.03200256824493 + }, + "isolatedSum": { + "p50": 141.15199446678162, + "p90": 177.7919977903366, + "p95": 189.12000209093094, + "p99": 223.7439975142479 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 7, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 73.02399724721909, + "p90": 95.77599912881851, + "p95": 103.74400019645691, + "p99": 121.72800302505493 + }, + "combine": { + "p50": 67.80800223350525, + "p90": 80.73599636554718, + "p95": 87.39200234413147, + "p99": 99.45599734783173 + }, + "roundtrip": { + "p50": 121.34400010108948, + "p90": 
149.1840034723282, + "p95": 156.76799416542053, + "p99": 182.36799538135529 + }, + "isolatedSum": { + "p50": 140.83199948072433, + "p90": 176.5119954943657, + "p95": 191.13600254058838, + "p99": 221.18400037288666 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1189888, + "combineLogicalBytes": 1189888, + "fanoutMean": 5.1875, + "recvTokensMax": 12, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 73.40800017118454, + "p90": 92.54399687051773, + "p95": 101.15200281143188, + "p99": 184.28799510002136 + }, + "combine": { + "p50": 68.28799843788147, + "p90": 82.40000158548355, + "p95": 88.03199976682663, + "p99": 100.44799745082855 + }, + "roundtrip": { + "p50": 124.38400089740753, + "p90": 158.59200060367584, + "p95": 172.2240000963211, + "p99": 259.42400097846985 + }, + "isolatedSum": { + "p50": 141.695998609066, + "p90": 174.94399845600128, + "p95": 189.18400257825851, + "p99": 284.7359925508499 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2408448, + "combineLogicalBytes": 2408448, + "fanoutMean": 5.25, + "recvTokensMax": 23, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 74.5600014925003, + "p90": 100.12800246477127, + "p95": 106.78400099277496, + "p99": 138.11199367046356 + }, + "combine": { + "p50": 69.08799707889557, + "p90": 81.28000050783157, + "p95": 86.81599795818329, + "p99": 96.67199850082397 + }, + "roundtrip": { + "p50": 123.23199957609177, + "p90": 151.58399939537048, + "p95": 159.87199544906616, + "p99": 174.6560037136078 + }, + "isolatedSum": { + "p50": 143.64799857139587, + "p90": 181.40800297260284, + "p95": 193.59999895095825, + "p99": 234.78399217128754 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 47, + 
"stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 74.65600222349167, + "p90": 99.32799637317657, + "p95": 105.56799918413162, + "p99": 127.20000743865967 + }, + "combine": { + "p50": 69.88800317049026, + "p90": 83.10399949550629, + "p95": 88.639996945858, + "p99": 99.35999661684036 + }, + "roundtrip": { + "p50": 124.9919980764389, + "p90": 151.48800611495972, + "p95": 159.5200002193451, + "p99": 197.88800179958344 + }, + "isolatedSum": { + "p50": 144.54400539398193, + "p90": 182.43199586868286, + "p95": 194.20799612998962, + "p99": 226.56000405550003 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9605120, + "combineLogicalBytes": 9605120, + "fanoutMean": 5.234375, + "recvTokensMax": 93, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 80.35200089216232, + "p90": 101.02400183677673, + "p95": 105.6319996714592, + "p99": 116.7680025100708 + }, + "combine": { + "p50": 76.80000364780426, + "p90": 88.86399865150452, + "p95": 94.17600184679031, + "p99": 101.56799852848053 + }, + "roundtrip": { + "p50": 135.04000008106232, + "p90": 155.29599785804749, + "p95": 165.50399363040924, + "p99": 190.43199717998505 + }, + "isolatedSum": { + "p50": 157.15200453996658, + "p90": 189.88800048828125, + "p95": 199.8080015182495, + "p99": 218.33600103855133 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19367936, + "combineLogicalBytes": 19367936, + "fanoutMean": 5.27734375, + "recvTokensMax": 182, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 97.120001912117, + "p90": 111.00800335407257, + "p95": 117.11999773979187, + "p99": 134.39999520778656 + }, + "combine": { + "p50": 87.39200234413147, + "p90": 99.32799637317657, + "p95": 
105.6319996714592, + "p99": 121.18399888277054 + }, + "roundtrip": { + "p50": 159.2320054769516, + "p90": 177.2480010986328, + "p95": 184.28799510002136, + "p99": 207.71199464797974 + }, + "isolatedSum": { + "p50": 184.51200425624847, + "p90": 210.33599972724915, + "p95": 222.75199741125107, + "p99": 255.5839940905571 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38535168, + "combineLogicalBytes": 38535168, + "fanoutMean": 5.25, + "recvTokensMax": 358, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 115.23199826478958, + "p90": 132.4159950017929, + "p95": 140.47999680042267, + "p99": 171.64799571037292 + }, + "combine": { + "p50": 102.84800082445145, + "p90": 114.07999694347382, + "p95": 119.1679984331131, + "p99": 129.60000336170197 + }, + "roundtrip": { + "p50": 195.90400159358978, + "p90": 210.11200547218323, + "p95": 217.15199947357178, + "p99": 243.74400079250336 + }, + "isolatedSum": { + "p50": 218.07999908924103, + "p90": 246.49599194526672, + "p95": 259.64799523353577, + "p99": 301.2479990720749 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 76869632, + "combineLogicalBytes": 76869632, + "fanoutMean": 5.236328125, + "recvTokensMax": 688, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-49529f9d", + "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-moderate|8|decode|normal|none|none|0|tuned||14ded8461f2636c", + "colorKey": "h200_f2b19f62", + "comparisonKey": "cc27e02aea0a210a", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:04:04.313162+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_11", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": 
"layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · zipf-moderate", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "14ded8461f2636c", + "workloadId": "set:8:120a8dc1dba92ca9", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28272072315", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272072315", + "createdAt": "2026-06-27T00:02:38Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 72.95999675989151, + "p90": 100.73599964380264, + "p95": 110.88000237941742, + "p99": 152.99199521541595 + }, + "combine": { + "p50": 65.2799978852272, + "p90": 80.9599980711937, + "p95": 85.28000116348267, + "p99": 102.1760031580925 + }, + "roundtrip": { + "p50": 121.08799815177917, + "p90": 155.20000457763672, + "p95": 166.27199947834015, + "p99": 225.11999309062958 + }, + "isolatedSum": { + "p50": 138.2399946451187, + "p90": 181.69599771499634, + "p95": 196.16000354290009, + "p99": 
255.16799837350845 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 444416, + "combineLogicalBytes": 444416, + "fanoutMean": 3.875, + "recvTokensMax": 8, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 73.2479989528656, + "p90": 105.47199845314026, + "p95": 114.84800279140472, + "p99": 135.74400544166565 + }, + "combine": { + "p50": 67.61600077152252, + "p90": 79.83999699354172, + "p95": 83.5840031504631, + "p99": 92.99200028181076 + }, + "roundtrip": { + "p50": 119.64800208806992, + "p90": 145.56799829006195, + "p95": 150.91200172901154, + "p99": 165.18400609493256 + }, + "isolatedSum": { + "p50": 140.86399972438812, + "p90": 185.31199544668198, + "p95": 198.43200594186783, + "p99": 228.7360057234764 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 845824, + "combineLogicalBytes": 845824, + "fanoutMean": 3.6875, + "recvTokensMax": 16, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 73.79200309515, + "p90": 102.88000106811523, + "p95": 112.0000034570694, + "p99": 131.8719983100891 + }, + "combine": { + "p50": 67.80800223350525, + "p90": 78.8159966468811, + "p95": 83.29600095748901, + "p99": 102.08000242710114 + }, + "roundtrip": { + "p50": 120.60800194740295, + "p90": 144.44799721240997, + "p95": 152.67199277877808, + "p99": 166.59200191497803 + }, + "isolatedSum": { + "p50": 141.60000532865524, + "p90": 181.69599771499634, + "p95": 195.2960044145584, + "p99": 233.95200073719025 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1691648, + "combineLogicalBytes": 1691648, + "fanoutMean": 3.6875, + "recvTokensMax": 32, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 73.79200309515, + "p90": 97.75999933481216, + 
"p95": 105.92000186443329, + "p99": 117.69600212574005 + }, + "combine": { + "p50": 68.06399673223495, + "p90": 81.56800270080566, + "p95": 87.39200234413147, + "p99": 104.44799810647964 + }, + "roundtrip": { + "p50": 121.31199985742569, + "p90": 153.98399531841278, + "p95": 162.78399527072906, + "p99": 199.5519995689392 + }, + "isolatedSum": { + "p50": 141.85599982738495, + "p90": 179.32800203561783, + "p95": 193.31200420856476, + "p99": 222.1440002322197 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3354624, + "combineLogicalBytes": 3354624, + "fanoutMean": 3.65625, + "recvTokensMax": 64, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 73.44000041484833, + "p90": 97.69599884748459, + "p95": 103.74400019645691, + "p99": 117.15199798345566 + }, + "combine": { + "p50": 69.98399645090103, + "p90": 83.16799998283386, + "p95": 88.51200342178345, + "p99": 98.59199821949005 + }, + "roundtrip": { + "p50": 125.91999769210815, + "p90": 152.0320028066635, + "p95": 167.7439957857132, + "p99": 200.54399967193604 + }, + "isolatedSum": { + "p50": 143.42399686574936, + "p90": 180.86399883031845, + "p95": 192.25600361824036, + "p99": 215.7439962029457 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6537216, + "combineLogicalBytes": 6537216, + "fanoutMean": 3.5625, + "recvTokensMax": 127, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 85.4400023818016, + "p90": 105.05600273609161, + "p95": 111.93600296974182, + "p99": 135.48800349235535 + }, + "combine": { + "p50": 76.12799853086472, + "p90": 88.60799670219421, + "p95": 92.41600334644318, + "p99": 124.06399846076965 + }, + "roundtrip": { + "p50": 136.4479959011078, + "p90": 159.04000401496887, + "p95": 166.81599617004395, + "p99": 204.12799715995789 + }, + "isolatedSum": { + "p50": 
161.56800091266632, + "p90": 193.66399943828583, + "p95": 204.352006316185, + "p99": 259.552001953125 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 12859392, + "combineLogicalBytes": 12859392, + "fanoutMean": 3.50390625, + "recvTokensMax": 255, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 99.55199807882309, + "p90": 115.84000289440155, + "p95": 124.79999661445618, + "p99": 159.5200002193451 + }, + "combine": { + "p50": 86.65599673986435, + "p90": 98.68799895048141, + "p95": 104.032002389431, + "p99": 120.28799951076508 + }, + "roundtrip": { + "p50": 162.23999857902527, + "p90": 177.7919977903366, + "p95": 186.62400543689728, + "p99": 207.58399367332458 + }, + "isolatedSum": { + "p50": 186.20799481868744, + "p90": 214.52800184488297, + "p95": 228.83199900388718, + "p99": 279.80799973011017 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 25145344, + "combineLogicalBytes": 25145344, + "fanoutMean": 3.42578125, + "recvTokensMax": 510, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 123.16799908876419, + "p90": 138.08000087738037, + "p95": 149.59999918937683, + "p99": 160.35200655460358 + }, + "combine": { + "p50": 112.47999966144562, + "p90": 122.36800044775009, + "p95": 127.45599448680878, + "p99": 136.9280070066452 + }, + "roundtrip": { + "p50": 213.4079933166504, + "p90": 239.16800320148468, + "p95": 253.6959946155548, + "p99": 450.3040015697479 + }, + "isolatedSum": { + "p50": 235.6479987502098, + "p90": 260.44800132513046, + "p95": 277.0559936761856, + "p99": 297.2800135612488 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + 
] + }, + { + "id": "cx-904f847b", + "identity": "h200|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-moderate+eplb|8|decode|normal|none|none|0|tuned||a8f501af7004836", + "colorKey": "h200_bac4102c", + "comparisonKey": "6234055b9069f2f2", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:04:21.213602+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_0", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · zipf-moderate+eplb", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "a8f501af7004836", + "workloadId": "set:8:120a8dc1dba92ca9", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": 4.927734375, + "eplbImbalanceAfter": 1.0006103515625, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28272075655", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272075655", + "createdAt": 
"2026-06-27T00:02:45Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 71.99999690055847, + "p90": 99.35999661684036, + "p95": 108.47999900579453, + "p99": 130.8480054140091 + }, + "combine": { + "p50": 67.1359971165657, + "p90": 80.64000308513641, + "p95": 84.44800227880478, + "p99": 108.12799632549286 + }, + "roundtrip": { + "p50": 121.08799815177917, + "p90": 149.4079977273941, + "p95": 161.24799847602844, + "p99": 199.8080015182495 + }, + "isolatedSum": { + "p50": 139.13599401712418, + "p90": 179.99999970197678, + "p95": 192.9280012845993, + "p99": 238.97600173950195 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 5.375, + "recvTokensMax": 7, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 73.98399710655212, + "p90": 101.79200023412704, + "p95": 111.7120012640953, + "p99": 146.33600413799286 + }, + "combine": { + "p50": 68.7360018491745, + "p90": 82.04799890518188, + "p95": 88.73599767684937, + "p99": 105.21599650382996 + }, + "roundtrip": { + "p50": 124.41600114107132, + "p90": 160.0320041179657, + "p95": 172.86400496959686, + "p99": 196.44799828529358 + }, + "isolatedSum": { + "p50": 142.71999895572662, + "p90": 183.83999913930893, + "p95": 200.44799894094467, + "p99": 251.55200064182281 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1204224, + "combineLogicalBytes": 1204224, + "fanoutMean": 5.25, + "recvTokensMax": 14, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 76.22399926185608, + "p90": 108.76800119876862, + "p95": 123.1359988451004, + "p99": 148.8640010356903 + }, + "combine": { + "p50": 68.7360018491745, + "p90": 82.14399963617325, + "p95": 88.54400366544724, + 
"p99": 105.02400249242783 + }, + "roundtrip": { + "p50": 124.25599992275238, + "p90": 160.0320041179657, + "p95": 170.01600563526154, + "p99": 244.89599466323853 + }, + "isolatedSum": { + "p50": 144.96000111103058, + "p90": 190.91200083494186, + "p95": 211.68000251054764, + "p99": 253.88800352811813 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2394112, + "combineLogicalBytes": 2394112, + "fanoutMean": 5.21875, + "recvTokensMax": 24, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 74.62400197982788, + "p90": 101.21600329875946, + "p95": 111.10399663448334, + "p99": 145.47200500965118 + }, + "combine": { + "p50": 69.34399902820587, + "p90": 84.70399677753448, + "p95": 89.50400352478027, + "p99": 104.44799810647964 + }, + "roundtrip": { + "p50": 125.37600100040436, + "p90": 159.4880074262619, + "p95": 170.1119989156723, + "p99": 203.23200523853302 + }, + "isolatedSum": { + "p50": 143.96800100803375, + "p90": 185.92000007629395, + "p95": 200.6080001592636, + "p99": 249.92000311613083 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4630528, + "combineLogicalBytes": 4630528, + "fanoutMean": 5.046875, + "recvTokensMax": 45, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 72.73600250482559, + "p90": 97.75999933481216, + "p95": 108.03200304508209, + "p99": 141.9840008020401 + }, + "combine": { + "p50": 70.36799937486649, + "p90": 88.28800171613693, + "p95": 94.68799829483032, + "p99": 104.54399883747101 + }, + "roundtrip": { + "p50": 127.00800597667694, + "p90": 156.12800419330597, + "p95": 166.9439971446991, + "p99": 198.33600521087646 + }, + "isolatedSum": { + "p50": 143.10400187969208, + "p90": 186.0480010509491, + "p95": 202.72000133991241, + "p99": 246.5279996395111 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 
9447424, + "combineLogicalBytes": 9447424, + "fanoutMean": 5.1484375, + "recvTokensMax": 91, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 81.98399841785431, + "p90": 106.65600001811981, + "p95": 116.22399836778641, + "p99": 165.69599509239197 + }, + "combine": { + "p50": 76.9599974155426, + "p90": 90.87999910116196, + "p95": 97.120001912117, + "p99": 118.23999881744385 + }, + "roundtrip": { + "p50": 135.74400544166565, + "p90": 164.48000073432922, + "p95": 176.70400440692902, + "p99": 220.22399306297302 + }, + "isolatedSum": { + "p50": 158.9439958333969, + "p90": 197.53599911928177, + "p95": 213.3440002799034, + "p99": 283.9359939098358 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19023872, + "combineLogicalBytes": 19023872, + "fanoutMean": 5.18359375, + "recvTokensMax": 178, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 97.21600264310837, + "p90": 125.50400197505951, + "p95": 140.99200069904327, + "p99": 185.85599958896637 + }, + "combine": { + "p50": 87.77599781751633, + "p90": 105.53599894046783, + "p95": 113.0559965968132, + "p99": 125.63200294971466 + }, + "roundtrip": { + "p50": 159.7760021686554, + "p90": 186.65599822998047, + "p95": 201.53599977493286, + "p99": 221.69600427150726 + }, + "isolatedSum": { + "p50": 184.9920004606247, + "p90": 231.04000091552734, + "p95": 254.04799729585648, + "p99": 311.48800253868103 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38148096, + "combineLogicalBytes": 38148096, + "fanoutMean": 5.197265625, + "recvTokensMax": 350, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 118.40000003576279, + "p90": 141.7279988527298, + "p95": 151.296004652977, + "p99": 
174.84800517559052 + }, + "combine": { + "p50": 103.74400019645691, + "p90": 121.21599912643433, + "p95": 128.60800325870514, + "p99": 147.13600277900696 + }, + "roundtrip": { + "p50": 198.08000326156616, + "p90": 219.7760045528412, + "p95": 227.55199670791626, + "p99": 265.3760015964508 + }, + "isolatedSum": { + "p50": 222.1440002322197, + "p90": 262.9439979791641, + "p95": 279.90400791168213, + "p99": 321.9840079545975 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 76955648, + "combineLogicalBytes": 76955648, + "fanoutMean": 5.2421875, + "recvTokensMax": 687, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-06bd64b9", + "identity": "h200|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|decode|normal|none|none|0|tuned||a8f501af7004836", + "colorKey": "h200_1eda221e", + "comparisonKey": "00e2c45e1159b581", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:03:16.896756+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_3", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · zipf+eplb", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf", + "routingLabel": "zipf+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": 
{ + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "a8f501af7004836", + "workloadId": "set:8:f5576e2b712d38c3", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": 4.927734375, + "eplbImbalanceAfter": 1.0006103515625, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28272045914", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272045914", + "createdAt": "2026-06-27T00:01:50Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 72.15999811887741, + "p90": 99.0080013871193, + "p95": 105.56799918413162, + "p99": 131.80799782276154 + }, + "combine": { + "p50": 68.70400160551071, + "p90": 83.23200047016144, + "p95": 88.8959988951683, + "p99": 117.40799993276596 + }, + "roundtrip": { + "p50": 121.60000205039978, + "p90": 151.8079936504364, + "p95": 162.88000345230103, + "p99": 197.63199985027313 + }, + "isolatedSum": { + "p50": 140.86399972438812, + "p90": 182.24000185728073, + "p95": 194.46399807929993, + "p99": 249.2159977555275 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 5.375, + "recvTokensMax": 7, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 73.7600028514862, + "p90": 99.96800124645233, + "p95": 106.97600245475769, + "p99": 125.63200294971466 + }, + "combine": { + "p50": 67.58400052785873, + "p90": 79.52000200748444, + "p95": 84.35200154781342, + "p99": 95.61599791049957 + }, + "roundtrip": { + "p50": 121.95199728012085, + "p90": 150.52799880504608, + "p95": 158.9760035276413, + "p99": 188.51199746131897 + }, + "isolatedSum": { + "p50": 
141.34400337934494, + "p90": 179.48800325393677, + "p95": 191.3280040025711, + "p99": 221.24800086021423 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1204224, + "combineLogicalBytes": 1204224, + "fanoutMean": 5.25, + "recvTokensMax": 14, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 74.23999905586243, + "p90": 98.36799651384354, + "p95": 105.8880016207695, + "p99": 117.60000139474869 + }, + "combine": { + "p50": 68.57600063085556, + "p90": 81.82399719953537, + "p95": 86.496002972126, + "p99": 94.62399780750275 + }, + "roundtrip": { + "p50": 123.19999933242798, + "p90": 152.92799472808838, + "p95": 164.12800550460815, + "p99": 221.98399901390076 + }, + "isolatedSum": { + "p50": 142.815999686718, + "p90": 180.1919937133789, + "p95": 192.3840045928955, + "p99": 212.22399920225143 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2394112, + "combineLogicalBytes": 2394112, + "fanoutMean": 5.21875, + "recvTokensMax": 24, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 76.06399804353714, + "p90": 117.8240031003952, + "p95": 132.03200697898865, + "p99": 183.45600366592407 + }, + "combine": { + "p50": 69.37599927186966, + "p90": 85.02399921417236, + "p95": 89.66399729251862, + "p99": 100.3199964761734 + }, + "roundtrip": { + "p50": 123.16799908876419, + "p90": 152.8639942407608, + "p95": 160.96000373363495, + "p99": 184.1920018196106 + }, + "isolatedSum": { + "p50": 145.4399973154068, + "p90": 202.84800231456757, + "p95": 221.69600427150726, + "p99": 283.7760001420975 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4630528, + "combineLogicalBytes": 4630528, + "fanoutMean": 5.046875, + "recvTokensMax": 45, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + 
"globalTokens": 128, + "dispatch": { + "p50": 76.67200267314911, + "p90": 107.10400342941284, + "p95": 114.20799791812897, + "p99": 128.9599984884262 + }, + "combine": { + "p50": 72.25599884986877, + "p90": 88.76799792051315, + "p95": 96.00000083446503, + "p99": 114.75200206041336 + }, + "roundtrip": { + "p50": 128.31999361515045, + "p90": 158.6879938840866, + "p95": 168.89600455760956, + "p99": 192.89599359035492 + }, + "isolatedSum": { + "p50": 148.92800152301788, + "p90": 195.872001349926, + "p95": 210.207998752594, + "p99": 243.71200054883957 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9447424, + "combineLogicalBytes": 9447424, + "fanoutMean": 5.1484375, + "recvTokensMax": 91, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 83.39200168848038, + "p90": 103.80800068378448, + "p95": 109.43999886512756, + "p99": 126.71999633312225 + }, + "combine": { + "p50": 77.18399912118912, + "p90": 89.79199826717377, + "p95": 95.10400146245956, + "p99": 105.98400235176086 + }, + "roundtrip": { + "p50": 134.783998131752, + "p90": 157.79200196266174, + "p95": 167.13599860668182, + "p99": 210.94399690628052 + }, + "isolatedSum": { + "p50": 160.5760008096695, + "p90": 193.59999895095825, + "p95": 204.54400032758713, + "p99": 232.70399868488312 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19023872, + "combineLogicalBytes": 19023872, + "fanoutMean": 5.18359375, + "recvTokensMax": 178, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 93.56799721717834, + "p90": 113.63200098276138, + "p95": 120.2239990234375, + "p99": 133.4719955921173 + }, + "combine": { + "p50": 86.40000224113464, + "p90": 101.72799974679947, + "p95": 105.6319996714592, + "p99": 116.48000031709671 + }, + "roundtrip": { + "p50": 157.9200029373169, + "p90": 
181.34400248527527, + "p95": 187.42400407791138, + "p99": 211.87199652194977 + }, + "isolatedSum": { + "p50": 179.967999458313, + "p90": 215.36000072956085, + "p95": 225.8559986948967, + "p99": 249.95199590921402 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38148096, + "combineLogicalBytes": 38148096, + "fanoutMean": 5.197265625, + "recvTokensMax": 350, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 117.15199798345566, + "p90": 137.1839940547943, + "p95": 142.33599603176117, + "p99": 165.79200327396393 + }, + "combine": { + "p50": 106.84800148010254, + "p90": 119.32799965143204, + "p95": 122.81599640846252, + "p99": 133.53599607944489 + }, + "roundtrip": { + "p50": 197.56799936294556, + "p90": 213.85599672794342, + "p95": 221.3120013475418, + "p99": 245.37600576877594 + }, + "isolatedSum": { + "p50": 223.9999994635582, + "p90": 256.51199370622635, + "p95": 265.1519924402237, + "p99": 299.3279993534088 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 76955648, + "combineLogicalBytes": 76955648, + "fanoutMean": 5.2421875, + "recvTokensMax": 687, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-0d6ef23b", + "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|ac583971f94b176", + "colorKey": "h200_c851a534", + "comparisonKey": "6b4f4d7f65293019", + "schemaVersion": 3, + "generatedAt": "2026-06-26T17:29:45.312905+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_2", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "normalized", + "suite": "resource-constrained", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + 
"worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 (norm)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": 0.18, + "achievedFraction": 0.1818, + "configuredUnits": 24, + "deviceUnits": 132, + "resourceClass": "resource-constrained", + "conformanceClass": "resource-conforming", + "fixedKernel": false, + "paretoEligible": true + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": "set:8:d8d49658059863f2", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28254392935", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254392935", + "createdAt": "2026-06-26T17:28:22Z", + "sha": "60dec7d70f554e252fec87709e2be52752947db1" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 74.11199808120728, + "p90": 94.11200135946274, + "p95": 104.35199737548828, + "p99": 138.0160003900528 + }, + "combine": { + "p50": 68.41599941253662, + "p90": 78.72000336647034, + "p95": 83.48800241947174, + "p99": 105.72800040245056 + }, + "roundtrip": { + "p50": 124.4800016283989, + "p90": 144.31999623775482, + "p95": 156.3200056552887, + "p99": 193.53599846363068 + }, + "isolatedSum": { + "p50": 142.5279974937439, + "p90": 172.83200472593307, + "p95": 187.83999979496002, + "p99": 243.74400079250336 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + 
"combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 74.33599978685379, + "p90": 99.42399710416794, + "p95": 109.66400057077408, + "p99": 131.71200454235077 + }, + "combine": { + "p50": 69.85600292682648, + "p90": 83.00799876451492, + "p95": 90.40000289678574, + "p99": 114.33599889278412 + }, + "roundtrip": { + "p50": 122.43200093507767, + "p90": 144.6080058813095, + "p95": 154.62400019168854, + "p99": 173.69599640369415 + }, + "isolatedSum": { + "p50": 144.19200271368027, + "p90": 182.43199586868286, + "p95": 200.06400346755981, + "p99": 246.0480034351349 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 74.97599720954895, + "p90": 95.29600292444229, + "p95": 104.12800312042236, + "p99": 139.74399864673615 + }, + "combine": { + "p50": 69.40799951553345, + "p90": 81.63200318813324, + "p95": 88.22400122880936, + "p99": 119.4240003824234 + }, + "roundtrip": { + "p50": 123.74400347471237, + "p90": 150.36800503730774, + "p95": 160.3199988603592, + "p99": 204.8960030078888 + }, + "isolatedSum": { + "p50": 144.3839967250824, + "p90": 176.92800611257553, + "p95": 192.35200434923172, + "p99": 259.16799902915955 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 74.78400319814682, + "p90": 92.25600212812424, + "p95": 102.91200131177902, + "p99": 123.16799908876419 + }, + "combine": { + "p50": 
70.52800059318542, + "p90": 81.95199817419052, + "p95": 87.48800307512283, + "p99": 100.51199793815613 + }, + "roundtrip": { + "p50": 124.03199821710587, + "p90": 147.20000326633453, + "p95": 153.9199948310852, + "p99": 180.00000715255737 + }, + "isolatedSum": { + "p50": 145.31200379133224, + "p90": 174.20800030231476, + "p95": 190.40000438690186, + "p99": 223.67999702692032 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 73.18399846553802, + "p90": 92.83199906349182, + "p95": 103.61599922180176, + "p99": 195.93599438667297 + }, + "combine": { + "p50": 71.32799923419952, + "p90": 86.33600175380707, + "p95": 92.03200042247772, + "p99": 120.80000340938568 + }, + "roundtrip": { + "p50": 129.72800433635712, + "p90": 161.31199896335602, + "p95": 172.86400496959686, + "p99": 215.10399878025055 + }, + "isolatedSum": { + "p50": 144.51199769973755, + "p90": 179.1680008172989, + "p95": 195.64799964427948, + "p99": 316.73599779605865 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 82.2720006108284, + "p90": 100.80000013113022, + "p95": 108.92800241708755, + "p99": 134.88000631332397 + }, + "combine": { + "p50": 76.03199779987335, + "p90": 89.40800279378891, + "p95": 94.97600048780441, + "p99": 117.95199662446976 + }, + "roundtrip": { + "p50": 130.8480054140091, + "p90": 154.33600544929504, + "p95": 164.73600268363953, + "p99": 204.0639966726303 + }, + "isolatedSum": { + "p50": 158.30399841070175, + "p90": 190.20800292491913, + "p95": 203.90400290489197, + 
"p99": 252.83200293779373 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 91.32800251245499, + "p90": 110.04800349473953, + "p95": 116.86400324106216, + "p99": 146.84799313545227 + }, + "combine": { + "p50": 87.2960016131401, + "p90": 98.36799651384354, + "p95": 104.70400005578995, + "p99": 124.92799758911133 + }, + "roundtrip": { + "p50": 156.031996011734, + "p90": 173.24799299240112, + "p95": 180.38399517536163, + "p99": 215.39199352264404 + }, + "isolatedSum": { + "p50": 178.6240041255951, + "p90": 208.41600000858307, + "p95": 221.5680032968521, + "p99": 271.7759907245636 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 116.03199690580368, + "p90": 129.7599971294403, + "p95": 136.57599687576294, + "p99": 149.24800395965576 + }, + "combine": { + "p50": 103.42399775981903, + "p90": 116.54400080442429, + "p95": 123.3920007944107, + "p99": 141.95199310779572 + }, + "roundtrip": { + "p50": 192.54399836063385, + "p90": 208.8959962129593, + "p95": 215.64799547195435, + "p99": 228.7359982728958 + }, + "isolatedSum": { + "p50": 219.4559946656227, + "p90": 246.3039979338646, + "p95": 259.96799767017365, + "p99": 291.1999970674515 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-0f126172", + "identity": 
"h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|decode|normal|none|none|0|normalized|0.18|ffa946582edb500", + "colorKey": "h200_a1e795ec", + "comparisonKey": "467cf4a4daff1cff", + "schemaVersion": 3, + "generatedAt": "2026-06-26T17:30:47.472039+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_12", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "normalized", + "suite": "resource-constrained", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 (norm) · balanced", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced", + "routingLabel": "balanced", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": 0.18, + "achievedFraction": 0.1818, + "configuredUnits": 24, + "deviceUnits": 132, + "resourceClass": "resource-constrained", + "conformanceClass": "resource-conforming", + "fixedKernel": false, + "paretoEligible": true + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ffa946582edb500", + "workloadId": "set:8:7af12818400d6348", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28254443915", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254443915", + "createdAt": "2026-06-26T17:29:22Z", + "sha": "60dec7d70f554e252fec87709e2be52752947db1" + }, 
+ "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 73.95199686288834, + "p90": 88.0960002541542, + "p95": 97.24800288677216, + "p99": 108.25599730014801 + }, + "combine": { + "p50": 70.91200351715088, + "p90": 81.60000294446945, + "p95": 87.26400136947632, + "p99": 97.28000313043594 + }, + "roundtrip": { + "p50": 125.2480000257492, + "p90": 149.63200688362122, + "p95": 157.85600244998932, + "p99": 175.04000663757324 + }, + "isolatedSum": { + "p50": 144.86400038003922, + "p90": 169.69600319862366, + "p95": 184.51200425624847, + "p99": 205.53600043058395 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 8, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 74.23999905586243, + "p90": 91.00800007581711, + "p95": 98.88000041246414, + "p99": 130.23999333381653 + }, + "combine": { + "p50": 70.52800059318542, + "p90": 79.71200346946716, + "p95": 85.50400286912918, + "p99": 106.46399855613708 + }, + "roundtrip": { + "p50": 123.6800029873848, + "p90": 142.07999408245087, + "p95": 152.99199521541595, + "p99": 184.35199558734894 + }, + "isolatedSum": { + "p50": 144.76799964904785, + "p90": 170.72000354528427, + "p95": 184.38400328159332, + "p99": 236.7039918899536 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1835008, + "combineLogicalBytes": 1835008, + "fanoutMean": 8, + "recvTokensMax": 16, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 75.03999769687653, + "p90": 97.9200005531311, + "p95": 108.47999900579453, + "p99": 140.09599387645721 + }, + "combine": { + "p50": 70.11199742555618, + "p90": 81.34400099515915, + "p95": 86.496002972126, + "p99": 99.29600358009338 + }, + "roundtrip": { + "p50": 125.69600343704224, + "p90": 
151.36000514030457, + "p95": 159.55199301242828, + "p99": 178.3359944820404 + }, + "isolatedSum": { + "p50": 145.1519951224327, + "p90": 179.26400154829025, + "p95": 194.97600197792053, + "p99": 239.3919974565506 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 8, + "recvTokensMax": 32, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 73.56800138950348, + "p90": 94.17600184679031, + "p95": 102.62399911880493, + "p99": 126.14400684833527 + }, + "combine": { + "p50": 70.72000205516815, + "p90": 82.04799890518188, + "p95": 86.43200248479843, + "p99": 96.47999703884125 + }, + "roundtrip": { + "p50": 125.69600343704224, + "p90": 148.0640023946762, + "p95": 156.76799416542053, + "p99": 182.72000551223755 + }, + "isolatedSum": { + "p50": 144.28800344467163, + "p90": 176.2240007519722, + "p95": 189.05600160360336, + "p99": 222.6240038871765 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 7340032, + "combineLogicalBytes": 7340032, + "fanoutMean": 8, + "recvTokensMax": 64, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 74.46400076150894, + "p90": 90.71999788284302, + "p95": 96.44799679517746, + "p99": 112.19199746847153 + }, + "combine": { + "p50": 76.03199779987335, + "p90": 84.70399677753448, + "p95": 91.16800129413605, + "p99": 104.54399883747101 + }, + "roundtrip": { + "p50": 129.60000336170197, + "p90": 153.6960005760193, + "p95": 161.3440066576004, + "p99": 196.28800451755524 + }, + "isolatedSum": { + "p50": 150.4959985613823, + "p90": 175.4239946603775, + "p95": 187.6159980893135, + "p99": 216.73599630594254 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 8, + "recvTokensMax": 128, + "stragglerRank": 
7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 83.20000022649765, + "p90": 100.12800246477127, + "p95": 107.45599865913391, + "p99": 122.3360002040863 + }, + "combine": { + "p50": 80.79999685287476, + "p90": 89.88799899816513, + "p95": 95.36000341176987, + "p99": 100.54399818181992 + }, + "roundtrip": { + "p50": 142.17600226402283, + "p90": 155.45600652694702, + "p95": 165.3439998626709, + "p99": 182.0800006389618 + }, + "isolatedSum": { + "p50": 163.9999970793724, + "p90": 190.0160014629364, + "p95": 202.81600207090378, + "p99": 222.87999838590622 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 8, + "recvTokensMax": 256, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 107.61599987745285, + "p90": 121.0239976644516, + "p95": 127.07200646400452, + "p99": 148.73600006103516 + }, + "combine": { + "p50": 95.87199985980988, + "p90": 105.3759977221489, + "p95": 112.60800063610077, + "p99": 123.29600006341934 + }, + "roundtrip": { + "p50": 176.67199671268463, + "p90": 191.80800020694733, + "p95": 203.5840004682541, + "p99": 225.98400712013245 + }, + "isolatedSum": { + "p50": 203.48799973726273, + "p90": 226.3999953866005, + "p95": 239.68000710010529, + "p99": 272.0320001244545 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 8, + "recvTokensMax": 512, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 133.66399705410004, + "p90": 146.04799449443817, + "p95": 152.41600573062897, + "p99": 162.56000101566315 + }, + "combine": { + "p50": 118.52800101041794, + "p90": 127.68000364303589, + "p95": 130.91200590133667, + "p99": 
144.67200636863708 + }, + "roundtrip": { + "p50": 225.92000663280487, + "p90": 240.48000574111938, + "p95": 251.3279914855957, + "p99": 700.223982334137 + }, + "isolatedSum": { + "p50": 252.19199806451797, + "p90": 273.72799813747406, + "p95": 283.32801163196564, + "p99": 307.23200738430023 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 8, + "recvTokensMax": 1024, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-8e3ecfeb", + "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|decode|normal|none|none|0|normalized|0.18|14ded8461f2636c", + "colorKey": "h200_0a93a01f", + "comparisonKey": "c7e35a057338b2fa", + "schemaVersion": 3, + "generatedAt": "2026-06-26T17:31:04.173894+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_6", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "normalized", + "suite": "resource-constrained", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 (norm) · zipf", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf", + "routingLabel": "zipf", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": 0.18, + "achievedFraction": 0.1818, + "configuredUnits": 24, + "deviceUnits": 132, + "resourceClass": "resource-constrained", + "conformanceClass": "resource-conforming", + "fixedKernel": false, + "paretoEligible": true + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + 
"traceSignature": "14ded8461f2636c", + "workloadId": "set:8:f5576e2b712d38c3", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28254452252", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254452252", + "createdAt": "2026-06-26T17:29:31Z", + "sha": "60dec7d70f554e252fec87709e2be52752947db1" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 74.27199929952621, + "p90": 108.83200168609619, + "p95": 118.49600076675415, + "p99": 155.5200070142746 + }, + "combine": { + "p50": 68.38399916887283, + "p90": 84.03199911117554, + "p95": 90.20800143480301, + "p99": 114.88000303506851 + }, + "roundtrip": { + "p50": 123.07199835777283, + "p90": 153.08800339698792, + "p95": 165.8560037612915, + "p99": 205.9199959039688 + }, + "isolatedSum": { + "p50": 142.65599846839905, + "p90": 192.86400079727173, + "p95": 208.70400220155716, + "p99": 270.4000100493431 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 444416, + "combineLogicalBytes": 444416, + "fanoutMean": 3.875, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 73.95199686288834, + "p90": 97.82399982213974, + "p95": 106.6880002617836, + "p99": 132.9919993877411 + }, + "combine": { + "p50": 68.64000111818314, + "p90": 80.51200211048126, + "p95": 85.37600189447403, + "p99": 98.49599748849869 + }, + "roundtrip": { + "p50": 123.36000055074692, + "p90": 150.176003575325, + "p95": 158.4639996290207, + "p99": 181.63199722766876 + }, + "isolatedSum": { + "p50": 142.59199798107147, + "p90": 178.336001932621, + "p95": 192.06400215625763, + "p99": 231.48799687623978 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 845824, + "combineLogicalBytes": 845824, + "fanoutMean": 3.6875, + "recvTokensMax": 16, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 73.5040009021759, + "p90": 95.551997423172, + "p95": 104.86400127410889, + "p99": 123.4240010380745 + }, + "combine": { + "p50": 67.80800223350525, + "p90": 78.46400141716003, + "p95": 84.95999872684479, + "p99": 125.2799928188324 + }, + "roundtrip": { + "p50": 122.78400361537933, + "p90": 150.65599977970123, + "p95": 159.07199680805206, + "p99": 200.51200687885284 + }, + "isolatedSum": { + "p50": 141.31200313568115, + "p90": 174.01599884033203, + "p95": 189.82400000095367, + "p99": 248.7039938569069 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1691648, + "combineLogicalBytes": 1691648, + "fanoutMean": 3.6875, + "recvTokensMax": 32, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 75.23199915885925, + "p90": 103.32799702882767, + "p95": 111.87200248241425, + "p99": 143.26399564743042 + }, + "combine": { + "p50": 69.60000097751617, + "p90": 85.79199761152267, + "p95": 91.71199798583984, + "p99": 124.12799894809723 + }, + "roundtrip": { + "p50": 126.36800110340118, + "p90": 160.12799739837646, + "p95": 167.64800250530243, + "p99": 193.2159960269928 + }, + "isolatedSum": { + "p50": 144.83200013637543, + "p90": 189.11999464035034, + "p95": 203.5840004682541, + "p99": 267.39199459552765 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3354624, + "combineLogicalBytes": 3354624, + "fanoutMean": 3.65625, + "recvTokensMax": 64, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 77.40800082683563, + "p90": 104.63999956846237, + "p95": 113.43999952077866, + "p99": 
144.0960019826889 + }, + "combine": { + "p50": 70.52800059318542, + "p90": 87.23200112581253, + "p95": 90.94399958848953, + "p99": 101.1200025677681 + }, + "roundtrip": { + "p50": 127.6479959487915, + "p90": 161.85599565505981, + "p95": 175.7120043039322, + "p99": 230.27199506759644 + }, + "isolatedSum": { + "p50": 147.93600142002106, + "p90": 191.8720006942749, + "p95": 204.3839991092682, + "p99": 245.216004550457 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6537216, + "combineLogicalBytes": 6537216, + "fanoutMean": 3.5625, + "recvTokensMax": 127, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 83.0719992518425, + "p90": 109.50399935245514, + "p95": 115.61600118875504, + "p99": 128.1599998474121 + }, + "combine": { + "p50": 77.34400033950806, + "p90": 91.64799749851227, + "p95": 95.61599791049957, + "p99": 112.73600161075592 + }, + "roundtrip": { + "p50": 132.60799646377563, + "p90": 157.0879966020584, + "p95": 165.0560051202774, + "p99": 194.20799612998962 + }, + "isolatedSum": { + "p50": 160.41599959135056, + "p90": 201.1519968509674, + "p95": 211.2319990992546, + "p99": 240.89600145816803 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 12859392, + "combineLogicalBytes": 12859392, + "fanoutMean": 3.50390625, + "recvTokensMax": 255, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 97.15200215578079, + "p90": 111.35999858379364, + "p95": 121.31199985742569, + "p99": 134.8479986190796 + }, + "combine": { + "p50": 87.5839963555336, + "p90": 99.80800002813339, + "p95": 104.06400263309479, + "p99": 116.95999652147293 + }, + "roundtrip": { + "p50": 161.9199961423874, + "p90": 177.72799730300903, + "p95": 184.67199802398682, + "p99": 235.61599850654602 + }, + "isolatedSum": { + "p50": 184.7359985113144, + "p90": 
211.16799861192703, + "p95": 225.37600249052048, + "p99": 251.80799514055252 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 25145344, + "combineLogicalBytes": 25145344, + "fanoutMean": 3.42578125, + "recvTokensMax": 510, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 122.97599762678146, + "p90": 147.10399508476257, + "p95": 156.25600516796112, + "p99": 183.07200074195862 + }, + "combine": { + "p50": 110.49599945545197, + "p90": 123.87199699878693, + "p95": 129.40800189971924, + "p99": 150.751993060112 + }, + "roundtrip": { + "p50": 208.73600244522095, + "p90": 225.43999552726746, + "p95": 233.024001121521, + "p99": 256.415992975235 + }, + "isolatedSum": { + "p50": 233.47199708223343, + "p90": 270.9759920835495, + "p95": 285.66400706768036, + "p99": 333.8239938020706 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-9efea369", + "identity": "h200|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|decode|normal|none|none|0|normalized|0.18|a8f501af7004836", + "colorKey": "h200_993777bf", + "comparisonKey": "cdec001c60a84b85", + "schemaVersion": 3, + "generatedAt": "2026-06-26T17:46:59.245966+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_6", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "normalized", + "suite": "resource-constrained", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 (norm) · zipf+eplb", + "shape": { + "hidden": 7168, + "topk": 8, + 
"experts": 288, + "routing": "zipf", + "routingLabel": "zipf+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": 0.18, + "achievedFraction": 0.1818, + "configuredUnits": 24, + "deviceUnits": 132, + "resourceClass": "resource-constrained", + "conformanceClass": "resource-conforming", + "fixedKernel": false, + "paretoEligible": true + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "a8f501af7004836", + "workloadId": "set:8:f5576e2b712d38c3", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": 4.927734375, + "eplbImbalanceAfter": 1.0006103515625, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28255303840", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28255303840", + "createdAt": "2026-06-26T17:45:35Z", + "sha": "36d3eb6c3c7386d3220c873d305410219c5c0f17" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 72.89600372314453, + "p90": 99.45599734783173, + "p95": 108.73600095510483, + "p99": 128.86400520801544 + }, + "combine": { + "p50": 67.19999760389328, + "p90": 78.3040001988411, + "p95": 82.46400207281113, + "p99": 102.65599936246872 + }, + "roundtrip": { + "p50": 119.32799965143204, + "p90": 147.77599275112152, + "p95": 155.07200360298157, + "p99": 171.03999853134155 + }, + "isolatedSum": { + "p50": 140.0960013270378, + "p90": 177.75999754667282, + "p95": 191.20000302791595, + "p99": 231.52000457048416 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 5.375, + "recvTokensMax": 7, + "stragglerRank": 6, + "correct": true, + 
"samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 73.18399846553802, + "p90": 94.27200257778168, + "p95": 104.5759990811348, + "p99": 122.68800288438797 + }, + "combine": { + "p50": 68.09599697589874, + "p90": 81.15199953317642, + "p95": 86.17600053548813, + "p99": 113.3119985461235 + }, + "roundtrip": { + "p50": 120.31999975442886, + "p90": 147.45600521564484, + "p95": 157.82399475574493, + "p99": 190.08000195026398 + }, + "isolatedSum": { + "p50": 141.27999544143677, + "p90": 175.4240021109581, + "p95": 190.75199961662292, + "p99": 236.00000143051147 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1204224, + "combineLogicalBytes": 1204224, + "fanoutMean": 5.25, + "recvTokensMax": 14, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 78.62400263547897, + "p90": 130.5920034646988, + "p95": 144.54400539398193, + "p99": 178.847998380661 + }, + "combine": { + "p50": 69.08799707889557, + "p90": 80.51200211048126, + "p95": 87.87199854850769, + "p99": 104.19200360774994 + }, + "roundtrip": { + "p50": 124.70400333404541, + "p90": 154.14400398731232, + "p95": 165.15199840068817, + "p99": 194.68800723552704 + }, + "isolatedSum": { + "p50": 147.71199971437454, + "p90": 211.10400557518005, + "p95": 232.41600394248962, + "p99": 283.04000198841095 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2394112, + "combineLogicalBytes": 2394112, + "fanoutMean": 5.21875, + "recvTokensMax": 24, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 74.46400076150894, + "p90": 99.39199686050415, + "p95": 109.76000130176544, + "p99": 140.6400054693222 + }, + "combine": { + "p50": 68.76800209283829, + "p90": 83.64800363779068, + "p95": 90.14400094747543, + "p99": 115.35999923944473 + }, + 
"roundtrip": { + "p50": 124.54400211572647, + "p90": 155.7759940624237, + "p95": 170.56000232696533, + "p99": 186.91200017929077 + }, + "isolatedSum": { + "p50": 143.23200285434723, + "p90": 183.04000049829483, + "p95": 199.90400224924088, + "p99": 256.00000470876694 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4630528, + "combineLogicalBytes": 4630528, + "fanoutMean": 5.046875, + "recvTokensMax": 45, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 76.25599950551987, + "p90": 106.9440022110939, + "p95": 120.7360029220581, + "p99": 149.24800395965576 + }, + "combine": { + "p50": 70.52800059318542, + "p90": 85.24800091981888, + "p95": 90.04800021648407, + "p99": 104.5759990811348 + }, + "roundtrip": { + "p50": 129.98400628566742, + "p90": 161.05599701404572, + "p95": 173.8560050725937, + "p99": 205.21600544452667 + }, + "isolatedSum": { + "p50": 146.7840000987053, + "p90": 192.19200313091278, + "p95": 210.78400313854218, + "p99": 253.82400304079056 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9447424, + "combineLogicalBytes": 9447424, + "fanoutMean": 5.1484375, + "recvTokensMax": 91, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 81.91999793052673, + "p90": 99.07200187444687, + "p95": 107.04000294208527, + "p99": 128.57599556446075 + }, + "combine": { + "p50": 76.03199779987335, + "p90": 89.63199704885483, + "p95": 96.54399752616882, + "p99": 106.08000308275223 + }, + "roundtrip": { + "p50": 129.08799946308136, + "p90": 156.76799416542053, + "p95": 167.29600727558136, + "p99": 217.3440009355545 + }, + "isolatedSum": { + "p50": 157.95199573040009, + "p90": 188.7039989233017, + "p95": 203.5840004682541, + "p99": 234.65599864721298 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19023872, + 
"combineLogicalBytes": 19023872, + "fanoutMean": 5.18359375, + "recvTokensMax": 178, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 101.79200023412704, + "p90": 136.6720050573349, + "p95": 146.36799693107605, + "p99": 175.10400712490082 + }, + "combine": { + "p50": 93.44000369310379, + "p90": 112.76800185441971, + "p95": 117.15199798345566, + "p99": 131.71200454235077 + }, + "roundtrip": { + "p50": 165.43999314308167, + "p90": 204.44799959659576, + "p95": 212.38400042057037, + "p99": 240.03200232982635 + }, + "isolatedSum": { + "p50": 195.23200392723083, + "p90": 249.4400069117546, + "p95": 263.5199949145317, + "p99": 306.8160116672516 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38148096, + "combineLogicalBytes": 38148096, + "fanoutMean": 5.197265625, + "recvTokensMax": 350, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 115.68000167608261, + "p90": 135.29600203037262, + "p95": 142.17600226402283, + "p99": 160.64000129699707 + }, + "combine": { + "p50": 104.96000200510025, + "p90": 118.04799735546112, + "p95": 122.68800288438797, + "p99": 147.64800667762756 + }, + "roundtrip": { + "p50": 194.97600197792053, + "p90": 212.64000236988068, + "p95": 220.19200026988983, + "p99": 234.78400707244873 + }, + "isolatedSum": { + "p50": 220.64000368118286, + "p90": 253.34399938583374, + "p95": 264.8640051484108, + "p99": 308.28800797462463 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 76955648, + "combineLogicalBytes": 76955648, + "fanoutMean": 5.2421875, + "recvTokensMax": 687, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-cee2e19b", + "identity": 
"h200|deepep|7168|8|256|bf16|normal|cached-layout-comm-only-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|ac583971f94b176", + "colorKey": "h200_edd92e38", + "comparisonKey": "4a9eb2a61bfd9462", + "schemaVersion": 3, + "generatedAt": "2026-06-26T17:30:08.901856+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_7", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "normalized", + "suite": "resource-constrained", + "comparisonClass": "standardized", + "measurementContract": "cached-layout-comm-only-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 (norm) [cl]", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": 0.18, + "achievedFraction": 0.1818, + "configuredUnits": 24, + "deviceUnits": 132, + "resourceClass": "resource-constrained", + "conformanceClass": "resource-conforming", + "fixedKernel": false, + "paretoEligible": true + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": "set:8:d8d49658059863f2", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28254409438", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254409438", + "createdAt": "2026-06-26T17:28:41Z", + "sha": "60dec7d70f554e252fec87709e2be52752947db1" + }, + 
"rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 62.97600269317627, + "p90": 86.40000224113464, + "p95": 94.14400160312653, + "p99": 136.9599997997284 + }, + "combine": { + "p50": 69.21599805355072, + "p90": 82.04799890518188, + "p95": 87.20000088214874, + "p99": 98.49599748849869 + }, + "roundtrip": { + "p50": 109.98400300741196, + "p90": 133.08799266815186, + "p95": 140.8960074186325, + "p99": 178.27199399471283 + }, + "isolatedSum": { + "p50": 132.192000746727, + "p90": 168.44800114631653, + "p95": 181.34400248527527, + "p99": 235.45599728822708 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 64.38399851322174, + "p90": 88.73599767684937, + "p95": 94.87999975681305, + "p99": 119.48800086975098 + }, + "combine": { + "p50": 69.2799985408783, + "p90": 83.52000266313553, + "p95": 88.95999938249588, + "p99": 107.10400342941284 + }, + "roundtrip": { + "p50": 110.20799726247787, + "p90": 138.2720023393631, + "p95": 145.37599682807922, + "p99": 175.55199563503265 + }, + "isolatedSum": { + "p50": 133.66399705410004, + "p90": 172.2560003399849, + "p95": 183.83999913930893, + "p99": 226.59200429916382 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 64.41599875688553, + "p90": 90.52799642086029, + "p95": 101.88800096511841, + "p99": 132.28799402713776 + }, + "combine": { + "p50": 70.62400132417679, + "p90": 85.34400165081024, + "p95": 90.71999788284302, + "p99": 102.27199643850327 + }, + "roundtrip": { + "p50": 113.43999952077866, + "p90": 
141.79199934005737, + "p95": 148.22399616241455, + "p99": 183.58400464057922 + }, + "isolatedSum": { + "p50": 135.04000008106232, + "p90": 175.87199807167053, + "p95": 192.60799884796143, + "p99": 234.55999046564102 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 63.1679967045784, + "p90": 82.75199681520462, + "p95": 87.96799927949905, + "p99": 107.744000852108 + }, + "combine": { + "p50": 69.85600292682648, + "p90": 85.1840004324913, + "p95": 90.46400338411331, + "p99": 100.99200159311295 + }, + "roundtrip": { + "p50": 112.44799941778183, + "p90": 139.20000195503235, + "p95": 152.38399803638458, + "p99": 206.7520022392273 + }, + "isolatedSum": { + "p50": 133.02399963140488, + "p90": 167.93599724769592, + "p95": 178.43200266361237, + "p99": 208.73600244522095 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 60.35200133919716, + "p90": 85.02399921417236, + "p95": 91.67999774217606, + "p99": 111.13599687814713 + }, + "combine": { + "p50": 70.3359991312027, + "p90": 86.87999844551086, + "p95": 89.82399851083755, + "p99": 99.35999661684036 + }, + "roundtrip": { + "p50": 116.03199690580368, + "p90": 141.34399592876434, + "p95": 148.3519971370697, + "p99": 184.9920004606247 + }, + "isolatedSum": { + "p50": 130.68800047039986, + "p90": 171.90399765968323, + "p95": 181.5039962530136, + "p99": 210.4959934949875 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + 
"stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 71.74400240182877, + "p90": 90.71999788284302, + "p95": 96.73599898815155, + "p99": 118.23999881744385 + }, + "combine": { + "p50": 77.66400277614594, + "p90": 93.05600076913834, + "p95": 97.69599884748459, + "p99": 108.92800241708755 + }, + "roundtrip": { + "p50": 122.36800044775009, + "p90": 149.05600249767303, + "p95": 159.61599349975586, + "p99": 184.12800133228302 + }, + "isolatedSum": { + "p50": 149.4080051779747, + "p90": 183.77599865198135, + "p95": 194.43199783563614, + "p99": 227.1680012345314 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 82.65600353479385, + "p90": 100.3199964761734, + "p95": 109.15199667215347, + "p99": 139.39200341701508 + }, + "combine": { + "p50": 91.45600348711014, + "p90": 106.52799904346466, + "p95": 114.30399864912033, + "p99": 132.22399353981018 + }, + "roundtrip": { + "p50": 147.42399752140045, + "p90": 165.3439998626709, + "p95": 174.20800030231476, + "p99": 198.65599274635315 + }, + "isolatedSum": { + "p50": 174.112007021904, + "p90": 206.84799551963806, + "p95": 223.4559953212738, + "p99": 271.61599695682526 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 105.12000322341919, + "p90": 118.40000003576279, + "p95": 122.81599640846252, + "p99": 147.32800424098969 + }, + "combine": { + "p50": 104.73600029945374, + "p90": 122.11199849843979, + "p95": 
126.75200402736664, + "p99": 138.84800672531128 + }, + "roundtrip": { + "p50": 184.38400328159332, + "p90": 200.41599869728088, + "p95": 207.96799659729004, + "p99": 272.44800329208374 + }, + "isolatedSum": { + "p50": 209.85600352287292, + "p90": 240.51199853420258, + "p95": 249.56800043582916, + "p99": 286.17601096630096 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-8a74732f", + "identity": "h200|deepep|7168|8|256|bf16|normal|cached-layout-comm-only-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h200_76bb7d5d", + "comparisonKey": "b4a52819ec3c25b8", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:49:31.596673+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_11", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "cached-layout-comm-only-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 [cl]", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + 
"routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": "set:8:d8d49658059863f2", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271608834", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271608834", + "createdAt": "2026-06-26T23:48:07Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 62.144000083208084, + "p90": 86.62399649620056, + "p95": 98.49599748849869, + "p99": 125.5359947681427 + }, + "combine": { + "p50": 68.54400038719177, + "p90": 84.41600203514099, + "p95": 92.83199906349182, + "p99": 123.07199835777283 + }, + "roundtrip": { + "p50": 109.31199789047241, + "p90": 135.29600203037262, + "p95": 143.77599954605103, + "p99": 159.84000265598297 + }, + "isolatedSum": { + "p50": 130.68800047039986, + "p90": 171.03999853134155, + "p95": 191.3279965519905, + "p99": 248.60799312591553 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 63.74400109052658, + "p90": 91.26400202512741, + "p95": 99.87200051546097, + "p99": 171.9679981470108 + }, + "combine": { + "p50": 70.81600278615952, + "p90": 194.75199282169342, + "p95": 206.94400370121002, + "p99": 256.9279968738556 + }, + "roundtrip": { + "p50": 110.04800349473953, + "p90": 140.1599943637848, + "p95": 147.13600277900696, + "p99": 161.50400042533875 + }, + "isolatedSum": { + "p50": 134.5600038766861, + "p90": 286.01599484682083, + "p95": 306.816004216671, + "p99": 428.8959950208664 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 62.94400244951248, + "p90": 80.51200211048126, + "p95": 89.02399986982346, + "p99": 111.39199882745743 + }, + "combine": { + "p50": 68.38399916887283, + "p90": 79.8719972372055, + "p95": 88.54400366544724, + "p99": 100.54399818181992 + }, + "roundtrip": { + "p50": 111.16799712181091, + "p90": 139.80799913406372, + "p95": 148.41599762439728, + "p99": 167.07199811935425 + }, + "isolatedSum": { + "p50": 131.32800161838531, + "p90": 160.38399934768677, + "p95": 177.5680035352707, + "p99": 211.93599700927734 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 61.88800185918808, + "p90": 83.16799998283386, + "p95": 92.51199662685394, + "p99": 104.06400263309479 + }, + "combine": { + "p50": 68.67200136184692, + "p90": 82.84799754619598, + "p95": 88.639996945858, + "p99": 105.05600273609161 + }, + "roundtrip": { + "p50": 110.84800213575363, + "p90": 140.79999923706055, + "p95": 148.0640023946762, + "p99": 159.2639982700348 + }, + "isolatedSum": { + "p50": 130.560003221035, + "p90": 166.01599752902985, + "p95": 181.15199357271194, + "p99": 209.1200053691864 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 59.84000116586685, + "p90": 82.17599987983704, + "p95": 
92.32000261545181, + "p99": 105.92000186443329 + }, + "combine": { + "p50": 69.72800195217133, + "p90": 84.19200032949448, + "p95": 90.68799763917923, + "p99": 106.91200196743011 + }, + "roundtrip": { + "p50": 112.12799698114395, + "p90": 134.62400436401367, + "p95": 145.9839940071106, + "p99": 164.09599781036377 + }, + "isolatedSum": { + "p50": 129.56800311803818, + "p90": 166.3680002093315, + "p95": 183.00800025463104, + "p99": 212.8320038318634 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 70.20799815654755, + "p90": 94.08000111579895, + "p95": 101.15200281143188, + "p99": 118.17599833011627 + }, + "combine": { + "p50": 76.64000242948532, + "p90": 91.2960022687912, + "p95": 97.43999689817429, + "p99": 105.27999699115753 + }, + "roundtrip": { + "p50": 123.77600371837616, + "p90": 148.3519971370697, + "p95": 155.29599785804749, + "p99": 175.135999917984 + }, + "isolatedSum": { + "p50": 146.84800058603287, + "p90": 185.37600338459015, + "p95": 198.59199970960617, + "p99": 223.4559953212738 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 86.43200248479843, + "p90": 99.48799759149551, + "p95": 106.84800148010254, + "p99": 127.42400169372559 + }, + "combine": { + "p50": 85.82399785518646, + "p90": 96.63999825716019, + "p95": 104.76800054311752, + "p99": 113.21599781513214 + }, + "roundtrip": { + "p50": 147.8399932384491, + "p90": 164.5440012216568, + "p95": 169.95200514793396, + "p99": 197.53600656986237 + }, + "isolatedSum": { + "p50": 
172.2560003399849, + "p90": 196.1279958486557, + "p95": 211.61600202322006, + "p99": 240.63999950885773 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 103.7760004401207, + "p90": 118.9119964838028, + "p95": 127.16799974441528, + "p99": 134.97599959373474 + }, + "combine": { + "p50": 105.15200346708298, + "p90": 119.00799721479416, + "p95": 124.35200065374374, + "p99": 139.55199718475342 + }, + "roundtrip": { + "p50": 185.2799952030182, + "p90": 201.7280012369156, + "p95": 207.39200711250305, + "p99": 224.95999932289124 + }, + "isolatedSum": { + "p50": 208.92800390720367, + "p90": 237.91999369859695, + "p95": 251.52000039815903, + "p99": 274.52799677848816 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-274a06b0", + "identity": "h200|deepep|7168|8|256|bf16|ll|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h200_c9aeae24", + "comparisonKey": "0abd2163f516521c", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:50:44.931546+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_8", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "ll", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 LL", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + 
"routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": "set:8:d8d49658059863f2", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271645585", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271645585", + "createdAt": "2026-06-26T23:49:15Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 37.567999213933945, + "p90": 48.0320006608963, + "p95": 52.41600051522255, + "p99": 62.33600154519081 + }, + "combine": { + "p50": 33.663999289274216, + "p90": 44.38399896025658, + "p95": 46.879999339580536, + "p99": 61.85600161552429 + }, + "roundtrip": { + "p50": 51.231998950242996, + "p90": 70.14399766921997, + "p95": 77.31200009584427, + "p99": 100.0640019774437 + }, + "isolatedSum": { + "p50": 71.23199850320816, + "p90": 92.41599962115288, + "p95": 99.29599985480309, + "p99": 124.1920031607151 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 14, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 37.88800165057182, + "p90": 49.15200173854828, + "p95": 55.87200075387955, + "p99": 76.89599692821503 + }, + "combine": { + "p50": 32.896000891923904, + "p90": 43.83999854326248, + "p95": 47.07200080156326, + "p99": 67.74400174617767 + }, + "roundtrip": { + "p50": 51.00800096988678, + "p90": 67.9360032081604, + "p95": 74.20799881219864, + "p99": 96.83199971914291 + }, + "isolatedSum": { + "p50": 70.78400254249573, + "p90": 92.99200028181076, + "p95": 102.94400155544281, + "p99": 144.6399986743927 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 21, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 37.53599897027016, + "p90": 44.95999962091446, + "p95": 51.61599814891815, + "p99": 66.30399823188782 + }, + "combine": { + "p50": 29.791999608278275, + "p90": 39.16800022125244, + "p95": 44.064000248909, + "p99": 53.63199859857559 + }, + "roundtrip": { + "p50": 51.13599821925163, + "p90": 63.519999384880066, + "p95": 71.77600264549255, + "p99": 81.34400099515915 + }, + "isolatedSum": { + "p50": 67.32799857854843, + "p90": 84.1279998421669, + "p95": 95.67999839782715, + "p99": 119.93599683046341 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 39, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 38.27200084924698, + "p90": 51.32799968123436, + "p95": 57.08799883723259, + "p99": 66.97600334882736 + }, + "combine": { + "p50": 34.623999148607254, + "p90": 44.03200000524521, + "p95": 46.62400111556053, + "p99": 54.55999821424484 + }, + "roundtrip": { + "p50": 55.39200082421303, + "p90": 67.58400052785873, + 
"p95": 75.42400062084198, + "p99": 95.0080007314682 + }, + "isolatedSum": { + "p50": 72.89599999785423, + "p90": 95.35999968647957, + "p95": 103.71199995279312, + "p99": 121.5360015630722 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 74, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 38.816001266241074, + "p90": 54.17599901556969, + "p95": 57.72799998521805, + "p99": 75.00799745321274 + }, + "combine": { + "p50": 36.288000643253326, + "p90": 46.01600021123886, + "p95": 48.00000041723251, + "p99": 69.47200000286102 + }, + "roundtrip": { + "p50": 59.967998415231705, + "p90": 73.05599749088287, + "p95": 77.2159993648529, + "p99": 92.12800115346909 + }, + "isolatedSum": { + "p50": 75.1040019094944, + "p90": 100.19199922680855, + "p95": 105.72800040245056, + "p99": 144.47999745607376 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 145, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 45.40799930691719, + "p90": 55.32800033688545, + "p95": 60.15999987721443, + "p99": 70.88000327348709 + }, + "combine": { + "p50": 43.87199878692627, + "p90": 53.53600159287453, + "p95": 55.32800033688545, + "p99": 67.9360032081604 + }, + "roundtrip": { + "p50": 72.35199958086014, + "p90": 82.8159973025322, + "p95": 86.01599931716919, + "p99": 98.88000041246414 + }, + "isolatedSum": { + "p50": 89.27999809384346, + "p90": 108.86400192975998, + "p95": 115.48800021409988, + "p99": 138.8160064816475 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 287, + "stragglerRank": 4, + 
"correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 55.296000093221664, + "p90": 66.6240006685257, + "p95": 70.36799937486649, + "p99": 88.16000074148178 + }, + "combine": { + "p50": 59.07199904322624, + "p90": 67.71200150251389, + "p95": 70.43199986219406, + "p99": 79.3600007891655 + }, + "roundtrip": { + "p50": 97.34400361776352, + "p90": 109.3439981341362, + "p95": 115.32799899578094, + "p99": 128.12800705432892 + }, + "isolatedSum": { + "p50": 114.3679991364479, + "p90": 134.33600217103958, + "p95": 140.79999923706055, + "p99": 167.52000153064728 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 564, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 81.05599880218506, + "p90": 91.26400202512741, + "p95": 95.77599912881851, + "p99": 104.38399761915207 + }, + "combine": { + "p50": 86.40000224113464, + "p90": 98.36799651384354, + "p95": 102.84800082445145, + "p99": 111.96800321340561 + }, + "roundtrip": { + "p50": 148.44800531864166, + "p90": 162.88000345230103, + "p95": 168.16000640392303, + "p99": 178.24000120162964 + }, + "isolatedSum": { + "p50": 167.4560010433197, + "p90": 189.63199853897095, + "p95": 198.62399995326996, + "p99": 216.35200083255768 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 1104, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-81e223f4", + "identity": "h200|deepep|7168|8|256|bf16|ll|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h200_7cfa04c4", + "comparisonKey": "72cd529af4968fe8", + "schemaVersion": 3, + "generatedAt": 
"2026-06-26T23:50:48.529187+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_6", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "ll", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 LL", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": "set:8:d8d49658059863f2", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271650161", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271650161", + "createdAt": "2026-06-26T23:49:22Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 38.55999931693077, + "p90": 52.25599929690361, + "p95": 57.69599974155426, + "p99": 68.70400160551071 + }, + "combine": { + "p50": 33.440001308918, + "p90": 46.23999819159508, + "p95": 50.36799982190132, 
+ "p99": 62.912002205848694 + }, + "roundtrip": { + "p50": 52.70399898290634, + "p90": 70.43199986219406, + "p95": 77.85599678754807, + "p99": 90.27200192213058 + }, + "isolatedSum": { + "p50": 72.00000062584877, + "p90": 98.49599748849869, + "p95": 108.06399956345558, + "p99": 131.6160038113594 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 14, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 37.76000067591667, + "p90": 48.25599864125252, + "p95": 55.93600124120712, + "p99": 79.68000322580338 + }, + "combine": { + "p50": 32.80000016093254, + "p90": 41.120000183582306, + "p95": 44.863998889923096, + "p99": 49.8879998922348 + }, + "roundtrip": { + "p50": 52.83199995756149, + "p90": 65.88800251483917, + "p95": 71.80800288915634, + "p99": 80.60800284147263 + }, + "isolatedSum": { + "p50": 70.56000083684921, + "p90": 89.37599882483482, + "p95": 100.80000013113022, + "p99": 129.56800311803818 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 21, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 38.495998829603195, + "p90": 52.44800075888634, + "p95": 56.543998420238495, + "p99": 76.4480009675026 + }, + "combine": { + "p50": 33.055998384952545, + "p90": 44.16000097990036, + "p95": 45.951999723911285, + "p99": 53.568001836538315 + }, + "roundtrip": { + "p50": 52.70399898290634, + "p90": 64.2239972949028, + "p95": 71.96799665689468, + "p99": 81.53600245714188 + }, + "isolatedSum": { + "p50": 71.55199721455574, + "p90": 96.6080017387867, + "p95": 102.49599814414978, + "p99": 130.0160028040409 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + 
"combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 39, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 39.07199949026108, + "p90": 52.960000932216644, + "p95": 56.992001831531525, + "p99": 65.43999910354614 + }, + "combine": { + "p50": 34.04799848794937, + "p90": 44.19200122356415, + "p95": 46.1760014295578, + "p99": 57.472001761198044 + }, + "roundtrip": { + "p50": 54.11199852824211, + "p90": 68.60800087451935, + "p95": 74.78400319814682, + "p99": 85.28000116348267 + }, + "isolatedSum": { + "p50": 73.11999797821045, + "p90": 97.15200215578079, + "p95": 103.16800326108932, + "p99": 122.91200086474419 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 74, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 40.12800008058548, + "p90": 55.00800162553787, + "p95": 59.29600074887276, + "p99": 66.81600213050842 + }, + "combine": { + "p50": 38.047999143600464, + "p90": 49.82399940490723, + "p95": 52.799999713897705, + "p99": 63.19999694824219 + }, + "roundtrip": { + "p50": 61.5679994225502, + "p90": 75.48800110816956, + "p95": 82.36800134181976, + "p99": 96.89600020647049 + }, + "isolatedSum": { + "p50": 78.17599922418594, + "p90": 104.8320010304451, + "p95": 112.09600046277046, + "p99": 130.0159990787506 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 145, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 46.23999819159508, + "p90": 56.57599866390228, + "p95": 62.30400130152702, + "p99": 70.8480030298233 + }, + "combine": { + "p50": 
43.96799951791763, + "p90": 53.75999957323074, + "p95": 58.33600088953972, + "p99": 61.216000467538834 + }, + "roundtrip": { + "p50": 71.19999825954437, + "p90": 80.86399734020233, + "p95": 85.28000116348267, + "p99": 93.21600198745728 + }, + "isolatedSum": { + "p50": 90.20799770951271, + "p90": 110.33599823713303, + "p95": 120.64000219106674, + "p99": 132.06400349736214 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 287, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 56.60799890756607, + "p90": 77.53600180149078, + "p95": 85.31200140714645, + "p99": 192.03199446201324 + }, + "combine": { + "p50": 58.240000158548355, + "p90": 67.29599833488464, + "p95": 69.56800073385239, + "p99": 77.82399654388428 + }, + "roundtrip": { + "p50": 96.28800302743912, + "p90": 107.39199817180634, + "p95": 111.58400028944016, + "p99": 126.52799487113953 + }, + "isolatedSum": { + "p50": 114.84799906611443, + "p90": 144.83200013637543, + "p95": 154.88000214099884, + "p99": 269.8559910058975 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 564, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 81.7599967122078, + "p90": 92.67199784517288, + "p95": 95.13600170612335, + "p99": 128.38399410247803 + }, + "combine": { + "p50": 86.27200126647949, + "p90": 94.91200000047684, + "p95": 97.120001912117, + "p99": 105.27999699115753 + }, + "roundtrip": { + "p50": 147.2959965467453, + "p90": 157.56799280643463, + "p95": 162.36799955368042, + "p99": 174.9120056629181 + }, + "isolatedSum": { + "p50": 168.0319979786873, + "p90": 187.58399784564972, + "p95": 192.25600361824036, + "p99": 
233.66399109363556 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 1104, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-43b4144e", + "identity": "h200|deepep|7168|8|256|bf16|ll|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|ac583971f94b176", + "colorKey": "h200_0a1a73b3", + "comparisonKey": "14196b9d68f90910", + "schemaVersion": 3, + "generatedAt": "2026-06-26T17:30:32.638567+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_5", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "ll", + "resourceMode": "normalized", + "suite": "resource-constrained", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 LL (norm)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": 0.18, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": "set:8:d8d49658059863f2", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": 
"sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28254426529", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254426529", + "createdAt": "2026-06-26T17:29:02Z", + "sha": "60dec7d70f554e252fec87709e2be52752947db1" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 36.86400130391121, + "p90": 47.13600128889084, + "p95": 51.52000114321709, + "p99": 63.32799792289734 + }, + "combine": { + "p50": 33.440001308918, + "p90": 42.527999728918076, + "p95": 46.81599885225296, + "p99": 52.22399905323982 + }, + "roundtrip": { + "p50": 50.52800104022026, + "p90": 65.15199691057205, + "p95": 71.03999704122543, + "p99": 78.68800312280655 + }, + "isolatedSum": { + "p50": 70.30400261282921, + "p90": 89.66400101780891, + "p95": 98.33599999547005, + "p99": 115.55199697613716 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 14, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 37.408001720905304, + "p90": 48.608001321554184, + "p95": 54.687999188899994, + "p99": 65.2799978852272 + }, + "combine": { + "p50": 32.735999673604965, + "p90": 42.59200021624565, + "p95": 45.05600035190582, + "p99": 51.35999992489815 + }, + "roundtrip": { + "p50": 51.4880008995533, + "p90": 66.72000139951706, + "p95": 72.54400104284286, + "p99": 85.08799970149994 + }, + "isolatedSum": { + "p50": 70.14400139451027, + "p90": 91.20000153779984, + "p95": 99.74399954080582, + "p99": 116.63999781012535 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 21, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 
32, + "dispatch": { + "p50": 37.79200091958046, + "p90": 49.27999898791313, + "p95": 54.91200089454651, + "p99": 61.08799949288368 + }, + "combine": { + "p50": 31.231999397277832, + "p90": 43.487999588251114, + "p95": 47.26399853825569, + "p99": 65.31199812889099 + }, + "roundtrip": { + "p50": 51.58400163054466, + "p90": 68.89600306749344, + "p95": 73.95199686288834, + "p99": 91.61599725484848 + }, + "isolatedSum": { + "p50": 69.02400031685829, + "p90": 92.76799857616425, + "p95": 102.1759994328022, + "p99": 126.39999762177467 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 39, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 37.53599897027016, + "p90": 48.128001391887665, + "p95": 54.75199967622757, + "p99": 62.111999839544296 + }, + "combine": { + "p50": 34.46400165557861, + "p90": 44.544000178575516, + "p95": 47.231998294591904, + "p99": 57.37600103020668 + }, + "roundtrip": { + "p50": 54.687999188899994, + "p90": 67.4239993095398, + "p95": 73.44000041484833, + "p99": 91.96799993515015 + }, + "isolatedSum": { + "p50": 72.00000062584877, + "p90": 92.67200157046318, + "p95": 101.98399797081947, + "p99": 119.48800086975098 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 74, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 39.29600119590759, + "p90": 51.35999992489815, + "p95": 55.64799904823303, + "p99": 64.96000289916992 + }, + "combine": { + "p50": 36.67199984192848, + "p90": 46.62400111556053, + "p95": 50.56000128388405, + "p99": 60.38400158286095 + }, + "roundtrip": { + "p50": 60.47999858856201, + "p90": 74.5920017361641, + "p95": 79.3600007891655, + 
"p99": 87.87199854850769 + }, + "isolatedSum": { + "p50": 75.96800103783607, + "p90": 97.98400104045868, + "p95": 106.20800033211708, + "p99": 125.34400448203087 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 145, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 45.05600035190582, + "p90": 55.00800162553787, + "p95": 57.95200169086456, + "p99": 66.01600348949432 + }, + "combine": { + "p50": 44.28799822926521, + "p90": 53.05600166320801, + "p95": 55.904000997543335, + "p99": 61.3120011985302 + }, + "roundtrip": { + "p50": 72.64000177383423, + "p90": 84.16000008583069, + "p95": 88.03199976682663, + "p99": 106.30399733781815 + }, + "isolatedSum": { + "p50": 89.34399858117104, + "p90": 108.06400328874588, + "p95": 113.8560026884079, + "p99": 127.32800468802452 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 287, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 55.23199960589409, + "p90": 65.63200056552887, + "p95": 71.48800045251846, + "p99": 79.55200225114822 + }, + "combine": { + "p50": 58.43200162053108, + "p90": 69.37599927186966, + "p95": 71.07199728488922, + "p99": 79.42400127649307 + }, + "roundtrip": { + "p50": 96.8639999628067, + "p90": 108.44799876213074, + "p95": 113.72800171375275, + "p99": 121.72800302505493 + }, + "isolatedSum": { + "p50": 113.66400122642517, + "p90": 135.00799983739853, + "p95": 142.55999773740768, + "p99": 158.9760035276413 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 564, + "stragglerRank": 3, + "correct": true, + 
"samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 79.26400005817413, + "p90": 88.44800293445587, + "p95": 92.6399976015091, + "p99": 101.69599950313568 + }, + "combine": { + "p50": 86.01599931716919, + "p90": 95.0080007314682, + "p95": 97.02400118112564, + "p99": 103.32799702882767 + }, + "roundtrip": { + "p50": 147.32800424098969, + "p90": 157.53600001335144, + "p95": 161.47199273109436, + "p99": 169.0240055322647 + }, + "isolatedSum": { + "p50": 165.27999937534332, + "p90": 183.45600366592407, + "p95": 189.66399878263474, + "p99": 205.02399653196335 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 1104, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-b5299c0b", + "identity": "h200|deepep|4096|8|128|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||75530960a30b452", + "colorKey": "h200_87683f6c", + "comparisonKey": "0d3b5b81799f76d5", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:53:33.916655+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_2", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · fp8", + "shape": { + "hidden": 4096, + "topk": 8, + "experts": 128, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + 
"configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "75530960a30b452", + "workloadId": "set:8:d1b92539bddfb570", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271736220", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271736220", + "createdAt": "2026-06-26T23:52:01Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 221.15199267864227, + "p90": 287.26398944854736, + "p95": 315.39198756217957, + "p99": 401.98400616645813 + }, + "combine": { + "p50": 47.87199944257736, + "p90": 66.27199798822403, + "p95": 73.91999661922455, + "p99": 92.51199662685394 + }, + "roundtrip": { + "p50": 246.75199389457703, + "p90": 302.2400140762329, + "p95": 335.61599254608154, + "p99": 400.160014629364 + }, + "isolatedSum": { + "p50": 269.02399212121964, + "p90": 353.5359874367714, + "p95": 389.3119841814041, + "p99": 494.4960027933121 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 172032, + "combineLogicalBytes": 344064, + "fanoutMean": 5.25, + "recvTokensMax": 6, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 264.6079957485199, + "p90": 342.3680067062378, + "p95": 371.0399866104126, + "p99": 447.00801372528076 + }, + "combine": { + "p50": 54.46400120854378, + "p90": 68.03199648857117, + "p95": 74.8480036854744, + "p99": 88.83199840784073 + }, + 
"roundtrip": { + "p50": 257.2160065174103, + "p90": 336.4480137825012, + "p95": 375.10401010513306, + "p99": 443.93599033355713 + }, + "isolatedSum": { + "p50": 319.0719969570637, + "p90": 410.40000319480896, + "p95": 445.887990295887, + "p99": 535.8400121331215 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 352256, + "combineLogicalBytes": 704512, + "fanoutMean": 5.375, + "recvTokensMax": 12, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 210.14399826526642, + "p90": 260.0319981575012, + "p95": 276.99199318885803, + "p99": 401.856005191803 + }, + "combine": { + "p50": 49.02400076389313, + "p90": 61.983998864889145, + "p95": 68.57600063085556, + "p99": 82.43200182914734 + }, + "roundtrip": { + "p50": 252.73600220680237, + "p90": 308.51200222969055, + "p95": 325.76000690460205, + "p99": 404.2240083217621 + }, + "isolatedSum": { + "p50": 259.16799902915955, + "p90": 322.01599702239037, + "p95": 345.5679938197136, + "p99": 484.2880070209503 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 692224, + "combineLogicalBytes": 1384448, + "fanoutMean": 5.28125, + "recvTokensMax": 26, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 229.40799593925476, + "p90": 285.91999411582947, + "p95": 302.97601222991943, + "p99": 384.799987077713 + }, + "combine": { + "p50": 50.6879985332489, + "p90": 65.95200300216675, + "p95": 71.48800045251846, + "p99": 85.56800335645676 + }, + "roundtrip": { + "p50": 262.7840042114258, + "p90": 331.9680094718933, + "p95": 359.6160113811493, + "p99": 441.0560131072998 + }, + "isolatedSum": { + "p50": 280.09599447250366, + "p90": 351.8719971179962, + "p95": 374.4640126824379, + "p99": 470.36799043416977 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1372160, + "combineLogicalBytes": 2744320, + 
"fanoutMean": 5.234375, + "recvTokensMax": 49, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 211.67999505996704, + "p90": 262.0159983634949, + "p95": 281.5360128879547, + "p99": 434.4319999217987 + }, + "combine": { + "p50": 50.87999999523163, + "p90": 67.74400174617767, + "p95": 72.76800274848938, + "p99": 100.47999769449234 + }, + "roundtrip": { + "p50": 261.1199915409088, + "p90": 332.5119912624359, + "p95": 354.8800051212311, + "p99": 414.2720103263855 + }, + "isolatedSum": { + "p50": 262.55999505519867, + "p90": 329.76000010967255, + "p95": 354.3040156364441, + "p99": 534.911997616291 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2732032, + "combineLogicalBytes": 5464064, + "fanoutMean": 5.2109375, + "recvTokensMax": 94, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 210.68799495697021, + "p90": 258.91199707984924, + "p95": 279.87200021743774, + "p99": 326.1440098285675 + }, + "combine": { + "p50": 53.85600030422211, + "p90": 68.67200136184692, + "p95": 72.51200079917908, + "p99": 91.90399944782257 + }, + "roundtrip": { + "p50": 265.6959891319275, + "p90": 326.2079954147339, + "p95": 351.52000188827515, + "p99": 446.3360011577606 + }, + "isolatedSum": { + "p50": 264.5439952611923, + "p90": 327.58399844169617, + "p95": 352.3840010166168, + "p99": 418.0480092763901 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 5562368, + "combineLogicalBytes": 11124736, + "fanoutMean": 5.3046875, + "recvTokensMax": 186, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 209.6640020608902, + "p90": 265.21599292755127, + "p95": 291.0720109939575, + "p99": 366.14400148391724 + }, + "combine": { + "p50": 61.43999844789505, + 
"p90": 73.91999661922455, + "p95": 79.42400127649307, + "p99": 92.06400066614151 + }, + "roundtrip": { + "p50": 262.2399926185608, + "p90": 317.7280128002167, + "p95": 350.7840037345886, + "p99": 447.9680061340332 + }, + "isolatedSum": { + "p50": 271.10400050878525, + "p90": 339.1359895467758, + "p95": 370.4960122704506, + "p99": 458.20800215005875 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 11096064, + "combineLogicalBytes": 22192128, + "fanoutMean": 5.291015625, + "recvTokensMax": 358, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 217.8879976272583, + "p90": 276.99199318885803, + "p95": 317.05600023269653, + "p99": 742.6559925079346 + }, + "combine": { + "p50": 72.67200201749802, + "p90": 88.54400366544724, + "p95": 92.47999638319016, + "p99": 113.02399635314941 + }, + "roundtrip": { + "p50": 273.44000339508057, + "p90": 323.5520124435425, + "p95": 345.0239896774292, + "p99": 420.3520119190216 + }, + "isolatedSum": { + "p50": 290.5599996447563, + "p90": 365.53599685430527, + "p95": 409.5359966158867, + "p99": 855.679988861084 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22282240, + "combineLogicalBytes": 44564480, + "fanoutMean": 5.3125, + "recvTokensMax": 699, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-a3751d3c", + "identity": "h200|deepep|5120|8|160|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||be1b44a963bd4ef", + "colorKey": "h200_87683f6c", + "comparisonKey": "972ab14012f6276a", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:53:56.538326+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_11", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + 
"measurementContract": "runtime-visible-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · fp8", + "shape": { + "hidden": 5120, + "topk": 8, + "experts": 160, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "be1b44a963bd4ef", + "workloadId": "set:8:34e5874082f8ea8f", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271751941", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271751941", + "createdAt": "2026-06-26T23:52:29Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 217.3759937286377, + "p90": 269.1839933395386, + "p95": 295.1360046863556, + "p99": 345.69600224494934 + }, + "combine": { + "p50": 50.592001527547836, + "p90": 66.46399945020676, + "p95": 71.74400240182877, + "p99": 89.34400230646133 + }, + "roundtrip": { + "p50": 245.60000002384186, + "p90": 292.64000058174133, + "p95": 306.0480058193207, + "p99": 346.8160033226013 + }, + "isolatedSum": { + "p50": 267.96799525618553, + "p90": 335.64799278974533, + "p95": 366.88000708818436, + "p99": 
435.0400045514107 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 215040, + "combineLogicalBytes": 430080, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 206.2399983406067, + "p90": 254.94399666786194, + "p95": 270.4960107803345, + "p99": 337.21598982810974 + }, + "combine": { + "p50": 51.263999193906784, + "p90": 65.72800129652023, + "p95": 70.52800059318542, + "p99": 75.58400183916092 + }, + "roundtrip": { + "p50": 245.15199661254883, + "p90": 296.31999135017395, + "p95": 316.1279857158661, + "p99": 367.3279881477356 + }, + "isolatedSum": { + "p50": 257.5039975345135, + "p90": 320.6719979643822, + "p95": 341.0240113735199, + "p99": 412.79999166727066 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 440320, + "combineLogicalBytes": 880640, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 220.38400173187256, + "p90": 289.15199637413025, + "p95": 331.5519988536835, + "p99": 1036.1599922180176 + }, + "combine": { + "p50": 52.191998809576035, + "p90": 65.21599739789963, + "p95": 68.96000355482101, + "p99": 77.88799703121185 + }, + "roundtrip": { + "p50": 248.79999458789825, + "p90": 299.71200227737427, + "p95": 314.5279884338379, + "p99": 352.09599137306213 + }, + "isolatedSum": { + "p50": 272.5760005414486, + "p90": 354.3679937720299, + "p95": 400.5120024085045, + "p99": 1114.0479892492294 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 870400, + "combineLogicalBytes": 1740800, + "fanoutMean": 5.3125, + "recvTokensMax": 25, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 221.91999852657318, + "p90": 292.4480140209198, + 
"p95": 316.3520097732544, + "p99": 412.76800632476807 + }, + "combine": { + "p50": 54.84800040721893, + "p90": 71.61600142717361, + "p95": 80.64000308513641, + "p99": 102.1760031580925 + }, + "roundtrip": { + "p50": 249.24799799919128, + "p90": 305.5360019207001, + "p95": 325.1520097255707, + "p99": 406.9119989871979 + }, + "isolatedSum": { + "p50": 276.7679989337921, + "p90": 364.0640154480934, + "p95": 396.9920128583908, + "p99": 514.9440094828606 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1735680, + "combineLogicalBytes": 3471360, + "fanoutMean": 5.296875, + "recvTokensMax": 50, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 209.75999534130096, + "p90": 260.73598861694336, + "p95": 279.7119915485382, + "p99": 349.98399019241333 + }, + "combine": { + "p50": 54.88000065088272, + "p90": 69.34399902820587, + "p95": 73.91999661922455, + "p99": 101.08800232410431 + }, + "roundtrip": { + "p50": 254.36800718307495, + "p90": 305.2160143852234, + "p95": 330.55999875068665, + "p99": 445.72800397872925 + }, + "isolatedSum": { + "p50": 264.6399959921837, + "p90": 330.07998764514923, + "p95": 353.63198816776276, + "p99": 451.07199251651764 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3456000, + "combineLogicalBytes": 6912000, + "fanoutMean": 5.2734375, + "recvTokensMax": 93, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 212.16000616550446, + "p90": 261.34398579597473, + "p95": 274.4959890842438, + "p99": 355.9679985046387 + }, + "combine": { + "p50": 59.487998485565186, + "p90": 75.9039968252182, + "p95": 79.29600030183792, + "p99": 111.13599687814713 + }, + "roundtrip": { + "p50": 262.4320089817047, + "p90": 318.33600997924805, + "p95": 339.4559919834137, + "p99": 384.0320110321045 + }, + "isolatedSum": { + "p50": 
271.64800465106964, + "p90": 337.24798262119293, + "p95": 353.7919893860817, + "p99": 467.1039953827858 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6988800, + "combineLogicalBytes": 13977600, + "fanoutMean": 5.33203125, + "recvTokensMax": 179, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 213.44000101089478, + "p90": 259.99999046325684, + "p95": 280.2880108356476, + "p99": 418.08000206947327 + }, + "combine": { + "p50": 67.26399809122086, + "p90": 79.1039988398552, + "p95": 86.94399893283844, + "p99": 97.59999811649323 + }, + "roundtrip": { + "p50": 273.98398518562317, + "p90": 361.2799942493439, + "p95": 384.0959966182709, + "p99": 485.24799942970276 + }, + "isolatedSum": { + "p50": 280.70399910211563, + "p90": 339.10398930311203, + "p95": 367.232009768486, + "p99": 515.6800001859665 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13987840, + "combineLogicalBytes": 27975680, + "fanoutMean": 5.3359375, + "recvTokensMax": 355, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 217.53600239753723, + "p90": 271.9680070877075, + "p95": 288.8000011444092, + "p99": 367.71199107170105 + }, + "combine": { + "p50": 80.73599636554718, + "p90": 95.90400010347366, + "p95": 99.16800260543823, + "p99": 122.56000190973282 + }, + "roundtrip": { + "p50": 289.6000146865845, + "p90": 337.69598603248596, + "p95": 350.847989320755, + "p99": 431.4559996128082 + }, + "isolatedSum": { + "p50": 298.2719987630844, + "p90": 367.8720071911812, + "p95": 387.9680037498474, + "p99": 490.27199298143387 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 27837440, + "combineLogicalBytes": 55674880, + "fanoutMean": 5.3095703125, + "recvTokensMax": 699, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + 
{ + "id": "cx-1bedbd87", + "identity": "h200|deepep|6144|8|256|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h200_87683f6c", + "comparisonKey": "73242cc56a07dc73", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:54:22.337969+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_8", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · fp8", + "shape": { + "hidden": 6144, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": "set:8:2e0df6a62cd0143e", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271767522", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271767522", + "createdAt": "2026-06-26T23:52:56Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + 
"tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 217.43999421596527, + "p90": 302.7519881725311, + "p95": 334.4320058822632, + "p99": 396.06401324272156 + }, + "combine": { + "p50": 55.1999993622303, + "p90": 72.03199714422226, + "p95": 78.23999971151352, + "p99": 108.09600353240967 + }, + "roundtrip": { + "p50": 251.71199440956116, + "p90": 317.27999448776245, + "p95": 335.10398864746094, + "p99": 397.92001247406006 + }, + "isolatedSum": { + "p50": 272.6399935781956, + "p90": 374.7839853167534, + "p95": 412.6720055937767, + "p99": 504.1600167751312 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 270336, + "combineLogicalBytes": 540672, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 206.81600272655487, + "p90": 269.6639895439148, + "p95": 289.6000146865845, + "p99": 343.23200583457947 + }, + "combine": { + "p50": 55.135998874902725, + "p90": 71.77600264549255, + "p95": 77.47200131416321, + "p99": 96.09600156545639 + }, + "roundtrip": { + "p50": 247.93599545955658, + "p90": 305.63199520111084, + "p95": 323.168009519577, + "p99": 380.12799620628357 + }, + "isolatedSum": { + "p50": 261.9520016014576, + "p90": 341.43999218940735, + "p95": 367.0720160007477, + "p99": 439.32800740003586 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 528384, + "combineLogicalBytes": 1056768, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 211.04000508785248, + "p90": 283.32799673080444, + "p95": 302.65599489212036, + "p99": 377.6639997959137 + }, + "combine": { + "p50": 56.89600110054016, + "p90": 70.68800181150436, + "p95": 78.3040001988411, + "p99": 85.4400023818016 + }, + "roundtrip": { + "p50": 251.52000784873962, + "p90": 306.4959943294525, 
+ "p95": 319.64799761772156, + "p99": 344.1599905490875 + }, + "isolatedSum": { + "p50": 267.93600618839264, + "p90": 354.0159985423088, + "p95": 380.95999509096146, + "p99": 463.1040021777153 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1062912, + "combineLogicalBytes": 2125824, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 204.92799580097198, + "p90": 272.09600806236267, + "p95": 291.29600524902344, + "p99": 364.3519878387451 + }, + "combine": { + "p50": 56.96000158786774, + "p90": 71.96799665689468, + "p95": 77.79199630022049, + "p99": 86.91199868917465 + }, + "roundtrip": { + "p50": 245.69599330425262, + "p90": 303.16799879074097, + "p95": 321.9519853591919, + "p99": 421.1199879646301 + }, + "isolatedSum": { + "p50": 261.8879973888397, + "p90": 344.06400471925735, + "p95": 369.0880015492439, + "p99": 451.26398652791977 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2131968, + "combineLogicalBytes": 4263936, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 211.61599457263947, + "p90": 274.3679881095886, + "p95": 311.2959861755371, + "p99": 390.8799886703491 + }, + "combine": { + "p50": 58.720000088214874, + "p90": 74.68800246715546, + "p95": 80.09599894285202, + "p99": 87.5839963555336 + }, + "roundtrip": { + "p50": 250.65600872039795, + "p90": 313.24800848960876, + "p95": 336.1920118331909, + "p99": 386.59200072288513 + }, + "isolatedSum": { + "p50": 270.33599466085434, + "p90": 349.0559905767441, + "p95": 391.39198511838913, + "p99": 478.4639850258827 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4251648, + "combineLogicalBytes": 8503296, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 5, 
+ "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 204.92799580097198, + "p90": 262.62399554252625, + "p95": 280.5440127849579, + "p99": 327.4880051612854 + }, + "combine": { + "p50": 64.54399973154068, + "p90": 81.85599744319916, + "p95": 87.8399983048439, + "p99": 104.41599786281586 + }, + "roundtrip": { + "p50": 262.59198784828186, + "p90": 327.7440071105957, + "p95": 351.6159951686859, + "p99": 406.0800075531006 + }, + "isolatedSum": { + "p50": 269.47199553251266, + "p90": 344.4799929857254, + "p95": 368.3840110898018, + "p99": 431.90400302410126 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 8454144, + "combineLogicalBytes": 16908288, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 247.5840002298355, + "p90": 392.5119936466217, + "p95": 406.14399313926697, + "p99": 443.5200095176697 + }, + "combine": { + "p50": 71.84000313282013, + "p90": 89.85599875450134, + "p95": 94.68799829483032, + "p99": 119.32799965143204 + }, + "roundtrip": { + "p50": 261.85598969459534, + "p90": 329.24801111221313, + "p95": 345.15199065208435, + "p99": 426.1760115623474 + }, + "isolatedSum": { + "p50": 319.42400336265564, + "p90": 482.36799240112305, + "p95": 500.8319914340973, + "p99": 562.8480091691017 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 16711680, + "combineLogicalBytes": 33423360, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 218.33600103855133, + "p90": 282.6240062713623, + "p95": 299.1040050983429, + "p99": 340.831995010376 + }, + "combine": { + "p50": 87.16800063848495, + "p90": 104.67199981212616, + "p95": 109.18399691581726, + "p99": 
127.32799351215363 + }, + "roundtrip": { + "p50": 291.83998703956604, + "p90": 343.6479866504669, + "p95": 355.48800230026245, + "p99": 407.1680009365082 + }, + "isolatedSum": { + "p50": 305.5040016770363, + "p90": 387.29600608348846, + "p95": 408.28800201416016, + "p99": 468.1599885225296 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 33288192, + "combineLogicalBytes": 66576384, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-1d12a6ce", + "identity": "h200|deepep|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h200_9979edfc", + "comparisonKey": "df5e7066c74d5d30", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:49:59.289355+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_9", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · fp8", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": 
"ac583971f94b176", + "workloadId": "set:8:d8d49658059863f2", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271622347", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271622347", + "createdAt": "2026-06-26T23:48:34Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 62.81600147485733, + "p90": 80.86399734020233, + "p95": 92.6399976015091, + "p99": 111.90400272607803 + }, + "combine": { + "p50": 57.792000472545624, + "p90": 63.58399987220764, + "p95": 70.0799971818924, + "p99": 86.91199868917465 + }, + "roundtrip": { + "p50": 147.77599275112152, + "p90": 179.6479970216751, + "p95": 193.53599846363068, + "p99": 309.6640110015869 + }, + "isolatedSum": { + "p50": 120.60800194740295, + "p90": 144.44799721240997, + "p95": 162.7199947834015, + "p99": 198.81600141525269 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 67.61600077152252, + "p90": 106.27199709415436, + "p95": 119.84000355005264, + "p99": 203.93599569797516 + }, + "combine": { + "p50": 59.23200026154518, + "p90": 77.37600058317184, + "p95": 84.95999872684479, + "p99": 107.29599744081497 + }, + "roundtrip": { + "p50": 156.44800662994385, + "p90": 205.50400018692017, + "p95": 228.38400304317474, + "p99": 356.79998993873596 + }, + "isolatedSum": { + "p50": 126.8480010330677, + "p90": 183.6479976773262, + "p95": 204.80000227689743, + "p99": 311.23199313879013 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 616448, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 66.27199798822403, + "p90": 84.86399799585342, + "p95": 93.56799721717834, + "p99": 116.09599739313126 + }, + "combine": { + "p50": 59.29600074887276, + "p90": 72.95999675989151, + "p95": 80.25600016117096, + "p99": 107.51999914646149 + }, + "roundtrip": { + "p50": 156.8319946527481, + "p90": 185.40799617767334, + "p95": 197.4399983882904, + "p99": 227.1679937839508 + }, + "isolatedSum": { + "p50": 125.56799873709679, + "p90": 157.82399475574493, + "p95": 173.8239973783493, + "p99": 223.61599653959274 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 67.77600198984146, + "p90": 101.05600208044052, + "p95": 115.61600118875504, + "p99": 160.38399934768677 + }, + "combine": { + "p50": 60.99199876189232, + "p90": 78.20799946784973, + "p95": 82.94399827718735, + "p99": 110.78400164842606 + }, + "roundtrip": { + "p50": 156.89599514007568, + "p90": 197.4720060825348, + "p95": 210.36800742149353, + "p99": 302.4959862232208 + }, + "isolatedSum": { + "p50": 128.76800075173378, + "p90": 179.26400154829025, + "p95": 198.55999946594238, + "p99": 271.1680009961128 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487296, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 68.38399916887283, + "p90": 93.24800223112106, + "p95": 108.57599973678589, + "p99": 
143.74400675296783 + }, + "combine": { + "p50": 62.463998794555664, + "p90": 78.78399640321732, + "p95": 87.23200112581253, + "p99": 103.90400141477585 + }, + "roundtrip": { + "p50": 162.1440052986145, + "p90": 208.00000429153442, + "p95": 221.18400037288666, + "p99": 262.30400800704956 + }, + "isolatedSum": { + "p50": 130.8479979634285, + "p90": 172.03199863433838, + "p95": 195.80800086259842, + "p99": 247.64800816774368 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 67.71200150251389, + "p90": 98.65599870681763, + "p95": 107.68000036478043, + "p99": 143.19999516010284 + }, + "combine": { + "p50": 68.00000369548798, + "p90": 82.78399705886841, + "p95": 88.0960002541542, + "p99": 113.79200220108032 + }, + "roundtrip": { + "p50": 163.2319986820221, + "p90": 208.73600244522095, + "p95": 238.65599930286407, + "p99": 287.55199909210205 + }, + "isolatedSum": { + "p50": 135.71200519800186, + "p90": 181.43999576568604, + "p95": 195.77600061893463, + "p99": 256.99199736118317 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 79.19999957084656, + "p90": 95.551997423172, + "p95": 104.032002389431, + "p99": 137.85600662231445 + }, + "combine": { + "p50": 77.7600035071373, + "p90": 87.45600283145905, + "p95": 96.22400254011154, + "p99": 126.39999389648438 + }, + "roundtrip": { + "p50": 175.135999917984, + "p90": 217.92000532150269, + "p95": 236.735999584198, + "p99": 292.86399483680725 + }, + "isolatedSum": { + "p50": 156.96000307798386, + "p90": 
183.00800025463104, + "p95": 200.25600492954254, + "p99": 264.2560005187988 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 83.52000266313553, + "p90": 106.65600001811981, + "p95": 115.77600240707397, + "p99": 151.8400013446808 + }, + "combine": { + "p50": 95.29600292444229, + "p90": 108.96000266075134, + "p95": 119.45600062608719, + "p99": 141.02399349212646 + }, + "roundtrip": { + "p50": 208.70399475097656, + "p90": 241.43999814987183, + "p95": 261.75999641418457, + "p99": 290.97598791122437 + }, + "isolatedSum": { + "p50": 178.81600558757782, + "p90": 215.61600267887115, + "p95": 235.23200303316116, + "p99": 292.86399483680725 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-9a6e69f6", + "identity": "h200|deepep|7168|8|256|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h200_87683f6c", + "comparisonKey": "c387c5e642249761", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:50:29.289162+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_5", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · fp8", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + 
"routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": "set:8:d8d49658059863f2", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271636896", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271636896", + "createdAt": "2026-06-26T23:49:02Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 228.70400547981262, + "p90": 269.6959972381592, + "p95": 279.5200049877167, + "p99": 338.1119966506958 + }, + "combine": { + "p50": 61.08799949288368, + "p90": 73.5040009021759, + "p95": 82.20800012350082, + "p99": 98.33600372076035 + }, + "roundtrip": { + "p50": 271.232008934021, + "p90": 306.94401264190674, + "p95": 324.2560029029846, + "p99": 374.65599179267883 + }, + "isolatedSum": { + "p50": 289.7920049726963, + "p90": 343.1999981403351, + "p95": 361.7280051112175, + "p99": 436.44800037145615 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 
16, + "dispatch": { + "p50": 216.06400609016418, + "p90": 246.33599817752838, + "p95": 261.3759934902191, + "p99": 341.40801429748535 + }, + "combine": { + "p50": 59.7120001912117, + "p90": 68.09599697589874, + "p95": 74.46400076150894, + "p99": 89.53599631786346 + }, + "roundtrip": { + "p50": 268.99200677871704, + "p90": 305.08801341056824, + "p95": 324.41601157188416, + "p99": 433.0880045890808 + }, + "isolatedSum": { + "p50": 275.7760062813759, + "p90": 314.4319951534271, + "p95": 335.83999425172806, + "p99": 430.9440106153488 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 229.98400032520294, + "p90": 283.07199478149414, + "p95": 300.00001192092896, + "p99": 371.2959885597229 + }, + "combine": { + "p50": 61.055999249219894, + "p90": 78.68800312280655, + "p95": 83.55200290679932, + "p99": 112.47999966144562 + }, + "roundtrip": { + "p50": 274.1119861602783, + "p90": 337.0879888534546, + "p95": 358.7520122528076, + "p99": 398.75200390815735 + }, + "isolatedSum": { + "p50": 291.03999957442284, + "p90": 361.7599979043007, + "p95": 383.55201482772827, + "p99": 483.7759882211685 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 218.87999773025513, + "p90": 251.55198574066162, + "p95": 265.855997800827, + "p99": 311.39200925827026 + }, + "combine": { + "p50": 62.111999839544296, + "p90": 71.6480016708374, + "p95": 77.11999863386154, + "p99": 90.40000289678574 + }, + "roundtrip": { + "p50": 266.9120132923126, + "p90": 300.57600140571594, + "p95": 317.8560137748718, 
+ "p99": 357.02401399612427 + }, + "isolatedSum": { + "p50": 280.9919975697994, + "p90": 323.199987411499, + "p95": 342.97599643468857, + "p99": 401.792012155056 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487296, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 219.29599344730377, + "p90": 267.61600375175476, + "p95": 287.00798749923706, + "p99": 346.8160033226013 + }, + "combine": { + "p50": 63.840001821517944, + "p90": 79.77599650621414, + "p95": 84.95999872684479, + "p99": 98.49599748849869 + }, + "roundtrip": { + "p50": 265.4719948768616, + "p90": 309.9519908428192, + "p95": 323.8399922847748, + "p99": 397.8559970855713 + }, + "isolatedSum": { + "p50": 283.1359952688217, + "p90": 347.3920002579689, + "p95": 371.96798622608185, + "p99": 445.3120008111 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 219.10400688648224, + "p90": 245.5040067434311, + "p95": 260.3200078010559, + "p99": 308.0959916114807 + }, + "combine": { + "p50": 69.50400024652481, + "p90": 78.33600044250488, + "p95": 83.96799862384796, + "p99": 95.8079993724823 + }, + "roundtrip": { + "p50": 275.2319872379303, + "p90": 308.9599907398224, + "p95": 331.07200264930725, + "p99": 425.6319999694824 + }, + "isolatedSum": { + "p50": 288.60800713300705, + "p90": 323.840007185936, + "p95": 344.28800642490387, + "p99": 403.903990983963 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 
600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 221.27999365329742, + "p90": 263.90400528907776, + "p95": 282.20799565315247, + "p99": 368.51200461387634 + }, + "combine": { + "p50": 79.77599650621414, + "p90": 91.32800251245499, + "p95": 96.6079980134964, + "p99": 106.52799904346466 + }, + "roundtrip": { + "p50": 288.4159982204437, + "p90": 336.41600608825684, + "p95": 353.7920117378235, + "p99": 471.1360037326813 + }, + "isolatedSum": { + "p50": 301.05599015951157, + "p90": 355.23200780153275, + "p95": 378.81599366664886, + "p99": 475.040003657341 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 233.024001121521, + "p90": 284.4479978084564, + "p95": 301.63198709487915, + "p99": 392.5760090351105 + }, + "combine": { + "p50": 97.50399738550186, + "p90": 109.76000130176544, + "p95": 115.99999666213989, + "p99": 127.93600559234619 + }, + "roundtrip": { + "p50": 316.6399896144867, + "p90": 356.06399178504944, + "p95": 368.5759902000427, + "p99": 464.352011680603 + }, + "isolatedSum": { + "p50": 330.52799850702286, + "p90": 394.20799911022186, + "p95": 417.63198375701904, + "p99": 520.5120146274567 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-180681db", + "identity": "h200|deepep|7168|8|384|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||d6c49ae98878760", + "colorKey": "h200_87683f6c", + "comparisonKey": "3006922c66758d92", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:53:15.049258+00:00", + "status": "valid", 
+ "publicationStatus": "official", + "runner": "h200-dgxc-slurm_9", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · fp8", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 384, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "d6c49ae98878760", + "workloadId": "set:8:9a27d0df4b17fa09", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271721386", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271721386", + "createdAt": "2026-06-26T23:51:33Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 212.44800090789795, + "p90": 272.8320062160492, + "p95": 292.32001304626465, + "p99": 382.752001285553 + }, + "combine": { + "p50": 58.75200033187866, + "p90": 73.40800017118454, + "p95": 78.5600021481514, + "p99": 96.12800180912018 + }, + "roundtrip": { + 
"p50": 247.26399779319763, + "p90": 306.36799335479736, + "p95": 325.1200020313263, + "p99": 389.8560106754303 + }, + "isolatedSum": { + "p50": 271.2000012397766, + "p90": 346.24000638723373, + "p95": 370.88001519441605, + "p99": 478.88000309467316 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 301056, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 212.09600567817688, + "p90": 273.69600534439087, + "p95": 297.791987657547, + "p99": 586.5920186042786 + }, + "combine": { + "p50": 58.17599967122078, + "p90": 74.81600344181061, + "p95": 79.71200346946716, + "p99": 97.120001912117 + }, + "roundtrip": { + "p50": 265.3760015964508, + "p90": 339.6799862384796, + "p95": 375.5840063095093, + "p99": 458.8159918785095 + }, + "isolatedSum": { + "p50": 270.27200534939766, + "p90": 348.5120087862015, + "p95": 377.50399112701416, + "p99": 683.7120205163956 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 609280, + "combineLogicalBytes": 1218560, + "fanoutMean": 5.3125, + "recvTokensMax": 14, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 197.6960003376007, + "p90": 252.8960108757019, + "p95": 267.64801144599915, + "p99": 318.59201192855835 + }, + "combine": { + "p50": 57.920001447200775, + "p90": 70.49600034952164, + "p95": 76.4160007238388, + "p99": 87.36000210046768 + }, + "roundtrip": { + "p50": 246.91200256347656, + "p90": 306.2080144882202, + "p95": 339.1680121421814, + "p99": 585.1519703865051 + }, + "isolatedSum": { + "p50": 255.61600178480148, + "p90": 323.39201122522354, + "p95": 344.06401216983795, + "p99": 405.95201402902603 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1204224, + "combineLogicalBytes": 2408448, + "fanoutMean": 5.25, + 
"recvTokensMax": 26, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 211.93599700927734, + "p90": 265.1520073413849, + "p95": 276.6079902648926, + "p99": 336.5760147571564 + }, + "combine": { + "p50": 59.647999703884125, + "p90": 77.02399790287018, + "p95": 82.94399827718735, + "p99": 96.54399752616882 + }, + "roundtrip": { + "p50": 259.5840096473694, + "p90": 317.6639974117279, + "p95": 331.9680094718933, + "p99": 400.06399154663086 + }, + "isolatedSum": { + "p50": 271.58399671316147, + "p90": 342.17600524425507, + "p95": 359.5519885420799, + "p99": 433.1200122833252 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2415616, + "combineLogicalBytes": 4831232, + "fanoutMean": 5.265625, + "recvTokensMax": 48, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 214.01600539684296, + "p90": 275.90399980545044, + "p95": 303.9039969444275, + "p99": 374.30399656295776 + }, + "combine": { + "p50": 61.76000088453293, + "p90": 80.4160013794899, + "p95": 84.79999750852585, + "p99": 99.16800260543823 + }, + "roundtrip": { + "p50": 258.59200954437256, + "p90": 322.9120075702667, + "p95": 347.104012966156, + "p99": 422.39999771118164 + }, + "isolatedSum": { + "p50": 275.7760062813759, + "p90": 356.32000118494034, + "p95": 388.70399445295334, + "p99": 473.471999168396 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4924416, + "combineLogicalBytes": 9848832, + "fanoutMean": 5.3671875, + "recvTokensMax": 91, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 209.9200040102005, + "p90": 263.7439966201782, + "p95": 275.2639949321747, + "p99": 311.13600730895996 + }, + "combine": { + "p50": 67.58400052785873, + "p90": 84.09599959850311, + "p95": 
87.42400258779526, + "p99": 103.90400141477585 + }, + "roundtrip": { + "p50": 263.5520100593567, + "p90": 318.30400228500366, + "p95": 334.5920145511627, + "p99": 403.80799770355225 + }, + "isolatedSum": { + "p50": 277.50400453805923, + "p90": 347.83999621868134, + "p95": 362.68799751996994, + "p99": 415.0400087237358 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9748480, + "combineLogicalBytes": 19496960, + "fanoutMean": 5.3125, + "recvTokensMax": 178, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 205.82400262355804, + "p90": 253.02401185035706, + "p95": 266.36800169944763, + "p99": 311.5200102329254 + }, + "combine": { + "p50": 78.40000092983246, + "p90": 92.76799857616425, + "p95": 98.04800152778625, + "p99": 111.07199639081955 + }, + "roundtrip": { + "p50": 272.7360129356384, + "p90": 325.50400495529175, + "p95": 342.6879942417145, + "p99": 378.6559998989105 + }, + "isolatedSum": { + "p50": 284.2240035533905, + "p90": 345.7920104265213, + "p95": 364.4160032272339, + "p99": 422.59200662374496 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19418112, + "combineLogicalBytes": 38836224, + "fanoutMean": 5.291015625, + "recvTokensMax": 372, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 223.23200106620789, + "p90": 271.61601185798645, + "p95": 281.98400139808655, + "p99": 319.96798515319824 + }, + "combine": { + "p50": 96.25600278377533, + "p90": 112.44799941778183, + "p95": 115.61600118875504, + "p99": 127.36000120639801 + }, + "roundtrip": { + "p50": 324.864000082016, + "p90": 388.63998651504517, + "p95": 415.3279960155487, + "p99": 494.3999946117401 + }, + "isolatedSum": { + "p50": 319.4880038499832, + "p90": 384.0640112757683, + "p95": 397.6000025868416, + "p99": 447.32798635959625 + }, + "roundtripMeasured": true, 
+ "dispatchLogicalBytes": 38757376, + "combineLogicalBytes": 77514752, + "fanoutMean": 5.2802734375, + "recvTokensMax": 707, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-b1b077c8", + "identity": "h200|deepep|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|ac583971f94b176", + "colorKey": "h200_3a17d46b", + "comparisonKey": "f29f35383c05d38b", + "schemaVersion": 3, + "generatedAt": "2026-06-26T17:30:04.228393+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_9", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "normalized", + "suite": "resource-constrained", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · fp8 (norm)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": 0.18, + "achievedFraction": 0.1818, + "configuredUnits": 24, + "deviceUnits": 132, + "resourceClass": "resource-constrained", + "conformanceClass": "resource-conforming", + "fixedKernel": false, + "paretoEligible": true + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": "set:8:d8d49658059863f2", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": 
"SemiAnalysisAI/InferenceX", + "run": { + "id": "28254401482", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254401482", + "createdAt": "2026-06-26T17:28:31Z", + "sha": "60dec7d70f554e252fec87709e2be52752947db1" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 70.23999840021133, + "p90": 92.38400310277939, + "p95": 101.88800096511841, + "p99": 121.15199863910675 + }, + "combine": { + "p50": 58.88000130653381, + "p90": 70.3359991312027, + "p95": 78.65600287914276, + "p99": 101.43999755382538 + }, + "roundtrip": { + "p50": 159.32799875736237, + "p90": 200.3840059041977, + "p95": 213.69600296020508, + "p99": 243.58400702476501 + }, + "isolatedSum": { + "p50": 129.11999970674515, + "p90": 162.7200022339821, + "p95": 180.54400384426117, + "p99": 222.59199619293213 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 67.55200028419495, + "p90": 91.45600348711014, + "p95": 102.33599692583084, + "p99": 144.57599818706512 + }, + "combine": { + "p50": 59.42400172352791, + "p90": 71.6480016708374, + "p95": 81.24800026416779, + "p99": 105.43999820947647 + }, + "roundtrip": { + "p50": 156.12800419330597, + "p90": 199.13600385189056, + "p95": 215.32799303531647, + "p99": 382.4000060558319 + }, + "isolatedSum": { + "p50": 126.97600200772285, + "p90": 163.10400515794754, + "p95": 183.58399718999863, + "p99": 250.0159963965416 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 69.72800195217133, + "p90": 88.54400366544724, + 
"p95": 98.24000298976898, + "p99": 228.60799729824066 + }, + "combine": { + "p50": 60.92799827456474, + "p90": 72.92799651622772, + "p95": 77.7600035071373, + "p99": 90.91199934482574 + }, + "roundtrip": { + "p50": 160.67199409008026, + "p90": 186.20799481868744, + "p95": 196.44799828529358, + "p99": 242.14400351047516 + }, + "isolatedSum": { + "p50": 130.65600022673607, + "p90": 161.47200018167496, + "p95": 176.00000649690628, + "p99": 319.5199966430664 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 70.49600034952164, + "p90": 97.47199714183807, + "p95": 107.84000158309937, + "p99": 151.90400183200836 + }, + "combine": { + "p50": 61.47199869155884, + "p90": 76.89599692821503, + "p95": 85.28000116348267, + "p99": 107.64800012111664 + }, + "roundtrip": { + "p50": 155.8080017566681, + "p90": 187.45599687099457, + "p95": 205.24799823760986, + "p99": 242.88000166416168 + }, + "isolatedSum": { + "p50": 131.96799904108047, + "p90": 174.3679940700531, + "p95": 193.12000274658203, + "p99": 259.552001953125 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487296, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 68.4799998998642, + "p90": 86.94399893283844, + "p95": 95.58399766683578, + "p99": 126.08000636100769 + }, + "combine": { + "p50": 63.391998410224915, + "p90": 77.34400033950806, + "p95": 86.62399649620056, + "p99": 119.55200135707855 + }, + "roundtrip": { + "p50": 164.2879992723465, + "p90": 188.09600174427032, + "p95": 203.64800095558167, + "p99": 272.7999985218048 + }, + "isolatedSum": { + "p50": 
131.8719983100891, + "p90": 164.2879992723465, + "p95": 182.20799416303635, + "p99": 245.63200771808624 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 68.25599819421768, + "p90": 91.13600105047226, + "p95": 98.91200065612793, + "p99": 114.78400230407715 + }, + "combine": { + "p50": 66.27199798822403, + "p90": 78.84799689054489, + "p95": 85.40800213813782, + "p99": 92.73599833250046 + }, + "roundtrip": { + "p50": 165.0879979133606, + "p90": 203.45599949359894, + "p95": 221.15199267864227, + "p99": 462.911993265152 + }, + "isolatedSum": { + "p50": 134.5279961824417, + "p90": 169.98399794101715, + "p95": 184.32000279426575, + "p99": 207.5200006365776 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 74.46400076150894, + "p90": 89.21600133180618, + "p95": 99.32799637317657, + "p99": 120.57600170373917 + }, + "combine": { + "p50": 80.44800162315369, + "p90": 89.75999802350998, + "p95": 94.65599805116653, + "p99": 122.30399996042252 + }, + "roundtrip": { + "p50": 183.45600366592407, + "p90": 210.78400313854218, + "p95": 228.5439968109131, + "p99": 287.4239981174469 + }, + "isolatedSum": { + "p50": 154.91200238466263, + "p90": 178.97599935531616, + "p95": 193.9839944243431, + "p99": 242.88000166416168 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 
128, + "globalTokens": 1024, + "dispatch": { + "p50": 88.67199718952179, + "p90": 102.7199998497963, + "p95": 111.93600296974182, + "p99": 128.9920061826706 + }, + "combine": { + "p50": 96.83199971914291, + "p90": 108.86400192975998, + "p95": 114.43199962377548, + "p99": 124.1919994354248 + }, + "roundtrip": { + "p50": 208.99200439453125, + "p90": 229.34399545192719, + "p95": 239.9040013551712, + "p99": 260.22401452064514 + }, + "isolatedSum": { + "p50": 185.5039969086647, + "p90": 211.58400177955627, + "p95": 226.3680025935173, + "p99": 253.1840056180954 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-a2649fd4", + "identity": "h200|deepep|7168|8|256|fp8|normal|cached-layout-comm-only-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|ac583971f94b176", + "colorKey": "h200_50a9ee63", + "comparisonKey": "aae31d5755e4ce66", + "schemaVersion": 3, + "generatedAt": "2026-06-26T17:30:20.768220+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_1", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "normalized", + "suite": "resource-constrained", + "comparisonClass": "standardized", + "measurementContract": "cached-layout-comm-only-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · fp8 (norm) [cl]", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": 0.18, + "achievedFraction": 0.1818, + "configuredUnits": 24, + 
"deviceUnits": 132, + "resourceClass": "resource-constrained", + "conformanceClass": "resource-conforming", + "fixedKernel": false, + "paretoEligible": true + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": "set:8:d8d49658059863f2", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28254418007", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254418007", + "createdAt": "2026-06-26T17:28:51Z", + "sha": "60dec7d70f554e252fec87709e2be52752947db1" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 56.09599873423576, + "p90": 86.7839977145195, + "p95": 94.97600048780441, + "p99": 109.98400300741196 + }, + "combine": { + "p50": 60.864001512527466, + "p90": 79.64800298213959, + "p95": 85.7279971241951, + "p99": 109.24799740314484 + }, + "roundtrip": { + "p50": 148.60799908638, + "p90": 199.42399859428406, + "p95": 207.45599269866943, + "p99": 260.5440020561218 + }, + "isolatedSum": { + "p50": 116.96000024676323, + "p90": 166.4320006966591, + "p95": 180.7039976119995, + "p99": 219.2320004105568 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 52.06400156021118, + "p90": 83.42400193214417, + "p95": 88.99199962615967, + "p99": 123.80799651145935 + }, + "combine": { + "p50": 59.808000922203064, + "p90": 77.91999727487564, + "p95": 84.48000252246857, + "p99": 130.78400492668152 + }, + "roundtrip": { + 
"p50": 145.82400023937225, + "p90": 194.91200149059296, + "p95": 215.10399878025055, + "p99": 273.79199862480164 + }, + "isolatedSum": { + "p50": 111.87200248241425, + "p90": 161.3439992070198, + "p95": 173.47200214862823, + "p99": 254.59200143814087 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 56.60799890756607, + "p90": 89.08800035715103, + "p95": 98.91200065612793, + "p99": 111.7440015077591 + }, + "combine": { + "p50": 60.7680007815361, + "p90": 78.52800190448761, + "p95": 84.22400057315826, + "p99": 97.95200079679489 + }, + "roundtrip": { + "p50": 143.74400675296783, + "p90": 192.7040070295334, + "p95": 212.0320051908493, + "p99": 294.46399211883545 + }, + "isolatedSum": { + "p50": 117.37599968910217, + "p90": 167.61600226163864, + "p95": 183.1360012292862, + "p99": 209.69600230455399 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 52.25599929690361, + "p90": 80.09599894285202, + "p95": 88.35200220346451, + "p99": 109.37599837779999 + }, + "combine": { + "p50": 60.736000537872314, + "p90": 79.48800176382065, + "p95": 85.60000360012054, + "p99": 108.64000022411346 + }, + "roundtrip": { + "p50": 141.12000167369843, + "p90": 183.87199938297272, + "p95": 195.23200392723083, + "p99": 286.24001145362854 + }, + "isolatedSum": { + "p50": 112.99199983477592, + "p90": 159.58400070667267, + "p95": 173.95200580358505, + "p99": 218.01599860191345 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487296, + "combineLogicalBytes": 4974592, + "fanoutMean": 
5.421875, + "recvTokensMax": 47, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 53.47200110554695, + "p90": 77.60000228881836, + "p95": 85.05599945783615, + "p99": 93.9520001411438 + }, + "combine": { + "p50": 62.49599903821945, + "p90": 77.34400033950806, + "p95": 82.11199939250946, + "p99": 95.77599912881851 + }, + "roundtrip": { + "p50": 142.17600226402283, + "p90": 183.77600610256195, + "p95": 197.79199361801147, + "p99": 241.5360063314438 + }, + "isolatedSum": { + "p50": 115.9680001437664, + "p90": 154.94400262832642, + "p95": 167.1679988503456, + "p99": 189.7279992699623 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 55.39200082421303, + "p90": 81.05599880218506, + "p95": 89.15200084447861, + "p99": 109.6000000834465 + }, + "combine": { + "p50": 66.39999896287918, + "p90": 84.927998483181, + "p95": 88.3840024471283, + "p99": 101.3759970664978 + }, + "roundtrip": { + "p50": 148.15999567508698, + "p90": 191.23199582099915, + "p95": 200.57600736618042, + "p99": 228.4799963235855 + }, + "isolatedSum": { + "p50": 121.79199978709221, + "p90": 165.98399728536606, + "p95": 177.5360032916069, + "p99": 210.9759971499443 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 65.08799642324448, + "p90": 90.97599983215332, + "p95": 100.63999891281128, + "p99": 148.28799664974213 + }, + "combine": { + "p50": 81.05599880218506, + "p90": 96.54399752616882, 
+ "p95": 99.23200309276581, + "p99": 106.52799904346466 + }, + "roundtrip": { + "p50": 171.424001455307, + "p90": 216.8000042438507, + "p95": 232.1919947862625, + "p99": 288.38399052619934 + }, + "isolatedSum": { + "p50": 146.14399522542953, + "p90": 187.51999735832214, + "p95": 199.8720020055771, + "p99": 254.8159956932068 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 75.00799745321274, + "p90": 94.14400160312653, + "p95": 99.04000163078308, + "p99": 115.23199826478958 + }, + "combine": { + "p50": 97.34400361776352, + "p90": 115.84000289440155, + "p95": 119.03999745845795, + "p99": 133.56800377368927 + }, + "roundtrip": { + "p50": 197.79199361801147, + "p90": 227.80799865722656, + "p95": 237.8239929676056, + "p99": 276.8320143222809 + }, + "isolatedSum": { + "p50": 172.35200107097626, + "p90": 209.98400449752808, + "p95": 218.07999908924103, + "p99": 248.80000203847885 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-fdd09e42", + "identity": "h200|deepep|7168|8|256|fp8|normal|cached-layout-comm-only-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h200_4f483b60", + "comparisonKey": "95dcff383339100e", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:50:13.723754+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_10", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": 
"cached-layout-comm-only-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · fp8 [cl]", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": "set:8:d8d49658059863f2", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271629782", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271629782", + "createdAt": "2026-06-26T23:48:49Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 51.04000121355057, + "p90": 76.64000242948532, + "p95": 84.48000252246857, + "p99": 115.32799899578094 + }, + "combine": { + "p50": 59.20000001788139, + "p90": 77.47200131416321, + "p95": 87.13600039482117, + "p99": 133.85599851608276 + }, + "roundtrip": { + "p50": 140.73599874973297, + "p90": 177.18400061130524, + "p95": 189.60000574588776, + "p99": 239.3919974565506 + }, + "isolatedSum": { + "p50": 110.24000123143196, + "p90": 154.11200374364853, + "p95": 171.61600291728973, + "p99": 249.1839975118637 + }, 
+ "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 54.71999943256378, + "p90": 82.40000158548355, + "p95": 88.16000074148178, + "p99": 115.10399729013443 + }, + "combine": { + "p50": 60.19200012087822, + "p90": 74.78400319814682, + "p95": 81.44000172615051, + "p99": 106.84800148010254 + }, + "roundtrip": { + "p50": 147.13600277900696, + "p90": 190.75199961662292, + "p95": 217.79200434684753, + "p99": 253.79198789596558 + }, + "isolatedSum": { + "p50": 114.911999553442, + "p90": 157.18400478363037, + "p95": 169.6000024676323, + "p99": 221.95199877023697 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 54.048001766204834, + "p90": 77.53600180149078, + "p95": 84.99199897050858, + "p99": 106.4319983124733 + }, + "combine": { + "p50": 60.70400029420853, + "p90": 75.83999633789062, + "p95": 82.36800134181976, + "p99": 106.84800148010254 + }, + "roundtrip": { + "p50": 144.31999623775482, + "p90": 184.4799965620041, + "p95": 193.9840018749237, + "p99": 240.83200097084045 + }, + "isolatedSum": { + "p50": 114.75200206041336, + "p90": 153.3759981393814, + "p95": 167.36000031232834, + "p99": 213.27999979257584 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 54.687999188899994, + "p90": 88.25600147247314, + "p95": 
94.46399658918381, + "p99": 120.19199877977371 + }, + "combine": { + "p50": 61.824001371860504, + "p90": 77.02399790287018, + "p95": 83.26400071382523, + "p99": 101.88800096511841 + }, + "roundtrip": { + "p50": 140.35199582576752, + "p90": 180.09600043296814, + "p95": 193.53599846363068, + "p99": 230.5919975042343 + }, + "isolatedSum": { + "p50": 116.5120005607605, + "p90": 165.27999937534332, + "p95": 177.72799730300903, + "p99": 222.07999974489212 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487296, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 54.17599901556969, + "p90": 81.11999928951263, + "p95": 88.8959988951683, + "p99": 129.4720023870468 + }, + "combine": { + "p50": 62.3680017888546, + "p90": 78.36800068616867, + "p95": 82.56000280380249, + "p99": 101.21600329875946 + }, + "roundtrip": { + "p50": 140.47999680042267, + "p90": 177.66399681568146, + "p95": 196.99199497699738, + "p99": 237.7600073814392 + }, + "isolatedSum": { + "p50": 116.54400080442429, + "p90": 159.4879999756813, + "p95": 171.4560016989708, + "p99": 230.68800568580627 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 57.24800005555153, + "p90": 79.64800298213959, + "p95": 85.91999858617783, + "p99": 104.67199981212616 + }, + "combine": { + "p50": 68.41599941253662, + "p90": 82.33600109815598, + "p95": 85.7279971241951, + "p99": 99.10400211811066 + }, + "roundtrip": { + "p50": 145.1520025730133, + "p90": 178.1120002269745, + "p95": 187.6479983329773, + "p99": 228.7359982728958 + }, + "isolatedSum": { + "p50": 125.66399946808815, 
+ "p90": 161.98400408029556, + "p95": 171.64799571037292, + "p99": 203.77600193023682 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 66.30399823188782, + "p90": 83.90399813652039, + "p95": 90.17600119113922, + "p99": 149.1840034723282 + }, + "combine": { + "p50": 78.72000336647034, + "p90": 93.79199892282486, + "p95": 98.88000041246414, + "p99": 114.01599645614624 + }, + "roundtrip": { + "p50": 164.8319959640503, + "p90": 199.48799908161163, + "p95": 211.2639993429184, + "p99": 271.93599939346313 + }, + "isolatedSum": { + "p50": 145.02400159835815, + "p90": 177.69599705934525, + "p95": 189.05600160360336, + "p99": 263.1999999284744 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 73.82400333881378, + "p90": 90.62399715185165, + "p95": 95.39200365543365, + "p99": 114.52800035476685 + }, + "combine": { + "p50": 97.24800288677216, + "p90": 112.31999844312668, + "p95": 115.77600240707397, + "p99": 130.49599528312683 + }, + "roundtrip": { + "p50": 199.77599382400513, + "p90": 228.32000255584717, + "p95": 247.29600548744202, + "p99": 297.88801074028015 + }, + "isolatedSum": { + "p50": 171.07200622558594, + "p90": 202.94399559497833, + "p95": 211.16800606250763, + "p99": 245.02399563789368 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": 
"cx-39796825", + "identity": "h200|deepep|7168|8|256|fp8|ll|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h200_ff232ea5", + "comparisonKey": "643e1b15925a53af", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:51:34.222899+00:00", + "status": "valid", + "publicationStatus": "diagnostic", + "runner": "h200-dgxc-slurm_4", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "ll", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · fp8 LL", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": "set:8:d8d49658059863f2", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271653486", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271653486", + "createdAt": "2026-06-26T23:49:28Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + 
"tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 29.08799983561039, + "p90": 36.41600161790848, + "p95": 44.28799822926521, + "p99": 63.551999628543854 + }, + "combine": { + "p50": 40.95999896526337, + "p90": 64.70400094985962, + "p95": 74.8480036854744, + "p99": 125.69600343704224 + }, + "roundtrip": { + "p50": 1856.8320274353027, + "p90": 1879.7760009765625, + "p95": 1894.495964050293, + "p99": 2116.607904434204 + }, + "isolatedSum": { + "p50": 70.04799880087376, + "p90": 101.1200025677681, + "p95": 119.13600191473961, + "p99": 189.2480030655861 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 14, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 28.76799926161766, + "p90": 36.22400015592575, + "p95": 42.11200028657913, + "p99": 48.767998814582825 + }, + "combine": { + "p50": 36.06399893760681, + "p90": 45.75999826192856, + "p95": 52.2879995405674, + "p99": 84.1279998421669 + }, + "roundtrip": { + "p50": 1847.4880456924438, + "p90": 1861.0880374908447, + "p95": 1871.3279962539673, + "p99": 2004.607915878296 + }, + "isolatedSum": { + "p50": 64.83199819922447, + "p90": 81.98399841785431, + "p95": 94.39999982714653, + "p99": 132.89599865674973 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 21, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 28.575999662280083, + "p90": 38.816001266241074, + "p95": 45.632001012563705, + "p99": 57.95200169086456 + }, + "combine": { + "p50": 41.69600084424019, + "p90": 59.93599817156792, + "p95": 68.06399673223495, + "p99": 170.30400037765503 + }, + "roundtrip": { + "p50": 1848.3840227127075, + "p90": 1869.920015335083, + 
"p95": 1881.9199800491333, + "p99": 1995.0400590896606 + }, + "isolatedSum": { + "p50": 70.27200050652027, + "p90": 98.75199943780899, + "p95": 113.69599774479866, + "p99": 228.2560020685196 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 39, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 29.37600016593933, + "p90": 37.21600025892258, + "p95": 50.65599828958511, + "p99": 62.65600025653839 + }, + "combine": { + "p50": 47.520000487565994, + "p90": 61.664000153541565, + "p95": 68.57600063085556, + "p99": 103.2319962978363 + }, + "roundtrip": { + "p50": 1859.2000007629395, + "p90": 1878.6879777908325, + "p95": 1886.1440420150757, + "p99": 1924.1600036621094 + }, + "isolatedSum": { + "p50": 76.89600065350533, + "p90": 98.88000041246414, + "p95": 119.23199892044067, + "p99": 165.8879965543747 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487296, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 74, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 31.039999797940254, + "p90": 43.83999854326248, + "p95": 53.63199859857559, + "p99": 66.01600348949432 + }, + "combine": { + "p50": 52.25599929690361, + "p90": 69.43999975919724, + "p95": 82.40000158548355, + "p99": 131.99999928474426 + }, + "roundtrip": { + "p50": 1864.0960454940796, + "p90": 1884.160041809082, + "p95": 1898.1759548187256, + "p99": 1969.1519737243652 + }, + "isolatedSum": { + "p50": 83.29599909484386, + "p90": 113.27999830245972, + "p95": 136.03200018405914, + "p99": 198.0160027742386 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 145, + "stragglerRank": 0, 
+ "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 33.79200026392937, + "p90": 45.27999833226204, + "p95": 49.31199923157692, + "p99": 58.14399942755699 + }, + "combine": { + "p50": 47.839999198913574, + "p90": 64.25599753856659, + "p95": 70.36799937486649, + "p99": 101.53599828481674 + }, + "roundtrip": { + "p50": 1865.056037902832, + "p90": 1881.5360069274902, + "p95": 1888.8959884643555, + "p99": 1917.7600145339966 + }, + "isolatedSum": { + "p50": 81.63199946284294, + "p90": 109.53599587082863, + "p95": 119.6799986064434, + "p99": 159.67999771237373 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 287, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 40.44799879193306, + "p90": 49.6320016682148, + "p95": 52.799999713897705, + "p99": 64.96000289916992 + }, + "combine": { + "p50": 63.58399987220764, + "p90": 81.31200075149536, + "p95": 98.7199991941452, + "p99": 231.1680018901825 + }, + "roundtrip": { + "p50": 1885.632038116455, + "p90": 1903.3279418945312, + "p95": 1914.080023765564, + "p99": 2039.776086807251 + }, + "isolatedSum": { + "p50": 104.0319986641407, + "p90": 130.94400241971016, + "p95": 151.5199989080429, + "p99": 296.1280047893524 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 564, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 51.552001386880875, + "p90": 60.60799956321716, + "p95": 62.65600025653839, + "p99": 73.82400333881378 + }, + "combine": { + "p50": 86.81599795818329, + "p90": 96.19200229644775, + "p95": 108.47999900579453, + "p99": 
146.7839926481247 + }, + "roundtrip": { + "p50": 1922.6560592651367, + "p90": 1938.4959936141968, + "p95": 1957.0879936218262, + "p99": 2130.3679943084717 + }, + "isolatedSum": { + "p50": 138.36799934506416, + "p90": 156.80000185966492, + "p95": 171.13599926233292, + "p99": 220.60799598693848 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 1104, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-dbb437b5", + "identity": "h200|deepep|7168|8|256|fp8|ll|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h200_7ec76e6d", + "comparisonKey": "9a87b27b98bf2d7a", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:51:35.330044+00:00", + "status": "valid", + "publicationStatus": "diagnostic", + "runner": "h200-dgxc-slurm_13", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "ll", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · fp8 LL", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + 
"workloadId": "set:8:d8d49658059863f2", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271656517", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271656517", + "createdAt": "2026-06-26T23:49:35Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 28.76799926161766, + "p90": 38.88000175356865, + "p95": 44.73600164055824, + "p99": 61.15199998021126 + }, + "combine": { + "p50": 36.768000572919846, + "p90": 48.287998884916306, + "p95": 57.53599852323532, + "p99": 90.81599861383438 + }, + "roundtrip": { + "p50": 1847.7439880371094, + "p90": 1855.6159734725952, + "p95": 1860.543966293335, + "p99": 1893.2160139083862 + }, + "isolatedSum": { + "p50": 65.5359998345375, + "p90": 87.16800063848495, + "p95": 102.27200016379356, + "p99": 151.96799859404564 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 14, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 28.991999104619026, + "p90": 33.376000821590424, + "p95": 37.02399879693985, + "p99": 41.05599969625473 + }, + "combine": { + "p50": 37.59999945759773, + "p90": 49.375999718904495, + "p95": 58.62399935722351, + "p99": 235.83999276161194 + }, + "roundtrip": { + "p50": 1847.6799726486206, + "p90": 1855.936050415039, + "p95": 1861.4720106124878, + "p99": 1959.007978439331 + }, + "isolatedSum": { + "p50": 66.59199856221676, + "p90": 82.75200054049492, + "p95": 95.64799815416336, + "p99": 276.89599245786667 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + 
"combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 21, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 28.16000021994114, + "p90": 52.41600051522255, + "p95": 58.59199911355972, + "p99": 83.23200047016144 + }, + "combine": { + "p50": 36.959998309612274, + "p90": 48.06400090456009, + "p95": 54.59199845790863, + "p99": 94.59199756383896 + }, + "roundtrip": { + "p50": 1848.3200073242188, + "p90": 1858.62398147583, + "p95": 1864.5440340042114, + "p99": 1925.9519577026367 + }, + "isolatedSum": { + "p50": 65.11999852955341, + "p90": 100.48000141978264, + "p95": 113.18399757146835, + "p99": 177.8239980340004 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 39, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 29.7279991209507, + "p90": 36.3520011305809, + "p95": 41.08799993991852, + "p99": 52.191998809576035 + }, + "combine": { + "p50": 37.88800165057182, + "p90": 50.52800104022026, + "p95": 61.24800071120262, + "p99": 175.7120043039322 + }, + "roundtrip": { + "p50": 1849.4080305099487, + "p90": 1862.7519607543945, + "p95": 1875.4240274429321, + "p99": 1930.5599927902222 + }, + "isolatedSum": { + "p50": 67.61600077152252, + "p90": 86.88000217080116, + "p95": 102.33600065112114, + "p99": 227.90400311350822 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487296, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 74, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 31.775999814271927, + "p90": 37.856001406908035, + "p95": 43.007999658584595, + "p99": 52.2879995405674 + }, + "combine": { + "p50": 
41.280001401901245, + "p90": 52.319999784231186, + "p95": 64.41599875688553, + "p99": 140.28799533843994 + }, + "roundtrip": { + "p50": 1854.848027229309, + "p90": 1876.3200044631958, + "p95": 1915.3599739074707, + "p99": 1982.6879501342773 + }, + "isolatedSum": { + "p50": 73.05600121617317, + "p90": 90.17600119113922, + "p95": 107.42399841547012, + "p99": 192.57599487900734 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 145, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 33.344000577926636, + "p90": 36.159999668598175, + "p95": 38.30400109291077, + "p99": 46.14400118589401 + }, + "combine": { + "p50": 46.30399867892265, + "p90": 56.223999708890915, + "p95": 66.49599969387054, + "p99": 109.24799740314484 + }, + "roundtrip": { + "p50": 1862.8159761428833, + "p90": 1875.2959966659546, + "p95": 1890.6559944152832, + "p99": 1946.6559886932373 + }, + "isolatedSum": { + "p50": 79.64799925684929, + "p90": 92.38399937748909, + "p95": 104.80000078678131, + "p99": 155.39199858903885 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 287, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 39.68000039458275, + "p90": 51.58400163054466, + "p95": 57.72799998521805, + "p99": 97.63199836015701 + }, + "combine": { + "p50": 60.70400029420853, + "p90": 75.29599964618683, + "p95": 94.2080020904541, + "p99": 319.7759985923767 + }, + "roundtrip": { + "p50": 1882.3360204696655, + "p90": 1892.0639753341675, + "p95": 1907.5520038604736, + "p99": 1997.3440170288086 + }, + "isolatedSum": { + "p50": 100.38400068879128, + "p90": 126.88000127673149, + "p95": 151.93600207567215, + "p99": 
417.4079969525337 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 564, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 51.552001386880875, + "p90": 55.07199838757515, + "p95": 59.007998555898666, + "p99": 66.11199676990509 + }, + "combine": { + "p50": 86.43200248479843, + "p90": 93.08800101280212, + "p95": 100.89600086212158, + "p99": 167.10400581359863 + }, + "roundtrip": { + "p50": 1921.3759899139404, + "p90": 1930.4640293121338, + "p95": 1935.968041419983, + "p99": 1968.6399698257446 + }, + "isolatedSum": { + "p50": 137.9840038716793, + "p90": 148.15999940037727, + "p95": 159.90399941802025, + "p99": 233.21600258350372 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 1104, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-1caa7ff5", + "identity": "h200|deepep|7168|8|256|fp8|ll|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|ac583971f94b176", + "colorKey": "h200_df102230", + "comparisonKey": "2ce1d8f2e79d5005", + "schemaVersion": 3, + "generatedAt": "2026-06-26T17:31:08.227503+00:00", + "status": "valid", + "publicationStatus": "diagnostic", + "runner": "h200-dgxc-slurm_3", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "ll", + "resourceMode": "normalized", + "suite": "resource-constrained", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · fp8 LL (norm)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + 
"routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": 0.18, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": "set:8:d8d49658059863f2", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28254435010", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254435010", + "createdAt": "2026-06-26T17:29:12Z", + "sha": "60dec7d70f554e252fec87709e2be52752947db1" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 28.736000880599022, + "p90": 42.24000126123428, + "p95": 44.76799815893173, + "p99": 50.97600072622299 + }, + "combine": { + "p50": 37.087999284267426, + "p90": 44.256001710891724, + "p95": 49.6320016682148, + "p99": 65.60000032186508 + }, + "roundtrip": { + "p50": 1824.4800567626953, + "p90": 1831.7760229110718, + "p95": 1838.3680582046509, + "p99": 1884.1919898986816 + }, + "isolatedSum": { + "p50": 65.82400016486645, + "p90": 86.496002972126, + "p95": 94.39999982714653, + "p99": 116.57600104808807 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 14, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + 
"p50": 28.00000086426735, + "p90": 33.31200033426285, + "p95": 37.66399994492531, + "p99": 50.36799982190132 + }, + "combine": { + "p50": 36.86400130391121, + "p90": 45.27999833226204, + "p95": 51.29599943757057, + "p99": 124.1919994354248 + }, + "roundtrip": { + "p50": 1824.9599933624268, + "p90": 1835.4239463806152, + "p95": 1843.8400030136108, + "p99": 1961.7279767990112 + }, + "isolatedSum": { + "p50": 64.86400216817856, + "p90": 78.59199866652489, + "p95": 88.95999938249588, + "p99": 174.55999925732613 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 21, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 28.48000079393387, + "p90": 33.59999880194664, + "p95": 36.41600161790848, + "p99": 42.33599826693535 + }, + "combine": { + "p50": 37.53599897027016, + "p90": 47.839999198913574, + "p95": 62.144000083208084, + "p99": 136.4479959011078 + }, + "roundtrip": { + "p50": 1825.8240222930908, + "p90": 1833.9519500732422, + "p95": 1842.0480489730835, + "p99": 1925.0880479812622 + }, + "isolatedSum": { + "p50": 66.01599976420403, + "p90": 81.43999800086021, + "p95": 98.56000170111656, + "p99": 178.78399416804314 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 39, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 28.28799933195114, + "p90": 32.00000151991844, + "p95": 34.55999866127968, + "p99": 39.744000881910324 + }, + "combine": { + "p50": 37.43999823927879, + "p90": 46.78399860858917, + "p95": 53.69599908590317, + "p99": 124.64000284671783 + }, + "roundtrip": { + "p50": 1826.3360261917114, + "p90": 1834.1439962387085, + "p95": 1840.1600122451782, + "p99": 
1865.6320571899414 + }, + "isolatedSum": { + "p50": 65.72799757122993, + "p90": 78.78400012850761, + "p95": 88.25599774718285, + "p99": 164.38400372862816 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487296, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 74, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 31.136000528931618, + "p90": 34.94400158524513, + "p95": 37.856001406908035, + "p99": 46.39999940991402 + }, + "combine": { + "p50": 39.264000952243805, + "p90": 44.28799822926521, + "p95": 46.46399989724159, + "p99": 77.85599678754807 + }, + "roundtrip": { + "p50": 1830.4959535598755, + "p90": 1838.304042816162, + "p95": 1842.78404712677, + "p99": 1957.919955253601 + }, + "isolatedSum": { + "p50": 70.40000148117542, + "p90": 79.23199981451035, + "p95": 84.32000130414963, + "p99": 124.25599619746208 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 145, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 32.896000891923904, + "p90": 35.96799820661545, + "p95": 39.135999977588654, + "p99": 45.56800052523613 + }, + "combine": { + "p50": 45.791998505592346, + "p90": 54.016001522541046, + "p95": 83.0719992518425, + "p99": 153.56799960136414 + }, + "roundtrip": { + "p50": 1840.1600122451782, + "p90": 1847.5840091705322, + "p95": 1853.9199829101562, + "p99": 1896.1600065231323 + }, + "isolatedSum": { + "p50": 78.68799939751625, + "p90": 89.9839997291565, + "p95": 122.20799922943115, + "p99": 199.13600012660027 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 287, + "stragglerRank": 0, + "correct": true, + 
"samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 38.84800150990486, + "p90": 42.43199899792671, + "p95": 47.16800153255463, + "p99": 62.144000083208084 + }, + "combine": { + "p50": 59.67999994754791, + "p90": 66.14399701356888, + "p95": 83.16799998283386, + "p99": 121.21599912643433 + }, + "roundtrip": { + "p50": 1859.5199584960938, + "p90": 1866.495966911316, + "p95": 1875.264048576355, + "p99": 1916.1280393600464 + }, + "isolatedSum": { + "p50": 98.52800145745277, + "p90": 108.57599601149559, + "p95": 130.3360015153885, + "p99": 183.3599992096424 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 564, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 51.80799961090088, + "p90": 55.135998874902725, + "p95": 59.776000678539276, + "p99": 68.83200258016586 + }, + "combine": { + "p50": 86.40000224113464, + "p90": 92.03200042247772, + "p95": 95.74399888515472, + "p99": 156.41599893569946 + }, + "roundtrip": { + "p50": 1899.392008781433, + "p90": 1905.2480459213257, + "p95": 1909.440040588379, + "p99": 1973.3760356903076 + }, + "isolatedSum": { + "p50": 138.20800185203552, + "p90": 147.16799929738045, + "p95": 155.519999563694, + "p99": 225.24800151586533 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 1104, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-5888aff1", + "identity": "h200|deepep|4096|8|128|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||dc27c5e0894e569", + "colorKey": "h200_3a47b6c9", + "comparisonKey": "a14fc35e02b01662", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:53:49.842184+00:00", + 
"status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_12", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16", + "shape": { + "hidden": 4096, + "topk": 8, + "experts": 128, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "dc27c5e0894e569", + "workloadId": "set:6:76d8142d69406335", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271748233", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271748233", + "createdAt": "2026-06-26T23:52:22Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 91.96799993515015, + "p90": 112.70400136709213, + "p95": 120.70400267839432, + "p99": 143.8400000333786 + }, + "combine": { + "p50": 83.29600095748901, + "p90": 93.40800344944, + "p95": 99.29600358009338, + "p99": 
117.44000017642975 + }, + "roundtrip": { + "p50": 151.2639969587326, + "p90": 170.78399658203125, + "p95": 179.32799458503723, + "p99": 211.93599700927734 + }, + "isolatedSum": { + "p50": 175.26400089263916, + "p90": 206.11200481653214, + "p95": 220.0000062584877, + "p99": 261.28000020980835 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 44564480, + "combineLogicalBytes": 44564480, + "fanoutMean": 5.3125, + "recvTokensMax": 699, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 111.7120012640953, + "p90": 129.82399761676788, + "p95": 141.59999787807465, + "p99": 159.58400070667267 + }, + "combine": { + "p50": 104.35199737548828, + "p90": 119.93599683046341, + "p95": 123.83999675512314, + "p99": 136.22400164604187 + }, + "roundtrip": { + "p50": 195.42400538921356, + "p90": 218.4000015258789, + "p95": 231.51999711990356, + "p99": 307.16800689697266 + }, + "isolatedSum": { + "p50": 216.0639986395836, + "p90": 249.7599944472313, + "p95": 265.4399946331978, + "p99": 295.80800235271454 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 89726976, + "combineLogicalBytes": 89726976, + "fanoutMean": 5.34814453125, + "recvTokensMax": 1385, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 147.23199605941772, + "p90": 165.12000560760498, + "p95": 172.992005944252, + "p99": 204.6079933643341 + }, + "combine": { + "p50": 153.53600680828094, + "p90": 168.2240068912506, + "p95": 175.90400576591492, + "p99": 192.09599494934082 + }, + "roundtrip": { + "p50": 270.8800137042999, + "p90": 295.1680123806, + "p95": 303.77599596977234, + "p99": 446.8800127506256 + }, + "isolatedSum": { + "p50": 300.76800286769867, + "p90": 333.3440124988556, + "p95": 348.89601171016693, + "p99": 396.7039883136749 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 179503104, + "combineLogicalBytes": 179503104, + "fanoutMean": 5.349609375, + "recvTokensMax": 2772, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 214.52799439430237, + "p90": 237.63200640678406, + "p95": 244.9920028448105, + "p99": 282.5919985771179 + }, + "combine": { + "p50": 249.08800423145294, + "p90": 261.0880136489868, + "p95": 267.8079903125763, + "p99": 287.7439856529236 + }, + "roundtrip": { + "p50": 438.27199935913086, + "p90": 458.24000239372253, + "p95": 469.88800168037415, + "p99": 508.1599950790405 + }, + "isolatedSum": { + "p50": 463.6159986257553, + "p90": 498.7200200557709, + "p95": 512.7999931573868, + "p99": 570.3359842300415 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 359022592, + "combineLogicalBytes": 359022592, + "fanoutMean": 5.349853515625, + "recvTokensMax": 5558, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 352.1279990673065, + "p90": 375.0720024108887, + "p95": 387.1999979019165, + "p99": 523.360013961792 + }, + "combine": { + "p50": 419.9039936065674, + "p90": 433.8560104370117, + "p95": 441.536009311676, + "p99": 501.6319751739502 + }, + "roundtrip": { + "p50": 744.5759773254395, + "p90": 766.4960026741028, + "p95": 777.3119807243347, + "p99": 837.7919793128967 + }, + "isolatedSum": { + "p50": 772.0319926738739, + "p90": 808.9280128479004, + "p95": 828.7360072135925, + "p99": 1024.9919891357422 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 716111872, + "combineLogicalBytes": 716111872, + "fanoutMean": 5.33544921875, + "recvTokensMax": 10982, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 629.6319961547852, + "p90": 648.9279866218567, + 
"p95": 656.2560200691223, + "p99": 715.1039838790894 + }, + "combine": { + "p50": 754.368007183075, + "p90": 767.1359777450562, + "p95": 774.5919823646545, + "p99": 917.5040125846863 + }, + "roundtrip": { + "p50": 1354.0480136871338, + "p90": 1376.4159679412842, + "p95": 1387.8079652786255, + "p99": 1428.8320541381836 + }, + "isolatedSum": { + "p50": 1384.00000333786, + "p90": 1416.0639643669128, + "p95": 1430.8480024337769, + "p99": 1632.6079964637756 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1432395776, + "combineLogicalBytes": 1432395776, + "fanoutMean": 5.336090087890625, + "recvTokensMax": 21939, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-b183f57f", + "identity": "h200|deepep|5120|8|160|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||0c022a63bbcbf42", + "colorKey": "h200_3a47b6c9", + "comparisonKey": "6953183723230449", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:54:18.715974+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_0", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16", + "shape": { + "hidden": 5120, + "topk": 8, + "experts": 160, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + 
"paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "0c022a63bbcbf42", + "workloadId": "set:6:28c0c09b13ff0acf", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271763623", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271763623", + "createdAt": "2026-06-26T23:52:49Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 97.69599884748459, + "p90": 105.95200210809708, + "p95": 110.68800091743469, + "p99": 117.37599968910217 + }, + "combine": { + "p50": 90.33600240945816, + "p90": 95.64799815416336, + "p95": 98.65599870681763, + "p99": 108.03200304508209 + }, + "roundtrip": { + "p50": 164.32000696659088, + "p90": 174.01599884033203, + "p95": 181.0240000486374, + "p99": 201.56799256801605 + }, + "isolatedSum": { + "p50": 188.03200125694275, + "p90": 201.60000026226044, + "p95": 209.34399962425232, + "p99": 225.40800273418427 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 55674880, + "combineLogicalBytes": 55674880, + "fanoutMean": 5.3095703125, + "recvTokensMax": 699, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 124.79999661445618, + "p90": 143.96800100803375, + "p95": 151.96800231933594, + "p99": 176.57600343227386 + }, + "combine": { + "p50": 119.71200257539749, + "p90": 133.56800377368927, + "p95": 140.09599387645721, + "p99": 156.70399367809296 + }, + "roundtrip": { + "p50": 216.48000180721283, + "p90": 235.35999655723572, + "p95": 243.00800263881683, + "p99": 
263.71198892593384 + }, + "isolatedSum": { + "p50": 244.51199918985367, + "p90": 277.536004781723, + "p95": 292.06399619579315, + "p99": 333.2799971103668 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 111104000, + "combineLogicalBytes": 111104000, + "fanoutMean": 5.2978515625, + "recvTokensMax": 1387, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 161.6320013999939, + "p90": 176.54399573802948, + "p95": 185.47199666500092, + "p99": 204.96000349521637 + }, + "combine": { + "p50": 177.47199535369873, + "p90": 187.74400651454926, + "p95": 193.88799369335175, + "p99": 218.27200055122375 + }, + "roundtrip": { + "p50": 309.2159926891327, + "p90": 327.2320032119751, + "p95": 333.1199884414673, + "p99": 373.1519877910614 + }, + "isolatedSum": { + "p50": 339.1039967536926, + "p90": 364.28800225257874, + "p95": 379.35999035835266, + "p99": 423.2320040464401 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 223098880, + "combineLogicalBytes": 223098880, + "fanoutMean": 5.319091796875, + "recvTokensMax": 2762, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 242.97599494457245, + "p90": 263.264000415802, + "p95": 271.10400795936584, + "p99": 296.54398560523987 + }, + "combine": { + "p50": 279.6800136566162, + "p90": 291.55200719833374, + "p95": 296.7039942741394, + "p99": 321.82401418685913 + }, + "roundtrip": { + "p50": 498.30400943756104, + "p90": 516.0959959030151, + "p95": 529.4719934463501, + "p99": 696.6400146484375 + }, + "isolatedSum": { + "p50": 522.6560086011887, + "p90": 554.8160076141357, + "p95": 567.8080022335052, + "p99": 618.367999792099 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 446730240, + "combineLogicalBytes": 446730240, + "fanoutMean": 5.325439453125, + "recvTokensMax": 5518, + 
"stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 402.52798795700073, + "p90": 414.46399688720703, + "p95": 420.28799653053284, + "p99": 450.72001218795776 + }, + "combine": { + "p50": 478.7839949131012, + "p90": 488.22399973869324, + "p95": 490.4960095882416, + "p99": 499.07198548316956 + }, + "roundtrip": { + "p50": 857.6639890670776, + "p90": 869.3439960479736, + "p95": 882.3680281639099, + "p99": 1592.25594997406 + }, + "isolatedSum": { + "p50": 881.3119828701019, + "p90": 902.6879966259003, + "p95": 910.7840061187744, + "p99": 949.7919976711273 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 893634560, + "combineLogicalBytes": 893634560, + "fanoutMean": 5.32647705078125, + "recvTokensMax": 11032, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 750.4640221595764, + "p90": 770.9119915962219, + "p95": 780.6079983711243, + "p99": 812.3199939727783 + }, + "combine": { + "p50": 873.1840252876282, + "p90": 885.6319785118103, + "p95": 893.4080004692078, + "p99": 941.9839978218079 + }, + "roundtrip": { + "p50": 1586.143970489502, + "p90": 1606.112003326416, + "p95": 1623.5840320587158, + "p99": 1662.7839803695679 + }, + "isolatedSum": { + "p50": 1623.6480474472046, + "p90": 1656.5439701080322, + "p95": 1674.015998840332, + "p99": 1754.3039917945862 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1786265600, + "combineLogicalBytes": 1786265600, + "fanoutMean": 5.323486328125, + "recvTokensMax": 21895, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-96267e21", + "identity": "h200|deepep|6144|8|256|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "h200_3a47b6c9", + "comparisonKey": "27afbf0ad63e86ca", + 
"schemaVersion": 3, + "generatedAt": "2026-06-26T23:55:01.688428+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_9", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16", + "shape": { + "hidden": 6144, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": "set:6:9f5e1e005a35e937", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271778692", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271778692", + "createdAt": "2026-06-26T23:53:16Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 108.64000022411346, + "p90": 120.70400267839432, + "p95": 131.58400356769562, + "p99": 146.2399959564209 + }, + "combine": { + "p50": 95.71199864149094, + 
"p90": 103.67999970912933, + "p95": 112.73600161075592, + "p99": 121.50400131940842 + }, + "roundtrip": { + "p50": 181.0240000486374, + "p90": 199.2959976196289, + "p95": 207.16799795627594, + "p99": 244.9280023574829 + }, + "isolatedSum": { + "p50": 204.3519988656044, + "p90": 224.38400238752365, + "p95": 244.32000517845154, + "p99": 267.7439972758293 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 66576384, + "combineLogicalBytes": 66576384, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 135.29600203037262, + "p90": 148.00000190734863, + "p95": 157.72800147533417, + "p99": 182.20800161361694 + }, + "combine": { + "p50": 128.31999361515045, + "p90": 139.74399864673615, + "p95": 145.7280069589615, + "p99": 158.75199437141418 + }, + "roundtrip": { + "p50": 235.6480062007904, + "p90": 248.6400008201599, + "p95": 259.16799902915955, + "p99": 301.60000920295715 + }, + "isolatedSum": { + "p50": 263.61599564552307, + "p90": 287.7440005540848, + "p95": 303.45600843429565, + "p99": 340.9599959850311 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 133619712, + "combineLogicalBytes": 133619712, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 184.1920018196106, + "p90": 199.8080015182495, + "p95": 208.48000049591064, + "p99": 231.90400004386902 + }, + "combine": { + "p50": 198.62399995326996, + "p90": 212.0320051908493, + "p95": 221.18400037288666, + "p99": 289.7599935531616 + }, + "roundtrip": { + "p50": 349.4719862937927, + "p90": 366.3040101528168, + "p95": 376.8320083618164, + "p99": 431.2959909439087 + }, + "isolatedSum": { + "p50": 382.81600177288055, + "p90": 411.8400067090988, + "p95": 429.6640008687973, + "p99": 
521.6639935970306 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 267657216, + "combineLogicalBytes": 267657216, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 278.0799865722656, + "p90": 296.671986579895, + "p95": 305.759996175766, + "p99": 346.8799889087677 + }, + "combine": { + "p50": 313.1519854068756, + "p90": 324.6079981327057, + "p95": 331.9680094718933, + "p99": 350.5600094795227 + }, + "roundtrip": { + "p50": 563.1999969482422, + "p90": 577.9839754104614, + "p95": 589.5040035247803, + "p99": 688.9920234680176 + }, + "isolatedSum": { + "p50": 591.2319719791412, + "p90": 621.2799847126007, + "p95": 637.7280056476593, + "p99": 697.4399983882904 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 534380544, + "combineLogicalBytes": 534380544, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 465.8240079879761, + "p90": 484.3200147151947, + "p95": 496.2559938430786, + "p99": 558.8799715042114 + }, + "combine": { + "p50": 544.3519949913025, + "p90": 560.1599812507629, + "p95": 564.9600028991699, + "p99": 624.0959763526917 + }, + "roundtrip": { + "p50": 981.0879826545715, + "p90": 996.3520169258118, + "p95": 1007.7439546585083, + "p99": 1077.1839618682861 + }, + "isolatedSum": { + "p50": 1010.1760029792786, + "p90": 1044.4799959659576, + "p95": 1061.2159967422485, + "p99": 1182.975947856903 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1066119168, + "combineLogicalBytes": 1066119168, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + 
"p50": 858.2080006599426, + "p90": 877.9839873313904, + "p95": 884.0000033378601, + "p99": 925.6640076637268 + }, + "combine": { + "p50": 981.98401927948, + "p90": 994.4959878921509, + "p95": 1000.9280443191528, + "p99": 1111.9040250778198 + }, + "roundtrip": { + "p50": 1810.1119995117188, + "p90": 1826.0159492492676, + "p95": 1833.7279558181763, + "p99": 1947.551965713501 + }, + "isolatedSum": { + "p50": 1840.1920199394226, + "p90": 1872.4799752235413, + "p95": 1884.928047657013, + "p99": 2037.5680327415466 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2131722240, + "combineLogicalBytes": 2131722240, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-bc48bfe5", + "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||157ca81687ddb63", + "colorKey": "h200_d982b749", + "comparisonKey": "6da1f9e2ab025dbe", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:56:28.417730+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_0", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": 
"backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "157ca81687ddb63", + "workloadId": "set:3:a426d66e479dc893", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271827040", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271827040", + "createdAt": "2026-06-26T23:54:52Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 116.80000275373459, + "p90": 135.80800592899323, + "p95": 142.14399456977844, + "p99": 172.7679967880249 + }, + "combine": { + "p50": 104.35199737548828, + "p90": 121.56800180673599, + "p95": 125.72799623012543, + "p99": 150.65599977970123 + }, + "roundtrip": { + "p50": 195.77600061893463, + "p90": 216.22399985790253, + "p95": 222.9440063238144, + "p99": 267.67998933792114 + }, + "isolatedSum": { + "p50": 221.15200012922287, + "p90": 257.3760077357292, + "p95": 267.87199079990387, + "p99": 323.42399656772614 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 202.36800611019135, + "p90": 223.1999933719635, + "p95": 231.58399760723114, + "p99": 263.7439966201782 + }, + "combine": { + "p50": 223.93600642681122, + "p90": 236.32000386714935, + "p95": 241.88800156116486, + "p99": 258.7839961051941 + }, + "roundtrip": { + "p50": 399.58399534225464, + "p90": 
417.279988527298, + "p95": 424.4160056114197, + "p99": 459.77601408958435 + }, + "isolatedSum": { + "p50": 426.30401253700256, + "p90": 459.51999723911285, + "p95": 473.471999168396, + "p99": 522.5279927253723 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 312266752, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 542.5919890403748, + "p90": 557.5039982795715, + "p95": 563.3280277252197, + "p99": 587.8080129623413 + }, + "combine": { + "p50": 619.1999912261963, + "p90": 634.5599889755249, + "p95": 646.3040113449097, + "p99": 683.8080286979675 + }, + "roundtrip": { + "p50": 1131.1999559402466, + "p90": 1146.720051765442, + "p95": 1155.743956565857, + "p99": 1289.952039718628 + }, + "isolatedSum": { + "p50": 1161.791980266571, + "p90": 1192.0639872550964, + "p95": 1209.6320390701294, + "p99": 1271.6160416603088 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243805696, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-5553e87c", + "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "h200_d982b749", + "comparisonKey": "6da1f9e2ab025dbe", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:49:31.030615+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_13", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + 
"worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": "set:6:a426d66e479dc893", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271605214", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271605214", + "createdAt": "2026-06-26T23:47:59Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 116.64000153541565, + "p90": 132.9600065946579, + "p95": 139.80799913406372, + "p99": 183.1039935350418 + }, + "combine": { + "p50": 106.11200332641602, + "p90": 121.08799815177917, + "p95": 127.61600315570831, + "p99": 162.7199947834015 + }, + "roundtrip": { + "p50": 197.11999595165253, + "p90": 216.67200326919556, + "p95": 225.2800017595291, + "p99": 246.75199389457703 + }, + "isolatedSum": { + "p50": 222.75200486183167, + "p90": 254.04800474643707, + "p95": 267.42400228977203, + "p99": 345.8239883184433 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 
77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 143.8719928264618, + "p90": 166.143998503685, + "p95": 172.7360039949417, + "p99": 195.8719938993454 + }, + "combine": { + "p50": 143.327996134758, + "p90": 159.743994474411, + "p95": 162.81600296497345, + "p99": 171.7119961977005 + }, + "roundtrip": { + "p50": 260.70401072502136, + "p90": 280.8319926261902, + "p95": 286.27198934555054, + "p99": 329.3119966983795 + }, + "isolatedSum": { + "p50": 287.1999889612198, + "p90": 325.887992978096, + "p95": 335.55200695991516, + "p99": 367.5839900970459 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155889664, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 203.23200523853302, + "p90": 227.00800001621246, + "p95": 239.07199501991272, + "p99": 277.1199941635132 + }, + "combine": { + "p50": 224.60800409317017, + "p90": 241.31199717521667, + "p95": 248.44799935817719, + "p99": 268.22400093078613 + }, + "roundtrip": { + "p50": 403.0719995498657, + "p90": 426.68798565864563, + "p95": 434.4640076160431, + "p99": 486.01600527763367 + }, + "isolatedSum": { + "p50": 427.8400093317032, + "p90": 468.31999719142914, + "p95": 487.5199943780899, + "p99": 545.3439950942993 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 312266752, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 314.7520124912262, + "p90": 335.90400218963623, + "p95": 347.51999378204346, + "p99": 390.9119963645935 + }, + 
"combine": { + "p50": 357.9519987106323, + "p90": 372.1280097961426, + "p95": 378.9440095424652, + "p99": 416.6080057621002 + }, + "roundtrip": { + "p50": 646.7199921607971, + "p90": 668.3200001716614, + "p95": 684.4800114631653, + "p99": 754.4959783554077 + }, + "isolatedSum": { + "p50": 672.7040112018585, + "p90": 708.0320119857788, + "p95": 726.4640033245087, + "p99": 807.5200021266937 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 623443968, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 542.0799851417542, + "p90": 560.1279735565186, + "p95": 575.3600001335144, + "p99": 736.2880110740662 + }, + "combine": { + "p50": 621.8879818916321, + "p90": 636.031985282898, + "p95": 641.6959762573242, + "p99": 732.7359914779663 + }, + "roundtrip": { + "p50": 1137.279987335205, + "p90": 1170.591950416565, + "p95": 1213.7600183486938, + "p99": 1369.6320056915283 + }, + "isolatedSum": { + "p50": 1163.9679670333862, + "p90": 1196.1599588394165, + "p95": 1217.0559763908386, + "p99": 1469.0240025520325 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243805696, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 997.3120093345642, + "p90": 1021.28005027771, + "p95": 1029.7919511795044, + "p99": 1212.8000259399414 + }, + "combine": { + "p50": 1121.6000318527222, + "p90": 1139.456033706665, + "p95": 1149.2160558700562, + "p99": 1185.4079961776733 + }, + "roundtrip": { + "p50": 2089.888095855713, + "p90": 2112.6720905303955, + "p95": 2126.431941986084, + "p99": 2277.951955795288 + }, + "isolatedSum": { + "p50": 2118.9120411872864, + "p90": 
2160.736083984375, + "p95": 2179.0080070495605, + "p99": 2398.2080221176147 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487009280, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-71f62108", + "identity": "h200|deepep|7168|8|256|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "h200_3a47b6c9", + "comparisonKey": "c80c3e7446de9680", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:50:05.486154+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_12", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": "set:6:a426d66e479dc893", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": 
"sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271618490", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271618490", + "createdAt": "2026-06-26T23:48:27Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 118.68800222873688, + "p90": 127.3919939994812, + "p95": 133.31200182437897, + "p99": 144.57599818706512 + }, + "combine": { + "p50": 105.8880016207695, + "p90": 112.76800185441971, + "p95": 117.79200285673141, + "p99": 129.72800433635712 + }, + "roundtrip": { + "p50": 199.35999810695648, + "p90": 209.4399929046631, + "p95": 215.7440036535263, + "p99": 257.82400369644165 + }, + "isolatedSum": { + "p50": 224.57600384950638, + "p90": 240.1599958539009, + "p95": 251.10400468111038, + "p99": 274.30400252342224 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 145.4080045223236, + "p90": 154.91199493408203, + "p95": 159.2639982700348, + "p99": 170.6559956073761 + }, + "combine": { + "p50": 144.3520039319992, + "p90": 150.59199929237366, + "p95": 153.05599570274353, + "p99": 167.4879938364029 + }, + "roundtrip": { + "p50": 263.5200023651123, + "p90": 270.3680098056793, + "p95": 274.7200131416321, + "p99": 291.1039888858795 + }, + "isolatedSum": { + "p50": 289.7600084543228, + "p90": 305.5039942264557, + "p95": 312.3199939727783, + "p99": 338.143989443779 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155889664, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + 
}, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 205.1839977502823, + "p90": 219.04000639915466, + "p95": 227.743998169899, + "p99": 242.5920069217682 + }, + "combine": { + "p50": 221.50400280952454, + "p90": 232.96000063419342, + "p95": 239.58399891853333, + "p99": 263.0400061607361 + }, + "roundtrip": { + "p50": 397.8239893913269, + "p90": 412.03200817108154, + "p95": 421.08801007270813, + "p99": 463.8400077819824 + }, + "isolatedSum": { + "p50": 426.6880005598068, + "p90": 452.0000070333481, + "p95": 467.3279970884323, + "p99": 505.6320130825043 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 312266752, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 316.0000145435333, + "p90": 324.0959942340851, + "p95": 328.99200916290283, + "p99": 351.6159951686859 + }, + "combine": { + "p50": 350.17600655555725, + "p90": 358.5599958896637, + "p95": 363.2960021495819, + "p99": 392.8639888763428 + }, + "roundtrip": { + "p50": 639.4559741020203, + "p90": 655.1039814949036, + "p95": 665.3760075569153, + "p99": 768.8000202178955 + }, + "isolatedSum": { + "p50": 666.1760210990906, + "p90": 682.6559901237488, + "p95": 692.2880113124847, + "p99": 744.4799840450287 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 623443968, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 527.4559855461121, + "p90": 543.936014175415, + "p95": 551.3280034065247, + "p99": 568.5439705848694 + }, + "combine": { + "p50": 612.384021282196, + "p90": 627.3279786109924, + "p95": 639.519989490509, + "p99": 984.5119714736938 + }, + "roundtrip": { + "p50": 
1111.6160154342651, + "p90": 1130.6240558624268, + "p95": 1139.7759914398193, + "p99": 1297.5679636001587 + }, + "isolatedSum": { + "p50": 1139.840006828308, + "p90": 1171.2639927864075, + "p95": 1190.8479928970337, + "p99": 1553.0559420585632 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243805696, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 994.4959878921509, + "p90": 1017.6960229873657, + "p95": 1024.7360467910767, + "p99": 1044.8640584945679 + }, + "combine": { + "p50": 1103.9680242538452, + "p90": 1115.7439947128296, + "p95": 1122.3039627075195, + "p99": 1306.1439990997314 + }, + "roundtrip": { + "p50": 2064.448118209839, + "p90": 2089.344024658203, + "p95": 2106.0800552368164, + "p99": 2285.504102706909 + }, + "isolatedSum": { + "p50": 2098.464012145996, + "p90": 2133.4400177001953, + "p95": 2147.040009498596, + "p99": 2351.0080575942993 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487009280, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-a8fb4d9b", + "identity": "h200|deepep|7168|8|384|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||cd50548525dafdf", + "colorKey": "h200_3a47b6c9", + "comparisonKey": "f6581a3621ac6cd2", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:53:25.459367+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_13", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": 
"h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 384, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "cd50548525dafdf", + "workloadId": "set:6:b23bc0c4b6402c69", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271732597", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271732597", + "createdAt": "2026-06-26T23:51:54Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 115.90400338172913, + "p90": 134.14399325847626, + "p95": 140.35199582576752, + "p99": 160.38399934768677 + }, + "combine": { + "p50": 104.09600287675858, + "p90": 119.71200257539749, + "p95": 124.64000284671783, + "p99": 145.31199634075165 + }, + "roundtrip": { + "p50": 195.64799964427948, + "p90": 212.8639966249466, + "p95": 219.9999988079071, + "p99": 230.3680032491684 + }, + "isolatedSum": { + "p50": 220.0000062584877, + "p90": 253.85599583387375, + "p95": 264.99199867248535, + "p99": 305.6959956884384 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 77514752, + "combineLogicalBytes": 77514752, + "fanoutMean": 5.2802734375, + "recvTokensMax": 707, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 145.53600549697876, + "p90": 162.30399906635284, + "p95": 170.3999936580658, + "p99": 184.64000523090363 + }, + "combine": { + "p50": 143.77599954605103, + "p90": 157.21599757671356, + "p95": 162.27200627326965, + "p99": 175.64800381660461 + }, + "roundtrip": { + "p50": 265.1199996471405, + "p90": 283.90398621559143, + "p95": 289.0239953994751, + "p99": 302.0159900188446 + }, + "isolatedSum": { + "p50": 289.3120050430298, + "p90": 319.5199966430664, + "p95": 332.67199993133545, + "p99": 360.28800904750824 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 154570752, + "combineLogicalBytes": 154570752, + "fanoutMean": 5.2646484375, + "recvTokensMax": 1391, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 205.4399996995926, + "p90": 224.0000069141388, + "p95": 232.06399381160736, + "p99": 262.719988822937 + }, + "combine": { + "p50": 225.0880002975464, + "p90": 243.96799504756927, + "p95": 250.0160038471222, + "p99": 335.55200695991516 + }, + "roundtrip": { + "p50": 403.55199575424194, + "p90": 432.8959882259369, + "p95": 447.1360146999359, + "p99": 589.6000266075134 + }, + "isolatedSum": { + "p50": 430.527999997139, + "p90": 467.96800196170807, + "p95": 482.07999765872955, + "p99": 598.2719957828522 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 309772288, + "combineLogicalBytes": 309772288, + "fanoutMean": 5.275390625, + "recvTokensMax": 2754, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 312.79999017715454, + "p90": 334.7199857234955, + 
"p95": 340.1919901371002, + "p99": 368.73599886894226 + }, + "combine": { + "p50": 356.1280071735382, + "p90": 367.45598912239075, + "p95": 372.6719915866852, + "p99": 395.77600359916687 + }, + "roundtrip": { + "p50": 643.1999802589417, + "p90": 657.3759913444519, + "p95": 663.7439727783203, + "p99": 708.1599831581116 + }, + "isolatedSum": { + "p50": 668.9279973506927, + "p90": 702.1759748458862, + "p95": 712.8639817237854, + "p99": 764.5120024681091 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 619501568, + "combineLogicalBytes": 619501568, + "fanoutMean": 5.2750244140625, + "recvTokensMax": 5469, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 523.2639908790588, + "p90": 541.9520139694214, + "p95": 552.2559881210327, + "p99": 611.3280057907104 + }, + "combine": { + "p50": 611.0079884529114, + "p90": 623.0080127716064, + "p95": 630.3359866142273, + "p99": 657.2480201721191 + }, + "roundtrip": { + "p50": 1108.7679862976074, + "p90": 1123.9999532699585, + "p95": 1132.3200464248657, + "p99": 1233.63196849823 + }, + "isolatedSum": { + "p50": 1134.2719793319702, + "p90": 1164.9600267410278, + "p95": 1182.59197473526, + "p99": 1268.5760259628296 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1239375872, + "combineLogicalBytes": 1239375872, + "fanoutMean": 5.276611328125, + "recvTokensMax": 10883, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 984.5119714736938, + "p90": 1019.4560289382935, + "p95": 1036.128044128418, + "p99": 1103.0399799346924 + }, + "combine": { + "p50": 1114.6559715270996, + "p90": 1129.472017288208, + "p95": 1136.896014213562, + "p99": 1180.3200244903564 + }, + "roundtrip": { + "p50": 2057.408094406128, + "p90": 2091.423988342285, + "p95": 2103.264093399048, + "p99": 2406.8479537963867 + }, + 
"isolatedSum": { + "p50": 2099.1679430007935, + "p90": 2148.9280462265015, + "p95": 2173.02405834198, + "p99": 2283.360004425049 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2479669248, + "combineLogicalBytes": 2479669248, + "fanoutMean": 5.278564453125, + "recvTokensMax": 21730, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-ad612267", + "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|prefill|normal|none|none|0|tuned||0a3064a2af0dd39", + "colorKey": "h200_b5c683eb", + "comparisonKey": "b18bebc70bf6167d", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:03:03.036669+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_13", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · balanced", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced", + "routingLabel": "balanced", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "0a3064a2af0dd39", + "workloadId": "set:6:2dad1a73ff872905", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + 
"eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28272035224", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272035224", + "createdAt": "2026-06-27T00:01:30Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 132.60799646377563, + "p90": 151.8400013446808, + "p95": 157.3760062456131, + "p99": 181.47200345993042 + }, + "combine": { + "p50": 125.40799379348755, + "p90": 146.59200608730316, + "p95": 152.73599326610565, + "p99": 228.5439968109131 + }, + "roundtrip": { + "p50": 230.20799458026886, + "p90": 244.51200664043427, + "p95": 253.4080147743225, + "p99": 302.2719919681549 + }, + "isolatedSum": { + "p50": 258.0159902572632, + "p90": 298.43200743198395, + "p95": 310.11199951171875, + "p99": 410.0160002708435 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 8, + "recvTokensMax": 1024, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 175.99999904632568, + "p90": 196.83200120925903, + "p95": 202.2400051355362, + "p99": 229.5680046081543 + }, + "combine": { + "p50": 175.58400332927704, + "p90": 189.82400000095367, + "p95": 193.79200041294098, + "p99": 265.5999958515167 + }, + "roundtrip": { + "p50": 323.0719864368439, + "p90": 339.29601311683655, + "p95": 345.3119993209839, + "p99": 369.4399893283844 + }, + "isolatedSum": { + "p50": 351.5840023756027, + "p90": 386.6560012102127, + "p95": 396.0320055484772, + "p99": 495.168000459671 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 8, + "recvTokensMax": 2048, + "stragglerRank": 5, + 
"correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 275.35998821258545, + "p90": 301.472008228302, + "p95": 311.19999289512634, + "p99": 359.0080142021179 + }, + "combine": { + "p50": 268.5120105743408, + "p90": 284.38401222229004, + "p95": 289.3120050430298, + "p99": 321.6319978237152 + }, + "roundtrip": { + "p50": 519.9040174484253, + "p90": 549.2479801177979, + "p95": 559.6160292625427, + "p99": 602.4960279464722 + }, + "isolatedSum": { + "p50": 543.8719987869263, + "p90": 585.856020450592, + "p95": 600.5119979381561, + "p99": 680.6400120258331 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 469762048, + "combineLogicalBytes": 469762048, + "fanoutMean": 8, + "recvTokensMax": 4096, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 458.75200629234314, + "p90": 610.4320287704468, + "p95": 643.1999802589417, + "p99": 663.7120246887207 + }, + "combine": { + "p50": 451.3919949531555, + "p90": 462.911993265152, + "p95": 471.23199701309204, + "p99": 480.8639883995056 + }, + "roundtrip": { + "p50": 882.0160031318665, + "p90": 899.4879722595215, + "p95": 906.6879749298096, + "p99": 926.688015460968 + }, + "isolatedSum": { + "p50": 910.1440012454987, + "p90": 1073.3440220355988, + "p95": 1114.4319772720337, + "p99": 1144.5760130882263 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 939524096, + "combineLogicalBytes": 939524096, + "fanoutMean": 8, + "recvTokensMax": 8192, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 819.5520043373108, + "p90": 837.664008140564, + "p95": 856.3200235366821, + "p99": 920.5440282821655 + }, + "combine": { + "p50": 816.6080117225647, + "p90": 834.879994392395, + "p95": 846.9439744949341, + "p99": 919.264018535614 
+ }, + "roundtrip": { + "p50": 1605.247974395752, + "p90": 1634.1760158538818, + "p95": 1654.9760103225708, + "p99": 1745.8560466766357 + }, + "isolatedSum": { + "p50": 1636.1600160598755, + "p90": 1672.544002532959, + "p95": 1703.2639980316162, + "p99": 1839.8080468177795 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1879048192, + "combineLogicalBytes": 1879048192, + "fanoutMean": 8, + "recvTokensMax": 16384, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1564.7679567337036, + "p90": 1586.0799551010132, + "p95": 1601.8879413604736, + "p99": 1723.0720520019531 + }, + "combine": { + "p50": 1521.9199657440186, + "p90": 1538.7840270996094, + "p95": 1547.104001045227, + "p99": 1626.911997795105 + }, + "roundtrip": { + "p50": 3057.663917541504, + "p90": 3078.3679485321045, + "p95": 3098.1760025024414, + "p99": 3246.783971786499 + }, + "isolatedSum": { + "p50": 3086.687922477722, + "p90": 3124.8639822006226, + "p95": 3148.9919424057007, + "p99": 3349.984049797058 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3758096384, + "combineLogicalBytes": 3758096384, + "fanoutMean": 8, + "recvTokensMax": 32768, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-30f874f3", + "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|prefill|normal|none|none|0|tuned||9e6ac678a09f7f8", + "colorKey": "h200_b5c683eb", + "comparisonKey": "b18bebc70bf6167d", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:56:38.753854+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_11", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": 
"h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · balanced", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced", + "routingLabel": "balanced", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "9e6ac678a09f7f8", + "workloadId": "set:3:2dad1a73ff872905", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271834221", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271834221", + "createdAt": "2026-06-26T23:55:06Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 131.99999928474426, + "p90": 147.74399995803833, + "p95": 155.68000078201294, + "p99": 168.7680035829544 + }, + "combine": { + "p50": 126.01600587368011, + "p90": 139.74399864673615, + "p95": 146.08000218868256, + "p99": 156.73600137233734 + }, + "roundtrip": { + "p50": 229.8559993505478, + "p90": 251.583993434906, + "p95": 260.0319981575012, + "p99": 275.07200837135315 + }, + "isolatedSum": { + "p50": 258.0160051584244, + "p90": 287.4879986047745, + "p95": 301.7600029706955, + "p99": 325.50400495529175 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 8, + "recvTokensMax": 1024, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 270.7520127296448, + "p90": 292.89600253105164, + "p95": 304.9600124359131, + "p99": 352.6400029659271 + }, + "combine": { + "p50": 268.5759961605072, + "p90": 281.76000714302063, + "p95": 287.200003862381, + "p99": 301.31199955940247 + }, + "roundtrip": { + "p50": 514.4960284233093, + "p90": 532.7360033988953, + "p95": 542.1119928359985, + "p99": 571.615993976593 + }, + "isolatedSum": { + "p50": 539.328008890152, + "p90": 574.6560096740723, + "p95": 592.1600162982941, + "p99": 653.9520025253296 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 469762048, + "combineLogicalBytes": 469762048, + "fanoutMean": 8, + "recvTokensMax": 4096, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 822.0800161361694, + "p90": 845.1840281486511, + "p95": 862.5919818878174, + "p99": 1313.3759498596191 + }, + "combine": { + "p50": 820.032000541687, + "p90": 837.7919793128967, + "p95": 846.3680148124695, + "p99": 873.3440041542053 + }, + "roundtrip": { + "p50": 1605.9520244598389, + "p90": 1629.3439865112305, + "p95": 1645.1200246810913, + "p99": 1737.1840476989746 + }, + "isolatedSum": { + "p50": 1642.1120166778564, + "p90": 1682.9760074615479, + "p95": 1708.9599967002869, + "p99": 2186.7199540138245 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1879048192, + "combineLogicalBytes": 1879048192, + "fanoutMean": 8, + "recvTokensMax": 16384, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-a2c76343", + "identity": 
"h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|8|prefill|normal|none|none|0|tuned||7aa44c7b86748b9", + "colorKey": "h200_d0dfa19a", + "comparisonKey": "4ade4ca52869383d", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:56:42.077253+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_7", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · balanced-rank-local", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced-rank-local", + "routingLabel": "balanced-rank-local", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "7aa44c7b86748b9", + "workloadId": "set:3:388ff74baef05c72", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271841288", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271841288", + "createdAt": "2026-06-26T23:55:19Z", + "sha": 
"45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 85.08799970149994, + "p90": 109.40799862146378, + "p95": 117.47200042009354, + "p99": 164.38399255275726 + }, + "combine": { + "p50": 71.45600020885468, + "p90": 86.496002972126, + "p95": 91.26400202512741, + "p99": 106.20799660682678 + }, + "roundtrip": { + "p50": 132.51200318336487, + "p90": 162.7199947834015, + "p95": 173.8560050725937, + "p99": 221.5680032968521 + }, + "isolatedSum": { + "p50": 156.54399991035461, + "p90": 195.90400159358978, + "p95": 208.73600244522095, + "p99": 270.59198915958405 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 1, + "recvTokensMax": 128, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 111.00800335407257, + "p90": 130.43199479579926, + "p95": 136.80000603199005, + "p99": 165.75999557971954 + }, + "combine": { + "p50": 118.1119978427887, + "p90": 134.62400436401367, + "p95": 139.67999815940857, + "p99": 149.6639996767044 + }, + "roundtrip": { + "p50": 202.30400562286377, + "p90": 223.83999824523926, + "p95": 241.85599386692047, + "p99": 296.25600576400757 + }, + "isolatedSum": { + "p50": 229.12000119686127, + "p90": 265.0559991598129, + "p95": 276.4800041913986, + "p99": 315.42399525642395 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 1, + "recvTokensMax": 512, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 189.63199853897095, + "p90": 209.72800254821777, + "p95": 218.4319943189621, + "p99": 254.14401292800903 + }, + "combine": { + "p50": 284.960001707077, + "p90": 298.7520098686218, + "p95": 303.2959997653961, + "p99": 
331.9999873638153 + }, + "roundtrip": { + "p50": 447.3919868469238, + "p90": 475.42399168014526, + "p95": 484.8639965057373, + "p99": 529.9519896507263 + }, + "isolatedSum": { + "p50": 474.592000246048, + "p90": 508.4800124168396, + "p95": 521.7279940843582, + "p99": 586.1440002918243 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 1, + "recvTokensMax": 2048, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-79209ba6", + "identity": "h200|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|balanced+eplb|8|prefill|normal|none|none|0|tuned||df54a9510825f71", + "colorKey": "h200_06544e53", + "comparisonKey": "822fd37c7222ef9b", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:03:05.638717+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_8", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · balanced+eplb", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "balanced", + "routingLabel": "balanced+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + 
"traceSignature": "df54a9510825f71", + "workloadId": "set:6:2dad1a73ff872905", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": 1, + "eplbImbalanceAfter": 1, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28272038593", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272038593", + "createdAt": "2026-06-27T00:01:37Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 95.83999961614609, + "p90": 112.73600161075592, + "p95": 121.15199863910675, + "p99": 153.4080058336258 + }, + "combine": { + "p50": 83.99999886751175, + "p90": 97.6639986038208, + "p95": 104.22399640083313, + "p99": 116.89600348472595 + }, + "roundtrip": { + "p50": 154.1759967803955, + "p90": 176.32000148296356, + "p95": 183.45600366592407, + "p99": 211.29600703716278 + }, + "isolatedSum": { + "p50": 179.83999848365784, + "p90": 210.40000021457672, + "p95": 225.37599503993988, + "p99": 270.30400931835175 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 2, + "recvTokensMax": 384, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 115.55200070142746, + "p90": 135.68000495433807, + "p95": 143.39199662208557, + "p99": 163.26400637626648 + }, + "combine": { + "p50": 103.35999727249146, + "p90": 120.03199756145477, + "p95": 127.32799351215363, + "p99": 154.4319987297058 + }, + "roundtrip": { + "p50": 197.2160041332245, + "p90": 215.58399498462677, + "p95": 223.26399385929108, + "p99": 242.5599992275238 + }, + "isolatedSum": { + "p50": 218.91199797391891, + "p90": 255.71200251579285, + "p95": 270.7199901342392, + "p99": 317.6960051059723 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 2, + "recvTokensMax": 768, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 144.6080058813095, + "p90": 169.0559983253479, + "p95": 176.7680048942566, + "p99": 208.064004778862 + }, + "combine": { + "p50": 140.47999680042267, + "p90": 155.74400126934052, + "p95": 161.6639941930771, + "p99": 184.1920018196106 + }, + "roundtrip": { + "p50": 262.9440128803253, + "p90": 282.24000334739685, + "p95": 290.6560003757477, + "p99": 320.0640082359314 + }, + "isolatedSum": { + "p50": 285.0880026817322, + "p90": 324.7999995946884, + "p95": 338.4319990873337, + "p99": 392.2560065984726 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 2, + "recvTokensMax": 1536, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 205.4080069065094, + "p90": 229.21599447727203, + "p95": 238.49600553512573, + "p99": 255.67999482154846 + }, + "combine": { + "p50": 214.7199958562851, + "p90": 231.90400004386902, + "p95": 236.86400055885315, + "p99": 255.64798712730408 + }, + "roundtrip": { + "p50": 399.4239866733551, + "p90": 420.48001289367676, + "p95": 429.6000003814697, + "p99": 593.7280058860779 + }, + "isolatedSum": { + "p50": 420.1280027627945, + "p90": 461.11999452114105, + "p95": 475.3600060939789, + "p99": 511.32798194885254 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 2, + "recvTokensMax": 3072, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 331.743985414505, + "p90": 350.23999214172363, + "p95": 
361.08800768852234, + "p99": 406.0479998588562 + }, + "combine": { + "p50": 360.54399609565735, + "p90": 375.90399384498596, + "p95": 382.78400897979736, + "p99": 404.4159948825836 + }, + "roundtrip": { + "p50": 664.0639901161194, + "p90": 679.9039840698242, + "p95": 693.5679912567139, + "p99": 743.1359887123108 + }, + "isolatedSum": { + "p50": 692.2879815101624, + "p90": 726.1439859867096, + "p95": 743.8720166683197, + "p99": 810.4639947414398 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 469762048, + "combineLogicalBytes": 469762048, + "fanoutMean": 2, + "recvTokensMax": 6144, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 580.5439949035645, + "p90": 593.3759808540344, + "p95": 602.3679971694946, + "p99": 637.6640200614929 + }, + "combine": { + "p50": 628.3519864082336, + "p90": 641.1839723587036, + "p95": 648.3839750289917, + "p99": 680.9279918670654 + }, + "roundtrip": { + "p50": 1181.7920207977295, + "p90": 1199.295997619629, + "p95": 1210.3359699249268, + "p99": 1255.4240226745605 + }, + "isolatedSum": { + "p50": 1208.895981311798, + "p90": 1234.559953212738, + "p95": 1250.7519721984863, + "p99": 1318.5920119285583 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 939524096, + "combineLogicalBytes": 939524096, + "fanoutMean": 2, + "recvTokensMax": 12288, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-c14326f0", + "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|prefill|normal|none|none|0|tuned||38fd0bcf7109c32", + "colorKey": "h200_189562cd", + "comparisonKey": "b9475bb176588857", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:57:32.803411+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_13", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", 
+ "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · hotspot-single", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-single", + "routingLabel": "hotspot-single", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "38fd0bcf7109c32", + "workloadId": "set:3:b952d4a43d688b50", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271862413", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271862413", + "createdAt": "2026-06-26T23:56:00Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 125.11999905109406, + "p90": 140.00000059604645, + "p95": 149.88799393177032, + "p99": 171.23199999332428 + }, + "combine": { + "p50": 118.65600198507309, + "p90": 132.64000415802002, + "p95": 137.60000467300415, + "p99": 164.95999693870544 + }, + "roundtrip": { + "p50": 221.5680032968521, + "p90": 238.14399540424347, + "p95": 251.71199440956116, + "p99": 
291.6480004787445 + }, + "isolatedSum": { + "p50": 243.77600103616714, + "p90": 272.64000475406647, + "p95": 287.4879986047745, + "p99": 336.1919969320297 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1024, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 245.02399563789368, + "p90": 257.31199979782104, + "p95": 265.8880054950714, + "p99": 298.72000217437744 + }, + "combine": { + "p50": 263.68001103401184, + "p90": 275.32801032066345, + "p95": 281.9199860095978, + "p99": 299.1679906845093 + }, + "roundtrip": { + "p50": 481.9839894771576, + "p90": 495.6800043582916, + "p95": 506.1759948730469, + "p99": 808.3199858665466 + }, + "isolatedSum": { + "p50": 508.7040066719055, + "p90": 532.6400101184845, + "p95": 547.8079915046692, + "p99": 597.8879928588867 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311091200, + "combineLogicalBytes": 311091200, + "fanoutMean": 5.2978515625, + "recvTokensMax": 4096, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 725.1200079917908, + "p90": 748.0959892272949, + "p95": 763.0079984664917, + "p99": 812.0959997177124 + }, + "combine": { + "p50": 799.3280291557312, + "p90": 813.9839768409729, + "p95": 823.5200047492981, + "p99": 875.6160140037537 + }, + "roundtrip": { + "p50": 1494.3679571151733, + "p90": 1516.1919593811035, + "p95": 1528.2560586929321, + "p99": 1709.8560333251953 + }, + "isolatedSum": { + "p50": 1524.448037147522, + "p90": 1562.0799660682678, + "p95": 1586.5280032157898, + "p99": 1687.712013721466 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1241511936, + "combineLogicalBytes": 1241511936, + "fanoutMean": 5.28570556640625, + "recvTokensMax": 16384, + "stragglerRank": 
5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-17171887", + "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|prefill|normal|none|none|0|tuned||bfbb64a166e9f1c", + "colorKey": "h200_189562cd", + "comparisonKey": "b9475bb176588857", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:05:28.346517+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_4", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · hotspot-single", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-single", + "routingLabel": "hotspot-single", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "bfbb64a166e9f1c", + "workloadId": "set:6:b952d4a43d688b50", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28272106904", + "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272106904", + "createdAt": "2026-06-27T00:03:47Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 124.41600114107132, + "p90": 159.9999964237213, + "p95": 167.39200055599213, + "p99": 211.42399311065674 + }, + "combine": { + "p50": 118.01599711179733, + "p90": 146.5280055999756, + "p95": 150.27199685573578, + "p99": 162.9759967327118 + }, + "roundtrip": { + "p50": 220.2560007572174, + "p90": 253.91998887062073, + "p95": 258.432000875473, + "p99": 271.42399549484253 + }, + "isolatedSum": { + "p50": 242.43199825286865, + "p90": 306.5280020236969, + "p95": 317.6639974117279, + "p99": 374.39998984336853 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1024, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 162.432000041008, + "p90": 170.6240028142929, + "p95": 175.04000663757324, + "p99": 188.38399648666382 + }, + "combine": { + "p50": 165.0879979133606, + "p90": 175.7120043039322, + "p95": 179.83999848365784, + "p99": 191.77600741386414 + }, + "roundtrip": { + "p50": 301.66399478912354, + "p90": 317.3759877681732, + "p95": 322.6880133152008, + "p99": 333.69600772857666 + }, + "isolatedSum": { + "p50": 327.5199979543686, + "p90": 346.3360071182251, + "p95": 354.8800051212311, + "p99": 380.16000390052795 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156090368, + "combineLogicalBytes": 156090368, + "fanoutMean": 5.31640625, + "recvTokensMax": 2048, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 238.97600173950195, + "p90": 247.0400035381317, + "p95": 250.0160038471222, + "p99": 
259.39199328422546 + }, + "combine": { + "p50": 261.9200050830841, + "p90": 275.2000093460083, + "p95": 279.58399057388306, + "p99": 300.4480004310608 + }, + "roundtrip": { + "p50": 482.33601450920105, + "p90": 499.1999864578247, + "p95": 507.3919892311096, + "p99": 570.527970790863 + }, + "isolatedSum": { + "p50": 500.89600682258606, + "p90": 522.24001288414, + "p95": 529.5999944210052, + "p99": 559.8399937152863 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311091200, + "combineLogicalBytes": 311091200, + "fanoutMean": 5.2978515625, + "recvTokensMax": 4096, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 392.8639888763428, + "p90": 402.72000432014465, + "p95": 406.23998641967773, + "p99": 445.3760087490082 + }, + "combine": { + "p50": 443.1680142879486, + "p90": 455.80801367759705, + "p95": 461.5040123462677, + "p99": 481.53600096702576 + }, + "roundtrip": { + "p50": 817.5680041313171, + "p90": 835.2320194244385, + "p95": 845.3760147094727, + "p99": 893.887996673584 + }, + "isolatedSum": { + "p50": 836.0320031642914, + "p90": 858.5280179977417, + "p95": 867.7439987659454, + "p99": 926.9120097160339 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 620648448, + "combineLogicalBytes": 620648448, + "fanoutMean": 5.2847900390625, + "recvTokensMax": 8192, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 718.783974647522, + "p90": 730.3680181503296, + "p95": 737.280011177063, + "p99": 808.1920146942139 + }, + "combine": { + "p50": 797.4399924278259, + "p90": 810.8800053596497, + "p95": 820.032000541687, + "p99": 849.3760228157043 + }, + "roundtrip": { + "p50": 1490.3680086135864, + "p90": 1507.5839757919312, + "p95": 1519.2960500717163, + "p99": 1630.944013595581 + }, + "isolatedSum": { + "p50": 1516.223967075348, + 
"p90": 1541.2480235099792, + "p95": 1557.31201171875, + "p99": 1657.5680375099182 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1241511936, + "combineLogicalBytes": 1241511936, + "fanoutMean": 5.28570556640625, + "recvTokensMax": 16384, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1386.0160112380981, + "p90": 1401.0560512542725, + "p95": 1406.1440229415894, + "p99": 1621.7279434204102 + }, + "combine": { + "p50": 1483.199954032898, + "p90": 1497.5999593734741, + "p95": 1502.17604637146, + "p99": 1538.0480289459229 + }, + "roundtrip": { + "p50": 2845.855951309204, + "p90": 2863.840103149414, + "p95": 2879.647970199585, + "p99": 3068.063974380493 + }, + "isolatedSum": { + "p50": 2869.215965270996, + "p90": 2898.6560106277466, + "p95": 2908.3200693130493, + "p99": 3159.775972366333 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2484242432, + "combineLogicalBytes": 2484242432, + "fanoutMean": 5.288299560546875, + "recvTokensMax": 32768, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-f354b9c6", + "identity": "h200|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|hotspot-single+eplb|8|prefill|normal|none|none|0|tuned||29ae5ace13636f8", + "colorKey": "h200_80a72891", + "comparisonKey": "52b3ac7f405659bf", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:05:25.966329+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_7", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · hotspot-single+eplb", + "shape": { + 
"hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "hotspot-single", + "routingLabel": "hotspot-single+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "29ae5ace13636f8", + "workloadId": "set:6:b952d4a43d688b50", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": 1.8466796875, + "eplbImbalanceAfter": 1.0002700343276514, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28272110404", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272110404", + "createdAt": "2026-06-27T00:03:54Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 117.24799871444702, + "p90": 134.17600095272064, + "p95": 140.25600254535675, + "p99": 158.84800255298615 + }, + "combine": { + "p50": 107.68000036478043, + "p90": 119.39200013875961, + "p95": 123.99999797344208, + "p99": 129.82399761676788 + }, + "roundtrip": { + "p50": 196.60800695419312, + "p90": 215.16799926757812, + "p95": 223.07200729846954, + "p99": 271.232008934021 + }, + "isolatedSum": { + "p50": 224.92799907922745, + "p90": 253.56800109148026, + "p95": 264.2560005187988, + "p99": 288.672000169754 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77701120, + "combineLogicalBytes": 77701120, + "fanoutMean": 5.29296875, + 
"recvTokensMax": 697, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 145.9520012140274, + "p90": 165.69599509239197, + "p95": 176.35199427604675, + "p99": 214.49600160121918 + }, + "combine": { + "p50": 143.61600577831268, + "p90": 153.28000485897064, + "p95": 157.3439985513687, + "p99": 169.91999745368958 + }, + "roundtrip": { + "p50": 263.7439966201782, + "p90": 279.1680097579956, + "p95": 287.07200288772583, + "p99": 316.0960078239441 + }, + "isolatedSum": { + "p50": 289.5680069923401, + "p90": 318.9759999513626, + "p95": 333.69599282741547, + "p99": 384.41599905490875 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155187200, + "combineLogicalBytes": 155187200, + "fanoutMean": 5.28564453125, + "recvTokensMax": 1372, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 204.96000349521637, + "p90": 222.81600534915924, + "p95": 232.1919947862625, + "p99": 259.552001953125 + }, + "combine": { + "p50": 222.4320024251938, + "p90": 239.51999843120575, + "p95": 245.2480047941208, + "p99": 269.3760097026825 + }, + "roundtrip": { + "p50": 400.83199739456177, + "p90": 421.7279851436615, + "p95": 431.3279986381531, + "p99": 482.14399814605713 + }, + "isolatedSum": { + "p50": 427.39200592041016, + "p90": 462.336003780365, + "p95": 477.4399995803833, + "p99": 528.9280116558075 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311162880, + "combineLogicalBytes": 311162880, + "fanoutMean": 5.299072265625, + "recvTokensMax": 2761, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 313.76001238822937, + "p90": 342.24000573158264, + "p95": 361.11998558044434, + "p99": 480.3520143032074 + }, + "combine": { + "p50": 
359.20000076293945, + "p90": 373.79199266433716, + "p95": 381.9519877433777, + "p99": 407.77599811553955 + }, + "roundtrip": { + "p50": 644.2880034446716, + "p90": 664.1600131988525, + "p95": 676.4799952507019, + "p99": 748.8639950752258 + }, + "isolatedSum": { + "p50": 672.9600131511688, + "p90": 716.0319983959198, + "p95": 743.071973323822, + "p99": 888.128012418747 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 619974656, + "combineLogicalBytes": 619974656, + "fanoutMean": 5.279052734375, + "recvTokensMax": 5481, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 538.3679866790771, + "p90": 557.1839809417725, + "p95": 566.0160183906555, + "p99": 608.0639958381653 + }, + "combine": { + "p50": 618.9759969711304, + "p90": 630.3359866142273, + "p95": 636.2559795379639, + "p99": 653.5680294036865 + }, + "roundtrip": { + "p50": 1131.2960386276245, + "p90": 1151.263952255249, + "p95": 1159.0080261230469, + "p99": 1297.9520559310913 + }, + "isolatedSum": { + "p50": 1157.3439836502075, + "p90": 1187.5199675559998, + "p95": 1202.2719979286194, + "p99": 1261.6320252418518 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240020992, + "combineLogicalBytes": 1240020992, + "fanoutMean": 5.27935791015625, + "recvTokensMax": 10883, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 984.2240214347839, + "p90": 1003.5840272903442, + "p95": 1015.2319669723511, + "p99": 1056.480050086975 + }, + "combine": { + "p50": 1093.9840078353882, + "p90": 1107.9679727554321, + "p95": 1119.9040412902832, + "p99": 1297.055959701538 + }, + "roundtrip": { + "p50": 2046.5600490570068, + "p90": 2070.3680515289307, + "p95": 2092.5118923187256, + "p99": 2573.024034500122 + }, + "isolatedSum": { + "p50": 2078.208029270172, + "p90": 2111.5520000457764, + 
"p95": 2135.1360082626343, + "p99": 2353.536009788513 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480414720, + "combineLogicalBytes": 2480414720, + "fanoutMean": 5.2801513671875, + "recvTokensMax": 21702, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-db979d37", + "identity": "h200|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|uniform+eplb|8|prefill|normal|none|none|0|tuned||2225dbbdab9bf2d", + "colorKey": "h200_580d7b05", + "comparisonKey": "b1de1efab41abbdf", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:02:37.856020+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_0", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · uniform+eplb", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "uniform", + "routingLabel": "uniform+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "2225dbbdab9bf2d", + "workloadId": "set:6:a426d66e479dc893", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": 1.006072998046875, + "eplbImbalanceAfter": 1.0000152587890625, + "backendVersion": 
"1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28272024348", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272024348", + "createdAt": "2026-06-27T00:01:10Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 115.87200313806534, + "p90": 125.88800489902496, + "p95": 131.71200454235077, + "p99": 142.46399700641632 + }, + "combine": { + "p50": 103.96800190210342, + "p90": 115.48800021409988, + "p95": 122.68800288438797, + "p99": 204.3199986219406 + }, + "roundtrip": { + "p50": 195.5839991569519, + "p90": 206.65599405765533, + "p95": 212.25599944591522, + "p99": 236.03199422359467 + }, + "isolatedSum": { + "p50": 219.84000504016876, + "p90": 241.37600511312485, + "p95": 254.40000742673874, + "p99": 346.78399562835693 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77041664, + "combineLogicalBytes": 77041664, + "fanoutMean": 5.248046875, + "recvTokensMax": 686, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 147.39200472831726, + "p90": 165.50399363040924, + "p95": 174.20800030231476, + "p99": 197.11999595165253 + }, + "combine": { + "p50": 146.7839926481247, + "p90": 158.55999290943146, + "p95": 162.9440039396286, + "p99": 175.20000040531158 + }, + "roundtrip": { + "p50": 266.7520046234131, + "p90": 286.24001145362854, + "p95": 293.1840121746063, + "p99": 322.33598828315735 + }, + "isolatedSum": { + "p50": 294.17599737644196, + "p90": 324.0639865398407, + "p95": 337.15200424194336, + "p99": 372.3199963569641 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 154542080, + "combineLogicalBytes": 154542080, + "fanoutMean": 5.263671875, + "recvTokensMax": 1365, + "stragglerRank": 2, + "correct": true, + 
"samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 203.74399423599243, + "p90": 221.11999988555908, + "p95": 229.95199263095856, + "p99": 253.08799743652344 + }, + "combine": { + "p50": 222.52799570560455, + "p90": 234.72000658512115, + "p95": 238.24000358581543, + "p99": 259.3280076980591 + }, + "roundtrip": { + "p50": 398.17601442337036, + "p90": 415.74400663375854, + "p95": 422.04800248146057, + "p99": 459.26401019096375 + }, + "isolatedSum": { + "p50": 426.271989941597, + "p90": 455.84000647068024, + "p95": 468.191996216774, + "p99": 512.4160051345825 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 310589440, + "combineLogicalBytes": 310589440, + "fanoutMean": 5.289306640625, + "recvTokensMax": 2746, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 310.36800146102905, + "p90": 327.93599367141724, + "p95": 333.72798562049866, + "p99": 371.8079924583435 + }, + "combine": { + "p50": 355.9679985046387, + "p90": 369.4719970226288, + "p95": 383.07198882102966, + "p99": 431.4880073070526 + }, + "roundtrip": { + "p50": 641.9199705123901, + "p90": 660.9920263290405, + "p95": 668.9280271530151, + "p99": 718.9760208129883 + }, + "isolatedSum": { + "p50": 666.3359999656677, + "p90": 697.407990694046, + "p95": 716.7999744415283, + "p99": 803.2959997653961 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 619171840, + "combineLogicalBytes": 619171840, + "fanoutMean": 5.272216796875, + "recvTokensMax": 5467, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 537.1519923210144, + "p90": 553.5680055618286, + "p95": 562.6559853553772, + "p99": 586.9759917259216 + }, + "combine": { + "p50": 612.1600270271301, + "p90": 625.0240206718445, + "p95": 633.8880062103271, + 
"p99": 660.863995552063 + }, + "roundtrip": { + "p50": 1119.968056678772, + "p90": 1136.064052581787, + "p95": 1145.2480554580688, + "p99": 1263.4880542755127 + }, + "isolatedSum": { + "p50": 1149.3120193481445, + "p90": 1178.592026233673, + "p95": 1196.5439915657043, + "p99": 1247.8399872779846 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1238945792, + "combineLogicalBytes": 1238945792, + "fanoutMean": 5.2747802734375, + "recvTokensMax": 10913, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1003.8080215454102, + "p90": 1027.008056640625, + "p95": 1034.432053565979, + "p99": 1060.1919889450073 + }, + "combine": { + "p50": 1111.0399961471558, + "p90": 1125.8879899978638, + "p95": 1135.3280544281006, + "p99": 1165.727972984314 + }, + "roundtrip": { + "p50": 2077.5039196014404, + "p90": 2101.6640663146973, + "p95": 2114.016056060791, + "p99": 2324.8000144958496 + }, + "isolatedSum": { + "p50": 2114.848017692566, + "p90": 2152.8960466384888, + "p95": 2169.7601079940796, + "p99": 2225.9199619293213 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2481747968, + "combineLogicalBytes": 2481747968, + "fanoutMean": 5.282989501953125, + "recvTokensMax": 21789, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-59b7e35e", + "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|prefill|normal|none|none|0|tuned||4caecd33bedf786", + "colorKey": "h200_b6aa6110", + "comparisonKey": "b89b8b0279afe699", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:56:59.891356+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_2", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": 
"layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · zipf", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf", + "routingLabel": "zipf", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "4caecd33bedf786", + "workloadId": "set:3:830e36e88869e222", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271848591", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271848591", + "createdAt": "2026-06-26T23:55:33Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 122.46400117874146, + "p90": 133.85599851608276, + "p95": 141.15199446678162, + "p99": 168.12799870967865 + }, + "combine": { + "p50": 112.5440001487732, + "p90": 125.791996717453, + "p95": 132.1599930524826, + "p99": 143.327996134758 + }, + "roundtrip": { + "p50": 215.7440036535263, + "p90": 240.03200232982635, + "p95": 247.13599681854248, + "p99": 281.5360128879547 + }, + "isolatedSum": { + "p50": 235.00800132751465, + "p90": 259.64799523353577, + "p95": 273.3119875192642, + "p99": 311.45599484443665 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 238.0480021238327, + "p90": 249.439999461174, + "p95": 253.34399938583374, + "p99": 271.39198780059814 + }, + "combine": { + "p50": 259.3280076980591, + "p90": 273.6639976501465, + "p95": 278.1440019607544, + "p99": 748.5759854316711 + }, + "roundtrip": { + "p50": 472.7039933204651, + "p90": 492.76798963546753, + "p95": 497.5360035896301, + "p99": 524.8640179634094 + }, + "isolatedSum": { + "p50": 497.3760098218918, + "p90": 523.1039971113205, + "p95": 531.4880013465881, + "p99": 1019.9679732322693 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 201678848, + "combineLogicalBytes": 201678848, + "fanoutMean": 3.4345703125, + "recvTokensMax": 4094, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 671.0079908370972, + "p90": 682.7840209007263, + "p95": 691.6159987449646, + "p99": 782.4000120162964 + }, + "combine": { + "p50": 788.0319952964783, + "p90": 803.0400276184082, + "p95": 810.4000091552734, + "p99": 879.2639970779419 + }, + "roundtrip": { + "p50": 1432.5439929962158, + "p90": 1457.2800397872925, + "p95": 1470.2719449996948, + "p99": 1641.3120031356812 + }, + "isolatedSum": { + "p50": 1459.0399861335754, + "p90": 1485.8240485191345, + "p95": 1502.016007900238, + "p99": 1661.6640090942383 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 808822784, + "combineLogicalBytes": 808822784, + "fanoutMean": 3.44354248046875, + "recvTokensMax": 16380, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-520b6c38", + "identity": 
"h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|prefill|normal|none|none|0|tuned||b5217e990b95f86", + "colorKey": "h200_b6aa6110", + "comparisonKey": "b89b8b0279afe699", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:03:30.997265+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_9", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · zipf", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf", + "routingLabel": "zipf", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b5217e990b95f86", + "workloadId": "set:6:830e36e88869e222", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28272049186", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272049186", + "createdAt": "2026-06-27T00:01:57Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 128, + 
"globalTokens": 1024, + "dispatch": { + "p50": 122.14399874210358, + "p90": 134.14399325847626, + "p95": 146.01600170135498, + "p99": 162.62400150299072 + }, + "combine": { + "p50": 112.92800307273865, + "p90": 121.11999839544296, + "p95": 126.68800354003906, + "p99": 141.50400459766388 + }, + "roundtrip": { + "p50": 214.30400013923645, + "p90": 228.28799486160278, + "p95": 232.57599771022797, + "p99": 247.48800694942474 + }, + "isolatedSum": { + "p50": 235.07200181484222, + "p90": 255.26399165391922, + "p95": 272.70400524139404, + "p99": 304.1280061006546 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 170.43200135231018, + "p90": 187.71199882030487, + "p95": 196.0960030555725, + "p99": 223.00800681114197 + }, + "combine": { + "p50": 163.87200355529785, + "p90": 181.60000443458557, + "p95": 186.36800348758698, + "p99": 197.02400267124176 + }, + "roundtrip": { + "p50": 303.8400113582611, + "p90": 328.000009059906, + "p95": 333.0560028553009, + "p99": 366.2079870700836 + }, + "isolatedSum": { + "p50": 334.30400490760803, + "p90": 369.31200325489044, + "p95": 382.4640065431595, + "p99": 420.0320094823837 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 100509696, + "combineLogicalBytes": 100509696, + "fanoutMean": 3.42333984375, + "recvTokensMax": 2046, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 235.83999276161194, + "p90": 244.6720004081726, + "p95": 248.86399507522583, + "p99": 265.4080092906952 + }, + "combine": { + "p50": 259.90399718284607, + "p90": 269.6639895439148, + "p95": 276.06400847435, + "p99": 299.0399897098541 + }, + "roundtrip": { + "p50": 473.471999168396, + 
"p90": 492.12801456451416, + "p95": 498.3679950237274, + "p99": 528.544008731842 + }, + "isolatedSum": { + "p50": 495.743989944458, + "p90": 514.3359899520874, + "p95": 524.9280035495758, + "p99": 564.4479990005493 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 201678848, + "combineLogicalBytes": 201678848, + "fanoutMean": 3.4345703125, + "recvTokensMax": 4094, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 381.087988615036, + "p90": 397.47199416160583, + "p95": 404.35200929641724, + "p99": 493.4079945087433 + }, + "combine": { + "p50": 437.27999925613403, + "p90": 450.8799910545349, + "p95": 458.3039879798889, + "p99": 476.25601291656494 + }, + "roundtrip": { + "p50": 790.5600070953369, + "p90": 804.9920201301575, + "p95": 813.9200210571289, + "p99": 841.5359854698181 + }, + "isolatedSum": { + "p50": 818.36798787117, + "p90": 848.3519852161407, + "p95": 862.6559972763062, + "p99": 969.6640074253082 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 405035008, + "combineLogicalBytes": 405035008, + "fanoutMean": 3.4488525390625, + "recvTokensMax": 8189, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 672.1280217170715, + "p90": 685.0879788398743, + "p95": 689.9200081825256, + "p99": 743.4560060501099 + }, + "combine": { + "p50": 783.1360101699829, + "p90": 793.0560111999512, + "p95": 796.6399788856506, + "p99": 806.5599799156189 + }, + "roundtrip": { + "p50": 1425.7919788360596, + "p90": 1442.0160055160522, + "p95": 1455.4879665374756, + "p99": 1550.75204372406 + }, + "isolatedSum": { + "p50": 1455.2640318870544, + "p90": 1478.1439900398254, + "p95": 1486.5599870681763, + "p99": 1550.0159859657288 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 808822784, + "combineLogicalBytes": 808822784, + "fanoutMean": 
3.44354248046875, + "recvTokensMax": 16380, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1269.1839933395386, + "p90": 1284.1919660568237, + "p95": 1291.8720245361328, + "p99": 1339.2640352249146 + }, + "combine": { + "p50": 1472.8000164031982, + "p90": 1489.8879528045654, + "p95": 1502.17604637146, + "p99": 1692.639946937561 + }, + "roundtrip": { + "p50": 2711.7760181427, + "p90": 2730.015993118286, + "p95": 2753.5040378570557, + "p99": 2926.464080810547 + }, + "isolatedSum": { + "p50": 2741.984009742737, + "p90": 2774.079918861389, + "p95": 2794.048070907593, + "p99": 3031.9039821624756 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1619795968, + "combineLogicalBytes": 1619795968, + "fanoutMean": 3.4481201171875, + "recvTokensMax": 32761, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-f5907eae", + "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|prefill|normal|none|none|0|tuned||3dd868cb33839a3", + "colorKey": "h200_c5b3365a", + "comparisonKey": "d19848fb38a35ed8", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:57:20.998823+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_3", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · zipf-heavy", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": 
"normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "3dd868cb33839a3", + "workloadId": "set:3:1ca614e23cc66be1", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271855852", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271855852", + "createdAt": "2026-06-26T23:55:47Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 117.72800236940384, + "p90": 127.48800218105316, + "p95": 133.91999900341034, + "p99": 146.11199498176575 + }, + "combine": { + "p50": 107.29599744081497, + "p90": 117.3119992017746, + "p95": 122.43200093507767, + "p99": 134.11200046539307 + }, + "roundtrip": { + "p50": 205.85599541664124, + "p90": 220.09600698947906, + "p95": 228.5120040178299, + "p99": 244.09599602222443 + }, + "isolatedSum": { + "p50": 225.0239998102188, + "p90": 244.80000138282776, + "p95": 256.351999938488, + "p99": 280.2239954471588 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22650880, + "combineLogicalBytes": 22650880, + "fanoutMean": 1.54296875, + "recvTokensMax": 1024, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 232.89600014686584, + "p90": 245.12000381946564, + "p95": 253.4399926662445, + "p99": 
292.03200340270996 + }, + "combine": { + "p50": 245.34399807453156, + "p90": 260.25599241256714, + "p95": 269.27998661994934, + "p99": 297.37600684165955 + }, + "roundtrip": { + "p50": 454.68801259994507, + "p90": 472.6080000400543, + "p95": 486.6560101509094, + "p99": 522.4639773368835 + }, + "isolatedSum": { + "p50": 478.2399982213974, + "p90": 505.3759962320328, + "p95": 522.7199792861938, + "p99": 589.4080102443695 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 91521024, + "combineLogicalBytes": 91521024, + "fanoutMean": 1.55859375, + "recvTokensMax": 4096, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 662.335991859436, + "p90": 673.632025718689, + "p95": 681.2160015106201, + "p99": 744.5759773254395 + }, + "combine": { + "p50": 772.5759744644165, + "p90": 791.8720245361328, + "p95": 806.6239953041077, + "p99": 855.2640080451965 + }, + "roundtrip": { + "p50": 1405.9840440750122, + "p90": 1435.2960586547852, + "p95": 1455.7119607925415, + "p99": 1716.3519859313965 + }, + "isolatedSum": { + "p50": 1434.9119663238525, + "p90": 1465.5040502548218, + "p95": 1487.8399968147278, + "p99": 1599.839985370636 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 368062464, + "combineLogicalBytes": 368062464, + "fanoutMean": 1.5670166015625, + "recvTokensMax": 16384, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-75dcaec2", + "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|prefill|normal|none|none|0|tuned||bbcd1d9d8d1e4fe", + "colorKey": "h200_c5b3365a", + "comparisonKey": "d19848fb38a35ed8", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:04:55.820445+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_8", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + 
"resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · zipf-heavy", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "bbcd1d9d8d1e4fe", + "workloadId": "set:6:1ca614e23cc66be1", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28272093905", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272093905", + "createdAt": "2026-06-27T00:03:20Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 118.14399808645248, + "p90": 130.17599284648895, + "p95": 135.5839967727661, + "p99": 147.07200229167938 + }, + "combine": { + "p50": 108.83200168609619, + "p90": 120.57600170373917, + "p95": 127.55200266838074, + "p99": 140.73599874973297 + }, + "roundtrip": { + "p50": 206.65599405765533, + "p90": 219.04000639915466, + "p95": 224.48000311851501, + "p99": 242.0479953289032 + }, 
+ "isolatedSum": { + "p50": 226.97599977254868, + "p90": 250.75199455022812, + "p95": 263.13599944114685, + "p99": 287.80800104141235 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22650880, + "combineLogicalBytes": 22650880, + "fanoutMean": 1.54296875, + "recvTokensMax": 1024, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 155.45600652694702, + "p90": 165.95199704170227, + "p95": 173.92000555992126, + "p99": 202.39999890327454 + }, + "combine": { + "p50": 150.94399452209473, + "p90": 162.59199380874634, + "p95": 170.3680008649826, + "p99": 186.24000251293182 + }, + "roundtrip": { + "p50": 287.6800000667572, + "p90": 302.94400453567505, + "p95": 309.7279965877533, + "p99": 357.7919900417328 + }, + "isolatedSum": { + "p50": 306.40000104904175, + "p90": 328.5439908504486, + "p95": 344.28800642490387, + "p99": 388.64000141620636 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 45688832, + "combineLogicalBytes": 45688832, + "fanoutMean": 1.55615234375, + "recvTokensMax": 2048, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 230.9119999408722, + "p90": 242.11199581623077, + "p95": 249.66399371623993, + "p99": 269.8880136013031 + }, + "combine": { + "p50": 247.16800451278687, + "p90": 260.5760097503662, + "p95": 264.6400034427643, + "p99": 289.66400027275085 + }, + "roundtrip": { + "p50": 456.86399936676025, + "p90": 473.28001260757446, + "p95": 481.1519980430603, + "p99": 534.8799824714661 + }, + "isolatedSum": { + "p50": 478.08000445365906, + "p90": 502.688005566597, + "p95": 514.3039971590042, + "p99": 559.552013874054 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 91521024, + "combineLogicalBytes": 91521024, + "fanoutMean": 1.55859375, + "recvTokensMax": 4096, + "stragglerRank": 7, + "correct": true, + 
"samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 374.2719888687134, + "p90": 386.6559863090515, + "p95": 397.5679874420166, + "p99": 506.0480237007141 + }, + "combine": { + "p50": 423.1039881706238, + "p90": 436.0319972038269, + "p95": 440.8319890499115, + "p99": 470.97599506378174 + }, + "roundtrip": { + "p50": 771.232008934021, + "p90": 783.9679718017578, + "p95": 795.5520153045654, + "p99": 828.4800052642822 + }, + "isolatedSum": { + "p50": 797.3759770393372, + "p90": 822.6879835128784, + "p95": 838.3999764919281, + "p99": 977.0240187644958 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 183916544, + "combineLogicalBytes": 183916544, + "fanoutMean": 1.5660400390625, + "recvTokensMax": 8192, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 661.8559956550598, + "p90": 673.9199757575989, + "p95": 679.6159744262695, + "p99": 697.5039839744568 + }, + "combine": { + "p50": 770.6559896469116, + "p90": 781.1520099639893, + "p95": 786.7839932441711, + "p99": 830.560028553009 + }, + "roundtrip": { + "p50": 1405.791997909546, + "p90": 1421.280026435852, + "p95": 1432.2559833526611, + "p99": 1481.6319942474365 + }, + "isolatedSum": { + "p50": 1432.5119853019714, + "p90": 1455.0719857215881, + "p95": 1466.3999676704407, + "p99": 1528.0640125274658 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 368062464, + "combineLogicalBytes": 368062464, + "fanoutMean": 1.5670166015625, + "recvTokensMax": 16384, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1246.2400197982788, + "p90": 1261.631965637207, + "p95": 1269.5679664611816, + "p99": 1482.5600385665894 + }, + "combine": { + "p50": 1440.384030342102, + "p90": 1459.455966949463, + "p95": 1471.519947052002, + 
"p99": 1634.0479850769043 + }, + "roundtrip": { + "p50": 2662.400007247925, + "p90": 2688.096046447754, + "p95": 2712.4478816986084, + "p99": 2846.719980239868 + }, + "isolatedSum": { + "p50": 2686.624050140381, + "p90": 2721.08793258667, + "p95": 2741.0879135131836, + "p99": 3116.6080236434937 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 734720000, + "combineLogicalBytes": 734720000, + "fanoutMean": 1.56402587890625, + "recvTokensMax": 32768, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-9bcc6cfd", + "identity": "h200|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-heavy+eplb|8|prefill|normal|none|none|0|tuned||46855e7fa6754eb", + "colorKey": "h200_06aa1194", + "comparisonKey": "fe01776775c5fb5e", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:05:23.968491+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_3", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · zipf-heavy+eplb", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + 
"routingConsistent": true, + "traceSignature": "46855e7fa6754eb", + "workloadId": "set:6:1ca614e23cc66be1", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": 7.38995361328125, + "eplbImbalanceAfter": 1.0000210716610862, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28272097307", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272097307", + "createdAt": "2026-06-27T00:03:27Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 117.79200285673141, + "p90": 122.94399738311768, + "p95": 127.96799838542938, + "p99": 138.33600282669067 + }, + "combine": { + "p50": 104.38399761915207, + "p90": 111.35999858379364, + "p95": 117.79200285673141, + "p99": 128.63999605178833 + }, + "roundtrip": { + "p50": 197.82400131225586, + "p90": 205.85599541664124, + "p95": 212.351992726326, + "p99": 252.86400318145752 + }, + "isolatedSum": { + "p50": 222.17600047588348, + "p90": 234.30399596691132, + "p95": 245.7600012421608, + "p99": 266.975998878479 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 79206400, + "combineLogicalBytes": 79206400, + "fanoutMean": 5.3955078125, + "recvTokensMax": 713, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 145.60000598430634, + "p90": 154.14400398731232, + "p95": 158.39999914169312, + "p99": 173.63199591636658 + }, + "combine": { + "p50": 145.6959992647171, + "p90": 150.56000649929047, + "p95": 155.2640050649643, + "p99": 165.56799411773682 + }, + "roundtrip": { + "p50": 267.520010471344, + "p90": 276.99199318885803, + "p95": 283.03998708724976, + "p99": 307.3599934577942 + }, + "isolatedSum": { + "p50": 291.29600524902344, + "p90": 304.7040104866028, + 
"p95": 313.6640042066574, + "p99": 339.1999900341034 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 159330304, + "combineLogicalBytes": 159330304, + "fanoutMean": 5.4267578125, + "recvTokensMax": 1436, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 204.79999482631683, + "p90": 213.85599672794342, + "p95": 218.27200055122375, + "p99": 238.52799832820892 + }, + "combine": { + "p50": 219.4879949092865, + "p90": 226.9439995288849, + "p95": 233.66400599479675, + "p99": 274.944007396698 + }, + "roundtrip": { + "p50": 400.160014629364, + "p90": 409.7279906272888, + "p95": 419.16799545288086, + "p99": 445.6320106983185 + }, + "isolatedSum": { + "p50": 424.28798973560333, + "p90": 440.7999962568283, + "p95": 451.9360065460205, + "p99": 513.4720057249069 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 319535104, + "combineLogicalBytes": 319535104, + "fanoutMean": 5.441650390625, + "recvTokensMax": 2897, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 317.7599906921387, + "p90": 327.87200808525085, + "p95": 340.06398916244507, + "p99": 393.3440148830414 + }, + "combine": { + "p50": 356.1600148677826, + "p90": 364.6079897880554, + "p95": 369.82399225234985, + "p99": 396.8319892883301 + }, + "roundtrip": { + "p50": 649.6959924697876, + "p90": 660.3519916534424, + "p95": 664.7040247917175, + "p99": 683.4239959716797 + }, + "isolatedSum": { + "p50": 673.9200055599213, + "p90": 692.4799978733063, + "p95": 709.8879814147949, + "p99": 790.1760041713715 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 638410752, + "combineLogicalBytes": 638410752, + "fanoutMean": 5.43603515625, + "recvTokensMax": 5815, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + 
"globalTokens": 16384, + "dispatch": { + "p50": 541.1199927330017, + "p90": 549.8560070991516, + "p95": 555.4239749908447, + "p99": 643.6160206794739 + }, + "combine": { + "p50": 614.8800253868103, + "p90": 626.3039708137512, + "p95": 632.2240233421326, + "p99": 680.8639764785767 + }, + "roundtrip": { + "p50": 1131.7440271377563, + "p90": 1142.7839994430542, + "p95": 1148.192048072815, + "p99": 1196.768045425415 + }, + "isolatedSum": { + "p50": 1156.000018119812, + "p90": 1176.1599779129028, + "p95": 1187.6479983329773, + "p99": 1324.4799971580505 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1275144192, + "combineLogicalBytes": 1275144192, + "fanoutMean": 5.42889404296875, + "recvTokensMax": 11606, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1027.9680490493774, + "p90": 1046.720027923584, + "p95": 1055.4239749908447, + "p99": 1100.000023841858 + }, + "combine": { + "p50": 1124.384045600891, + "p90": 1135.9679698944092, + "p95": 1140.8640146255493, + "p99": 1170.9760427474976 + }, + "roundtrip": { + "p50": 2114.5920753479004, + "p90": 2138.495922088623, + "p95": 2152.127981185913, + "p99": 2480.2560806274414 + }, + "isolatedSum": { + "p50": 2152.3520946502686, + "p90": 2182.687997817993, + "p95": 2196.287989616394, + "p99": 2270.9760665893555 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2546374656, + "combineLogicalBytes": 2546374656, + "fanoutMean": 5.420562744140625, + "recvTokensMax": 23170, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-e075077e", + "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-mild|8|prefill|normal|none|none|0|tuned||cf93f8f6b52e428", + "colorKey": "h200_6a794fcd", + "comparisonKey": "b6c24dab2941895d", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:04:10.125267+00:00", + "status": "valid", + 
"publicationStatus": "official", + "runner": "h200-dgxc-slurm_2", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · zipf-mild", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-mild", + "routingLabel": "zipf-mild", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "cf93f8f6b52e428", + "workloadId": "set:6:a224603e5a1640b8", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28272065129", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272065129", + "createdAt": "2026-06-27T00:02:24Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 126.52799487113953, + "p90": 140.3840035200119, + "p95": 146.17599546909332, + "p99": 177.08800733089447 + }, + "combine": { + "p50": 116.73600226640701, + "p90": 128.86400520801544, + "p95": 133.63200426101685, + "p99": 
143.8719928264618 + }, + "roundtrip": { + "p50": 216.35200083255768, + "p90": 234.3360036611557, + "p95": 240.25599658489227, + "p99": 277.3120105266571 + }, + "isolatedSum": { + "p50": 243.26399713754654, + "p90": 269.24800872802734, + "p95": 279.80799973011017, + "p99": 320.96000015735626 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 70160384, + "combineLogicalBytes": 70160384, + "fanoutMean": 4.779296875, + "recvTokensMax": 987, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 163.96799683570862, + "p90": 176.256000995636, + "p95": 180.4479956626892, + "p99": 201.50400698184967 + }, + "combine": { + "p50": 160.41600704193115, + "p90": 173.0560064315796, + "p95": 178.3680021762848, + "p99": 186.75200641155243 + }, + "roundtrip": { + "p50": 298.94399642944336, + "p90": 319.487988948822, + "p95": 328.0960023403168, + "p99": 354.65601086616516 + }, + "isolatedSum": { + "p50": 324.38400387763977, + "p90": 349.3120074272156, + "p95": 358.815997838974, + "p99": 388.2560133934021 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 140879872, + "combineLogicalBytes": 140879872, + "fanoutMean": 4.79833984375, + "recvTokensMax": 1972, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 237.72799968719482, + "p90": 252.48000025749207, + "p95": 263.8719975948334, + "p99": 307.16800689697266 + }, + "combine": { + "p50": 262.1760070323944, + "p90": 279.1999876499176, + "p95": 284.7999930381775, + "p99": 311.8399977684021 + }, + "roundtrip": { + "p50": 477.82400250434875, + "p90": 500.70399045944214, + "p95": 516.5759921073914, + "p99": 701.632022857666 + }, + "isolatedSum": { + "p50": 499.90400671958923, + "p90": 531.6799879074097, + "p95": 548.6719906330109, + "p99": 619.0080046653748 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 282333184, + "combineLogicalBytes": 282333184, + "fanoutMean": 4.80810546875, + "recvTokensMax": 3936, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 378.495991230011, + "p90": 390.04799723625183, + "p95": 399.58399534225464, + "p99": 429.6320080757141 + }, + "combine": { + "p50": 439.9360120296478, + "p90": 452.2880017757416, + "p95": 457.15200901031494, + "p99": 474.047988653183 + }, + "roundtrip": { + "p50": 797.4079847335815, + "p90": 816.32000207901, + "p95": 828.6399841308594, + "p99": 955.839991569519 + }, + "isolatedSum": { + "p50": 818.4320032596588, + "p90": 842.3359990119934, + "p95": 856.7360043525696, + "p99": 903.6799967288971 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 566716416, + "combineLogicalBytes": 566716416, + "fanoutMean": 4.8255615234375, + "recvTokensMax": 7855, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 679.4559955596924, + "p90": 694.208025932312, + "p95": 704.255998134613, + "p99": 742.8159713745117 + }, + "combine": { + "p50": 780.7040214538574, + "p90": 795.1679825782776, + "p95": 804.7360181808472, + "p99": 879.7439932823181 + }, + "roundtrip": { + "p50": 1432.0640563964844, + "p90": 1453.279972076416, + "p95": 1465.8559560775757, + "p99": 1602.3039817810059 + }, + "isolatedSum": { + "p50": 1460.1600170135498, + "p90": 1489.3760085105896, + "p95": 1508.9920163154602, + "p99": 1622.5599646568298 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1132285952, + "combineLogicalBytes": 1132285952, + "fanoutMean": 4.8206787109375, + "recvTokensMax": 15694, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1287.071943283081, + "p90": 
1304.8959970474243, + "p95": 1310.7839822769165, + "p99": 1432.2240352630615 + }, + "combine": { + "p50": 1463.6160135269165, + "p90": 1483.8080406188965, + "p95": 1511.7119550704956, + "p99": 1699.0400552749634 + }, + "roundtrip": { + "p50": 2723.9038944244385, + "p90": 2744.607925415039, + "p95": 2758.2719326019287, + "p99": 2967.616081237793 + }, + "isolatedSum": { + "p50": 2750.6879568099976, + "p90": 2788.704037666321, + "p95": 2822.495937347412, + "p99": 3131.264090538025 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2267840512, + "combineLogicalBytes": 2267840512, + "fanoutMean": 4.82763671875, + "recvTokensMax": 31357, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-f4768a96", + "identity": "h200|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-mild+eplb|8|prefill|normal|none|none|0|tuned||27ddc85ded0add9", + "colorKey": "h200_b2ffaf91", + "comparisonKey": "d826aaa5f1321f31", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:04:16.163335+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_12", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · zipf-mild+eplb", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-mild", + "routingLabel": "zipf-mild+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + 
"conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "27ddc85ded0add9", + "workloadId": "set:6:a224603e5a1640b8", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": 2.545684814453125, + "eplbImbalanceAfter": 1.0001495361328125, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28272068834", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272068834", + "createdAt": "2026-06-27T00:02:31Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 118.07999759912491, + "p90": 128.63999605178833, + "p95": 134.62400436401367, + "p99": 156.2879979610443 + }, + "combine": { + "p50": 105.47199845314026, + "p90": 114.43199962377548, + "p95": 119.19999867677689, + "p99": 136.09600067138672 + }, + "roundtrip": { + "p50": 197.24799692630768, + "p90": 206.01600408554077, + "p95": 211.0079973936081, + "p99": 226.01599991321564 + }, + "isolatedSum": { + "p50": 223.55199605226517, + "p90": 243.0719956755638, + "p95": 253.82400304079056, + "p99": 292.38399863243103 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78159872, + "combineLogicalBytes": 78159872, + "fanoutMean": 5.32421875, + "recvTokensMax": 702, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 149.08799529075623, + "p90": 157.27999806404114, + "p95": 161.56800091266632, + "p99": 172.83199727535248 + }, + "combine": { + "p50": 143.77599954605103, + "p90": 148.99200201034546, + "p95": 152.12799608707428, + "p99": 163.68000209331512 + }, + "roundtrip": { + 
"p50": 265.28000831604004, + "p90": 273.50398898124695, + "p95": 279.35999631881714, + "p99": 293.37599873542786 + }, + "isolatedSum": { + "p50": 292.86399483680725, + "p90": 306.2720000743866, + "p95": 313.6959969997406, + "p99": 336.5119993686676 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156563456, + "combineLogicalBytes": 156563456, + "fanoutMean": 5.33251953125, + "recvTokensMax": 1393, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 206.2080055475235, + "p90": 216.99200570583344, + "p95": 223.03999960422516, + "p99": 264.44798707962036 + }, + "combine": { + "p50": 225.40800273418427, + "p90": 233.37599635124207, + "p95": 238.65599930286407, + "p99": 253.56799364089966 + }, + "roundtrip": { + "p50": 404.4800102710724, + "p90": 415.2959883213043, + "p95": 423.552006483078, + "p99": 451.9039988517761 + }, + "isolatedSum": { + "p50": 431.61600828170776, + "p90": 450.3680020570755, + "p95": 461.69599890708923, + "p99": 518.01598072052 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 312410112, + "combineLogicalBytes": 312410112, + "fanoutMean": 5.3203125, + "recvTokensMax": 2773, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 313.27998638153076, + "p90": 324.8960077762604, + "p95": 334.7199857234955, + "p99": 349.2160141468048 + }, + "combine": { + "p50": 357.05599188804626, + "p90": 370.59199810028076, + "p95": 381.4080059528351, + "p99": 418.43199729919434 + }, + "roundtrip": { + "p50": 643.7439918518066, + "p90": 656.0959815979004, + "p95": 666.2399768829346, + "p99": 702.9759883880615 + }, + "isolatedSum": { + "p50": 670.335978269577, + "p90": 695.4880058765411, + "p95": 716.1279916763306, + "p99": 767.6480114459991 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 622712832, + 
"combineLogicalBytes": 622712832, + "fanoutMean": 5.3023681640625, + "recvTokensMax": 5498, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 530.1439762115479, + "p90": 539.5519733428955, + "p95": 543.008029460907, + "p99": 568.9600110054016 + }, + "combine": { + "p50": 611.5840077400208, + "p90": 622.048020362854, + "p95": 629.2799711227417, + "p99": 677.5040030479431 + }, + "roundtrip": { + "p50": 1115.488052368164, + "p90": 1129.248023033142, + "p95": 1135.583996772766, + "p99": 1275.6479978561401 + }, + "isolatedSum": { + "p50": 1141.7279839515686, + "p90": 1161.5999937057495, + "p95": 1172.2880005836487, + "p99": 1246.4640140533447 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1245038592, + "combineLogicalBytes": 1245038592, + "fanoutMean": 5.30072021484375, + "recvTokensMax": 10955, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 986.1119985580444, + "p90": 1002.2720098495483, + "p95": 1011.0080242156982, + "p99": 1069.0239667892456 + }, + "combine": { + "p50": 1125.3440380096436, + "p90": 1136.6080045700073, + "p95": 1142.3360109329224, + "p99": 1163.8400554656982 + }, + "roundtrip": { + "p50": 2081.088066101074, + "p90": 2097.9840755462646, + "p95": 2111.0079288482666, + "p99": 2311.743974685669 + }, + "isolatedSum": { + "p50": 2111.456036567688, + "p90": 2138.8800144195557, + "p95": 2153.3440351486206, + "p99": 2232.864022254944 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2489460736, + "combineLogicalBytes": 2489460736, + "fanoutMean": 5.299407958984375, + "recvTokensMax": 21864, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-e1ecd1d4", + "identity": 
"h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-moderate|8|prefill|normal|none|none|0|tuned||b5217e990b95f86", + "colorKey": "h200_f2b19f62", + "comparisonKey": "a7c9c0202574b9d0", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:04:45.749249+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_10", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · zipf-moderate", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b5217e990b95f86", + "workloadId": "set:6:6709a02c31933a9f", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28272079152", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272079152", + "createdAt": "2026-06-27T00:02:51Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + 
{ + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 122.94399738311768, + "p90": 134.20799374580383, + "p95": 138.87999951839447, + "p99": 150.87999403476715 + }, + "combine": { + "p50": 111.90400272607803, + "p90": 122.43200093507767, + "p95": 128.38399410247803, + "p99": 136.4479959011078 + }, + "roundtrip": { + "p50": 213.8880044221878, + "p90": 230.43200373649597, + "p95": 236.735999584198, + "p99": 261.4080011844635 + }, + "isolatedSum": { + "p50": 234.8480001091957, + "p90": 256.6399946808815, + "p95": 267.2639936208725, + "p99": 287.32798993587494 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 161.72799468040466, + "p90": 174.20800030231476, + "p95": 182.49599635601044, + "p99": 194.72000002861023 + }, + "combine": { + "p50": 158.27199816703796, + "p90": 174.8799979686737, + "p95": 179.58399653434753, + "p99": 191.26400351524353 + }, + "roundtrip": { + "p50": 296.9920039176941, + "p90": 319.0079927444458, + "p95": 327.2320032119751, + "p99": 340.03201127052307 + }, + "isolatedSum": { + "p50": 319.9999928474426, + "p90": 349.08799827098846, + "p95": 362.07999289035797, + "p99": 385.98400354385376 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 100509696, + "combineLogicalBytes": 100509696, + "fanoutMean": 3.42333984375, + "recvTokensMax": 2046, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 237.34399676322937, + "p90": 252.19199061393738, + "p95": 259.48798656463623, + "p99": 274.0800082683563 + }, + "combine": { + "p50": 260.44800877571106, + "p90": 278.2079875469208, + "p95": 284.7999930381775, + "p99": 298.880010843277 + }, + "roundtrip": { + 
"p50": 475.1040041446686, + "p90": 495.2319860458374, + "p95": 509.3119740486145, + "p99": 531.8080186843872 + }, + "isolatedSum": { + "p50": 497.79200553894043, + "p90": 530.3999781608582, + "p95": 544.2879796028137, + "p99": 572.9600191116333 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 201678848, + "combineLogicalBytes": 201678848, + "fanoutMean": 3.4345703125, + "recvTokensMax": 4094, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 379.4879913330078, + "p90": 389.60000872612, + "p95": 395.6800103187561, + "p99": 409.92000699043274 + }, + "combine": { + "p50": 438.1760060787201, + "p90": 452.06400752067566, + "p95": 457.69599080085754, + "p99": 494.59201097488403 + }, + "roundtrip": { + "p50": 794.2079901695251, + "p90": 809.7919821739197, + "p95": 823.6799836158752, + "p99": 875.6160140037537 + }, + "isolatedSum": { + "p50": 817.6639974117279, + "p90": 841.6640162467957, + "p95": 853.3760011196136, + "p99": 904.5120179653168 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 405035008, + "combineLogicalBytes": 405035008, + "fanoutMean": 3.4488525390625, + "recvTokensMax": 8189, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 671.2319850921631, + "p90": 682.6879978179932, + "p95": 689.2480254173279, + "p99": 929.0879964828491 + }, + "combine": { + "p50": 786.7839932441711, + "p90": 799.1999983787537, + "p95": 804.2880296707153, + "p99": 833.6960077285767 + }, + "roundtrip": { + "p50": 1430.0800561904907, + "p90": 1449.9200582504272, + "p95": 1461.3120555877686, + "p99": 1667.8080558776855 + }, + "isolatedSum": { + "p50": 1458.0159783363342, + "p90": 1481.8879961967468, + "p95": 1493.5360550880432, + "p99": 1762.7840042114258 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 808822784, + 
"combineLogicalBytes": 808822784, + "fanoutMean": 3.44354248046875, + "recvTokensMax": 16380, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1269.2480087280273, + "p90": 1284.5439910888672, + "p95": 1292.9919958114624, + "p99": 1424.064040184021 + }, + "combine": { + "p50": 1480.6400537490845, + "p90": 1504.7039985656738, + "p95": 1519.10400390625, + "p99": 1724.0320444107056 + }, + "roundtrip": { + "p50": 2719.4879055023193, + "p90": 2740.70405960083, + "p95": 2764.8000717163086, + "p99": 3076.0960578918457 + }, + "isolatedSum": { + "p50": 2749.888062477112, + "p90": 2789.247989654541, + "p95": 2812.0959997177124, + "p99": 3148.0960845947266 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1619795968, + "combineLogicalBytes": 1619795968, + "fanoutMean": 3.4481201171875, + "recvTokensMax": 32761, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-f58892d6", + "identity": "h200|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-moderate+eplb|8|prefill|normal|none|none|0|tuned||2b57a75d27f5b39", + "colorKey": "h200_bac4102c", + "comparisonKey": "402825358de599a6", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:04:49.601548+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_6", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · zipf-moderate+eplb", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate+eplb", + "routingStep": 0, + "unevenTokens": "none", + 
"eplbEnabled": true, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "2b57a75d27f5b39", + "workloadId": "set:6:6709a02c31933a9f", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": 4.895263671875, + "eplbImbalanceAfter": 1.0000902811686199, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28272082600", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272082600", + "createdAt": "2026-06-27T00:02:58Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 117.72800236940384, + "p90": 136.25599443912506, + "p95": 140.8960074186325, + "p99": 185.34399569034576 + }, + "combine": { + "p50": 103.61599922180176, + "p90": 115.9679964184761, + "p95": 122.49600142240524, + "p99": 137.7599984407425 + }, + "roundtrip": { + "p50": 197.02400267124176, + "p90": 215.13600647449493, + "p95": 222.6240038871765, + "p99": 233.43999683856964 + }, + "isolatedSum": { + "p50": 221.3440015912056, + "p90": 252.22399085760117, + "p95": 263.39200884103775, + "p99": 323.10399413108826 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77385728, + "combineLogicalBytes": 77385728, + "fanoutMean": 5.271484375, + "recvTokensMax": 691, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": 
{ + "p50": 145.21600306034088, + "p90": 166.1120057106018, + "p95": 175.1679927110672, + "p99": 194.91200149059296 + }, + "combine": { + "p50": 144.22400295734406, + "p90": 156.2879979610443, + "p95": 161.18399798870087, + "p99": 171.90399765968323 + }, + "roundtrip": { + "p50": 262.87999749183655, + "p90": 277.5999903678894, + "p95": 286.3999903202057, + "p99": 298.97600412368774 + }, + "isolatedSum": { + "p50": 289.44000601768494, + "p90": 322.4000036716461, + "p95": 336.35199069976807, + "p99": 366.8159991502762 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155172864, + "combineLogicalBytes": 155172864, + "fanoutMean": 5.28515625, + "recvTokensMax": 1378, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 203.3279985189438, + "p90": 218.36799383163452, + "p95": 226.1440008878708, + "p99": 242.8479939699173 + }, + "combine": { + "p50": 223.00800681114197, + "p90": 237.5359982252121, + "p95": 245.7599937915802, + "p99": 267.2959864139557 + }, + "roundtrip": { + "p50": 399.77601170539856, + "p90": 420.415997505188, + "p95": 433.1839978694916, + "p99": 505.40798902511597 + }, + "isolatedSum": { + "p50": 426.33600533008575, + "p90": 455.9039920568466, + "p95": 471.903994679451, + "p99": 510.143980383873 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 310546432, + "combineLogicalBytes": 310546432, + "fanoutMean": 5.28857421875, + "recvTokensMax": 2745, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 312.1280074119568, + "p90": 327.7119994163513, + "p95": 334.879994392395, + "p99": 400.4479944705963 + }, + "combine": { + "p50": 352.7680039405823, + "p90": 362.527996301651, + "p95": 367.6159977912903, + "p99": 386.0799968242645 + }, + "roundtrip": { + "p50": 641.1839723587036, + "p90": 658.1119894981384, + "p95": 
666.0159826278687, + "p99": 719.5199728012085 + }, + "isolatedSum": { + "p50": 664.8960113525391, + "p90": 690.2399957180023, + "p95": 702.4959921836853, + "p99": 786.5279912948608 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 620619776, + "combineLogicalBytes": 620619776, + "fanoutMean": 5.2845458984375, + "recvTokensMax": 5526, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 527.5200009346008, + "p90": 542.4320101737976, + "p95": 550.4639744758606, + "p99": 575.2959847450256 + }, + "combine": { + "p50": 620.3839778900146, + "p90": 633.5999965667725, + "p95": 639.2639875411987, + "p99": 673.8560199737549 + }, + "roundtrip": { + "p50": 1121.1520433425903, + "p90": 1137.0879411697388, + "p95": 1147.3599672317505, + "p99": 1174.7519969940186 + }, + "isolatedSum": { + "p50": 1147.9039788246155, + "p90": 1176.03200674057, + "p95": 1189.7279620170593, + "p99": 1249.1520047187805 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1239175168, + "combineLogicalBytes": 1239175168, + "fanoutMean": 5.2757568359375, + "recvTokensMax": 11165, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1003.3919811248779, + "p90": 1031.5200090408325, + "p95": 1040.4160022735596, + "p99": 1070.2400207519531 + }, + "combine": { + "p50": 1121.9840049743652, + "p90": 1135.7760429382324, + "p95": 1145.0239419937134, + "p99": 1167.8400039672852 + }, + "roundtrip": { + "p50": 2083.0399990081787, + "p90": 2113.568067550659, + "p95": 2122.431993484497, + "p99": 2277.791976928711 + }, + "isolatedSum": { + "p50": 2125.375986099243, + "p90": 2167.296051979065, + "p95": 2185.439944267273, + "p99": 2238.0800247192383 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2481604608, + "combineLogicalBytes": 2481604608, + "fanoutMean": 
5.282684326171875, + "recvTokensMax": 22165, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-8c2088d8", + "identity": "h200|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|prefill|normal|none|none|0|tuned||2b57a75d27f5b39", + "colorKey": "h200_1eda221e", + "comparisonKey": "6ee0b18a3e276ae1", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:03:37.741116+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_4", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · zipf+eplb", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf", + "routingLabel": "zipf+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "2b57a75d27f5b39", + "workloadId": "set:6:830e36e88869e222", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": 4.895263671875, + "eplbImbalanceAfter": 1.0000902811686199, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28272052634", + "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272052634", + "createdAt": "2026-06-27T00:02:03Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 118.65600198507309, + "p90": 133.56800377368927, + "p95": 139.1039937734604, + "p99": 146.97599411010742 + }, + "combine": { + "p50": 104.3199971318245, + "p90": 118.01599711179733, + "p95": 121.76000326871872, + "p99": 131.77600502967834 + }, + "roundtrip": { + "p50": 197.02400267124176, + "p90": 214.75200355052948, + "p95": 219.67999637126923, + "p99": 230.97600042819977 + }, + "isolatedSum": { + "p50": 222.97599911689758, + "p90": 251.5840008854866, + "p95": 260.8639970421791, + "p99": 278.75199913978577 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77385728, + "combineLogicalBytes": 77385728, + "fanoutMean": 5.271484375, + "recvTokensMax": 691, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 144.41600441932678, + "p90": 161.8880033493042, + "p95": 168.96000504493713, + "p99": 186.43200397491455 + }, + "combine": { + "p50": 143.19999516010284, + "p90": 153.08800339698792, + "p95": 157.4079990386963, + "p99": 164.60800170898438 + }, + "roundtrip": { + "p50": 262.87999749183655, + "p90": 275.32801032066345, + "p95": 282.4000120162964, + "p99": 291.00799560546875 + }, + "isolatedSum": { + "p50": 287.6159995794296, + "p90": 314.9760067462921, + "p95": 326.3680040836334, + "p99": 351.0400056838989 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155172864, + "combineLogicalBytes": 155172864, + "fanoutMean": 5.28515625, + "recvTokensMax": 1378, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 203.0079960823059, + "p90": 220.768004655838, + "p95": 227.55199670791626, + 
"p99": 253.63200902938843 + }, + "combine": { + "p50": 219.4879949092865, + "p90": 227.52000391483307, + "p95": 231.23200237751007, + "p99": 248.79999458789825 + }, + "roundtrip": { + "p50": 397.0560133457184, + "p90": 409.5039963722229, + "p95": 413.4719967842102, + "p99": 425.82398653030396 + }, + "isolatedSum": { + "p50": 422.4959909915924, + "p90": 448.2880085706711, + "p95": 458.78399908542633, + "p99": 502.4320036172867 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 310546432, + "combineLogicalBytes": 310546432, + "fanoutMean": 5.28857421875, + "recvTokensMax": 2745, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 311.5839958190918, + "p90": 334.52799916267395, + "p95": 339.2319977283478, + "p99": 353.88800501823425 + }, + "combine": { + "p50": 350.20801424980164, + "p90": 362.0480000972748, + "p95": 365.9839928150177, + "p99": 423.71198534965515 + }, + "roundtrip": { + "p50": 636.7999911308289, + "p90": 650.1439809799194, + "p95": 654.2080044746399, + "p99": 711.4560008049011 + }, + "isolatedSum": { + "p50": 661.7920100688934, + "p90": 696.5759992599487, + "p95": 705.2159905433655, + "p99": 777.5999903678894 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 620619776, + "combineLogicalBytes": 620619776, + "fanoutMean": 5.2845458984375, + "recvTokensMax": 5526, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 528.3839702606201, + "p90": 545.7599759101868, + "p95": 551.9999861717224, + "p99": 572.2879767417908 + }, + "combine": { + "p50": 608.959972858429, + "p90": 620.9920048713684, + "p95": 626.1119842529297, + "p99": 657.0559740066528 + }, + "roundtrip": { + "p50": 1110.2720499038696, + "p90": 1125.0239610671997, + "p95": 1132.032036781311, + "p99": 1183.0079555511475 + }, + "isolatedSum": { + "p50": 
1137.343943119049, + "p90": 1166.7519807815552, + "p95": 1178.111970424652, + "p99": 1229.3439507484436 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1239175168, + "combineLogicalBytes": 1239175168, + "fanoutMean": 5.2757568359375, + "recvTokensMax": 11165, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 994.4639801979065, + "p90": 1016.1600112915039, + "p95": 1023.9039659500122, + "p99": 1042.0479774475098 + }, + "combine": { + "p50": 1103.2960414886475, + "p90": 1116.2559986114502, + "p95": 1121.7600107192993, + "p99": 1139.4879817962646 + }, + "roundtrip": { + "p50": 2056.544065475464, + "p90": 2077.9199600219727, + "p95": 2088.671922683716, + "p99": 2251.3279914855957 + }, + "isolatedSum": { + "p50": 2097.760021686554, + "p90": 2132.416009902954, + "p95": 2145.6639766693115, + "p99": 2181.5359592437744 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2481604608, + "combineLogicalBytes": 2481604608, + "fanoutMean": 5.282684326171875, + "recvTokensMax": 22165, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-8e568434", + "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|normalized|0.18|64d989e2e2a6b31", + "colorKey": "h200_c851a534", + "comparisonKey": "1f9e00010b0d6e5b", + "schemaVersion": 3, + "generatedAt": "2026-06-26T17:29:59.726916+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_11", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "normalized", + "suite": "resource-constrained", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 (norm)", + 
"shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": 0.18, + "achievedFraction": 0.1818, + "configuredUnits": 24, + "deviceUnits": 132, + "resourceClass": "resource-constrained", + "conformanceClass": "resource-conforming", + "fixedKernel": false, + "paretoEligible": true + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": "set:6:a426d66e479dc893", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28254392935", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254392935", + "createdAt": "2026-06-26T17:28:22Z", + "sha": "60dec7d70f554e252fec87709e2be52752947db1" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 116.44800007343292, + "p90": 126.97599828243256, + "p95": 137.92000710964203, + "p99": 159.96800363063812 + }, + "combine": { + "p50": 103.55199873447418, + "p90": 113.11999708414078, + "p95": 120.80000340938568, + "p99": 147.10399508476257 + }, + "roundtrip": { + "p50": 194.62400674819946, + "p90": 208.19200575351715, + "p95": 215.39199352264404, + "p99": 238.75199258327484 + }, + "isolatedSum": { + "p50": 219.9999988079071, + "p90": 240.09599536657333, + "p95": 258.7200105190277, + "p99": 307.0719987154007 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, 
+ "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 146.97599411010742, + "p90": 163.07200491428375, + "p95": 171.77599668502808, + "p99": 191.42399728298187 + }, + "combine": { + "p50": 142.84799993038177, + "p90": 154.78399395942688, + "p95": 165.12000560760498, + "p99": 172.28800058364868 + }, + "roundtrip": { + "p50": 267.0080065727234, + "p90": 288.9600098133087, + "p95": 295.77600955963135, + "p99": 315.71200489997864 + }, + "isolatedSum": { + "p50": 289.8239940404892, + "p90": 317.85599887371063, + "p95": 336.89600229263306, + "p99": 363.71199786663055 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155889664, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 207.68000185489655, + "p90": 228.64000499248505, + "p95": 236.92800104618073, + "p99": 267.90401339530945 + }, + "combine": { + "p50": 210.36800742149353, + "p90": 225.0239998102188, + "p95": 234.68799889087677, + "p99": 271.58400416374207 + }, + "roundtrip": { + "p50": 390.49598574638367, + "p90": 413.37600350379944, + "p95": 420.28799653053284, + "p99": 449.8240053653717 + }, + "isolatedSum": { + "p50": 418.0480092763901, + "p90": 453.66400480270386, + "p95": 471.6159999370575, + "p99": 539.4880175590515 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 312266752, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 324.8960077762604, + "p90": 341.5679931640625, + "p95": 351.4559864997864, + "p99": 364.73599076271057 + }, + "combine": { + "p50": 328.0960023403168, + "p90": 
339.6480083465576, + "p95": 345.95200419425964, + "p99": 362.8480136394501 + }, + "roundtrip": { + "p50": 628.9600133895874, + "p90": 643.231987953186, + "p95": 649.3120193481445, + "p99": 664.3199920654297 + }, + "isolatedSum": { + "p50": 652.9920101165771, + "p90": 681.2160015106201, + "p95": 697.407990694046, + "p99": 727.5840044021606 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 623443968, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 566.3679838180542, + "p90": 581.0880064964294, + "p95": 587.2960090637207, + "p99": 609.1520190238953 + }, + "combine": { + "p50": 560.9920024871826, + "p90": 573.0559825897217, + "p95": 578.2399773597717, + "p99": 609.7279787063599 + }, + "roundtrip": { + "p50": 1097.3440408706665, + "p90": 1114.400029182434, + "p95": 1121.791958808899, + "p99": 1286.6239547729492 + }, + "isolatedSum": { + "p50": 1127.3599863052368, + "p90": 1154.1439890861511, + "p95": 1165.5359864234924, + "p99": 1218.8799977302551 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243805696, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1051.8079996109009, + "p90": 1067.8720474243164, + "p95": 1078.271985054016, + "p99": 1161.4079475402832 + }, + "combine": { + "p50": 1028.9920568466187, + "p90": 1044.0959930419922, + "p95": 1054.4320344924927, + "p99": 1218.783974647522 + }, + "roundtrip": { + "p50": 2049.3760108947754, + "p90": 2068.4800148010254, + "p95": 2079.200029373169, + "p99": 2593.600034713745 + }, + "isolatedSum": { + "p50": 2080.8000564575195, + "p90": 2111.9680404663086, + "p95": 2132.704019546509, + "p99": 
2380.191922187805 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487009280, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-6764a75f", + "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|prefill|normal|none|none|0|normalized|0.18|0a3064a2af0dd39", + "colorKey": "h200_a1e795ec", + "comparisonKey": "5a22622d9db14749", + "schemaVersion": 3, + "generatedAt": "2026-06-26T17:30:54.944678+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_8", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "normalized", + "suite": "resource-constrained", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 (norm) · balanced", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced", + "routingLabel": "balanced", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": 0.18, + "achievedFraction": 0.1818, + "configuredUnits": 24, + "deviceUnits": 132, + "resourceClass": "resource-constrained", + "conformanceClass": "resource-conforming", + "fixedKernel": false, + "paretoEligible": true + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "0a3064a2af0dd39", + "workloadId": "set:6:2dad1a73ff872905", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": 
"sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28254443915", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254443915", + "createdAt": "2026-06-26T17:29:22Z", + "sha": "60dec7d70f554e252fec87709e2be52752947db1" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 133.88800621032715, + "p90": 147.16799557209015, + "p95": 159.5200002193451, + "p99": 177.76000499725342 + }, + "combine": { + "p50": 119.39200013875961, + "p90": 131.80799782276154, + "p95": 139.74399864673615, + "p99": 152.48000621795654 + }, + "roundtrip": { + "p50": 227.64800488948822, + "p90": 249.05599653720856, + "p95": 255.74401021003723, + "p99": 274.3679881095886 + }, + "isolatedSum": { + "p50": 253.28000634908676, + "p90": 278.9759933948517, + "p95": 299.26399886608124, + "p99": 330.24001121520996 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 8, + "recvTokensMax": 1024, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 176.54399573802948, + "p90": 188.4160041809082, + "p95": 203.07199656963348, + "p99": 299.8400032520294 + }, + "combine": { + "p50": 169.91999745368958, + "p90": 175.48799514770508, + "p95": 180.16000092029572, + "p99": 187.51999735832214 + }, + "roundtrip": { + "p50": 319.4560110569, + "p90": 328.7679851055145, + "p95": 336.32001280784607, + "p99": 355.0400137901306 + }, + "isolatedSum": { + "p50": 346.46399319171906, + "p90": 363.9039993286133, + "p95": 383.2319974899292, + "p99": 487.36000061035156 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 8, + "recvTokensMax": 2048, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 269.567996263504, + "p90": 288.12798857688904, + "p95": 294.048011302948, + "p99": 315.3280019760132 + }, + "combine": { + "p50": 262.0159983634949, + "p90": 282.1120023727417, + "p95": 286.5920066833496, + "p99": 306.11199140548706 + }, + "roundtrip": { + "p50": 505.7920217514038, + "p90": 531.9039821624756, + "p95": 535.7760190963745, + "p99": 544.6720123291016 + }, + "isolatedSum": { + "p50": 531.5839946269989, + "p90": 570.2399909496307, + "p95": 580.6400179862976, + "p99": 621.4399933815002 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 469762048, + "combineLogicalBytes": 469762048, + "fanoutMean": 8, + "recvTokensMax": 4096, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 443.87200474739075, + "p90": 459.55199003219604, + "p95": 467.74399280548096, + "p99": 487.199991941452 + }, + "combine": { + "p50": 427.64800786972046, + "p90": 442.81598925590515, + "p95": 451.58401131629944, + "p99": 483.13599824905396 + }, + "roundtrip": { + "p50": 844.7999954223633, + "p90": 860.0640296936035, + "p95": 867.0719861984253, + "p99": 924.67200756073 + }, + "isolatedSum": { + "p50": 871.5200126171112, + "p90": 902.3679792881012, + "p95": 919.3280041217804, + "p99": 970.335990190506 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 939524096, + "combineLogicalBytes": 939524096, + "fanoutMean": 8, + "recvTokensMax": 8192, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 806.1119914054871, + "p90": 823.7119913101196, + "p95": 832.4480056762695, + "p99": 892.3199772834778 + }, + "combine": { + "p50": 758.9120268821716, + "p90": 777.1199941635132, + "p95": 790.3040051460266, + "p99": 827.3919820785522 + }, + "roundtrip": { + "p50": 1534.5920324325562, + "p90": 
1550.75204372406, + "p95": 1561.3759756088257, + "p99": 1597.9520082473755 + }, + "isolatedSum": { + "p50": 1565.0240182876587, + "p90": 1600.8319854736328, + "p95": 1622.7520108222961, + "p99": 1719.71195936203 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1879048192, + "combineLogicalBytes": 1879048192, + "fanoutMean": 8, + "recvTokensMax": 16384, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1534.7520112991333, + "p90": 1552.4159669876099, + "p95": 1570.9120035171509, + "p99": 1686.7519617080688 + }, + "combine": { + "p50": 1415.2640104293823, + "p90": 1439.2000436782837, + "p95": 1449.120044708252, + "p99": 1643.1679725646973 + }, + "roundtrip": { + "p50": 2922.528028488159, + "p90": 2943.743944168091, + "p95": 2957.535982131958, + "p99": 3040.5759811401367 + }, + "isolatedSum": { + "p50": 2950.0160217285156, + "p90": 2991.6160106658936, + "p95": 3020.032048225403, + "p99": 3329.919934272766 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3758096384, + "combineLogicalBytes": 3758096384, + "fanoutMean": 8, + "recvTokensMax": 32768, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-e63750d6", + "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|prefill|normal|none|none|0|normalized|0.18|b5217e990b95f86", + "colorKey": "h200_0a93a01f", + "comparisonKey": "f4911d0a95d49c62", + "schemaVersion": 3, + "generatedAt": "2026-06-26T17:31:03.582434+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_0", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "normalized", + "suite": "resource-constrained", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + 
"worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 (norm) · zipf", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf", + "routingLabel": "zipf", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": 0.18, + "achievedFraction": 0.1818, + "configuredUnits": 24, + "deviceUnits": 132, + "resourceClass": "resource-constrained", + "conformanceClass": "resource-conforming", + "fixedKernel": false, + "paretoEligible": true + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b5217e990b95f86", + "workloadId": "set:6:830e36e88869e222", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28254452252", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254452252", + "createdAt": "2026-06-26T17:29:31Z", + "sha": "60dec7d70f554e252fec87709e2be52752947db1" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 120.86399644613266, + "p90": 133.53599607944489, + "p95": 138.5280042886734, + "p99": 154.01600301265717 + }, + "combine": { + "p50": 112.64000087976456, + "p90": 124.86399710178375, + "p95": 130.5599957704544, + "p99": 142.7839994430542 + }, + "roundtrip": { + "p50": 213.47199380397797, + "p90": 229.72799837589264, + "p95": 238.68800699710846, + "p99": 280.8000147342682 + }, + "isolatedSum": { + "p50": 233.50399732589722, + "p90": 258.39999318122864, + "p95": 269.0880000591278, + "p99": 296.80000245571136 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + 
"combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 163.55200111865997, + "p90": 173.7920045852661, + "p95": 181.66400492191315, + "p99": 202.87999510765076 + }, + "combine": { + "p50": 156.54399991035461, + "p90": 170.9119975566864, + "p95": 178.20799350738525, + "p99": 194.62400674819946 + }, + "roundtrip": { + "p50": 297.1839904785156, + "p90": 314.65598940849304, + "p95": 321.02400064468384, + "p99": 352.28800773620605 + }, + "isolatedSum": { + "p50": 320.0960010290146, + "p90": 344.7040021419525, + "p95": 359.8719984292984, + "p99": 397.5040018558502 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 100509696, + "combineLogicalBytes": 100509696, + "fanoutMean": 3.42333984375, + "recvTokensMax": 2046, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 237.56800591945648, + "p90": 246.72000110149384, + "p95": 252.44799256324768, + "p99": 262.2720003128052 + }, + "combine": { + "p50": 242.3039972782135, + "p90": 256.99201226234436, + "p95": 264.5759880542755, + "p99": 294.17601227760315 + }, + "roundtrip": { + "p50": 457.5679898262024, + "p90": 477.27999091148376, + "p95": 485.6959879398346, + "p99": 519.9679732322693 + }, + "isolatedSum": { + "p50": 479.87200319767, + "p90": 503.7120133638382, + "p95": 517.0239806175232, + "p99": 556.4480125904083 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 201678848, + "combineLogicalBytes": 201678848, + "fanoutMean": 3.4345703125, + "recvTokensMax": 4094, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 384.0320110321045, + "p90": 394.9120044708252, + "p95": 400.2879858016968, + "p99": 
411.77600622177124 + }, + "combine": { + "p50": 408.2239866256714, + "p90": 420.22401094436646, + "p95": 427.39200592041016, + "p99": 457.5679898262024 + }, + "roundtrip": { + "p50": 765.9199833869934, + "p90": 785.9519720077515, + "p95": 798.2079982757568, + "p99": 844.543993473053 + }, + "isolatedSum": { + "p50": 792.2559976577759, + "p90": 815.1360154151917, + "p95": 827.6799917221069, + "p99": 869.3439960479736 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 405035008, + "combineLogicalBytes": 405035008, + "fanoutMean": 3.4488525390625, + "recvTokensMax": 8189, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 669.6959733963013, + "p90": 682.3359727859497, + "p95": 689.0559792518616, + "p99": 731.8080067634583 + }, + "combine": { + "p50": 727.1360158920288, + "p90": 740.4800057411194, + "p95": 746.783971786499, + "p99": 762.8480195999146 + }, + "roundtrip": { + "p50": 1366.0800457000732, + "p90": 1389.631986618042, + "p95": 1405.6639671325684, + "p99": 1561.8239641189575 + }, + "isolatedSum": { + "p50": 1396.83198928833, + "p90": 1422.815978527069, + "p95": 1435.8399510383606, + "p99": 1494.6560263633728 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 808822784, + "combineLogicalBytes": 808822784, + "fanoutMean": 3.44354248046875, + "recvTokensMax": 16380, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1259.1999769210815, + "p90": 1273.1839418411255, + "p95": 1278.5600423812866, + "p99": 1390.463948249817 + }, + "combine": { + "p50": 1366.8160438537598, + "p90": 1383.2319974899292, + "p95": 1391.2960290908813, + "p99": 1428.5119771957397 + }, + "roundtrip": { + "p50": 2598.0799198150635, + "p90": 2617.0880794525146, + "p95": 2628.2238960266113, + "p99": 2879.9679279327393 + }, + "isolatedSum": { + "p50": 
2626.0160207748413, + "p90": 2656.4159393310547, + "p95": 2669.856071472168, + "p99": 2818.9759254455566 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1619795968, + "combineLogicalBytes": 1619795968, + "fanoutMean": 3.4481201171875, + "recvTokensMax": 32761, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-353049ec", + "identity": "h200|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|prefill|normal|none|none|0|normalized|0.18|2b57a75d27f5b39", + "colorKey": "h200_993777bf", + "comparisonKey": "cb74cc9ee6130bb2", + "schemaVersion": 3, + "generatedAt": "2026-06-26T17:47:04.200207+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_1", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "normalized", + "suite": "resource-constrained", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 (norm) · zipf+eplb", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf", + "routingLabel": "zipf+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": 0.18, + "achievedFraction": 0.1818, + "configuredUnits": 24, + "deviceUnits": 132, + "resourceClass": "resource-constrained", + "conformanceClass": "resource-conforming", + "fixedKernel": false, + "paretoEligible": true + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "2b57a75d27f5b39", + "workloadId": "set:6:830e36e88869e222", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": 
4.895263671875, + "eplbImbalanceAfter": 1.0000902811686199, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28255303840", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28255303840", + "createdAt": "2026-06-26T17:45:35Z", + "sha": "36d3eb6c3c7386d3220c873d305410219c5c0f17" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 117.40799993276596, + "p90": 132.54399597644806, + "p95": 140.06400108337402, + "p99": 154.27200496196747 + }, + "combine": { + "p50": 104.3199971318245, + "p90": 118.04799735546112, + "p95": 123.99999797344208, + "p99": 158.75199437141418 + }, + "roundtrip": { + "p50": 193.9840018749237, + "p90": 207.68000185489655, + "p95": 215.61600267887115, + "p99": 244.6720004081726 + }, + "isolatedSum": { + "p50": 221.72799706459045, + "p90": 250.59199333190918, + "p95": 264.0639990568161, + "p99": 313.02399933338165 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77385728, + "combineLogicalBytes": 77385728, + "fanoutMean": 5.271484375, + "recvTokensMax": 691, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 146.68799936771393, + "p90": 160.64000129699707, + "p95": 169.855996966362, + "p99": 192.06400215625763 + }, + "combine": { + "p50": 142.91200041770935, + "p90": 152.0320028066635, + "p95": 157.98400342464447, + "p99": 178.0479997396469 + }, + "roundtrip": { + "p50": 266.1440074443817, + "p90": 278.7199914455414, + "p95": 285.6000065803528, + "p99": 310.43198704719543 + }, + "isolatedSum": { + "p50": 289.5999997854233, + "p90": 312.6720041036606, + "p95": 327.84000039100647, + "p99": 370.11200189590454 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155172864, + "combineLogicalBytes": 155172864, + "fanoutMean": 
5.28515625, + "recvTokensMax": 1378, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 208.12800526618958, + "p90": 229.8559993505478, + "p95": 237.34399676322937, + "p99": 272.5760042667389 + }, + "combine": { + "p50": 210.62399446964264, + "p90": 222.75200486183167, + "p95": 228.99200022220612, + "p99": 251.45599246025085 + }, + "roundtrip": { + "p50": 391.4879858493805, + "p90": 413.05598616600037, + "p95": 424.54400658607483, + "p99": 474.047988653183 + }, + "isolatedSum": { + "p50": 418.7519997358322, + "p90": 452.60800421237946, + "p95": 466.3359969854355, + "p99": 524.0319967269897 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 310546432, + "combineLogicalBytes": 310546432, + "fanoutMean": 5.28857421875, + "recvTokensMax": 2745, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 322.7840065956116, + "p90": 342.78398752212524, + "p95": 351.6800105571747, + "p99": 378.2399892807007 + }, + "combine": { + "p50": 330.1439881324768, + "p90": 345.0239896774292, + "p95": 349.8559892177582, + "p99": 379.13599610328674 + }, + "roundtrip": { + "p50": 626.2080073356628, + "p90": 646.8480229377747, + "p95": 661.1520051956177, + "p99": 823.4559893608093 + }, + "isolatedSum": { + "p50": 652.9279947280884, + "p90": 687.8079771995544, + "p95": 701.5359997749329, + "p99": 757.3759853839874 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 620619776, + "combineLogicalBytes": 620619776, + "fanoutMean": 5.2845458984375, + "recvTokensMax": 5526, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 569.7280168533325, + "p90": 585.7920050621033, + "p95": 596.2240099906921, + "p99": 690.7520294189453 + }, + "combine": { + "p50": 
569.1199898719788, + "p90": 583.1040143966675, + "p95": 591.0400152206421, + "p99": 609.503984451294 + }, + "roundtrip": { + "p50": 1109.8560094833374, + "p90": 1127.8719902038574, + "p95": 1138.335943222046, + "p99": 1191.648006439209 + }, + "isolatedSum": { + "p50": 1138.8480067253113, + "p90": 1168.8960194587708, + "p95": 1187.2640252113342, + "p99": 1300.2560138702393 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1239175168, + "combineLogicalBytes": 1239175168, + "fanoutMean": 5.2757568359375, + "recvTokensMax": 11165, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1082.5920104980469, + "p90": 1103.16801071167, + "p95": 1116.927981376648, + "p99": 1311.8400573730469 + }, + "combine": { + "p50": 1018.3039903640747, + "p90": 1032.4480533599854, + "p95": 1047.5200414657593, + "p99": 1417.472004890442 + }, + "roundtrip": { + "p50": 2072.60799407959, + "p90": 2096.7679023742676, + "p95": 2112.7359867095947, + "p99": 2388.000011444092 + }, + "isolatedSum": { + "p50": 2100.8960008621216, + "p90": 2135.6160640716553, + "p95": 2164.448022842407, + "p99": 2729.3120622634888 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2481604608, + "combineLogicalBytes": 2481604608, + "fanoutMean": 5.282684326171875, + "recvTokensMax": 22165, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-5c3f9114", + "identity": "h200|deepep|7168|8|256|bf16|normal|cached-layout-comm-only-v1|uniform|8|prefill|normal|none|none|0|normalized|0.18|64d989e2e2a6b31", + "colorKey": "h200_edd92e38", + "comparisonKey": "696a49bd5b0de953", + "schemaVersion": 3, + "generatedAt": "2026-06-26T17:30:13.181201+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_4", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "normalized", 
+ "suite": "resource-constrained", + "comparisonClass": "standardized", + "measurementContract": "cached-layout-comm-only-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 (norm) [cl]", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": 0.18, + "achievedFraction": 0.1818, + "configuredUnits": 24, + "deviceUnits": 132, + "resourceClass": "resource-constrained", + "conformanceClass": "resource-conforming", + "fixedKernel": false, + "paretoEligible": true + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": "set:6:a426d66e479dc893", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28254409438", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254409438", + "createdAt": "2026-06-26T17:28:41Z", + "sha": "60dec7d70f554e252fec87709e2be52752947db1" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 104.032002389431, + "p90": 116.12799763679504, + "p95": 120.83200365304947, + "p99": 131.00799918174744 + }, + "combine": { + "p50": 103.07200253009796, + "p90": 115.167997777462, + "p95": 120.95999717712402, + "p99": 125.76000392436981 + }, + "roundtrip": { + "p50": 182.23999440670013, + "p90": 196.48000597953796, + "p95": 200.095996260643, + "p99": 249.7600018978119 + }, + "isolatedSum": 
{ + "p50": 207.10400491952896, + "p90": 231.29599541425705, + "p95": 241.7920008301735, + "p99": 256.76800310611725 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 136.03200018405914, + "p90": 151.96800231933594, + "p95": 158.4639996290207, + "p99": 170.68800330162048 + }, + "combine": { + "p50": 142.59199798107147, + "p90": 157.53600001335144, + "p95": 161.18399798870087, + "p99": 179.6800047159195 + }, + "roundtrip": { + "p50": 252.8960108757019, + "p90": 265.28000831604004, + "p95": 271.232008934021, + "p99": 293.4400141239166 + }, + "isolatedSum": { + "p50": 278.6239981651306, + "p90": 309.5040023326874, + "p95": 319.64799761772156, + "p99": 350.36800801754 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155889664, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 194.87999379634857, + "p90": 210.33599972724915, + "p95": 215.87200462818146, + "p99": 243.9039945602417 + }, + "combine": { + "p50": 208.064004778862, + "p90": 222.04799950122833, + "p95": 230.14399409294128, + "p99": 255.42399287223816 + }, + "roundtrip": { + "p50": 378.84798645973206, + "p90": 394.9120044708252, + "p95": 405.5039882659912, + "p99": 434.27199125289917 + }, + "isolatedSum": { + "p50": 402.94399857521057, + "p90": 432.3839992284775, + "p95": 446.01599872112274, + "p99": 499.32798743247986 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 312266752, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 
600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 312.99200654029846, + "p90": 334.1119885444641, + "p95": 342.9119884967804, + "p99": 389.15199041366577 + }, + "combine": { + "p50": 326.1120021343231, + "p90": 339.35999870300293, + "p95": 347.3280072212219, + "p99": 393.0560052394867 + }, + "roundtrip": { + "p50": 614.0159964561462, + "p90": 628.4800171852112, + "p95": 635.7759833335876, + "p99": 708.4479928016663 + }, + "isolatedSum": { + "p50": 639.1040086746216, + "p90": 673.471987247467, + "p95": 690.2399957180023, + "p99": 782.2079956531525 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 623443968, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 549.3760108947754, + "p90": 563.264012336731, + "p95": 569.2480206489563, + "p99": 593.1519865989685 + }, + "combine": { + "p50": 560.8000159263611, + "p90": 573.2799768447876, + "p95": 579.8400044441223, + "p99": 591.871976852417 + }, + "roundtrip": { + "p50": 1080.9600353240967, + "p90": 1097.5359678268433, + "p95": 1106.0800552368164, + "p99": 1136.512041091919 + }, + "isolatedSum": { + "p50": 1110.1760268211365, + "p90": 1136.5439891815186, + "p95": 1149.0880250930786, + "p99": 1185.0239634513855 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243805696, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1017.7919864654541, + "p90": 1032.1600437164307, + "p95": 1039.6480560302734, + "p99": 1061.1519813537598 + }, + "combine": { + "p50": 1013.0879878997803, + "p90": 1025.823950767517, + "p95": 1031.775951385498, + "p99": 
1097.7599620819092 + }, + "roundtrip": { + "p50": 2001.5358924865723, + "p90": 2015.7439708709717, + "p95": 2029.7598838806152, + "p99": 2119.1039085388184 + }, + "isolatedSum": { + "p50": 2030.8799743652344, + "p90": 2057.9839944839478, + "p95": 2071.4240074157715, + "p99": 2158.911943435669 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487009280, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-e1047fdc", + "identity": "h200|deepep|7168|8|256|bf16|normal|cached-layout-comm-only-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "h200_76bb7d5d", + "comparisonKey": "174936235ac15d2c", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:49:44.261568+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_2", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "cached-layout-comm-only-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 [cl]", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + 
"traceSignature": "64d989e2e2a6b31", + "workloadId": "set:6:a426d66e479dc893", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271611947", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271611947", + "createdAt": "2026-06-26T23:48:13Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 104.3199971318245, + "p90": 121.50400131940842, + "p95": 125.50400197505951, + "p99": 141.76000654697418 + }, + "combine": { + "p50": 104.032002389431, + "p90": 119.71200257539749, + "p95": 123.96799772977829, + "p99": 145.4080045223236 + }, + "roundtrip": { + "p50": 184.4799965620041, + "p90": 197.24799692630768, + "p95": 202.11200416088104, + "p99": 221.91999852657318 + }, + "isolatedSum": { + "p50": 208.3519995212555, + "p90": 241.2160038948059, + "p95": 249.4719997048378, + "p99": 287.1680110692978 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 134.49600338935852, + "p90": 149.59999918937683, + "p95": 156.63999319076538, + "p99": 199.0080028772354 + }, + "combine": { + "p50": 143.71199905872345, + "p90": 156.51200711727142, + "p95": 161.6639941930771, + "p99": 174.14399981498718 + }, + "roundtrip": { + "p50": 254.88001108169556, + "p90": 277.50399708747864, + "p95": 284.09600257873535, + "p99": 315.20000100135803 + }, + "isolatedSum": { + "p50": 278.20800244808197, + "p90": 306.11200630664825, + "p95": 318.30398738384247, + "p99": 373.1520026922226 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 155889664, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 192.89599359035492, + "p90": 207.39200711250305, + "p95": 213.53599429130554, + "p99": 229.8240065574646 + }, + "combine": { + "p50": 222.88000583648682, + "p90": 239.77600038051605, + "p95": 244.06400322914124, + "p99": 276.16000175476074 + }, + "roundtrip": { + "p50": 388.51198554039, + "p90": 405.08800745010376, + "p95": 412.6400053501129, + "p99": 470.43201327323914 + }, + "isolatedSum": { + "p50": 415.77599942684174, + "p90": 447.1680074930191, + "p95": 457.5999975204468, + "p99": 505.98400831222534 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 312266752, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 304.32000756263733, + "p90": 328.2560110092163, + "p95": 334.6239924430847, + "p99": 354.8159897327423 + }, + "combine": { + "p50": 352.35199332237244, + "p90": 364.1279935836792, + "p95": 372.44799733161926, + "p99": 391.80800318717957 + }, + "roundtrip": { + "p50": 630.1760077476501, + "p90": 646.7840075492859, + "p95": 655.135989189148, + "p99": 679.5520186424255 + }, + "isolatedSum": { + "p50": 656.6720008850098, + "p90": 692.3840045928955, + "p95": 707.071989774704, + "p99": 746.6239929199219 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 623443968, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 515.2000188827515, + 
"p90": 530.0800204277039, + "p95": 538.9119982719421, + "p99": 611.7119789123535 + }, + "combine": { + "p50": 611.2319827079773, + "p90": 623.5520243644714, + "p95": 633.2160234451294, + "p99": 764.1919851303101 + }, + "roundtrip": { + "p50": 1099.4880199432373, + "p90": 1118.4959411621094, + "p95": 1131.1999559402466, + "p99": 1154.2079448699951 + }, + "isolatedSum": { + "p50": 1126.4320015907288, + "p90": 1153.6320447921753, + "p95": 1172.1280217170715, + "p99": 1375.9039640426636 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243805696, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 964.959979057312, + "p90": 992.2879934310913, + "p95": 1000.3199577331543, + "p99": 1034.4959497451782 + }, + "combine": { + "p50": 1105.7920455932617, + "p90": 1125.1840591430664, + "p95": 1137.5679969787598, + "p99": 1247.26402759552 + }, + "roundtrip": { + "p50": 2036.895990371704, + "p90": 2068.3839321136475, + "p95": 2084.383964538574, + "p99": 2168.4799194335938 + }, + "isolatedSum": { + "p50": 2070.7520246505737, + "p90": 2117.4720525741577, + "p95": 2137.887954711914, + "p99": 2281.7599773406982 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487009280, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-26de8d70", + "identity": "h200|deepep|4096|8|128|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||dc27c5e0894e569", + "colorKey": "h200_87683f6c", + "comparisonKey": "b7adcc489d58bf89", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:53:37.273038+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_5", + "sku": "h200", + 
"backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · fp8", + "shape": { + "hidden": 4096, + "topk": 8, + "experts": 128, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "dc27c5e0894e569", + "workloadId": "set:6:76d8142d69406335", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271739849", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271739849", + "createdAt": "2026-06-26T23:52:08Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 233.2800030708313, + "p90": 296.25600576400757, + "p95": 315.45600295066833, + "p99": 387.84000277519226 + }, + "combine": { + "p50": 74.72000271081924, + "p90": 92.96000003814697, + "p95": 97.98400104045868, + "p99": 124.86399710178375 + }, + "roundtrip": { + "p50": 278.9759933948517, + "p90": 337.44001388549805, + "p95": 
363.5840117931366, + "p99": 408.9600145816803 + }, + "isolatedSum": { + "p50": 308.00000578165054, + "p90": 389.21600580215454, + "p95": 413.440003991127, + "p99": 512.703999876976 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22282240, + "combineLogicalBytes": 44564480, + "fanoutMean": 5.3125, + "recvTokensMax": 699, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 240.28800427913666, + "p90": 292.03200340270996, + "p95": 306.97599053382874, + "p99": 329.5679986476898 + }, + "combine": { + "p50": 98.30400347709656, + "p90": 115.07199704647064, + "p95": 119.00799721479416, + "p99": 131.9359987974167 + }, + "roundtrip": { + "p50": 325.408011674881, + "p90": 376.67199969291687, + "p95": 392.8639888763428, + "p99": 439.520001411438 + }, + "isolatedSum": { + "p50": 338.5920077562332, + "p90": 407.1040004491806, + "p95": 425.9839877486229, + "p99": 461.5039974451065 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 44863488, + "combineLogicalBytes": 89726976, + "fanoutMean": 5.34814453125, + "recvTokensMax": 1385, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 315.45600295066833, + "p90": 357.08799958229065, + "p95": 369.9199855327606, + "p99": 407.039999961853 + }, + "combine": { + "p50": 147.45600521564484, + "p90": 164.67200219631195, + "p95": 168.16000640392303, + "p99": 182.52800405025482 + }, + "roundtrip": { + "p50": 460.4479968547821, + "p90": 508.575975894928, + "p95": 523.360013961792, + "p99": 576.0959982872009 + }, + "isolatedSum": { + "p50": 462.91200816631317, + "p90": 521.7600017786026, + "p95": 538.0799919366837, + "p99": 589.5680040121078 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 89751552, + "combineLogicalBytes": 179503104, + "fanoutMean": 5.349609375, + "recvTokensMax": 2772, + 
"stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 458.2720100879669, + "p90": 501.5680193901062, + "p95": 517.632007598877, + "p99": 562.1119737625122 + }, + "combine": { + "p50": 241.2160038948059, + "p90": 252.06398963928223, + "p95": 257.34400749206543, + "p99": 279.83999252319336 + }, + "roundtrip": { + "p50": 681.9199919700623, + "p90": 713.4079933166504, + "p95": 728.8320064544678, + "p99": 805.8239817619324 + }, + "isolatedSum": { + "p50": 699.4880139827728, + "p90": 753.6320090293884, + "p95": 774.9760150909424, + "p99": 841.9519662857056 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 179511296, + "combineLogicalBytes": 359022592, + "fanoutMean": 5.349853515625, + "recvTokensMax": 5558, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 734.112024307251, + "p90": 769.8879837989807, + "p95": 783.7439775466919, + "p99": 899.9680280685425 + }, + "combine": { + "p50": 410.17600893974304, + "p90": 422.4640130996704, + "p95": 427.64800786972046, + "p99": 457.72799849510193 + }, + "roundtrip": { + "p50": 1137.4399662017822, + "p90": 1176.416039466858, + "p95": 1203.328013420105, + "p99": 1318.8159465789795 + }, + "isolatedSum": { + "p50": 1144.288033246994, + "p90": 1192.3519968986511, + "p95": 1211.3919854164124, + "p99": 1357.6960265636444 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 358055936, + "combineLogicalBytes": 716111872, + "fanoutMean": 5.33544921875, + "recvTokensMax": 10982, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1373.792052268982, + "p90": 1396.7679738998413, + "p95": 1406.9440364837646, + "p99": 1577.5359869003296 + }, + "combine": { + "p50": 750.3679990768433, + "p90": 
762.6879811286926, + "p95": 770.3359723091125, + "p99": 788.0319952964783 + }, + "roundtrip": { + "p50": 2134.335994720459, + "p90": 2161.439895629883, + "p95": 2178.2400608062744, + "p99": 2561.3439083099365 + }, + "isolatedSum": { + "p50": 2124.160051345825, + "p90": 2159.455955028534, + "p95": 2177.280008792877, + "p99": 2365.567982196808 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 716197888, + "combineLogicalBytes": 1432395776, + "fanoutMean": 5.336090087890625, + "recvTokensMax": 21939, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-2e0e49b4", + "identity": "h200|deepep|5120|8|160|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||0c022a63bbcbf42", + "colorKey": "h200_87683f6c", + "comparisonKey": "dcdf4b262ed1d48f", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:54:08.323229+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_10", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · fp8", + "shape": { + "hidden": 5120, + "topk": 8, + "experts": 160, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + 
"routingConsistent": true, + "traceSignature": "0c022a63bbcbf42", + "workloadId": "set:6:28c0c09b13ff0acf", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271755854", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271755854", + "createdAt": "2026-06-26T23:52:36Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 219.84000504016876, + "p90": 274.01599287986755, + "p95": 289.5039916038513, + "p99": 343.77598762512207 + }, + "combine": { + "p50": 81.08799904584885, + "p90": 91.90399944782257, + "p95": 99.55199807882309, + "p99": 105.79200088977814 + }, + "roundtrip": { + "p50": 288.57600688934326, + "p90": 340.2239978313446, + "p95": 353.95199060440063, + "p99": 388.0319893360138 + }, + "isolatedSum": { + "p50": 300.9280040860176, + "p90": 365.9199923276901, + "p95": 389.0559896826744, + "p99": 449.5679885149002 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 27837440, + "combineLogicalBytes": 55674880, + "fanoutMean": 5.3095703125, + "recvTokensMax": 699, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 254.62400913238525, + "p90": 299.74400997161865, + "p95": 313.2160007953644, + "p99": 335.6480002403259 + }, + "combine": { + "p50": 112.60800063610077, + "p90": 124.57600235939026, + "p95": 128.31999361515045, + "p99": 137.472003698349 + }, + "roundtrip": { + "p50": 357.88801312446594, + "p90": 402.78398990631104, + "p95": 418.7839925289154, + "p99": 468.3839976787567 + }, + "isolatedSum": { + "p50": 367.232009768486, + "p90": 424.3200123310089, + "p95": 441.53599441051483, + "p99": 
473.1200039386749 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 55552000, + "combineLogicalBytes": 111104000, + "fanoutMean": 5.2978515625, + "recvTokensMax": 1387, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 338.17601203918457, + "p90": 376.48001313209534, + "p95": 391.80800318717957, + "p99": 431.71200156211853 + }, + "combine": { + "p50": 170.43200135231018, + "p90": 182.8480064868927, + "p95": 187.77599930763245, + "p99": 198.46400618553162 + }, + "roundtrip": { + "p50": 509.5679759979248, + "p90": 558.2079887390137, + "p95": 577.6960253715515, + "p99": 617.7600026130676 + }, + "isolatedSum": { + "p50": 508.60801339149475, + "p90": 559.328019618988, + "p95": 579.584002494812, + "p99": 630.1760077476501 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 111549440, + "combineLogicalBytes": 223098880, + "fanoutMean": 5.319091796875, + "recvTokensMax": 2762, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 505.0879716873169, + "p90": 540.7040119171143, + "p95": 552.6720285415649, + "p99": 595.1679944992065 + }, + "combine": { + "p50": 273.75999093055725, + "p90": 285.66399216651917, + "p95": 291.4240062236786, + "p99": 313.05599212646484 + }, + "roundtrip": { + "p50": 780.2879810333252, + "p90": 834.7839713096619, + "p95": 867.3920035362244, + "p99": 1058.9760541915894 + }, + "isolatedSum": { + "p50": 778.8479626178741, + "p90": 826.3680040836334, + "p95": 844.0960347652435, + "p99": 908.2239866256714 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 223365120, + "combineLogicalBytes": 446730240, + "fanoutMean": 5.325439453125, + "recvTokensMax": 5518, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + 
"p50": 859.1039776802063, + "p90": 874.3680119514465, + "p95": 884.447991847992, + "p99": 1000.8000135421753 + }, + "combine": { + "p50": 476.0960042476654, + "p90": 487.5839948654175, + "p95": 495.9680140018463, + "p99": 551.2639880180359 + }, + "roundtrip": { + "p50": 1315.2320384979248, + "p90": 1342.4960374832153, + "p95": 1364.9920225143433, + "p99": 1437.1839761734009 + }, + "isolatedSum": { + "p50": 1335.1999819278717, + "p90": 1361.952006816864, + "p95": 1380.4160058498383, + "p99": 1552.0640015602112 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 446817280, + "combineLogicalBytes": 893634560, + "fanoutMean": 5.32647705078125, + "recvTokensMax": 11032, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1618.3040142059326, + "p90": 1638.8479471206665, + "p95": 1650.3679752349854, + "p99": 1797.8880405426025 + }, + "combine": { + "p50": 871.5839982032776, + "p90": 885.4719996452332, + "p95": 893.7280178070068, + "p99": 936.1280202865601 + }, + "roundtrip": { + "p50": 2472.0640182495117, + "p90": 2496.8960285186768, + "p95": 2517.6639556884766, + "p99": 2775.1998901367188 + }, + "isolatedSum": { + "p50": 2489.88801240921, + "p90": 2524.3199467658997, + "p95": 2544.095993041992, + "p99": 2734.0160608291626 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 893132800, + "combineLogicalBytes": 1786265600, + "fanoutMean": 5.323486328125, + "recvTokensMax": 21895, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-92d6dac4", + "identity": "h200|deepep|6144|8|256|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "h200_87683f6c", + "comparisonKey": "5878390fb0ef3ac0", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:54:33.209811+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": 
"h200-dgxc-slurm_1", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · fp8", + "shape": { + "hidden": 6144, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": "set:6:9f5e1e005a35e937", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271771597", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271771597", + "createdAt": "2026-06-26T23:53:03Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 237.12000250816345, + "p90": 447.00801372528076, + "p95": 466.2080109119415, + "p99": 509.2800259590149 + }, + "combine": { + "p50": 89.59999680519104, + "p90": 118.20799857378006, + "p95": 120.38400024175644, + "p99": 131.55199587345123 + }, + "roundtrip": { + "p50": 299.51998591423035, + "p90": 
465.9839868545532, + "p95": 490.01601338386536, + "p99": 533.9199900627136 + }, + "isolatedSum": { + "p50": 326.7199993133545, + "p90": 565.2160122990608, + "p95": 586.592011153698, + "p99": 640.8320218324661 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 33288192, + "combineLogicalBytes": 66576384, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 272.96000719070435, + "p90": 312.19199299812317, + "p95": 322.7840065956116, + "p99": 376.6080141067505 + }, + "combine": { + "p50": 121.91999703645706, + "p90": 133.34399461746216, + "p95": 139.1039937734604, + "p99": 144.48000490665436 + }, + "roundtrip": { + "p50": 388.5760009288788, + "p90": 429.28001284599304, + "p95": 448.5439956188202, + "p99": 507.87198543548584 + }, + "isolatedSum": { + "p50": 394.8800042271614, + "p90": 445.5359876155853, + "p95": 461.88800036907196, + "p99": 521.0880190134048 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 66809856, + "combineLogicalBytes": 133619712, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 375.61601400375366, + "p90": 427.4879992008209, + "p95": 443.77601146698, + "p99": 500.4799962043762 + }, + "combine": { + "p50": 192.9599940776825, + "p90": 205.08800446987152, + "p95": 213.47199380397797, + "p99": 237.92000114917755 + }, + "roundtrip": { + "p50": 553.5680055618286, + "p90": 599.2000102996826, + "p95": 623.583972454071, + "p99": 716.1920070648193 + }, + "isolatedSum": { + "p50": 568.5760080814362, + "p90": 632.5760036706924, + "p95": 657.248005270958, + "p99": 738.3999973535538 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 133828608, + "combineLogicalBytes": 267657216, + "fanoutMean": 5.31787109375, + 
"recvTokensMax": 2779, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 557.6000213623047, + "p90": 596.7360138893127, + "p95": 607.3920130729675, + "p99": 644.9599862098694 + }, + "combine": { + "p50": 306.335985660553, + "p90": 316.3520097732544, + "p95": 320.51199674606323, + "p99": 334.52799916267395 + }, + "roundtrip": { + "p50": 853.1839847564697, + "p90": 880.8959722518921, + "p95": 895.3920006752014, + "p99": 966.7840003967285 + }, + "isolatedSum": { + "p50": 863.9360070228577, + "p90": 913.0880236625671, + "p95": 927.9040098190308, + "p99": 979.4879853725433 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 267190272, + "combineLogicalBytes": 534380544, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 987.8720045089722, + "p90": 1001.9840002059937, + "p95": 1013.2479667663574, + "p99": 1395.5520391464233 + }, + "combine": { + "p50": 540.9280061721802, + "p90": 573.7280249595642, + "p95": 584.6400260925293, + "p99": 626.0480284690857 + }, + "roundtrip": { + "p50": 1523.6799716949463, + "p90": 1545.408010482788, + "p95": 1558.1120252609253, + "p99": 1704.2880058288574 + }, + "isolatedSum": { + "p50": 1528.8000106811523, + "p90": 1575.7120251655579, + "p95": 1597.8879928588867, + "p99": 2021.600067615509 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 533059584, + "combineLogicalBytes": 1066119168, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1865.3759956359863, + "p90": 1883.2000494003296, + "p95": 1893.02396774292, + "p99": 1925.7279634475708 + }, + "combine": { + "p50": 
981.823980808258, + "p90": 994.0800070762634, + "p95": 1002.7199983596802, + "p99": 1096.3200330734253 + }, + "roundtrip": { + "p50": 2907.2320461273193, + "p90": 2933.151960372925, + "p95": 2943.104028701782, + "p99": 3191.3599967956543 + }, + "isolatedSum": { + "p50": 2847.1999764442444, + "p90": 2877.280056476593, + "p95": 2895.7439661026, + "p99": 3022.047996520996 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1065861120, + "combineLogicalBytes": 2131722240, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-e6cb64c3", + "identity": "h200|deepep|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "h200_9979edfc", + "comparisonKey": "e1fcecbd9bd8ede3", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:50:14.800894+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_0", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · fp8", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, 
+ "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": "set:6:a426d66e479dc893", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271625900", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271625900", + "createdAt": "2026-06-26T23:48:41Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 87.74399757385254, + "p90": 110.55999994277954, + "p95": 120.25599926710129, + "p99": 134.36800241470337 + }, + "combine": { + "p50": 96.73599898815155, + "p90": 114.01599645614624, + "p95": 121.0239976644516, + "p99": 137.34400272369385 + }, + "roundtrip": { + "p50": 209.24800634384155, + "p90": 246.39999866485596, + "p95": 260.0319981575012, + "p99": 304.22401428222656 + }, + "isolatedSum": { + "p50": 184.4799965620041, + "p90": 224.57599639892578, + "p95": 241.2799969315529, + "p99": 271.7120051383972 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 102.24000364542007, + "p90": 118.97599697113037, + "p95": 129.7920048236847, + "p99": 143.42400431632996 + }, + "combine": { + "p50": 136.86400651931763, + "p90": 150.43200552463531, + "p95": 156.3200056552887, + "p99": 173.92000555992126 + }, + "roundtrip": { + "p50": 310.88000535964966, + "p90": 332.73598551750183, + "p95": 338.78400921821594, + "p99": 370.11200189590454 + }, + "isolatedSum": { + "p50": 239.1040101647377, + "p90": 269.4080024957657, + 
"p95": 286.1120104789734, + "p99": 317.3440098762512 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77944832, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 138.14400136470795, + "p90": 162.59199380874634, + "p95": 168.47999393939972, + "p99": 190.3039962053299 + }, + "combine": { + "p50": 214.88000452518463, + "p90": 230.17600178718567, + "p95": 236.32000386714935, + "p99": 254.4960081577301 + }, + "roundtrip": { + "p50": 494.4959878921509, + "p90": 515.6800150871277, + "p95": 529.6000242233276, + "p99": 559.8719716072083 + }, + "isolatedSum": { + "p50": 353.0240058898926, + "p90": 392.767995595932, + "p95": 404.7999978065491, + "p99": 444.80000436306 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156133376, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 206.1759978532791, + "p90": 232.9919934272766, + "p95": 242.49599874019623, + "p99": 273.98398518562317 + }, + "combine": { + "p50": 348.63999485969543, + "p90": 365.7279908657074, + "p95": 373.1519877910614, + "p99": 415.2640104293823 + }, + "roundtrip": { + "p50": 835.8079791069031, + "p90": 859.5200181007385, + "p95": 871.6480135917664, + "p99": 925.055980682373 + }, + "isolatedSum": { + "p50": 554.8159927129745, + "p90": 598.719984292984, + "p95": 615.6479865312576, + "p99": 689.2479956150055 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311721984, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 
16384, + "dispatch": { + "p50": 338.8800024986267, + "p90": 359.6799969673157, + "p95": 372.0000088214874, + "p99": 397.5360095500946 + }, + "combine": { + "p50": 606.4640283584595, + "p90": 624.895989894867, + "p95": 636.7679834365845, + "p99": 693.5359835624695 + }, + "roundtrip": { + "p50": 1500, + "p90": 1528.9280414581299, + "p95": 1547.0080375671387, + "p99": 1667.6160097122192 + }, + "isolatedSum": { + "p50": 945.3440308570862, + "p90": 984.5759868621826, + "p95": 1008.7679922580719, + "p99": 1091.071993112564 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 621902848, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 615.9039735794067, + "p90": 634.0479850769043, + "p95": 644.2239880561829, + "p99": 707.9039812088013 + }, + "combine": { + "p50": 1102.112054824829, + "p90": 1116.7999505996704, + "p95": 1128.767967224121, + "p99": 1167.2320365905762 + }, + "roundtrip": { + "p50": 2840.384006500244, + "p90": 2870.07999420166, + "p95": 2894.5279121398926, + "p99": 3452.9600143432617 + }, + "isolatedSum": { + "p50": 1718.0160284042358, + "p90": 1750.8479356765747, + "p95": 1772.991955280304, + "p99": 1875.1360177993774 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243504640, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-4da6f6db", + "identity": "h200|deepep|7168|8|256|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "h200_87683f6c", + "comparisonKey": "90a8a7fc3b314f23", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:50:44.259181+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": 
"h200-dgxc-slurm_3", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · fp8", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": "set:6:a426d66e479dc893", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271640687", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271640687", + "createdAt": "2026-06-26T23:49:09Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 239.3600046634674, + "p90": 286.52799129486084, + "p95": 313.79199028015137, + "p99": 391.2000060081482 + }, + "combine": { + "p50": 97.21600264310837, + "p90": 110.59200018644333, + "p95": 116.67200177907944, + "p99": 134.783998131752 + }, + "roundtrip": { + "p50": 309.9519908428192, + "p90": 
360.48001050949097, + "p95": 381.5680146217346, + "p99": 466.94400906562805 + }, + "isolatedSum": { + "p50": 336.5760073065758, + "p90": 397.11999148130417, + "p95": 430.4639920592308, + "p99": 525.9840041399002 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 291.0720109939575, + "p90": 340.5759930610657, + "p95": 355.19999265670776, + "p99": 430.30399084091187 + }, + "combine": { + "p50": 137.7599984407425, + "p90": 154.30399775505066, + "p95": 160.41600704193115, + "p99": 182.3360025882721 + }, + "roundtrip": { + "p50": 415.8079922199249, + "p90": 464.0960097312927, + "p95": 484.5759868621826, + "p99": 556.8320155143738 + }, + "isolatedSum": { + "p50": 428.8320094347, + "p90": 494.87999081611633, + "p95": 515.6159996986389, + "p99": 612.639993429184 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77944832, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 408.28800201416016, + "p90": 486.4000082015991, + "p95": 495.7759976387024, + "p99": 554.3680191040039 + }, + "combine": { + "p50": 219.10400688648224, + "p90": 233.37599635124207, + "p95": 239.48800563812256, + "p99": 266.07999205589294 + }, + "roundtrip": { + "p50": 607.4560284614563, + "p90": 650.2400040626526, + "p95": 670.5920100212097, + "p99": 729.3760180473328 + }, + "isolatedSum": { + "p50": 627.3920089006424, + "p90": 719.7760045528412, + "p95": 735.264003276825, + "p99": 820.4480111598969 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156133376, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + 
"recvTokensMax": 2779, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 621.9840049743652, + "p90": 667.8720116615295, + "p95": 696.0639953613281, + "p99": 765.0880217552185 + }, + "combine": { + "p50": 346.8480110168457, + "p90": 362.08000779151917, + "p95": 368.47999691963196, + "p99": 384.89601016044617 + }, + "roundtrip": { + "p50": 955.2639722824097, + "p90": 1010.1120471954346, + "p95": 1039.4879579544067, + "p99": 1108.6399555206299 + }, + "isolatedSum": { + "p50": 968.8320159912109, + "p90": 1029.9520194530487, + "p95": 1064.54399228096, + "p99": 1149.9840319156647 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311721984, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 1107.7439785003662, + "p90": 1126.9439458847046, + "p95": 1137.887954711914, + "p99": 1176.8319606781006 + }, + "combine": { + "p50": 609.9200248718262, + "p90": 624.4159936904907, + "p95": 631.8399906158447, + "p99": 652.1919965744019 + }, + "roundtrip": { + "p50": 1692.2240257263184, + "p90": 1713.1520509719849, + "p95": 1732.5439453125, + "p99": 1810.7199668884277 + }, + "isolatedSum": { + "p50": 1717.6640033721924, + "p90": 1751.3599395751953, + "p95": 1769.7279453277588, + "p99": 1829.0239572525024 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 621902848, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 2100.4478931427, + "p90": 2129.312038421631, + "p95": 2148.47993850708, + "p99": 2358.464002609253 + }, + "combine": { + "p50": 
1102.6560068130493, + "p90": 1120.0640201568604, + "p95": 1132.8959465026855, + "p99": 1158.560037612915 + }, + "roundtrip": { + "p50": 3193.376064300537, + "p90": 3219.615936279297, + "p95": 3229.9840450286865, + "p99": 3288.5758876800537 + }, + "isolatedSum": { + "p50": 3203.1038999557495, + "p90": 3249.376058578491, + "p95": 3281.3758850097656, + "p99": 3517.024040222168 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243504640, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-d2673258", + "identity": "h200|deepep|7168|8|384|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||cd50548525dafdf", + "colorKey": "h200_87683f6c", + "comparisonKey": "ae4528707b5ffd7f", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:53:16.316846+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_3", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · fp8", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 384, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + 
"scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "cd50548525dafdf", + "workloadId": "set:6:b23bc0c4b6402c69", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271725115", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271725115", + "createdAt": "2026-06-26T23:51:41Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 221.27999365329742, + "p90": 242.20800399780273, + "p95": 255.3279995918274, + "p99": 294.94398832321167 + }, + "combine": { + "p50": 96.67199850082397, + "p90": 103.20000350475311, + "p95": 107.32799768447876, + "p99": 117.85600334405899 + }, + "roundtrip": { + "p50": 306.8479895591736, + "p90": 331.07200264930725, + "p95": 352.31998562812805, + "p99": 409.05600786209106 + }, + "isolatedSum": { + "p50": 317.9519921541214, + "p90": 345.40800750255585, + "p95": 362.65599727630615, + "p99": 412.79999166727066 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38757376, + "combineLogicalBytes": 77514752, + "fanoutMean": 5.2802734375, + "recvTokensMax": 707, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 282.04798698425293, + "p90": 307.3279857635498, + "p95": 327.2320032119751, + "p99": 442.68798828125 + }, + "combine": { + "p50": 138.87999951839447, + "p90": 145.05599439144135, + "p95": 152.73599326610565, + "p99": 170.01600563526154 + }, + "roundtrip": { + "p50": 410.46398878097534, + "p90": 435.39199233055115, + "p95": 465.6960070133209, + "p99": 525.2479910850525 + }, + "isolatedSum": { + "p50": 420.9279865026474, + "p90": 452.38398015499115, + "p95": 
479.96799647808075, + "p99": 612.7039939165115 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77285376, + "combineLogicalBytes": 154570752, + "fanoutMean": 5.2646484375, + "recvTokensMax": 1391, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 390.9760117530823, + "p90": 407.8719913959503, + "p95": 414.3039882183075, + "p99": 448.2240080833435 + }, + "combine": { + "p50": 212.3199999332428, + "p90": 220.2560007572174, + "p95": 229.08799350261688, + "p99": 299.71200227737427 + }, + "roundtrip": { + "p50": 589.3120169639587, + "p90": 609.9839806556702, + "p95": 625.5040168762207, + "p99": 686.6880059242249 + }, + "isolatedSum": { + "p50": 603.2960116863251, + "p90": 628.1279921531677, + "p95": 643.3919817209244, + "p99": 747.9360103607178 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 154886144, + "combineLogicalBytes": 309772288, + "fanoutMean": 5.275390625, + "recvTokensMax": 2754, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 601.7919778823853, + "p90": 624.064028263092, + "p95": 640.0960087776184, + "p99": 705.2800059318542 + }, + "combine": { + "p50": 343.29599142074585, + "p90": 351.39200091362, + "p95": 357.02401399612427, + "p99": 386.01601123809814 + }, + "roundtrip": { + "p50": 930.400013923645, + "p90": 953.1520009040833, + "p95": 967.1040177345276, + "p99": 1069.5680379867554 + }, + "isolatedSum": { + "p50": 945.0879693031311, + "p90": 975.456029176712, + "p95": 997.1200227737427, + "p99": 1091.2960171699524 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 309750784, + "combineLogicalBytes": 619501568, + "fanoutMean": 5.2750244140625, + "recvTokensMax": 5469, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, 
+ "dispatch": { + "p50": 1100.0959873199463, + "p90": 1113.9520406723022, + "p95": 1130.784034729004, + "p99": 1221.2159633636475 + }, + "combine": { + "p50": 596.3199734687805, + "p90": 606.9440245628357, + "p95": 612.6400232315063, + "p99": 648.5120058059692 + }, + "roundtrip": { + "p50": 1675.5199432373047, + "p90": 1687.999963760376, + "p95": 1695.3599452972412, + "p99": 2014.2719745635986 + }, + "isolatedSum": { + "p50": 1696.4159607887268, + "p90": 1720.896065235138, + "p95": 1743.4240579605103, + "p99": 1869.7279691696167 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 619687936, + "combineLogicalBytes": 1239375872, + "fanoutMean": 5.276611328125, + "recvTokensMax": 10883, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 2087.3920917510986, + "p90": 2099.519968032837, + "p95": 2110.6879711151123, + "p99": 2213.7598991394043 + }, + "combine": { + "p50": 1087.4559879302979, + "p90": 1099.4240045547485, + "p95": 1103.5200357437134, + "p99": 1151.8080234527588 + }, + "roundtrip": { + "p50": 3166.016101837158, + "p90": 3187.0079040527344, + "p95": 3196.5761184692383, + "p99": 3422.0480918884277 + }, + "isolatedSum": { + "p50": 3174.8480796813965, + "p90": 3198.9439725875854, + "p95": 3214.2080068588257, + "p99": 3365.567922592163 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1239834624, + "combineLogicalBytes": 2479669248, + "fanoutMean": 5.278564453125, + "recvTokensMax": 21730, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-5a82a4d9", + "identity": "h200|deepep|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|normalized|0.18|64d989e2e2a6b31", + "colorKey": "h200_3a17d46b", + "comparisonKey": "680e15fb3428bab0", + "schemaVersion": 3, + "generatedAt": "2026-06-26T17:30:05.917629+00:00", + "status": "valid", + "publicationStatus": 
"official", + "runner": "h200-dgxc-slurm_10", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "normalized", + "suite": "resource-constrained", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · fp8 (norm)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": 0.18, + "achievedFraction": 0.1818, + "configuredUnits": 24, + "deviceUnits": 132, + "resourceClass": "resource-constrained", + "conformanceClass": "resource-conforming", + "fixedKernel": false, + "paretoEligible": true + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": "set:6:a426d66e479dc893", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28254401482", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254401482", + "createdAt": "2026-06-26T17:28:31Z", + "sha": "60dec7d70f554e252fec87709e2be52752947db1" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 86.81599795818329, + "p90": 108.2879975438118, + "p95": 115.26399850845337, + "p99": 141.79199934005737 + }, + "combine": { + "p50": 96.38399630784988, + "p90": 114.68800157308578, + "p95": 119.55200135707855, + "p99": 138.72000575065613 + }, + 
"roundtrip": { + "p50": 210.59200167655945, + "p90": 242.94400215148926, + "p95": 254.17599081993103, + "p99": 313.27998638153076 + }, + "isolatedSum": { + "p50": 183.19999426603317, + "p90": 222.97599911689758, + "p95": 234.81599986553192, + "p99": 280.5120050907135 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 103.2319962978363, + "p90": 128.28800082206726, + "p95": 134.8160058259964, + "p99": 155.07200360298157 + }, + "combine": { + "p50": 133.66399705410004, + "p90": 149.79200065135956, + "p95": 157.21599757671356, + "p99": 173.37599396705627 + }, + "roundtrip": { + "p50": 304.22401428222656, + "p90": 332.41599798202515, + "p95": 337.92001008987427, + "p99": 353.2800078392029 + }, + "isolatedSum": { + "p50": 236.89599335193634, + "p90": 278.0800014734268, + "p95": 292.03200340270996, + "p99": 328.44799757003784 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77944832, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 135.77599823474884, + "p90": 162.30399906635284, + "p95": 169.95200514793396, + "p99": 237.98400163650513 + }, + "combine": { + "p50": 203.2960057258606, + "p90": 220.41599452495575, + "p95": 226.55999660491943, + "p99": 257.31199979782104 + }, + "roundtrip": { + "p50": 476.9600033760071, + "p90": 496.63999676704407, + "p95": 511.55197620391846, + "p99": 544.7999835014343 + }, + "isolatedSum": { + "p50": 339.07200396060944, + "p90": 382.7199935913086, + "p95": 396.5120017528534, + "p99": 495.2960014343262 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 
156133376, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 196.57599925994873, + "p90": 218.87999773025513, + "p95": 225.3119945526123, + "p99": 253.7280023097992 + }, + "combine": { + "p50": 320.607990026474, + "p90": 335.2319896221161, + "p95": 344.4800078868866, + "p99": 365.9519851207733 + }, + "roundtrip": { + "p50": 794.7199940681458, + "p90": 817.6959753036499, + "p95": 837.0879888534546, + "p99": 910.5280041694641 + }, + "isolatedSum": { + "p50": 517.1839892864227, + "p90": 554.1119873523712, + "p95": 569.7920024394989, + "p99": 619.6799874305725 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311721984, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 320.16000151634216, + "p90": 343.55199337005615, + "p95": 363.45601081848145, + "p99": 439.9999976158142 + }, + "combine": { + "p50": 554.8160076141357, + "p90": 569.7919726371765, + "p95": 577.6000022888184, + "p99": 639.3280029296875 + }, + "roundtrip": { + "p50": 1425.7279634475708, + "p90": 1448.3519792556763, + "p95": 1468.4480428695679, + "p99": 1752.8959512710571 + }, + "isolatedSum": { + "p50": 874.9760091304779, + "p90": 913.3439660072327, + "p95": 941.0560131072998, + "p99": 1079.3280005455017 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 621902848, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 572.4160075187683, + "p90": 584.447979927063, + "p95": 
591.6479825973511, + "p99": 629.6640038490295 + }, + "combine": { + "p50": 1012.6080513000488, + "p90": 1025.696039199829, + "p95": 1030.2400588989258, + "p99": 1060.1279735565186 + }, + "roundtrip": { + "p50": 2698.7199783325195, + "p90": 2725.055932998657, + "p95": 2745.215892791748, + "p99": 2952.064037322998 + }, + "isolatedSum": { + "p50": 1585.0240588188171, + "p90": 1610.144019126892, + "p95": 1621.8880414962769, + "p99": 1689.791977405548 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243504640, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-da3555d5", + "identity": "h200|deepep|7168|8|256|fp8|normal|cached-layout-comm-only-v1|uniform|8|prefill|normal|none|none|0|normalized|0.18|64d989e2e2a6b31", + "colorKey": "h200_50a9ee63", + "comparisonKey": "ee1a607167629f55", + "schemaVersion": 3, + "generatedAt": "2026-06-26T17:30:23.809590+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_13", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "normalized", + "suite": "resource-constrained", + "comparisonClass": "standardized", + "measurementContract": "cached-layout-comm-only-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · fp8 (norm) [cl]", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": 0.18, + "achievedFraction": 0.1818, + "configuredUnits": 24, + "deviceUnits": 132, + "resourceClass": "resource-constrained", + "conformanceClass": 
"resource-conforming", + "fixedKernel": false, + "paretoEligible": true + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": "set:6:a426d66e479dc893", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28254418007", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254418007", + "createdAt": "2026-06-26T17:28:51Z", + "sha": "60dec7d70f554e252fec87709e2be52752947db1" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 73.69600236415863, + "p90": 84.63999629020691, + "p95": 90.08000046014786, + "p99": 106.6880002617836 + }, + "combine": { + "p50": 95.20000219345093, + "p90": 106.97600245475769, + "p95": 112.28799819946289, + "p99": 135.77599823474884 + }, + "roundtrip": { + "p50": 196.70400023460388, + "p90": 213.79199624061584, + "p95": 224.16000068187714, + "p99": 281.0240089893341 + }, + "isolatedSum": { + "p50": 168.89600455760956, + "p90": 191.6159987449646, + "p95": 202.36799865961075, + "p99": 242.46399849653244 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 91.71199798583984, + "p90": 108.0000028014183, + "p95": 111.87200248241425, + "p99": 124.57600235939026 + }, + "combine": { + "p50": 132.7359974384308, + "p90": 146.2399959564209, + "p95": 151.8400013446808, + "p99": 165.56799411773682 + }, + "roundtrip": { + "p50": 291.456013917923, + "p90": 308.57598781585693, 
+ "p95": 313.34400177001953, + "p99": 330.78399300575256 + }, + "isolatedSum": { + "p50": 224.44799542427063, + "p90": 254.2399987578392, + "p95": 263.71200382709503, + "p99": 290.1439964771271 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77944832, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 125.50400197505951, + "p90": 144.3520039319992, + "p95": 149.85600113868713, + "p99": 213.6639952659607 }, "combine": { - "p50": 91.36000275611877, - "p90": 91.36000275611877, - "p99": 102.01600193977356 + "p50": 203.10400426387787, + "p90": 215.64799547195435, + "p95": 220.47999501228333, + "p99": 236.92800104618073 }, - "serial": { - "p50": 177.08799988031387, - "p90": 177.08799988031387, - "p99": 200.03200322389603 + "roundtrip": { + "p50": 464.7040069103241, + "p90": 485.5999946594238, + "p95": 495.64799666404724, + "p99": 524.3520140647888 }, - "dispatchLogicalBytes": 38993920, - "combineLogicalBytes": 38993920, - "fanoutMean": 5.3125, - "recvTokensMax": 367, + "isolatedSum": { + "p50": 328.6080062389374, + "p90": 359.99999940395355, + "p95": 370.33599615097046, + "p99": 450.5919963121414 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156133376, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 7, "correct": true, - "samplesPooled": null, - "trials": null + "samplesPooled": 600, + "trials": 3 }, { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 1024, + "globalTokens": 8192, "dispatch": { - "p50": 93.40800344944, - "p90": 93.40800344944, - "p99": 103.45599800348282 + "p50": 184.1599941253662, + "p90": 198.94400238990784, + "p95": 204.352006316185, + "p99": 232.12799429893494 }, "combine": { - "p50": 115.03999680280685, - "p90": 115.03999680280685, - "p99": 
126.91199779510498 + "p50": 318.39999556541443, + "p90": 328.96000146865845, + "p95": 333.15199613571167, + "p99": 352.7359962463379 }, - "serial": { - "p50": 208.44800025224686, - "p90": 208.44800025224686, - "p99": 230.3679957985878 + "roundtrip": { + "p50": 782.4640274047852, + "p90": 796.064019203186, + "p95": 802.4960160255432, + "p99": 826.4960050582886 }, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, + "isolatedSum": { + "p50": 502.55998969078064, + "p90": 527.9040038585663, + "p95": 537.5040024518967, + "p99": 584.8639905452728 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311721984, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 304.3519854545593, + "p90": 320.8320140838623, + "p95": 336.2559974193573, + "p99": 371.42398953437805 + }, + "combine": { + "p50": 550.4000186920166, + "p90": 560.2880120277405, + "p95": 567.7760243415833, + "p99": 656.8959951400757 + }, + "roundtrip": { + "p50": 1410.4959964752197, + "p90": 1427.456021308899, + "p95": 1436.4160299301147, + "p99": 1585.2479934692383 + }, + "isolatedSum": { + "p50": 854.7520041465759, + "p90": 881.1200261116028, + "p95": 904.0320217609406, + "p99": 1028.3199846744537 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 621902848, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 542.8479909896851, + "p90": 557.5680136680603, + "p95": 565.5360221862793, + "p99": 587.7760052680969 + }, + "combine": { + "p50": 1013.5680437088013, + "p90": 1026.4320373535156, + "p95": 1031.999945640564, + "p99": 
1048.192024230957 + }, + "roundtrip": { + "p50": 2668.4160232543945, + "p90": 2694.3039894104004, + "p95": 2716.320037841797, + "p99": 3019.615888595581 + }, + "isolatedSum": { + "p50": 1556.4160346984863, + "p90": 1584.000051021576, + "p95": 1597.5359678268433, + "p99": 1635.968029499054 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243504640, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 7, "correct": true, - "samplesPooled": null, - "trials": null + "samplesPooled": 600, + "trials": 3 } ] }, { - "id": "cx-43f1fda706ca7bdd", - "identity": "b300|deepep|prefill|normal|tuned|standardized|comm-only-v1|b300-nvlink-island|8|8|bf16|uniform|7168|8|256", - "stitchKey": "b300|deepep|normal|tuned|standardized|comm-only-v1|b300-nvlink-island|8|8|bf16|uniform|7168|8|256", - "colorKey": "b300_b219a378", - "schemaVersion": 2, - "generatedAt": "2026-06-24T23:33:10.949608+00:00", + "id": "cx-4a1bc537", + "identity": "h200|deepep|7168|8|256|fp8|normal|cached-layout-comm-only-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "h200_4f483b60", + "comparisonKey": "ac62097ce902c24f", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:50:33.490755+00:00", "status": "valid", - "sku": "b300", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_1", + "sku": "h200", "backend": "deepep", "phase": "prefill", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "comm-only-v1", - "topologyClass": "b300-nvlink-island", + "measurementContract": "cached-layout-comm-only-v1", + "topologyClass": "h200-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "B300 · deepep · bf16 · EP8 · comm only", + "label": "H200 EP8 · deepep · fp8 [cl]", "shape": { "hidden": 7168, "topk": 8, "experts": 256, "routing": "uniform", - "dispatchDtype": "bf16" + "routingLabel": "uniform", + 
"routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 }, - "routingConsistent": null, - "traceSignature": null, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": "set:6:a426d66e479dc893", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28135639401", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28135639401", - "createdAt": "2026-06-24T23:12:52Z", - "sha": "4e217f93fda64a43d32a46f1e57325ff848148d8" + "id": "28271633476", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271633476", + "createdAt": "2026-06-26T23:48:55Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" }, "rows": [ { "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 94.7519987821579, - "p90": 94.7519987821579, - "p99": 267.13600754737854 + "p50": 75.71200281381607, + "p90": 95.29600292444229, + "p95": 102.11200267076492, + "p99": 128.83199751377106 }, "combine": { - "p50": 115.52000045776367, - "p90": 115.52000045776367, - "p99": 118.81600320339203 + "p50": 97.31200337409973, + "p90": 115.93600362539291, + "p95": 120.80000340938568, + "p99": 140.44800400733948 }, - "serial": { - "p50": 210.27199923992157, - "p90": 210.27199923992157, - "p99": 385.95201075077057 + "roundtrip": { + "p50": 200.8959949016571, + "p90": 
248.28800559043884, + "p95": 261.24799251556396, + "p99": 302.5600016117096 }, - "dispatchLogicalBytes": 77672448, + "isolatedSum": { + "p50": 173.0240061879158, + "p90": 211.2320065498352, + "p95": 222.9120060801506, + "p99": 269.28000152111053 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, "combineLogicalBytes": 77672448, "fanoutMean": 5.291015625, "recvTokensMax": 723, + "stragglerRank": 5, "correct": true, - "samplesPooled": null, - "trials": null + "samplesPooled": 600, + "trials": 3 }, { "tokensPerRank": 256, "globalTokens": 2048, "dispatch": { - "p50": 136.3839954137802, - "p90": 136.3839954137802, - "p99": 144.54400539398193 + "p50": 91.61599725484848, + "p90": 110.33599823713303, + "p95": 116.35199934244156, + "p99": 134.17600095272064 }, "combine": { - "p50": 153.60000729560852, - "p90": 153.60000729560852, - "p99": 165.47200083732605 + "p50": 136.76799833774567, + "p90": 151.5199989080429, + "p95": 159.04000401496887, + "p99": 170.6240028142929 }, - "serial": { - "p50": 289.98400270938873, - "p90": 289.98400270938873, - "p99": 310.016006231308 + "roundtrip": { + "p50": 299.45600032806396, + "p90": 324.38400387763977, + "p95": 331.07200264930725, + "p99": 365.7279908657074 }, - "dispatchLogicalBytes": 155889664, + "isolatedSum": { + "p50": 228.38399559259415, + "p90": 261.85599714517593, + "p95": 275.39200335741043, + "p99": 304.80000376701355 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77944832, "combineLogicalBytes": 155889664, "fanoutMean": 5.3095703125, "recvTokensMax": 1422, + "stragglerRank": 4, "correct": true, - "samplesPooled": null, - "trials": null + "samplesPooled": 600, + "trials": 3 }, { "tokensPerRank": 512, "globalTokens": 4096, "dispatch": { - "p50": 194.59199905395508, - "p90": 194.59199905395508, - "p99": 215.7440036535263 + "p50": 127.83999741077423, + "p90": 142.94399321079254, + "p95": 150.4960060119629, + "p99": 162.7199947834015 }, "combine": { - "p50": 273.50398898124695, - "p90": 
273.50398898124695, - "p99": 280.19198775291443 + "p50": 214.62400257587433, + "p90": 226.78400576114655, + "p95": 231.51999711990356, + "p99": 242.14400351047516 }, - "serial": { - "p50": 468.095988035202, - "p90": 468.095988035202, - "p99": 495.93599140644073 + "roundtrip": { + "p50": 483.5200011730194, + "p90": 497.2800016403198, + "p95": 504.5120120048523, + "p99": 540.831983089447 }, - "dispatchLogicalBytes": 312266752, + "isolatedSum": { + "p50": 342.46399998664856, + "p90": 369.7279989719391, + "p95": 382.01600313186646, + "p99": 404.86399829387665 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156133376, "combineLogicalBytes": 312266752, "fanoutMean": 5.31787109375, "recvTokensMax": 2779, + "stragglerRank": 6, "correct": true, - "samplesPooled": null, - "trials": null + "samplesPooled": 600, + "trials": 3 }, { "tokensPerRank": 1024, "globalTokens": 8192, "dispatch": { - "p50": 325.3439962863922, - "p90": 325.3439962863922, - "p99": 330.7519853115082 + "p50": 194.75199282169342, + "p90": 214.88000452518463, + "p95": 220.2879935503006, + "p99": 243.74400079250336 }, "combine": { - "p50": 459.1679871082306, - "p90": 459.1679871082306, - "p99": 482.08001255989075 + "p50": 346.3360071182251, + "p90": 362.8160059452057, + "p95": 374.4960129261017, + "p99": 426.56001448631287 }, - "serial": { - "p50": 784.5119833946228, - "p90": 784.5119833946228, - "p99": 812.8319978713989 + "roundtrip": { + "p50": 824.5440125465393, + "p90": 852.5760173797607, + "p95": 862.2400164604187, + "p99": 896.6720104217529 }, - "dispatchLogicalBytes": 623443968, + "isolatedSum": { + "p50": 541.0879999399185, + "p90": 577.6960104703903, + "p95": 594.7840064764023, + "p99": 670.3040152788162 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311721984, "combineLogicalBytes": 623443968, "fanoutMean": 5.30859375, "recvTokensMax": 5505, + "stragglerRank": 5, "correct": true, - "samplesPooled": null, - "trials": null + "samplesPooled": 600, + "trials": 3 }, { 
"tokensPerRank": 2048, "globalTokens": 16384, "dispatch": { - "p50": 575.2320289611816, - "p90": 575.2320289611816, - "p99": 587.8400206565857 + "p50": 325.0879943370819, + "p90": 342.52798557281494, + "p95": 348.9919900894165, + "p99": 374.9440014362335 }, "combine": { - "p50": 817.5039887428284, - "p90": 817.5039887428284, - "p99": 831.8079710006714 + "p50": 603.8720011711121, + "p90": 613.6959791183472, + "p95": 618.1120276451111, + "p99": 640.3520107269287 }, - "serial": { - "p50": 1392.73601770401, - "p90": 1392.73601770401, - "p99": 1419.647991657257 + "roundtrip": { + "p50": 1486.36794090271, + "p90": 1510.7519626617432, + "p95": 1524.1600275039673, + "p99": 1566.3679838180542 }, - "dispatchLogicalBytes": 1243805696, + "isolatedSum": { + "p50": 928.959995508194, + "p90": 956.2239646911621, + "p95": 967.1040177345276, + "p99": 1015.2960121631622 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 621902848, "combineLogicalBytes": 1243805696, "fanoutMean": 5.29547119140625, "recvTokensMax": 10952, + "stragglerRank": 5, "correct": true, - "samplesPooled": null, - "trials": null + "samplesPooled": 600, + "trials": 3 }, { "tokensPerRank": 4096, "globalTokens": 32768, "dispatch": { - "p50": 1068.3200359344482, - "p90": 1068.3200359344482, - "p99": 1088.1279706954956 + "p50": 586.624026298523, + "p90": 618.9759969711304, + "p95": 627.6800036430359, + "p99": 654.7200083732605 }, "combine": { - "p50": 1529.312014579773, - "p90": 1529.312014579773, - "p99": 1618.3359622955322 + "p50": 1108.8639497756958, + "p90": 1126.1119842529297, + "p95": 1134.2079639434814, + "p99": 1169.376015663147 }, - "serial": { - "p50": 2597.632050514221, - "p90": 2597.632050514221, - "p99": 2706.463932991028 + "roundtrip": { + "p50": 2817.1839714050293, + "p90": 2849.3120670318604, + "p95": 2871.0079193115234, + "p99": 3254.4960975646973 }, - "dispatchLogicalBytes": 2487009280, + "isolatedSum": { + "p50": 1695.4879760742188, + "p90": 1745.08798122406, + "p95": 1761.8879675865173, + 
"p99": 1824.0960240364075 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243504640, "combineLogicalBytes": 2487009280, "fanoutMean": 5.294189453125, "recvTokensMax": 21781, + "stragglerRank": 5, "correct": true, - "samplesPooled": null, - "trials": null + "samplesPooled": 600, + "trials": 3 } ] }, { - "id": "cx-a26390df5f1e4196", - "identity": "gb200|deepep|decode|normal|tuned|standardized|deepep-normal-v1|gb200-nvl72-mnnvl|4|4|bf16|balanced|7168|8|256", - "stitchKey": "gb200|deepep|normal|tuned|standardized|deepep-normal-v1|gb200-nvl72-mnnvl|4|4|bf16|balanced|7168|8|256", - "colorKey": "gb200_21efa99d", - "schemaVersion": 1, - "generatedAt": "2026-06-24T03:45:16.336112+00:00", + "id": "cx-279043f8", + "identity": "mi355x|mori|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|fp8-saturation|none|none|0|tuned||c774c8e4abb34da", + "colorKey": "mi355x_4ec24046", + "comparisonKey": "5776ea979804ef91", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:08:32.534640+00:00", "status": "valid", - "sku": "gb200", - "backend": "deepep", + "publicationStatus": "official", + "runner": "mi355x-amds_05", + "sku": "mi355x", + "backend": "mori", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "mi355x-xgmi", + "transport": "xgmi", + "worldSize": 8, + "epSize": 8, + "label": "MI355X EP8 · mori · bf16", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "fp8-saturation", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.3125, + "configuredUnits": 80, + "deviceUnits": 256, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + 
"fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "c774c8e4abb34da", + "workloadId": "set:5:d8d49658059863f2", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "image:rocm/sgl-dev:sglang-0.5.9-rocm720-mi35x-mori-0227-2", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28272169530", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272169530", + "createdAt": "2026-06-27T00:05:44Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 40.19999876618385, + "p90": 43.000999838113785, + "p95": 44.56000030040741, + "p99": 47.880999743938446 + }, + "combine": { + "p50": 17.760999500751495, + "p90": 19.360000267624855, + "p95": 20.959999412298203, + "p99": 23.080000653862953 + }, + "roundtrip": { + "p50": 56.04099854826927, + "p90": 59.00000035762787, + "p95": 60.201000422239304, + "p99": 62.24000081419945 + }, + "isolatedSum": { + "p50": 57.96099826693535, + "p90": 62.36100010573864, + "p95": 65.51999971270561, + "p99": 70.9610003978014 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 42.64099895954132, + "p90": 45.52000015974045, + "p95": 47.07999899983406, + "p99": 49.76100102066994 + }, + "combine": { + "p50": 16.599999740719795, + "p90": 18.60000006854534, + "p95": 19.79999989271164, + "p99": 23.080000653862953 + }, + "roundtrip": { + "p50": 58.96100029349327, + "p90": 62.39999830722809, 
+ "p95": 64.32099640369415, + "p99": 102.64100134372711 + }, + "isolatedSum": { + "p50": 59.240998700261116, + "p90": 64.12000022828579, + "p95": 66.8799988925457, + "p99": 72.84100167453289 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 42.160000652074814, + "p90": 44.76099833846092, + "p95": 46.20100185275078, + "p99": 48.5600009560585 + }, + "combine": { + "p50": 19.759999588131905, + "p90": 21.27999998629093, + "p95": 22.5210003554821, + "p99": 25.200000032782555 + }, + "roundtrip": { + "p50": 62.001001089811325, + "p90": 65.32099843025208, + "p95": 66.16000086069107, + "p99": 69.15999948978424 + }, + "isolatedSum": { + "p50": 61.92000024020672, + "p90": 66.04099832475185, + "p95": 68.72200220823288, + "p99": 73.76000098884106 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 42.399998754262924, + "p90": 45.35999894142151, + "p95": 47.15999960899353, + "p99": 49.52000081539154 + }, + "combine": { + "p50": 20.880000665783882, + "p90": 23.08100089430809, + "p95": 24.04000051319599, + "p99": 26.441000401973724 + }, + "roundtrip": { + "p50": 62.52100318670273, + "p90": 65.64100086688995, + "p95": 66.56000018119812, + "p99": 68.84100288152695 + }, + "isolatedSum": { + "p50": 63.279999420046806, + "p90": 68.4409998357296, + "p95": 71.20000012218952, + "p99": 75.96100121736526 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 0, + "correct": 
true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 42.52000153064728, + "p90": 45.32000049948692, + "p95": 46.640001237392426, + "p99": 49.04000088572502 + }, + "combine": { + "p50": 25.599999353289604, + "p90": 27.799999341368675, + "p95": 29.239999130368233, + "p99": 31.520001590251923 + }, + "roundtrip": { + "p50": 67.63999909162521, + "p90": 70.60100138187408, + "p95": 71.68100029230118, + "p99": 74.36099648475647 + }, + "isolatedSum": { + "p50": 68.12000088393688, + "p90": 73.1199998408556, + "p95": 75.88000036776066, + "p99": 80.56000247597694 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-60c60832", + "identity": "mi355x|mori|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||c774c8e4abb34da", + "colorKey": "mi355x_4ec24046", + "comparisonKey": "3677ee6ace04ac65", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:53:59.155172+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "mi355x-amds_05", + "sku": "mi355x", + "backend": "mori", "phase": "decode", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "deepep-normal-v1", - "topologyClass": "gb200-nvl72-mnnvl", - "transport": "mnnvl", - "worldSize": 4, - "epSize": 4, - "label": "GB200 · deepep · bf16 · EP4", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "mi355x-xgmi", + "transport": "xgmi", + "worldSize": 8, + "epSize": 8, + "label": "MI355X EP8 · mori · bf16", "shape": { "hidden": 7168, "topk": 8, "experts": 256, - "routing": "balanced", - "dispatchDtype": "bf16" + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", 
+ "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" }, - "routingConsistent": null, - "traceSignature": null, - "backendVersion": "unknown", + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.3125, + "configuredUnits": 80, + "deviceUnits": 256, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "c774c8e4abb34da", + "workloadId": "set:5:d8d49658059863f2", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "image:rocm/sgl-dev:sglang-0.5.9-rocm720-mi35x-mori-0227-2", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28069684997", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28069684997", - "createdAt": "2026-06-24T01:52:08Z", - "sha": "368cfbc6390cf69b864dedc121a79a12114b716b" + "id": "28273516714", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28273516714", + "createdAt": "2026-06-27T00:53:08Z", + "sha": "2c15d9415503e9ccb84cd49cf446a122796efc1e" }, "rows": [ { "tokensPerRank": 1, - "globalTokens": 4, - "dispatch": { - "p50": 99.42399710416794, - "p90": 99.42399710416794, - "p99": 147.32800424098969 - }, - "combine": { - "p50": 72.73600250482559, - "p90": 72.73600250482559, - "p99": 18259.39178466797 - }, - "serial": { - "p50": 148.83199334144592, - "p90": 148.83199334144592, - "p99": 168.19199919700623 - }, - "dispatchLogicalBytes": 57344, - "combineLogicalBytes": 0, - "fanoutMean": null, - "recvTokensMax": 4, - "correct": true, - "samplesPooled": null, - "trials": null - }, - { - "tokensPerRank": 2, "globalTokens": 8, "dispatch": { - 
"p50": 101.50399804115295, - "p90": 101.50399804115295, - "p99": 123.99999797344208 + "p50": 40.6000018119812, + "p90": 43.76000165939331, + "p95": 45.239999890327454, + "p99": 54.71999943256378 }, "combine": { - "p50": 79.68000322580338, - "p90": 79.68000322580338, - "p99": 6393.280029296875 + "p50": 17.920000478625298, + "p90": 19.039999693632126, + "p95": 20.999999716877937, + "p99": 22.87999913096428 }, - "serial": { - "p50": 149.63200688362122, - "p90": 149.63200688362122, - "p99": 176.41599476337433 + "roundtrip": { + "p50": 56.32000043988228, + "p90": 59.4400018453598, + "p95": 60.64099818468094, + "p99": 63.19999694824219 }, - "dispatchLogicalBytes": 114688, - "combineLogicalBytes": 0, - "fanoutMean": null, - "recvTokensMax": 8, + "isolatedSum": { + "p50": 58.5200022906065, + "p90": 62.800001353025436, + "p95": 66.23999960720539, + "p99": 77.59999856352806 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 0, "correct": true, - "samplesPooled": null, - "trials": null + "samplesPooled": 600, + "trials": 3 }, { - "tokensPerRank": 4, + "tokensPerRank": 2, "globalTokens": 16, "dispatch": { - "p50": 98.78399968147278, - "p90": 98.78399968147278, - "p99": 118.56000125408173 + "p50": 42.64000058174133, + "p90": 45.35999894142151, + "p95": 46.76000028848648, + "p99": 50.23999884724617 }, "combine": { - "p50": 76.28799974918365, - "p90": 76.28799974918365, - "p99": 92.12800115346909 + "p50": 16.759999096393585, + "p90": 18.68000067770481, + "p95": 19.801000133156776, + "p99": 22.08000048995018 }, - "serial": { - "p50": 152.38399803638458, - "p90": 152.38399803638458, - "p99": 24719.839096069336 + "roundtrip": { + "p50": 58.9199997484684, + "p90": 61.799999326467514, + "p95": 62.95999884605408, + "p99": 65.20000100135803 }, - "dispatchLogicalBytes": 229376, - "combineLogicalBytes": 0, - "fanoutMean": null, - "recvTokensMax": 16, + "isolatedSum": { + "p50": 
59.39999967813492, + "p90": 64.03999961912632, + "p95": 66.56100042164326, + "p99": 72.31999933719635 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 0, "correct": true, - "samplesPooled": null, - "trials": null + "samplesPooled": 600, + "trials": 3 }, { - "tokensPerRank": 8, + "tokensPerRank": 4, "globalTokens": 32, "dispatch": { - "p50": 105.40799796581268, - "p90": 105.40799796581268, - "p99": 265.4080092906952 + "p50": 42.44000092148781, + "p90": 45.281000435352325, + "p95": 46.4400015771389, + "p99": 47.919999808073044 }, "combine": { - "p50": 82.68799632787704, - "p90": 82.68799632787704, - "p99": 95.551997423172 + "p50": 19.999999552965164, + "p90": 21.99999988079071, + "p95": 23.360000923275948, + "p99": 25.72000026702881 + }, + "roundtrip": { + "p50": 61.91999837756157, + "p90": 65.20099937915802, + "p95": 66.3599967956543, + "p99": 67.84100085496902 }, - "serial": { - "p50": 163.68000209331512, - "p90": 163.68000209331512, - "p99": 15888.832092285156 + "isolatedSum": { + "p50": 62.44000047445297, + "p90": 67.28100031614304, + "p95": 69.80000250041485, + "p99": 73.64000007510185 }, - "dispatchLogicalBytes": 415744, - "combineLogicalBytes": 0, - "fanoutMean": null, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, "recvTokensMax": 29, + "stragglerRank": 0, "correct": true, - "samplesPooled": null, - "trials": null + "samplesPooled": 600, + "trials": 3 }, { - "tokensPerRank": 16, + "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 103.74400019645691, - "p90": 103.74400019645691, - "p99": 131.8719983100891 + "p50": 42.44000092148781, + "p90": 45.00100016593933, + "p95": 46.88100144267082, + "p99": 49.27999898791313 }, "combine": { - "p50": 83.26400071382523, - "p90": 83.26400071382523, - "p99": 4964.223861694336 - }, - "serial": { - "p50": 
157.05600380897522, - "p90": 157.05600380897522, - "p99": 193.50400567054749 - }, - "dispatchLogicalBytes": 874496, - "combineLogicalBytes": 0, - "fanoutMean": null, - "recvTokensMax": 61, - "correct": true, - "samplesPooled": null, - "trials": null - }, - { - "tokensPerRank": 32, - "globalTokens": 128, - "dispatch": { - "p50": 103.5199984908104, - "p90": 103.5199984908104, - "p99": 142.84799993038177 + "p50": 20.880000665783882, + "p90": 22.840000689029694, + "p95": 24.240000173449516, + "p99": 26.399999856948853 }, - "combine": { - "p50": 83.20000022649765, - "p90": 83.20000022649765, - "p99": 108.2879975438118 + "roundtrip": { + "p50": 62.401000410318375, + "p90": 65.48000127077103, + "p95": 66.28099828958511, + "p99": 68.00000369548798 }, - "serial": { - "p50": 163.26400637626648, - "p90": 163.26400637626648, - "p99": 4763.391971588135 + "isolatedSum": { + "p50": 63.32000158727169, + "p90": 67.84100085496902, + "p95": 71.12100161612034, + "p99": 75.67999884486198 }, - "dispatchLogicalBytes": 1691648, - "combineLogicalBytes": 0, - "fanoutMean": null, - "recvTokensMax": 118, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 0, "correct": true, - "samplesPooled": null, - "trials": null + "samplesPooled": 600, + "trials": 3 }, { - "tokensPerRank": 64, - "globalTokens": 256, + "tokensPerRank": 16, + "globalTokens": 128, "dispatch": { - "p50": 108.19199681282043, - "p90": 108.19199681282043, - "p99": 15079.903602600098 + "p50": 42.520999908447266, + "p90": 45.1200008392334, + "p95": 46.59999907016754, + "p99": 49.04000088572502 }, "combine": { - "p50": 83.74399691820145, - "p90": 83.74399691820145, - "p99": 99.71199929714203 + "p50": 25.8799996227026, + "p90": 27.879999950528145, + "p95": 29.239999130368233, + "p99": 31.800001859664917 }, - "serial": { - "p50": 167.84000396728516, - "p90": 167.84000396728516, - "p99": 193.1840032339096 - }, - 
"dispatchLogicalBytes": 3411968, - "combineLogicalBytes": 0, - "fanoutMean": null, - "recvTokensMax": 238, - "correct": true, - "samplesPooled": null, - "trials": null - }, - { - "tokensPerRank": 128, - "globalTokens": 512, - "dispatch": { - "p50": 121.44000083208084, - "p90": 121.44000083208084, - "p99": 16422.271728515625 - }, - "combine": { - "p50": 100.832000374794, - "p90": 100.832000374794, - "p99": 150.68799257278442 + "roundtrip": { + "p50": 67.80099868774414, + "p90": 71.16000354290009, + "p95": 72.2000002861023, + "p99": 74.47999715805054 }, - "serial": { - "p50": 200.8959949016571, - "p90": 200.8959949016571, - "p99": 270.9119915962219 + "isolatedSum": { + "p50": 68.40099953114986, + "p90": 73.00000078976154, + "p95": 75.83999820053577, + "p99": 80.84000274538994 }, - "dispatchLogicalBytes": 6852608, - "combineLogicalBytes": 0, - "fanoutMean": null, - "recvTokensMax": 478, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 0, "correct": true, - "samplesPooled": null, - "trials": null + "samplesPooled": 600, + "trials": 3 } ] }, { - "id": "cx-6bf6c38771ee141b", - "identity": "gb200|deepep|prefill|normal|tuned|standardized|deepep-normal-v1|gb200-nvl72-mnnvl|4|4|bf16|balanced|7168|8|256", - "stitchKey": "gb200|deepep|normal|tuned|standardized|deepep-normal-v1|gb200-nvl72-mnnvl|4|4|bf16|balanced|7168|8|256", - "colorKey": "gb200_21efa99d", - "schemaVersion": 1, - "generatedAt": "2026-06-24T04:09:18.525148+00:00", + "id": "cx-f513e0f0", + "identity": "mi355x|mori|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|small-amplitude|none|none|0|tuned||c774c8e4abb34da", + "colorKey": "mi355x_4ec24046", + "comparisonKey": "43eedfb9c3cc2b53", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:07:01.734617+00:00", "status": "valid", - "sku": "gb200", - "backend": "deepep", - "phase": "prefill", + "publicationStatus": "official", + "runner": 
"mi355x-amds_01", + "sku": "mi355x", + "backend": "mori", + "phase": "decode", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "deepep-normal-v1", - "topologyClass": "gb200-nvl72-mnnvl", - "transport": "mnnvl", - "worldSize": 4, - "epSize": 4, - "label": "GB200 · deepep · bf16 · EP4", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "mi355x-xgmi", + "transport": "xgmi", + "worldSize": 8, + "epSize": 8, + "label": "MI355X EP8 · mori · bf16", "shape": { "hidden": 7168, "topk": 8, "experts": 256, - "routing": "balanced", - "dispatchDtype": "bf16" + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "small-amplitude", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.3125, + "configuredUnits": 80, + "deviceUnits": 256, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false }, - "routingConsistent": null, - "traceSignature": null, - "backendVersion": "unknown", + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "c774c8e4abb34da", + "workloadId": "set:5:d8d49658059863f2", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "image:rocm/sgl-dev:sglang-0.5.9-rocm720-mi35x-mori-0227-2", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28069684997", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28069684997", - "createdAt": "2026-06-24T01:52:08Z", - "sha": "368cfbc6390cf69b864dedc121a79a12114b716b" + "id": "28272162006", + "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272162006", + "createdAt": "2026-06-27T00:05:30Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" }, "rows": [ { - "tokensPerRank": 128, - "globalTokens": 512, + "tokensPerRank": 1, + "globalTokens": 8, "dispatch": { - "p50": 111.16799712181091, - "p90": 111.16799712181091, - "p99": 135.74400544166565 + "p50": 40.44099897146225, + "p90": 43.72100159525871, + "p95": 45.1200008392334, + "p99": 51.600001752376556 }, "combine": { - "p50": 93.98400038480759, - "p90": 93.98400038480759, - "p99": 105.6319996714592 - }, - "serial": { - "p50": 177.59999632835388, - "p90": 177.59999632835388, - "p99": 198.59200716018677 + "p50": 15.960000455379486, + "p90": 18.160000443458557, + "p95": 19.279999658465385, + "p99": 21.159999072551727 }, - "dispatchLogicalBytes": 6780928, - "combineLogicalBytes": 0, - "fanoutMean": null, - "recvTokensMax": 473, - "correct": true, - "samplesPooled": null, - "trials": null - }, - { - "tokensPerRank": 256, - "globalTokens": 1024, - "dispatch": { - "p50": 137.7599984407425, - "p90": 137.7599984407425, - "p99": 164.35199975967407 - }, - "combine": { - "p50": 114.56000059843063, - "p90": 114.56000059843063, - "p99": 123.4240010380745 + "roundtrip": { + "p50": 55.56099861860275, + "p90": 58.75999853014946, + "p95": 60.120001435279846, + "p99": 63.63999843597412 }, - "serial": { - "p50": 226.8799990415573, - "p90": 226.8799990415573, - "p99": 237.7920001745224 + "isolatedSum": { + "p50": 56.400999426841736, + "p90": 61.88100203871727, + "p95": 64.40000049769878, + "p99": 72.76000082492828 }, - "dispatchLogicalBytes": 13389824, - "combineLogicalBytes": 0, - "fanoutMean": null, - "recvTokensMax": 934, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 5, "correct": true, - "samplesPooled": null, - "trials": null + "samplesPooled": 600, + "trials": 3 }, { - "tokensPerRank": 512, - 
"globalTokens": 2048, + "tokensPerRank": 2, + "globalTokens": 16, "dispatch": { - "p50": 184.38400328159332, - "p90": 184.38400328159332, - "p99": 206.62400126457214 + "p50": 42.08099842071533, + "p90": 45.0810007750988, + "p95": 46.39999940991402, + "p99": 49.76100102066994 }, "combine": { - "p50": 152.8320014476776, - "p90": 152.8320014476776, - "p99": 162.91199624538422 + "p50": 16.00000075995922, + "p90": 18.60000006854534, + "p95": 19.55999992787838, + "p99": 21.920999512076378 + }, + "roundtrip": { + "p50": 58.32099914550781, + "p90": 61.64000183343887, + "p95": 63.600003719329834, + "p99": 67.59999692440033 }, - "serial": { - "p50": 311.7760121822357, - "p90": 311.7760121822357, - "p99": 325.6640136241913 + "isolatedSum": { + "p50": 58.08099918067455, + "p90": 63.68100084364414, + "p95": 65.9599993377924, + "p99": 71.68200053274632 }, - "dispatchLogicalBytes": 26736640, - "combineLogicalBytes": 0, - "fanoutMean": null, - "recvTokensMax": 1865, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 5, "correct": true, - "samplesPooled": null, - "trials": null + "samplesPooled": 600, + "trials": 3 }, { - "tokensPerRank": 1024, - "globalTokens": 4096, + "tokensPerRank": 4, + "globalTokens": 32, "dispatch": { - "p50": 282.8480005264282, - "p90": 282.8480005264282, - "p99": 300.0960052013397 + "p50": 41.839998215436935, + "p90": 44.920001178979874, + "p95": 46.28000035881996, + "p99": 49.40100014209747 }, "combine": { - "p50": 272.2879946231842, - "p90": 272.2879946231842, - "p99": 288.4159982204437 + "p50": 19.31999996304512, + "p90": 21.75999991595745, + "p95": 22.5600004196167, + "p99": 24.43999983370304 + }, + "roundtrip": { + "p50": 60.80099940299988, + "p90": 64.03999775648117, + "p95": 65.56099653244019, + "p99": 69.92000341415405 }, - "serial": { - "p50": 528.7359952926636, - "p90": 528.7359952926636, - "p99": 545.7599759101868 + "isolatedSum": { + 
"p50": 61.159998178482056, + "p90": 66.68000109493732, + "p95": 68.84000077843666, + "p99": 73.84099997580051 }, - "dispatchLogicalBytes": 53358592, - "combineLogicalBytes": 0, - "fanoutMean": null, - "recvTokensMax": 3722, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 5, "correct": true, - "samplesPooled": null, - "trials": null + "samplesPooled": 600, + "trials": 3 }, { - "tokensPerRank": 2048, - "globalTokens": 8192, + "tokensPerRank": 8, + "globalTokens": 64, "dispatch": { - "p50": 476.063996553421, - "p90": 476.063996553421, - "p99": 494.52799558639526 + "p50": 42.080000042915344, + "p90": 45.20000144839287, + "p95": 46.64099961519241, + "p99": 48.43999817967415 }, "combine": { - "p50": 453.72799038887024, - "p90": 453.72799038887024, - "p99": 476.25601291656494 + "p50": 20.16099914908409, + "p90": 22.280000150203705, + "p95": 23.04000034928322, + "p99": 24.960000067949295 + }, + "roundtrip": { + "p50": 62.199998646974564, + "p90": 65.36100059747696, + "p95": 66.72099977731705, + "p99": 68.71999800205231 }, - "serial": { - "p50": 910.3040099143982, - "p90": 910.3040099143982, - "p99": 965.9519791603088 + "isolatedSum": { + "p50": 62.240999191999435, + "p90": 67.48000159859657, + "p95": 69.68099996447563, + "p99": 73.39999824762344 }, - "dispatchLogicalBytes": 106373120, - "combineLogicalBytes": 0, - "fanoutMean": null, - "recvTokensMax": 7420, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 5, "correct": true, - "samplesPooled": null, - "trials": null + "samplesPooled": 600, + "trials": 3 }, { - "tokensPerRank": 4096, - "globalTokens": 16384, + "tokensPerRank": 16, + "globalTokens": 128, "dispatch": { - "p50": 866.4960265159607, - "p90": 866.4960265159607, - "p99": 879.7439932823181 + "p50": 42.24099963903427, + "p90": 
45.239999890327454, + "p95": 46.36099934577942, + "p99": 48.40100184082985 }, "combine": { - "p50": 834.6880078315735, - "p90": 834.6880078315735, - "p99": 848.6080169677734 + "p50": 24.639999493956566, + "p90": 26.88100002706051, + "p95": 27.881000190973282, + "p99": 30.079999938607216 }, - "serial": { - "p50": 1678.3039569854736, - "p90": 1678.3039569854736, - "p99": 1692.031979560852 + "roundtrip": { + "p50": 67.47999787330627, + "p90": 70.60100138187408, + "p95": 72.28100299835205, + "p99": 75.20099729299545 }, - "dispatchLogicalBytes": 212774912, - "combineLogicalBytes": 0, - "fanoutMean": null, - "recvTokensMax": 14842, + "isolatedSum": { + "p50": 66.88099913299084, + "p90": 72.12099991738796, + "p95": 74.2419995367527, + "p99": 78.48100177943707 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 4, "correct": true, - "samplesPooled": null, - "trials": null + "samplesPooled": 600, + "trials": 3 } ] }, { - "id": "cx-b9f1a317815a742c", - "identity": "h100|deepep|decode|normal|tuned|standardized|comm-only-v1|h100-nvlink-island|8|8|bf16|uniform|7168|8|256", - "stitchKey": "h100|deepep|normal|tuned|standardized|comm-only-v1|h100-nvlink-island|8|8|bf16|uniform|7168|8|256", - "colorKey": "h100_a6184024", - "schemaVersion": 2, - "generatedAt": "2026-06-24T22:50:09.306878+00:00", + "id": "cx-67074ab6", + "identity": "mi355x|mori|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|wide-dynamic-range|none|none|0|tuned||c774c8e4abb34da", + "colorKey": "mi355x_4ec24046", + "comparisonKey": "2ccb7553c969aafc", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:07:48.076161+00:00", "status": "valid", - "sku": "h100", - "backend": "deepep", + "publicationStatus": "official", + "runner": "mi355x-amds_06", + "sku": "mi355x", + "backend": "mori", "phase": "decode", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", 
"comparisonClass": "standardized", - "measurementContract": "comm-only-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "mi355x-xgmi", + "transport": "xgmi", "worldSize": 8, "epSize": 8, - "label": "H100 · deepep · bf16 · EP8 · comm only", + "label": "MI355X EP8 · mori · bf16", "shape": { "hidden": 7168, "topk": 8, "experts": 256, "routing": "uniform", - "dispatchDtype": "bf16" + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "wide-dynamic-range", + "combineQuantMode": "none" }, - "routingConsistent": null, - "traceSignature": null, - "backendVersion": "1.2.1", + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.3125, + "configuredUnits": 80, + "deviceUnits": 256, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "c774c8e4abb34da", + "workloadId": "set:5:d8d49658059863f2", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "image:rocm/sgl-dev:sglang-0.5.9-rocm720-mi35x-mori-0227-2", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28134642131", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28134642131", - "createdAt": "2026-06-24T22:49:12Z", - "sha": "9f85d054303e23b24e720ca6cb472b6a8eba3754" + "id": "28272165928", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272165928", + "createdAt": "2026-06-27T00:05:37Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" }, "rows": [ { "tokensPerRank": 1, 
"globalTokens": 8, "dispatch": { - "p50": 100.16000270843506, - "p90": 100.16000270843506, - "p99": 121.37600034475327 + "p50": 40.240999311208725, + "p90": 43.43999922275543, + "p95": 44.76099833846092, + "p99": 48.11999946832657 }, "combine": { - "p50": 79.96799796819687, - "p90": 79.96799796819687, - "p99": 88.41600269079208 + "p50": 16.839999705553055, + "p90": 18.319999799132347, + "p95": 19.600000232458115, + "p99": 23.399999365210533 }, - "serial": { - "p50": 180.12800067663193, - "p90": 180.12800067663193, - "p99": 209.79200303554535 + "roundtrip": { + "p50": 56.120000779628754, + "p90": 59.48000028729439, + "p95": 60.76100096106529, + "p99": 65.24000316858292 }, + "isolatedSum": { + "p50": 57.08099901676178, + "p90": 61.75999902188778, + "p95": 64.36099857091904, + "p99": 71.5199988335371 + }, + "roundtripMeasured": true, "dispatchLogicalBytes": 630784, "combineLogicalBytes": 630784, "fanoutMean": 5.5, "recvTokensMax": 7, + "stragglerRank": 2, "correct": true, - "samplesPooled": null, - "trials": null + "samplesPooled": 600, + "trials": 3 }, { "tokensPerRank": 2, "globalTokens": 16, "dispatch": { - "p50": 98.01600128412247, - "p90": 98.01600128412247, - "p99": 121.8239963054657 + "p50": 42.44000092148781, + "p90": 45.48000171780586, + "p95": 46.51999846100807, + "p99": 49.19999837875366 }, "combine": { - "p50": 79.8719972372055, - "p90": 79.8719972372055, - "p99": 96.57599776983261 + "p50": 16.201000660657883, + "p90": 18.479999154806137, + "p95": 19.55999992787838, + "p99": 21.800000220537186 }, - "serial": { - "p50": 177.88799852132797, - "p90": 177.88799852132797, - "p99": 218.3999940752983 + "roundtrip": { + "p50": 58.80099907517433, + "p90": 61.96000054478645, + "p95": 62.76000291109085, + "p99": 64.19999897480011 }, + "isolatedSum": { + "p50": 58.64100158214569, + "p90": 63.960000872612, + "p95": 66.07999838888645, + "p99": 70.99999859929085 + }, + "roundtripMeasured": true, "dispatchLogicalBytes": 1232896, "combineLogicalBytes": 1232896, 
"fanoutMean": 5.375, "recvTokensMax": 13, + "stragglerRank": 2, "correct": true, - "samplesPooled": null, - "trials": null + "samplesPooled": 600, + "trials": 3 }, { "tokensPerRank": 4, "globalTokens": 32, "dispatch": { - "p50": 99.2640033364296, - "p90": 99.2640033364296, - "p99": 119.58400160074234 + "p50": 42.04000160098076, + "p90": 44.280000030994415, + "p95": 45.921001583337784, + "p99": 49.28100109100342 }, "combine": { - "p50": 80.22399991750717, - "p90": 80.22399991750717, - "p99": 90.01599997282028 + "p50": 19.039999693632126, + "p90": 21.51999995112419, + "p95": 22.801000624895096, + "p99": 24.560000747442245 }, - "serial": { - "p50": 179.48800325393677, - "p90": 179.48800325393677, - "p99": 209.60000157356262 + "roundtrip": { + "p50": 61.601001769304276, + "p90": 64.92000073194504, + "p95": 66.00099802017212, + "p99": 67.72000342607498 }, + "isolatedSum": { + "p50": 61.080001294612885, + "p90": 65.7999999821186, + "p95": 68.72200220823288, + "p99": 73.84100183844566 + }, + "roundtripMeasured": true, "dispatchLogicalBytes": 2480128, "combineLogicalBytes": 2480128, "fanoutMean": 5.40625, "recvTokensMax": 29, + "stragglerRank": 2, "correct": true, - "samplesPooled": null, - "trials": null + "samplesPooled": 600, + "trials": 3 }, { "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 98.24000298976898, - "p90": 98.24000298976898, - "p99": 115.99999666213989 + "p50": 42.27999970316887, + "p90": 45.00000178813934, + "p95": 46.23999819159508, + "p99": 48.16000163555145 }, "combine": { - "p50": 80.70400357246399, - "p90": 80.70400357246399, - "p99": 88.67199718952179 + "p50": 20.320000126957893, + "p90": 23.32100085914135, + "p95": 25.439999997615814, + "p99": 57.88100138306618 }, - "serial": { - "p50": 178.94400656223297, - "p90": 178.94400656223297, - "p99": 204.67199385166168 + "roundtrip": { + "p50": 62.3599998652935, + "p90": 65.0399997830391, + "p95": 66.0799965262413, + "p99": 68.00100207328796 }, + "isolatedSum": { + "p50": 62.59999983012676, 
+ "p90": 68.3210026472807, + "p95": 71.67999818921089, + "p99": 106.04100301861763 + }, + "roundtripMeasured": true, "dispatchLogicalBytes": 4974592, "combineLogicalBytes": 4974592, "fanoutMean": 5.421875, "recvTokensMax": 47, + "stragglerRank": 2, "correct": true, - "samplesPooled": null, - "trials": null + "samplesPooled": 600, + "trials": 3 }, { "tokensPerRank": 16, "globalTokens": 128, "dispatch": { - "p50": 98.59199821949005, - "p90": 98.59199821949005, - "p99": 147.5200057029724 + "p50": 42.399998754262924, + "p90": 45.0810007750988, + "p95": 46.23999819159508, + "p99": 48.8400012254715 }, "combine": { - "p50": 87.74399757385254, - "p90": 87.74399757385254, - "p99": 89.37600255012512 + "p50": 25.120999664068222, + "p90": 27.2000003606081, + "p95": 28.161000460386276, + "p99": 30.319999903440475 }, - "serial": { - "p50": 186.3359957933426, - "p90": 186.3359957933426, - "p99": 236.89600825309753 + "roundtrip": { + "p50": 67.63999909162521, + "p90": 70.79999893903732, + "p95": 71.68000191450119, + "p99": 73.72000068426132 }, + "isolatedSum": { + "p50": 67.52099841833115, + "p90": 72.2810011357069, + "p95": 74.40099865198135, + "p99": 79.16000112891197 + }, + "roundtripMeasured": true, "dispatchLogicalBytes": 9920512, "combineLogicalBytes": 9920512, "fanoutMean": 5.40625, "recvTokensMax": 92, + "stragglerRank": 2, "correct": true, - "samplesPooled": null, - "trials": null - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 99.29600358009338, - "p90": 99.29600358009338, - "p99": 127.36000120639801 - }, - "combine": { - "p50": 88.48000317811966, - "p90": 88.48000317811966, - "p99": 97.08800166845322 - }, - "serial": { - "p50": 187.77600675821304, - "p90": 187.77600675821304, - "p99": 224.44800287485123 - }, - "dispatchLogicalBytes": 19726336, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 182, - "correct": true, - "samplesPooled": null, - "trials": null - }, - { - "tokensPerRank": 64, - "globalTokens": 512, 
- "dispatch": { - "p50": 112.73600161075592, - "p90": 112.73600161075592, - "p99": 122.27199971675873 - }, - "combine": { - "p50": 98.30400347709656, - "p90": 98.30400347709656, - "p99": 106.27199709415436 - }, - "serial": { - "p50": 211.04000508785248, - "p90": 211.04000508785248, - "p99": 228.5439968109131 - }, - "dispatchLogicalBytes": 38993920, - "combineLogicalBytes": 38993920, - "fanoutMean": 5.3125, - "recvTokensMax": 367, - "correct": true, - "samplesPooled": null, - "trials": null - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 129.50399518013, - "p90": 129.50399518013, - "p99": 141.85599982738495 - }, - "combine": { - "p50": 119.64800208806992, - "p90": 119.64800208806992, - "p99": 123.9359974861145 - }, - "serial": { - "p50": 249.15199726819992, - "p90": 249.15199726819992, - "p99": 265.79199731349945 - }, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "correct": true, - "samplesPooled": null, - "trials": null + "samplesPooled": 600, + "trials": 3 } ] }, { - "id": "cx-6de3ad32139f9200", - "identity": "h100|deepep|decode|ll|tuned|standardized|comm-only-v1|h100-nvlink-island|8|8|fp8|uniform|7168|8|256", - "stitchKey": "h100|deepep|ll|tuned|standardized|comm-only-v1|h100-nvlink-island|8|8|fp8|uniform|7168|8|256", - "colorKey": "h100_9d00efc8", - "schemaVersion": 2, - "generatedAt": "2026-06-24T23:09:09.861657+00:00", + "id": "cx-23f1ecd4", + "identity": "mi355x|mori|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|zeros|none|none|0|tuned||c774c8e4abb34da", + "colorKey": "mi355x_4ec24046", + "comparisonKey": "1ab1f06166250146", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:06:16.763261+00:00", "status": "valid", - "sku": "h100", - "backend": "deepep", + "publicationStatus": "official", + "runner": "mi355x-amds_02", + "sku": "mi355x", + "backend": "mori", "phase": "decode", - "mode": "ll", + "mode": "normal", "resourceMode": 
"tuned", "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "comm-only-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "mi355x-xgmi", + "transport": "xgmi", "worldSize": 8, "epSize": 8, - "label": "H100 · deepep · fp8 · EP8 · LL · comm only", + "label": "MI355X EP8 · mori · bf16", "shape": { "hidden": 7168, "topk": 8, "experts": 256, "routing": "uniform", - "dispatchDtype": "fp8" + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "zeros", + "combineQuantMode": "none" }, - "routingConsistent": null, - "traceSignature": null, - "backendVersion": "1.2.1", + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.3125, + "configuredUnits": 80, + "deviceUnits": 256, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "c774c8e4abb34da", + "workloadId": "set:5:d8d49658059863f2", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "image:rocm/sgl-dev:sglang-0.5.9-rocm720-mi35x-mori-0227-2", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28135444762", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28135444762", - "createdAt": "2026-06-24T23:08:05Z", - "sha": "e71ef3c2a0465a357771c14935dd0807dc1da165" + "id": "28272158268", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272158268", + "createdAt": "2026-06-27T00:05:24Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" }, "rows": [ { 
"tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 52.06400156021118, - "p90": 52.06400156021118, - "p99": 65.8240020275116 + "p50": 40.240999311208725, + "p90": 43.5199998319149, + "p95": 44.920001178979874, + "p99": 54.32000011205673 }, "combine": { - "p50": 49.8879998922348, - "p90": 49.8879998922348, - "p99": 74.68800246715546 + "p50": 17.680000513792038, + "p90": 19.401000812649727, + "p95": 20.759999752044678, + "p99": 23.80100078880787 }, - "serial": { - "p50": 101.95200145244598, - "p90": 101.95200145244598, - "p99": 140.51200449466705 + "roundtrip": { + "p50": 56.040000170469284, + "p90": 59.12100151181221, + "p95": 60.47999858856201, + "p99": 63.040003180503845 }, - "dispatchLogicalBytes": 458752, - "combineLogicalBytes": 917504, + "isolatedSum": { + "p50": 57.92099982500076, + "p90": 62.92100064456463, + "p95": 65.68000093102455, + "p99": 78.1210009008646 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, "fanoutMean": 5.5, - "recvTokensMax": 14, + "recvTokensMax": 7, + "stragglerRank": 1, "correct": true, - "samplesPooled": null, - "trials": null + "samplesPooled": 600, + "trials": 3 }, { "tokensPerRank": 2, "globalTokens": 16, "dispatch": { - "p50": 52.352000027894974, - "p90": 52.352000027894974, - "p99": 61.47199869155884 + "p50": 42.319998145103455, + "p90": 44.87999901175499, + "p95": 46.480998396873474, + "p99": 49.320999532938 }, "combine": { - "p50": 37.151999771595, - "p90": 37.151999771595, - "p99": 55.03999814391136 + "p50": 16.720000654459, + "p90": 18.240999430418015, + "p95": 19.401000812649727, + "p99": 23.240000009536743 }, - "serial": { - "p50": 89.50399979948997, - "p90": 89.50399979948997, - "p99": 116.5119968354702 + "roundtrip": { + "p50": 58.479998260736465, + "p90": 61.879999935626984, + "p95": 62.880001962184906, + "p99": 65.99999964237213 }, - "dispatchLogicalBytes": 917504, - "combineLogicalBytes": 1835008, + "isolatedSum": { + "p50": 59.039998799562454, + "p90": 
63.120998442173004, + "p95": 65.8819992095232, + "p99": 72.56099954247475 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, "fanoutMean": 5.375, - "recvTokensMax": 21, + "recvTokensMax": 13, + "stragglerRank": 1, "correct": true, - "samplesPooled": null, - "trials": null + "samplesPooled": 600, + "trials": 3 }, { "tokensPerRank": 4, "globalTokens": 32, "dispatch": { - "p50": 38.11199963092804, - "p90": 38.11199963092804, - "p99": 45.43999955058098 + "p50": 41.919998824596405, + "p90": 45.120999217033386, + "p95": 46.59999907016754, + "p99": 50.84000155329704 }, "combine": { - "p50": 35.679999738931656, - "p90": 35.679999738931656, - "p99": 141.50400459766388 + "p50": 19.79999989271164, + "p90": 21.27999998629093, + "p95": 23.16099964082241, + "p99": 25.400999933481216 }, - "serial": { - "p50": 73.7919993698597, - "p90": 73.7919993698597, - "p99": 186.94400414824486 + "roundtrip": { + "p50": 61.51999905705452, + "p90": 64.40100073814392, + "p95": 65.80100208520889, + "p99": 68.24000179767609 }, - "dispatchLogicalBytes": 1835008, - "combineLogicalBytes": 3670016, + "isolatedSum": { + "p50": 61.719998717308044, + "p90": 66.40099920332432, + "p95": 69.76099871098995, + "p99": 76.24100148677826 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, "fanoutMean": 5.40625, - "recvTokensMax": 39, + "recvTokensMax": 29, + "stragglerRank": 1, "correct": true, - "samplesPooled": null, - "trials": null + "samplesPooled": 600, + "trials": 3 }, { "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 42.94399917125702, - "p90": 42.94399917125702, - "p99": 46.52800038456917 + "p50": 42.121000587940216, + "p90": 45.04000023007393, + "p95": 46.31999880075455, + "p99": 50.641000270843506 }, "combine": { - "p50": 36.99199855327606, - "p90": 36.99199855327606, - "p99": 45.3759990632534 + "p50": 21.04100026190281, + "p90": 22.95999974012375, + "p95": 24.6799997985363, + "p99": 
26.920000091195107 }, - "serial": { - "p50": 79.93599772453308, - "p90": 79.93599772453308, - "p99": 91.90399944782257 + "roundtrip": { + "p50": 62.20100075006485, + "p90": 66.39999896287918, + "p95": 68.59999895095825, + "p99": 95.88100016117096 }, - "dispatchLogicalBytes": 3670016, - "combineLogicalBytes": 7340032, + "isolatedSum": { + "p50": 63.162000849843025, + "p90": 67.99999997019768, + "p95": 70.99999859929085, + "p99": 77.56100036203861 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, "fanoutMean": 5.421875, - "recvTokensMax": 74, + "recvTokensMax": 47, + "stragglerRank": 1, "correct": true, - "samplesPooled": null, - "trials": null + "samplesPooled": 600, + "trials": 3 }, { "tokensPerRank": 16, "globalTokens": 128, "dispatch": { - "p50": 43.42399910092354, - "p90": 43.42399910092354, - "p99": 59.20000001788139 - }, - "combine": { - "p50": 39.48799893260002, - "p90": 39.48799893260002, - "p99": 49.375999718904495 - }, - "serial": { - "p50": 82.91199803352356, - "p90": 82.91199803352356, - "p99": 108.57599973678589 - }, - "dispatchLogicalBytes": 7340032, - "combineLogicalBytes": 14680064, - "fanoutMean": 5.40625, - "recvTokensMax": 145, - "correct": true, - "samplesPooled": null, - "trials": null - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 43.935999274253845, - "p90": 43.935999274253845, - "p99": 51.93600058555603 - }, - "combine": { - "p50": 45.27999833226204, + "p50": 42.281001806259155, "p90": 45.27999833226204, - "p99": 63.58399987220764 - }, - "serial": { - "p50": 89.21599760651588, - "p90": 89.21599760651588, - "p99": 115.52000045776367 - }, - "dispatchLogicalBytes": 14680064, - "combineLogicalBytes": 29360128, - "fanoutMean": 5.375, - "recvTokensMax": 287, - "correct": true, - "samplesPooled": null, - "trials": null - }, - { - "tokensPerRank": 64, - "globalTokens": 512, - "dispatch": { - "p50": 51.19999870657921, - "p90": 51.19999870657921, - "p99": 
54.59199845790863 + "p95": 46.51999846100807, + "p99": 49.320001155138016 }, "combine": { - "p50": 57.5999990105629, - "p90": 57.5999990105629, - "p99": 63.968002796173096 - }, - "serial": { - "p50": 108.7999977171421, - "p90": 108.7999977171421, - "p99": 118.56000125408173 - }, - "dispatchLogicalBytes": 29360128, - "combineLogicalBytes": 58720256, - "fanoutMean": 5.3125, - "recvTokensMax": 564, - "correct": true, - "samplesPooled": null, - "trials": null - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 61.344001442193985, - "p90": 61.344001442193985, - "p99": 68.60800087451935 + "p50": 25.919999927282333, + "p90": 28.080999851226807, + "p95": 29.559999704360962, + "p99": 32.35999867320061 }, - "combine": { - "p50": 84.3840017914772, - "p90": 84.3840017914772, - "p99": 88.92799913883209 + "roundtrip": { + "p50": 67.31999665498734, + "p90": 70.2809989452362, + "p95": 71.40100002288818, + "p99": 74.16000217199326 }, - "serial": { - "p50": 145.7280032336712, - "p90": 145.7280032336712, - "p99": 157.53600001335144 + "isolatedSum": { + "p50": 68.20100173354149, + "p90": 73.36099818348885, + "p95": 76.07999816536903, + "p99": 81.67999982833862 }, - "dispatchLogicalBytes": 58720256, - "combineLogicalBytes": 117440512, - "fanoutMean": 5.291015625, - "recvTokensMax": 1104, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 1, "correct": true, - "samplesPooled": null, - "trials": null + "samplesPooled": 600, + "trials": 3 } ] }, { - "id": "cx-e75ae4f0ebd83f3c", - "identity": "h100|deepep|prefill|normal|tuned|standardized|comm-only-v1|h100-nvlink-island|8|8|bf16|uniform|7168|8|256", - "stitchKey": "h100|deepep|normal|tuned|standardized|comm-only-v1|h100-nvlink-island|8|8|bf16|uniform|7168|8|256", - "colorKey": "h100_a6184024", - "schemaVersion": 2, - "generatedAt": "2026-06-24T22:50:10.363193+00:00", + "id": "cx-83a44089", + 
"identity": "mi355x|mori|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|decode|normal|none|none|0|tuned||2c22646e864c27e", + "colorKey": "mi355x_eb5b377e", + "comparisonKey": "5bbe7a250a72d8b4", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:58:24.839410+00:00", "status": "valid", - "sku": "h100", - "backend": "deepep", - "phase": "prefill", + "publicationStatus": "official", + "runner": "mi355x-amds_01", + "sku": "mi355x", + "backend": "mori", + "phase": "decode", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "comm-only-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "mi355x-xgmi", + "transport": "xgmi", "worldSize": 8, "epSize": 8, - "label": "H100 · deepep · bf16 · EP8 · comm only", + "label": "MI355X EP8 · mori · bf16 · balanced", "shape": { "hidden": 7168, "topk": 8, "experts": 256, - "routing": "uniform", - "dispatchDtype": "bf16" + "routing": "balanced", + "routingLabel": "balanced", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" }, - "routingConsistent": null, - "traceSignature": null, - "backendVersion": "1.2.1", + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.3125, + "configuredUnits": 80, + "deviceUnits": 256, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "2c22646e864c27e", + "workloadId": "set:5:7af12818400d6348", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "image:rocm/sgl-dev:sglang-0.5.9-rocm720-mi35x-mori-0227-2", 
+ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28134642131", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28134642131", - "createdAt": "2026-06-24T22:49:12Z", - "sha": "9f85d054303e23b24e720ca6cb472b6a8eba3754" + "id": "28271906612", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271906612", + "createdAt": "2026-06-26T23:57:30Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" }, "rows": [ { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 1, + "globalTokens": 8, "dispatch": { - "p50": 132.76800513267517, - "p90": 132.76800513267517, - "p99": 144.67200636863708 + "p50": 40.36099836230278, + "p90": 43.44100132584572, + "p95": 44.60100084543228, + "p99": 48.920001834630966 }, "combine": { - "p50": 113.0559965968132, - "p90": 113.0559965968132, - "p99": 124.51200187206268 - }, - "serial": { - "p50": 245.82400172948837, - "p90": 245.82400172948837, - "p99": 269.18400824069977 - }, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "correct": true, - "samplesPooled": null, - "trials": null - }, - { - "tokensPerRank": 256, - "globalTokens": 2048, - "dispatch": { - "p50": 161.02400422096252, - "p90": 161.02400422096252, - "p99": 182.20800161361694 + "p50": 16.3199994713068, + "p90": 18.880000337958336, + "p95": 19.88000050187111, + "p99": 21.880999207496643 }, - "combine": { - "p50": 160.5760008096695, - "p90": 160.5760008096695, - "p99": 170.0800061225891 + "roundtrip": { + "p50": 57.20100179314613, + "p90": 60.63999980688095, + "p95": 61.72100082039833, + "p99": 64.56000357866287 }, - "serial": { - "p50": 321.600005030632, - "p90": 321.600005030632, - "p99": 352.28800773620605 + "isolatedSum": { + "p50": 56.68099783360958, + "p90": 62.321001663804054, + "p95": 64.48100134730339, + "p99": 70.80100104212761 }, - 
"dispatchLogicalBytes": 155889664, - "combineLogicalBytes": 155889664, - "fanoutMean": 5.3095703125, - "recvTokensMax": 1422, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 8, + "recvTokensMax": 8, + "stragglerRank": 6, "correct": true, - "samplesPooled": null, - "trials": null + "samplesPooled": 600, + "trials": 3 }, { - "tokensPerRank": 512, - "globalTokens": 4096, + "tokensPerRank": 2, + "globalTokens": 16, "dispatch": { - "p50": 213.34399282932281, - "p90": 213.34399282932281, - "p99": 228.09599339962006 + "p50": 42.64099895954132, + "p90": 45.680999755859375, + "p95": 47.2010001540184, + "p99": 49.47999864816666 }, "combine": { - "p50": 236.64000630378723, - "p90": 236.64000630378723, - "p99": 244.57600712776184 + "p50": 16.519999131560326, + "p90": 18.92000064253807, + "p95": 20.080000162124634, + "p99": 21.801000460982323 }, - "serial": { - "p50": 449.98399913311005, - "p90": 449.98399913311005, - "p99": 472.6720005273819 + "roundtrip": { + "p50": 59.52100083231926, + "p90": 62.67999857664108, + "p95": 63.84100019931793, + "p99": 66.96099787950516 }, - "dispatchLogicalBytes": 312266752, - "combineLogicalBytes": 312266752, - "fanoutMean": 5.31787109375, - "recvTokensMax": 2779, + "isolatedSum": { + "p50": 59.160998091101646, + "p90": 64.60100039839745, + "p95": 67.28100031614304, + "p99": 71.28099910914898 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1835008, + "combineLogicalBytes": 1835008, + "fanoutMean": 8, + "recvTokensMax": 16, + "stragglerRank": 6, "correct": true, - "samplesPooled": null, - "trials": null + "samplesPooled": 600, + "trials": 3 }, { - "tokensPerRank": 1024, - "globalTokens": 8192, + "tokensPerRank": 4, + "globalTokens": 32, "dispatch": { - "p50": 320.8959996700287, - "p90": 320.8959996700287, - "p99": 335.58401465415955 + "p50": 42.64000058174133, + "p90": 45.8809994161129, + "p95": 47.00100049376488, + "p99": 49.959998577833176 }, "combine": { - "p50": 
368.3199882507324, - "p90": 368.3199882507324, - "p99": 377.21601128578186 + "p50": 20.759999752044678, + "p90": 23.600000888109207, + "p95": 24.480000138282776, + "p99": 26.760000735521317 }, - "serial": { - "p50": 689.2159879207611, - "p90": 689.2159879207611, - "p99": 712.8000259399414 + "roundtrip": { + "p50": 64.12000209093094, + "p90": 67.08099693059921, + "p95": 67.88100302219391, + "p99": 70.36100327968597 }, - "dispatchLogicalBytes": 623443968, - "combineLogicalBytes": 623443968, - "fanoutMean": 5.30859375, - "recvTokensMax": 5505, + "isolatedSum": { + "p50": 63.40000033378601, + "p90": 69.4810003042221, + "p95": 71.48100063204765, + "p99": 76.71999931335449 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 8, + "recvTokensMax": 32, + "stragglerRank": 6, "correct": true, - "samplesPooled": null, - "trials": null + "samplesPooled": 600, + "trials": 3 }, { - "tokensPerRank": 2048, - "globalTokens": 16384, + "tokensPerRank": 8, + "globalTokens": 64, "dispatch": { - "p50": 519.2639827728271, - "p90": 519.2639827728271, - "p99": 541.1520004272461 + "p50": 42.7200011909008, + "p90": 45.88000103831291, + "p95": 47.36100137233734, + "p99": 49.60000142455101 }, "combine": { - "p50": 632.9600214958191, - "p90": 632.9600214958191, - "p99": 643.8400149345398 + "p50": 22.679999470710754, + "p90": 25.280000641942024, + "p95": 26.159999892115593, + "p99": 27.240000665187836 }, - "serial": { - "p50": 1152.2240042686462, - "p90": 1152.2240042686462, - "p99": 1184.992015361786 + "roundtrip": { + "p50": 65.72099775075912, + "p90": 68.64099949598312, + "p95": 69.64000314474106, + "p99": 72.2000002861023 }, - "dispatchLogicalBytes": 1243805696, - "combineLogicalBytes": 1243805696, - "fanoutMean": 5.29547119140625, - "recvTokensMax": 10952, + "isolatedSum": { + "p50": 65.40000066161156, + "p90": 71.16000168025494, + "p95": 73.52100126445293, + "p99": 76.84000208973885 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 7340032, + "combineLogicalBytes": 7340032, + "fanoutMean": 8, + "recvTokensMax": 64, + "stragglerRank": 6, "correct": true, - "samplesPooled": null, - "trials": null + "samplesPooled": 600, + "trials": 3 }, { - "tokensPerRank": 4096, - "globalTokens": 32768, + "tokensPerRank": 16, + "globalTokens": 128, "dispatch": { - "p50": 1002.560019493103, - "p90": 1002.560019493103, - "p99": 1047.1359491348267 + "p50": 42.87999868392944, + "p90": 45.88000103831291, + "p95": 46.959999948740005, + "p99": 48.79999905824661 }, "combine": { - "p50": 1161.5040302276611, - "p90": 1161.5040302276611, - "p99": 1181.3440322875977 + "p50": 28.119999915361404, + "p90": 30.44000081717968, + "p95": 31.401000916957855, + "p99": 33.640000969171524 }, - "serial": { - "p50": 2164.064049720764, - "p90": 2164.064049720764, - "p99": 2228.4799814224243 + "roundtrip": { + "p50": 71.80000096559525, + "p90": 75.15999674797058, + "p95": 76.39999687671661, + "p99": 78.31999659538269 }, - "dispatchLogicalBytes": 2487009280, - "combineLogicalBytes": 2487009280, - "fanoutMean": 5.294189453125, - "recvTokensMax": 21781, + "isolatedSum": { + "p50": 70.99999859929085, + "p90": 76.32000185549259, + "p95": 78.36100086569786, + "p99": 82.44000002741814 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 8, + "recvTokensMax": 128, + "stragglerRank": 6, "correct": true, - "samplesPooled": null, - "trials": null + "samplesPooled": 600, + "trials": 3 } ] }, { - "id": "cx-18e4deb42cbe9c55", - "identity": "h100|deepep|prefill|normal|tuned|standardized|comm-only-v1|h100-nvlink-island|8|8|fp8|uniform|7168|8|256", - "stitchKey": "h100|deepep|normal|tuned|standardized|comm-only-v1|h100-nvlink-island|8|8|fp8|uniform|7168|8|256", - "colorKey": "h100_426025cb", - "schemaVersion": 2, - "generatedAt": "2026-06-24T23:10:20.402829+00:00", + "id": "cx-c1291ad7", + "identity": 
"mi355x|mori|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|8|decode|normal|none|none|0|tuned||15d7289bb70ed17", + "colorKey": "mi355x_ae729691", + "comparisonKey": "730c294e090417f2", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:59:10.167624+00:00", "status": "valid", - "sku": "h100", - "backend": "deepep", - "phase": "prefill", + "publicationStatus": "official", + "runner": "mi355x-amds_06", + "sku": "mi355x", + "backend": "mori", + "phase": "decode", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "comm-only-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "mi355x-xgmi", + "transport": "xgmi", "worldSize": 8, "epSize": 8, - "label": "H100 · deepep · fp8 · EP8 · comm only", + "label": "MI355X EP8 · mori · bf16 · balanced-rank-local", "shape": { "hidden": 7168, "topk": 8, "experts": 256, - "routing": "uniform", - "dispatchDtype": "fp8" + "routing": "balanced-rank-local", + "routingLabel": "balanced-rank-local", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" }, - "routingConsistent": null, - "traceSignature": null, - "backendVersion": "1.2.1", + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.3125, + "configuredUnits": 80, + "deviceUnits": 256, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "15d7289bb70ed17", + "workloadId": "set:5:2eebbed158fe1320", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": 
"image:rocm/sgl-dev:sglang-0.5.9-rocm720-mi35x-mori-0227-2", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28135446264", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28135446264", - "createdAt": "2026-06-24T23:08:07Z", - "sha": "e71ef3c2a0465a357771c14935dd0807dc1da165" + "id": "28271910050", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271910050", + "createdAt": "2026-06-26T23:57:37Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" }, "rows": [ { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 1, + "globalTokens": 8, "dispatch": { - "p50": 111.58400028944016, - "p90": 111.58400028944016, - "p99": 128.7039965391159 + "p50": 36.80099919438362, + "p90": 39.80100154876709, + "p95": 40.76100140810013, + "p99": 43.63999888300896 }, "combine": { - "p50": 104.5759990811348, - "p90": 104.5759990811348, - "p99": 111.07199639081955 - }, - "serial": { - "p50": 216.15999937057495, - "p90": 216.15999937057495, - "p99": 239.77599292993546 - }, - "dispatchLogicalBytes": 38836224, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "correct": true, - "samplesPooled": null, - "trials": null - }, - { - "tokensPerRank": 256, - "globalTokens": 2048, - "dispatch": { - "p50": 127.48800218105316, - "p90": 127.48800218105316, - "p99": 147.35999703407288 + "p50": 15.320000238716602, + "p90": 17.480000853538513, + "p95": 18.68000067770481, + "p99": 20.999999716877937 }, - "combine": { - "p50": 148.95999431610107, - "p90": 148.95999431610107, - "p99": 158.01599621772766 + "roundtrip": { + "p50": 49.07999932765961, + "p90": 51.80000141263008, + "p95": 52.76099964976311, + "p99": 53.76100167632103 }, - "serial": { - "p50": 276.44799649715424, - "p90": 276.44799649715424, - "p99": 305.37599325180054 + "isolatedSum": { + "p50": 52.12099943310022, + "p90": 57.2810024023056, + 
"p95": 59.44100208580494, + "p99": 64.6399985998869 }, - "dispatchLogicalBytes": 77944832, - "combineLogicalBytes": 155889664, - "fanoutMean": 5.3095703125, - "recvTokensMax": 1422, + "roundtripMeasured": true, + "dispatchLogicalBytes": 114688, + "combineLogicalBytes": 114688, + "fanoutMean": 1, + "recvTokensMax": 4, + "stragglerRank": 5, "correct": true, - "samplesPooled": null, - "trials": null + "samplesPooled": 600, + "trials": 3 }, { - "tokensPerRank": 512, - "globalTokens": 4096, + "tokensPerRank": 2, + "globalTokens": 16, "dispatch": { - "p50": 167.55199432373047, - "p90": 167.55199432373047, - "p99": 179.45599555969238 + "p50": 38.07999938726425, + "p90": 40.39999842643738, + "p95": 41.20099917054176, + "p99": 42.80000180006027 }, "combine": { - "p50": 231.00799322128296, - "p90": 231.00799322128296, - "p99": 240.60800671577454 + "p50": 15.799999237060547, + "p90": 17.999999225139618, + "p95": 19.279999658465385, + "p99": 21.040000021457672 }, - "serial": { - "p50": 398.5599875450134, - "p90": 398.5599875450134, - "p99": 420.0640022754669 + "roundtrip": { + "p50": 51.600001752376556, + "p90": 53.92000079154968, + "p95": 55.24099990725517, + "p99": 57.32100084424019 }, - "dispatchLogicalBytes": 156133376, - "combineLogicalBytes": 312266752, - "fanoutMean": 5.31787109375, - "recvTokensMax": 2779, + "isolatedSum": { + "p50": 53.8799986243248, + "p90": 58.399997651576996, + "p95": 60.48099882900715, + "p99": 63.840001821517944 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 229376, + "combineLogicalBytes": 229376, + "fanoutMean": 1, + "recvTokensMax": 4, + "stragglerRank": 5, "correct": true, - "samplesPooled": null, - "trials": null + "samplesPooled": 600, + "trials": 3 }, { - "tokensPerRank": 1024, - "globalTokens": 8192, + "tokensPerRank": 4, + "globalTokens": 32, "dispatch": { - "p50": 238.75199258327484, - "p90": 238.75199258327484, - "p99": 276.95998549461365 + "p50": 32.71999955177307, + "p90": 35.5600006878376, + "p95": 36.559998989105225, + 
"p99": 39.000000804662704 }, "combine": { - "p50": 366.8479919433594, - "p90": 366.8479919433594, - "p99": 377.1840035915375 + "p50": 13.72000016272068, + "p90": 15.799999237060547, + "p95": 16.599999740719795, + "p99": 18.120000138878822 }, - "serial": { - "p50": 605.5999845266342, - "p90": 605.5999845266342, - "p99": 654.1439890861511 + "roundtrip": { + "p50": 45.71999981999397, + "p90": 49.04000088572502, + "p95": 49.96100068092346, + "p99": 51.44000053405762 }, - "dispatchLogicalBytes": 311721984, - "combineLogicalBytes": 623443968, - "fanoutMean": 5.30859375, - "recvTokensMax": 5505, + "isolatedSum": { + "p50": 46.43999971449375, + "p90": 51.35999992489815, + "p95": 53.15999872982502, + "p99": 57.12000094354153 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 458752, + "combineLogicalBytes": 458752, + "fanoutMean": 1, + "recvTokensMax": 4, + "stragglerRank": 5, "correct": true, - "samplesPooled": null, - "trials": null + "samplesPooled": 600, + "trials": 3 }, { - "tokensPerRank": 2048, - "globalTokens": 16384, + "tokensPerRank": 8, + "globalTokens": 64, "dispatch": { - "p50": 371.9039857387543, - "p90": 371.9039857387543, - "p99": 398.81598949432373 + "p50": 37.84099966287613, + "p90": 40.92000052332878, + "p95": 41.999999433755875, + "p99": 43.880000710487366 }, "combine": { - "p50": 628.607988357544, - "p90": 628.607988357544, - "p99": 644.5119976997375 + "p50": 14.919999986886978, + "p90": 17.27999933063984, + "p95": 18.039999529719353, + "p99": 19.55999992787838 }, - "serial": { - "p50": 1000.5119740962982, - "p90": 1000.5119740962982, - "p99": 1043.3279871940613 + "roundtrip": { + "p50": 52.241001278162, + "p90": 55.75999990105629, + "p95": 56.68000131845474, + "p99": 58.35999920964241 }, - "dispatchLogicalBytes": 621902848, - "combineLogicalBytes": 1243805696, - "fanoutMean": 5.29547119140625, - "recvTokensMax": 10952, + "isolatedSum": { + "p50": 52.76099964976311, + "p90": 58.19999985396862, + "p95": 60.03999896347523, + "p99": 
63.440000638365746 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 1, + "recvTokensMax": 8, + "stragglerRank": 5, "correct": true, - "samplesPooled": null, - "trials": null + "samplesPooled": 600, + "trials": 3 }, { - "tokensPerRank": 4096, - "globalTokens": 32768, + "tokensPerRank": 16, + "globalTokens": 128, "dispatch": { - "p50": 652.6399850845337, - "p90": 652.6399850845337, - "p99": 670.304000377655 + "p50": 38.24099898338318, + "p90": 40.92000052332878, + "p95": 41.839998215436935, + "p99": 44.16000097990036 }, "combine": { - "p50": 1157.3439836502075, - "p90": 1157.3439836502075, - "p99": 1173.0560064315796 + "p50": 16.24000072479248, + "p90": 18.841000273823738, + "p95": 19.88000050187111, + "p99": 22.280000150203705 }, - "serial": { - "p50": 1809.9839687347412, - "p90": 1809.9839687347412, - "p99": 1843.3600068092346 + "roundtrip": { + "p50": 54.28000167012215, + "p90": 57.840000838041306, + "p95": 58.800000697374344, + "p99": 60.96100062131882 }, - "dispatchLogicalBytes": 1243504640, - "combineLogicalBytes": 2487009280, - "fanoutMean": 5.294189453125, - "recvTokensMax": 21781, + "isolatedSum": { + "p50": 54.48099970817566, + "p90": 59.76100079715252, + "p95": 61.719998717308044, + "p99": 66.44000113010406 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1835008, + "combineLogicalBytes": 1835008, + "fanoutMean": 1, + "recvTokensMax": 16, + "stragglerRank": 5, "correct": true, - "samplesPooled": null, - "trials": null + "samplesPooled": 600, + "trials": 3 } ] }, { - "id": "cx-e49b5d3ba04f7fb1", - "identity": "mi355x|mori|decode|normal|tuned|standardized|mori-normal-v1|mi355x-xgmi|8|8|bf16|balanced|7168|8|256", - "stitchKey": "mi355x|mori|normal|tuned|standardized|mori-normal-v1|mi355x-xgmi|8|8|bf16|balanced|7168|8|256", - "colorKey": "mi355x_f3b49abd", - "schemaVersion": 1, - "generatedAt": "2026-06-24T01:58:52.129674+00:00", + "id": "cx-ace78f17", + "identity": 
"mi355x|mori|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|decode|normal|none|none|0|tuned||c8b7839b4895c1a", + "colorKey": "mi355x_62dc5cd4", + "comparisonKey": "316ae2638347880f", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:01:29.418642+00:00", "status": "valid", + "publicationStatus": "official", + "runner": "mi355x-amds_00", "sku": "mi355x", "backend": "mori", "phase": "decode", @@ -2235,469 +71871,600 @@ "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "mori-normal-v1", + "measurementContract": "layout-and-dispatch-v1", "topologyClass": "mi355x-xgmi", "transport": "xgmi", "worldSize": 8, "epSize": 8, - "label": "MI355X · mori · bf16 · EP8", + "label": "MI355X EP8 · mori · bf16 · hotspot-single", "shape": { "hidden": 7168, "topk": 8, "experts": 256, - "routing": "balanced", - "dispatchDtype": "bf16" + "routing": "hotspot-single", + "routingLabel": "hotspot-single", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" }, - "routingConsistent": null, - "traceSignature": null, - "backendVersion": "unknown", + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.3125, + "configuredUnits": 80, + "deviceUnits": 256, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "c8b7839b4895c1a", + "workloadId": "set:5:286be993cd819ed9", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "image:rocm/sgl-dev:sglang-0.5.9-rocm720-mi35x-mori-0227-2", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": 
"SemiAnalysisAI/InferenceX", "run": { - "id": "28069889124", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28069889124", - "createdAt": "2026-06-24T01:57:55Z", - "sha": "e2717a341cf1514d4be6393db16121889db7bf19" + "id": "28271920340", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271920340", + "createdAt": "2026-06-26T23:57:58Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" }, "rows": [ { "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 40.160998702049255, - "p90": 40.160998702049255, - "p99": 126.64000689983368 + "p50": 39.84000161290169, + "p90": 42.55999997258186, + "p95": 44.08000037074089, + "p99": 48.601001501083374 }, "combine": { - "p50": 17.160000279545784, - "p90": 17.160000279545784, - "p99": 22.840000689029694 + "p50": 16.200000420212746, + "p90": 17.960000783205032, + "p95": 19.07999999821186, + "p99": 21.640000864863396 }, - "serial": { - "p50": 54.91999909281731, - "p90": 54.91999909281731, - "p99": 78.47999781370163 + "roundtrip": { + "p50": 55.44000118970871, + "p90": 58.27999860048294, + "p95": 59.20099839568138, + "p99": 60.920000076293945 }, - "dispatchLogicalBytes": 114688, - "combineLogicalBytes": 0, - "fanoutMean": null, + "isolatedSum": { + "p50": 56.04000203311443, + "p90": 60.520000755786896, + "p95": 63.16000036895275, + "p99": 70.24100236594677 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, "recvTokensMax": 8, + "stragglerRank": 5, "correct": true, - "samplesPooled": null, - "trials": null + "samplesPooled": 600, + "trials": 3 }, { "tokensPerRank": 2, "globalTokens": 16, "dispatch": { - "p50": 42.281001806259155, - "p90": 42.281001806259155, - "p99": 74.60100203752518 + "p50": 42.1609990298748, + "p90": 44.920001178979874, + "p95": 45.80099880695343, + "p99": 47.800999134778976 }, "combine": { - "p50": 17.35999993979931, - "p90": 17.35999993979931, - "p99": 22.80000038444996 + "p50": 
16.07999950647354, + "p90": 18.401000648736954, + "p95": 19.279999658465385, + "p99": 20.880000665783882 }, - "serial": { - "p50": 58.52099880576134, - "p90": 58.52099880576134, - "p99": 77.4800032377243 + "roundtrip": { + "p50": 58.35999920964241, + "p90": 61.56099960207939, + "p95": 62.60000169277191, + "p99": 64.7599995136261 }, - "dispatchLogicalBytes": 200704, - "combineLogicalBytes": 0, - "fanoutMean": null, - "recvTokensMax": 14, + "isolatedSum": { + "p50": 58.24099853634834, + "p90": 63.32100182771683, + "p95": 65.08099846541882, + "p99": 68.68099980056286 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1247232, + "combineLogicalBytes": 1247232, + "fanoutMean": 5.4375, + "recvTokensMax": 16, + "stragglerRank": 6, "correct": true, - "samplesPooled": null, - "trials": null + "samplesPooled": 600, + "trials": 3 }, { "tokensPerRank": 4, "globalTokens": 32, "dispatch": { - "p50": 42.48100146651268, - "p90": 42.48100146651268, - "p99": 62.36099824309349 + "p50": 41.96000099182129, + "p90": 44.599998742341995, + "p95": 45.96000164747238, + "p99": 48.16100001335144 }, "combine": { - "p50": 19.801000133156776, - "p90": 19.801000133156776, - "p99": 29.839999973773956 + "p50": 19.401000812649727, + "p90": 21.880000829696655, + "p95": 23.080000653862953, + "p99": 24.12099950015545 }, - "serial": { - "p50": 61.921000480651855, - "p90": 61.921000480651855, - "p99": 77.7209997177124 + "roundtrip": { + "p50": 61.68099865317345, + "p90": 65.20099937915802, + "p95": 65.99999964237213, + "p99": 67.4000009894371 }, - "dispatchLogicalBytes": 358400, - "combineLogicalBytes": 0, - "fanoutMean": null, - "recvTokensMax": 25, + "isolatedSum": { + "p50": 61.361001804471016, + "p90": 66.47999957203865, + "p95": 69.04000230133533, + "p99": 72.28199951350689 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2451456, + "combineLogicalBytes": 2451456, + "fanoutMean": 5.34375, + "recvTokensMax": 32, + "stragglerRank": 5, "correct": true, - "samplesPooled": null, - 
"trials": null + "samplesPooled": 600, + "trials": 3 }, { "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 42.27999970316887, - "p90": 42.27999970316887, - "p99": 76.68100297451019 + "p50": 41.80099815130234, + "p90": 44.2809984087944, + "p95": 45.559998601675034, + "p99": 48.39999973773956 }, "combine": { - "p50": 21.43999934196472, - "p90": 21.43999934196472, - "p99": 28.960000723600388 + "p50": 21.239999681711197, + "p90": 23.19999970495701, + "p95": 24.080000817775726, + "p99": 26.040000841021538 }, - "serial": { - "p50": 61.88099831342697, - "p90": 61.88099831342697, - "p99": 90.40100127458572 + "roundtrip": { + "p50": 62.960997223854065, + "p90": 66.041000187397, + "p95": 66.91999733448029, + "p99": 68.71999800205231 }, - "dispatchLogicalBytes": 673792, - "combineLogicalBytes": 0, - "fanoutMean": null, - "recvTokensMax": 47, + "isolatedSum": { + "p50": 63.040997833013535, + "p90": 67.48099811375141, + "p95": 69.63999941945076, + "p99": 74.4400005787611 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 64, + "stragglerRank": 6, "correct": true, - "samplesPooled": null, - "trials": null + "samplesPooled": 600, + "trials": 3 }, { "tokensPerRank": 16, "globalTokens": 128, "dispatch": { - "p50": 42.281001806259155, - "p90": 42.281001806259155, - "p99": 92.56000071763992 - }, - "combine": { - "p50": 25.200000032782555, - "p90": 25.200000032782555, - "p99": 30.280999839305878 - }, - "serial": { - "p50": 66.96099787950516, - "p90": 66.96099787950516, - "p99": 90.96100181341171 - }, - "dispatchLogicalBytes": 1304576, - "combineLogicalBytes": 0, - "fanoutMean": null, - "recvTokensMax": 91, - "correct": true, - "samplesPooled": null, - "trials": null - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 42.59999841451645, - "p90": 42.59999841451645, - "p99": 66.08100235462189 - }, - "combine": { - "p50": 32.760001718997955, - "p90": 
32.760001718997955, - "p99": 37.79999911785126 - }, - "serial": { - "p50": 75.48099756240845, - "p90": 75.48099756240845, - "p99": 99.60100054740906 - }, - "dispatchLogicalBytes": 2566144, - "combineLogicalBytes": 0, - "fanoutMean": null, - "recvTokensMax": 179, - "correct": true, - "samplesPooled": null, - "trials": null - }, - { - "tokensPerRank": 64, - "globalTokens": 512, - "dispatch": { - "p50": 45.40000110864639, - "p90": 45.40000110864639, - "p99": 123.52100014686584 + "p50": 42.1609990298748, + "p90": 45.00000178813934, + "p95": 45.96000164747238, + "p99": 50.40000006556511 }, "combine": { - "p50": 41.00099951028824, - "p90": 41.00099951028824, - "p99": 45.841000974178314 - }, - "serial": { - "p50": 84.88000184297562, - "p90": 84.88000184297562, - "p99": 110.96099764108658 - }, - "dispatchLogicalBytes": 5060608, - "combineLogicalBytes": 0, - "fanoutMean": null, - "recvTokensMax": 353, - "correct": true, - "samplesPooled": null, - "trials": null - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 59.67999994754791, - "p90": 59.67999994754791, - "p99": 113.52100223302841 + "p50": 26.599999517202377, + "p90": 28.68100069463253, + "p95": 29.96000088751316, + "p99": 31.720001250505447 }, - "combine": { - "p50": 52.68099904060364, - "p90": 52.68099904060364, - "p99": 61.68099865317345 + "roundtrip": { + "p50": 69.20100003480911, + "p90": 71.76099717617035, + "p95": 72.7199986577034, + "p99": 74.16000217199326 }, - "serial": { - "p50": 111.40099912881851, - "p90": 111.40099912881851, - "p99": 135.76200604438782 + "isolatedSum": { + "p50": 68.76099854707718, + "p90": 73.68100248277187, + "p95": 75.92000253498554, + "p99": 82.12000131607056 }, - "dispatchLogicalBytes": 10106880, - "combineLogicalBytes": 0, - "fanoutMean": null, - "recvTokensMax": 705, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9748480, + "combineLogicalBytes": 9748480, + "fanoutMean": 5.3125, + "recvTokensMax": 128, + "stragglerRank": 5, "correct": true, - 
"samplesPooled": null, - "trials": null + "samplesPooled": 600, + "trials": 3 } ] }, { - "id": "cx-f9fac8841a429302", - "identity": "mi355x|mori|decode|normal|normalized|standardized|comm-only-v1|mi355x-xgmi|8|8|bf16|uniform|7168|8|256", - "stitchKey": "mi355x|mori|normal|normalized|standardized|comm-only-v1|mi355x-xgmi|8|8|bf16|uniform|7168|8|256", - "colorKey": "mi355x_d20dd52c", - "schemaVersion": 2, - "generatedAt": "2026-06-24T23:43:48.833883+00:00", + "id": "cx-2129d47b", + "identity": "mi355x|mori|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|decode|normal|none|none|0|tuned||4d5546b3fb85130", + "colorKey": "mi355x_570d6605", + "comparisonKey": "1ea3da47c00f36f8", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:59:55.992554+00:00", "status": "valid", + "publicationStatus": "official", + "runner": "mi355x-amds_07", "sku": "mi355x", "backend": "mori", "phase": "decode", "mode": "normal", - "resourceMode": "normalized", - "suite": "resource-constrained", + "resourceMode": "tuned", + "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "comm-only-v1", + "measurementContract": "layout-and-dispatch-v1", "topologyClass": "mi355x-xgmi", "transport": "xgmi", "worldSize": 8, "epSize": 8, - "label": "MI355X · mori · bf16 · EP8 · normalized · comm only", + "label": "MI355X EP8 · mori · bf16 · zipf", "shape": { "hidden": 7168, "topk": 8, "experts": 256, - "routing": "uniform", - "dispatchDtype": "bf16" + "routing": "zipf", + "routingLabel": "zipf", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.3125, + "configuredUnits": 80, + "deviceUnits": 256, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false }, - "routingConsistent": null, - "traceSignature": null, + 
"placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "4d5546b3fb85130", + "workloadId": "set:5:f5576e2b712d38c3", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, "backendVersion": "image:rocm/sgl-dev:sglang-0.5.9-rocm720-mi35x-mori-0227-2", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28136838021", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28136838021", - "createdAt": "2026-06-24T23:42:57Z", - "sha": "bbe05780a0a0a73656024f4f9eb566db593b6d18" + "id": "28271913592", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271913592", + "createdAt": "2026-06-26T23:57:44Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" }, "rows": [ { "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 40.481001138687134, - "p90": 40.481001138687134, - "p99": 65.16099721193314 + "p50": 39.03999924659729, + "p90": 41.76099970936775, + "p95": 43.40000078082085, + "p99": 47.15999960899353 }, "combine": { - "p50": 15.440000221133232, - "p90": 15.440000221133232, - "p99": 25.60099959373474 + "p50": 16.359999775886536, + "p90": 18.519999459385872, + "p95": 20.12000046670437, + "p99": 23.40099960565567 }, - "serial": { - "p50": 55.921001359820366, - "p90": 55.921001359820366, - "p99": 90.76199680566788 + "roundtrip": { + "p50": 53.95999923348427, + "p90": 57.20100179314613, + "p95": 58.75999853014946, + "p99": 61.20099872350693 }, - "dispatchLogicalBytes": 630784, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 7, + "isolatedSum": { + "p50": 55.399999022483826, + "p90": 60.280999168753624, + "p95": 63.520001247525215, + "p99": 70.5609992146492 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 444416, + "combineLogicalBytes": 444416, + "fanoutMean": 
3.875, + "recvTokensMax": 8, + "stragglerRank": 3, "correct": true, - "samplesPooled": null, - "trials": null + "samplesPooled": 600, + "trials": 3 }, { "tokensPerRank": 2, "globalTokens": 16, "dispatch": { - "p50": 42.04000160098076, - "p90": 42.04000160098076, - "p99": 50.9210005402565 + "p50": 40.640998631715775, + "p90": 43.99999976158142, + "p95": 44.840000569820404, + "p99": 48.0009987950325 }, "combine": { - "p50": 16.039999201893806, - "p90": 16.039999201893806, - "p99": 21.239999681711197 + "p50": 16.519999131560326, + "p90": 18.561000004410744, + "p95": 20.24099975824356, + "p99": 23.520000278949738 }, - "serial": { - "p50": 58.080000802874565, - "p90": 58.080000802874565, - "p99": 72.1610002219677 + "roundtrip": { + "p50": 55.52000179886818, + "p90": 59.321001172065735, + "p95": 60.72099879384041, + "p99": 68.88099759817123 }, - "dispatchLogicalBytes": 1232896, - "combineLogicalBytes": 1232896, - "fanoutMean": 5.375, - "recvTokensMax": 13, + "isolatedSum": { + "p50": 57.1609977632761, + "p90": 62.560999765992165, + "p95": 65.08100032806396, + "p99": 71.52099907398224 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 845824, + "combineLogicalBytes": 845824, + "fanoutMean": 3.6875, + "recvTokensMax": 16, + "stragglerRank": 2, "correct": true, - "samplesPooled": null, - "trials": null + "samplesPooled": 600, + "trials": 3 }, { "tokensPerRank": 4, "globalTokens": 32, "dispatch": { - "p50": 41.79999977350235, - "p90": 41.79999977350235, - "p99": 50.20099878311157 + "p50": 41.28099977970123, + "p90": 44.16000097990036, + "p95": 45.00000178813934, + "p99": 47.68000170588493 }, "combine": { - "p50": 19.64000053703785, - "p90": 19.64000053703785, - "p99": 24.080000817775726 + "p50": 17.640000209212303, + "p90": 20.160000771284103, + "p95": 21.479999646544456, + "p99": 24.6799997985363 }, - "serial": { - "p50": 61.4400003105402, - "p90": 61.4400003105402, - "p99": 74.2809996008873 + "roundtrip": { + "p50": 59.04100090265274, + "p90": 63.07999789714813, + 
"p95": 64.87999856472015, + "p99": 68.83999705314636 }, - "dispatchLogicalBytes": 2480128, - "combineLogicalBytes": 2480128, - "fanoutMean": 5.40625, - "recvTokensMax": 29, + "isolatedSum": { + "p50": 58.920999988913536, + "p90": 64.32000175118446, + "p95": 66.4800014346838, + "p99": 72.36000150442123 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1691648, + "combineLogicalBytes": 1691648, + "fanoutMean": 3.6875, + "recvTokensMax": 32, + "stragglerRank": 2, "correct": true, - "samplesPooled": null, - "trials": null + "samplesPooled": 600, + "trials": 3 }, { "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 42.44000092148781, - "p90": 42.44000092148781, - "p99": 58.320000767707825 + "p50": 41.919998824596405, + "p90": 44.801000505685806, + "p95": 46.84000089764595, + "p99": 50.880998373031616 }, "combine": { - "p50": 19.76099982857704, - "p90": 19.76099982857704, - "p99": 24.879999458789825 + "p50": 19.600000232458115, + "p90": 22.120000794529915, + "p95": 23.520000278949738, + "p99": 26.799999177455902 }, - "serial": { - "p50": 62.20100075006485, - "p90": 62.20100075006485, - "p99": 83.20000022649765 + "roundtrip": { + "p50": 61.000000685453415, + "p90": 64.56000357866287, + "p95": 65.88099896907806, + "p99": 69.52100247144699 }, - "dispatchLogicalBytes": 4974592, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, + "isolatedSum": { + "p50": 61.51999905705452, + "p90": 66.92100130021572, + "p95": 70.36000117659569, + "p99": 77.68099755048752 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3354624, + "combineLogicalBytes": 3354624, + "fanoutMean": 3.65625, + "recvTokensMax": 64, + "stragglerRank": 3, "correct": true, - "samplesPooled": null, - "trials": null + "samplesPooled": 600, + "trials": 3 }, { "tokensPerRank": 16, "globalTokens": 128, "dispatch": { - "p50": 42.67999902367592, - "p90": 42.67999902367592, - "p99": 49.320999532938 + "p50": 41.839998215436935, + "p90": 44.920001178979874, + 
"p95": 46.92000150680542, + "p99": 50.1599982380867 }, "combine": { - "p50": 25.880999863147736, - "p90": 25.880999863147736, - "p99": 30.479999259114265 + "p50": 24.481000378727913, + "p90": 27.720000594854355, + "p95": 30.561000108718872, + "p99": 59.321001172065735 }, - "serial": { - "p50": 68.56099888682365, - "p90": 68.56099888682365, - "p99": 79.80099879205227 + "roundtrip": { + "p50": 66.23999774456024, + "p90": 69.36100125312805, + "p95": 70.47999650239944, + "p99": 73.36000353097916 }, - "dispatchLogicalBytes": 9920512, - "combineLogicalBytes": 9920512, - "fanoutMean": 5.40625, - "recvTokensMax": 92, + "isolatedSum": { + "p50": 66.32099859416485, + "p90": 72.64000177383423, + "p95": 77.48100161552429, + "p99": 109.48099941015244 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6537216, + "combineLogicalBytes": 6537216, + "fanoutMean": 3.5625, + "recvTokensMax": 127, + "stragglerRank": 3, "correct": true, - "samplesPooled": null, - "trials": null + "samplesPooled": 600, + "trials": 3 } ] }, { - "id": "cx-dd40bab3980be4a6", - "identity": "mi355x|mori|decode|normal|normalized|standardized|layout-and-dispatch-v1|mi355x-xgmi|8|8|bf16|uniform|7168|8|256", - "stitchKey": "mi355x|mori|normal|normalized|standardized|layout-and-dispatch-v1|mi355x-xgmi|8|8|bf16|uniform|7168|8|256", - "colorKey": "mi355x_ddffacd2", + "id": "cx-47886ba2", + "identity": "mi355x|mori|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|decode|normal|none|none|0|tuned||5c00b1a0c13aa3e", + "colorKey": "mi355x_6fd30e97", + "comparisonKey": "41d88b5d4da0110a", "schemaVersion": 3, - "generatedAt": "2026-06-25T09:08:45.533564+00:00", + "generatedAt": "2026-06-27T00:00:43.491121+00:00", "status": "valid", + "publicationStatus": "official", + "runner": "mi355x-amds_03", "sku": "mi355x", "backend": "mori", "phase": "decode", "mode": "normal", - "resourceMode": "normalized", - "suite": "resource-constrained", + "resourceMode": "tuned", + "suite": "backend-default", 
"comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", "topologyClass": "mi355x-xgmi", "transport": "xgmi", "worldSize": 8, "epSize": 8, - "label": "MI355X · mori · bf16 · EP8 · normalized · layout + dispatch", + "label": "MI355X EP8 · mori · bf16 · zipf-heavy", "shape": { "hidden": 7168, "topk": 8, "experts": 256, - "routing": "uniform", - "dispatchDtype": "bf16" + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.3125, + "configuredUnits": 80, + "deviceUnits": 256, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "c774c8e4abb34da", + "traceSignature": "5c00b1a0c13aa3e", + "workloadId": "set:5:6b84350720aa8233", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, "backendVersion": "image:rocm/sgl-dev:sglang-0.5.9-rocm720-mi35x-mori-0227-2", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28157318258", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28157318258", - "createdAt": "2026-06-25T08:30:18Z", - "sha": "e97bc8b22556293fe74207c68d4d0ea1cf8c7b4c" + "id": "28271916622", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271916622", + "createdAt": "2026-06-26T23:57:51Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" }, "rows": [ { "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 40.39900004863739, - "p90": 43.83999854326248, - "p99": 50.599999725818634 + 
"p50": 38.32000121474266, + "p90": 40.28100147843361, + "p95": 41.600000113248825, + "p99": 46.31999880075455 }, "combine": { - "p50": 16.039999201893806, - "p90": 18.120000138878822, - "p99": 22.360000759363174 + "p50": 15.720000490546227, + "p90": 17.03999936580658, + "p95": 18.640000373125076, + "p99": 20.800000056624413 }, - "serial": { - "p50": 56.4389992505312, - "p90": 61.959998682141304, - "p99": 72.96000048518181 + "roundtrip": { + "p50": 51.16099864244461, + "p90": 53.55999991297722, + "p95": 54.96000126004219, + "p99": 57.760998606681824 }, - "dispatchLogicalBytes": 630784, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 7, + "isolatedSum": { + "p50": 54.04000170528889, + "p90": 57.32100084424019, + "p95": 60.2400004863739, + "p99": 67.11999885737896 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 172032, + "combineLogicalBytes": 172032, + "fanoutMean": 1.5, + "recvTokensMax": 8, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 @@ -2706,24 +72473,35 @@ "tokensPerRank": 2, "globalTokens": 16, "dispatch": { - "p50": 42.24000126123428, - "p90": 45.32000049948692, - "p99": 50.48000067472458 + "p50": 40.47999903559685, + "p90": 42.64099895954132, + "p95": 44.47999969124794, + "p99": 48.760998994112015 }, "combine": { - "p50": 15.839999541640282, - "p90": 18.519999459385872, - "p99": 23.000000044703484 + "p50": 16.00099913775921, + "p90": 17.160000279545784, + "p95": 18.039999529719353, + "p99": 20.800000056624413 }, - "serial": { - "p50": 58.080000802874565, - "p90": 63.839999958872795, - "p99": 73.48000071942806 + "roundtrip": { + "p50": 53.16000059247017, + "p90": 56.07999861240387, + "p95": 57.64099955558777, + "p99": 60.08100137114525 }, - "dispatchLogicalBytes": 1232896, - "combineLogicalBytes": 1232896, - "fanoutMean": 5.375, - "recvTokensMax": 13, + "isolatedSum": { + "p50": 56.480998173356056, + "p90": 59.800999239087105, + "p95": 62.51999922096729, + "p99": 69.56099905073643 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 315392, + "fanoutMean": 1.375, + "recvTokensMax": 16, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 @@ -2732,24 +72510,35 @@ "tokensPerRank": 4, "globalTokens": 32, "dispatch": { - "p50": 42.080000042915344, - "p90": 44.95999962091446, - "p99": 52.880000323057175 + "p50": 41.200000792741776, + "p90": 43.241001665592194, + "p95": 44.52100023627281, + "p99": 48.280999064445496 }, "combine": { - "p50": 18.75999942421913, - "p90": 21.27999998629093, - "p99": 25.839999318122864 + "p50": 17.240000888705254, + "p90": 18.519999459385872, + "p95": 20.19999921321869, + "p99": 22.5210003554821 }, - "serial": { - "p50": 60.839999467134476, - "p90": 66.23999960720539, - "p99": 78.71999964118004 + "roundtrip": { + "p50": 56.561000645160675, + "p90": 59.241000562906265, + "p95": 60.440998524427414, + "p99": 64.4410029053688 }, - "dispatchLogicalBytes": 2480128, - "combineLogicalBytes": 2480128, - "fanoutMean": 5.40625, - "recvTokensMax": 29, + "isolatedSum": { + "p50": 58.44000168144703, + "p90": 61.761001124978065, + "p95": 64.7209994494915, + "p99": 70.8019994199276 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 1.34375, + "recvTokensMax": 32, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 @@ -2758,24 +72547,35 @@ "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 42.1999990940094, - "p90": 45.40000110864639, - "p99": 50.08000135421753 + "p50": 41.31999984383583, + "p90": 43.28100010752678, + "p95": 44.679999351501465, + "p99": 46.480000019073486 }, "combine": { - "p50": 20.0399998575449, - "p90": 22.1599992364645, - "p99": 26.079999282956123 + "p50": 18.8400000333786, + "p90": 20.041000097990036, + "p95": 21.240999922156334, + "p99": 24.441000074148178 }, - "serial": { - "p50": 62.2399989515543, - "p90": 67.5600003451109, - "p99": 76.16000063717365 + 
"roundtrip": { + "p50": 58.761000633239746, + "p90": 61.43999844789505, + "p95": 63.1600022315979, + "p99": 65.52000343799591 }, - "dispatchLogicalBytes": 4974592, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, + "isolatedSum": { + "p50": 60.15999987721443, + "p90": 63.322000205516815, + "p95": 65.9209992736578, + "p99": 70.92100009322166 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1376256, + "combineLogicalBytes": 1376256, + "fanoutMean": 1.5, + "recvTokensMax": 64, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 @@ -2784,24 +72584,35 @@ "tokensPerRank": 16, "globalTokens": 128, "dispatch": { - "p50": 42.27999970316887, - "p90": 45.48000171780586, - "p99": 50.1599982380867 + "p50": 41.40099883079529, + "p90": 43.480001389980316, + "p95": 44.440001249313354, + "p99": 46.00000008940697 }, "combine": { - "p50": 25.359999388456345, - "p90": 27.480000630021095, - "p99": 32.55999833345413 + "p50": 22.87999913096428, + "p90": 24.6799997985363, + "p95": 26.559999212622643, + "p99": 29.40100058913231 }, - "serial": { - "p50": 67.63999909162521, - "p90": 72.96000234782696, - "p99": 82.71999657154083 + "roundtrip": { + "p50": 63.19999694824219, + "p90": 65.76000154018402, + "p95": 67.28000193834305, + "p99": 69.64100152254105 }, - "dispatchLogicalBytes": 9920512, - "combineLogicalBytes": 9920512, - "fanoutMean": 5.40625, - "recvTokensMax": 92, + "isolatedSum": { + "p50": 64.28099796175957, + "p90": 68.16000118851662, + "p95": 71.000000461936, + "p99": 75.40100067853928 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2781184, + "combineLogicalBytes": 2781184, + "fanoutMean": 1.515625, + "recvTokensMax": 128, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 @@ -2809,307 +72620,653 @@ ] }, { - "id": "cx-ea1e13cdaf24bc7b", - "identity": "mi355x|mori|prefill|normal|tuned|standardized|mori-normal-v1|mi355x-xgmi|8|8|bf16|balanced|7168|8|256", - "stitchKey": 
"mi355x|mori|normal|tuned|standardized|mori-normal-v1|mi355x-xgmi|8|8|bf16|balanced|7168|8|256", - "colorKey": "mi355x_f3b49abd", - "schemaVersion": 1, - "generatedAt": "2026-06-24T01:59:40.183797+00:00", + "id": "cx-8d163d45", + "identity": "mi355x|mori|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|decode|normal|none|none|0|tuned||d42040086b5de07", + "colorKey": "mi355x_65e339f9", + "comparisonKey": "2ba4cba3af48c2b3", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:39:01.384245+00:00", "status": "valid", + "publicationStatus": "official", + "runner": "mi355x-amds_07", "sku": "mi355x", "backend": "mori", - "phase": "prefill", + "phase": "decode", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "mori-normal-v1", + "measurementContract": "layout-and-dispatch-v1", "topologyClass": "mi355x-xgmi", "transport": "xgmi", "worldSize": 8, "epSize": 8, - "label": "MI355X · mori · bf16 · EP8", + "label": "MI355X EP8 · mori · bf16 · zipf+eplb", "shape": { "hidden": 7168, "topk": 8, - "experts": 256, - "routing": "balanced", - "dispatchDtype": "bf16" + "experts": 288, + "routing": "zipf", + "routingLabel": "zipf+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" }, - "routingConsistent": null, - "traceSignature": null, - "backendVersion": "unknown", + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.3125, + "configuredUnits": 80, + "deviceUnits": 256, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "d42040086b5de07", + "workloadId": "set:5:f5576e2b712d38c3", + "workloadSource": "canonical-serialized", + 
"eplbImbalanceBefore": 4.875, + "eplbImbalanceAfter": 1.0033482142857144, + "backendVersion": "image:rocm/sgl-dev:sglang-0.5.9-rocm720-mi35x-mori-0227-2", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28069889124", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28069889124", - "createdAt": "2026-06-24T01:57:55Z", - "sha": "e2717a341cf1514d4be6393db16121889db7bf19" + "id": "28271245352", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271245352", + "createdAt": "2026-06-26T23:36:55Z", + "sha": "ee4ffe77871d0200cb4a78c96d3ae9f692e9af02" }, "rows": [ { "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 40.24000093340874, - "p90": 40.24000093340874, - "p99": 135.68000495433807 + "p50": 39.319999516010284, + "p90": 42.11999848484993, + "p95": 43.15999895334244, + "p99": 46.52100056409836 }, "combine": { - "p50": 16.07999950647354, - "p90": 16.07999950647354, - "p99": 36.80000081658363 + "p50": 15.399999916553497, + "p90": 17.601000145077705, + "p95": 18.75999942421913, + "p99": 21.320000290870667 }, - "serial": { - "p50": 55.1999993622303, - "p90": 55.1999993622303, - "p99": 74.12099838256836 + "roundtrip": { + "p50": 54.23999950289726, + "p90": 57.440001517534256, + "p95": 58.921001851558685, + "p99": 60.95999851822853 }, - "dispatchLogicalBytes": 100352, - "combineLogicalBytes": 0, - "fanoutMean": null, + "isolatedSum": { + "p50": 54.71999943256378, + "p90": 59.720998629927635, + "p95": 61.91999837756157, + "p99": 67.84100085496902 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 587776, + "combineLogicalBytes": 587776, + "fanoutMean": 5.125, "recvTokensMax": 7, + "stragglerRank": 2, "correct": true, - "samplesPooled": null, - "trials": null + "samplesPooled": 600, + "trials": 3 }, { "tokensPerRank": 2, "globalTokens": 16, "dispatch": { - "p50": 43.04099828004837, - "p90": 43.04099828004837, - 
"p99": 83.15999805927277 + "p50": 41.71999916434288, + "p90": 44.84099894762039, + "p95": 46.4400015771389, + "p99": 49.15999993681908 }, "combine": { - "p50": 16.24000072479248, - "p90": 16.24000072479248, - "p99": 27.35999971628189 + "p50": 15.599999576807022, + "p90": 17.839999869465828, + "p95": 19.88000050187111, + "p99": 22.5600004196167 }, - "serial": { - "p50": 58.72099846601486, - "p90": 58.72099846601486, - "p99": 82.04100281000137 + "roundtrip": { + "p50": 57.08099901676178, + "p90": 60.67999824881554, + "p95": 61.59999966621399, + "p99": 63.48100304603577 }, - "dispatchLogicalBytes": 200704, - "combineLogicalBytes": 0, - "fanoutMean": null, - "recvTokensMax": 14, + "isolatedSum": { + "p50": 57.3199987411499, + "p90": 62.68099881708622, + "p95": 66.32000207901001, + "p99": 71.72000035643578 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1161216, + "combineLogicalBytes": 1161216, + "fanoutMean": 5.0625, + "recvTokensMax": 13, + "stragglerRank": 2, "correct": true, - "samplesPooled": null, - "trials": null + "samplesPooled": 600, + "trials": 3 }, { "tokensPerRank": 4, "globalTokens": 32, "dispatch": { - "p50": 42.80000180006027, - "p90": 42.80000180006027, - "p99": 59.31999906897545 + "p50": 41.88000038266182, + "p90": 44.08099874854088, + "p95": 45.120999217033386, + "p99": 48.239998519420624 }, "combine": { - "p50": 19.23999935388565, - "p90": 19.23999935388565, - "p99": 28.48000079393387 + "p50": 18.719999119639397, + "p90": 21.04100026190281, + "p95": 22.760000079870224, + "p99": 26.760000735521317 }, - "serial": { - "p50": 62.07999959588051, - "p90": 62.07999959588051, - "p99": 89.6809995174408 + "roundtrip": { + "p50": 61.43999844789505, + "p90": 64.43999707698822, + "p95": 65.68100303411484, + "p99": 67.87999719381332 }, - "dispatchLogicalBytes": 372736, - "combineLogicalBytes": 0, - "fanoutMean": null, - "recvTokensMax": 26, + "isolatedSum": { + "p50": 60.599999502301216, + "p90": 65.12199901044369, + "p95": 67.88099929690361, + "p99": 
74.99999925494194 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2394112, + "combineLogicalBytes": 2394112, + "fanoutMean": 5.21875, + "recvTokensMax": 23, + "stragglerRank": 2, "correct": true, - "samplesPooled": null, - "trials": null + "samplesPooled": 600, + "trials": 3 }, { "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 42.59999841451645, - "p90": 42.59999841451645, - "p99": 73.2010006904602 + "p50": 41.839998215436935, + "p90": 44.79999840259552, + "p95": 46.23999819159508, + "p99": 48.36000129580498 }, "combine": { - "p50": 21.28100022673607, - "p90": 21.28100022673607, - "p99": 29.759999364614487 + "p50": 21.199999377131462, + "p90": 22.95999974012375, + "p95": 24.19999986886978, + "p99": 26.040000841021538 }, - "serial": { - "p50": 62.401000410318375, - "p90": 62.401000410318375, - "p99": 88.96099776029587 + "roundtrip": { + "p50": 61.51999905705452, + "p90": 64.92000073194504, + "p95": 65.92000275850296, + "p99": 68.08000057935715 }, - "dispatchLogicalBytes": 716800, - "combineLogicalBytes": 0, - "fanoutMean": null, - "recvTokensMax": 50, + "isolatedSum": { + "p50": 63.0399975925684, + "p90": 67.75999814271927, + "p95": 70.43999806046486, + "p99": 74.40000213682652 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4845568, + "combineLogicalBytes": 4845568, + "fanoutMean": 5.28125, + "recvTokensMax": 45, + "stragglerRank": 2, "correct": true, - "samplesPooled": null, - "trials": null + "samplesPooled": 600, + "trials": 3 }, { "tokensPerRank": 16, "globalTokens": 128, "dispatch": { - "p50": 42.681001126766205, - "p90": 42.681001126766205, - "p99": 91.92000329494476 + "p50": 42.319998145103455, + "p90": 44.759999960660934, + "p95": 46.28000035881996, + "p99": 49.240998923778534 }, "combine": { - "p50": 25.919999927282333, - "p90": 25.919999927282333, - "p99": 28.880000114440918 + "p50": 24.879999458789825, + "p90": 27.079999446868896, + "p95": 28.440000489354134, + "p99": 56.88000097870827 }, - "serial": { - "p50": 
67.35999882221222, - "p90": 67.35999882221222, - "p99": 102.9210016131401 + "roundtrip": { + "p50": 66.3599967956543, + "p90": 69.95999813079834, + "p95": 70.91999799013138, + "p99": 73.00099730491638 }, - "dispatchLogicalBytes": 1347584, - "combineLogicalBytes": 0, - "fanoutMean": null, - "recvTokensMax": 94, + "isolatedSum": { + "p50": 67.19999760389328, + "p90": 71.83999940752983, + "p95": 74.7200008481741, + "p99": 106.1209999024868 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9676800, + "combineLogicalBytes": 9676800, + "fanoutMean": 5.2734375, + "recvTokensMax": 88, + "stragglerRank": 2, "correct": true, - "samplesPooled": null, - "trials": null - }, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-2d0599c0", + "identity": "mi355x|mori|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|c774c8e4abb34da", + "colorKey": "mi355x_2fa43515", + "comparisonKey": "2796ed88af4b14b0", + "schemaVersion": 3, + "generatedAt": "2026-06-26T15:40:45.756534+00:00", + "status": "valid", + "publicationStatus": "diagnostic", + "runner": "mi355x-amds_04", + "sku": "mi355x", + "backend": "mori", + "phase": "decode", + "mode": "normal", + "resourceMode": "normalized", + "suite": "resource-constrained", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "mi355x-xgmi", + "transport": "xgmi", + "worldSize": 8, + "epSize": 8, + "label": "MI355X EP8 · mori · bf16 (norm)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": 0.18, + "achievedFraction": 0.3125, + "configuredUnits": 80, + "deviceUnits": 256, + "resourceClass": "unknown", + "conformanceClass": "minimum-functional", + 
"fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "c774c8e4abb34da", + "workloadId": "set:5:d8d49658059863f2", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "image:rocm/sgl-dev:sglang-0.5.9-rocm720-mi35x-mori-0227-2", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28247575150", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28247575150", + "createdAt": "2026-06-26T15:22:26Z", + "sha": "fd23d02b65dba6f1ed963342b188022fc27263d1" + }, + "rows": [ { - "tokensPerRank": 32, - "globalTokens": 256, + "tokensPerRank": 1, + "globalTokens": 8, "dispatch": { - "p50": 42.92000085115433, - "p90": 42.92000085115433, - "p99": 81.60100132226944 + "p50": 40.55999964475632, + "p90": 43.15999895334244, + "p95": 44.881001114845276, + "p99": 47.55999892950058 }, "combine": { - "p50": 31.76100179553032, - "p90": 31.76100179553032, - "p99": 35.92099994421005 + "p50": 16.119999811053276, + "p90": 18.719999119639397, + "p95": 19.840000197291374, + "p99": 22.520000115036964 }, - "serial": { - "p50": 74.60000365972519, - "p90": 74.60000365972519, - "p99": 113.08100074529648 + "roundtrip": { + "p50": 56.040000170469284, + "p90": 59.20000001788139, + "p95": 60.80099940299988, + "p99": 63.120998442173004 }, - "dispatchLogicalBytes": 2508800, - "combineLogicalBytes": 0, - "fanoutMean": null, - "recvTokensMax": 175, + "isolatedSum": { + "p50": 56.67999945580959, + "p90": 61.879998072981834, + "p95": 64.72100131213665, + "p99": 70.07999904453754 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 0, "correct": true, - "samplesPooled": 
null, - "trials": null + "samplesPooled": 600, + "trials": 3 }, { - "tokensPerRank": 64, - "globalTokens": 512, + "tokensPerRank": 2, + "globalTokens": 16, "dispatch": { - "p50": 45.20000144839287, - "p90": 45.20000144839287, - "p99": 244.24199759960175 + "p50": 42.55999997258186, + "p90": 45.441001653671265, + "p95": 47.040000557899475, + "p99": 49.959998577833176 }, "combine": { - "p50": 39.48099911212921, - "p90": 39.48099911212921, - "p99": 50.519999116659164 + "p50": 16.16000011563301, + "p90": 18.360000103712082, + "p95": 19.600000232458115, + "p99": 22.63999916613102 }, - "serial": { - "p50": 84.84099805355072, - "p90": 84.84099805355072, - "p99": 126.40100717544556 + "roundtrip": { + "p50": 58.83999913930893, + "p90": 61.88099831342697, + "p95": 63.48100304603577, + "p99": 65.40100276470184 }, - "dispatchLogicalBytes": 5089280, - "combineLogicalBytes": 0, - "fanoutMean": null, - "recvTokensMax": 355, + "isolatedSum": { + "p50": 58.720000088214874, + "p90": 63.80100175738335, + "p95": 66.64000079035759, + "p99": 72.5999977439642 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 0, "correct": true, - "samplesPooled": null, - "trials": null + "samplesPooled": 600, + "trials": 3 }, { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 4, + "globalTokens": 32, "dispatch": { - "p50": 60.08100137114525, - "p90": 60.08100137114525, - "p99": 90.04099667072296 + "p50": 42.160000652074814, + "p90": 44.840000569820404, + "p95": 46.28000035881996, + "p99": 49.84100162982941 }, "combine": { - "p50": 52.60099843144417, - "p90": 52.60099843144417, - "p99": 56.481000036001205 + "p50": 19.039999693632126, + "p90": 22.1599992364645, + "p95": 23.48100021481514, + "p99": 54.63999882340431 }, - "serial": { - "p50": 111.08099669218063, - "p90": 111.08099669218063, - "p99": 145.28100192546844 + "roundtrip": { + "p50": 61.59999966621399, + "p90": 
64.71999734640121, + "p95": 65.76000154018402, + "p99": 68.36000084877014 }, - "dispatchLogicalBytes": 9848832, - "combineLogicalBytes": 0, - "fanoutMean": null, - "recvTokensMax": 687, + "isolatedSum": { + "p50": 61.20000034570694, + "p90": 66.9999998062849, + "p95": 69.7610005736351, + "p99": 104.48100045323372 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 0, "correct": true, - "samplesPooled": null, - "trials": null + "samplesPooled": 600, + "trials": 3 }, { - "tokensPerRank": 256, - "globalTokens": 2048, + "tokensPerRank": 8, + "globalTokens": 64, "dispatch": { - "p50": 88.76100182533264, - "p90": 88.76100182533264, - "p99": 166.8809950351715 + "p50": 42.52000153064728, + "p90": 45.1200008392334, + "p95": 46.080999076366425, + "p99": 48.8400012254715 }, "combine": { - "p50": 100.96099972724915, - "p90": 100.96099972724915, - "p99": 105.00100255012512 + "p50": 20.479999482631683, + "p90": 22.520000115036964, + "p95": 23.479999974370003, + "p99": 25.800000876188278 + }, + "roundtrip": { + "p50": 62.67999857664108, + "p90": 65.5599981546402, + "p95": 66.880002617836, + "p99": 68.56100261211395 }, - "serial": { - "p50": 191.68099761009216, - "p90": 191.68099761009216, - "p99": 227.44199633598328 + "isolatedSum": { + "p50": 63.00000101327896, + "p90": 67.64000095427036, + "p95": 69.56099905073643, + "p99": 74.64000210165977 }, - "dispatchLogicalBytes": 19826688, - "combineLogicalBytes": 0, - "fanoutMean": null, - "recvTokensMax": 1383, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 0, "correct": true, - "samplesPooled": null, - "trials": null + "samplesPooled": 600, + "trials": 3 }, { - "tokensPerRank": 512, - "globalTokens": 4096, + "tokensPerRank": 16, + "globalTokens": 128, "dispatch": { - "p50": 145.76099812984467, - "p90": 
145.76099812984467, - "p99": 398.40400218963623 + "p50": 42.67999902367592, + "p90": 45.27999833226204, + "p95": 46.799998730421066, + "p99": 49.720000475645065 }, "combine": { - "p50": 110.76100170612335, - "p90": 110.76100170612335, - "p99": 115.64099788665771 + "p50": 24.921000003814697, + "p90": 27.240000665187836, + "p95": 28.07999961078167, + "p99": 30.27999959886074 }, - "serial": { - "p50": 266.5629982948303, - "p90": 266.5629982948303, - "p99": 285.84301471710205 + "roundtrip": { + "p50": 67.9209977388382, + "p90": 71.04100286960602, + "p95": 72.12000340223312, + "p99": 74.08100366592407 }, - "dispatchLogicalBytes": 39438336, - "combineLogicalBytes": 0, - "fanoutMean": null, - "recvTokensMax": 2751, + "isolatedSum": { + "p50": 67.60099902749062, + "p90": 72.51999899744987, + "p95": 74.87999834120274, + "p99": 80.0000000745058 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 0, "correct": true, - "samplesPooled": null, - "trials": null + "samplesPooled": 600, + "trials": 3 } ] } ], - "scannedRuns": 30, - "contributingRuns": 9, - "generatedAt": "2026-06-25T09:08:45.533Z" + "failures": [ + { + "id": "cxf-25e7e895", + "identity": "h100|deepep|7168|8|256|fp8|ll|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "generatedAt": "2026-06-26T23:49:09.827299+00:00", + "publicationStatus": "diagnostic", + "status": "valid", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "config": "fp8/ll/layout-and-dispatch", + "reason": "anomaly:roundtrip_gt_isolated_sum", + "returnCode": null, + "run": { + "id": "28271594334", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271594334", + "createdAt": "2026-06-26T23:47:39Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + } + }, + { + "id": "cxf-6e691abd", + "identity": 
"h100|deepep|7168|8|256|fp8|ll|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|ac583971f94b176", + "generatedAt": "2026-06-26T17:32:59.549027+00:00", + "publicationStatus": "diagnostic", + "status": "valid", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "config": "fp8/ll/layout-and-dispatch", + "reason": "anomaly:roundtrip_gt_isolated_sum", + "returnCode": null, + "run": { + "id": "28254359089", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254359089", + "createdAt": "2026-06-26T17:27:42Z", + "sha": "60dec7d70f554e252fec87709e2be52752947db1" + } + }, + { + "id": "cxf-433580a5", + "identity": "h100|deepep|7168|8|256|fp8|ll|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "generatedAt": "2026-06-26T23:49:16.484836+00:00", + "publicationStatus": "diagnostic", + "status": "valid", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "config": "fp8/ll/runtime-visible", + "reason": "anomaly:roundtrip_gt_isolated_sum", + "returnCode": null, + "run": { + "id": "28271598000", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271598000", + "createdAt": "2026-06-26T23:47:46Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + } + }, + { + "id": "cxf-e15f2b54", + "identity": "h200|deepep|7168|8|256|fp8|ll|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "generatedAt": "2026-06-26T23:51:34.222899+00:00", + "publicationStatus": "diagnostic", + "status": "valid", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "config": "fp8/ll/layout-and-dispatch", + "reason": "anomaly:roundtrip_gt_isolated_sum", + "returnCode": null, + "run": { + "id": "28271653486", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271653486", + "createdAt": "2026-06-26T23:49:28Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + } + }, + { + "id": "cxf-70961aef", + "identity": 
"h200|deepep|7168|8|256|fp8|ll|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|ac583971f94b176", + "generatedAt": "2026-06-26T17:31:08.227503+00:00", + "publicationStatus": "diagnostic", + "status": "valid", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "config": "fp8/ll/layout-and-dispatch", + "reason": "anomaly:roundtrip_gt_isolated_sum", + "returnCode": null, + "run": { + "id": "28254435010", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254435010", + "createdAt": "2026-06-26T17:29:12Z", + "sha": "60dec7d70f554e252fec87709e2be52752947db1" + } + }, + { + "id": "cxf-33a53f33", + "identity": "h200|deepep|7168|8|256|fp8|ll|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "generatedAt": "2026-06-26T23:51:35.330044+00:00", + "publicationStatus": "diagnostic", + "status": "valid", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "config": "fp8/ll/runtime-visible", + "reason": "anomaly:roundtrip_gt_isolated_sum", + "returnCode": null, + "run": { + "id": "28271656517", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271656517", + "createdAt": "2026-06-26T23:49:35Z", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + } + }, + { + "id": "cxf-26d1baf4", + "identity": "mi355x|mori|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|c774c8e4abb34da", + "generatedAt": "2026-06-26T15:40:45.756534+00:00", + "publicationStatus": "diagnostic", + "status": "valid", + "sku": "mi355x", + "backend": "mori", + "phase": "decode", + "config": "bf16/normal/layout-and-dispatch", + "reason": "resource-nonconforming", + "returnCode": null, + "run": { + "id": "28247575150", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28247575150", + "createdAt": "2026-06-26T15:22:26Z", + "sha": "fd23d02b65dba6f1ed963342b188022fc27263d1" + } + } + ], + "scannedRuns": 234, + "scannedArtifacts": 255, + 
"contributingRuns": 234, + "generatedAt": "2026-06-27T00:54:19.552Z" } diff --git a/packages/app/scripts/generate-collectivex-data.ts b/packages/app/scripts/generate-collectivex-data.ts index 849e8374..fd139d1b 100644 --- a/packages/app/scripts/generate-collectivex-data.ts +++ b/packages/app/scripts/generate-collectivex-data.ts @@ -28,7 +28,7 @@ async function main() { await writeFile(outputPath, contents, 'utf8'); const rowCount = snapshot.series.reduce((total, series) => total + series.rows.length, 0); console.log( - `Wrote ${snapshot.series.length} CollectiveX series (${rowCount} rows) from ${snapshot.contributingRuns} runs to ${outputPath}`, + `Wrote ${snapshot.series.length} CollectiveX series (${rowCount} rows, ${snapshot.failures.length} failures) from ${snapshot.contributingRuns} runs to ${outputPath}`, ); } diff --git a/packages/app/src/components/collectivex/CollectiveXChart.tsx b/packages/app/src/components/collectivex/CollectiveXChart.tsx index 9d91d029..243da36a 100644 --- a/packages/app/src/components/collectivex/CollectiveXChart.tsx +++ b/packages/app/src/components/collectivex/CollectiveXChart.tsx @@ -26,6 +26,7 @@ interface CollectiveXChartProps { yAxis: CollectiveXYAxis; xScaleType: CollectiveXScale; yScaleType: CollectiveXScale; + prefillFloor?: number; compact?: boolean; caption?: React.ReactNode; legendElement?: React.ReactNode; @@ -35,7 +36,8 @@ interface CollectiveXChartProps { const OPERATION_LABELS: Record = { dispatch: 'Dispatch', combine: 'Combine', - serial: 'Serial (sum of isolated percentiles)', + roundtrip: 'Round trip (measured)', + 'isolated-sum': 'Isolated sum (Σp, not measured)', }; const X_AXIS_LABELS: Record = { @@ -101,14 +103,15 @@ export function CollectiveXChart({ yAxis, xScaleType, yScaleType, + prefillFloor = 128, compact = false, caption, legendElement, testId, }: CollectiveXChartProps) { const points = useMemo( - () => chartPoints(series, operation, percentile, xAxis, yAxis), - [series, operation, percentile, xAxis, 
yAxis], + () => chartPoints(series, operation, percentile, xAxis, yAxis, prefillFloor), + [series, operation, percentile, xAxis, yAxis, prefillFloor], ); const seriesById = useMemo(() => new Map(series.map((item) => [item.id, item])), [series]); const lines = useMemo(() => { @@ -138,6 +141,10 @@ export function CollectiveXChart({ ), [points, yScaleType], ); + const xTickValues = useMemo( + () => [...new Set(points.map((point) => point.x))].toSorted((a, b) => a - b), + [points], + ); const noDataOverlay = points.length === 0 ? ( @@ -173,6 +180,7 @@ export function CollectiveXChart({ xAxis={{ label: `${X_AXIS_LABELS[xAxis]}${xScaleType === 'log' ? ' (log2)' : ''}`, tickCount: compact ? 5 : 8, + tickValues: xTickValues, tickFormat: (value) => formatTokenCount(Number(value)), }} yAxis={{ @@ -228,17 +236,26 @@ export function CollectiveXChart({ const color = colors[point.colorKey] ?? '#888'; const row = point.row; const runLabel = point.series.run.id ? `Run ${point.series.run.id}` : 'Run unavailable'; + const roundtripLabel = row.roundtripMeasured + ? 'measured' + : 'legacy isolated-sum fallback'; + const workload = + point.series.workloadId ?? point.series.traceSignature ?? 'not canonical'; + const sourceSha = point.series.run.sha?.slice(0, 10) ?? 'unknown'; + const imageDigest = point.series.imageDigest?.slice(0, 19) ?? 'unknown'; return `
${isPinned ? '
Click elsewhere to dismiss
' : ''}
${escapeHtml(point.seriesLabel)}
-
${escapeHtml(OPERATION_LABELS[operation])} ${percentile}: ${formatMetric(point.y, yAxis)}
+
${escapeHtml(OPERATION_LABELS[operation])} ${percentile}: ${formatMetric(point.y, yAxis)} · ${escapeHtml(point.series.publicationStatus)}
${row.tokensPerRank} tokens/rank · ${row.globalTokens} global tokens
Dispatch p50/p90/p99: ${row.dispatch.p50.toFixed(1)} / ${row.dispatch.p90.toFixed(1)} / ${row.dispatch.p99.toFixed(1)} µs
Combine p50/p90/p99: ${row.combine.p50.toFixed(1)} / ${row.combine.p90.toFixed(1)} / ${row.combine.p99.toFixed(1)} µs
-
Fan-out: ${row.fanoutMean?.toFixed(2) ?? 'n/a'} · recv max: ${row.recvTokensMax ?? 'n/a'} · correctness: ${row.correct ? 'pass' : 'fail'}
-
${escapeHtml(point.series.suite)} · ${escapeHtml(point.series.topologyClass)}
-
${escapeHtml(runLabel)} · ${escapeHtml(point.series.measurementContract)}
- ${row.stitchedFromDecode ? '
Decode-range point stitched into the prefill curve
' : ''} +
Round trip p50/p90/p99: ${row.roundtrip.p50.toFixed(1)} / ${row.roundtrip.p90.toFixed(1)} / ${row.roundtrip.p99.toFixed(1)} µs (${roundtripLabel})
+
Fan-out: ${row.fanoutMean?.toFixed(2) ?? 'n/a'} · recv max: ${row.recvTokensMax ?? 'n/a'}${row.stragglerRank === null ? '' : ` · straggler: r${row.stragglerRank}`} · correctness: ${row.correct ? 'pass' : 'fail'}
+
${escapeHtml(point.series.measurementContract)} · ${escapeHtml(point.series.suite)} · ${escapeHtml(point.series.topologyClass)}
+
dispatch=${escapeHtml(point.series.shape.dispatchDtype)} · combine=${escapeHtml(point.series.shape.combineQuantMode)} · activation=${escapeHtml(point.series.shape.activationProfile)}
+
workload=${escapeHtml(workload)} · source=${escapeHtml(sourceSha)}
+
${escapeHtml(runLabel)} · image=${escapeHtml(imageDigest)}
`; }, getRulerX: (point, scale) => diff --git a/packages/app/src/components/collectivex/CollectiveXDisplay.tsx b/packages/app/src/components/collectivex/CollectiveXDisplay.tsx index b3d04b49..94c4bed0 100644 --- a/packages/app/src/components/collectivex/CollectiveXDisplay.tsx +++ b/packages/app/src/components/collectivex/CollectiveXDisplay.tsx @@ -3,9 +3,6 @@ import { ExternalLink, Loader2, RefreshCw } from 'lucide-react'; import { useCallback, useEffect, useMemo, useState } from 'react'; -import { track } from '@/lib/analytics'; -import { useCollectiveX } from '@/hooks/api/use-collectivex'; -import { useThemeColors } from '@/hooks/useThemeColors'; import { Button } from '@/components/ui/button'; import { Card } from '@/components/ui/card'; import ChartLegend from '@/components/ui/chart-legend'; @@ -18,13 +15,25 @@ import { SelectTrigger, SelectValue, } from '@/components/ui/select'; +import { useCollectiveX } from '@/hooks/api/use-collectivex'; +import { useThemeColors } from '@/hooks/useThemeColors'; +import { track } from '@/lib/analytics'; +import { getModelSortIndex } from '@/lib/constants'; import { CollectiveXChart } from './CollectiveXChart'; -import { comparisonDifferences, stitchCollectiveXPrefillSeries } from './data'; +import { CollectiveXHeatmap } from './CollectiveXHeatmap'; +import { CollectiveXScaling } from './CollectiveXScaling'; +import { + CollectiveXCoverageTable, + CollectiveXFailureTable, + CollectiveXSensitivityTable, +} from './CollectiveXTables'; +import { collectiveXPrefillFloor, comparisonDifferences, publicationMatches } from './data'; import type { CollectiveXOperation, CollectiveXPercentile, CollectiveXPhase, + CollectiveXPublicationFilter, CollectiveXScale, CollectiveXSeries, CollectiveXSuite, @@ -35,9 +44,12 @@ import type { const OPERATION_OPTIONS: SegmentedToggleOption[] = [ { value: 'dispatch', label: 'Dispatch' }, { value: 'combine', label: 'Combine' }, - { value: 'serial', label: 'Serial' }, + { value: 'roundtrip', label: 
'Round trip' }, + { value: 'isolated-sum', label: 'Isolated sum' }, ]; +const OVERVIEW_OPERATIONS: CollectiveXOperation[] = ['dispatch', 'combine', 'roundtrip']; + const PHASE_OPTIONS: SegmentedToggleOption[] = [ { value: 'decode', label: 'Decode' }, { value: 'prefill', label: 'Prefill' }, @@ -55,6 +67,12 @@ const SUITE_OPTIONS: SegmentedToggleOption[] = [ { value: 'resource-constrained', label: 'Resource constrained' }, ]; +const PUBLICATION_OPTIONS: SegmentedToggleOption[] = [ + { value: 'publishable', label: 'Publishable' }, + { value: 'official', label: 'Official only' }, + { value: 'all', label: 'All' }, +]; + const SCALE_OPTIONS: SegmentedToggleOption[] = [ { value: 'log', label: 'Log' }, { value: 'linear', label: 'Linear' }, @@ -63,7 +81,8 @@ const SCALE_OPTIONS: SegmentedToggleOption[] = [ const OPERATION_LABELS: Record = { dispatch: 'Dispatch', combine: 'Combine', - serial: 'Serial (sum of isolated percentiles)', + roundtrip: 'Round trip (measured)', + 'isolated-sum': 'Isolated sum (Σp, not measured)', }; const Y_AXIS_LABELS: Record = { @@ -112,7 +131,7 @@ function InlineLegend({ ...new Map(series.map((item) => [`${item.colorKey}|${item.label}`, item])).values(), ]; return ( -
+
{entries.map((item) => ( ('dispatch'); const [phase, setPhase] = useState('decode'); const [percentile, setPercentile] = useState('p50'); - const [suite, setSuite] = useState('all'); + const [suite, setSuite] = useState('backend-default'); + const [routing, setRouting] = useState('uniform'); + const [publication, setPublication] = useState('publishable'); const [xAxis, setXAxis] = useState('tokens-per-rank'); const [yAxis, setYAxis] = useState('latency'); const [xScaleType, setXScaleType] = useState('log'); @@ -167,29 +188,72 @@ export default function CollectiveXDisplay() { const [isLegendExpanded, setIsLegendExpanded] = useState(true); const [highContrast, setHighContrast] = useState(false); - const series = useMemo(() => stitchCollectiveXPrefillSeries(data?.series ?? []), [data?.series]); + const series = data?.series ?? []; + const prefillFloor = useMemo(() => collectiveXPrefillFloor(series), [series]); useEffect(() => { - if (series.length > 0) { - setActiveSeriesIds(new Set(series.map((item) => item.id))); - } + if (series.length > 0) setActiveSeriesIds(new Set(series.map((item) => item.id))); }, [series]); - const suiteSeries = useMemo( - () => series.filter((item) => suite === 'all' || item.suite === suite), - [series, suite], + const routingOptions = useMemo(() => { + const values = [...new Set(series.map((item) => item.shape.routingLabel))].toSorted((a, b) => { + if (a === 'uniform') return -1; + if (b === 'uniform') return 1; + return a.localeCompare(b); + }); + return ['all', ...values]; + }, [series]); + + useEffect(() => { + if (series.length === 0) return; + if (routingOptions.includes(routing)) return; + setRouting(routingOptions.includes('uniform') ? 
'uniform' : 'all'); + }, [routing, routingOptions, series.length]); + + const filteredSeries = useMemo( + () => + series.filter( + (item) => + (suite === 'all' || item.suite === suite) && + (routing === 'all' || item.shape.routingLabel === routing) && + publicationMatches(item, publication), + ), + [publication, routing, series, suite], ); const phaseSeries = useMemo( - () => suiteSeries.filter((item) => item.phase === phase), - [suiteSeries, phase], + () => filteredSeries.filter((item) => item.phase === phase), + [filteredSeries, phase], ); const activePhaseSeries = useMemo( () => phaseSeries.filter((item) => activeSeriesIds.has(item.id)), - [phaseSeries, activeSeriesIds], + [activeSeriesIds, phaseSeries], + ); + const heatmapSeries = useMemo( + () => + series.filter( + (item) => + item.phase === phase && + (suite === 'all' || item.suite === suite) && + publicationMatches(item, publication), + ), + [phase, publication, series, suite], + ); + const scalingColorSeries = useMemo( + () => + series.filter( + (item) => + item.shape.routing === 'uniform' && + !item.shape.eplbEnabled && + item.mode === 'normal' && + item.measurementContract === 'layout-and-dispatch-v1' && + item.suite === 'backend-default' && + publicationMatches(item, 'publishable'), + ), + [series], ); const colorKeys = useMemo( - () => [...new Set(suiteSeries.map((item) => item.colorKey))], - [suiteSeries], + () => [...new Set([...filteredSeries, ...scalingColorSeries].map((item) => item.colorKey))], + [filteredSeries, scalingColorSeries], ); const { resolveColor, getCssColor } = useThemeColors({ highContrast, @@ -216,40 +280,34 @@ export default function CollectiveXDisplay() { () => comparisonDifferences(activePhaseSeries), [activePhaseSeries], ); - const runs = useMemo(() => uniqueRuns(data?.series ?? 
[]), [data?.series]); + const runs = useMemo(() => uniqueRuns(series), [series]); const hardwareCount = new Set(activePhaseSeries.map((item) => item.sku)).size; - const availablePhases = useMemo( - () => - PHASE_OPTIONS.map((option) => option.value).filter((candidate) => - suiteSeries.some((item) => item.phase === candidate), - ), - [suiteSeries], - ); - const overviewGroups = useMemo( - () => - availablePhases.flatMap((overviewPhase) => { - const phaseCandidates = suiteSeries.filter((item) => item.phase === overviewPhase); - const epSizes = [...new Set(phaseCandidates.map((item) => item.epSize))].toSorted( - (a, b) => { - if (a === b) return 0; - if (a === null) return 1; - if (b === null) return -1; - return a - b; - }, - ); - - return epSizes.map((epSize) => ({ - phase: overviewPhase, - epSize, - series: phaseCandidates.filter( - (item) => item.epSize === epSize && activeSeriesIds.has(item.id), - ), - })); - }), - [activeSeriesIds, availablePhases, suiteSeries], - ); + const overviewGroups = useMemo(() => { + const availablePhases = PHASE_OPTIONS.map((option) => option.value).filter((candidate) => + filteredSeries.some((item) => item.phase === candidate), + ); + return availablePhases.flatMap((overviewPhase) => { + const phaseCandidates = filteredSeries.filter((item) => item.phase === overviewPhase); + const epSizes = [...new Set(phaseCandidates.map((item) => item.epSize))].toSorted((a, b) => { + if (a === b) return 0; + if (a === null) return 1; + if (b === null) return -1; + return a - b; + }); + return epSizes.map((epSize) => ({ + phase: overviewPhase, + epSize, + series: phaseCandidates.filter( + (item) => item.epSize === epSize && activeSeriesIds.has(item.id), + ), + })); + }); + }, [activeSeriesIds, filteredSeries]); const hasLegacyP90Fallback = percentile === 'p90' && activePhaseSeries.some((item) => item.schemaVersion < 3); + const hasUnmeasuredRoundtrip = + operation === 'roundtrip' && + activePhaseSeries.some((item) => item.rows.some((row) => 
!row.roundtripMeasured)); const routingIdentityProven = series.length > 0 && series.every((item) => item.routingConsistent === true); const pooledSamples = [ @@ -261,18 +319,29 @@ export default function CollectiveXDisplay() { ), ), ].toSorted((a, b) => a - b); + const placementCount = new Set(heatmapSeries.map((item) => item.placement.kind)).size; + const eplbExample = series.find( + (item) => item.eplbImbalanceBefore !== null && item.eplbImbalanceAfter !== null, + ); const legendItems = useMemo( () => - phaseSeries.map((item) => ({ - name: item.id, - label: item.label, - color: colors[item.colorKey] ?? 'var(--muted-foreground)', - isActive: activeSeriesIds.has(item.id), - title: `${item.topologyClass} · ${item.measurementContract}`, - onClick: () => toggleSeries(item.id), - })), - [phaseSeries, colors, activeSeriesIds, toggleSeries], + phaseSeries + .toSorted( + (a, b) => + getModelSortIndex(a.sku) - getModelSortIndex(b.sku) || + a.label.localeCompare(b.label) || + a.identity.localeCompare(b.identity), + ) + .map((item) => ({ + name: item.id, + label: item.label, + color: colors[item.colorKey] ?? 'var(--muted-foreground)', + isActive: activeSeriesIds.has(item.id), + title: `${item.publicationStatus} · ${item.shape.routingLabel} · ${item.topologyClass} · ${item.measurementContract}`, + onClick: () => toggleSeries(item.id), + })), + [activeSeriesIds, colors, phaseSeries, toggleSeries], ); const handleRefresh = useCallback(() => { @@ -316,14 +385,14 @@ export default function CollectiveXDisplay() {

- Cross-vendor MoE expert-parallel dispatch and combine benchmarks, rendered from a - generated snapshot of successful GitHub Actions artifacts produced by the{' '} + Cross-vendor MoE expert-parallel dispatch, combine, and independently measured + round-trip benchmarks from the{' '} collectivex branch.

- These are experimental communication microbenchmarks, not official serving results. - Hardware topology, routing, EP degree, resource budget, and timing contract remain - part of every line's identity. + Uniform routing is the controlled cross-hardware headline. Skewed routing, EPLB, + activation profiles, resource budgets, topology, EP degree, and timing contract are + sensitivity dimensions and remain part of each line's identity.

@@ -368,14 +437,22 @@ export default function CollectiveXDisplay() {
-
+

{activePhaseSeries.length}

Visible configurations

{hardwareCount}

-

Hardware platforms

+

Visible hardware

+
+
+

{series.length}

+

Retained sweeps

+
+
+

{data.failures.length}

+

Quarantined cases

{data.contributingRuns}

@@ -389,7 +466,7 @@ export default function CollectiveXDisplay() { -
+
@@ -439,6 +517,39 @@ export default function CollectiveXDisplay() { className="flex-wrap" /> + + + + + { + setPublication(value); + track('collectivex_publication_changed', { publication: value }); + }} + ariaLabel="CollectiveX publication status" + testId="collectivex-publication-toggle" + className="flex-wrap" + /> + { - setRouting(value); - track('collectivex_routing_changed', { routing: value }); - }} - > - - - - - {routingOptions.map((value) => ( - - {value === 'all' ? 'All' : value} - - ))} - - - - - { - setPublication(value); - track('collectivex_publication_changed', { publication: value }); - }} - ariaLabel="CollectiveX publication status" - testId="collectivex-publication-toggle" - className="flex-wrap" - /> - - - - - - - - - { - setXScaleType(value); - track('collectivex_x_scale_changed', { scale: value }); - }} - ariaLabel="CollectiveX x scale" - testId="collectivex-x-scale-toggle" - /> - - - { - setYScaleType(value); - track('collectivex_y_scale_changed', { scale: value }); - }} - ariaLabel="CollectiveX y scale" - testId="collectivex-y-scale-toggle" - /> - +
+

+ Chart +

+
+
+ + { + setOperation(value); + track('collectivex_operation_changed', { operation: value }); + }} + ariaLabel="CollectiveX operation" + testId="collectivex-operation-toggle" + className="flex-wrap" + /> + +
+ + { + setPhase(value); + track('collectivex_phase_changed', { phase: value }); + }} + ariaLabel="CollectiveX phase" + testId="collectivex-phase-toggle" + /> + + + { + setPercentile(value); + track('collectivex_percentile_changed', { percentile: value }); + }} + ariaLabel="CollectiveX percentile" + testId="collectivex-percentile-toggle" + /> + + + + + + + + + { + setXScaleType(value); + track('collectivex_x_scale_changed', { scale: value }); + }} + ariaLabel="CollectiveX x scale" + testId="collectivex-x-scale-toggle" + /> + + + { + setYScaleType(value); + track('collectivex_y_scale_changed', { scale: value }); + }} + ariaLabel="CollectiveX y scale" + testId="collectivex-y-scale-toggle" + /> + +
+
+ +
+

+ Filters +

+

+ Precision is the dispatch dtype. Activation profile is an independent benchmark + dimension. +

+
+
+ + { + setPrecision(value); + track('collectivex_precision_changed', { precision: value }); + }} + ariaLabel="CollectiveX dispatch precision" + testId="collectivex-precision-toggle" + className="flex w-full border-brand/40 bg-background/70 p-1" + buttonClassName="min-h-8 flex-1 justify-center px-3" + activeButtonClassName="bg-brand text-primary-foreground shadow-sm" + inactiveButtonClassName="text-muted-foreground hover:bg-brand/10 hover:text-foreground" + /> + +
+
+ + + +
+ + + + + { + setSuite(value); + track('collectivex_suite_changed', { suite: value }); + }} + ariaLabel="CollectiveX comparison suite" + testId="collectivex-suite-toggle" + className="flex-wrap" + /> + + + { + setPublication(value); + track('collectivex_publication_changed', { publication: value }); + }} + ariaLabel="CollectiveX publication status" + testId="collectivex-publication-toggle" + className="flex-wrap" + /> + +
@@ -671,6 +783,7 @@ export default function CollectiveXDisplay() {

{OPERATION_LABELS[operation]} · {phase} · {percentile} {precision === 'all' ? '' : ` · ${precision.toUpperCase()}`} + {activation === 'all' ? '' : ` · ${formatActivation(activation)} activation`} {routing === 'all' ? '' : ` · ${routing}`}

@@ -785,7 +898,8 @@ export default function CollectiveXDisplay() { {overviewGroups.length === 0 ? (

- No latency panels match the current suite, routing, and publication filters. + No latency panels match the current precision, activation, suite, routing, and + publication filters.

) : ( @@ -838,11 +952,12 @@ export default function CollectiveXDisplay() {

Scaling

Strong and weak scaling are distinct experiments with separately labeled fixed-work - contracts. They appear once a SKU has matched measurements at two EP degrees. + contracts. Precision and activation filters apply; a chart appears once a SKU has + matched measurements at two EP degrees.

- - + +
@@ -850,8 +965,8 @@ export default function CollectiveXDisplay() {

Heatmaps

Dispatch p50 across EP, routing, and resource dimensions for the current phase, - precision, suite, and publication filters. The routing selector is intentionally not - applied here. + precision, activation, suite, and publication filters. The routing selector is + intentionally not applied here.

@@ -884,7 +999,7 @@ export default function CollectiveXDisplay() {
- + From 72b55d40828eb11dd10be44565eeaaebe8df5349 Mon Sep 17 00:00:00 2001 From: Oseltamivir <58582368+Oseltamivir@users.noreply.github.com> Date: Mon, 29 Jun 2026 13:21:49 +0800 Subject: [PATCH 06/23] feat: align CollectiveX with v3 report --- packages/app/cypress/e2e/collectivex.cy.ts | 279 +- .../app/cypress/fixtures/api/collectivex.json | 5661 +- packages/app/public/data/collectivex.json | 138860 ++++++++++----- .../collectivex/CollectiveXDecision.tsx | 195 + .../collectivex/CollectiveXDisplay.tsx | 890 +- .../collectivex/CollectiveXSizeChart.tsx | 341 + .../collectivex/CollectiveXSizePanel.tsx | 201 + .../src/components/collectivex/data.test.ts | 119 + .../app/src/components/collectivex/data.ts | 992 + .../app/src/components/collectivex/types.ts | 118 +- .../app/src/lib/collectivex-snapshot.test.ts | 48 +- packages/app/src/lib/collectivex-snapshot.ts | 100 +- 12 files changed, 104887 insertions(+), 42917 deletions(-) create mode 100644 packages/app/src/components/collectivex/CollectiveXDecision.tsx create mode 100644 packages/app/src/components/collectivex/CollectiveXSizeChart.tsx create mode 100644 packages/app/src/components/collectivex/CollectiveXSizePanel.tsx diff --git a/packages/app/cypress/e2e/collectivex.cy.ts b/packages/app/cypress/e2e/collectivex.cy.ts index 82c2abe0..81218518 100644 --- a/packages/app/cypress/e2e/collectivex.cy.ts +++ b/packages/app/cypress/e2e/collectivex.cy.ts @@ -24,57 +24,45 @@ describe('CollectiveX', () => { cy.wait('@collectivexData'); }); - it('renders the updated artifact-backed report hierarchy', () => { + it('renders the v3 report hierarchy with headline defaults', () => { cy.get('[data-testid="collectivex-display"]') .should('contain.text', 'CollectiveX') .and('contain.text', 'Retained sweeps'); - cy.get('[data-testid="collectivex-explorer-chart"] svg').should('be.visible'); - cy.get('[data-testid="collectivex-explorer-chart"] .line-path').should('have.length', 3); - cy.get('[data-testid="collectivex-explorer-chart"] 
.point').should('have.length', 21); - xTickLabels().should('deep.equal', ['1', '2', '4', '8', '16', '32', '64', '128']); - cy.get('[data-testid="collectivex-comparison-warning"]') - .should('contain.text', 'Not directly comparable') - .and('contain.text', 'source SHA'); - cy.get('[data-testid^="collectivex-overview-chart-"]').should('have.length', 6); - cy.get('[data-testid="collectivex-overview-decode-ep8"]').should('exist'); - cy.get('[data-testid="collectivex-overview-prefill-ep8"]').should('exist'); - cy.get('[data-testid^="collectivex-heatmap-"]').should('have.length.at.least', 3); - cy.get('[data-testid="collectivex-sensitivity-table"]').should('contain.text', 'zipf'); - cy.get('[data-testid="collectivex-failures-table"]').should( - 'contain.text', - 'roundtrip_gt_isolated_sum', + cy.contains('[role="tab"]', 'EP dispatch / combine').should( + 'have.attr', + 'aria-selected', + 'true', ); - cy.get('[data-testid="collectivex-coverage-table"]').should('contain.text', 'official'); - cy.get('[data-testid="collectivex-scaling-weak"]').should( - 'contain.text', - 'two or more EP degrees', - ); - }); + cy.contains('[role="tab"]', 'Decision').should('be.visible'); + cy.contains('[role="tab"]', 'All-reduce').should('be.visible'); + cy.contains('[role="tab"]', 'KV-cache transfer').should('be.visible'); - it('uses evenly spaced measured powers of two on the log2 token axis', () => { - cy.get('[data-testid="collectivex-explorer-chart"] .x-axis .tick').then(($ticks) => { - const positions = $ticks.toArray().map((tick) => { - const transform = tick.getAttribute('transform') ?? 
''; - const match = /translate\((?[-\d.]+)/u.exec(transform); - return Number(match?.groups?.x); - }); - const gaps = positions.slice(1).map((position, index) => position - positions[index]); - expect(Math.max(...gaps) - Math.min(...gaps)).to.be.lessThan(0.5); - }); + cy.get('[data-testid="collectivex-main-chart"]') + .should('contain.text', 'Round trip (measured) · decode · p99') + .and('contain.text', 'DeepSeek-V3/V4') + .and('contain.text', 'BF16') + .and('contain.text', 'EP8'); + cy.get('[data-testid="collectivex-explorer-chart"] svg').should('be.visible'); + cy.get('[data-testid="collectivex-explorer-chart"] .line-path').should('have.length', 3); + cy.get('[data-testid="collectivex-explorer-chart"] .point').should('have.length', 20); + xTickLabels().should('include.members', ['1', '8', '128']); }); - it('keeps log Y axes sparse in the explorer and overview panels', () => { + it('keeps log axes sparse and byte/tokens axes readable', () => { cy.get('[data-testid="collectivex-explorer-chart"] .y-axis .tick').should(($ticks) => { - expect($ticks.length).to.be.greaterThan(1); + expect($ticks.length).to.be.greaterThan(0); expect($ticks.length).to.be.at.most(5); }); - cy.get('[data-testid^="collectivex-overview-chart-"] .y-axis').each(($axis) => { - expect($axis.find('.tick').length).to.be.at.most(4); + + cy.contains('[role="tab"]', 'All-reduce').click(); + cy.get('[data-testid="collectivex-all-reduce-chart"] svg').should('be.visible'); + cy.get('[data-testid="collectivex-all-reduce-chart"] .x-axis .tick').should(($ticks) => { + expect($ticks.length).to.be.at.most(10); }); }); - it('exposes the new operation, routing, publication, and axis controls', () => { + it('exposes model, precision, activation, EP, publication, and axis controls', () => { expectToggleOptions('collectivex-operation-toggle', [ 'Dispatch', 'Combine', @@ -82,195 +70,102 @@ describe('CollectiveX', () => { 'Isolated sum', ]); expectToggleOptions('collectivex-phase-toggle', ['Decode', 'Prefill']); - 
cy.get('[data-testid="collectivex-filter-controls"]') - .should('be.visible') - .and('contain.text', 'Precision (dispatch dtype)') - .and('contain.text', 'Activation profile'); - expectToggleOptions('collectivex-precision-toggle', ['All', 'BF16', 'FP8']); - cy.get('[data-testid="collectivex-precision-toggle"]').should('be.visible'); expectToggleOptions('collectivex-percentile-toggle', ['p50', 'p90', 'p99']); expectToggleOptions('collectivex-suite-toggle', [ 'All', 'Backend default', 'Resource constrained', ]); - expectToggleOptions('collectivex-publication-toggle', ['Publishable', 'Official only', 'All']); + expectToggleOptions('collectivex-publication-toggle', [ + 'Official headline', + 'Publishable', + 'Official only', + 'All', + ]); + expectToggleOptions('collectivex-precision-toggle', ['All', 'BF16', 'FP8']); + expectToggleOptions('collectivex-ep-toggle', ['All', 'EP8']); expectToggleOptions('collectivex-x-scale-toggle', ['Log', 'Linear']); expectToggleOptions('collectivex-y-scale-toggle', ['Log', 'Linear']); + cy.get('[data-testid="collectivex-model-shape-select"]').should( + 'contain.text', + 'DeepSeek-V3/V4', + ); cy.get('[data-testid="collectivex-activation-select"]').click(); cy.get('[role="option"]').then(($options) => { - expect($options.toArray().map((option) => option.textContent?.trim())).to.deep.equal([ + expect($options.toArray().map((option) => option.textContent?.trim())).to.include.members([ 'All', 'Normal', ]); }); cy.contains('[role="option"]', 'Normal').click(); cy.get('[data-testid="collectivex-activation-select"]').should('contain.text', 'Normal'); - - cy.get('[data-testid="collectivex-routing-select"]').click(); - cy.get('[role="option"]').then(($options) => { - expect($options.toArray().map((option) => option.textContent?.trim())).to.deep.equal([ - 'All', - 'uniform', - 'zipf', - 'zipf+eplb', - ]); - }); - cy.contains('[role="option"]', 'zipf').click(); - cy.get('[data-testid="collectivex-explorer-chart"] .line-path').should('have.length', 
1); - cy.get('[data-testid="collectivex-main-chart"]').should('contain.text', 'zipf'); - - cy.get('[data-testid="collectivex-operation-toggle"]').contains('button', 'Round trip').click(); - cy.get('[data-testid="collectivex-main-chart"]').should( - 'contain.text', - 'Round trip (measured)', - ); - cy.get('[data-testid="collectivex-operation-toggle"]') - .contains('button', 'Isolated sum') - .click(); - cy.get('[data-testid="collectivex-main-chart"]') - .should('contain.text', 'Isolated sum (Σp, not measured)') - .and('contain.text', 'not a measured chained operation'); - - cy.get('[data-testid="collectivex-percentile-toggle"]').contains('button', 'p99').click(); - cy.get('[data-testid="collectivex-main-chart"]').should('contain.text', 'decode · p99'); - - cy.get('[data-testid="collectivex-x-axis-select"]').click(); - cy.contains('[role="option"]', 'Global source tokens').click(); - cy.get('[data-testid="collectivex-x-axis-select"]').should( - 'contain.text', - 'Global source tokens', - ); - - cy.get('[data-testid="collectivex-y-axis-select"]').click(); - cy.contains('[role="option"]', 'Logical routed payload rate').click(); - cy.get('[data-testid="collectivex-main-chart"]') - .should('contain.text', 'Logical payload rate versus') - .and('contain.text', 'not wire'); }); - it('filters the explorer and overview by dispatch precision', () => { - cy.fixture('api/collectivex.json').then((snapshot) => { - const source = snapshot.series[0]; - const fp8Series = { - ...source, - id: 'cx-fp8-test', - identity: `${source.identity}|fp8-test`, - colorKey: 'h100_fp8_test', - label: 'H100 EP8 · deepep · fp8', - shape: { - ...source.shape, - dispatchDtype: 'fp8', - }, - }; - cy.intercept('GET', '/data/collectivex.json', { - ...snapshot, - series: [...snapshot.series, fp8Series], - }).as('collectivexPrecisionData'); - cy.visit('/collectivex'); - cy.wait('@collectivexPrecisionData'); - }); - - expectToggleOptions('collectivex-precision-toggle', ['All', 'BF16', 'FP8']); - 
cy.get('[data-testid="collectivex-explorer-chart"] .line-path').should('have.length', 4); - + it('filters the EP explorer by precision, activation, routing, and phase', () => { cy.get('[data-testid="collectivex-precision-toggle"]').contains('button', 'FP8').click(); cy.get('[data-testid="collectivex-main-chart"]') - .should('contain.text', 'decode · p50 · FP8') - .and('contain.text', 'H100 EP8 · deepep · fp8'); + .should('contain.text', 'FP8') + .and('contain.text', 'B300 EP8 · deepep · fp8'); cy.get('[data-testid="collectivex-explorer-chart"] .line-path').should('have.length', 1); - cy.get('[data-testid="collectivex-explorer-chart"] .point').should('have.length', 8); - cy.get('[data-testid^="collectivex-overview-chart-"]').should('have.length', 3); cy.get('[data-testid="collectivex-precision-toggle"]').contains('button', 'BF16').click(); - cy.get('[data-testid="collectivex-explorer-chart"] .line-path').should('have.length', 3); - cy.get('[data-testid^="collectivex-overview-chart-"]').should('have.length', 6); - }); - - it('filters charts and report summaries by activation profile', () => { - cy.fixture('api/collectivex.json').then((snapshot) => { - const source = snapshot.series[0]; - const zerosSeries = { - ...source, - id: 'cx-zeros-test', - identity: `${source.identity}|zeros-test`, - colorKey: 'h100_zeros_test', - label: source.label, - shape: { - ...source.shape, - activationProfile: 'zeros', - }, - }; - cy.intercept('GET', '/data/collectivex.json', { - ...snapshot, - series: [...snapshot.series, zerosSeries], - }).as('collectivexActivationData'); - cy.visit('/collectivex'); - cy.wait('@collectivexActivationData'); - }); - - cy.get('[data-testid="collectivex-activation-select"]').click(); - cy.get('[role="option"]').then(($options) => { - expect($options.toArray().map((option) => option.textContent?.trim())).to.deep.equal([ - 'All', - 'Normal', - 'Zeros', - ]); - }); - cy.contains('[role="option"]', 'Zeros').click(); - - 
cy.get('[data-testid="collectivex-main-chart"]') - .should('contain.text', 'Zeros activation') - .and('contain.text', 'H100 EP8 · deepep · bf16 · Zeros activation'); - cy.get('[data-testid="collectivex-explorer-chart"] .line-path').should('have.length', 1); - cy.get('[data-testid="collectivex-explorer-chart"] .point').should('have.length', 8); - cy.get('[data-testid^="collectivex-overview-chart-"]').should('have.length', 3); - cy.get('[data-testid="collectivex-sensitivity"]').should( - 'contain.text', - 'No data available for the current filters.', - ); - cy.get('[data-testid="collectivex-scaling-weak"]').should( - 'contain.text', - 'two or more EP degrees', + cy.get('[data-testid="collectivex-routing-select"]').click(); + cy.contains('[role="option"]', 'zipf').click(); + cy.get('[data-testid="collectivex-main-chart"]').should('contain.text', 'zipf'); + cy.get('[data-testid="collectivex-explorer-chart"] .line-path').should( + 'have.length.at.least', + 1, ); - cy.get('[data-testid="collectivex-activation-select"]').click(); - cy.contains('[role="option"]', 'All').click(); - cy.get('[data-testid="collectivex-explorer-chart"] .line-path').should('have.length', 4); - }); - - it('keeps decode observations out of the prefill panel', () => { + cy.get('[data-testid="collectivex-routing-select"]').click(); + cy.contains('[role="option"]', 'uniform').click(); cy.get('[data-testid="collectivex-phase-toggle"]').contains('button', 'Prefill').click(); - cy.get('[data-testid="collectivex-main-chart"]').should('contain.text', 'Dispatch · prefill'); + cy.get('[data-testid="collectivex-main-chart"]').should('contain.text', 'prefill'); cy.get('[data-testid="collectivex-explorer-chart"] .line-path').should('have.length', 2); - cy.get('[data-testid="collectivex-explorer-chart"] .point').should('have.length', 12); - xTickLabels().should('deep.equal', ['128', '256', '512', '1,024', '2,048', '4,096']); - cy.get('[data-testid="collectivex-main-chart"]').should( - 'not.contain.text', - 
'stitched into the prefill curve', - ); + xTickLabels().should('include.members', ['128', '256']); }); - it('publication filtering keeps diagnostic data quarantined by default', () => { - cy.get('[data-testid="collectivex-suite-toggle"]') - .contains('button', 'Resource constrained') - .click(); - cy.get('[data-testid="collectivex-explorer-chart"] .line-path').should('have.length', 1); - - cy.get('[data-testid="collectivex-publication-toggle"]').contains('button', 'All').click(); - cy.get('[data-testid="collectivex-explorer-chart"] .line-path').should('have.length', 2); - cy.get('[data-testid="collectivex-main-chart"]').should('contain.text', 'MI355X'); + it('renders Decision and Evidence tabs from the static snapshot', () => { + cy.contains('[role="tab"]', 'Decision').click(); + cy.get('[data-testid="collectivex-decision"]') + .should('contain.text', 'Best backend') + .and('contain.text', 'Max tokens under round-trip p99 budget'); + cy.get('[data-testid="collectivex-summary-card"]').should('have.length', 7); + cy.get('[data-testid="collectivex-budget-table"]').should('contain.text', '<= 100 us'); + + cy.contains('[role="tab"]', 'Evidence').click(); + cy.get('[data-testid="collectivex-sensitivity-table"]').should('exist'); + cy.get('[data-testid="collectivex-failures-table"]').should('contain.text', 'diagnostic'); + cy.get('[data-testid="collectivex-coverage-table"]').should('contain.text', 'official'); + cy.get('section[data-testid="collectivex-display"]').should('contain.text', 'Provenance'); + }); - cy.get('[data-testid="collectivex-publication-toggle"]') - .contains('button', 'Publishable') - .click(); - cy.get('[data-testid="collectivex-explorer-chart"] .line-path').should('have.length', 1); + it('renders all new collective and transfer family tabs', () => { + const familyTabs = [ + ['All-reduce', 'collectivex-all-reduce-chart', 'collectivex-all-reduce-metric-toggle'], + ['All-gather', 'collectivex-all-gather-chart', 'collectivex-all-gather-metric-toggle'], + 
['CPU-GPU offload', 'collectivex-offload-chart', 'collectivex-offload-metric-toggle'], + ['KV-cache transfer', 'collectivex-kv-cache-chart', 'collectivex-kv-cache-metric-toggle'], + [ + 'Copy-engine / SDMA', + 'collectivex-copy-engine-chart', + 'collectivex-copy-engine-metric-toggle', + ], + ['RL mesh', 'collectivex-rl-mesh-chart', 'collectivex-rl-mesh-metric-toggle'], + ]; + + for (const [label, chartTestId, metricTestId] of familyTabs) { + cy.contains('[role="tab"]', label).click(); + cy.get(`[data-testid="${metricTestId}"]`).should('be.visible'); + cy.get(`[data-testid="${chartTestId}"] svg`).should('be.visible'); + cy.get(`[data-testid="${chartTestId}"] .line-path`).should('have.length.at.least', 1); + } }); - it('legend toggles remove and restore a rendered series', () => { + it('legend toggles remove and restore a rendered EP series', () => { cy.get('[data-testid="collectivex-main-chart"]').within(() => { - cy.contains('label', 'H100 EP8 · deepep · bf16').click(); + cy.contains('label', 'B300 EP8 · deepep · bf16').click(); }); cy.get('[data-testid="collectivex-explorer-chart"] .line-path').should('have.length', 2); cy.get('[data-testid="collectivex-main-chart"]').contains('button', 'Reset filter').click(); diff --git a/packages/app/cypress/fixtures/api/collectivex.json b/packages/app/cypress/fixtures/api/collectivex.json index 95180f8b..5dd8acb7 100644 --- a/packages/app/cypress/fixtures/api/collectivex.json +++ b/packages/app/cypress/fixtures/api/collectivex.json @@ -1,29 +1,30 @@ { - "snapshotVersion": 2, + "snapshotVersion": 3, "series": [ { - "id": "cx-7a284f4e", - "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", - "colorKey": "h100_42947950", - "comparisonKey": "fb346b1019e55bb0", + "id": "cx-f0dd83d8", + "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|ac583971f94b176", + "colorKey": 
"b300_c1ad910f", + "comparisonKey": "80e2eefb7447672f", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:51:32.113885+00:00", + "generatedAt": "2026-06-26T17:41:08.828331+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h100-dgxc-slurm_14", - "sku": "h100", + "runner": "b300-nv_15", + "sku": "b300", "backend": "deepep", "phase": "decode", "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", + "resourceMode": "normalized", + "suite": "resource-constrained", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", + "topologyClass": "b300-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · bf16", + "label": "B300 EP8 · deepep · bf16 (norm)", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, @@ -38,14 +39,14 @@ "combineQuantMode": "none" }, "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", + "requestedFraction": 0.18, + "achievedFraction": 0.1824, + "configuredUnits": 27, + "deviceUnits": 148, + "resourceClass": "resource-constrained", + "conformanceClass": "resource-conforming", "fixedKernel": false, - "paretoEligible": false + "paretoEligible": true }, "placement": { "kind": "packed", @@ -63,45 +64,45 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271543513", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271543513", - "createdAt": "2026-06-26T23:46:04Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28254469772", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254469772", + "createdAt": "2026-06-26T17:41:08.828331+00:00", + "sha": 
"60dec7d70f554e252fec87709e2be52752947db1" }, "rows": [ { "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 71.00799679756165, - "p90": 100.67199915647507, - "p95": 101.6319990158081, - "p99": 103.74400019645691 + "p50": 56.992001831531525, + "p90": 59.039998799562454, + "p95": 61.824001371860504, + "p99": 73.44000041484833 }, "combine": { - "p50": 73.34399968385696, - "p90": 81.79199695587158, - "p95": 117.47200042009354, - "p99": 304.4799864292145 + "p50": 66.3359984755516, + "p90": 67.4239993095398, + "p95": 68.15999746322632, + "p99": 77.47200131416321 }, "roundtrip": { - "p50": 126.52799487113953, - "p90": 130.3360015153885, - "p95": 131.84000551700592, - "p99": 137.95199990272522 + "p50": 106.81600123643875, + "p90": 113.08799684047699, + "p95": 114.23999816179276, + "p99": 135.6479972600937 }, "isolatedSum": { - "p50": 144.3519964814186, - "p90": 182.46399611234665, - "p95": 219.10399943590164, - "p99": 408.2239866256714 + "p50": 123.32800030708313, + "p90": 126.46399810910225, + "p95": 129.98399883508682, + "p99": 150.91200172901154 }, "roundtripMeasured": true, "dispatchLogicalBytes": 630784, "combineLogicalBytes": 630784, "fanoutMean": 5.5, "recvTokensMax": 7, - "stragglerRank": 7, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -110,35 +111,35 @@ "tokensPerRank": 2, "globalTokens": 16, "dispatch": { - "p50": 68.57600063085556, - "p90": 76.31999999284744, - "p95": 79.13599908351898, - "p99": 88.32000195980072 + "p50": 56.992001831531525, + "p90": 58.78400057554245, + "p95": 60.92799827456474, + "p99": 73.21599870920181 }, "combine": { - "p50": 72.54400104284286, - "p90": 73.98399710655212, - "p95": 74.36800003051758, - "p99": 78.84799689054489 + "p50": 67.32799857854843, + "p90": 69.11999732255936, + "p95": 70.65600156784058, + "p99": 79.93599772453308 }, "roundtrip": { - "p50": 126.81600451469421, - "p90": 131.1360001564026, - "p95": 134.24000144004822, - "p99": 137.69599795341492 + "p50": 106.9440022110939, + 
"p90": 109.40799862146378, + "p95": 110.88000237941742, + "p99": 119.39200013875961 }, "isolatedSum": { - "p50": 141.12000167369843, - "p90": 150.30399709939957, - "p95": 153.50399911403656, - "p99": 167.1679988503456 + "p50": 124.32000041007996, + "p90": 127.9039978981018, + "p95": 131.58399984240532, + "p99": 153.1519964337349 }, "roundtripMeasured": true, "dispatchLogicalBytes": 1232896, "combineLogicalBytes": 1232896, "fanoutMean": 5.375, "recvTokensMax": 13, - "stragglerRank": 7, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -147,35 +148,35 @@ "tokensPerRank": 4, "globalTokens": 32, "dispatch": { - "p50": 73.05599749088287, - "p90": 103.67999970912933, - "p95": 108.51199924945831, - "p99": 261.34398579597473 + "p50": 57.792000472545624, + "p90": 59.39200147986412, + "p95": 61.28000095486641, + "p99": 68.09599697589874 }, "combine": { - "p50": 73.37599992752075, - "p90": 80.03199845552444, - "p95": 87.0399996638298, - "p99": 87.87199854850769 + "p50": 67.80800223350525, + "p90": 69.66400146484375, + "p95": 76.99199765920639, + "p99": 78.75200361013412 }, "roundtrip": { - "p50": 130.52800297737122, - "p90": 157.4079990386963, - "p95": 160.76800227165222, - "p99": 164.22399878501892 + "p50": 116.22399836778641, + "p90": 122.68800288438797, + "p95": 124.35200065374374, + "p99": 127.93600559234619 }, "isolatedSum": { - "p50": 146.43199741840363, - "p90": 183.71199816465378, - "p95": 195.55199891328812, - "p99": 349.2159843444824 + "p50": 125.60000270605087, + "p90": 129.05600294470787, + "p95": 138.2719986140728, + "p99": 146.84800058603287 }, "roundtripMeasured": true, "dispatchLogicalBytes": 2480128, "combineLogicalBytes": 2480128, "fanoutMean": 5.40625, "recvTokensMax": 29, - "stragglerRank": 7, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -184,35 +185,35 @@ "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 71.45600020885468, - "p90": 98.88000041246414, - "p95": 103.00800204277039, - 
"p99": 109.69600081443787 + "p50": 59.29600074887276, + "p90": 61.15199998021126, + "p95": 62.39999830722809, + "p99": 68.1919977068901 }, "combine": { - "p50": 73.7600028514862, - "p90": 82.59200304746628, - "p95": 83.99999886751175, - "p99": 88.41600269079208 + "p50": 68.38399916887283, + "p90": 77.31200009584427, + "p95": 77.72800326347351, + "p99": 78.78399640321732 }, "roundtrip": { - "p50": 131.29599392414093, - "p90": 154.59200739860535, - "p95": 157.05600380897522, - "p99": 165.66400229930878 + "p50": 120.25599926710129, + "p90": 125.82400441169739, + "p95": 126.75200402736664, + "p99": 133.44000279903412 }, "isolatedSum": { - "p50": 145.21600306034088, - "p90": 181.47200345993042, - "p95": 187.00800091028214, - "p99": 198.11200350522995 + "p50": 127.67999991774559, + "p90": 138.46400007605553, + "p95": 140.1280015707016, + "p99": 146.97599411010742 }, "roundtripMeasured": true, "dispatchLogicalBytes": 4974592, "combineLogicalBytes": 4974592, "fanoutMean": 5.421875, "recvTokensMax": 47, - "stragglerRank": 7, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -221,35 +222,35 @@ "tokensPerRank": 16, "globalTokens": 128, "dispatch": { - "p50": 82.97599852085114, - "p90": 100.8640006184578, - "p95": 103.26399654150009, - "p99": 108.44799876213074 + "p50": 62.78400123119354, + "p90": 69.023996591568, + "p95": 71.03999704122543, + "p99": 76.73600316047668 }, "combine": { - "p50": 74.49600100517273, - "p90": 87.10400015115738, - "p95": 87.74399757385254, - "p99": 88.86399865150452 + "p50": 77.2479996085167, + "p90": 78.5600021481514, + "p95": 78.72000336647034, + "p99": 80.86399734020233 }, "roundtrip": { - "p50": 128.1919926404953, - "p90": 158.720001578331, - "p95": 161.53599321842194, - "p99": 164.09599781036377 + "p50": 119.61600184440613, + "p90": 122.72000312805176, + "p95": 124.35200065374374, + "p99": 131.29599392414093 }, "isolatedSum": { - "p50": 157.47199952602386, - "p90": 187.96800076961517, - "p95": 191.00799411535263, - 
"p99": 197.31199741363525 + "p50": 140.03200083971024, + "p90": 147.5839987397194, + "p95": 149.76000040769577, + "p99": 157.60000050067902 }, "roundtripMeasured": true, "dispatchLogicalBytes": 9920512, "combineLogicalBytes": 9920512, "fanoutMean": 5.40625, "recvTokensMax": 92, - "stragglerRank": 4, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -258,35 +259,35 @@ "tokensPerRank": 32, "globalTokens": 256, "dispatch": { - "p50": 90.81599861383438, - "p90": 103.04000228643417, - "p95": 107.87200182676315, - "p99": 111.51999980211258 + "p50": 69.24799829721451, + "p90": 70.91200351715088, + "p95": 73.69600236415863, + "p99": 81.69600367546082 }, "combine": { - "p50": 81.50400221347809, - "p90": 89.9519994854927, - "p95": 90.43200314044952, - "p99": 96.19200229644775 + "p50": 78.59200239181519, + "p90": 79.80799674987793, + "p95": 80.73599636554718, + "p99": 90.94399958848953 }, "roundtrip": { - "p50": 140.47999680042267, - "p90": 163.29599916934967, - "p95": 166.87999665737152, - "p99": 171.03999853134155 + "p50": 130.68799674510956, + "p90": 135.23200154304504, + "p95": 136.51199638843536, + "p99": 140.47999680042267 }, "isolatedSum": { - "p50": 172.32000082731247, - "p90": 192.99200177192688, - "p95": 198.30400496721268, - "p99": 207.71200209856033 + "p50": 147.8400006890297, + "p90": 150.7200002670288, + "p95": 154.4319987297058, + "p99": 172.64000326395035 }, "roundtripMeasured": true, "dispatchLogicalBytes": 19726336, "combineLogicalBytes": 19726336, "fanoutMean": 5.375, "recvTokensMax": 182, - "stragglerRank": 3, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -295,28 +296,28 @@ "tokensPerRank": 64, "globalTokens": 512, "dispatch": { - "p50": 97.31200337409973, - "p90": 119.10399794578552, - "p95": 121.69600278139114, - "p99": 131.26400113105774 + "p50": 82.49600231647491, + "p90": 92.70399808883667, + "p95": 95.0080007314682, + "p99": 99.45599734783173 }, "combine": { - "p50": 90.20800143480301, - "p90": 
97.15200215578079, - "p95": 103.93600165843964, - "p99": 104.47999835014343 + "p50": 92.25600212812424, + "p90": 100.09600222110748, + "p95": 102.36799716949463, + "p99": 106.65600001811981 }, "roundtrip": { - "p50": 162.1759980916977, - "p90": 181.7920058965683, - "p95": 184.4799965620041, - "p99": 187.74400651454926 + "p50": 158.65600109100342, + "p90": 163.00800442695618, + "p95": 164.19200599193573, + "p99": 169.50400173664093 }, "isolatedSum": { - "p50": 187.52000480890274, - "p90": 216.25600010156631, - "p95": 225.63200443983078, - "p99": 235.74399948120117 + "p50": 174.75200444459915, + "p90": 192.80000030994415, + "p95": 197.37599790096283, + "p99": 206.11199736595154 }, "roundtripMeasured": true, "dispatchLogicalBytes": 38993920, @@ -332,35 +333,35 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 113.3119985461235, - "p90": 132.7359974384308, - "p95": 134.5919966697693, - "p99": 140.35199582576752 + "p50": 93.91999989748001, + "p90": 95.83999961614609, + "p95": 98.04800152778625, + "p99": 104.99200224876404 }, "combine": { - "p50": 108.41599851846695, - "p90": 120.44800072908401, - "p95": 120.7360029220581, - "p99": 121.47200107574463 + "p50": 115.35999923944473, + "p90": 115.93600362539291, + "p95": 116.60800129175186, + "p99": 119.45600062608719 }, "roundtrip": { - "p50": 198.2080042362213, - "p90": 216.86400473117828, - "p95": 221.24800086021423, - "p99": 223.80800545215607 + "p50": 192.51200556755066, + "p90": 198.88000190258026, + "p95": 199.48799908161163, + "p99": 209.47200059890747 }, "isolatedSum": { - "p50": 221.72799706459045, - "p90": 253.1839981675148, - "p95": 255.3279995918274, - "p99": 261.82399690151215 + "p50": 209.27999913692474, + "p90": 211.776003241539, + "p95": 214.65600281953812, + "p99": 224.44800287485123 }, "roundtripMeasured": true, "dispatchLogicalBytes": 77672448, "combineLogicalBytes": 77672448, "fanoutMean": 5.291015625, "recvTokensMax": 723, - "stragglerRank": 7, + "stragglerRank": 4, "correct": true, 
"samplesPooled": 600, "trials": 3 @@ -368,28 +369,29 @@ ] }, { - "id": "cx-efe3a643", - "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", - "colorKey": "h100_42947950", - "comparisonKey": "4c920ba7523ac63b", + "id": "cx-4ad32f1a", + "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|fp8-saturation|none|none|0|normalized|0.18|8c8497a77d9085d", + "colorKey": "h100_7b3247bf", + "comparisonKey": "2a087c80bac58077", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:47:28.966623+00:00", + "generatedAt": "2026-06-26T15:27:59.966964+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h100-dgxc-slurm_08", + "runner": "h100-dgxc-slurm_12", "sku": "h100", "backend": "deepep", - "phase": "prefill", + "phase": "decode", "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", + "resourceMode": "normalized", + "suite": "resource-constrained", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", "topologyClass": "h100-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · bf16", + "label": "H100 EP8 · deepep · bf16 (norm)", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, @@ -400,16 +402,16 @@ "unevenTokens": "none", "eplbEnabled": false, "dispatchDtype": "bf16", - "activationProfile": "normal", + "activationProfile": "fp8-saturation", "combineQuantMode": "none" }, "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, + "requestedFraction": 0.18, + "achievedFraction": 0.1818, + "configuredUnits": 24, "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", + "resourceClass": "unknown", + "conformanceClass": "resource-conforming", "fixedKernel": false, "paretoEligible": false }, @@ -420,8 +422,8 @@ "scaleUpDomain": 8 }, "routingConsistent": 
true, - "traceSignature": "64d989e2e2a6b31", - "workloadId": "set:6:a426d66e479dc893", + "traceSignature": "8c8497a77d9085d", + "workloadId": "set:4:d8d49658059863f2", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -429,259 +431,186 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271547494", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271547494", - "createdAt": "2026-06-26T23:46:11Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28247603308", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28247603308", + "createdAt": "2026-06-26T15:27:59.966964+00:00", + "sha": "fd23d02b65dba6f1ed963342b188022fc27263d1" }, "rows": [ { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 1, + "globalTokens": 8, "dispatch": { - "p50": 111.84000223875046, - "p90": 124.15999919176102, - "p95": 131.1360001564026, - "p99": 137.66400516033173 + "p50": 96.73599898815155, + "p90": 102.49599814414978, + "p95": 104.12800312042236, + "p99": 112.19199746847153 }, "combine": { - "p50": 106.6880002617836, - "p90": 114.30399864912033, - "p95": 120.09599804878235, - "p99": 123.03999811410904 + "p50": 79.42400127649307, + "p90": 81.4720019698143, + "p95": 82.14399963617325, + "p99": 87.93599903583527 }, "roundtrip": { - "p50": 199.0399956703186, - "p90": 207.58399367332458, - "p95": 216.3199931383133, - "p99": 222.1119999885559 + "p50": 146.84799313545227, + "p90": 156.15999698638916, + "p95": 159.13599729537964, + "p99": 164.000004529953 }, "isolatedSum": { - "p50": 218.52800250053406, - "p90": 238.46399784088135, - "p95": 251.23199820518494, - "p99": 260.70400327444077 + "p50": 176.16000026464462, + "p90": 183.96800011396408, + "p95": 186.2720027565956, + "p99": 200.1279965043068 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - 
"combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 256, - "globalTokens": 2048, + "tokensPerRank": 8, + "globalTokens": 64, "dispatch": { - "p50": 142.97600090503693, - "p90": 152.3520052433014, - "p95": 161.28000617027283, - "p99": 169.21600699424744 + "p50": 98.33600372076035, + "p90": 103.93600165843964, + "p95": 106.52799904346466, + "p99": 111.58400028944016 }, "combine": { - "p50": 150.176003575325, - "p90": 155.68000078201294, - "p95": 162.36799955368042, - "p99": 171.26399278640747 + "p50": 80.03199845552444, + "p90": 86.84799820184708, + "p95": 87.61599659919739, + "p99": 88.06400001049042 }, "roundtrip": { - "p50": 263.2319927215576, - "p90": 269.72800493240356, - "p95": 276.0320007801056, - "p99": 290.5920147895813 + "p50": 151.64799988269806, + "p90": 159.16800498962402, + "p95": 160.35200655460358, + "p99": 165.50399363040924 }, "isolatedSum": { - "p50": 293.15200448036194, - "p90": 308.03200602531433, - "p95": 323.64800572395325, - "p99": 340.4799997806549 + "p50": 178.3680021762848, + "p90": 190.7839998602867, + "p95": 194.14399564266205, + "p99": 199.64800029993057 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 155889664, - "combineLogicalBytes": 155889664, - "fanoutMean": 5.3095703125, - "recvTokensMax": 1422, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 512, - "globalTokens": 4096, + "tokensPerRank": 32, + "globalTokens": 256, "dispatch": { - "p50": 200.8640021085739, - "p90": 211.39200031757355, - "p95": 214.27200734615326, - "p99": 220.96000611782074 + "p50": 99.90400075912476, + "p90": 105.76000064611435, + "p95": 108.15999656915665, + 
"p99": 116.60800129175186 }, "combine": { - "p50": 229.72799837589264, - "p90": 236.67199909687042, - "p95": 238.71999979019165, - "p99": 246.2719976902008 + "p50": 87.90399879217148, + "p90": 90.55999666452408, + "p95": 95.23200243711472, + "p99": 96.57599776983261 }, "roundtrip": { - "p50": 400.86400508880615, - "p90": 413.5040044784546, - "p95": 418.94400119781494, - "p99": 428.51200699806213 + "p50": 157.82399475574493, + "p90": 163.7759953737259, + "p95": 166.78400337696075, + "p99": 169.95200514793396 }, "isolatedSum": { - "p50": 430.59200048446655, - "p90": 448.06399941444397, - "p95": 452.9920071363449, - "p99": 467.23200380802155 + "p50": 187.80799955129623, + "p90": 196.31999731063843, + "p95": 203.39199900627136, + "p99": 213.18399906158447 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 312266752, - "combineLogicalBytes": 312266752, - "fanoutMean": 5.31787109375, - "recvTokensMax": 2779, - "stragglerRank": 7, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 1024, - "globalTokens": 8192, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 303.1040132045746, - "p90": 308.9280128479004, - "p95": 311.2959861755371, - "p99": 318.015992641449 + "p50": 128.60800325870514, + "p90": 133.53599607944489, + "p95": 135.51999628543854, + "p99": 138.49599659442902 }, "combine": { - "p50": 365.9839928150177, - "p90": 372.8959858417511, - "p95": 375.39198994636536, - "p99": 382.4320137500763 + "p50": 112.57600039243698, + "p90": 120.4800009727478, + "p95": 120.7680031657219, + "p99": 122.40000069141388 }, "roundtrip": { - "p50": 644.8000073432922, - "p90": 654.528021812439, - "p95": 657.8879952430725, - "p99": 668.4799790382385 + "p50": 208.3519995212555, + "p90": 215.71199595928192, + "p95": 217.56799519062042, + "p99": 220.5439954996109 }, "isolatedSum": { - "p50": 
669.0880060195923, - "p90": 681.8239986896515, - "p95": 686.6879761219025, - "p99": 700.4480063915253 + "p50": 241.18400365114212, + "p90": 254.0159970521927, + "p95": 256.28799945116043, + "p99": 260.8959972858429 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 623443968, - "combineLogicalBytes": 623443968, - "fanoutMean": 5.30859375, - "recvTokensMax": 5505, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 - }, - { - "tokensPerRank": 2048, - "globalTokens": 16384, - "dispatch": { - "p50": 526.8800258636475, - "p90": 540.5759811401367, - "p95": 545.0239777565002, - "p99": 551.6160130500793 - }, - "combine": { - "p50": 638.0159854888916, - "p90": 650.2400040626526, - "p95": 653.1519889831543, - "p99": 660.1920127868652 - }, - "roundtrip": { - "p50": 1135.424017906189, - "p90": 1147.7760076522827, - "p95": 1151.0720252990723, - "p99": 1157.5039625167847 - }, - "isolatedSum": { - "p50": 1164.896011352539, - "p90": 1190.8159852027893, - "p95": 1198.1759667396545, - "p99": 1211.8080258369446 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1243805696, - "combineLogicalBytes": 1243805696, - "fanoutMean": 5.29547119140625, - "recvTokensMax": 10952, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4096, - "globalTokens": 32768, - "dispatch": { - "p50": 1005.2160024642944, - "p90": 1027.2639989852905, - "p95": 1033.5359573364258, - "p99": 1050.271987915039 - }, - "combine": { - "p50": 1168.511986732483, - "p90": 1181.7599534988403, - "p95": 1189.1520023345947, - "p99": 1202.015995979309 - }, - "roundtrip": { - "p50": 2131.455898284912, - "p90": 2150.815963745117, - "p95": 2158.112049102783, - "p99": 2167.3600673675537 - }, - "isolatedSum": { - "p50": 2173.7279891967773, - "p90": 2209.023952484131, - "p95": 2222.6879596710205, - "p99": 2252.287983894348 - 
}, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2487009280, - "combineLogicalBytes": 2487009280, - "fanoutMean": 5.294189453125, - "recvTokensMax": 21781, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 } ] }, { - "id": "cx-9ca51f4f", - "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", - "colorKey": "h200_d982b749", - "comparisonKey": "4dde4e46080a91eb", + "id": "cx-0d6ef23b", + "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|ac583971f94b176", + "colorKey": "h200_c851a534", + "comparisonKey": "6b4f4d7f65293019", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:49:18.590174+00:00", + "generatedAt": "2026-06-26T17:29:45.312905+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h200-dgxc-slurm_4", + "runner": "h200-dgxc-slurm_2", "sku": "h200", "backend": "deepep", "phase": "decode", "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", + "resourceMode": "normalized", + "suite": "resource-constrained", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", "topologyClass": "h200-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H200 EP8 · deepep · bf16", + "label": "H200 EP8 · deepep · bf16 (norm)", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, @@ -696,14 +625,14 @@ "combineQuantMode": "none" }, "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, + "requestedFraction": 0.18, + "achievedFraction": 0.1818, + "configuredUnits": 24, "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", + "resourceClass": "resource-constrained", + "conformanceClass": "resource-conforming", "fixedKernel": false, - "paretoEligible": false + "paretoEligible": true }, "placement": 
{ "kind": "packed", @@ -721,45 +650,45 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271601584", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271601584", - "createdAt": "2026-06-26T23:47:53Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28254392935", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254392935", + "createdAt": "2026-06-26T17:29:45.312905+00:00", + "sha": "60dec7d70f554e252fec87709e2be52752947db1" }, "rows": [ { "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 105.0880029797554, - "p90": 132.7040046453476, - "p95": 145.21600306034088, - "p99": 190.11199474334717 + "p50": 74.11199808120728, + "p90": 94.11200135946274, + "p95": 104.35199737548828, + "p99": 138.0160003900528 }, "combine": { - "p50": 71.3919997215271, - "p90": 93.37600320577621, - "p95": 98.01600128412247, - "p99": 108.51199924945831 + "p50": 68.41599941253662, + "p90": 78.72000336647034, + "p95": 83.48800241947174, + "p99": 105.72800040245056 }, "roundtrip": { - "p50": 123.45600128173828, - "p90": 180.60800433158875, - "p95": 190.7840073108673, - "p99": 233.2800030708313 + "p50": 124.4800016283989, + "p90": 144.31999623775482, + "p95": 156.3200056552887, + "p99": 193.53599846363068 }, "isolatedSum": { - "p50": 176.4800027012825, - "p90": 226.0800078511238, - "p95": 243.23200434446335, - "p99": 298.6239939928055 + "p50": 142.5279974937439, + "p90": 172.83200472593307, + "p95": 187.83999979496002, + "p99": 243.74400079250336 }, "roundtripMeasured": true, "dispatchLogicalBytes": 630784, "combineLogicalBytes": 630784, "fanoutMean": 5.5, "recvTokensMax": 7, - "stragglerRank": 3, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 @@ -768,35 +697,35 @@ "tokensPerRank": 2, "globalTokens": 16, "dispatch": { - "p50": 87.10400015115738, - "p90": 130.0799995660782, - "p95": 139.96799290180206, - 
"p99": 167.1999990940094 + "p50": 74.33599978685379, + "p90": 99.42399710416794, + "p95": 109.66400057077408, + "p99": 131.71200454235077 }, "combine": { - "p50": 75.58400183916092, - "p90": 97.50399738550186, - "p95": 105.31199723482132, - "p99": 143.61600577831268 + "p50": 69.85600292682648, + "p90": 83.00799876451492, + "p95": 90.40000289678574, + "p99": 114.33599889278412 }, "roundtrip": { - "p50": 144.83200013637543, - "p90": 179.1040003299713, - "p95": 191.96799397468567, - "p99": 229.5680046081543 + "p50": 122.43200093507767, + "p90": 144.6080058813095, + "p95": 154.62400019168854, + "p99": 173.69599640369415 }, "isolatedSum": { - "p50": 162.6880019903183, - "p90": 227.58399695158005, - "p95": 245.27999013662338, - "p99": 310.8160048723221 + "p50": 144.19200271368027, + "p90": 182.43199586868286, + "p95": 200.06400346755981, + "p99": 246.0480034351349 }, "roundtripMeasured": true, "dispatchLogicalBytes": 1232896, "combineLogicalBytes": 1232896, "fanoutMean": 5.375, "recvTokensMax": 13, - "stragglerRank": 5, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 @@ -805,35 +734,35 @@ "tokensPerRank": 4, "globalTokens": 32, "dispatch": { - "p50": 72.06399738788605, - "p90": 86.14400029182434, - "p95": 95.51999717950821, - "p99": 111.87200248241425 + "p50": 74.97599720954895, + "p90": 95.29600292444229, + "p95": 104.12800312042236, + "p99": 139.74399864673615 }, "combine": { - "p50": 68.67200136184692, - "p90": 80.06399869918823, - "p95": 85.66399663686752, - "p99": 102.52799838781357 + "p50": 69.40799951553345, + "p90": 81.63200318813324, + "p95": 88.22400122880936, + "p99": 119.4240003824234 }, "roundtrip": { - "p50": 121.95199728012085, - "p90": 146.43199741840363, - "p95": 154.7199934720993, - "p99": 173.47200214862823 + "p50": 123.74400347471237, + "p90": 150.36800503730774, + "p95": 160.3199988603592, + "p99": 204.8960030078888 }, "isolatedSum": { - "p50": 140.73599874973297, - "p90": 166.20799899101257, - "p95": 181.18399381637573, - 
"p99": 214.4000008702278 + "p50": 144.3839967250824, + "p90": 176.92800611257553, + "p95": 192.35200434923172, + "p99": 259.16799902915955 }, "roundtripMeasured": true, "dispatchLogicalBytes": 2480128, "combineLogicalBytes": 2480128, "fanoutMean": 5.40625, "recvTokensMax": 29, - "stragglerRank": 7, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -842,35 +771,35 @@ "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 72.9919970035553, - "p90": 93.12000125646591, - "p95": 103.2319962978363, - "p99": 120.7360029220581 + "p50": 74.78400319814682, + "p90": 92.25600212812424, + "p95": 102.91200131177902, + "p99": 123.16799908876419 }, "combine": { - "p50": 69.24799829721451, - "p90": 82.07999914884567, - "p95": 88.41600269079208, - "p99": 100.67199915647507 + "p50": 70.52800059318542, + "p90": 81.95199817419052, + "p95": 87.48800307512283, + "p99": 100.51199793815613 }, "roundtrip": { - "p50": 124.1919994354248, - "p90": 152.8639942407608, - "p95": 164.09599781036377, - "p99": 197.85599410533905 + "p50": 124.03199821710587, + "p90": 147.20000326633453, + "p95": 153.9199948310852, + "p99": 180.00000715255737 }, "isolatedSum": { - "p50": 142.2399953007698, - "p90": 175.20000040531158, - "p95": 191.6479989886284, - "p99": 221.40800207853317 + "p50": 145.31200379133224, + "p90": 174.20800030231476, + "p95": 190.40000438690186, + "p99": 223.67999702692032 }, "roundtripMeasured": true, "dispatchLogicalBytes": 4974592, "combineLogicalBytes": 4974592, "fanoutMean": 5.421875, "recvTokensMax": 47, - "stragglerRank": 4, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 @@ -879,35 +808,35 @@ "tokensPerRank": 16, "globalTokens": 128, "dispatch": { - "p50": 69.72800195217133, - "p90": 91.36000275611877, - "p95": 105.66399991512299, - "p99": 141.56800508499146 + "p50": 73.18399846553802, + "p90": 92.83199906349182, + "p95": 103.61599922180176, + "p99": 195.93599438667297 }, "combine": { - "p50": 70.592001080513, - "p90": 
82.04799890518188, - "p95": 87.3280018568039, - "p99": 99.45599734783173 + "p50": 71.32799923419952, + "p90": 86.33600175380707, + "p95": 92.03200042247772, + "p99": 120.80000340938568 }, "roundtrip": { - "p50": 123.96799772977829, - "p90": 151.32799744606018, - "p95": 162.23999857902527, - "p99": 186.46399676799774 + "p50": 129.72800433635712, + "p90": 161.31199896335602, + "p95": 172.86400496959686, + "p99": 215.10399878025055 }, "isolatedSum": { - "p50": 140.32000303268433, - "p90": 173.40800166130066, - "p95": 192.99200177192688, - "p99": 241.02400243282318 + "p50": 144.51199769973755, + "p90": 179.1680008172989, + "p95": 195.64799964427948, + "p99": 316.73599779605865 }, "roundtripMeasured": true, "dispatchLogicalBytes": 9920512, "combineLogicalBytes": 9920512, "fanoutMean": 5.40625, "recvTokensMax": 92, - "stragglerRank": 4, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 @@ -916,35 +845,35 @@ "tokensPerRank": 32, "globalTokens": 256, "dispatch": { - "p50": 81.85599744319916, - "p90": 108.89600217342377, - "p95": 117.95199662446976, - "p99": 130.5599957704544 + "p50": 82.2720006108284, + "p90": 100.80000013113022, + "p95": 108.92800241708755, + "p99": 134.88000631332397 }, "combine": { - "p50": 77.56800204515457, - "p90": 96.25600278377533, - "p95": 99.7759997844696, - "p99": 110.43199896812439 + "p50": 76.03199779987335, + "p90": 89.40800279378891, + "p95": 94.97600048780441, + "p99": 117.95199662446976 }, "roundtrip": { - "p50": 136.19199395179749, - "p90": 168.19199919700623, - "p95": 180.25599420070648, - "p99": 210.01599729061127 + "p50": 130.8480054140091, + "p90": 154.33600544929504, + "p95": 164.73600268363953, + "p99": 204.0639966726303 }, "isolatedSum": { - "p50": 159.42399948835373, - "p90": 205.1520049571991, - "p95": 217.72799640893936, - "p99": 240.9919947385788 + "p50": 158.30399841070175, + "p90": 190.20800292491913, + "p95": 203.90400290489197, + "p99": 252.83200293779373 }, "roundtripMeasured": true, 
"dispatchLogicalBytes": 19726336, "combineLogicalBytes": 19726336, "fanoutMean": 5.375, "recvTokensMax": 182, - "stragglerRank": 4, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 @@ -953,35 +882,35 @@ "tokensPerRank": 64, "globalTokens": 512, "dispatch": { - "p50": 94.81599926948547, - "p90": 107.16799646615982, - "p95": 116.99199676513672, - "p99": 140.6719982624054 + "p50": 91.32800251245499, + "p90": 110.04800349473953, + "p95": 116.86400324106216, + "p99": 146.84799313545227 }, "combine": { - "p50": 85.75999736785889, - "p90": 97.79199957847595, - "p95": 106.04800283908844, - "p99": 131.04000687599182 + "p50": 87.2960016131401, + "p90": 98.36799651384354, + "p95": 104.70400005578995, + "p99": 124.92799758911133 }, "roundtrip": { - "p50": 156.5759927034378, - "p90": 172.19200730323792, - "p95": 179.00800704956055, - "p99": 190.49599766731262 + "p50": 156.031996011734, + "p90": 173.24799299240112, + "p95": 180.38399517536163, + "p99": 215.39199352264404 }, "isolatedSum": { - "p50": 180.57599663734436, - "p90": 204.95999604463577, - "p95": 223.03999960422516, - "p99": 271.7120051383972 + "p50": 178.6240041255951, + "p90": 208.41600000858307, + "p95": 221.5680032968521, + "p99": 271.7759907245636 }, "roundtripMeasured": true, "dispatchLogicalBytes": 38993920, "combineLogicalBytes": 38993920, "fanoutMean": 5.3125, "recvTokensMax": 367, - "stragglerRank": 7, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 @@ -990,35 +919,35 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 115.90400338172913, - "p90": 139.23199474811554, - "p95": 145.47200500965118, - "p99": 182.65600502490997 + "p50": 116.03199690580368, + "p90": 129.7599971294403, + "p95": 136.57599687576294, + "p99": 149.24800395965576 }, "combine": { - "p50": 103.84000092744827, - "p90": 120.25599926710129, - "p95": 126.56000256538391, - "p99": 146.68799936771393 + "p50": 103.42399775981903, + "p90": 116.54400080442429, + "p95": 
123.3920007944107, + "p99": 141.95199310779572 }, "roundtrip": { - "p50": 196.19199633598328, - "p90": 217.15199947357178, - "p95": 223.68000447750092, - "p99": 249.2160052061081 + "p50": 192.54399836063385, + "p90": 208.8959962129593, + "p95": 215.64799547195435, + "p99": 228.7359982728958 }, "isolatedSum": { - "p50": 219.7440043091774, - "p90": 259.4879940152168, - "p95": 272.0320075750351, - "p99": 329.3440043926239 + "p50": 219.4559946656227, + "p90": 246.3039979338646, + "p95": 259.96799767017365, + "p99": 291.1999970674515 }, "roundtripMeasured": true, "dispatchLogicalBytes": 77672448, "combineLogicalBytes": 77672448, "fanoutMean": 5.291015625, "recvTokensMax": 723, - "stragglerRank": 5, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 @@ -1026,28 +955,29 @@ ] }, { - "id": "cx-5553e87c", - "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", - "colorKey": "h200_d982b749", - "comparisonKey": "6da1f9e2ab025dbe", + "id": "cx-e7727ce9", + "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|normalized|0.18|64d989e2e2a6b31", + "colorKey": "b300_c1ad910f", + "comparisonKey": "9532205a80f3d757", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:49:31.030615+00:00", + "generatedAt": "2026-06-26T17:38:48.516779+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h200-dgxc-slurm_13", - "sku": "h200", + "runner": "b300-nv_15", + "sku": "b300", "backend": "deepep", "phase": "prefill", "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", + "resourceMode": "normalized", + "suite": "resource-constrained", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", + "topologyClass": "b300-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H200 EP8 · deepep · bf16", + "label": "B300 
EP8 · deepep · bf16 (norm)", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, @@ -1062,14 +992,14 @@ "combineQuantMode": "none" }, "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", + "requestedFraction": 0.18, + "achievedFraction": 0.1824, + "configuredUnits": 27, + "deviceUnits": 148, + "resourceClass": "resource-constrained", + "conformanceClass": "resource-conforming", "fixedKernel": false, - "paretoEligible": false + "paretoEligible": true }, "placement": { "kind": "packed", @@ -1087,45 +1017,45 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271605214", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271605214", - "createdAt": "2026-06-26T23:47:59Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28254469772", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254469772", + "createdAt": "2026-06-26T17:38:48.516779+00:00", + "sha": "60dec7d70f554e252fec87709e2be52752947db1" }, "rows": [ { "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 116.64000153541565, - "p90": 132.9600065946579, - "p95": 139.80799913406372, - "p99": 183.1039935350418 + "p50": 94.11200135946274, + "p90": 98.9760011434555, + "p95": 100.54399818181992, + "p99": 116.44800007343292 }, "combine": { - "p50": 106.11200332641602, - "p90": 121.08799815177917, - "p95": 127.61600315570831, - "p99": 162.7199947834015 + "p50": 115.1999980211258, + "p90": 115.9679964184761, + "p95": 116.89600348472595, + "p99": 129.02399897575378 }, "roundtrip": { - "p50": 197.11999595165253, - "p90": 216.67200326919556, - "p95": 225.2800017595291, - "p99": 246.75199389457703 + "p50": 193.2159960269928, + "p90": 198.43199849128723, + "p95": 199.8080015182495, + "p99": 
217.50399470329285 }, "isolatedSum": { - "p50": 222.75200486183167, - "p90": 254.04800474643707, - "p95": 267.42400228977203, - "p99": 345.8239883184433 + "p50": 209.31199938058853, + "p90": 214.9439975619316, + "p95": 217.44000166654587, + "p99": 245.4719990491867 }, "roundtripMeasured": true, "dispatchLogicalBytes": 77672448, "combineLogicalBytes": 77672448, "fanoutMean": 5.291015625, "recvTokensMax": 723, - "stragglerRank": 5, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -1134,35 +1064,35 @@ "tokensPerRank": 256, "globalTokens": 2048, "dispatch": { - "p50": 143.8719928264618, - "p90": 166.143998503685, - "p95": 172.7360039949417, - "p99": 195.8719938993454 + "p50": 135.42400300502777, + "p90": 138.75199854373932, + "p95": 141.184002161026, + "p99": 151.0079950094223 }, "combine": { - "p50": 143.327996134758, - "p90": 159.743994474411, - "p95": 162.81600296497345, - "p99": 171.7119961977005 + "p50": 154.59200739860535, + "p90": 163.90399634838104, + "p95": 164.5440012216568, + "p99": 176.54399573802948 }, "roundtrip": { - "p50": 260.70401072502136, - "p90": 280.8319926261902, - "p95": 286.27198934555054, - "p99": 329.3119966983795 + "p50": 271.67999744415283, + "p90": 277.6319980621338, + "p95": 280.70399165153503, + "p99": 291.3599908351898 }, "isolatedSum": { - "p50": 287.1999889612198, - "p90": 325.887992978096, - "p95": 335.55200695991516, - "p99": 367.5839900970459 + "p50": 290.0160104036331, + "p90": 302.65599489212036, + "p95": 305.7280033826828, + "p99": 327.5519907474518 }, "roundtripMeasured": true, "dispatchLogicalBytes": 155889664, "combineLogicalBytes": 155889664, "fanoutMean": 5.3095703125, "recvTokensMax": 1422, - "stragglerRank": 5, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -1171,35 +1101,35 @@ "tokensPerRank": 512, "globalTokens": 4096, "dispatch": { - "p50": 203.23200523853302, - "p90": 227.00800001621246, - "p95": 239.07199501991272, - "p99": 277.1199941635132 + "p50": 
193.24800372123718, + "p90": 199.61600005626678, + "p95": 200.80000162124634, + "p99": 206.68800175189972 }, "combine": { - "p50": 224.60800409317017, - "p90": 241.31199717521667, - "p95": 248.44799935817719, - "p99": 268.22400093078613 + "p50": 265.8880054950714, + "p90": 274.59201216697693, + "p95": 275.2000093460083, + "p99": 286.78399324417114 }, "roundtrip": { - "p50": 403.0719995498657, - "p90": 426.68798565864563, - "p95": 434.4640076160431, - "p99": 486.01600527763367 + "p50": 442.59199500083923, + "p90": 448.96000623703003, + "p95": 455.00800013542175, + "p99": 461.40798926353455 }, "isolatedSum": { - "p50": 427.8400093317032, - "p90": 468.31999719142914, - "p95": 487.5199943780899, - "p99": 545.3439950942993 + "p50": 459.1360092163086, + "p90": 474.2080122232437, + "p95": 476.00001096725464, + "p99": 493.47199499607086 }, "roundtripMeasured": true, "dispatchLogicalBytes": 312266752, "combineLogicalBytes": 312266752, "fanoutMean": 5.31787109375, "recvTokensMax": 2779, - "stragglerRank": 5, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -1208,35 +1138,35 @@ "tokensPerRank": 1024, "globalTokens": 8192, "dispatch": { - "p50": 314.7520124912262, - "p90": 335.90400218963623, - "p95": 347.51999378204346, - "p99": 390.9119963645935 + "p50": 326.2079954147339, + "p90": 329.75998520851135, + "p95": 331.6799998283386, + "p99": 341.6000008583069 }, "combine": { - "p50": 357.9519987106323, - "p90": 372.1280097961426, - "p95": 378.9440095424652, - "p99": 416.6080057621002 + "p50": 457.66401290893555, + "p90": 459.77601408958435, + "p95": 469.760000705719, + "p99": 473.7600088119507 }, "roundtrip": { - "p50": 646.7199921607971, - "p90": 668.3200001716614, - "p95": 684.4800114631653, - "p99": 754.4959783554077 + "p50": 762.5920176506042, + "p90": 771.7440128326416, + "p95": 774.2080092430115, + "p99": 789.6320223808289 }, "isolatedSum": { - "p50": 672.7040112018585, - "p90": 708.0320119857788, - "p95": 726.4640033245087, - "p99": 
807.5200021266937 + "p50": 783.8720083236694, + "p90": 789.5359992980957, + "p95": 801.4400005340576, + "p99": 815.3600096702576 }, "roundtripMeasured": true, "dispatchLogicalBytes": 623443968, "combineLogicalBytes": 623443968, "fanoutMean": 5.30859375, "recvTokensMax": 5505, - "stragglerRank": 5, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -1245,35 +1175,35 @@ "tokensPerRank": 2048, "globalTokens": 16384, "dispatch": { - "p50": 542.0799851417542, - "p90": 560.1279735565186, - "p95": 575.3600001335144, - "p99": 736.2880110740662 + "p50": 577.1200060844421, + "p90": 582.5920104980469, + "p95": 583.5520029067993, + "p99": 591.2960171699524 }, "combine": { - "p50": 621.8879818916321, - "p90": 636.031985282898, - "p95": 641.6959762573242, - "p99": 732.7359914779663 + "p50": 817.2799944877625, + "p90": 828.4159898757935, + "p95": 831.8719863891602, + "p99": 913.4079813957214 }, "roundtrip": { - "p50": 1137.279987335205, - "p90": 1170.591950416565, - "p95": 1213.7600183486938, - "p99": 1369.6320056915283 + "p50": 1376.9279718399048, + "p90": 1386.9119882583618, + "p95": 1392.7680253982544, + "p99": 1453.8240432739258 }, "isolatedSum": { - "p50": 1163.9679670333862, - "p90": 1196.1599588394165, - "p95": 1217.0559763908386, - "p99": 1469.0240025520325 + "p50": 1394.4000005722046, + "p90": 1411.0080003738403, + "p95": 1415.4239892959595, + "p99": 1504.7039985656738 }, "roundtripMeasured": true, "dispatchLogicalBytes": 1243805696, "combineLogicalBytes": 1243805696, "fanoutMean": 5.29547119140625, "recvTokensMax": 10952, - "stragglerRank": 5, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -1282,35 +1212,35 @@ "tokensPerRank": 4096, "globalTokens": 32768, "dispatch": { - "p50": 997.3120093345642, - "p90": 1021.28005027771, - "p95": 1029.7919511795044, - "p99": 1212.8000259399414 + "p50": 1069.5040225982666, + "p90": 1078.0160427093506, + "p95": 1080.2559852600098, + "p99": 1090.880036354065 }, "combine": { - "p50": 
1121.6000318527222, - "p90": 1139.456033706665, - "p95": 1149.2160558700562, - "p99": 1185.4079961776733 + "p50": 1528.8959741592407, + "p90": 1540.4479503631592, + "p95": 1542.688012123108, + "p99": 1554.751992225647 }, "roundtrip": { - "p50": 2089.888095855713, - "p90": 2112.6720905303955, - "p95": 2126.431941986084, - "p99": 2277.951955795288 + "p50": 2581.9520950317383, + "p90": 2594.6240425109863, + "p95": 2602.303981781006, + "p99": 2637.9199028015137 }, "isolatedSum": { - "p50": 2118.9120411872864, - "p90": 2160.736083984375, - "p95": 2179.0080070495605, - "p99": 2398.2080221176147 + "p50": 2598.3999967575073, + "p90": 2618.4639930725098, + "p95": 2622.9439973831177, + "p99": 2645.632028579712 }, "roundtripMeasured": true, "dispatchLogicalBytes": 2487009280, "combineLogicalBytes": 2487009280, "fanoutMean": 5.294189453125, "recvTokensMax": 21781, - "stragglerRank": 5, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 @@ -1318,28 +1248,29 @@ ] }, { - "id": "cx-60c60832", - "identity": "mi355x|mori|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||c774c8e4abb34da", - "colorKey": "mi355x_4ec24046", - "comparisonKey": "3677ee6ace04ac65", + "id": "cx-19a8d159", + "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|normalized|0.18|64d989e2e2a6b31", + "colorKey": "h100_7b3247bf", + "comparisonKey": "0ac8f8817cb63abb", "schemaVersion": 3, - "generatedAt": "2026-06-27T00:53:59.155172+00:00", + "generatedAt": "2026-06-26T17:30:47.651979+00:00", "status": "valid", "publicationStatus": "official", - "runner": "mi355x-amds_05", - "sku": "mi355x", - "backend": "mori", - "phase": "decode", + "runner": "h100-dgxc-slurm_17", + "sku": "h100", + "backend": "deepep", + "phase": "prefill", "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", + "resourceMode": "normalized", + "suite": "resource-constrained", "comparisonClass": "standardized", 
"measurementContract": "layout-and-dispatch-v1", - "topologyClass": "mi355x-xgmi", - "transport": "xgmi", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "MI355X EP8 · mori · bf16", + "label": "H100 EP8 · deepep · bf16 (norm)", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, @@ -1354,218 +1285,255 @@ "combineQuantMode": "none" }, "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.3125, - "configuredUnits": 80, - "deviceUnits": 256, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", + "requestedFraction": 0.18, + "achievedFraction": 0.1818, + "configuredUnits": 24, + "deviceUnits": 132, + "resourceClass": "resource-constrained", + "conformanceClass": "resource-conforming", "fixedKernel": false, - "paretoEligible": false + "paretoEligible": true }, "placement": { "kind": "packed", - "nodes": 2, + "nodes": 1, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "c774c8e4abb34da", - "workloadId": "set:5:d8d49658059863f2", + "traceSignature": "64d989e2e2a6b31", + "workloadId": "set:6:a426d66e479dc893", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, - "backendVersion": "image:rocm/sgl-dev:sglang-0.5.9-rocm720-mi35x-mori-0227-2", + "backendVersion": "1.2.1", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28273516714", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28273516714", - "createdAt": "2026-06-27T00:53:08Z", - "sha": "2c15d9415503e9ccb84cd49cf446a122796efc1e" + "id": "28254315809", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254315809", + "createdAt": "2026-06-26T17:30:47.651979+00:00", + "sha": "60dec7d70f554e252fec87709e2be52752947db1" }, "rows": [ { - "tokensPerRank": 1, - "globalTokens": 8, + 
"tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 40.6000018119812, - "p90": 43.76000165939331, - "p95": 45.239999890327454, - "p99": 54.71999943256378 + "p50": 110.46399921178818, + "p90": 116.35199934244156, + "p95": 117.8240031003952, + "p99": 166.01599752902985 }, "combine": { - "p50": 17.920000478625298, - "p90": 19.039999693632126, - "p95": 20.999999716877937, - "p99": 22.87999913096428 + "p50": 106.1440035700798, + "p90": 111.51999980211258, + "p95": 112.06399649381638, + "p99": 114.07999694347382 }, "roundtrip": { - "p50": 56.32000043988228, - "p90": 59.4400018453598, - "p95": 60.64099818468094, - "p99": 63.19999694824219 + "p50": 197.40800559520721, + "p90": 200.9280025959015, + "p95": 203.0400037765503, + "p99": 206.01600408554077 }, "isolatedSum": { - "p50": 58.5200022906065, - "p90": 62.800001353025436, - "p95": 66.23999960720539, - "p99": 77.59999856352806 + "p50": 216.60800278186798, + "p90": 227.87199914455414, + "p95": 229.88799959421158, + "p99": 280.09599447250366 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 630784, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 0, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2, - "globalTokens": 16, + "tokensPerRank": 256, + "globalTokens": 2048, "dispatch": { - "p50": 42.64000058174133, - "p90": 45.35999894142151, - "p95": 46.76000028848648, - "p99": 50.23999884724617 + "p50": 147.39200472831726, + "p90": 150.68799257278442, + "p95": 151.7760008573532, + "p99": 154.33600544929504 }, "combine": { - "p50": 16.759999096393585, - "p90": 18.68000067770481, - "p95": 19.801000133156776, - "p99": 22.08000048995018 + "p50": 145.1839953660965, + "p90": 149.88799393177032, + "p95": 151.67999267578125, + "p99": 154.7199934720993 }, "roundtrip": { - "p50": 58.9199997484684, - 
"p90": 61.799999326467514, - "p95": 62.95999884605408, - "p99": 65.20000100135803 + "p50": 262.4000012874603, + "p90": 267.2640085220337, + "p95": 269.27998661994934, + "p99": 357.34400153160095 }, "isolatedSum": { - "p50": 59.39999967813492, - "p90": 64.03999961912632, - "p95": 66.56100042164326, - "p99": 72.31999933719635 + "p50": 292.57600009441376, + "p90": 300.57598650455475, + "p95": 303.45599353313446, + "p99": 309.05599892139435 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1232896, - "combineLogicalBytes": 1232896, - "fanoutMean": 5.375, - "recvTokensMax": 13, - "stragglerRank": 0, + "dispatchLogicalBytes": 155889664, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 4, - "globalTokens": 32, + "tokensPerRank": 512, + "globalTokens": 4096, "dispatch": { - "p50": 42.44000092148781, - "p90": 45.281000435352325, - "p95": 46.4400015771389, - "p99": 47.919999808073044 + "p50": 204.92799580097198, + "p90": 219.39200162887573, + "p95": 221.76000475883484, + "p99": 226.4000028371811 }, "combine": { - "p50": 19.999999552965164, - "p90": 21.99999988079071, - "p95": 23.360000923275948, - "p99": 25.72000026702881 + "p50": 217.15199947357178, + "p90": 221.3120013475418, + "p95": 224.57599639892578, + "p99": 227.743998169899 }, "roundtrip": { - "p50": 61.91999837756157, - "p90": 65.20099937915802, - "p95": 66.3599967956543, - "p99": 67.84100085496902 + "p50": 392.60798692703247, + "p90": 397.47199416160583, + "p95": 400.09599924087524, + "p99": 421.37598991394043 }, "isolatedSum": { - "p50": 62.44000047445297, - "p90": 67.28100031614304, - "p95": 69.80000250041485, - "p99": 73.64000007510185 + "p50": 422.07999527454376, + "p90": 440.70400297641754, + "p95": 446.3360011577606, + "p99": 454.1440010070801 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2480128, - "combineLogicalBytes": 2480128, - "fanoutMean": 5.40625, - 
"recvTokensMax": 29, - "stragglerRank": 0, + "dispatchLogicalBytes": 312266752, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 1024, + "globalTokens": 8192, "dispatch": { - "p50": 42.44000092148781, - "p90": 45.00100016593933, - "p95": 46.88100144267082, - "p99": 49.27999898791313 + "p50": 319.93600726127625, + "p90": 324.8960077762604, + "p95": 327.1679878234863, + "p99": 330.55999875068665 }, "combine": { - "p50": 20.880000665783882, - "p90": 22.840000689029694, - "p95": 24.240000173449516, - "p99": 26.399999856948853 + "p50": 330.01598715782166, + "p90": 335.1680040359497, + "p95": 336.64000034332275, + "p99": 340.2239978313446 }, "roundtrip": { - "p50": 62.401000410318375, - "p90": 65.48000127077103, - "p95": 66.28099828958511, - "p99": 68.00000369548798 + "p50": 624.064028263092, + "p90": 629.2480230331421, + "p95": 631.6159963607788, + "p99": 638.2399797439575 }, "isolatedSum": { - "p50": 63.32000158727169, - "p90": 67.84100085496902, - "p95": 71.12100161612034, - "p99": 75.67999884486198 + "p50": 649.9519944190979, + "p90": 660.0640118122101, + "p95": 663.8079881668091, + "p99": 670.7839965820312 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 0, + "dispatchLogicalBytes": 623443968, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 16, - "globalTokens": 128, + "tokensPerRank": 2048, + "globalTokens": 16384, "dispatch": { - "p50": 42.520999908447266, - "p90": 45.1200008392334, - "p95": 46.59999907016754, - "p99": 49.04000088572502 + "p50": 570.9440112113953, + "p90": 584.5119953155518, + "p95": 589.1519784927368, + "p99": 
593.9199924468994 }, "combine": { - "p50": 25.8799996227026, - "p90": 27.879999950528145, - "p95": 29.239999130368233, - "p99": 31.800001859664917 + "p50": 564.9920105934143, + "p90": 574.3039846420288, + "p95": 576.7999887466431, + "p99": 583.5199952125549 }, "roundtrip": { - "p50": 67.80099868774414, - "p90": 71.16000354290009, - "p95": 72.2000002861023, - "p99": 74.47999715805054 + "p50": 1105.5680513381958, + "p90": 1120.1599836349487, + "p95": 1124.7680187225342, + "p99": 1134.719967842102 }, "isolatedSum": { - "p50": 68.40099953114986, - "p90": 73.00000078976154, - "p95": 75.83999820053577, - "p99": 80.84000274538994 + "p50": 1135.9360218048096, + "p90": 1158.8159799575806, + "p95": 1165.9519672393799, + "p99": 1177.4399876594543 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 9920512, - "combineLogicalBytes": 9920512, - "fanoutMean": 5.40625, - "recvTokensMax": 92, - "stragglerRank": 0, + "dispatchLogicalBytes": 1243805696, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1075.8719444274902, + "p90": 1088.703989982605, + "p95": 1093.5360193252563, + "p99": 1102.463960647583 + }, + "combine": { + "p50": 1031.872034072876, + "p90": 1041.3119792938232, + "p95": 1044.4799661636353, + "p99": 1055.359959602356 + }, + "roundtrip": { + "p50": 2082.304000854492, + "p90": 2096.640110015869, + "p95": 2100.895881652832, + "p99": 2108.031988143921 + }, + "isolatedSum": { + "p50": 2107.743978500366, + "p90": 2130.015969276428, + "p95": 2138.0159854888916, + "p99": 2157.823920249939 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487009280, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -1573,16 +1541,16 @@ ] }, { - "id": 
"cx-7f743bfe", - "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|decode|normal|none|none|0|tuned||14ded8461f2636c", - "colorKey": "h100_aa268d13", - "comparisonKey": "791af0af2f802328", + "id": "cx-7d11224e", + "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|decode|normal|none|none|0|tuned||14ded8461f2636c", + "colorKey": "b300_8d2811e3", + "comparisonKey": "801e704d68c28ca9", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:59:41.322977+00:00", + "generatedAt": "2026-06-27T09:48:25.920368+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h100-dgxc-slurm_18", - "sku": "h100", + "runner": "b300-nv_09", + "sku": "b300", "backend": "deepep", "phase": "decode", "mode": "normal", @@ -1590,11 +1558,12 @@ "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", + "topologyClass": "b300-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · bf16 · zipf", + "label": "B300 EP8 · deepep · bf16 · zipf", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, @@ -1610,9 +1579,9 @@ }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1515, + "achievedFraction": 0.1351, "configuredUnits": 20, - "deviceUnits": 132, + "deviceUnits": 148, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -1634,45 +1603,45 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271945409", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271945409", - "createdAt": "2026-06-26T23:58:46Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28285620595", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285620595", + "createdAt": 
"2026-06-27T09:48:25.920368+00:00", + "sha": "149586650dbed5b7579537347e9489d5b41543c1" }, "rows": [ { "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 93.37600320577621, - "p90": 101.59999877214432, - "p95": 103.16800326108932, - "p99": 108.15999656915665 + "p50": 56.384000927209854, + "p90": 58.81600081920624, + "p95": 61.37600168585777, + "p99": 80.60800284147263 }, "combine": { - "p50": 73.69600236415863, - "p90": 78.17599922418594, - "p95": 79.99999821186066, - "p99": 82.59200304746628 + "p50": 65.47199934720993, + "p90": 66.3679987192154, + "p95": 66.72000139951706, + "p99": 68.09599697589874 }, "roundtrip": { - "p50": 142.59199798107147, - "p90": 150.62400698661804, - "p95": 152.54400670528412, - "p99": 159.5200002193451 + "p50": 107.42399841547012, + "p90": 111.84000223875046, + "p95": 112.96000331640244, + "p99": 126.14400684833527 }, "isolatedSum": { - "p50": 167.07200556993484, - "p90": 179.77599799633026, - "p95": 183.16800147294998, - "p99": 190.75199961662292 + "p50": 121.85600027441978, + "p90": 125.18399953842163, + "p95": 128.09600308537483, + "p99": 148.70399981737137 }, "roundtripMeasured": true, "dispatchLogicalBytes": 444416, "combineLogicalBytes": 444416, "fanoutMean": 3.875, "recvTokensMax": 8, - "stragglerRank": 0, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -1681,35 +1650,35 @@ "tokensPerRank": 2, "globalTokens": 16, "dispatch": { - "p50": 96.0640013217926, - "p90": 100.89600086212158, - "p95": 101.82400047779083, - "p99": 107.07200318574905 + "p50": 56.2559999525547, + "p90": 58.33600088953972, + "p95": 60.447998344898224, + "p99": 72.83200323581696 }, "combine": { - "p50": 74.43200051784515, - "p90": 80.48000186681747, - "p95": 81.216000020504, - "p99": 82.11199939250946 + "p50": 66.01600348949432, + "p90": 66.68800115585327, + "p95": 67.48799979686737, + "p99": 91.90399944782257 }, "roundtrip": { - "p50": 143.39199662208557, - "p90": 147.87200093269348, - "p95": 153.31199765205383, - "p99": 
168.60799491405487 + "p50": 105.02400249242783, + "p90": 112.41599917411804, + "p95": 113.0559965968132, + "p99": 119.64800208806992 }, "isolatedSum": { - "p50": 170.49600183963776, - "p90": 181.37600272893906, - "p95": 183.04000049829483, - "p99": 189.18400257825851 + "p50": 122.27200344204903, + "p90": 125.02400204539299, + "p95": 127.9359981417656, + "p99": 164.73600268363953 }, "roundtripMeasured": true, "dispatchLogicalBytes": 845824, "combineLogicalBytes": 845824, "fanoutMean": 3.6875, "recvTokensMax": 16, - "stragglerRank": 0, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -1718,35 +1687,35 @@ "tokensPerRank": 4, "globalTokens": 32, "dispatch": { - "p50": 95.87199985980988, - "p90": 100.73599964380264, - "p95": 102.81600058078766, - "p99": 109.95200276374817 + "p50": 56.73599988222122, + "p90": 59.29600074887276, + "p95": 60.99199876189232, + "p99": 73.11999797821045 }, "combine": { - "p50": 74.30399954319, - "p90": 80.89599758386612, - "p95": 81.4720019698143, - "p99": 84.19200032949448 + "p50": 66.20799750089645, + "p90": 67.55200028419495, + "p95": 68.80000233650208, + "p99": 79.74400371313095 }, "roundtrip": { - "p50": 142.752006649971, - "p90": 153.02400290966034, - "p95": 154.9759954214096, - "p99": 160.0639969110489 + "p50": 105.85600137710571, + "p90": 108.73600095510483, + "p95": 110.43199896812439, + "p99": 124.92799758911133 }, "isolatedSum": { - "p50": 170.17599940299988, - "p90": 181.63199722766876, - "p95": 184.28800255060196, - "p99": 194.14400309324265 + "p50": 122.94399738311768, + "p90": 126.8480010330677, + "p95": 129.7920010983944, + "p99": 152.8640016913414 }, "roundtripMeasured": true, "dispatchLogicalBytes": 1691648, "combineLogicalBytes": 1691648, "fanoutMean": 3.6875, "recvTokensMax": 32, - "stragglerRank": 3, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -1755,35 +1724,35 @@ "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 97.88800030946732, - "p90": 
101.82400047779083, - "p95": 103.96800190210342, - "p99": 111.42399907112122 + "p50": 58.30400064587593, + "p90": 64.57599997520447, + "p95": 65.85600227117538, + "p99": 70.88000327348709 }, "combine": { - "p50": 75.6160020828247, - "p90": 81.4720019698143, - "p95": 82.04799890518188, - "p99": 84.03199911117554 + "p50": 66.81600213050842, + "p90": 68.51200014352798, + "p95": 69.023996591568, + "p99": 78.17599922418594 }, "roundtrip": { - "p50": 146.7519998550415, - "p90": 153.47200632095337, - "p95": 154.9759954214096, - "p99": 167.9680049419403 + "p50": 114.56000059843063, + "p90": 121.15199863910675, + "p95": 122.5920021533966, + "p99": 138.72000575065613 }, "isolatedSum": { - "p50": 173.50400239229202, - "p90": 183.29600244760513, - "p95": 186.0160008072853, - "p99": 195.45599818229675 + "p50": 125.12000277638435, + "p90": 133.08800011873245, + "p95": 134.87999886274338, + "p99": 149.05600249767303 }, "roundtripMeasured": true, "dispatchLogicalBytes": 3354624, "combineLogicalBytes": 3354624, "fanoutMean": 3.65625, "recvTokensMax": 64, - "stragglerRank": 0, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -1792,35 +1761,35 @@ "tokensPerRank": 16, "globalTokens": 128, "dispatch": { - "p50": 97.08800166845322, - "p90": 100.67199915647507, - "p95": 104.25599664449692, - "p99": 110.6560006737709 + "p50": 59.29600074887276, + "p90": 63.45599889755249, + "p95": 66.3679987192154, + "p99": 85.82399785518646 }, "combine": { - "p50": 78.94399762153625, - "p90": 82.04799890518188, - "p95": 82.78399705886841, - "p99": 89.40800279378891 + "p50": 67.03999638557434, + "p90": 69.023996591568, + "p95": 70.3359991312027, + "p99": 79.93599772453308 }, "roundtrip": { - "p50": 150.7200002670288, - "p90": 159.10400450229645, - "p95": 161.69600188732147, - "p99": 167.07199811935425 + "p50": 122.6240023970604, + "p90": 125.66399574279785, + "p95": 126.65599584579468, + "p99": 131.9359987974167 }, "isolatedSum": { - "p50": 176.03199928998947, - "p90": 
182.71999806165695, - "p95": 187.03999370336533, - "p99": 200.06400346755981 + "p50": 126.3359971344471, + "p90": 132.47999548912048, + "p95": 136.7039978504181, + "p99": 165.75999557971954 }, "roundtripMeasured": true, "dispatchLogicalBytes": 6537216, "combineLogicalBytes": 6537216, "fanoutMean": 3.5625, "recvTokensMax": 127, - "stragglerRank": 0, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -1829,35 +1798,35 @@ "tokensPerRank": 32, "globalTokens": 256, "dispatch": { - "p50": 96.47999703884125, - "p90": 101.31199657917023, - "p95": 104.5759990811348, - "p99": 110.62400043010712 + "p50": 74.14399832487106, + "p90": 76.54400169849396, + "p95": 77.85599678754807, + "p99": 89.4400030374527 }, "combine": { - "p50": 86.46400272846222, - "p90": 90.11200070381165, - "p95": 90.62399715185165, - "p99": 93.18400174379349 + "p50": 77.11999863386154, + "p90": 78.52800190448761, + "p95": 78.68800312280655, + "p99": 89.4400030374527 }, "roundtrip": { - "p50": 158.75199437141418, - "p90": 163.55200111865997, - "p95": 164.89599645137787, - "p99": 169.21600699424744 + "p50": 127.10399925708771, + "p90": 132.1280002593994, + "p95": 133.760005235672, + "p99": 136.3839954137802 }, "isolatedSum": { - "p50": 182.94399976730347, - "p90": 191.42399728298187, - "p95": 195.19999623298645, - "p99": 203.8080021739006 + "p50": 151.2639969587326, + "p90": 155.07200360298157, + "p95": 156.54399991035461, + "p99": 178.8800060749054 }, "roundtripMeasured": true, "dispatchLogicalBytes": 12859392, "combineLogicalBytes": 12859392, "fanoutMean": 3.50390625, "recvTokensMax": 255, - "stragglerRank": 0, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -1866,35 +1835,35 @@ "tokensPerRank": 64, "globalTokens": 512, "dispatch": { - "p50": 106.9440022110939, - "p90": 138.36799561977386, - "p95": 143.0400013923645, - "p99": 250.2720057964325 + "p50": 79.77599650621414, + "p90": 81.53600245714188, + "p95": 82.8159973025322, + "p99": 89.9839997291565 }, 
"combine": { - "p50": 95.0080007314682, - "p90": 98.39999675750732, - "p95": 98.91200065612793, - "p99": 105.59999942779541 + "p50": 90.87999910116196, + "p90": 102.88000106811523, + "p95": 104.41599786281586, + "p99": 115.58400094509125 }, "roundtrip": { - "p50": 176.67199671268463, - "p90": 184.03199315071106, - "p95": 187.3600035905838, - "p99": 190.5599981546402 + "p50": 157.95199573040009, + "p90": 162.59199380874634, + "p95": 164.19200599193573, + "p99": 182.68799781799316 }, "isolatedSum": { - "p50": 201.9520029425621, - "p90": 236.7679923772812, - "p95": 241.95200204849243, - "p99": 355.8720052242279 + "p50": 170.6559956073761, + "p90": 184.4160035252571, + "p95": 187.23199516534805, + "p99": 205.56800067424774 }, "roundtripMeasured": true, "dispatchLogicalBytes": 25145344, "combineLogicalBytes": 25145344, "fanoutMean": 3.42578125, "recvTokensMax": 510, - "stragglerRank": 2, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -1903,28 +1872,28 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 128.4160017967224, - "p90": 145.9520012140274, - "p95": 148.83199334144592, - "p99": 151.99999511241913 + "p50": 101.24800354242325, + "p90": 104.22399640083313, + "p95": 105.3759977221489, + "p99": 124.67200309038162 }, "combine": { - "p50": 119.74400281906128, - "p90": 122.56000190973282, - "p95": 123.80799651145935, - "p99": 129.7920048236847 + "p50": 126.17599964141846, + "p90": 127.71199643611908, + "p95": 128.31999361515045, + "p99": 139.93600010871887 }, "roundtrip": { - "p50": 228.2560020685196, - "p90": 233.88800024986267, - "p95": 236.12800240516663, - "p99": 240.28800427913666 + "p50": 208.92800390720367, + "p90": 213.76000344753265, + "p95": 214.78399634361267, + "p99": 229.0239930152893 }, "isolatedSum": { - "p50": 248.1600046157837, - "p90": 268.5120031237602, - "p95": 272.6399898529053, - "p99": 281.7919999361038 + "p50": 227.4240031838417, + "p90": 231.9359928369522, + "p95": 233.69599133729935, + "p99": 
264.6080031991005 }, "roundtripMeasured": true, "dispatchLogicalBytes": 49946624, @@ -1939,16 +1908,16 @@ ] }, { - "id": "cx-a38d13e8", - "identity": "h100|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|decode|normal|none|none|0|tuned||a8f501af7004836", - "colorKey": "h100_769b9c4b", - "comparisonKey": "115d84ad1ee38d09", + "id": "cx-cc647506", + "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|decode|normal|none|none|0|tuned||1fa7fe74d0e30a3", + "colorKey": "b300_8d2811e3", + "comparisonKey": "478acd4108c50326", "schemaVersion": 3, - "generatedAt": "2026-06-27T00:00:11.807854+00:00", + "generatedAt": "2026-06-26T23:58:32.426052+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h100-dgxc-slurm_02", - "sku": "h100", + "runner": "b300-nv_05", + "sku": "b300", "backend": "deepep", "phase": "decode", "mode": "normal", @@ -1956,29 +1925,30 @@ "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", + "topologyClass": "b300-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · bf16 · zipf+eplb", + "label": "B300 EP8 · deepep · bf16 · zipf", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, - "experts": 288, + "experts": 256, "routing": "zipf", - "routingLabel": "zipf+eplb", + "routingLabel": "zipf", "routingStep": 0, "unevenTokens": "none", - "eplbEnabled": true, + "eplbEnabled": false, "dispatchDtype": "bf16", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1515, + "achievedFraction": 0.1351, "configuredUnits": 20, - "deviceUnits": 132, + "deviceUnits": 148, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -1991,18 +1961,18 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "a8f501af7004836", 
- "workloadId": "set:8:f5576e2b712d38c3", + "traceSignature": "1fa7fe74d0e30a3", + "workloadId": "set:4:f5576e2b712d38c3", "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": 4.927734375, - "eplbImbalanceAfter": 1.0006103515625, + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, "backendVersion": "1.2.1", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271948775", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271948775", - "createdAt": "2026-06-26T23:58:53Z", + "id": "28271886823", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271886823", + "createdAt": "2026-06-26T23:58:32.426052+00:00", "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" }, "rows": [ @@ -2010,109 +1980,35 @@ "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 70.39999961853027, - "p90": 100.832000374794, - "p95": 105.56799918413162, - "p99": 192.73599982261658 - }, - "combine": { - "p50": 73.18399846553802, - "p90": 88.44800293445587, - "p95": 188.38399648666382, - "p99": 344.2560136318207 - }, - "roundtrip": { - "p50": 123.77600371837616, - "p90": 133.08799266815186, - "p95": 149.4400054216385, - "p99": 156.12800419330597 - }, - "isolatedSum": { - "p50": 143.5839980840683, - "p90": 189.28000330924988, - "p95": 293.95199567079544, - "p99": 536.9920134544373 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 616448, - "combineLogicalBytes": 616448, - "fanoutMean": 5.375, - "recvTokensMax": 7, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 68.38399916887283, - "p90": 75.71200281381607, - "p95": 77.11999863386154, - "p99": 95.61599791049957 - }, - "combine": { - "p50": 71.29599899053574, - "p90": 73.44000041484833, - "p95": 74.36800003051758, - "p99": 82.2720006108284 - }, - "roundtrip": { - "p50": 
126.68800354003906, - "p90": 130.87999820709229, - "p95": 133.56800377368927, - "p99": 142.59199798107147 - }, - "isolatedSum": { - "p50": 139.67999815940857, - "p90": 149.1520032286644, - "p95": 151.48799866437912, - "p99": 177.88799852132797 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1204224, - "combineLogicalBytes": 1204224, - "fanoutMean": 5.25, - "recvTokensMax": 14, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 72.54400104284286, - "p90": 99.2640033364296, - "p95": 102.08000242710114, - "p99": 107.39199817180634 + "p50": 56.703999638557434, + "p90": 59.90400165319443, + "p95": 62.65600025653839, + "p99": 69.98399645090103 }, "combine": { - "p50": 72.9919970035553, - "p90": 79.71200346946716, - "p95": 84.22400057315826, - "p99": 87.39200234413147 + "p50": 65.88800251483917, + "p90": 66.43199920654297, + "p95": 66.72000139951706, + "p99": 73.7600028514862 }, "roundtrip": { - "p50": 130.23999333381653, - "p90": 156.41599893569946, - "p95": 160.22400557994843, - "p99": 165.53600132465363 + "p50": 107.16799646615982, + "p90": 112.83200234174728, + "p95": 114.14399743080139, + "p99": 120.44800072908401 }, "isolatedSum": { - "p50": 145.53599804639816, - "p90": 178.97600680589676, - "p95": 186.3040030002594, - "p99": 194.7840005159378 + "p50": 122.5920021533966, + "p90": 126.3360008597374, + "p95": 129.37600165605545, + "p99": 143.74399930238724 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2394112, - "combineLogicalBytes": 2394112, - "fanoutMean": 5.21875, - "recvTokensMax": 24, - "stragglerRank": 5, + "dispatchLogicalBytes": 444416, + "combineLogicalBytes": 444416, + "fanoutMean": 3.875, + "recvTokensMax": 8, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -2121,72 +2017,35 @@ "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 96.19200229644775, - "p90": 109.56799983978271, - "p95": 
112.73600161075592, - "p99": 155.87200224399567 - }, - "combine": { - "p50": 75.45600086450577, - "p90": 88.06400001049042, - "p95": 89.4400030374527, - "p99": 97.37599641084671 - }, - "roundtrip": { - "p50": 130.94399869441986, - "p90": 154.4319987297058, - "p95": 156.44800662994385, - "p99": 176.67199671268463 - }, - "isolatedSum": { - "p50": 171.64800316095352, - "p90": 197.63199985027313, - "p95": 202.17600464820862, - "p99": 253.24799865484238 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4630528, - "combineLogicalBytes": 4630528, - "fanoutMean": 5.046875, - "recvTokensMax": 45, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 82.97599852085114, - "p90": 100.16000270843506, - "p95": 103.55199873447418, - "p99": 106.72000050544739 + "p50": 58.848001062870026, + "p90": 60.80000102519989, + "p95": 62.84800171852112, + "p99": 74.40000027418137 }, "combine": { - "p50": 74.14399832487106, - "p90": 87.3280018568039, - "p95": 88.95999938249588, - "p99": 89.82399851083755 + "p50": 68.00000369548798, + "p90": 70.30399888753891, + "p95": 76.99199765920639, + "p99": 78.5600021481514 }, "roundtrip": { - "p50": 131.6480040550232, - "p90": 158.9760035276413, - "p95": 161.31199896335602, - "p99": 166.78400337696075 + "p50": 116.54400080442429, + "p90": 123.29600006341934, + "p95": 124.83199685811996, + "p99": 130.46400249004364 }, "isolatedSum": { - "p50": 157.1199968457222, - "p90": 187.48800456523895, - "p95": 192.51199811697006, - "p99": 196.54399901628494 + "p50": 126.848004758358, + "p90": 131.1039999127388, + "p95": 139.8399993777275, + "p99": 152.96000242233276 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 9447424, - "combineLogicalBytes": 9447424, - "fanoutMean": 5.1484375, - "recvTokensMax": 91, - "stragglerRank": 3, + "dispatchLogicalBytes": 3354624, + "combineLogicalBytes": 3354624, + "fanoutMean": 3.65625, + "recvTokensMax": 64, + 
"stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -2195,71 +2054,34 @@ "tokensPerRank": 32, "globalTokens": 256, "dispatch": { - "p50": 90.30400216579437, - "p90": 105.6319996714592, - "p95": 106.6880002617836, - "p99": 111.04000359773636 - }, - "combine": { - "p50": 80.99199831485748, - "p90": 89.15200084447861, - "p95": 89.88799899816513, - "p99": 90.91199934482574 - }, - "roundtrip": { - "p50": 142.17600226402283, - "p90": 157.6640009880066, - "p95": 160.44799983501434, - "p99": 164.8319959640503 - }, - "isolatedSum": { - "p50": 171.29600048065186, - "p90": 194.7840005159378, - "p95": 196.57599925994873, - "p99": 201.9520029425621 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 19023872, - "combineLogicalBytes": 19023872, - "fanoutMean": 5.18359375, - "recvTokensMax": 178, - "stragglerRank": 2, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 64, - "globalTokens": 512, - "dispatch": { - "p50": 95.74399888515472, - "p90": 116.2559986114502, - "p95": 121.98399752378464, - "p99": 398.6560106277466 + "p50": 75.9039968252182, + "p90": 78.27199995517731, + "p95": 79.52000200748444, + "p99": 87.5839963555336 }, "combine": { - "p50": 90.20800143480301, - "p90": 101.1200025677681, - "p95": 104.25599664449692, - "p99": 111.55200004577637 + "p50": 78.40000092983246, + "p90": 79.19999957084656, + "p95": 79.71200346946716, + "p99": 83.64800363779068 }, "roundtrip": { - "p50": 160.76800227165222, - "p90": 181.536003947258, - "p95": 185.37600338459015, - "p99": 188.35200369358063 + "p50": 134.24000144004822, + "p90": 138.20800185203552, + "p95": 139.5840048789978, + "p99": 144.3520039319992 }, "isolatedSum": { - "p50": 185.95200031995773, - "p90": 217.3760011792183, - "p95": 226.23999416828156, - "p99": 510.20801067352295 + "p50": 154.30399775505066, + "p90": 157.47199952602386, + "p95": 159.2320054769516, + "p99": 171.23199999332428 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 38148096, - 
"combineLogicalBytes": 38148096, - "fanoutMean": 5.197265625, - "recvTokensMax": 350, + "dispatchLogicalBytes": 12859392, + "combineLogicalBytes": 12859392, + "fanoutMean": 3.50390625, + "recvTokensMax": 255, "stragglerRank": 7, "correct": true, "samplesPooled": 600, @@ -2269,35 +2091,35 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 114.3679991364479, - "p90": 133.44000279903412, - "p95": 137.31199502944946, - "p99": 142.7839994430542 + "p50": 103.07200253009796, + "p90": 105.98400235176086, + "p95": 107.04000294208527, + "p99": 113.21599781513214 }, "combine": { - "p50": 108.15999656915665, - "p90": 120.2239990234375, - "p95": 121.24799937009811, - "p99": 123.99999797344208 + "p50": 127.13600695133209, + "p90": 128.1599998474121, + "p95": 128.57599556446075, + "p99": 131.04000687599182 }, "roundtrip": { - "p50": 199.35999810695648, - "p90": 217.31199324131012, - "p95": 220.15999257564545, - "p99": 380.8319866657257 + "p50": 209.1200053691864, + "p90": 214.30400013923645, + "p95": 216.12800657749176, + "p99": 229.66399788856506 }, "isolatedSum": { - "p50": 222.52799570560455, - "p90": 253.66400182247162, - "p95": 258.5599943995476, - "p99": 266.7839974164963 + "p50": 230.20800948143005, + "p90": 234.14400219917297, + "p95": 235.61599850654602, + "p99": 244.25600469112396 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 76955648, - "combineLogicalBytes": 76955648, - "fanoutMean": 5.2421875, - "recvTokensMax": 687, - "stragglerRank": 4, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -2305,28 +2127,29 @@ ] }, { - "id": "cx-2f9f6948", - "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|ac583971f94b176", - "colorKey": "h100_7b3247bf", - "comparisonKey": "b51e047646ec8fac", + "id": "cx-c27e2cad", + "identity": 
"b300|deepep|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "b300_d6fd14c3", + "comparisonKey": "ac13ebc2bb2c560a", "schemaVersion": 3, - "generatedAt": "2026-06-26T17:29:02.253264+00:00", + "generatedAt": "2026-06-27T10:26:01.213105+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h100-dgxc-slurm_00", - "sku": "h100", + "runner": "b300-nv_10", + "sku": "b300", "backend": "deepep", "phase": "decode", "mode": "normal", - "resourceMode": "normalized", - "suite": "resource-constrained", + "resourceMode": "tuned", + "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", + "topologyClass": "b300-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · bf16 (norm)", + "label": "B300 EP8 · deepep · fp8", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, @@ -2336,19 +2159,19 @@ "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, - "dispatchDtype": "bf16", + "dispatchDtype": "fp8", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { - "requestedFraction": 0.18, - "achievedFraction": 0.1818, - "configuredUnits": 24, - "deviceUnits": 132, - "resourceClass": "resource-constrained", - "conformanceClass": "resource-conforming", + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", "fixedKernel": false, - "paretoEligible": true + "paretoEligible": false }, "placement": { "kind": "packed", @@ -2362,49 +2185,49 @@ "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", + "backendVersion": "2.0.0+af9a040", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", 
"repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28254315809", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254315809", - "createdAt": "2026-06-26T17:26:52Z", - "sha": "60dec7d70f554e252fec87709e2be52752947db1" + "id": "28286436120", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28286436120", + "createdAt": "2026-06-27T10:26:01.213105+00:00", + "sha": "91c7acf59a5e524f37742922ec67721d86a03f6b" }, "rows": [ { "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 95.74399888515472, - "p90": 102.78400033712387, - "p95": 104.99200224876404, - "p99": 109.37599837779999 + "p50": 56.2559999525547, + "p90": 58.78400057554245, + "p95": 61.28000095486641, + "p99": 77.69600301980972 }, "combine": { - "p50": 79.32800054550171, - "p90": 82.07999914884567, - "p95": 82.87999778985977, - "p99": 88.03199976682663 + "p50": 61.983998864889145, + "p90": 78.8159966468811, + "p95": 86.87999844551086, + "p99": 95.10400146245956 }, "roundtrip": { - "p50": 147.74399995803833, - "p90": 154.6880006790161, - "p95": 157.44000673294067, - "p99": 171.9360053539276 + "p50": 120.44800072908401, + "p90": 123.19999933242798, + "p95": 125.82400441169739, + "p99": 144.03200149536133 }, "isolatedSum": { - "p50": 175.07199943065643, - "p90": 184.86399948596954, - "p95": 187.8720000386238, - "p99": 197.40799814462662 + "p50": 118.23999881744385, + "p90": 137.59999722242355, + "p95": 148.15999940037727, + "p99": 172.8000044822693 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 630784, + "dispatchLogicalBytes": 315392, "combineLogicalBytes": 630784, "fanoutMean": 5.5, "recvTokensMax": 7, - "stragglerRank": 4, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -2413,35 +2236,35 @@ "tokensPerRank": 2, "globalTokens": 16, "dispatch": { - "p50": 71.23199850320816, - "p90": 101.27999633550644, - "p95": 102.52799838781357, - "p99": 107.87200182676315 + "p50": 57.151999324560165, + "p90": 59.039998799562454, + 
"p95": 59.99999865889549, + "p99": 73.11999797821045 }, "combine": { - "p50": 72.22399860620499, - "p90": 80.92799782752991, - "p95": 81.44000172615051, - "p99": 84.76799726486206 + "p50": 64.54399973154068, + "p90": 66.17599725723267, + "p95": 67.16799736022949, + "p99": 74.23999905586243 }, "roundtrip": { - "p50": 127.45599448680878, - "p90": 153.02400290966034, - "p95": 155.64799308776855, - "p99": 159.4880074262619 + "p50": 124.15999919176102, + "p90": 126.39999389648438, + "p95": 129.60000336170197, + "p99": 138.49599659442902 }, "isolatedSum": { - "p50": 143.45599710941315, - "p90": 182.20799416303635, - "p95": 183.96800011396408, - "p99": 192.6399990916252 + "p50": 121.69599905610085, + "p90": 125.21599605679512, + "p95": 127.16799601912498, + "p99": 147.35999703407288 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1232896, + "dispatchLogicalBytes": 616448, "combineLogicalBytes": 1232896, "fanoutMean": 5.375, "recvTokensMax": 13, - "stragglerRank": 4, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -2450,35 +2273,35 @@ "tokensPerRank": 4, "globalTokens": 32, "dispatch": { - "p50": 95.23200243711472, - "p90": 102.36799716949463, - "p95": 107.84000158309937, - "p99": 439.64800238609314 + "p50": 58.88000130653381, + "p90": 61.37600168585777, + "p95": 63.10400366783142, + "p99": 91.10400080680847 }, "combine": { - "p50": 72.95999675989151, - "p90": 81.66400343179703, - "p95": 86.81599795818329, - "p99": 88.92799913883209 + "p50": 67.35999882221222, + "p90": 69.50400024652481, + "p95": 70.14399766921997, + "p99": 86.30400151014328 }, "roundtrip": { - "p50": 128.7360042333603, - "p90": 159.19999778270721, - "p95": 161.31199896335602, - "p99": 167.1680063009262 + "p50": 127.68000364303589, + "p90": 130.14400005340576, + "p95": 131.55199587345123, + "p99": 137.08800077438354 }, "isolatedSum": { - "p50": 168.19199919700623, - "p90": 184.03200060129166, - "p95": 194.65599954128265, - "p99": 528.5760015249252 + "p50": 
126.24000012874603, + "p90": 130.88000193238258, + "p95": 133.2480013370514, + "p99": 177.40800231695175 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2480128, + "dispatchLogicalBytes": 1240064, "combineLogicalBytes": 2480128, "fanoutMean": 5.40625, "recvTokensMax": 29, - "stragglerRank": 7, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -2487,35 +2310,35 @@ "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 95.42399644851685, - "p90": 102.52799838781357, - "p95": 104.89600151777267, - "p99": 113.53600025177002 + "p50": 60.03199890255928, + "p90": 62.30400130152702, + "p95": 63.26399743556976, + "p99": 69.2799985408783 }, "combine": { - "p50": 79.58400249481201, - "p90": 82.91199803352356, - "p95": 87.07199990749359, - "p99": 87.96799927949905 + "p50": 68.76800209283829, + "p90": 70.46400010585785, + "p95": 71.3919997215271, + "p99": 87.74399757385254 }, "roundtrip": { - "p50": 151.48800611495972, - "p90": 159.90400314331055, - "p95": 162.20800578594208, - "p99": 169.47199404239655 + "p50": 130.62399625778198, + "p90": 133.08799266815186, + "p95": 134.94400680065155, + "p99": 141.88799262046814 }, "isolatedSum": { - "p50": 175.00799894332886, - "p90": 185.43999642133713, - "p95": 191.96800142526627, - "p99": 201.50399953126907 + "p50": 128.80000099539757, + "p90": 132.76800140738487, + "p95": 134.65599715709686, + "p99": 157.02399611473083 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, + "dispatchLogicalBytes": 2487296, "combineLogicalBytes": 4974592, "fanoutMean": 5.421875, "recvTokensMax": 47, - "stragglerRank": 4, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -2524,31 +2347,31 @@ "tokensPerRank": 16, "globalTokens": 128, "dispatch": { - "p50": 95.71199864149094, - "p90": 100.8640006184578, - "p95": 102.68799960613251, - "p99": 106.49599879980087 + "p50": 61.28000095486641, + "p90": 63.551999628543854, + "p95": 64.89600241184235, + "p99": 75.58400183916092 }, 
"combine": { - "p50": 80.64000308513641, - "p90": 87.90399879217148, - "p95": 89.24800157546997, - "p99": 95.23200243711472 + "p50": 69.47200000286102, + "p90": 71.45600020885468, + "p95": 72.38399982452393, + "p99": 76.67200267314911 }, "roundtrip": { - "p50": 152.319997549057, - "p90": 160.19199788570404, - "p95": 162.23999857902527, - "p99": 168.92799735069275 + "p50": 132.9919993877411, + "p90": 135.55200397968292, + "p95": 137.37599551677704, + "p99": 149.63200688362122 }, "isolatedSum": { - "p50": 176.35200172662735, - "p90": 188.76799941062927, - "p95": 191.93600118160248, - "p99": 201.7280012369156 + "p50": 130.75200095772743, + "p90": 135.00799983739853, + "p95": 137.28000223636627, + "p99": 152.25600451231003 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 9920512, + "dispatchLogicalBytes": 4960256, "combineLogicalBytes": 9920512, "fanoutMean": 5.40625, "recvTokensMax": 92, @@ -2561,35 +2384,35 @@ "tokensPerRank": 32, "globalTokens": 256, "dispatch": { - "p50": 80.86399734020233, - "p90": 103.26399654150009, - "p95": 105.47199845314026, - "p99": 113.18399757146835 + "p50": 64.2239972949028, + "p90": 66.39999896287918, + "p95": 67.87200272083282, + "p99": 82.8159973025322 }, "combine": { - "p50": 80.35200089216232, - "p90": 89.31200206279755, - "p95": 90.04800021648407, - "p99": 95.74399888515472 + "p50": 75.39200037717819, + "p90": 77.02399790287018, + "p95": 77.72800326347351, + "p99": 85.82399785518646 }, "roundtrip": { - "p50": 136.48000359535217, - "p90": 164.60800170898438, - "p95": 167.10400581359863, - "p99": 175.10400712490082 + "p50": 145.37599682807922, + "p90": 147.8399932384491, + "p95": 148.83199334144592, + "p99": 160.41600704193115 }, "isolatedSum": { - "p50": 161.21599823236465, - "p90": 192.57599860429764, - "p95": 195.51999866962433, - "p99": 208.92799645662308 + "p50": 139.615997672081, + "p90": 143.42399686574936, + "p95": 145.60000598430634, + "p99": 168.63999515771866 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 
19726336, + "dispatchLogicalBytes": 9863168, "combineLogicalBytes": 19726336, "fanoutMean": 5.375, "recvTokensMax": 182, - "stragglerRank": 4, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -2598,35 +2421,35 @@ "tokensPerRank": 64, "globalTokens": 512, "dispatch": { - "p50": 103.4879982471466, - "p90": 112.8000020980835, - "p95": 114.3679991364479, - "p99": 125.72799623012543 + "p50": 72.9919970035553, + "p90": 75.6480023264885, + "p95": 76.89599692821503, + "p99": 89.79199826717377 }, "combine": { - "p50": 96.83199971914291, - "p90": 104.12800312042236, - "p95": 104.99200224876404, - "p99": 106.33599758148193 + "p50": 89.24800157546997, + "p90": 91.2960022687912, + "p95": 92.99200028181076, + "p99": 104.76800054311752 }, "roundtrip": { - "p50": 170.71999609470367, - "p90": 181.21600151062012, - "p95": 182.91200697422028, - "p99": 186.81600689888 + "p50": 173.92000555992126, + "p90": 176.9919991493225, + "p95": 179.1040003299713, + "p99": 198.08000326156616 }, "isolatedSum": { - "p50": 200.31999796628952, - "p90": 216.92800521850586, - "p95": 219.36000138521194, - "p99": 232.06399381160736 + "p50": 162.23999857902527, + "p90": 166.9440045952797, + "p95": 169.8879972100258, + "p99": 194.5599988102913 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 38993920, + "dispatchLogicalBytes": 19496960, "combineLogicalBytes": 38993920, "fanoutMean": 5.3125, "recvTokensMax": 367, - "stragglerRank": 4, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -2635,31 +2458,31 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 111.29599809646606, - "p90": 130.87999820709229, - "p95": 133.5040032863617, - "p99": 139.93600010871887 + "p50": 83.90399813652039, + "p90": 86.65599673986435, + "p95": 87.96799927949905, + "p99": 94.2080020904541 }, "combine": { - "p50": 106.27199709415436, - "p90": 119.58400160074234, - "p95": 119.99999731779099, - "p99": 122.3360002040863 + "p50": 110.20799726247787, + "p90": 
112.92800307273865, + "p95": 113.88800293207169, + "p99": 120.92799693346024 }, "roundtrip": { - "p50": 197.56799936294556, - "p90": 215.80800414085388, - "p95": 217.92000532150269, - "p99": 219.80799734592438 + "p50": 220.19200026988983, + "p90": 223.4240025281906, + "p95": 224.99200701713562, + "p99": 245.08799612522125 }, "isolatedSum": { - "p50": 217.56799519062042, - "p90": 250.46399980783463, - "p95": 253.50400060415268, - "p99": 262.2720003128052 + "p50": 194.11199539899826, + "p90": 199.583999812603, + "p95": 201.85600221157074, + "p99": 215.13599902391434 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, + "dispatchLogicalBytes": 38836224, "combineLogicalBytes": 77672448, "fanoutMean": 5.291015625, "recvTokensMax": 723, @@ -2671,32 +2494,33 @@ ] }, { - "id": "cx-2d0599c0", - "identity": "mi355x|mori|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|c774c8e4abb34da", - "colorKey": "mi355x_2fa43515", - "comparisonKey": "2796ed88af4b14b0", + "id": "cx-0eafa1d5", + "identity": "b300|deepep|4096|8|128|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||75530960a30b452", + "colorKey": "b300_c9569580", + "comparisonKey": "62e1e2299cdc509d", "schemaVersion": 3, - "generatedAt": "2026-06-26T15:40:45.756534+00:00", + "generatedAt": "2026-06-27T11:14:16.179311+00:00", "status": "valid", - "publicationStatus": "diagnostic", - "runner": "mi355x-amds_04", - "sku": "mi355x", - "backend": "mori", + "publicationStatus": "official", + "runner": "b300-nv_14", + "sku": "b300", + "backend": "deepep", "phase": "decode", "mode": "normal", - "resourceMode": "normalized", - "suite": "resource-constrained", + "resourceMode": "tuned", + "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "mi355x-xgmi", - "transport": "xgmi", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", "worldSize": 8, "epSize": 
8, - "label": "MI355X EP8 · mori · bf16 (norm)", + "label": "B300 EP8 · deepep · bf16", + "model": "Qwen3.5", "shape": { - "hidden": 7168, + "hidden": 4096, "topk": 8, - "experts": 256, + "experts": 128, "routing": "uniform", "routingLabel": "uniform", "routingStep": 0, @@ -2707,70 +2531,70 @@ "combineQuantMode": "none" }, "resourceProfile": { - "requestedFraction": 0.18, - "achievedFraction": 0.3125, - "configuredUnits": 80, - "deviceUnits": 256, - "resourceClass": "unknown", - "conformanceClass": "minimum-functional", + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", "fixedKernel": false, "paretoEligible": false }, "placement": { "kind": "packed", - "nodes": 2, + "nodes": 1, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "c774c8e4abb34da", - "workloadId": "set:5:d8d49658059863f2", + "traceSignature": "75530960a30b452", + "workloadId": "set:8:d1b92539bddfb570", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, - "backendVersion": "image:rocm/sgl-dev:sglang-0.5.9-rocm720-mi35x-mori-0227-2", + "backendVersion": "1.2.1", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28247575150", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28247575150", - "createdAt": "2026-06-26T15:22:26Z", - "sha": "fd23d02b65dba6f1ed963342b188022fc27263d1" + "id": "28287508460", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28287508460", + "createdAt": "2026-06-27T11:14:16.179311+00:00", + "sha": "df7fddee0a275156d4c72fa006cd2b73bce72613" }, "rows": [ { "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 40.55999964475632, - "p90": 43.15999895334244, - "p95": 44.881001114845276, - "p99": 47.55999892950058 + "p50": 
56.992001831531525, + "p90": 59.328000992536545, + "p95": 62.55999952554703, + "p99": 80.38400113582611 }, "combine": { - "p50": 16.119999811053276, - "p90": 18.719999119639397, - "p95": 19.840000197291374, - "p99": 22.520000115036964 + "p50": 55.00800162553787, + "p90": 57.0559985935688, + "p95": 64.41599875688553, + "p99": 65.92000275850296 }, "roundtrip": { - "p50": 56.040000170469284, - "p90": 59.20000001788139, - "p95": 60.80099940299988, - "p99": 63.120998442173004 + "p50": 94.81599926948547, + "p90": 97.63199836015701, + "p95": 99.04000163078308, + "p99": 108.0000028014183 }, "isolatedSum": { - "p50": 56.67999945580959, - "p90": 61.879998072981834, - "p95": 64.72100131213665, - "p99": 70.07999904453754 + "p50": 112.0000034570694, + "p90": 116.38399958610535, + "p95": 126.97599828243256, + "p99": 146.30400389432907 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 630784, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 0, + "dispatchLogicalBytes": 344064, + "combineLogicalBytes": 344064, + "fanoutMean": 5.25, + "recvTokensMax": 6, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -2779,35 +2603,35 @@ "tokensPerRank": 2, "globalTokens": 16, "dispatch": { - "p50": 42.55999997258186, - "p90": 45.441001653671265, - "p95": 47.040000557899475, - "p99": 49.959998577833176 + "p50": 56.89600110054016, + "p90": 59.039998799562454, + "p95": 61.15199998021126, + "p99": 82.04799890518188 }, "combine": { - "p50": 16.16000011563301, - "p90": 18.360000103712082, - "p95": 19.600000232458115, - "p99": 22.63999916613102 + "p50": 55.67999929189682, + "p90": 58.400001376867294, + "p95": 64.67200070619583, + "p99": 76.67200267314911 }, "roundtrip": { - "p50": 58.83999913930893, - "p90": 61.88099831342697, - "p95": 63.48100304603577, - "p99": 65.40100276470184 + "p50": 95.16800194978714, + "p90": 98.11200201511383, + "p95": 100.67199915647507, + "p99": 112.03200370073318 }, "isolatedSum": { - "p50": 
58.720000088214874, - "p90": 63.80100175738335, - "p95": 66.64000079035759, - "p99": 72.5999977439642 + "p50": 112.57600039243698, + "p90": 117.44000017642975, + "p95": 125.82400068640709, + "p99": 158.720001578331 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1232896, - "combineLogicalBytes": 1232896, + "dispatchLogicalBytes": 704512, + "combineLogicalBytes": 704512, "fanoutMean": 5.375, - "recvTokensMax": 13, - "stragglerRank": 0, + "recvTokensMax": 12, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -2816,35 +2640,35 @@ "tokensPerRank": 4, "globalTokens": 32, "dispatch": { - "p50": 42.160000652074814, - "p90": 44.840000569820404, - "p95": 46.28000035881996, - "p99": 49.84100162982941 + "p50": 57.21599981188774, + "p90": 59.74400043487549, + "p95": 61.664000153541565, + "p99": 77.18399912118912 }, "combine": { - "p50": 19.039999693632126, - "p90": 22.1599992364645, - "p95": 23.48100021481514, - "p99": 54.63999882340431 + "p50": 56.063998490571976, + "p90": 58.14399942755699, + "p95": 64.92800265550613, + "p99": 78.68800312280655 }, "roundtrip": { - "p50": 61.59999966621399, - "p90": 64.71999734640121, - "p95": 65.76000154018402, - "p99": 68.36000084877014 + "p50": 95.74399888515472, + "p90": 98.78399968147278, + "p95": 103.26399654150009, + "p99": 113.0559965968132 }, "isolatedSum": { - "p50": 61.20000034570694, - "p90": 66.9999998062849, - "p95": 69.7610005736351, - "p99": 104.48100045323372 + "p50": 113.27999830245972, + "p90": 117.88799986243248, + "p95": 126.5920028090477, + "p99": 155.87200224399567 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2480128, - "combineLogicalBytes": 2480128, - "fanoutMean": 5.40625, - "recvTokensMax": 29, - "stragglerRank": 0, + "dispatchLogicalBytes": 1384448, + "combineLogicalBytes": 1384448, + "fanoutMean": 5.28125, + "recvTokensMax": 26, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -2853,35 +2677,35 @@ "tokensPerRank": 8, "globalTokens": 64, 
"dispatch": { - "p50": 42.52000153064728, - "p90": 45.1200008392334, - "p95": 46.080999076366425, - "p99": 48.8400012254715 + "p50": 58.079998940229416, + "p90": 61.08799949288368, + "p95": 62.65600025653839, + "p99": 71.68000191450119 }, "combine": { - "p50": 20.479999482631683, - "p90": 22.520000115036964, - "p95": 23.479999974370003, - "p99": 25.800000876188278 + "p50": 64.44799900054932, + "p90": 66.23999774456024, + "p95": 66.59200042486191, + "p99": 69.023996591568 }, "roundtrip": { - "p50": 62.67999857664108, - "p90": 65.5599981546402, - "p95": 66.880002617836, - "p99": 68.56100261211395 + "p50": 108.8000014424324, + "p90": 113.95200341939926, + "p95": 114.84800279140472, + "p99": 122.72000312805176 }, "isolatedSum": { - "p50": 63.00000101327896, - "p90": 67.64000095427036, - "p95": 69.56099905073643, - "p99": 74.64000210165977 + "p50": 122.52799794077873, + "p90": 127.32799723744392, + "p95": 129.2480006814003, + "p99": 140.70399850606918 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 0, + "dispatchLogicalBytes": 2744320, + "combineLogicalBytes": 2744320, + "fanoutMean": 5.234375, + "recvTokensMax": 49, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -2890,35 +2714,146 @@ "tokensPerRank": 16, "globalTokens": 128, "dispatch": { - "p50": 42.67999902367592, - "p90": 45.27999833226204, - "p95": 46.799998730421066, - "p99": 49.720000475645065 + "p50": 58.687999844551086, + "p90": 61.055999249219894, + "p95": 63.00800293684006, + "p99": 71.96799665689468 }, "combine": { - "p50": 24.921000003814697, - "p90": 27.240000665187836, - "p95": 28.07999961078167, - "p99": 30.27999959886074 + "p50": 57.82400071620941, + "p90": 66.3679987192154, + "p95": 66.81600213050842, + "p99": 77.98399776220322 }, "roundtrip": { - "p50": 67.9209977388382, - "p90": 71.04100286960602, - "p95": 72.12000340223312, - "p99": 74.08100366592407 + 
"p50": 111.39199882745743, + "p90": 122.04799801111221, + "p95": 126.5919953584671, + "p99": 132.86399841308594 }, "isolatedSum": { - "p50": 67.60099902749062, - "p90": 72.51999899744987, - "p95": 74.87999834120274, - "p99": 80.0000000745058 + "p50": 116.5120005607605, + "p90": 127.42399796843529, + "p95": 129.82400506734848, + "p99": 149.9519944190979 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 9920512, - "combineLogicalBytes": 9920512, - "fanoutMean": 5.40625, - "recvTokensMax": 92, - "stragglerRank": 0, + "dispatchLogicalBytes": 5464064, + "combineLogicalBytes": 5464064, + "fanoutMean": 5.2109375, + "recvTokensMax": 94, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 70.8480030298233, + "p90": 74.68800246715546, + "p95": 75.71200281381607, + "p99": 81.31200075149536 + }, + "combine": { + "p50": 66.30399823188782, + "p90": 67.07199662923813, + "p95": 67.71200150251389, + "p99": 77.15199887752533 + }, + "roundtrip": { + "p50": 108.99200290441513, + "p90": 114.07999694347382, + "p95": 116.7680025100708, + "p99": 132.47999548912048 + }, + "isolatedSum": { + "p50": 137.15200126171112, + "p90": 141.75999909639359, + "p95": 143.42400431632996, + "p99": 158.4639996290207 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 11124736, + "combineLogicalBytes": 11124736, + "fanoutMean": 5.3046875, + "recvTokensMax": 186, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 70.72000205516815, + "p90": 72.95999675989151, + "p95": 74.8480036854744, + "p99": 81.02399855852127 + }, + "combine": { + "p50": 78.75200361013412, + "p90": 79.55200225114822, + "p95": 80.19199967384338, + "p99": 95.96800059080124 + }, + "roundtrip": { + "p50": 131.77600502967834, + "p90": 136.63999736309052, + "p95": 138.91200721263885, + "p99": 158.04800391197205 + }, + 
"isolatedSum": { + "p50": 149.47200566530228, + "p90": 152.51199901103973, + "p95": 155.04000335931778, + "p99": 176.9919991493225 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22192128, + "combineLogicalBytes": 22192128, + "fanoutMean": 5.291015625, + "recvTokensMax": 358, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 82.5280025601387, + "p90": 85.21600067615509, + "p95": 88.16000074148178, + "p99": 100.80000013113022 + }, + "combine": { + "p50": 91.77599847316742, + "p90": 94.59199756383896, + "p95": 101.72799974679947, + "p99": 104.92800176143646 + }, + "roundtrip": { + "p50": 157.53600001335144, + "p90": 165.24800658226013, + "p95": 166.97600483894348, + "p99": 184.76800620555878 + }, + "isolatedSum": { + "p50": 174.30400103330612, + "p90": 179.80799823999405, + "p95": 189.88800048828125, + "p99": 205.72800189256668 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 44564480, + "combineLogicalBytes": 44564480, + "fanoutMean": 5.3125, + "recvTokensMax": 699, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -2942,13 +2877,2793 @@ "run": { "id": "28254359089", "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254359089", - "createdAt": "2026-06-26T17:27:42Z", + "createdAt": "2026-06-26T17:32:59.549027+00:00", "sha": "60dec7d70f554e252fec87709e2be52752947db1" } } ], - "scannedRuns": 12, - "scannedArtifacts": 10, - "contributingRuns": 9, - "generatedAt": "2026-06-27T00:53:59.155172+00:00" + "summaryCards": [ + { + "title": "Best backend · decode EP8", + "value": "flashinfer · B300", + "sub": "71 us RT p99 · mxfp8 · T=64" + }, + { + "title": "Best backend · prefill EP8", + "value": "flashinfer · B300", + "sub": "85 us RT p99 · nvfp4 · T=256" + }, + { + "title": "LL -> normal crossover", + "value": "T~128 tok/rank", + "sub": "H100 EP8 fp8 · normal RT p50 wins above this" + }, + { + 
"title": "Resource-normalized winner", + "value": "deepep · H100", + "sub": "113 us RT p99 · bf16 · T=64" + }, + { + "title": "Backend-default winner", + "value": "flashinfer · B300", + "sub": "71 us RT p99 · mxfp8 · T=64" + }, + { + "title": "Most unstable config", + "value": "H100 · deepep decode", + "sub": "3.27x p99 under zipf-heavy vs uniform", + "warning": true + }, + { + "title": "Invalid / diagnostic cases", + "value": "8", + "sub": "see Evidence failed table", + "warning": true, + "href": "#tab-evidence" + } + ], + "decision": { + "budgetsUs": [100, 250, 500], + "maxTokensUnderBudget": [ + { + "id": "cxb-3f6620d0", + "sku": "b300", + "backend": "deepep", + "phase": "decode", + "dispatchDtype": "bf16", + "epSize": 8, + "mode": "normal", + "budgets": { + "100": null, + "250": 128, + "500": 128 + } + }, + { + "id": "cxb-c27e2cad", + "sku": "b300", + "backend": "deepep", + "phase": "decode", + "dispatchDtype": "fp8", + "epSize": 8, + "mode": "normal", + "budgets": { + "100": null, + "250": 128, + "500": 128 + } + }, + { + "id": "cxb-567c4192", + "sku": "b300", + "backend": "deepep", + "phase": "decode", + "dispatchDtype": "fp8-directcast", + "epSize": 8, + "mode": "normal", + "budgets": { + "100": null, + "250": 128, + "500": 128 + } + }, + { + "id": "cxb-10314900", + "sku": "b300", + "backend": "deepep", + "phase": "decode", + "dispatchDtype": "fp8-pertoken", + "epSize": 8, + "mode": "normal", + "budgets": { + "100": null, + "250": 128, + "500": 128 + } + }, + { + "id": "cxb-238797ce", + "sku": "b300", + "backend": "deepep", + "phase": "prefill", + "dispatchDtype": "bf16", + "epSize": 8, + "mode": "normal", + "budgets": { + "100": null, + "250": 128, + "500": 512 + } + }, + { + "id": "cxb-67e5feea", + "sku": "b300", + "backend": "deepep", + "phase": "prefill", + "dispatchDtype": "fp8", + "epSize": 8, + "mode": "normal", + "budgets": { + "100": null, + "250": 128, + "500": 256 + } + } + ], + "recommendations": [ + { + "id": "cxr-d2992d7c", + "sku": "b300", + 
"phase": "decode", + "atTokensPerRank": 64, + "lowestP99DispatchUs": 71.4, + "config": "mxfp8/normal/layout-and-dispatch-v1/uniform/tuned", + "epSize": 8 + }, + { + "id": "cxr-1c3060b2", + "sku": "b300", + "phase": "prefill", + "atTokensPerRank": 256, + "lowestP99DispatchUs": 85, + "config": "nvfp4/normal/layout-and-dispatch-v1/uniform/tuned", + "epSize": 8 + }, + { + "id": "cxr-8fcf986c", + "sku": "h100", + "phase": "decode", + "atTokensPerRank": 64, + "lowestP99DispatchUs": 53.1, + "config": "fp8/ll/layout-and-dispatch-v1/uniform/normalized", + "epSize": 8 + }, + { + "id": "cxr-466c0bc2", + "sku": "h100", + "phase": "prefill", + "atTokensPerRank": 256, + "lowestP99DispatchUs": 104.6, + "config": "fp8/normal/cached-layout-comm-only-v1/uniform/tuned", + "epSize": 8 + } + ], + "llCrossover": [ + { + "sku": "h100", + "ep": 8, + "dtype": "bf16", + "stat": "p50", + "basis": "measured-roundtrip", + "normal_faster_at_T": "never-in-range" + }, + { + "sku": "h100", + "ep": 8, + "dtype": "bf16", + "stat": "p99", + "basis": "measured-roundtrip", + "normal_faster_at_T": "never-in-range" + }, + { + "sku": "h100", + "ep": 8, + "dtype": "bf16", + "stat": "p50", + "basis": "measured-roundtrip", + "normal_faster_at_T": "never-in-range" + }, + { + "sku": "h100", + "ep": 8, + "dtype": "bf16", + "stat": "p99", + "basis": "measured-roundtrip", + "normal_faster_at_T": "never-in-range" + } + ], + "resourcePareto": [ + { + "sku": "b300", + "phase": "decode", + "dtype": "bf16", + "T": 1, + "n_points": 2, + "curve": [ + { + "achieved_fraction": 0.1351, + "dispatch_p50": 81.2, + "dispatch_p99": 93, + "resource_class": "backend-tuned" + }, + { + "achieved_fraction": 0.1824, + "dispatch_p50": 57, + "dispatch_p99": 73.4, + "resource_class": "resource-constrained" + } + ] + }, + { + "sku": "b300", + "phase": "decode", + "dtype": "bf16", + "T": 2, + "n_points": 2, + "curve": [ + { + "achieved_fraction": 0.1351, + "dispatch_p50": 80.9, + "dispatch_p99": 89.8, + "resource_class": "backend-tuned" + 
}, + { + "achieved_fraction": 0.1824, + "dispatch_p50": 57, + "dispatch_p99": 73.2, + "resource_class": "resource-constrained" + } + ] + }, + { + "sku": "b300", + "phase": "decode", + "dtype": "bf16", + "T": 4, + "n_points": 2, + "curve": [ + { + "achieved_fraction": 0.1351, + "dispatch_p50": 81.4, + "dispatch_p99": 107, + "resource_class": "backend-tuned" + }, + { + "achieved_fraction": 0.1824, + "dispatch_p50": 57.8, + "dispatch_p99": 68.1, + "resource_class": "resource-constrained" + } + ] + }, + { + "sku": "b300", + "phase": "decode", + "dtype": "bf16", + "T": 8, + "n_points": 2, + "curve": [ + { + "achieved_fraction": 0.1351, + "dispatch_p50": 81.4, + "dispatch_p99": 93.3, + "resource_class": "backend-tuned" + }, + { + "achieved_fraction": 0.1824, + "dispatch_p50": 59.3, + "dispatch_p99": 68.2, + "resource_class": "resource-constrained" + } + ] + } + ], + "topologyPenalty": [ + { + "sku": "h200", + "phase": "decode", + "dtype": "bf16", + "T": 1, + "ep8_p50": 123.6, + "ep16_p50": 578.4, + "penalty_pct": 367.9 + }, + { + "sku": "h200", + "phase": "decode", + "dtype": "bf16", + "T": 2, + "ep8_p50": 114.8, + "ep16_p50": 547.2, + "penalty_pct": 376.5 + }, + { + "sku": "h200", + "phase": "decode", + "dtype": "bf16", + "T": 4, + "ep8_p50": 111.7, + "ep16_p50": 621.5, + "penalty_pct": 456.5 + }, + { + "sku": "h200", + "phase": "decode", + "dtype": "bf16", + "T": 8, + "ep8_p50": 112.8, + "ep16_p50": 611.8, + "penalty_pct": 442.2 + } + ], + "skewPenalty": [ + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 1, + "p50_amplification": 0.694, + "p99_amplification": 0.867 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 2, + "p50_amplification": 0.695, + "p99_amplification": 0.811 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 4, + "p50_amplification": 0.697, + "p99_amplification": 0.683 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + 
"T": 8, + "p50_amplification": 0.716, + "p99_amplification": 0.76 + } + ] + }, + "nccl": [ + { + "id": "cxn-940e3e1c", + "identity": "nccl|b300|all_reduce|b300-nvlink-island|nvlink|8|nccl-tests-v1", + "op": "all_reduce", + "sku": "b300", + "runner": "b300-nv_03", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "nodes": 1, + "dtype": "float", + "comparisonClass": "standardized", + "comparisonKey": "139076c9959b0653", + "measurementContract": "nccl-tests-v1", + "avgBusBandwidthGbps": 218.816, + "status": "valid", + "valid": true, + "colorKey": "b300_940e3e1c", + "label": "B300 · b300-nvlink-island · nvlink (ws8)", + "generatedAt": "2026-06-27T11:18:24.142157+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-27T11:18:24.142157+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 8, + "dtype": "float", + "latencyUs": 28.3, + "algBandwidthGbps": 0, + "busBandwidthGbps": 0, + "outOfPlaceUs": 28.3, + "inPlaceUs": 27.59, + "correct": true + }, + { + "sizeBytes": 16, + "dtype": "float", + "latencyUs": 27.27, + "algBandwidthGbps": 0, + "busBandwidthGbps": 0, + "outOfPlaceUs": 27.27, + "inPlaceUs": 27.06, + "correct": true + }, + { + "sizeBytes": 32, + "dtype": "float", + "latencyUs": 27.25, + "algBandwidthGbps": 0, + "busBandwidthGbps": 0, + "outOfPlaceUs": 27.25, + "inPlaceUs": 27.3, + "correct": true + }, + { + "sizeBytes": 64, + "dtype": "float", + "latencyUs": 27.32, + "algBandwidthGbps": 0, + "busBandwidthGbps": 0, + "outOfPlaceUs": 27.32, + "inPlaceUs": 27.28, + "correct": true + }, + { + "sizeBytes": 128, + "dtype": "float", + "latencyUs": 27.42, + "algBandwidthGbps": 0, + "busBandwidthGbps": 0.01, + "outOfPlaceUs": 27.42, + "inPlaceUs": 27.59, + "correct": true + }, + { + "sizeBytes": 256, + "dtype": "float", + "latencyUs": 27.26, + "algBandwidthGbps": 0.01, + "busBandwidthGbps": 0.02, + "outOfPlaceUs": 27.26, + "inPlaceUs": 27.32, + "correct": true + }, + { + "sizeBytes": 512, + "dtype": "float", + 
"latencyUs": 27.16, + "algBandwidthGbps": 0.02, + "busBandwidthGbps": 0.03, + "outOfPlaceUs": 27.16, + "inPlaceUs": 27.38, + "correct": true + }, + { + "sizeBytes": 1024, + "dtype": "float", + "latencyUs": 27.33, + "algBandwidthGbps": 0.04, + "busBandwidthGbps": 0.07, + "outOfPlaceUs": 27.33, + "inPlaceUs": 27.14, + "correct": true + }, + { + "sizeBytes": 2048, + "dtype": "float", + "latencyUs": 27.36, + "algBandwidthGbps": 0.07, + "busBandwidthGbps": 0.13, + "outOfPlaceUs": 27.36, + "inPlaceUs": 27.33, + "correct": true + }, + { + "sizeBytes": 4096, + "dtype": "float", + "latencyUs": 27.3, + "algBandwidthGbps": 0.15, + "busBandwidthGbps": 0.26, + "outOfPlaceUs": 27.3, + "inPlaceUs": 27.35, + "correct": true + }, + { + "sizeBytes": 8192, + "dtype": "float", + "latencyUs": 27.52, + "algBandwidthGbps": 0.3, + "busBandwidthGbps": 0.52, + "outOfPlaceUs": 27.52, + "inPlaceUs": 27.59, + "correct": true + }, + { + "sizeBytes": 16384, + "dtype": "float", + "latencyUs": 27.64, + "algBandwidthGbps": 0.59, + "busBandwidthGbps": 1.04, + "outOfPlaceUs": 27.64, + "inPlaceUs": 27.61, + "correct": true + }, + { + "sizeBytes": 32768, + "dtype": "float", + "latencyUs": 27.76, + "algBandwidthGbps": 1.18, + "busBandwidthGbps": 2.07, + "outOfPlaceUs": 27.76, + "inPlaceUs": 27.85, + "correct": true + }, + { + "sizeBytes": 65536, + "dtype": "float", + "latencyUs": 27.7, + "algBandwidthGbps": 2.37, + "busBandwidthGbps": 4.14, + "outOfPlaceUs": 28.19, + "inPlaceUs": 27.7, + "correct": true + }, + { + "sizeBytes": 131072, + "dtype": "float", + "latencyUs": 28.21, + "algBandwidthGbps": 4.65, + "busBandwidthGbps": 8.13, + "outOfPlaceUs": 28.59, + "inPlaceUs": 28.21, + "correct": true + }, + { + "sizeBytes": 262144, + "dtype": "float", + "latencyUs": 28.56, + "algBandwidthGbps": 9.18, + "busBandwidthGbps": 16.06, + "outOfPlaceUs": 29.16, + "inPlaceUs": 28.56, + "correct": true + }, + { + "sizeBytes": 524288, + "dtype": "float", + "latencyUs": 29.89, + "algBandwidthGbps": 17.54, + 
"busBandwidthGbps": 30.7, + "outOfPlaceUs": 29.89, + "inPlaceUs": 29.93, + "correct": true + }, + { + "sizeBytes": 1048576, + "dtype": "float", + "latencyUs": 32.16, + "algBandwidthGbps": 32.61, + "busBandwidthGbps": 57.06, + "outOfPlaceUs": 32.16, + "inPlaceUs": 32.67, + "correct": true + }, + { + "sizeBytes": 2097152, + "dtype": "float", + "latencyUs": 37.47, + "algBandwidthGbps": 55.97, + "busBandwidthGbps": 97.94, + "outOfPlaceUs": 37.47, + "inPlaceUs": 38.07, + "correct": true + }, + { + "sizeBytes": 4194304, + "dtype": "float", + "latencyUs": 56.79, + "algBandwidthGbps": 73.86, + "busBandwidthGbps": 129.26, + "outOfPlaceUs": 56.88, + "inPlaceUs": 56.79, + "correct": true + }, + { + "sizeBytes": 8388608, + "dtype": "float", + "latencyUs": 77.08, + "algBandwidthGbps": 108.83, + "busBandwidthGbps": 190.45, + "outOfPlaceUs": 78.24, + "inPlaceUs": 77.08, + "correct": true + }, + { + "sizeBytes": 16777216, + "dtype": "float", + "latencyUs": 104.77, + "algBandwidthGbps": 160.14, + "busBandwidthGbps": 280.24, + "outOfPlaceUs": 106.93, + "inPlaceUs": 104.77, + "correct": true + }, + { + "sizeBytes": 33554432, + "dtype": "float", + "latencyUs": 166.18, + "algBandwidthGbps": 201.91, + "busBandwidthGbps": 353.34, + "outOfPlaceUs": 168.44, + "inPlaceUs": 166.18, + "correct": true + }, + { + "sizeBytes": 67108864, + "dtype": "float", + "latencyUs": 274.52, + "algBandwidthGbps": 244.46, + "busBandwidthGbps": 427.8, + "outOfPlaceUs": 274.52, + "inPlaceUs": 275.23, + "correct": true + }, + { + "sizeBytes": 134217728, + "dtype": "float", + "latencyUs": 391.34, + "algBandwidthGbps": 342.97, + "busBandwidthGbps": 600.19, + "outOfPlaceUs": 391.34, + "inPlaceUs": 392.6, + "correct": true + }, + { + "sizeBytes": 268435456, + "dtype": "float", + "latencyUs": 711.09, + "algBandwidthGbps": 377.5, + "busBandwidthGbps": 660.62, + "outOfPlaceUs": 711.09, + "inPlaceUs": 712.3, + "correct": true + }, + { + "sizeBytes": 536870912, + "dtype": "float", + "latencyUs": 1324.96, + 
"algBandwidthGbps": 405.2, + "busBandwidthGbps": 709.1, + "outOfPlaceUs": 1324.96, + "inPlaceUs": 1327.33, + "correct": true + }, + { + "sizeBytes": 1073741824, + "dtype": "float", + "latencyUs": 2553.96, + "algBandwidthGbps": 420.42, + "busBandwidthGbps": 735.74, + "outOfPlaceUs": 2558.96, + "inPlaceUs": 2553.96, + "correct": true + }, + { + "sizeBytes": 2147483648, + "dtype": "float", + "latencyUs": 4571.5, + "algBandwidthGbps": 469.75, + "busBandwidthGbps": 822.07, + "outOfPlaceUs": 4576.46, + "inPlaceUs": 4571.5, + "correct": true + }, + { + "sizeBytes": 4294967296, + "dtype": "float", + "latencyUs": 9024.56, + "algBandwidthGbps": 475.92, + "busBandwidthGbps": 832.86, + "outOfPlaceUs": 9034.78, + "inPlaceUs": 9024.56, + "correct": true + }, + { + "sizeBytes": 8589934592, + "dtype": "float", + "latencyUs": 17971.9, + "algBandwidthGbps": 477.96, + "busBandwidthGbps": 836.44, + "outOfPlaceUs": 17991.5, + "inPlaceUs": 17971.9, + "correct": true + } + ] + }, + { + "id": "cxn-fd5a787b", + "identity": "allreduce-fw|b300|flashinfer-oneshot|b300-nvlink-island|nvlink|8|allreduce-fw-v1", + "op": "all_reduce", + "sku": "b300", + "runner": "b300-nv_11", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "nodes": null, + "dtype": "bf16", + "comparisonClass": null, + "comparisonKey": "81bfaa10f5beda36", + "measurementContract": "allreduce-fw-v1", + "avgBusBandwidthGbps": null, + "status": "valid", + "valid": true, + "colorKey": "b300_fd5a787b", + "label": "B300 · flashinfer-oneshot (fw-AR · ws8)", + "generatedAt": "2026-06-28T01:47:48.908164+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-28T01:47:48.908164+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 4096, + "dtype": "bf16", + "latencyUs": 11.661, + "algBandwidthGbps": 0.351, + "busBandwidthGbps": 0.615, + "outOfPlaceUs": null, + "inPlaceUs": null, + "correct": true + }, + { + "sizeBytes": 16384, + "dtype": "bf16", + "latencyUs": 11.601, + 
"algBandwidthGbps": 1.412, + "busBandwidthGbps": 2.472, + "outOfPlaceUs": null, + "inPlaceUs": null, + "correct": true + }, + { + "sizeBytes": 65536, + "dtype": "bf16", + "latencyUs": 12.381, + "algBandwidthGbps": 5.293, + "busBandwidthGbps": 9.263, + "outOfPlaceUs": null, + "inPlaceUs": null, + "correct": true + }, + { + "sizeBytes": 262144, + "dtype": "bf16", + "latencyUs": 14.274, + "algBandwidthGbps": 18.365, + "busBandwidthGbps": 32.139, + "outOfPlaceUs": null, + "inPlaceUs": null, + "correct": true + }, + { + "sizeBytes": 1048576, + "dtype": "bf16", + "latencyUs": 23.854, + "algBandwidthGbps": 43.958, + "busBandwidthGbps": 76.926, + "outOfPlaceUs": null, + "inPlaceUs": null, + "correct": true + }, + { + "sizeBytes": 4194304, + "dtype": "bf16", + "latencyUs": 75.394, + "algBandwidthGbps": 55.632, + "busBandwidthGbps": 97.356, + "outOfPlaceUs": null, + "inPlaceUs": null, + "correct": true + }, + { + "sizeBytes": 16777216, + "dtype": "bf16", + "latencyUs": 244.644, + "algBandwidthGbps": 68.578, + "busBandwidthGbps": 120.011, + "outOfPlaceUs": null, + "inPlaceUs": null, + "correct": true + }, + { + "sizeBytes": 67108864, + "dtype": "bf16", + "latencyUs": 956.149, + "algBandwidthGbps": 70.187, + "busBandwidthGbps": 122.827, + "outOfPlaceUs": null, + "inPlaceUs": null, + "correct": true + } + ] + }, + { + "id": "cxn-087af4ad", + "identity": "allreduce-fw|b300|flashinfer-twoshot|b300-nvlink-island|nvlink|8|allreduce-fw-v1", + "op": "all_reduce", + "sku": "b300", + "runner": "b300-nv_11", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "nodes": null, + "dtype": "bf16", + "comparisonClass": null, + "comparisonKey": "183298dcd11c3e1e", + "measurementContract": "allreduce-fw-v1", + "avgBusBandwidthGbps": null, + "status": "valid", + "valid": true, + "colorKey": "b300_087af4ad", + "label": "B300 · flashinfer-twoshot (fw-AR · ws8)", + "generatedAt": "2026-06-28T01:47:48.908164+00:00", + "run": { + "id": null, + "url": null, + 
"createdAt": "2026-06-28T01:47:48.908164+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 65536, + "dtype": "bf16", + "latencyUs": 385.191, + "algBandwidthGbps": 0.17, + "busBandwidthGbps": 0.298, + "outOfPlaceUs": null, + "inPlaceUs": null, + "correct": true + }, + { + "sizeBytes": 262144, + "dtype": "bf16", + "latencyUs": 118.644, + "algBandwidthGbps": 2.209, + "busBandwidthGbps": 3.867, + "outOfPlaceUs": null, + "inPlaceUs": null, + "correct": true + }, + { + "sizeBytes": 1048576, + "dtype": "bf16", + "latencyUs": 47.46, + "algBandwidthGbps": 22.094, + "busBandwidthGbps": 38.664, + "outOfPlaceUs": null, + "inPlaceUs": null, + "correct": true + }, + { + "sizeBytes": 4194304, + "dtype": "bf16", + "latencyUs": 43.002, + "algBandwidthGbps": 97.537, + "busBandwidthGbps": 170.69, + "outOfPlaceUs": null, + "inPlaceUs": null, + "correct": true + }, + { + "sizeBytes": 16777216, + "dtype": "bf16", + "latencyUs": 90.81, + "algBandwidthGbps": 184.75, + "busBandwidthGbps": 323.313, + "outOfPlaceUs": null, + "inPlaceUs": null, + "correct": true + }, + { + "sizeBytes": 67108864, + "dtype": "bf16", + "latencyUs": 353.165, + "algBandwidthGbps": 190.021, + "busBandwidthGbps": 332.537, + "outOfPlaceUs": null, + "inPlaceUs": null, + "correct": true + } + ] + }, + { + "id": "cxn-a8203ce9", + "identity": "nccl|b300|all_gather|b300-nvlink-island|nvlink|8|nccl-tests-v1", + "op": "all_gather", + "sku": "b300", + "runner": "b300-nv_03", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "nodes": 1, + "dtype": "float", + "comparisonClass": "standardized", + "comparisonKey": "e6eafb7204b78dd3", + "measurementContract": "nccl-tests-v1", + "avgBusBandwidthGbps": 186.922, + "status": "valid", + "valid": true, + "colorKey": "b300_a8203ce9", + "label": "B300 · b300-nvlink-island · nvlink (ws8)", + "generatedAt": "2026-06-27T11:18:41.342024+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-27T11:18:41.342024+00:00", + "sha": null 
+ }, + "rows": [ + { + "sizeBytes": 128, + "dtype": "float", + "latencyUs": 27.36, + "algBandwidthGbps": 0, + "busBandwidthGbps": 0, + "outOfPlaceUs": 27.36, + "inPlaceUs": 27.26, + "correct": true + }, + { + "sizeBytes": 256, + "dtype": "float", + "latencyUs": 26.88, + "algBandwidthGbps": 0.01, + "busBandwidthGbps": 0.01, + "outOfPlaceUs": 26.88, + "inPlaceUs": 26.89, + "correct": true + }, + { + "sizeBytes": 512, + "dtype": "float", + "latencyUs": 27.11, + "algBandwidthGbps": 0.02, + "busBandwidthGbps": 0.02, + "outOfPlaceUs": 27.11, + "inPlaceUs": 27.07, + "correct": true + }, + { + "sizeBytes": 1024, + "dtype": "float", + "latencyUs": 26.64, + "algBandwidthGbps": 0.04, + "busBandwidthGbps": 0.03, + "outOfPlaceUs": 26.64, + "inPlaceUs": 26.87, + "correct": true + }, + { + "sizeBytes": 2048, + "dtype": "float", + "latencyUs": 27.03, + "algBandwidthGbps": 0.08, + "busBandwidthGbps": 0.07, + "outOfPlaceUs": 27.03, + "inPlaceUs": 26.8, + "correct": true + }, + { + "sizeBytes": 4096, + "dtype": "float", + "latencyUs": 26.95, + "algBandwidthGbps": 0.15, + "busBandwidthGbps": 0.13, + "outOfPlaceUs": 26.95, + "inPlaceUs": 27.51, + "correct": true + }, + { + "sizeBytes": 8192, + "dtype": "float", + "latencyUs": 26.84, + "algBandwidthGbps": 0.31, + "busBandwidthGbps": 0.27, + "outOfPlaceUs": 27.05, + "inPlaceUs": 26.84, + "correct": true + }, + { + "sizeBytes": 16384, + "dtype": "float", + "latencyUs": 27.2, + "algBandwidthGbps": 0.6, + "busBandwidthGbps": 0.53, + "outOfPlaceUs": 27.2, + "inPlaceUs": 26.86, + "correct": true + }, + { + "sizeBytes": 32768, + "dtype": "float", + "latencyUs": 26.68, + "algBandwidthGbps": 1.23, + "busBandwidthGbps": 1.07, + "outOfPlaceUs": 26.98, + "inPlaceUs": 26.68, + "correct": true + }, + { + "sizeBytes": 65536, + "dtype": "float", + "latencyUs": 26.75, + "algBandwidthGbps": 2.45, + "busBandwidthGbps": 2.14, + "outOfPlaceUs": 26.89, + "inPlaceUs": 26.75, + "correct": true + }, + { + "sizeBytes": 131072, + "dtype": "float", + "latencyUs": 
27.63, + "algBandwidthGbps": 4.74, + "busBandwidthGbps": 4.15, + "outOfPlaceUs": 27.63, + "inPlaceUs": 27.81, + "correct": true + }, + { + "sizeBytes": 262144, + "dtype": "float", + "latencyUs": 28.34, + "algBandwidthGbps": 9.25, + "busBandwidthGbps": 8.09, + "outOfPlaceUs": 28.34, + "inPlaceUs": 28.46, + "correct": true + }, + { + "sizeBytes": 524288, + "dtype": "float", + "latencyUs": 29.45, + "algBandwidthGbps": 17.8, + "busBandwidthGbps": 15.58, + "outOfPlaceUs": 29.49, + "inPlaceUs": 29.45, + "correct": true + }, + { + "sizeBytes": 1048576, + "dtype": "float", + "latencyUs": 31.36, + "algBandwidthGbps": 33.43, + "busBandwidthGbps": 29.25, + "outOfPlaceUs": 31.51, + "inPlaceUs": 31.36, + "correct": true + }, + { + "sizeBytes": 2097152, + "dtype": "float", + "latencyUs": 35.8, + "algBandwidthGbps": 58.58, + "busBandwidthGbps": 51.26, + "outOfPlaceUs": 35.94, + "inPlaceUs": 35.8, + "correct": true + }, + { + "sizeBytes": 4194304, + "dtype": "float", + "latencyUs": 36.17, + "algBandwidthGbps": 115.95, + "busBandwidthGbps": 101.45, + "outOfPlaceUs": 36.29, + "inPlaceUs": 36.17, + "correct": true + }, + { + "sizeBytes": 8388608, + "dtype": "float", + "latencyUs": 36.99, + "algBandwidthGbps": 226.76, + "busBandwidthGbps": 198.42, + "outOfPlaceUs": 37.02, + "inPlaceUs": 36.99, + "correct": true + }, + { + "sizeBytes": 16777216, + "dtype": "float", + "latencyUs": 47.07, + "algBandwidthGbps": 356.41, + "busBandwidthGbps": 311.86, + "outOfPlaceUs": 47.08, + "inPlaceUs": 47.07, + "correct": true + }, + { + "sizeBytes": 33554432, + "dtype": "float", + "latencyUs": 74.95, + "algBandwidthGbps": 447.68, + "busBandwidthGbps": 391.72, + "outOfPlaceUs": 75.78, + "inPlaceUs": 74.95, + "correct": true + }, + { + "sizeBytes": 67108864, + "dtype": "float", + "latencyUs": 138.64, + "algBandwidthGbps": 484.06, + "busBandwidthGbps": 423.55, + "outOfPlaceUs": 139.26, + "inPlaceUs": 138.64, + "correct": true + }, + { + "sizeBytes": 134217728, + "dtype": "float", + "latencyUs": 211.47, + 
"algBandwidthGbps": 634.68, + "busBandwidthGbps": 555.34, + "outOfPlaceUs": 211.47, + "inPlaceUs": 211.53, + "correct": true + }, + { + "sizeBytes": 268435456, + "dtype": "float", + "latencyUs": 399.32, + "algBandwidthGbps": 672.24, + "busBandwidthGbps": 588.21, + "outOfPlaceUs": 399.32, + "inPlaceUs": 399.95, + "correct": true + }, + { + "sizeBytes": 536870912, + "dtype": "float", + "latencyUs": 779.11, + "algBandwidthGbps": 689.08, + "busBandwidthGbps": 602.95, + "outOfPlaceUs": 779.96, + "inPlaceUs": 779.11, + "correct": true + }, + { + "sizeBytes": 1073741824, + "dtype": "float", + "latencyUs": 1532.87, + "algBandwidthGbps": 700.48, + "busBandwidthGbps": 612.92, + "outOfPlaceUs": 1533.45, + "inPlaceUs": 1532.87, + "correct": true + }, + { + "sizeBytes": 2147483648, + "dtype": "float", + "latencyUs": 3010.48, + "algBandwidthGbps": 713.34, + "busBandwidthGbps": 624.17, + "outOfPlaceUs": 3010.48, + "inPlaceUs": 3011.29, + "correct": true + }, + { + "sizeBytes": 4294967296, + "dtype": "float", + "latencyUs": 5911.41, + "algBandwidthGbps": 726.55, + "busBandwidthGbps": 635.74, + "outOfPlaceUs": 5949.57, + "inPlaceUs": 5911.41, + "correct": true + }, + { + "sizeBytes": 8589934592, + "dtype": "float", + "latencyUs": 11675.3, + "algBandwidthGbps": 735.74, + "busBandwidthGbps": 643.77, + "outOfPlaceUs": 11728.1, + "inPlaceUs": 11675.3, + "correct": true + } + ] + }, + { + "id": "cxn-17454439", + "identity": "nccl|h100|all_gather|h100-nvlink-island|nvlink|8|nccl-tests-v1", + "op": "all_gather", + "sku": "h100", + "runner": "h100-dgxc-slurm_09", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "nodes": 1, + "dtype": "float", + "comparisonClass": "standardized", + "comparisonKey": "dacea770825df094", + "measurementContract": "nccl-tests-v1", + "avgBusBandwidthGbps": 110.587, + "status": "valid", + "valid": true, + "colorKey": "h100_17454439", + "label": "H100 · h100-nvlink-island · nvlink (ws8)", + "generatedAt": 
"2026-06-27T11:18:57.699787+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-27T11:18:57.699787+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 128, + "dtype": "float", + "latencyUs": 40.4, + "algBandwidthGbps": 0, + "busBandwidthGbps": 0, + "outOfPlaceUs": 40.4, + "inPlaceUs": 39.34, + "correct": true + }, + { + "sizeBytes": 256, + "dtype": "float", + "latencyUs": 38.62, + "algBandwidthGbps": 0.01, + "busBandwidthGbps": 0.01, + "outOfPlaceUs": 38.62, + "inPlaceUs": 38.09, + "correct": true + }, + { + "sizeBytes": 512, + "dtype": "float", + "latencyUs": 38.41, + "algBandwidthGbps": 0.01, + "busBandwidthGbps": 0.01, + "outOfPlaceUs": 38.41, + "inPlaceUs": 38.32, + "correct": true + }, + { + "sizeBytes": 1024, + "dtype": "float", + "latencyUs": 38.68, + "algBandwidthGbps": 0.03, + "busBandwidthGbps": 0.02, + "outOfPlaceUs": 38.68, + "inPlaceUs": 37.58, + "correct": true + }, + { + "sizeBytes": 2048, + "dtype": "float", + "latencyUs": 37.29, + "algBandwidthGbps": 0.05, + "busBandwidthGbps": 0.05, + "outOfPlaceUs": 37.29, + "inPlaceUs": 37.12, + "correct": true + }, + { + "sizeBytes": 4096, + "dtype": "float", + "latencyUs": 37.53, + "algBandwidthGbps": 0.11, + "busBandwidthGbps": 0.1, + "outOfPlaceUs": 37.53, + "inPlaceUs": 37.17, + "correct": true + }, + { + "sizeBytes": 8192, + "dtype": "float", + "latencyUs": 37.52, + "algBandwidthGbps": 0.22, + "busBandwidthGbps": 0.19, + "outOfPlaceUs": 37.52, + "inPlaceUs": 37.53, + "correct": true + }, + { + "sizeBytes": 16384, + "dtype": "float", + "latencyUs": 37.13, + "algBandwidthGbps": 0.44, + "busBandwidthGbps": 0.39, + "outOfPlaceUs": 37.13, + "inPlaceUs": 37.09, + "correct": true + }, + { + "sizeBytes": 32768, + "dtype": "float", + "latencyUs": 37.43, + "algBandwidthGbps": 0.88, + "busBandwidthGbps": 0.77, + "outOfPlaceUs": 37.43, + "inPlaceUs": 37.42, + "correct": true + }, + { + "sizeBytes": 65536, + "dtype": "float", + "latencyUs": 37.64, + "algBandwidthGbps": 1.74, + 
"busBandwidthGbps": 1.52, + "outOfPlaceUs": 37.64, + "inPlaceUs": 37.63, + "correct": true + }, + { + "sizeBytes": 131072, + "dtype": "float", + "latencyUs": 38.19, + "algBandwidthGbps": 3.43, + "busBandwidthGbps": 3, + "outOfPlaceUs": 38.48, + "inPlaceUs": 38.19, + "correct": true + }, + { + "sizeBytes": 262144, + "dtype": "float", + "latencyUs": 39.66, + "algBandwidthGbps": 6.61, + "busBandwidthGbps": 5.78, + "outOfPlaceUs": 39.66, + "inPlaceUs": 40.15, + "correct": true + }, + { + "sizeBytes": 524288, + "dtype": "float", + "latencyUs": 41.79, + "algBandwidthGbps": 12.55, + "busBandwidthGbps": 10.98, + "outOfPlaceUs": 42.17, + "inPlaceUs": 41.79, + "correct": true + }, + { + "sizeBytes": 1048576, + "dtype": "float", + "latencyUs": 43.89, + "algBandwidthGbps": 23.89, + "busBandwidthGbps": 20.9, + "outOfPlaceUs": 45.09, + "inPlaceUs": 43.89, + "correct": true + }, + { + "sizeBytes": 2097152, + "dtype": "float", + "latencyUs": 44.32, + "algBandwidthGbps": 47.31, + "busBandwidthGbps": 41.4, + "outOfPlaceUs": 44.55, + "inPlaceUs": 44.32, + "correct": true + }, + { + "sizeBytes": 4194304, + "dtype": "float", + "latencyUs": 44.97, + "algBandwidthGbps": 93.27, + "busBandwidthGbps": 81.61, + "outOfPlaceUs": 44.97, + "inPlaceUs": 45, + "correct": true + }, + { + "sizeBytes": 8388608, + "dtype": "float", + "latencyUs": 45.6, + "algBandwidthGbps": 183.98, + "busBandwidthGbps": 160.98, + "outOfPlaceUs": 46.08, + "inPlaceUs": 45.6, + "correct": true + }, + { + "sizeBytes": 16777216, + "dtype": "float", + "latencyUs": 67.94, + "algBandwidthGbps": 246.95, + "busBandwidthGbps": 216.08, + "outOfPlaceUs": 70.1, + "inPlaceUs": 67.94, + "correct": true + }, + { + "sizeBytes": 33554432, + "dtype": "float", + "latencyUs": 122.71, + "algBandwidthGbps": 273.44, + "busBandwidthGbps": 239.26, + "outOfPlaceUs": 125.34, + "inPlaceUs": 122.71, + "correct": true + }, + { + "sizeBytes": 67108864, + "dtype": "float", + "latencyUs": 206.56, + "algBandwidthGbps": 324.88, + "busBandwidthGbps": 
284.27, + "outOfPlaceUs": 210.98, + "inPlaceUs": 206.56, + "correct": true + }, + { + "sizeBytes": 134217728, + "dtype": "float", + "latencyUs": 390.25, + "algBandwidthGbps": 343.93, + "busBandwidthGbps": 300.94, + "outOfPlaceUs": 396.19, + "inPlaceUs": 390.25, + "correct": true + }, + { + "sizeBytes": 268435456, + "dtype": "float", + "latencyUs": 728.52, + "algBandwidthGbps": 368.47, + "busBandwidthGbps": 322.41, + "outOfPlaceUs": 733.59, + "inPlaceUs": 728.52, + "correct": true + }, + { + "sizeBytes": 536870912, + "dtype": "float", + "latencyUs": 1394.3, + "algBandwidthGbps": 385.05, + "busBandwidthGbps": 336.92, + "outOfPlaceUs": 1397.39, + "inPlaceUs": 1394.3, + "correct": true + }, + { + "sizeBytes": 1073741824, + "dtype": "float", + "latencyUs": 2705.03, + "algBandwidthGbps": 396.94, + "busBandwidthGbps": 347.33, + "outOfPlaceUs": 2729.3, + "inPlaceUs": 2705.03, + "correct": true + }, + { + "sizeBytes": 2147483648, + "dtype": "float", + "latencyUs": 5306.37, + "algBandwidthGbps": 404.7, + "busBandwidthGbps": 354.11, + "outOfPlaceUs": 5374.68, + "inPlaceUs": 5306.37, + "correct": true + }, + { + "sizeBytes": 4294967296, + "dtype": "float", + "latencyUs": 10451.7, + "algBandwidthGbps": 410.93, + "busBandwidthGbps": 359.57, + "outOfPlaceUs": 10616.4, + "inPlaceUs": 10451.7, + "correct": true + }, + { + "sizeBytes": 8589934592, + "dtype": "float", + "latencyUs": 20734.1, + "algBandwidthGbps": 414.29, + "busBandwidthGbps": 362.5, + "outOfPlaceUs": 21013.2, + "inPlaceUs": 20734.1, + "correct": true + } + ] + } + ], + "offload": [ + { + "id": "cxt-2254035a", + "identity": "offload|b300|b300-nvlink-island|nvlink|d2h|pageable|us", + "cohortIdentity": "offload|b300|b300-nvlink-island|nvlink", + "family": "offload", + "sku": "b300", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "operation": "d2h", + "subtype": "pageable", + "valid": true, + "status": "valid", + "note": "peak 58 GB/s · copy/compute overlap 26% · 8 NUMA node(s)", + 
"peakBandwidthGbps": 57.71, + "latencyUnit": "us", + "colorKey": "b300_2254035a", + "label": "B300 · d2h · pageable", + "generatedAt": "2026-06-27T13:14:13.476946+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-27T13:14:13.476946+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 4096, + "bandwidthGbps": 0.319, + "latency": 12.8224, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 16384, + "bandwidthGbps": 1.197, + "latency": 13.6896, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 65536, + "bandwidthGbps": 4.07, + "latency": 16.1008, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 262144, + "bandwidthGbps": 10.171, + "latency": 25.7744, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 1048576, + "bandwidthGbps": 16.232, + "latency": 64.5984, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 22.845, + "latency": 183.6016, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 25.057, + "latency": 669.5584, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 25.741, + "latency": 2607.0801, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 25.884, + "latency": 10370.5231, + "sizeClass": null, + "correct": null + } + ] + }, + { + "id": "cxt-ec9c695d", + "identity": "offload|b300|b300-nvlink-island|nvlink|d2h|pinned|us", + "cohortIdentity": "offload|b300|b300-nvlink-island|nvlink", + "family": "offload", + "sku": "b300", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "operation": "d2h", + "subtype": "pinned", + "valid": true, + "status": "valid", + "note": "peak 58 GB/s · copy/compute overlap 26% · 8 NUMA node(s)", + "peakBandwidthGbps": 57.71, + "latencyUnit": "us", + "colorKey": "b300_ec9c695d", + "label": "B300 · d2h · pinned", + "generatedAt": "2026-06-27T13:14:13.476946+00:00", + "run": { + "id": 
null, + "url": null, + "createdAt": "2026-06-27T13:14:13.476946+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 4096, + "bandwidthGbps": 1.314, + "latency": 3.1168, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 16384, + "bandwidthGbps": 4.911, + "latency": 3.336, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 65536, + "bandwidthGbps": 16.26, + "latency": 4.0304, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 262144, + "bandwidthGbps": 35.371, + "latency": 7.4112, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 1048576, + "bandwidthGbps": 49.656, + "latency": 21.1168, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 55.179, + "latency": 76.0128, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 56.698, + "latency": 295.9056, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 57.243, + "latency": 1172.3568, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 57.376, + "latency": 4678.5118, + "sizeClass": null, + "correct": null + } + ] + }, + { + "id": "cxt-0325201a", + "identity": "offload|b300|b300-nvlink-island|nvlink|h2d|pageable|us", + "cohortIdentity": "offload|b300|b300-nvlink-island|nvlink", + "family": "offload", + "sku": "b300", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "operation": "h2d", + "subtype": "pageable", + "valid": true, + "status": "valid", + "note": "peak 58 GB/s · copy/compute overlap 26% · 8 NUMA node(s)", + "peakBandwidthGbps": 57.71, + "latencyUnit": "us", + "colorKey": "b300_0325201a", + "label": "B300 · h2d · pageable", + "generatedAt": "2026-06-27T13:14:13.476946+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-27T13:14:13.476946+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 4096, + "bandwidthGbps": 0.48, + "latency": 8.5408, + "sizeClass": 
null, + "correct": null + }, + { + "sizeBytes": 16384, + "bandwidthGbps": 1.73, + "latency": 9.4704, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 65536, + "bandwidthGbps": 4.35, + "latency": 15.0656, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 262144, + "bandwidthGbps": 11.573, + "latency": 22.6512, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 1048576, + "bandwidthGbps": 19.272, + "latency": 54.408, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 32.974, + "latency": 127.2, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 38.009, + "latency": 441.4016, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 39.678, + "latency": 1691.3168, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 40.13, + "latency": 6689.2288, + "sizeClass": null, + "correct": null + } + ] + }, + { + "id": "cxt-6112e71d", + "identity": "offload|b300|b300-nvlink-island|nvlink|h2d|pinned|us", + "cohortIdentity": "offload|b300|b300-nvlink-island|nvlink", + "family": "offload", + "sku": "b300", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "operation": "h2d", + "subtype": "pinned", + "valid": true, + "status": "valid", + "note": "peak 58 GB/s · copy/compute overlap 26% · 8 NUMA node(s)", + "peakBandwidthGbps": 57.71, + "latencyUnit": "us", + "colorKey": "b300_6112e71d", + "label": "B300 · h2d · pinned", + "generatedAt": "2026-06-27T13:14:13.476946+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-27T13:14:13.476946+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 4096, + "bandwidthGbps": 1.204, + "latency": 3.4032, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 16384, + "bandwidthGbps": 4.481, + "latency": 3.656, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 65536, + "bandwidthGbps": 15.087, + 
"latency": 4.344, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 262144, + "bandwidthGbps": 32.966, + "latency": 7.952, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 1048576, + "bandwidthGbps": 49.231, + "latency": 21.2992, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 55.149, + "latency": 76.0544, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 57.026, + "latency": 294.2016, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 57.572, + "latency": 1165.6432, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 57.71, + "latency": 4651.4656, + "sizeClass": null, + "correct": null + } + ] + } + ], + "copyEngine": [ + { + "id": "cxt-6e3131b7", + "identity": "copy-engine|b300|b300-nvlink-island|nvlink|dtod|copy-engine|us", + "cohortIdentity": "copy-engine|b300|b300-nvlink-island|nvlink", + "family": "copy-engine", + "sku": "b300", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "operation": "dtod", + "subtype": "copy-engine", + "valid": true, + "status": "valid", + "note": "peak 33743 GB/s · copy-engine uses near-zero SMs: no", + "peakBandwidthGbps": 33743.395, + "latencyUnit": "us", + "colorKey": "b300_6e3131b7", + "label": "B300 · dtod · copy-engine", + "generatedAt": "2026-06-27T13:14:14.567612+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-27T13:14:14.567612+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 65536, + "bandwidthGbps": 7.729, + "latency": 8.4789, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 262144, + "bandwidthGbps": 31.851, + "latency": 8.2304, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 1048576, + "bandwidthGbps": 131.475, + "latency": 7.9755, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 506.069, + "latency": 8.288, + 
"sizeClass": null, + "correct": null + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 2092.131, + "latency": 8.0192, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 8232.735, + "latency": 8.1515, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 33743.395, + "latency": 7.9552, + "sizeClass": null, + "correct": null + } + ] + }, + { + "id": "cxt-214329f7", + "identity": "copy-engine|b300|b300-nvlink-island|nvlink|dtod|sm|us", + "cohortIdentity": "copy-engine|b300|b300-nvlink-island|nvlink", + "family": "copy-engine", + "sku": "b300", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "operation": "dtod", + "subtype": "sm", + "valid": true, + "status": "valid", + "note": "peak 33743 GB/s · copy-engine uses near-zero SMs: no", + "peakBandwidthGbps": 33743.395, + "latencyUnit": "us", + "colorKey": "b300_214329f7", + "label": "B300 · dtod · sm", + "generatedAt": "2026-06-27T13:14:14.567612+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-27T13:14:14.567612+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 65536, + "bandwidthGbps": 7.772, + "latency": 8.432, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 262144, + "bandwidthGbps": 31.011, + "latency": 8.4533, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 1048576, + "bandwidthGbps": 127.139, + "latency": 8.2475, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 515.355, + "latency": 8.1387, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 2004.925, + "latency": 8.368, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 8245.683, + "latency": 8.1387, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 32844.98, + "latency": 8.1728, + "sizeClass": null, + "correct": null + } + ] + }, + { + 
"id": "cxt-64e7ea33", + "identity": "copy-engine|b300|b300-nvlink-island|nvlink|htod|copy-engine|us", + "cohortIdentity": "copy-engine|b300|b300-nvlink-island|nvlink", + "family": "copy-engine", + "sku": "b300", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "operation": "htod", + "subtype": "copy-engine", + "valid": true, + "status": "valid", + "note": "peak 33743 GB/s · copy-engine uses near-zero SMs: no", + "peakBandwidthGbps": 33743.395, + "latencyUnit": "us", + "colorKey": "b300_64e7ea33", + "label": "B300 · htod · copy-engine", + "generatedAt": "2026-06-27T13:14:14.567612+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-27T13:14:14.567612+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 65536, + "bandwidthGbps": 8.922, + "latency": 7.3451, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 262144, + "bandwidthGbps": 35.159, + "latency": 7.456, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 1048576, + "bandwidthGbps": 139.617, + "latency": 7.5104, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 525.479, + "latency": 7.9819, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 2004.925, + "latency": 8.368, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 8211.245, + "latency": 8.1728, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 32556.046, + "latency": 8.2453, + "sizeClass": null, + "correct": null + } + ] + }, + { + "id": "cxt-4b3f523b", + "identity": "copy-engine|b300|b300-nvlink-island|nvlink|htod|sm|us", + "cohortIdentity": "copy-engine|b300|b300-nvlink-island|nvlink", + "family": "copy-engine", + "sku": "b300", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "operation": "htod", + "subtype": "sm", + "valid": true, + "status": "valid", + "note": "peak 33743 GB/s · copy-engine uses 
near-zero SMs: no", + "peakBandwidthGbps": 33743.395, + "latencyUnit": "us", + "colorKey": "b300_4b3f523b", + "label": "B300 · htod · sm", + "generatedAt": "2026-06-27T13:14:14.567612+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-27T13:14:14.567612+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 65536, + "bandwidthGbps": 7.918, + "latency": 8.2773, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 262144, + "bandwidthGbps": 31.703, + "latency": 8.2688, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 1048576, + "bandwidthGbps": 127.9, + "latency": 8.1984, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 509.743, + "latency": 8.2283, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 2022.716, + "latency": 8.2944, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 8166.48, + "latency": 8.2176, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 32413.478, + "latency": 8.2816, + "sizeClass": null, + "correct": null + } + ] + } + ], + "kvCache": [ + { + "id": "cxt-72e44191", + "identity": "kv-cache|b300|b300-nvlink-island|nvlink|dtod-local|contiguous/memcpy|ms", + "cohortIdentity": "kv-cache|b300|nvlink", + "family": "kv-cache", + "sku": "b300", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "operation": "dtod-local", + "subtype": "contiguous/memcpy", + "valid": true, + "status": "valid", + "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl", + "peakBandwidthGbps": null, + "latencyUnit": "ms", + "colorKey": "b300_72e44191", + "label": "B300 · dtod-local · contiguous/memcpy", + "generatedAt": "2026-06-27T13:14:28.674652+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-27T13:14:28.674652+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 16384, + "bandwidthGbps": 
4.86, + "latency": 0.00337, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 65536, + "bandwidthGbps": 18.31, + "latency": 0.00358, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 262144, + "bandwidthGbps": 79.48, + "latency": 0.0033, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 1048576, + "bandwidthGbps": 315.89, + "latency": 0.00332, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 1140.42, + "latency": 0.00368, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 2696.03, + "latency": 0.00622, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 2724.4, + "latency": 0.02463, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 3189.99, + "latency": 0.08415, + "sizeClass": "prefill", + "correct": true + } + ] + }, + { + "id": "cxt-0198272e", + "identity": "kv-cache|b300|b300-nvlink-island|nvlink|dtod-local|paged/memcpy|ms", + "cohortIdentity": "kv-cache|b300|nvlink", + "family": "kv-cache", + "sku": "b300", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "operation": "dtod-local", + "subtype": "paged/memcpy", + "valid": true, + "status": "valid", + "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl", + "peakBandwidthGbps": null, + "latencyUnit": "ms", + "colorKey": "b300_0198272e", + "label": "B300 · dtod-local · paged/memcpy", + "generatedAt": "2026-06-27T13:14:28.674652+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-27T13:14:28.674652+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 16384, + "bandwidthGbps": 3.27, + "latency": 0.005, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 65536, + "bandwidthGbps": 13.15, + "latency": 0.00498, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 262144, + 
"bandwidthGbps": 13.46, + "latency": 0.01948, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 1048576, + "bandwidthGbps": 13.76, + "latency": 0.07619, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 13.84, + "latency": 0.30311, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 13.87, + "latency": 1.20968, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 13.83, + "latency": 4.85211, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 13.89, + "latency": 19.32599, + "sizeClass": "prefill", + "correct": true + } + ] + }, + { + "id": "cxt-65e093de", + "identity": "kv-cache|b300|b300-nvlink-island|nvlink|dtod-remote|contiguous/memcpy|ms", + "cohortIdentity": "kv-cache|b300|nvlink", + "family": "kv-cache", + "sku": "b300", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "operation": "dtod-remote", + "subtype": "contiguous/memcpy", + "valid": true, + "status": "valid", + "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl", + "peakBandwidthGbps": null, + "latencyUnit": "ms", + "colorKey": "b300_65e093de", + "label": "B300 · dtod-remote · contiguous/memcpy", + "generatedAt": "2026-06-27T13:14:28.674652+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-27T13:14:28.674652+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 16384, + "bandwidthGbps": 1.08, + "latency": 0.01514, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 65536, + "bandwidthGbps": 4.52, + "latency": 0.01451, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 262144, + "bandwidthGbps": 17.43, + "latency": 0.01504, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 1048576, + "bandwidthGbps": 67.07, + "latency": 0.01563, + "sizeClass": "prefill", + "correct": true + }, + { + 
"sizeBytes": 4194304, + "bandwidthGbps": 205.84, + "latency": 0.02038, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 409.12, + "latency": 0.04101, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 644.24, + "latency": 0.10417, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 736.42, + "latency": 0.36451, + "sizeClass": "prefill", + "correct": true + } + ] + }, + { + "id": "cxt-502d7923", + "identity": "kv-cache|b300|b300-nvlink-island|nvlink|dtod-remote|paged/memcpy|ms", + "cohortIdentity": "kv-cache|b300|nvlink", + "family": "kv-cache", + "sku": "b300", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "operation": "dtod-remote", + "subtype": "paged/memcpy", + "valid": true, + "status": "valid", + "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl", + "peakBandwidthGbps": null, + "latencyUnit": "ms", + "colorKey": "b300_502d7923", + "label": "B300 · dtod-remote · paged/memcpy", + "generatedAt": "2026-06-27T13:14:28.674652+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-27T13:14:28.674652+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 16384, + "bandwidthGbps": 1.11, + "latency": 0.01473, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 65536, + "bandwidthGbps": 4.35, + "latency": 0.01507, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 262144, + "bandwidthGbps": 4.3, + "latency": 0.06098, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 1048576, + "bandwidthGbps": 4.27, + "latency": 0.24556, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 4.26, + "latency": 0.98559, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 4.24, + "latency": 3.9593, + "sizeClass": "prefill", + "correct": true + }, + 
{ + "sizeBytes": 67108864, + "bandwidthGbps": 4.27, + "latency": 15.72352, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 4.25, + "latency": 63.14588, + "sizeClass": "prefill", + "correct": true + } + ] + } + ], + "rlMesh": [ + { + "id": "cxt-e28663d4", + "identity": "rl-mesh|b300|b300-nvlink-island|nvlink|generator_to_trainer|paired|ms", + "cohortIdentity": "rl-mesh|b300|nvlink", + "family": "rl-mesh", + "sku": "b300", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "operation": "generator_to_trainer", + "subtype": "paired", + "valid": true, + "status": "valid", + "note": "peak 682 GB/s · world=8: trainer 4 <-> generator 4", + "peakBandwidthGbps": 681.89, + "latencyUnit": "ms", + "colorKey": "b300_e28663d4", + "label": "B300 · gen->trn · paired", + "generatedAt": "2026-06-27T13:38:50.291192+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-27T13:38:50.291192+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 1048576, + "bandwidthGbps": 22.43, + "latency": 0.04675, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 210.03, + "latency": 0.01997, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 444.24, + "latency": 0.03777, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 613.35, + "latency": 0.10941, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 672.64, + "latency": 0.39908, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 1073741824, + "bandwidthGbps": 681.89, + "latency": 1.57465, + "sizeClass": null, + "correct": true + } + ] + }, + { + "id": "cxt-abc63f3d", + "identity": "rl-mesh|b300|b300-nvlink-island|nvlink|generator_to_trainer|redistribute|ms", + "cohortIdentity": "rl-mesh|b300|nvlink", + "family": "rl-mesh", + "sku": "b300", + "topologyClass": "b300-nvlink-island", + 
"transport": "nvlink", + "operation": "generator_to_trainer", + "subtype": "redistribute", + "valid": true, + "status": "valid", + "note": "peak 682 GB/s · world=8: trainer 4 <-> generator 4", + "peakBandwidthGbps": 681.89, + "latencyUnit": "ms", + "colorKey": "b300_abc63f3d", + "label": "B300 · gen->trn · redistribute", + "generatedAt": "2026-06-27T13:38:50.291192+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-27T13:38:50.291192+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 1048576, + "bandwidthGbps": 0.02, + "latency": 44.24712, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 56.86, + "latency": 0.07377, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 168.78, + "latency": 0.0994, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 466.61, + "latency": 0.14382, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 565.6, + "latency": 0.4746, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 1073741824, + "bandwidthGbps": 656.22, + "latency": 1.63626, + "sizeClass": null, + "correct": true + } + ] + }, + { + "id": "cxt-08ab0854", + "identity": "rl-mesh|b300|b300-nvlink-island|nvlink|trainer_to_generator|paired|ms", + "cohortIdentity": "rl-mesh|b300|nvlink", + "family": "rl-mesh", + "sku": "b300", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "operation": "trainer_to_generator", + "subtype": "paired", + "valid": true, + "status": "valid", + "note": "peak 682 GB/s · world=8: trainer 4 <-> generator 4", + "peakBandwidthGbps": 681.89, + "latencyUnit": "ms", + "colorKey": "b300_08ab0854", + "label": "B300 · trn->gen · paired", + "generatedAt": "2026-06-27T13:38:50.291192+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-27T13:38:50.291192+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 1048576, + 
"bandwidthGbps": 8.13, + "latency": 0.12892, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 161.07, + "latency": 0.02604, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 455.8, + "latency": 0.03681, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 613.96, + "latency": 0.10931, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 670.34, + "latency": 0.40045, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 1073741824, + "bandwidthGbps": 681.46, + "latency": 1.57564, + "sizeClass": null, + "correct": true + } + ] + }, + { + "id": "cxt-bea1bfbd", + "identity": "rl-mesh|b300|b300-nvlink-island|nvlink|trainer_to_generator|redistribute|ms", + "cohortIdentity": "rl-mesh|b300|nvlink", + "family": "rl-mesh", + "sku": "b300", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "operation": "trainer_to_generator", + "subtype": "redistribute", + "valid": true, + "status": "valid", + "note": "peak 682 GB/s · world=8: trainer 4 <-> generator 4", + "peakBandwidthGbps": 681.89, + "latencyUnit": "ms", + "colorKey": "b300_bea1bfbd", + "label": "B300 · trn->gen · redistribute", + "generatedAt": "2026-06-27T13:38:50.291192+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-27T13:38:50.291192+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 1048576, + "bandwidthGbps": 0.01, + "latency": 74.91642, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 66.21, + "latency": 0.06334, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 295.56, + "latency": 0.05676, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 581.82, + "latency": 0.11534, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 543.6, + 
"latency": 0.49381, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 1073741824, + "bandwidthGbps": 659.57, + "latency": 1.62794, + "sizeClass": null, + "correct": true + } + ] + } + ], + "scannedRuns": 3, + "scannedArtifacts": 42, + "contributingRuns": 3, + "generatedAt": "2026-06-29T02:42:52.989Z" } diff --git a/packages/app/public/data/collectivex.json b/packages/app/public/data/collectivex.json index cebcf471..2b44f424 100644 --- a/packages/app/public/data/collectivex.json +++ b/packages/app/public/data/collectivex.json @@ -1,16 +1,16 @@ { - "snapshotVersion": 2, + "snapshotVersion": 3, "series": [ { - "id": "cx-3f6620d0", - "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|fp8-saturation|none|none|0|tuned||8c8497a77d9085d", + "id": "cx-0eafa1d5", + "identity": "b300|deepep|4096|8|128|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||75530960a30b452", "colorKey": "b300_c9569580", - "comparisonKey": "11fb97077712804e", + "comparisonKey": "62e1e2299cdc509d", "schemaVersion": 3, - "generatedAt": "2026-06-27T00:06:34.883169+00:00", + "generatedAt": "2026-06-27T11:14:16.179311+00:00", "status": "valid", "publicationStatus": "official", - "runner": "b300-nv_05", + "runner": "b300-nv_14", "sku": "b300", "backend": "deepep", "phase": "decode", @@ -24,17 +24,18 @@ "worldSize": 8, "epSize": 8, "label": "B300 EP8 · deepep · bf16", + "model": "Qwen3.5", "shape": { - "hidden": 7168, + "hidden": 4096, "topk": 8, - "experts": 256, + "experts": 128, "routing": "uniform", "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, "dispatchDtype": "bf16", - "activationProfile": "fp8-saturation", + "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { @@ -54,8 +55,8 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "8c8497a77d9085d", - "workloadId": "set:4:d8d49658059863f2", + "traceSignature": "75530960a30b452", + 
"workloadId": "set:8:d1b92539bddfb570", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -63,336 +64,266 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28272154473", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272154473", - "createdAt": "2026-06-27T00:05:17Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28287508460", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28287508460", + "createdAt": "2026-06-27T11:14:16.179311+00:00", + "sha": "df7fddee0a275156d4c72fa006cd2b73bce72613" }, "rows": [ { "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 57.40800127387047, - "p90": 59.26400050520897, - "p95": 61.055999249219894, - "p99": 69.66400146484375 + "p50": 56.992001831531525, + "p90": 59.328000992536545, + "p95": 62.55999952554703, + "p99": 80.38400113582611 }, "combine": { - "p50": 66.30399823188782, - "p90": 67.32799857854843, - "p95": 68.25599819421768, - "p99": 77.02399790287018 + "p50": 55.00800162553787, + "p90": 57.0559985935688, + "p95": 64.41599875688553, + "p99": 65.92000275850296 }, "roundtrip": { - "p50": 106.88000172376633, - "p90": 111.35999858379364, - "p95": 112.96000331640244, - "p99": 129.31199371814728 + "p50": 94.81599926948547, + "p90": 97.63199836015701, + "p95": 99.04000163078308, + "p99": 108.0000028014183 }, "isolatedSum": { - "p50": 123.71199950575829, - "p90": 126.5919990837574, - "p95": 129.31199744343758, - "p99": 146.68799936771393 + "p50": 112.0000034570694, + "p90": 116.38399958610535, + "p95": 126.97599828243256, + "p99": 146.30400389432907 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 630784, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 4, + "dispatchLogicalBytes": 344064, + "combineLogicalBytes": 344064, + "fanoutMean": 5.25, + "recvTokensMax": 
6, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 2, + "globalTokens": 16, "dispatch": { - "p50": 58.33600088953972, - "p90": 60.67200005054474, - "p95": 62.68800050020218, - "p99": 68.15999746322632 + "p50": 56.89600110054016, + "p90": 59.039998799562454, + "p95": 61.15199998021126, + "p99": 82.04799890518188 }, "combine": { - "p50": 67.84000247716904, - "p90": 77.2159993648529, - "p95": 77.88799703121185, - "p99": 78.75200361013412 + "p50": 55.67999929189682, + "p90": 58.400001376867294, + "p95": 64.67200070619583, + "p99": 76.67200267314911 }, "roundtrip": { - "p50": 121.88799679279327, - "p90": 125.05599856376648, - "p95": 126.08000636100769, - "p99": 136.99199259281158 + "p50": 95.16800194978714, + "p90": 98.11200201511383, + "p95": 100.67199915647507, + "p99": 112.03200370073318 }, "isolatedSum": { - "p50": 126.17600336670876, - "p90": 137.88799941539764, - "p95": 140.57599753141403, - "p99": 146.91200107336044 + "p50": 112.57600039243698, + "p90": 117.44000017642975, + "p95": 125.82400068640709, + "p99": 158.720001578331 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 5, + "dispatchLogicalBytes": 704512, + "combineLogicalBytes": 704512, + "fanoutMean": 5.375, + "recvTokensMax": 12, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 32, - "globalTokens": 256, + "tokensPerRank": 4, + "globalTokens": 32, "dispatch": { - "p50": 69.85600292682648, - "p90": 74.27199929952621, - "p95": 75.3600001335144, - "p99": 82.97599852085114 + "p50": 57.21599981188774, + "p90": 59.74400043487549, + "p95": 61.664000153541565, + "p99": 77.18399912118912 }, "combine": { - "p50": 78.52800190448761, - "p90": 79.19999957084656, - "p95": 79.99999821186066, - "p99": 82.8159973025322 + "p50": 56.063998490571976, + "p90": 
58.14399942755699, + "p95": 64.92800265550613, + "p99": 78.68800312280655 }, "roundtrip": { - "p50": 131.3599944114685, - "p90": 135.903999209404, - "p95": 136.76799833774567, - "p99": 147.5519984960556 + "p50": 95.74399888515472, + "p90": 98.78399968147278, + "p95": 103.26399654150009, + "p99": 113.0559965968132 }, "isolatedSum": { - "p50": 148.3840048313141, - "p90": 153.47199887037277, - "p95": 155.35999834537506, - "p99": 165.79199582338333 + "p50": 113.27999830245972, + "p90": 117.88799986243248, + "p95": 126.5920028090477, + "p99": 155.87200224399567 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19726336, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 182, + "dispatchLogicalBytes": 1384448, + "combineLogicalBytes": 1384448, + "fanoutMean": 5.28125, + "recvTokensMax": 26, "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 8, + "globalTokens": 64, "dispatch": { - "p50": 94.24000233411789, - "p90": 96.79999947547913, - "p95": 99.39199686050415, - "p99": 103.74400019645691 + "p50": 58.079998940229416, + "p90": 61.08799949288368, + "p95": 62.65600025653839, + "p99": 71.68000191450119 }, "combine": { - "p50": 115.35999923944473, - "p90": 116.12799763679504, - "p95": 116.73600226640701, - "p99": 127.29600071907043 + "p50": 64.44799900054932, + "p90": 66.23999774456024, + "p95": 66.59200042486191, + "p99": 69.023996591568 }, "roundtrip": { - "p50": 193.4400051832199, - "p90": 198.91199469566345, - "p95": 199.71199333667755, - "p99": 208.3200067281723 + "p50": 108.8000014424324, + "p90": 113.95200341939926, + "p95": 114.84800279140472, + "p99": 122.72000312805176 }, "isolatedSum": { - "p50": 209.60000157356262, - "p90": 212.92799711227417, - "p95": 216.12799912691116, - "p99": 231.04000091552734 + "p50": 122.52799794077873, + "p90": 127.32799723744392, + "p95": 129.2480006814003, + "p99": 140.70399850606918 }, "roundtripMeasured": true, - 
"dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 7, + "dispatchLogicalBytes": 2744320, + "combineLogicalBytes": 2744320, + "fanoutMean": 5.234375, + "recvTokensMax": 49, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 - } - ] - }, - { - "id": "cx-854f00de", - "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||8c8497a77d9085d", - "colorKey": "b300_c9569580", - "comparisonKey": "afbd085a57d290fd", - "schemaVersion": 3, - "generatedAt": "2026-06-26T23:57:27.937449+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_17", - "sku": "b300", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "B300 EP8 · deepep · bf16", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1351, - "configuredUnits": 20, - "deviceUnits": 148, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "8c8497a77d9085d", - "workloadId": "set:4:d8d49658059863f2", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": 
"sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28271865772", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271865772", - "createdAt": "2026-06-26T23:56:07Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ + }, { - "tokensPerRank": 1, - "globalTokens": 8, + "tokensPerRank": 16, + "globalTokens": 128, "dispatch": { - "p50": 55.58399856090546, - "p90": 57.40800127387047, - "p95": 59.13599953055382, - "p99": 65.63200056552887 + "p50": 58.687999844551086, + "p90": 61.055999249219894, + "p95": 63.00800293684006, + "p99": 71.96799665689468 }, "combine": { - "p50": 66.14399701356888, - "p90": 67.55200028419495, - "p95": 68.38399916887283, - "p99": 77.2159993648529 + "p50": 57.82400071620941, + "p90": 66.3679987192154, + "p95": 66.81600213050842, + "p99": 77.98399776220322 }, "roundtrip": { - "p50": 105.18400371074677, - "p90": 111.29599809646606, - "p95": 113.50400000810623, - "p99": 132.1280002593994 + "p50": 111.39199882745743, + "p90": 122.04799801111221, + "p95": 126.5919953584671, + "p99": 132.86399841308594 }, "isolatedSum": { - "p50": 121.72799557447433, - "p90": 124.96000155806541, - "p95": 127.51999869942665, - "p99": 142.84799993038177 + "p50": 116.5120005607605, + "p90": 127.42399796843529, + "p95": 129.82400506734848, + "p99": 149.9519944190979 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 630784, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 7, + "dispatchLogicalBytes": 5464064, + "combineLogicalBytes": 5464064, + "fanoutMean": 5.2109375, + "recvTokensMax": 94, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 32, + "globalTokens": 256, "dispatch": { - "p50": 58.400001376867294, - "p90": 60.99199876189232, - "p95": 62.880001962184906, - "p99": 73.05599749088287 + "p50": 
70.8480030298233, + "p90": 74.68800246715546, + "p95": 75.71200281381607, + "p99": 81.31200075149536 }, "combine": { - "p50": 67.29599833488464, - "p90": 77.15199887752533, - "p95": 77.72800326347351, - "p99": 79.64800298213959 + "p50": 66.30399823188782, + "p90": 67.07199662923813, + "p95": 67.71200150251389, + "p99": 77.15199887752533 }, "roundtrip": { - "p50": 117.95199662446976, - "p90": 122.72000312805176, - "p95": 123.9359974861145, - "p99": 138.46400380134583 + "p50": 108.99200290441513, + "p90": 114.07999694347382, + "p95": 116.7680025100708, + "p99": 132.47999548912048 }, "isolatedSum": { - "p50": 125.69599971175194, - "p90": 138.14399763941765, - "p95": 140.60800522565842, - "p99": 152.70400047302246 + "p50": 137.15200126171112, + "p90": 141.75999909639359, + "p95": 143.42400431632996, + "p99": 158.4639996290207 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, + "dispatchLogicalBytes": 11124736, + "combineLogicalBytes": 11124736, + "fanoutMean": 5.3046875, + "recvTokensMax": 186, "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 32, - "globalTokens": 256, + "tokensPerRank": 64, + "globalTokens": 512, "dispatch": { - "p50": 67.9360032081604, - "p90": 71.16799801588058, - "p95": 73.72800260782242, - "p99": 86.5280032157898 + "p50": 70.72000205516815, + "p90": 72.95999675989151, + "p95": 74.8480036854744, + "p99": 81.02399855852127 }, "combine": { - "p50": 77.95199751853943, - "p90": 79.19999957084656, - "p95": 80.06399869918823, - "p99": 83.8719978928566 + "p50": 78.75200361013412, + "p90": 79.55200225114822, + "p95": 80.19199967384338, + "p99": 95.96800059080124 }, "roundtrip": { - "p50": 128.7039965391159, - "p90": 131.1360001564026, - "p95": 132.76800513267517, - "p99": 140.6400054693222 + "p50": 131.77600502967834, + "p90": 136.63999736309052, + "p95": 138.91200721263885, + "p99": 158.04800391197205 }, 
"isolatedSum": { - "p50": 145.88800072669983, - "p90": 150.36799758672714, - "p95": 153.79200130701065, - "p99": 170.4000011086464 + "p50": 149.47200566530228, + "p90": 152.51199901103973, + "p95": 155.04000335931778, + "p99": 176.9919991493225 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19726336, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 182, + "dispatchLogicalBytes": 22192128, + "combineLogicalBytes": 22192128, + "fanoutMean": 5.291015625, + "recvTokensMax": 358, "stragglerRank": 7, "correct": true, "samplesPooled": 600, @@ -402,34 +333,34 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 92.70399808883667, - "p90": 97.63199836015701, - "p95": 99.71199929714203, - "p99": 135.42400300502777 + "p50": 82.5280025601387, + "p90": 85.21600067615509, + "p95": 88.16000074148178, + "p99": 100.80000013113022 }, "combine": { - "p50": 114.78400230407715, - "p90": 116.70400202274323, - "p95": 118.97599697113037, - "p99": 164.0319973230362 + "p50": 91.77599847316742, + "p90": 94.59199756383896, + "p95": 101.72799974679947, + "p99": 104.92800176143646 }, "roundtrip": { - "p50": 190.62399864196777, - "p90": 196.60800695419312, - "p95": 197.66399264335632, - "p99": 203.99999618530273 + "p50": 157.53600001335144, + "p90": 165.24800658226013, + "p95": 166.97600483894348, + "p99": 184.76800620555878 }, "isolatedSum": { - "p50": 207.48800039291382, - "p90": 214.33600038290024, - "p95": 218.6879962682724, - "p99": 299.45600032806396 + "p50": 174.30400103330612, + "p90": 179.80799823999405, + "p95": 189.88800048828125, + "p99": 205.72800189256668 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, + "dispatchLogicalBytes": 44564480, + "combineLogicalBytes": 44564480, + "fanoutMean": 5.3125, + "recvTokensMax": 699, "stragglerRank": 7, "correct": true, "samplesPooled": 600, @@ -438,15 +369,15 @@ ] }, { - "id": 
"cx-2fa7319c", - "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||c774c8e4abb34da", - "colorKey": "b300_c9569580", - "comparisonKey": "89fa2de88509570c", + "id": "cx-73ede381", + "identity": "b300|deepep|4096|8|128|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||75530960a30b452", + "colorKey": "b300_307ed708", + "comparisonKey": "29583b2aa22167e0", "schemaVersion": 3, - "generatedAt": "2026-06-27T00:54:19.552522+00:00", + "generatedAt": "2026-06-27T09:51:53.146142+00:00", "status": "valid", "publicationStatus": "official", - "runner": "b300-nv_01", + "runner": "b300-nv_04", "sku": "b300", "backend": "deepep", "phase": "decode", @@ -454,16 +385,17 @@ "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", + "measurementContract": "runtime-visible-v1", "topologyClass": "b300-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, "label": "B300 EP8 · deepep · bf16", + "model": "Qwen3.5", "shape": { - "hidden": 7168, + "hidden": 4096, "topk": 8, - "experts": 256, + "experts": 128, "routing": "uniform", "routingLabel": "uniform", "routingStep": 0, @@ -490,8 +422,8 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "c774c8e4abb34da", - "workloadId": "set:5:d8d49658059863f2", + "traceSignature": "75530960a30b452", + "workloadId": "set:8:d1b92539bddfb570", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -499,45 +431,45 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28273513209", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28273513209", - "createdAt": "2026-06-27T00:53:00Z", - "sha": "2c15d9415503e9ccb84cd49cf446a122796efc1e" + "id": "28285698979", + "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285698979", + "createdAt": "2026-06-27T09:51:53.146142+00:00", + "sha": "149586650dbed5b7579537347e9489d5b41543c1" }, "rows": [ { "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 56.41600117087364, - "p90": 58.46399813890457, - "p95": 60.95999851822853, - "p99": 71.55200093984604 + "p50": 55.776000022888184, + "p90": 58.14399942755699, + "p95": 59.93599817156792, + "p99": 65.95200300216675 }, "combine": { - "p50": 66.27199798822403, - "p90": 67.55200028419495, - "p95": 68.28799843788147, - "p99": 77.27999985218048 + "p50": 54.71999943256378, + "p90": 56.063998490571976, + "p95": 57.151999324560165, + "p99": 65.69600105285645 }, "roundtrip": { - "p50": 105.85600137710571, - "p90": 112.28799819946289, - "p95": 113.3119985461235, - "p99": 124.09599870443344 + "p50": 93.31200271844864, + "p90": 95.96800059080124, + "p95": 98.01600128412247, + "p99": 104.86400127410889 }, "isolatedSum": { - "p50": 122.68799915909767, - "p90": 126.01599842309952, - "p95": 129.24799695611, - "p99": 148.83200079202652 + "p50": 110.49599945545197, + "p90": 114.20799791812897, + "p95": 117.08799749612808, + "p99": 131.6480040550232 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 630784, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 7, + "dispatchLogicalBytes": 344064, + "combineLogicalBytes": 344064, + "fanoutMean": 5.25, + "recvTokensMax": 6, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -546,35 +478,35 @@ "tokensPerRank": 2, "globalTokens": 16, "dispatch": { - "p50": 56.60799890756607, - "p90": 58.04799869656563, - "p95": 59.39200147986412, - "p99": 63.64800035953522 + "p50": 56.09599873423576, + "p90": 57.792000472545624, + "p95": 59.55199897289276, + "p99": 68.15999746322632 }, "combine": { - "p50": 67.03999638557434, - "p90": 68.7360018491745, - "p95": 69.15199756622314, - "p99": 77.2159993648529 + "p50": 55.07199838757515, + 
"p90": 56.671999394893646, + "p95": 57.28000029921532, + "p99": 65.2799978852272 }, "roundtrip": { - "p50": 107.04000294208527, - "p90": 109.76000130176544, - "p95": 111.35999858379364, - "p99": 119.19999867677689 + "p50": 95.39200365543365, + "p90": 101.79200023412704, + "p95": 102.55999863147736, + "p99": 108.0000028014183 }, "isolatedSum": { - "p50": 123.64799529314041, - "p90": 126.78400054574013, - "p95": 128.54399904608727, - "p99": 140.86399972438812 + "p50": 111.16799712181091, + "p90": 114.46399986743927, + "p95": 116.83199927210808, + "p99": 133.43999534845352 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1232896, - "combineLogicalBytes": 1232896, + "dispatchLogicalBytes": 704512, + "combineLogicalBytes": 704512, "fanoutMean": 5.375, - "recvTokensMax": 13, - "stragglerRank": 7, + "recvTokensMax": 12, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -583,35 +515,35 @@ "tokensPerRank": 4, "globalTokens": 32, "dispatch": { - "p50": 58.81600081920624, - "p90": 64.44799900054932, - "p95": 66.01600348949432, - "p99": 71.61600142717361 + "p50": 56.0000017285347, + "p90": 57.88800120353699, + "p95": 59.67999994754791, + "p99": 68.7360018491745 }, "combine": { - "p50": 67.26399809122086, - "p90": 69.63200122117996, - "p95": 77.15199887752533, - "p99": 78.91199737787247 + "p50": 56.12799897789955, + "p90": 65.37599861621857, + "p95": 65.72800129652023, + "p99": 66.97600334882736 }, "roundtrip": { - "p50": 122.20799922943115, - "p90": 125.18399953842163, - "p95": 125.91999769210815, - "p99": 130.3360015153885 + "p50": 105.18400371074677, + "p90": 111.10399663448334, + "p95": 112.2559979557991, + "p99": 115.10399729013443 }, "isolatedSum": { - "p50": 126.0799989104271, - "p90": 134.08000022172928, - "p95": 143.16800236701965, - "p99": 150.52799880504608 + "p50": 112.12800070643425, + "p90": 123.26399981975555, + "p95": 125.40800124406815, + "p99": 135.71200519800186 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2480128, 
- "combineLogicalBytes": 2480128, - "fanoutMean": 5.40625, - "recvTokensMax": 29, - "stragglerRank": 7, + "dispatchLogicalBytes": 1384448, + "combineLogicalBytes": 1384448, + "fanoutMean": 5.28125, + "recvTokensMax": 26, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -620,35 +552,35 @@ "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 59.42400172352791, - "p90": 64.25599753856659, - "p95": 67.87200272083282, - "p99": 74.62400197982788 + "p50": 56.86400085687637, + "p90": 58.88000130653381, + "p95": 60.54399907588959, + "p99": 67.6800012588501 }, "combine": { - "p50": 68.9919963479042, - "p90": 78.015998005867, - "p95": 78.62400263547897, - "p99": 81.88799768686295 + "p50": 65.24799764156342, + "p90": 66.46399945020676, + "p95": 66.81600213050842, + "p99": 70.62400132417679 }, "roundtrip": { - "p50": 119.39200013875961, - "p90": 125.05599856376648, - "p95": 126.17599964141846, - "p99": 130.36799430847168 + "p50": 105.79200088977814, + "p90": 112.35199868679047, + "p95": 112.83200234174728, + "p99": 116.48000031709671 }, "isolatedSum": { - "p50": 128.4159980714321, - "p90": 142.2719955444336, - "p95": 146.4960053563118, - "p99": 156.51199966669083 + "p50": 122.11199849843979, + "p90": 125.34400075674057, + "p95": 127.36000120639801, + "p99": 138.3040025830269 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 5, + "dispatchLogicalBytes": 2744320, + "combineLogicalBytes": 2744320, + "fanoutMean": 5.234375, + "recvTokensMax": 49, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -657,35 +589,146 @@ "tokensPerRank": 16, "globalTokens": 128, "dispatch": { - "p50": 66.68800115585327, - "p90": 73.7600028514862, - "p95": 75.13599842786789, - "p99": 80.35200089216232 + "p50": 58.6559996008873, + "p90": 60.5119988322258, + "p95": 63.00800293684006, + "p99": 79.0719985961914 }, "combine": { - 
"p50": 69.88800317049026, - "p90": 78.5600021481514, - "p95": 78.75200361013412, - "p99": 82.56000280380249 + "p50": 65.31199812889099, + "p90": 66.43199920654297, + "p95": 66.97600334882736, + "p99": 69.40799951553345 }, "roundtrip": { - "p50": 119.26399916410446, - "p90": 121.47200107574463, - "p95": 123.52000176906586, - "p99": 127.68000364303589 + "p50": 105.85600137710571, + "p90": 107.87200182676315, + "p95": 109.66400057077408, + "p99": 115.64800143241882 }, "isolatedSum": { - "p50": 136.57600432634354, - "p90": 152.3200049996376, - "p95": 153.888002038002, - "p99": 162.9120036959648 + "p50": 123.96799772977829, + "p90": 126.94399803876877, + "p95": 129.98400628566742, + "p99": 148.47999811172485 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 9920512, - "combineLogicalBytes": 9920512, - "fanoutMean": 5.40625, - "recvTokensMax": 92, - "stragglerRank": 0, + "dispatchLogicalBytes": 5464064, + "combineLogicalBytes": 5464064, + "fanoutMean": 5.2109375, + "recvTokensMax": 94, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 72.80000299215317, + "p90": 75.83999633789062, + "p95": 76.60800218582153, + "p99": 84.63999629020691 + }, + "combine": { + "p50": 66.27199798822403, + "p90": 67.03999638557434, + "p95": 67.55200028419495, + "p99": 69.47200000286102 + }, + "roundtrip": { + "p50": 109.24799740314484, + "p90": 115.32799899578094, + "p95": 116.57600104808807, + "p99": 131.32800161838531 + }, + "isolatedSum": { + "p50": 139.0720009803772, + "p90": 142.87999272346497, + "p95": 144.16000247001648, + "p99": 154.11199629306793 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 11124736, + "combineLogicalBytes": 11124736, + "fanoutMean": 5.3046875, + "recvTokensMax": 186, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 
69.66400146484375, + "p90": 71.9040036201477, + "p95": 73.05599749088287, + "p99": 79.64800298213959 + }, + "combine": { + "p50": 78.65600287914276, + "p90": 79.68000322580338, + "p95": 80.1599994301796, + "p99": 89.59999680519104 + }, + "roundtrip": { + "p50": 130.8480054140091, + "p90": 134.33599472045898, + "p95": 137.92000710964203, + "p99": 152.12799608707428 + }, + "isolatedSum": { + "p50": 148.3200043439865, + "p90": 151.58400684595108, + "p95": 153.21599692106247, + "p99": 169.24799978733063 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22192128, + "combineLogicalBytes": 22192128, + "fanoutMean": 5.291015625, + "recvTokensMax": 358, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 82.30400085449219, + "p90": 86.14400029182434, + "p95": 90.7519981265068, + "p99": 98.59199821949005 + }, + "combine": { + "p50": 91.90399944782257, + "p90": 95.0080007314682, + "p95": 101.6639992594719, + "p99": 102.52799838781357 + }, + "roundtrip": { + "p50": 166.81599617004395, + "p90": 173.88799786567688, + "p95": 175.32800137996674, + "p99": 189.4720047712326 + }, + "isolatedSum": { + "p50": 174.20800030231476, + "p90": 181.15200102329254, + "p95": 192.4159973859787, + "p99": 201.11999660730362 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 44564480, + "combineLogicalBytes": 44564480, + "fanoutMean": 5.3125, + "recvTokensMax": 699, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -693,12 +736,12 @@ ] }, { - "id": "cx-dc6ca42c", - "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|small-amplitude|none|none|0|tuned||8c8497a77d9085d", - "colorKey": "b300_c9569580", - "comparisonKey": "8a9fa1be98f83eb3", + "id": "cx-b2b86614", + "identity": "b300|deepep|5120|8|160|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||be1b44a963bd4ef", + "colorKey": 
"b300_307ed708", + "comparisonKey": "246ad32f5ce8e310", "schemaVersion": 3, - "generatedAt": "2026-06-27T00:06:17.025326+00:00", + "generatedAt": "2026-06-27T09:52:24.032758+00:00", "status": "valid", "publicationStatus": "official", "runner": "b300-nv_14", @@ -709,23 +752,24 @@ "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", + "measurementContract": "runtime-visible-v1", "topologyClass": "b300-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, "label": "B300 EP8 · deepep · bf16", + "model": "shape 5120/8/160", "shape": { - "hidden": 7168, + "hidden": 5120, "topk": 8, - "experts": 256, + "experts": 160, "routing": "uniform", "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, "dispatchDtype": "bf16", - "activationProfile": "small-amplitude", + "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { @@ -745,8 +789,8 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "8c8497a77d9085d", - "workloadId": "set:4:d8d49658059863f2", + "traceSignature": "be1b44a963bd4ef", + "workloadId": "set:8:34e5874082f8ea8f", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -754,336 +798,266 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28272146490", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272146490", - "createdAt": "2026-06-27T00:05:03Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28285710659", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285710659", + "createdAt": "2026-06-27T09:52:24.032758+00:00", + "sha": "149586650dbed5b7579537347e9489d5b41543c1" }, "rows": [ { "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 55.84000051021576, - "p90": 
57.95200169086456, - "p95": 60.54399907588959, - "p99": 68.09599697589874 + "p50": 56.03199824690819, + "p90": 58.75200033187866, + "p95": 61.69600039720535, + "p99": 71.03999704122543 }, "combine": { - "p50": 66.20799750089645, - "p90": 66.94400310516357, - "p95": 67.52000004053116, - "p99": 90.87999910116196 + "p50": 55.424001067876816, + "p90": 57.5999990105629, + "p95": 64.7680014371872, + "p99": 65.5359998345375 }, "roundtrip": { - "p50": 106.04800283908844, - "p90": 111.07199639081955, - "p95": 112.67200112342834, + "p50": 94.59199756383896, + "p90": 97.85600006580353, + "p95": 101.85600072145462, "p99": 125.15200674533844 }, "isolatedSum": { - "p50": 122.04799801111221, - "p90": 124.89600479602814, - "p95": 128.06399911642075, - "p99": 158.9759960770607 + "p50": 111.455999314785, + "p90": 116.35199934244156, + "p95": 126.46400183439255, + "p99": 136.57599687576294 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 630784, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 5, + "dispatchLogicalBytes": 430080, + "combineLogicalBytes": 430080, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 2, + "globalTokens": 16, "dispatch": { - "p50": 59.07199904322624, - "p90": 62.3680017888546, - "p95": 65.08799642324448, - "p99": 71.00799679756165 + "p50": 55.93600124120712, + "p90": 57.88800120353699, + "p95": 59.74400043487549, + "p99": 67.26399809122086 }, "combine": { - "p50": 69.18399780988693, - "p90": 78.14399898052216, - "p95": 78.59200239181519, - "p99": 88.22400122880936 + "p50": 56.28800019621849, + "p90": 65.63200056552887, + "p95": 66.17599725723267, + "p99": 76.60800218582153 }, "roundtrip": { - "p50": 119.07199770212173, - "p90": 124.32000041007996, - "p95": 125.37600100040436, - "p99": 140.06400108337402 + "p50": 104.09600287675858, + "p90": 111.10399663448334, + "p95": 
112.12799698114395, + "p99": 116.95999652147293 }, "isolatedSum": { - "p50": 128.25599685311317, - "p90": 140.51200076937675, - "p95": 143.67999881505966, - "p99": 159.231998026371 + "p50": 112.22400143742561, + "p90": 123.52000176906586, + "p95": 125.91999769210815, + "p99": 143.8720002770424 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, + "dispatchLogicalBytes": 880640, + "combineLogicalBytes": 880640, + "fanoutMean": 5.375, + "recvTokensMax": 13, "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 32, - "globalTokens": 256, + "tokensPerRank": 4, + "globalTokens": 32, "dispatch": { - "p50": 68.70400160551071, - "p90": 73.66400212049484, - "p95": 75.13599842786789, - "p99": 93.56799721717834 + "p50": 55.93600124120712, + "p90": 57.920001447200775, + "p95": 59.10399928689003, + "p99": 65.92000275850296 }, "combine": { - "p50": 78.62400263547897, - "p90": 79.6160027384758, - "p95": 81.44000172615051, - "p99": 91.48799628019333 + "p50": 65.37599861621857, + "p90": 66.39999896287918, + "p95": 66.52799993753433, + "p99": 69.72800195217133 }, "roundtrip": { - "p50": 130.65600395202637, - "p90": 135.71199774742126, - "p95": 136.76799833774567, - "p99": 144.1279947757721 + "p50": 105.85600137710571, + "p90": 112.86400258541107, + "p95": 113.72800171375275, + "p99": 131.42399489879608 }, "isolatedSum": { - "p50": 147.32800424098969, - "p90": 153.28000485897064, - "p95": 156.5760001540184, - "p99": 185.05599349737167 + "p50": 121.31199985742569, + "p90": 124.32000041007996, + "p95": 125.63199922442436, + "p99": 135.6480047106743 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19726336, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 182, - "stragglerRank": 4, + "dispatchLogicalBytes": 1740800, + "combineLogicalBytes": 1740800, + "fanoutMean": 5.3125, + "recvTokensMax": 25, + "stragglerRank": 6, 
"correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 8, + "globalTokens": 64, "dispatch": { - "p50": 93.08800101280212, - "p90": 98.78399968147278, - "p95": 100.63999891281128, - "p99": 110.17599701881409 + "p50": 57.18399956822395, + "p90": 59.13599953055382, + "p95": 60.92799827456474, + "p99": 75.55200159549713 }, "combine": { - "p50": 115.39199948310852, - "p90": 116.28799885511398, - "p95": 117.21599847078323, - "p99": 126.39999389648438 + "p50": 66.0799965262413, + "p90": 66.65600091218948, + "p95": 67.52000004053116, + "p99": 81.02399855852127 }, "roundtrip": { - "p50": 192.25600361824036, - "p90": 198.2080042362213, - "p95": 198.7839937210083, - "p99": 203.61599326133728 + "p50": 105.43999820947647, + "p90": 107.96800255775452, + "p95": 109.98400300741196, + "p99": 118.04799735546112 }, "isolatedSum": { - "p50": 208.48000049591064, - "p90": 215.07199853658676, - "p95": 217.8559973835945, - "p99": 236.57599091529846 + "p50": 123.26399609446526, + "p90": 125.7920004427433, + "p95": 128.4479983150959, + "p99": 156.5760001540184 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, + "dispatchLogicalBytes": 3471360, + "combineLogicalBytes": 3471360, + "fanoutMean": 5.296875, + "recvTokensMax": 50, "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 - } - ] - }, - { - "id": "cx-a995e296", - "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|wide-dynamic-range|none|none|0|tuned||8c8497a77d9085d", - "colorKey": "b300_c9569580", - "comparisonKey": "fe9431c5beaaf675", - "schemaVersion": 3, - "generatedAt": "2026-06-27T00:06:39.072562+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_03", - "sku": "b300", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": 
"backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "B300 EP8 · deepep · bf16", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "wide-dynamic-range", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1351, - "configuredUnits": 20, - "deviceUnits": 148, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "8c8497a77d9085d", - "workloadId": "set:4:d8d49658059863f2", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28272150514", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272150514", - "createdAt": "2026-06-27T00:05:10Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ + }, { - "tokensPerRank": 1, - "globalTokens": 8, + "tokensPerRank": 16, + "globalTokens": 128, "dispatch": { - "p50": 1758.687973022461, - "p90": 2565.7920837402344, - "p95": 2910.815954208374, - "p99": 3400.576114654541 + "p50": 58.36800113320351, + "p90": 60.47999858856201, + "p95": 62.272001057863235, + "p99": 68.09599697589874 }, "combine": { - "p50": 1759.8719596862793, - "p90": 1907.871961593628, - "p95": 2670.1760292053223, - "p99": 2940.095901489258 + "p50": 66.14399701356888, + "p90": 
66.84800237417221, + "p95": 67.4239993095398, + "p99": 76.76800340414047 }, "roundtrip": { - "p50": 1802.39999294281, - "p90": 1987.0719909667969, - "p95": 2666.1760807037354, - "p99": 2924.000024795532 + "p50": 106.36799782514572, + "p90": 108.67200046777725, + "p95": 110.97600311040878, + "p99": 117.76000261306763 }, "isolatedSum": { - "p50": 3518.5599327087402, - "p90": 4473.664045333862, - "p95": 5580.991983413696, - "p99": 6340.672016143799 + "p50": 124.51199814677238, + "p90": 127.32800096273422, + "p95": 129.69600036740303, + "p99": 144.86400038003922 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 630784, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 4, + "dispatchLogicalBytes": 6912000, + "combineLogicalBytes": 6912000, + "fanoutMean": 5.2734375, + "recvTokensMax": 93, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 32, + "globalTokens": 256, "dispatch": { - "p50": 1754.8799514770508, - "p90": 2488.703966140747, - "p95": 2823.359966278076, - "p99": 3391.4880752563477 + "p50": 74.5600014925003, + "p90": 76.4160007238388, + "p95": 77.08799839019775, + "p99": 81.85599744319916 }, "combine": { - "p50": 1760.4479789733887, - "p90": 1861.184000968933, - "p95": 2647.264003753662, - "p99": 2955.8401107788086 + "p50": 67.32799857854843, + "p90": 71.45600020885468, + "p95": 77.2159993648529, + "p99": 90.01599997282028 }, "roundtrip": { - "p50": 1819.2960023880005, - "p90": 1958.5280418395996, - "p95": 2686.271905899048, - "p99": 2968.319892883301 + "p50": 119.32799965143204, + "p90": 125.2480000257492, + "p95": 126.17599964141846, + "p99": 128.9599984884262 }, "isolatedSum": { - "p50": 3515.3279304504395, - "p90": 4349.88796710968, - "p95": 5470.623970031738, - "p99": 6347.328186035156 + "p50": 141.88800007104874, + "p90": 147.87200093269348, + "p95": 154.30399775505066, + "p99": 171.87199741601944 }, 
"roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 7, + "dispatchLogicalBytes": 13977600, + "combineLogicalBytes": 13977600, + "fanoutMean": 5.33203125, + "recvTokensMax": 179, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 32, - "globalTokens": 256, + "tokensPerRank": 64, + "globalTokens": 512, "dispatch": { - "p50": 1767.3920392990112, - "p90": 2204.767942428589, - "p95": 2829.9520015716553, - "p99": 3398.303985595703 + "p50": 71.42399996519089, + "p90": 78.46400141716003, + "p95": 79.9039974808693, + "p99": 101.79200023412704 }, "combine": { - "p50": 1764.0960216522217, - "p90": 1887.1040344238281, - "p95": 2647.615909576416, - "p99": 3015.5839920043945 + "p50": 80.06399869918823, + "p90": 83.16799998283386, + "p95": 89.6959975361824, + "p99": 93.44000369310379 }, "roundtrip": { - "p50": 1835.6800079345703, - "p90": 1997.1840381622314, - "p95": 2681.3440322875977, - "p99": 2967.072010040283 + "p50": 147.2640037536621, + "p90": 150.11200308799744, + "p95": 151.58399939537048, + "p99": 160.3199988603592 }, "isolatedSum": { - "p50": 3531.488060951233, - "p90": 4091.871976852417, - "p95": 5477.567911148071, - "p99": 6413.887977600098 + "p50": 151.48799866437912, + "p90": 161.6320013999939, + "p95": 169.5999950170517, + "p99": 195.23200392723083 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19726336, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 182, + "dispatchLogicalBytes": 27975680, + "combineLogicalBytes": 27975680, + "fanoutMean": 5.3359375, + "recvTokensMax": 355, "stragglerRank": 4, "correct": true, "samplesPooled": 600, @@ -1093,35 +1067,35 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 1790.7520532608032, - "p90": 2270.848035812378, - "p95": 2845.247983932495, - "p99": 3459.712028503418 + "p50": 90.84799885749817, + "p90": 
98.39999675750732, + "p95": 100.35199671983719, + "p99": 104.73600029945374 }, "combine": { - "p50": 1809.7599744796753, - "p90": 1956.9599628448486, - "p95": 2685.7919692993164, - "p99": 3029.952049255371 + "p50": 102.9760017991066, + "p90": 103.93600165843964, + "p95": 104.67199981212616, + "p99": 114.62400108575821 }, "roundtrip": { - "p50": 1890.3039693832397, - "p90": 2169.4719791412354, - "p95": 2888.256072998047, - "p99": 3985.24808883667 + "p50": 170.01600563526154, + "p90": 178.20799350738525, + "p95": 180.09600043296814, + "p99": 193.31200420856476 }, "isolatedSum": { - "p50": 3600.5120277404785, - "p90": 4227.807998657227, - "p95": 5531.0399532318115, - "p99": 6489.664077758789 + "p50": 193.82400065660477, + "p90": 202.33599841594696, + "p95": 205.02399653196335, + "p99": 219.36000138521194 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 4, + "dispatchLogicalBytes": 55674880, + "combineLogicalBytes": 55674880, + "fanoutMean": 5.3095703125, + "recvTokensMax": 699, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 @@ -1129,15 +1103,15 @@ ] }, { - "id": "cx-b81422f4", - "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|zeros|none|none|0|tuned||8c8497a77d9085d", + "id": "cx-24853ec9", + "identity": "b300|deepep|6144|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", "colorKey": "b300_c9569580", - "comparisonKey": "d97d7a8231265a6c", + "comparisonKey": "862206160efb203e", "schemaVersion": 3, - "generatedAt": "2026-06-27T00:06:13.336317+00:00", + "generatedAt": "2026-06-27T11:13:44.096050+00:00", "status": "valid", "publicationStatus": "official", - "runner": "b300-nv_13", + "runner": "b300-nv_11", "sku": "b300", "backend": "deepep", "phase": "decode", @@ -1151,8 +1125,9 @@ "worldSize": 8, "epSize": 8, "label": "B300 EP8 · 
deepep · bf16", + "model": "MiniMax-M3", "shape": { - "hidden": 7168, + "hidden": 6144, "topk": 8, "experts": 256, "routing": "uniform", @@ -1161,7 +1136,7 @@ "unevenTokens": "none", "eplbEnabled": false, "dispatchDtype": "bf16", - "activationProfile": "zeros", + "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { @@ -1181,8 +1156,8 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "8c8497a77d9085d", - "workloadId": "set:4:d8d49658059863f2", + "traceSignature": "ac583971f94b176", + "workloadId": "set:8:2e0df6a62cd0143e", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -1190,42 +1165,42 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28272142980", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272142980", - "createdAt": "2026-06-27T00:04:57Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28287497246", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28287497246", + "createdAt": "2026-06-27T11:13:44.096050+00:00", + "sha": "df7fddee0a275156d4c72fa006cd2b73bce72613" }, "rows": [ { "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 56.63999915122986, - "p90": 59.26400050520897, - "p95": 62.04799935221672, - "p99": 73.85600358247757 + "p50": 57.440001517534256, + "p90": 59.29600074887276, + "p95": 61.28000095486641, + "p99": 64.41599875688553 }, "combine": { - "p50": 66.43199920654297, - "p90": 67.4239993095398, - "p95": 68.25599819421768, - "p99": 78.04799824953079 - }, + "p50": 65.8240020275116, + "p90": 67.07199662923813, + "p95": 67.19999760389328, + "p99": 77.47200131416321 + }, "roundtrip": { - "p50": 106.78400099277496, - "p90": 111.39199882745743, - "p95": 113.34399878978729, - "p99": 117.0239970088005 + "p50": 108.25599730014801, + "p90": 113.3119985461235, + "p95": 114.30399864912033, 
+ "p99": 123.71200323104858 }, "isolatedSum": { - "p50": 123.07199835777283, - "p90": 126.68799981474876, - "p95": 130.3039975464344, - "p99": 151.90400183200836 + "p50": 123.26400354504585, + "p90": 126.36799737811089, + "p95": 128.4799985587597, + "p99": 141.88800007104874 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 630784, - "combineLogicalBytes": 630784, + "dispatchLogicalBytes": 540672, + "combineLogicalBytes": 540672, "fanoutMean": 5.5, "recvTokensMax": 7, "stragglerRank": 7, @@ -1233,39 +1208,150 @@ "samplesPooled": 600, "trials": 3 }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 57.66399949789047, + "p90": 60.28800085186958, + "p95": 62.94400244951248, + "p99": 71.1359977722168 + }, + "combine": { + "p50": 66.23999774456024, + "p90": 67.16799736022949, + "p95": 67.26399809122086, + "p99": 69.63200122117996 + }, + "roundtrip": { + "p50": 107.4879989027977, + "p90": 113.15199732780457, + "p95": 114.17599767446518, + "p99": 118.9119964838028 + }, + "isolatedSum": { + "p50": 123.90399724245071, + "p90": 127.45599821209908, + "p95": 130.20800054073334, + "p99": 140.76799899339676 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1056768, + "combineLogicalBytes": 1056768, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 59.328000992536545, + "p90": 62.17600032687187, + "p95": 65.05600363016129, + "p99": 70.01599669456482 + }, + "combine": { + "p50": 66.81600213050842, + "p90": 68.12799721956253, + "p95": 69.11999732255936, + "p99": 77.27999985218048 + }, + "roundtrip": { + "p50": 108.57599973678589, + "p90": 115.58400094509125, + "p95": 118.1119978427887, + "p99": 128.76799702644348 + }, + "isolatedSum": { + "p50": 126.14400312304497, + "p90": 130.3039975464344, + "p95": 134.17600095272064, + "p99": 147.2959965467453 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 2125824, + "combineLogicalBytes": 2125824, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, { "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 59.55199897289276, - "p90": 61.824001371860504, - "p95": 63.680000603199005, - "p99": 71.07199728488922 + "p50": 59.776000678539276, + "p90": 62.111999839544296, + "p95": 63.90400230884552, + "p99": 70.14399766921997 }, "combine": { - "p50": 68.92800331115723, - "p90": 77.7600035071373, - "p95": 77.95199751853943, - "p99": 78.65600287914276 + "p50": 67.87200272083282, + "p90": 76.19199901819229, + "p95": 77.18399912118912, + "p99": 79.55200225114822 }, "roundtrip": { - "p50": 120.03199756145477, - "p90": 124.4800016283989, - "p95": 125.95200538635254, - "p99": 145.53600549697876 + "p50": 116.31999909877777, + "p90": 122.43200093507767, + "p95": 124.60800260305405, + "p99": 131.77600502967834 }, "isolatedSum": { - "p50": 128.48000228405, - "p90": 139.5840048789978, - "p95": 141.63199812173843, - "p99": 149.72800016403198 + "p50": 127.6480033993721, + "p90": 138.3039988577366, + "p95": 141.08800143003464, + "p99": 149.6959999203682 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, - "combineLogicalBytes": 4974592, + "dispatchLogicalBytes": 4263936, + "combineLogicalBytes": 4263936, "fanoutMean": 5.421875, "recvTokensMax": 47, - "stragglerRank": 7, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 64.19199705123901, + "p90": 69.34399902820587, + "p95": 71.80800288915634, + "p99": 79.52000200748444 + }, + "combine": { + "p50": 68.4799998998642, + "p90": 76.48000121116638, + "p95": 76.9599974155426, + "p99": 79.0719985961914 + }, + "roundtrip": { + "p50": 121.5360015630722, + "p90": 126.52799487113953, + "p95": 127.3919939994812, + "p99": 137.9839926958084 + }, + "isolatedSum": { + "p50": 
132.6719969511032, + "p90": 145.82400023937225, + "p95": 148.76800030469894, + "p99": 158.59200060367584 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 8503296, + "combineLogicalBytes": 8503296, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 @@ -1274,32 +1360,32 @@ "tokensPerRank": 32, "globalTokens": 256, "dispatch": { - "p50": 68.83200258016586, - "p90": 72.38399982452393, - "p95": 75.16799867153168, - "p99": 78.17599922418594 + "p50": 73.53600114583969, + "p90": 77.18399912118912, + "p95": 78.14399898052216, + "p99": 88.128000497818 }, "combine": { - "p50": 78.65600287914276, - "p90": 79.71200346946716, - "p95": 80.57600259780884, - "p99": 100.92800110578537 + "p50": 77.504001557827, + "p90": 79.19999957084656, + "p95": 79.45600152015686, + "p99": 80.25600016117096 }, "roundtrip": { - "p50": 130.72000443935394, - "p90": 134.2719942331314, - "p95": 135.74400544166565, - "p99": 155.7759940624237 + "p50": 123.64800274372101, + "p90": 128.38399410247803, + "p95": 131.1360001564026, + "p99": 140.4159963130951 }, "isolatedSum": { - "p50": 147.48800545930862, - "p90": 152.0960032939911, - "p95": 155.74400126934052, - "p99": 179.1040003299713 + "p50": 151.0400027036667, + "p90": 156.38399869203568, + "p95": 157.60000050067902, + "p99": 168.38400065898895 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19726336, - "combineLogicalBytes": 19726336, + "dispatchLogicalBytes": 16908288, + "combineLogicalBytes": 16908288, "fanoutMean": 5.375, "recvTokensMax": 182, "stragglerRank": 7, @@ -1307,39 +1393,76 @@ "samplesPooled": 600, "trials": 3 }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 88.28800171613693, + "p90": 90.91199934482574, + "p95": 91.90399944782257, + "p99": 103.90400141477585 + }, + "combine": { + "p50": 90.91199934482574, + "p90": 91.93599969148636, + "p95": 92.47999638319016, + "p99": 103.2319962978363 + }, + "roundtrip": { + "p50": 
147.96799421310425, + "p90": 153.18399667739868, + "p95": 155.4879993200302, + "p99": 161.69600188732147 + }, + "isolatedSum": { + "p50": 179.20000106096268, + "p90": 182.8479990363121, + "p95": 184.38399583101273, + "p99": 207.13599771261215 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 33423360, + "combineLogicalBytes": 33423360, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, { "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 93.34400296211243, - "p90": 95.93600034713745, - "p95": 99.2640033364296, - "p99": 107.61599987745285 + "p50": 100.00000149011612, + "p90": 102.68799960613251, + "p95": 103.4879982471466, + "p99": 112.86400258541107 }, "combine": { - "p50": 115.4559999704361, - "p90": 116.44800007343292, - "p95": 117.0559972524643, - "p99": 126.43200159072876 + "p50": 105.82400113344193, + "p90": 114.46399986743927, + "p95": 115.03999680280685, + "p99": 118.23999881744385 }, "roundtrip": { - "p50": 192.9599940776825, - "p90": 198.81600141525269, - "p95": 199.8080015182495, - "p99": 274.1439938545227 + "p50": 185.82400679588318, + "p90": 190.14400243759155, + "p95": 191.00800156593323, + "p99": 196.8960016965866 }, "isolatedSum": { - "p50": 208.80000293254852, - "p90": 212.38400042057037, - "p95": 216.3200005888939, - "p99": 234.0480014681816 + "p50": 205.82400262355804, + "p90": 217.15199947357178, + "p95": 218.52799504995346, + "p99": 231.10400140285492 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, + "dispatchLogicalBytes": 66576384, + "combineLogicalBytes": 66576384, "fanoutMean": 5.291015625, "recvTokensMax": 723, - "stragglerRank": 6, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -1347,15 +1470,15 @@ ] }, { - "id": "cx-a22ca77b", - "identity": 
"b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|decode|normal|none|none|0|tuned||2279937619f3971", - "colorKey": "b300_77566238", - "comparisonKey": "08fb0b4fb4077abb", + "id": "cx-c0dba141", + "identity": "b300|deepep|6144|8|256|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "b300_307ed708", + "comparisonKey": "62d01cd02a49457a", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:58:04.079730+00:00", + "generatedAt": "2026-06-27T09:52:49.194497+00:00", "status": "valid", "publicationStatus": "official", - "runner": "b300-nv_02", + "runner": "b300-nv_01", "sku": "b300", "backend": "deepep", "phase": "decode", @@ -1363,18 +1486,19 @@ "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", + "measurementContract": "runtime-visible-v1", "topologyClass": "b300-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep · bf16 · balanced", + "label": "B300 EP8 · deepep · bf16", + "model": "MiniMax-M3", "shape": { - "hidden": 7168, + "hidden": 6144, "topk": 8, "experts": 256, - "routing": "balanced", - "routingLabel": "balanced", + "routing": "uniform", + "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, @@ -1399,8 +1523,8 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "2279937619f3971", - "workloadId": "set:4:7af12818400d6348", + "traceSignature": "ac583971f94b176", + "workloadId": "set:8:2e0df6a62cd0143e", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -1408,10 +1532,10 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271873027", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271873027", - "createdAt": 
"2026-06-26T23:56:21Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28285721110", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285721110", + "createdAt": "2026-06-27T09:52:49.194497+00:00", + "sha": "149586650dbed5b7579537347e9489d5b41543c1" }, "rows": [ { @@ -1420,33 +1544,107 @@ "dispatch": { "p50": 56.41600117087364, "p90": 58.848001062870026, - "p95": 61.216000467538834, - "p99": 80.25600016117096 + "p95": 63.07200342416763, + "p99": 79.29600030183792 }, "combine": { - "p50": 67.6800012588501, - "p90": 69.60000097751617, - "p95": 76.73600316047668, - "p99": 82.62400329113007 + "p50": 65.24799764156342, + "p90": 66.14399701356888, + "p95": 66.3359984755516, + "p99": 68.96000355482101 }, "roundtrip": { - "p50": 106.49599879980087, - "p90": 109.27999764680862, + "p50": 104.76800054311752, + "p90": 111.35999858379364, + "p95": 112.09599673748016, + "p99": 115.7120019197464 + }, + "isolatedSum": { + "p50": 121.66399881243706, + "p90": 124.9919980764389, + "p95": 129.40800189971924, + "p99": 148.25600385665894 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 540672, + "combineLogicalBytes": 540672, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 55.87200075387955, + "p90": 57.53599852323532, + "p95": 58.94400179386139, + "p99": 66.91200286149979 + }, + "combine": { + "p50": 65.69600105285645, + "p90": 66.3359984755516, + "p95": 66.68800115585327, + "p99": 78.20799946784973 + }, + "roundtrip": { + "p50": 105.56799918413162, + "p90": 110.23999750614166, "p95": 111.13599687814713, - "p99": 124.1919994354248 + "p99": 129.5360028743744 }, "isolatedSum": { - "p50": 124.09600242972374, - "p90": 128.4480020403862, - "p95": 137.95200362801552, - "p99": 162.88000345230103 + "p50": 121.56800180673599, + "p90": 123.87199699878693, + "p95": 125.63200294971466, + "p99": 
145.12000232934952 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 917504, - "combineLogicalBytes": 917504, - "fanoutMean": 8, - "recvTokensMax": 8, - "stragglerRank": 7, + "dispatchLogicalBytes": 1056768, + "combineLogicalBytes": 1056768, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 56.73599988222122, + "p90": 58.36800113320351, + "p95": 59.42400172352791, + "p99": 68.4799998998642 + }, + "combine": { + "p50": 66.14399701356888, + "p90": 67.16799736022949, + "p95": 68.15999746322632, + "p99": 78.17599922418594 + }, + "roundtrip": { + "p50": 106.59199953079224, + "p90": 109.43999886512756, + "p95": 111.84000223875046, + "p99": 120.7680031657219 + }, + "isolatedSum": { + "p50": 122.8799968957901, + "p90": 125.535998493433, + "p95": 127.58399918675423, + "p99": 146.65599912405014 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2125824, + "combineLogicalBytes": 2125824, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -1455,35 +1653,72 @@ "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 58.43200162053108, - "p90": 60.70400029420853, - "p95": 62.6240000128746, - "p99": 78.65600287914276 + "p50": 57.631999254226685, + "p90": 60.19200012087822, + "p95": 61.59999966621399, + "p99": 72.83200323581696 }, "combine": { - "p50": 77.98399776220322, - "p90": 78.72000336647034, - "p95": 78.84799689054489, - "p99": 81.4720019698143 + "p50": 66.78400188684464, + "p90": 68.31999868154526, + "p95": 69.50400024652481, + "p99": 77.82399654388428 }, "roundtrip": { - "p50": 118.07999759912491, - "p90": 122.91199713945389, - "p95": 124.1919994354248, - "p99": 131.99999928474426 + "p50": 115.68000167608261, + "p90": 122.11199849843979, + "p95": 123.03999811410904, + "p99": 135.04000008106232 }, "isolatedSum": { - "p50": 
136.4159993827343, - "p90": 139.42400366067886, - "p95": 141.4719969034195, - "p99": 160.12800484895706 + "p50": 124.41600114107132, + "p90": 128.51199880242348, + "p95": 131.1039999127388, + "p99": 150.65599977970123 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 7340032, - "combineLogicalBytes": 7340032, - "fanoutMean": 8, - "recvTokensMax": 64, - "stragglerRank": 7, + "dispatchLogicalBytes": 4263936, + "combineLogicalBytes": 4263936, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 58.94400179386139, + "p90": 64.89600241184235, + "p95": 65.88800251483917, + "p99": 72.4480003118515 + }, + "combine": { + "p50": 67.64800101518631, + "p90": 76.25599950551987, + "p95": 76.92799717187881, + "p99": 78.43200117349625 + }, + "roundtrip": { + "p50": 121.56800180673599, + "p90": 124.35200065374374, + "p95": 125.40799379348755, + "p99": 136.73600554466248 + }, + "isolatedSum": { + "p50": 126.5920028090477, + "p90": 141.1520019173622, + "p95": 142.815999686718, + "p99": 150.88000148534775 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 8503296, + "combineLogicalBytes": 8503296, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -1492,72 +1727,109 @@ "tokensPerRank": 32, "globalTokens": 256, "dispatch": { - "p50": 69.82400268316269, - "p90": 71.87200337648392, - "p95": 73.7600028514862, - "p99": 84.25600081682205 + "p50": 68.54400038719177, + "p90": 75.42400062084198, + "p95": 75.9039968252182, + "p99": 78.015998005867 }, "combine": { - "p50": 79.16799932718277, - "p90": 81.08799904584885, - "p95": 81.91999793052673, - "p99": 90.71999788284302 + "p50": 77.56800204515457, + "p90": 78.36800068616867, + "p95": 78.52800190448761, + "p99": 80.70400357246399 }, "roundtrip": { - "p50": 133.82400572299957, - "p90": 140.09599387645721, 
- "p95": 141.92000031471252, - "p99": 145.82400023937225 + "p50": 125.34399330615997, + "p90": 131.84000551700592, + "p95": 133.53599607944489, + "p99": 144.22400295734406 }, "isolatedSum": { - "p50": 148.99200201034546, - "p90": 152.96000242233276, - "p95": 155.68000078201294, - "p99": 174.97599869966507 + "p50": 146.11200243234634, + "p90": 153.79200130701065, + "p95": 154.4319987297058, + "p99": 158.720001578331 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 29360128, - "combineLogicalBytes": 29360128, - "fanoutMean": 8, - "recvTokensMax": 256, + "dispatchLogicalBytes": 16908288, + "combineLogicalBytes": 16908288, + "fanoutMean": 5.375, + "recvTokensMax": 182, "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 85.91999858617783, + "p90": 88.79999816417694, + "p95": 89.56799656152725, + "p99": 97.69599884748459 + }, + "combine": { + "p50": 90.2400016784668, + "p90": 92.28800237178802, + "p95": 94.97600048780441, + "p99": 102.36799716949463 + }, + "roundtrip": { + "p50": 149.79200065135956, + "p90": 162.81600296497345, + "p95": 167.4560010433197, + "p99": 173.66400361061096 + }, + "isolatedSum": { + "p50": 176.16000026464462, + "p90": 181.08800053596497, + "p95": 184.54399704933167, + "p99": 200.06399601697922 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 33423360, + "combineLogicalBytes": 33423360, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, { "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 105.76000064611435, - "p90": 107.71200060844421, - "p95": 109.02400314807892, - "p99": 114.78400230407715 + "p50": 98.7199991941452, + "p90": 101.21600329875946, + "p95": 102.14400291442871, + "p99": 113.24799805879593 }, "combine": { - "p50": 130.36799430847168, - "p90": 139.615997672081, - "p95": 140.03199338912964, - "p99": 143.13599467277527 + 
"p50": 105.56799918413162, + "p90": 113.98400366306305, + "p95": 114.49600011110306, + "p99": 114.94400352239609 }, "roundtrip": { - "p50": 230.68800568580627, - "p90": 234.52800512313843, - "p95": 235.55199801921844, - "p99": 240.09600281715393 + "p50": 184.4159960746765, + "p90": 188.92799317836761, + "p95": 190.08000195026398, + "p99": 197.24799692630768 }, "isolatedSum": { - "p50": 236.12799495458603, - "p90": 247.3279982805252, - "p95": 249.05599653720856, - "p99": 257.9199969768524 + "p50": 204.28799837827682, + "p90": 215.2000069618225, + "p95": 216.64000302553177, + "p99": 228.19200158119202 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 117440512, - "combineLogicalBytes": 117440512, - "fanoutMean": 8, - "recvTokensMax": 1024, - "stragglerRank": 7, + "dispatchLogicalBytes": 66576384, + "combineLogicalBytes": 66576384, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -1565,15 +1837,15 @@ ] }, { - "id": "cx-c5ecae32", - "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|8|decode|normal|none|none|0|tuned||d02a66236b524b8", - "colorKey": "b300_a314501b", - "comparisonKey": "a145623f8abcc709", + "id": "cx-3f6620d0", + "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|fp8-saturation|none|none|0|tuned||8c8497a77d9085d", + "colorKey": "b300_c9569580", + "comparisonKey": "11fb97077712804e", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:58:12.406102+00:00", + "generatedAt": "2026-06-27T00:06:34.883169+00:00", "status": "valid", "publicationStatus": "official", - "runner": "b300-nv_06", + "runner": "b300-nv_05", "sku": "b300", "backend": "deepep", "phase": "decode", @@ -1586,18 +1858,19 @@ "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep · bf16 · balanced-rank-local", + "label": "B300 EP8 · deepep · bf16", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, 
"topk": 8, "experts": 256, - "routing": "balanced-rank-local", - "routingLabel": "balanced-rank-local", + "routing": "uniform", + "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, "dispatchDtype": "bf16", - "activationProfile": "normal", + "activationProfile": "fp8-saturation", "combineQuantMode": "none" }, "resourceProfile": { @@ -1617,8 +1890,8 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "d02a66236b524b8", - "workloadId": "set:4:2eebbed158fe1320", + "traceSignature": "8c8497a77d9085d", + "workloadId": "set:4:d8d49658059863f2", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -1626,9 +1899,9 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271879618", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271879618", - "createdAt": "2026-06-26T23:56:35Z", + "id": "28272154473", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272154473", + "createdAt": "2026-06-27T00:06:34.883169+00:00", "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" }, "rows": [ @@ -1636,35 +1909,35 @@ "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 62.97600269317627, - "p90": 65.21599739789963, - "p95": 66.01600348949432, - "p99": 75.74400305747986 - }, - "combine": { - "p50": 54.336000233888626, - "p90": 55.26399984955788, - "p95": 56.60799890756607, - "p99": 65.5359998345375 + "p50": 57.40800127387047, + "p90": 59.26400050520897, + "p95": 61.055999249219894, + "p99": 69.66400146484375 + }, + "combine": { + "p50": 66.30399823188782, + "p90": 67.32799857854843, + "p95": 68.25599819421768, + "p99": 77.02399790287018 }, "roundtrip": { - "p50": 94.94400024414062, - "p90": 98.27200323343277, - "p95": 100.63999891281128, - "p99": 111.93600296974182 + "p50": 106.88000172376633, + "p90": 111.35999858379364, + "p95": 
112.96000331640244, + "p99": 129.31199371814728 }, "isolatedSum": { - "p50": 117.3120029270649, - "p90": 120.4799972474575, - "p95": 122.6240023970604, - "p99": 141.28000289201736 + "p50": 123.71199950575829, + "p90": 126.5919990837574, + "p95": 129.31199744343758, + "p99": 146.68799936771393 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 114688, - "combineLogicalBytes": 114688, - "fanoutMean": 1, - "recvTokensMax": 4, - "stragglerRank": 7, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -1673,34 +1946,34 @@ "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 59.39200147986412, - "p90": 61.63199990987778, - "p95": 62.65600025653839, - "p99": 71.68000191450119 + "p50": 58.33600088953972, + "p90": 60.67200005054474, + "p95": 62.68800050020218, + "p99": 68.15999746322632 }, "combine": { - "p50": 56.73599988222122, - "p90": 65.34399837255478, - "p95": 65.95200300216675, - "p99": 85.4400023818016 + "p50": 67.84000247716904, + "p90": 77.2159993648529, + "p95": 77.88799703121185, + "p99": 78.75200361013412 }, "roundtrip": { - "p50": 108.57599973678589, - "p90": 113.56800049543381, - "p95": 114.84800279140472, - "p99": 120.12799829244614 + "p50": 121.88799679279327, + "p90": 125.05599856376648, + "p95": 126.08000636100769, + "p99": 136.99199259281158 }, "isolatedSum": { - "p50": 116.12800136208534, - "p90": 126.97599828243256, - "p95": 128.60800325870514, - "p99": 157.1200042963028 + "p50": 126.17600336670876, + "p90": 137.88799941539764, + "p95": 140.57599753141403, + "p99": 146.91200107336044 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 917504, - "combineLogicalBytes": 917504, - "fanoutMean": 1, - "recvTokensMax": 8, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, "stragglerRank": 5, "correct": true, "samplesPooled": 600, @@ -1710,35 
+1983,35 @@ "tokensPerRank": 32, "globalTokens": 256, "dispatch": { - "p50": 70.72000205516815, - "p90": 76.57600194215775, - "p95": 77.88799703121185, - "p99": 85.31200140714645 + "p50": 69.85600292682648, + "p90": 74.27199929952621, + "p95": 75.3600001335144, + "p99": 82.97599852085114 }, "combine": { - "p50": 66.6240006685257, - "p90": 67.32799857854843, - "p95": 67.61600077152252, - "p99": 78.84799689054489 + "p50": 78.52800190448761, + "p90": 79.19999957084656, + "p95": 79.99999821186066, + "p99": 82.8159973025322 }, "roundtrip": { - "p50": 120.51200121641159, - "p90": 123.99999797344208, - "p95": 124.64000284671783, - "p99": 130.0159990787506 + "p50": 131.3599944114685, + "p90": 135.903999209404, + "p95": 136.76799833774567, + "p99": 147.5519984960556 }, "isolatedSum": { - "p50": 137.34400272369385, - "p90": 143.90400052070618, - "p95": 145.50399780273438, - "p99": 164.15999829769135 + "p50": 148.3840048313141, + "p90": 153.47199887037277, + "p95": 155.35999834537506, + "p99": 165.79199582338333 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 3670016, - "combineLogicalBytes": 3670016, - "fanoutMean": 1, - "recvTokensMax": 32, - "stragglerRank": 5, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -1747,34 +2020,34 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 70.11199742555618, - "p90": 71.87200337648392, - "p95": 73.79200309515, - "p99": 79.64800298213959 + "p50": 94.24000233411789, + "p90": 96.79999947547913, + "p95": 99.39199686050415, + "p99": 103.74400019645691 }, "combine": { - "p50": 68.35199892520905, - "p90": 70.04799693822861, - "p95": 76.92799717187881, - "p99": 79.1039988398552 + "p50": 115.35999923944473, + "p90": 116.12799763679504, + "p95": 116.73600226640701, + "p99": 127.29600071907043 }, "roundtrip": { - "p50": 122.23999947309494, - "p90": 129.5360028743744, - "p95": 
131.32800161838531, - "p99": 142.87999272346497 + "p50": 193.4400051832199, + "p90": 198.91199469566345, + "p95": 199.71199333667755, + "p99": 208.3200067281723 }, "isolatedSum": { - "p50": 138.46399635076523, - "p90": 141.92000031471252, - "p95": 150.7200002670288, - "p99": 158.75200182199478 + "p50": 209.60000157356262, + "p90": 212.92799711227417, + "p95": 216.12799912691116, + "p99": 231.04000091552734 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 14680064, - "combineLogicalBytes": 14680064, - "fanoutMean": 1, - "recvTokensMax": 128, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, "stragglerRank": 7, "correct": true, "samplesPooled": 600, @@ -1783,15 +2056,15 @@ ] }, { - "id": "cx-72792847", - "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|decode|normal|none|none|0|tuned||2ad5ef98d328fa1", - "colorKey": "b300_5b993222", - "comparisonKey": "10e590b8f933d382", + "id": "cx-854f00de", + "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||8c8497a77d9085d", + "colorKey": "b300_c9569580", + "comparisonKey": "afbd085a57d290fd", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:58:30.886921+00:00", + "generatedAt": "2026-06-26T23:57:27.937449+00:00", "status": "valid", "publicationStatus": "official", - "runner": "b300-nv_10", + "runner": "b300-nv_17", "sku": "b300", "backend": "deepep", "phase": "decode", @@ -1804,13 +2077,14 @@ "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep · bf16 · hotspot-single", + "label": "B300 EP8 · deepep · bf16", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, "experts": 256, - "routing": "hotspot-single", - "routingLabel": "hotspot-single", + "routing": "uniform", + "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, @@ -1835,8 +2109,8 @@ "scaleUpDomain": 8 }, 
"routingConsistent": true, - "traceSignature": "2ad5ef98d328fa1", - "workloadId": "set:4:286be993cd819ed9", + "traceSignature": "8c8497a77d9085d", + "workloadId": "set:4:d8d49658059863f2", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -1844,9 +2118,9 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271900377", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271900377", - "createdAt": "2026-06-26T23:57:16Z", + "id": "28271865772", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271865772", + "createdAt": "2026-06-26T23:57:27.937449+00:00", "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" }, "rows": [ @@ -1854,34 +2128,34 @@ "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 56.96000158786774, - "p90": 59.10399928689003, - "p95": 62.272001057863235, - "p99": 71.68000191450119 + "p50": 55.58399856090546, + "p90": 57.40800127387047, + "p95": 59.13599953055382, + "p99": 65.63200056552887 }, "combine": { - "p50": 66.39999896287918, - "p90": 67.07199662923813, - "p95": 67.45599955320358, - "p99": 90.17600119113922 + "p50": 66.14399701356888, + "p90": 67.55200028419495, + "p95": 68.38399916887283, + "p99": 77.2159993648529 }, "roundtrip": { - "p50": 106.91200196743011, - "p90": 113.40799927711487, - "p95": 117.18399822711945, - "p99": 195.77600061893463 + "p50": 105.18400371074677, + "p90": 111.29599809646606, + "p95": 113.50400000810623, + "p99": 132.1280002593994 }, "isolatedSum": { - "p50": 123.36000055074692, - "p90": 126.17599591612816, - "p95": 129.72800061106682, - "p99": 161.8560031056404 + "p50": 121.72799557447433, + "p90": 124.96000155806541, + "p95": 127.51999869942665, + "p99": 142.84799993038177 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 602112, - "combineLogicalBytes": 602112, - "fanoutMean": 5.25, - "recvTokensMax": 8, + 
"dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, "stragglerRank": 7, "correct": true, "samplesPooled": 600, @@ -1891,34 +2165,34 @@ "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 59.58399921655655, - "p90": 62.65600025653839, - "p95": 65.34399837255478, - "p99": 81.85599744319916 + "p50": 58.400001376867294, + "p90": 60.99199876189232, + "p95": 62.880001962184906, + "p99": 73.05599749088287 }, "combine": { - "p50": 68.00000369548798, - "p90": 77.11999863386154, - "p95": 77.79199630022049, - "p99": 79.9039974808693 + "p50": 67.29599833488464, + "p90": 77.15199887752533, + "p95": 77.72800326347351, + "p99": 79.64800298213959 }, "roundtrip": { - "p50": 122.36800044775009, - "p90": 125.791996717453, - "p95": 127.71199643611908, - "p99": 145.82400023937225 + "p50": 117.95199662446976, + "p90": 122.72000312805176, + "p95": 123.9359974861145, + "p99": 138.46400380134583 }, "isolatedSum": { - "p50": 127.58400291204453, - "p90": 139.77599889039993, - "p95": 143.13599467277527, - "p99": 161.75999492406845 + "p50": 125.69599971175194, + "p90": 138.14399763941765, + "p95": 140.60800522565842, + "p99": 152.70400047302246 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4859904, - "combineLogicalBytes": 4859904, - "fanoutMean": 5.296875, - "recvTokensMax": 64, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, "stragglerRank": 7, "correct": true, "samplesPooled": 600, @@ -1928,34 +2202,34 @@ "tokensPerRank": 32, "globalTokens": 256, "dispatch": { - "p50": 69.63200122117996, - "p90": 75.32799988985062, - "p95": 77.27999985218048, - "p99": 98.08000177145004 + "p50": 67.9360032081604, + "p90": 71.16799801588058, + "p95": 73.72800260782242, + "p99": 86.5280032157898 }, "combine": { - "p50": 78.62400263547897, - "p90": 79.26400005817413, - "p95": 79.45600152015686, - "p99": 89.75999802350998 + "p50": 77.95199751853943, + "p90": 
79.19999957084656, + "p95": 80.06399869918823, + "p99": 83.8719978928566 }, "roundtrip": { - "p50": 133.53599607944489, - "p90": 137.15200126171112, - "p95": 138.5280042886734, - "p99": 155.10399639606476 + "p50": 128.7039965391159, + "p90": 131.1360001564026, + "p95": 132.76800513267517, + "p99": 140.6400054693222 }, "isolatedSum": { - "p50": 148.25600385665894, - "p90": 154.59199994802475, - "p95": 156.73600137233734, - "p99": 187.83999979496002 + "p50": 145.88800072669983, + "p90": 150.36799758672714, + "p95": 153.79200130701065, + "p99": 170.4000011086464 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19525632, - "combineLogicalBytes": 19525632, - "fanoutMean": 5.3203125, - "recvTokensMax": 256, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, "stragglerRank": 7, "correct": true, "samplesPooled": 600, @@ -1965,34 +2239,34 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 103.39199751615524, - "p90": 104.96000200510025, - "p95": 106.62399977445602, - "p99": 110.81600189208984 + "p50": 92.70399808883667, + "p90": 97.63199836015701, + "p95": 99.71199929714203, + "p99": 135.42400300502777 }, "combine": { - "p50": 127.80800461769104, - "p90": 129.2160004377365, - "p95": 130.5920034646988, - "p99": 150.62400698661804 + "p50": 114.78400230407715, + "p90": 116.70400202274323, + "p95": 118.97599697113037, + "p99": 164.0319973230362 }, "roundtrip": { - "p50": 215.87200462818146, - "p90": 223.07200729846954, - "p95": 224.7679978609085, - "p99": 231.32799565792084 + "p50": 190.62399864196777, + "p90": 196.60800695419312, + "p95": 197.66399264335632, + "p99": 203.99999618530273 }, "isolatedSum": { - "p50": 231.20000213384628, - "p90": 234.17600244283676, - "p95": 237.21600323915482, - "p99": 261.4400088787079 + "p50": 207.48800039291382, + "p90": 214.33600038290024, + "p95": 218.6879962682724, + "p99": 299.45600032806396 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 
78102528, - "combineLogicalBytes": 78102528, - "fanoutMean": 5.3203125, - "recvTokensMax": 1024, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, "stragglerRank": 7, "correct": true, "samplesPooled": 600, @@ -2001,15 +2275,15 @@ ] }, { - "id": "cx-cc647506", - "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|decode|normal|none|none|0|tuned||1fa7fe74d0e30a3", - "colorKey": "b300_8d2811e3", - "comparisonKey": "478acd4108c50326", + "id": "cx-bbb0479e", + "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "b300_c9569580", + "comparisonKey": "c777627e39152404", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:58:32.426052+00:00", + "generatedAt": "2026-06-27T10:26:04.332610+00:00", "status": "valid", "publicationStatus": "official", - "runner": "b300-nv_05", + "runner": "b300-nv_04", "sku": "b300", "backend": "deepep", "phase": "decode", @@ -2022,13 +2296,14 @@ "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep · bf16 · zipf", + "label": "B300 EP8 · deepep · bf16", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, "experts": 256, - "routing": "zipf", - "routingLabel": "zipf", + "routing": "uniform", + "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, @@ -2053,346 +2328,276 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "1fa7fe74d0e30a3", - "workloadId": "set:4:f5576e2b712d38c3", + "traceSignature": "ac583971f94b176", + "workloadId": "set:8:d8d49658059863f2", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", + "backendVersion": "2.0.0+af9a040", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { 
- "id": "28271886823", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271886823", - "createdAt": "2026-06-26T23:56:49Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28286434915", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28286434915", + "createdAt": "2026-06-27T10:26:04.332610+00:00", + "sha": "91c7acf59a5e524f37742922ec67721d86a03f6b" }, "rows": [ { "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 56.703999638557434, - "p90": 59.90400165319443, - "p95": 62.65600025653839, - "p99": 69.98399645090103 + "p50": 56.86400085687637, + "p90": 59.51999872922897, + "p95": 61.824001371860504, + "p99": 71.32799923419952 }, "combine": { - "p50": 65.88800251483917, - "p90": 66.43199920654297, - "p95": 66.72000139951706, - "p99": 73.7600028514862 + "p50": 67.64800101518631, + "p90": 69.72800195217133, + "p95": 71.07199728488922, + "p99": 90.55999666452408 }, "roundtrip": { - "p50": 107.16799646615982, - "p90": 112.83200234174728, - "p95": 114.14399743080139, - "p99": 120.44800072908401 + "p50": 109.21599715948105, + "p90": 114.656001329422, + "p95": 115.87200313806534, + "p99": 121.08799815177917 }, "isolatedSum": { - "p50": 122.5920021533966, - "p90": 126.3360008597374, - "p95": 129.37600165605545, - "p99": 143.74399930238724 + "p50": 124.51200187206268, + "p90": 129.2480006814003, + "p95": 132.89599865674973, + "p99": 161.8879958987236 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 444416, - "combineLogicalBytes": 444416, - "fanoutMean": 3.875, - "recvTokensMax": 8, - "stragglerRank": 7, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 2, + "globalTokens": 16, "dispatch": { - "p50": 58.848001062870026, - "p90": 60.80000102519989, - "p95": 62.84800171852112, - "p99": 74.40000027418137 + "p50": 
57.95200169086456, + "p90": 60.83200126886368, + "p95": 64.70400094985962, + "p99": 79.6160027384758 }, "combine": { "p50": 68.00000369548798, - "p90": 70.30399888753891, - "p95": 76.99199765920639, - "p99": 78.5600021481514 + "p90": 69.40799951553345, + "p95": 70.30399888753891, + "p99": 80.70400357246399 }, "roundtrip": { - "p50": 116.54400080442429, - "p90": 123.29600006341934, - "p95": 124.83199685811996, - "p99": 130.46400249004364 + "p50": 108.89600217342377, + "p90": 112.47999966144562, + "p95": 117.47200042009354, + "p99": 286.20800375938416 }, "isolatedSum": { - "p50": 126.848004758358, - "p90": 131.1039999127388, - "p95": 139.8399993777275, - "p99": 152.96000242233276 + "p50": 125.95200538635254, + "p90": 130.24000078439713, + "p95": 135.00799983739853, + "p99": 160.3200063109398 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 3354624, - "combineLogicalBytes": 3354624, - "fanoutMean": 3.65625, - "recvTokensMax": 64, - "stragglerRank": 7, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 32, - "globalTokens": 256, + "tokensPerRank": 4, + "globalTokens": 32, "dispatch": { - "p50": 75.9039968252182, - "p90": 78.27199995517731, - "p95": 79.52000200748444, - "p99": 87.5839963555336 + "p50": 59.90400165319443, + "p90": 63.87200206518173, + "p95": 66.880002617836, + "p99": 74.72000271081924 }, "combine": { - "p50": 78.40000092983246, - "p90": 79.19999957084656, - "p95": 79.71200346946716, - "p99": 83.64800363779068 + "p50": 69.2799985408783, + "p90": 78.46400141716003, + "p95": 78.87999713420868, + "p99": 91.26400202512741 }, "roundtrip": { - "p50": 134.24000144004822, - "p90": 138.20800185203552, - "p95": 139.5840048789978, - "p99": 144.3520039319992 + "p50": 123.58400225639343, + "p90": 127.77599692344666, + "p95": 128.9920061826706, + "p99": 140.70400595664978 }, "isolatedSum": { - "p50": 
154.30399775505066, - "p90": 157.47199952602386, - "p95": 159.2320054769516, - "p99": 171.23199999332428 + "p50": 129.18400019407272, + "p90": 142.33600348234177, + "p95": 145.75999975204468, + "p99": 165.98400473594666 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 12859392, - "combineLogicalBytes": 12859392, - "fanoutMean": 3.50390625, - "recvTokensMax": 255, - "stragglerRank": 7, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 8, + "globalTokens": 64, "dispatch": { - "p50": 103.07200253009796, - "p90": 105.98400235176086, - "p95": 107.04000294208527, - "p99": 113.21599781513214 + "p50": 59.7120001912117, + "p90": 61.91999837756157, + "p95": 64.2239972949028, + "p99": 73.53600114583969 }, "combine": { - "p50": 127.13600695133209, - "p90": 128.1599998474121, - "p95": 128.57599556446075, - "p99": 131.04000687599182 + "p50": 70.68800181150436, + "p90": 79.16799932718277, + "p95": 79.77599650621414, + "p99": 83.39200168848038 }, "roundtrip": { - "p50": 209.1200053691864, - "p90": 214.30400013923645, - "p95": 216.12800657749176, - "p99": 229.66399788856506 + "p50": 121.21599912643433, + "p90": 126.49600207805634, + "p95": 127.20000743865967, + "p99": 133.91999900341034 }, "isolatedSum": { - "p50": 230.20800948143005, - "p90": 234.14400219917297, - "p95": 235.61599850654602, - "p99": 244.25600469112396 + "p50": 130.40000200271606, + "p90": 141.08799770474434, + "p95": 143.99999380111694, + "p99": 156.92800283432007 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 49946624, - "combineLogicalBytes": 49946624, - "fanoutMean": 3.40234375, - "recvTokensMax": 1022, - "stragglerRank": 7, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 6, "correct": true, 
"samplesPooled": 600, "trials": 3 - } - ] - }, - { - "id": "cx-3bfb4348", - "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|decode|normal|none|none|0|tuned||47fddabb3277bec", - "colorKey": "b300_2e44c039", - "comparisonKey": "5c5e6a7ecdec195f", - "schemaVersion": 3, - "generatedAt": "2026-06-26T23:58:26.448327+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_16", - "sku": "b300", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "B300 EP8 · deepep · bf16 · zipf-heavy", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "zipf-heavy", - "routingLabel": "zipf-heavy", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1351, - "configuredUnits": 20, - "deviceUnits": 148, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "47fddabb3277bec", - "workloadId": "set:4:6b84350720aa8233", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28271893428", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271893428", - "createdAt": "2026-06-26T23:57:02Z", - 
"sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ + }, { - "tokensPerRank": 1, - "globalTokens": 8, + "tokensPerRank": 16, + "globalTokens": 128, "dispatch": { - "p50": 56.832000613212585, - "p90": 63.71200084686279, - "p95": 64.54399973154068, - "p99": 69.88800317049026 + "p50": 66.94400310516357, + "p90": 73.5040009021759, + "p95": 75.26399940252304, + "p99": 77.69600301980972 }, "combine": { - "p50": 55.67999929189682, - "p90": 58.20799991488457, - "p95": 64.86400216817856, - "p99": 68.89600306749344 + "p50": 78.62400263547897, + "p90": 79.71200346946716, + "p95": 79.93599772453308, + "p99": 82.94399827718735 }, "roundtrip": { - "p50": 94.52799707651138, - "p90": 99.2640033364296, - "p95": 101.56799852848053, - "p99": 107.04000294208527 + "p50": 121.11999839544296, + "p90": 123.32800030708313, + "p95": 124.41600114107132, + "p99": 129.7920048236847 }, "isolatedSum": { - "p50": 112.5119999051094, - "p90": 121.92000076174736, - "p95": 129.40800189971924, - "p99": 138.7840062379837 + "p50": 145.56800574064255, + "p90": 153.21600437164307, + "p95": 155.19999712705612, + "p99": 160.64000129699707 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 172032, - "combineLogicalBytes": 172032, - "fanoutMean": 1.5, - "recvTokensMax": 8, - "stragglerRank": 4, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 32, + "globalTokens": 256, "dispatch": { - "p50": 56.92800134420395, - "p90": 59.039998799562454, - "p95": 60.5119988322258, - "p99": 66.04799628257751 + "p50": 69.76000219583511, + "p90": 71.80800288915634, + "p95": 74.46400076150894, + "p99": 86.84799820184708 }, "combine": { - "p50": 56.63999915122986, - "p90": 66.23999774456024, - "p95": 66.56000018119812, - "p99": 78.91199737787247 + "p50": 80.22399991750717, + "p90": 83.10399949550629, + 
"p95": 90.30400216579437, + "p99": 92.6079973578453 }, "roundtrip": { - "p50": 107.80800133943558, - "p90": 113.43999952077866, - "p95": 114.656001329422, - "p99": 124.22399967908859 + "p50": 132.38400220870972, + "p90": 137.05599308013916, + "p95": 138.72000575065613, + "p99": 158.9439958333969 }, "isolatedSum": { - "p50": 113.56800049543381, - "p90": 125.2799965441227, - "p95": 127.07199901342392, - "p99": 144.95999366044998 + "p50": 149.98400211334229, + "p90": 154.91200238466263, + "p95": 164.7680029273033, + "p99": 179.45599555969238 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1376256, - "combineLogicalBytes": 1376256, - "fanoutMean": 1.5, - "recvTokensMax": 64, - "stragglerRank": 4, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 32, - "globalTokens": 256, + "tokensPerRank": 64, + "globalTokens": 512, "dispatch": { - "p50": 67.90400296449661, - "p90": 74.07999783754349, - "p95": 75.93599706888199, - "p99": 82.2720006108284 + "p50": 82.56000280380249, + "p90": 89.75999802350998, + "p95": 91.0400003194809, + "p99": 103.20000350475311 }, "combine": { - "p50": 67.90400296449661, - "p90": 70.0799971818924, - "p95": 77.05599814653397, - "p99": 79.26400005817413 + "p50": 92.6399976015091, + "p90": 94.97600048780441, + "p95": 102.55999863147736, + "p99": 106.36799782514572 }, "roundtrip": { - "p50": 120.4800009727478, - "p90": 124.89599734544754, - "p95": 126.27199292182922, - "p99": 140.99200069904327 + "p50": 160.16000509262085, + "p90": 165.0560051202774, + "p95": 166.75199568271637, + "p99": 179.77599799633026 }, "isolatedSum": { - "p50": 135.80800592899323, - "p90": 144.15999501943588, - "p95": 152.99199521541595, - "p99": 161.53600066900253 + "p50": 175.20000040531158, + "p90": 184.7359985113144, + "p95": 193.59999895095825, + "p99": 209.56800132989883 }, "roundtripMeasured": true, - 
"dispatchLogicalBytes": 5533696, - "combineLogicalBytes": 5533696, - "fanoutMean": 1.5078125, - "recvTokensMax": 256, - "stragglerRank": 4, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -2401,35 +2606,35 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 91.77599847316742, - "p90": 94.17600184679031, - "p95": 95.74399888515472, - "p99": 114.20799791812897 + "p50": 94.46399658918381, + "p90": 96.79999947547913, + "p95": 98.88000041246414, + "p99": 114.656001329422 }, "combine": { - "p50": 116.28799885511398, - "p90": 119.19999867677689, - "p95": 126.36800110340118, - "p99": 130.43199479579926 + "p50": 116.03199690580368, + "p90": 117.44000017642975, + "p95": 117.91999638080597, + "p99": 127.83999741077423 }, "roundtrip": { - "p50": 194.0159946680069, - "p90": 201.08799636363983, - "p95": 202.84800231456757, - "p99": 212.92799711227417 + "p50": 195.6160068511963, + "p90": 200.41599869728088, + "p95": 201.664000749588, + "p99": 227.35999524593353 }, "isolatedSum": { - "p50": 208.0639973282814, - "p90": 213.3760005235672, - "p95": 222.1119999885559, - "p99": 244.63999271392822 + "p50": 210.4959934949875, + "p90": 214.23999965190887, + "p95": 216.7999967932701, + "p99": 242.49599874019623 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 22650880, - "combineLogicalBytes": 22650880, - "fanoutMean": 1.54296875, - "recvTokensMax": 1024, - "stragglerRank": 4, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 @@ -2437,28 +2642,29 @@ ] }, { - "id": "cx-f0dd83d8", - "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|ac583971f94b176", - "colorKey": "b300_c1ad910f", - "comparisonKey": 
"80e2eefb7447672f", + "id": "cx-2fa7319c", + "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||c774c8e4abb34da", + "colorKey": "b300_c9569580", + "comparisonKey": "89fa2de88509570c", "schemaVersion": 3, - "generatedAt": "2026-06-26T17:41:08.828331+00:00", + "generatedAt": "2026-06-27T00:54:19.552522+00:00", "status": "valid", "publicationStatus": "official", - "runner": "b300-nv_15", + "runner": "b300-nv_01", "sku": "b300", "backend": "deepep", "phase": "decode", "mode": "normal", - "resourceMode": "normalized", - "suite": "resource-constrained", + "resourceMode": "tuned", + "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", "topologyClass": "b300-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep · bf16 (norm)", + "label": "B300 EP8 · deepep · bf16", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, @@ -2473,14 +2679,14 @@ "combineQuantMode": "none" }, "resourceProfile": { - "requestedFraction": 0.18, - "achievedFraction": 0.1824, - "configuredUnits": 27, + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, "deviceUnits": 148, - "resourceClass": "resource-constrained", - "conformanceClass": "resource-conforming", + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", "fixedKernel": false, - "paretoEligible": true + "paretoEligible": false }, "placement": { "kind": "packed", @@ -2489,8 +2695,8 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "ac583971f94b176", - "workloadId": "set:8:d8d49658059863f2", + "traceSignature": "c774c8e4abb34da", + "workloadId": "set:5:d8d49658059863f2", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -2498,45 +2704,45 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": 
"SemiAnalysisAI/InferenceX", "run": { - "id": "28254469772", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254469772", - "createdAt": "2026-06-26T17:29:52Z", - "sha": "60dec7d70f554e252fec87709e2be52752947db1" + "id": "28273513209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28273513209", + "createdAt": "2026-06-27T00:54:19.552522+00:00", + "sha": "2c15d9415503e9ccb84cd49cf446a122796efc1e" }, "rows": [ { "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 56.992001831531525, - "p90": 59.039998799562454, - "p95": 61.824001371860504, - "p99": 73.44000041484833 + "p50": 56.41600117087364, + "p90": 58.46399813890457, + "p95": 60.95999851822853, + "p99": 71.55200093984604 }, "combine": { - "p50": 66.3359984755516, - "p90": 67.4239993095398, - "p95": 68.15999746322632, - "p99": 77.47200131416321 + "p50": 66.27199798822403, + "p90": 67.55200028419495, + "p95": 68.28799843788147, + "p99": 77.27999985218048 }, "roundtrip": { - "p50": 106.81600123643875, - "p90": 113.08799684047699, - "p95": 114.23999816179276, - "p99": 135.6479972600937 + "p50": 105.85600137710571, + "p90": 112.28799819946289, + "p95": 113.3119985461235, + "p99": 124.09599870443344 }, "isolatedSum": { - "p50": 123.32800030708313, - "p90": 126.46399810910225, - "p95": 129.98399883508682, - "p99": 150.91200172901154 + "p50": 122.68799915909767, + "p90": 126.01599842309952, + "p95": 129.24799695611, + "p99": 148.83200079202652 }, "roundtripMeasured": true, "dispatchLogicalBytes": 630784, "combineLogicalBytes": 630784, "fanoutMean": 5.5, "recvTokensMax": 7, - "stragglerRank": 4, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -2545,35 +2751,35 @@ "tokensPerRank": 2, "globalTokens": 16, "dispatch": { - "p50": 56.992001831531525, - "p90": 58.78400057554245, - "p95": 60.92799827456474, - "p99": 73.21599870920181 + "p50": 56.60799890756607, + "p90": 58.04799869656563, + "p95": 59.39200147986412, + "p99": 63.64800035953522 }, 
"combine": { - "p50": 67.32799857854843, - "p90": 69.11999732255936, - "p95": 70.65600156784058, - "p99": 79.93599772453308 + "p50": 67.03999638557434, + "p90": 68.7360018491745, + "p95": 69.15199756622314, + "p99": 77.2159993648529 }, "roundtrip": { - "p50": 106.9440022110939, - "p90": 109.40799862146378, - "p95": 110.88000237941742, - "p99": 119.39200013875961 + "p50": 107.04000294208527, + "p90": 109.76000130176544, + "p95": 111.35999858379364, + "p99": 119.19999867677689 }, "isolatedSum": { - "p50": 124.32000041007996, - "p90": 127.9039978981018, - "p95": 131.58399984240532, - "p99": 153.1519964337349 + "p50": 123.64799529314041, + "p90": 126.78400054574013, + "p95": 128.54399904608727, + "p99": 140.86399972438812 }, "roundtripMeasured": true, "dispatchLogicalBytes": 1232896, "combineLogicalBytes": 1232896, "fanoutMean": 5.375, "recvTokensMax": 13, - "stragglerRank": 4, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -2582,35 +2788,35 @@ "tokensPerRank": 4, "globalTokens": 32, "dispatch": { - "p50": 57.792000472545624, - "p90": 59.39200147986412, - "p95": 61.28000095486641, - "p99": 68.09599697589874 + "p50": 58.81600081920624, + "p90": 64.44799900054932, + "p95": 66.01600348949432, + "p99": 71.61600142717361 }, "combine": { - "p50": 67.80800223350525, - "p90": 69.66400146484375, - "p95": 76.99199765920639, - "p99": 78.75200361013412 + "p50": 67.26399809122086, + "p90": 69.63200122117996, + "p95": 77.15199887752533, + "p99": 78.91199737787247 }, "roundtrip": { - "p50": 116.22399836778641, - "p90": 122.68800288438797, - "p95": 124.35200065374374, - "p99": 127.93600559234619 + "p50": 122.20799922943115, + "p90": 125.18399953842163, + "p95": 125.91999769210815, + "p99": 130.3360015153885 }, "isolatedSum": { - "p50": 125.60000270605087, - "p90": 129.05600294470787, - "p95": 138.2719986140728, - "p99": 146.84800058603287 + "p50": 126.0799989104271, + "p90": 134.08000022172928, + "p95": 143.16800236701965, + "p99": 150.52799880504608 }, 
"roundtripMeasured": true, "dispatchLogicalBytes": 2480128, "combineLogicalBytes": 2480128, "fanoutMean": 5.40625, "recvTokensMax": 29, - "stragglerRank": 4, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -2619,28 +2825,28 @@ "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 59.29600074887276, - "p90": 61.15199998021126, - "p95": 62.39999830722809, - "p99": 68.1919977068901 + "p50": 59.42400172352791, + "p90": 64.25599753856659, + "p95": 67.87200272083282, + "p99": 74.62400197982788 }, "combine": { - "p50": 68.38399916887283, - "p90": 77.31200009584427, - "p95": 77.72800326347351, - "p99": 78.78399640321732 + "p50": 68.9919963479042, + "p90": 78.015998005867, + "p95": 78.62400263547897, + "p99": 81.88799768686295 }, "roundtrip": { - "p50": 120.25599926710129, - "p90": 125.82400441169739, - "p95": 126.75200402736664, - "p99": 133.44000279903412 + "p50": 119.39200013875961, + "p90": 125.05599856376648, + "p95": 126.17599964141846, + "p99": 130.36799430847168 }, "isolatedSum": { - "p50": 127.67999991774559, - "p90": 138.46400007605553, - "p95": 140.1280015707016, - "p99": 146.97599411010742 + "p50": 128.4159980714321, + "p90": 142.2719955444336, + "p95": 146.4960053563118, + "p99": 156.51199966669083 }, "roundtripMeasured": true, "dispatchLogicalBytes": 4974592, @@ -2656,109 +2862,217 @@ "tokensPerRank": 16, "globalTokens": 128, "dispatch": { - "p50": 62.78400123119354, - "p90": 69.023996591568, - "p95": 71.03999704122543, - "p99": 76.73600316047668 + "p50": 66.68800115585327, + "p90": 73.7600028514862, + "p95": 75.13599842786789, + "p99": 80.35200089216232 }, "combine": { - "p50": 77.2479996085167, + "p50": 69.88800317049026, "p90": 78.5600021481514, - "p95": 78.72000336647034, - "p99": 80.86399734020233 + "p95": 78.75200361013412, + "p99": 82.56000280380249 }, "roundtrip": { - "p50": 119.61600184440613, - "p90": 122.72000312805176, - "p95": 124.35200065374374, - "p99": 131.29599392414093 + "p50": 119.26399916410446, + "p90": 
121.47200107574463, + "p95": 123.52000176906586, + "p99": 127.68000364303589 }, "isolatedSum": { - "p50": 140.03200083971024, - "p90": 147.5839987397194, - "p95": 149.76000040769577, - "p99": 157.60000050067902 + "p50": 136.57600432634354, + "p90": 152.3200049996376, + "p95": 153.888002038002, + "p99": 162.9120036959648 }, "roundtripMeasured": true, "dispatchLogicalBytes": 9920512, "combineLogicalBytes": 9920512, "fanoutMean": 5.40625, "recvTokensMax": 92, - "stragglerRank": 5, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 - }, + } + ] + }, + { + "id": "cx-dc6ca42c", + "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|small-amplitude|none|none|0|tuned||8c8497a77d9085d", + "colorKey": "b300_c9569580", + "comparisonKey": "8a9fa1be98f83eb3", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:06:17.025326+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "b300-nv_14", + "sku": "b300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "small-amplitude", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + 
"scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "8c8497a77d9085d", + "workloadId": "set:4:d8d49658059863f2", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28272146490", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272146490", + "createdAt": "2026-06-27T00:06:17.025326+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ { - "tokensPerRank": 32, - "globalTokens": 256, + "tokensPerRank": 1, + "globalTokens": 8, "dispatch": { - "p50": 69.24799829721451, - "p90": 70.91200351715088, - "p95": 73.69600236415863, - "p99": 81.69600367546082 + "p50": 55.84000051021576, + "p90": 57.95200169086456, + "p95": 60.54399907588959, + "p99": 68.09599697589874 }, "combine": { - "p50": 78.59200239181519, - "p90": 79.80799674987793, - "p95": 80.73599636554718, - "p99": 90.94399958848953 + "p50": 66.20799750089645, + "p90": 66.94400310516357, + "p95": 67.52000004053116, + "p99": 90.87999910116196 }, "roundtrip": { - "p50": 130.68799674510956, - "p90": 135.23200154304504, - "p95": 136.51199638843536, - "p99": 140.47999680042267 - }, - "isolatedSum": { - "p50": 147.8400006890297, - "p90": 150.7200002670288, - "p95": 154.4319987297058, - "p99": 172.64000326395035 + "p50": 106.04800283908844, + "p90": 111.07199639081955, + "p95": 112.67200112342834, + "p99": 125.15200674533844 + }, + "isolatedSum": { + "p50": 122.04799801111221, + "p90": 124.89600479602814, + "p95": 128.06399911642075, + "p99": 158.9759960770607 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19726336, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 182, - "stragglerRank": 7, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + 
"recvTokensMax": 7, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 64, - "globalTokens": 512, + "tokensPerRank": 8, + "globalTokens": 64, "dispatch": { - "p50": 82.49600231647491, - "p90": 92.70399808883667, - "p95": 95.0080007314682, - "p99": 99.45599734783173 + "p50": 59.07199904322624, + "p90": 62.3680017888546, + "p95": 65.08799642324448, + "p99": 71.00799679756165 }, "combine": { - "p50": 92.25600212812424, - "p90": 100.09600222110748, - "p95": 102.36799716949463, - "p99": 106.65600001811981 + "p50": 69.18399780988693, + "p90": 78.14399898052216, + "p95": 78.59200239181519, + "p99": 88.22400122880936 }, "roundtrip": { - "p50": 158.65600109100342, - "p90": 163.00800442695618, - "p95": 164.19200599193573, - "p99": 169.50400173664093 + "p50": 119.07199770212173, + "p90": 124.32000041007996, + "p95": 125.37600100040436, + "p99": 140.06400108337402 }, "isolatedSum": { - "p50": 174.75200444459915, - "p90": 192.80000030994415, - "p95": 197.37599790096283, - "p99": 206.11199736595154 + "p50": 128.25599685311317, + "p90": 140.51200076937675, + "p95": 143.67999881505966, + "p99": 159.231998026371 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 38993920, - "combineLogicalBytes": 38993920, - "fanoutMean": 5.3125, - "recvTokensMax": 367, - "stragglerRank": 7, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 68.70400160551071, + "p90": 73.66400212049484, + "p95": 75.13599842786789, + "p99": 93.56799721717834 + }, + "combine": { + "p50": 78.62400263547897, + "p90": 79.6160027384758, + "p95": 81.44000172615051, + "p99": 91.48799628019333 + }, + "roundtrip": { + "p50": 130.65600395202637, + "p90": 135.71199774742126, + "p95": 136.76799833774567, + "p99": 144.1279947757721 + }, + "isolatedSum": { 
+ "p50": 147.32800424098969, + "p90": 153.28000485897064, + "p95": 156.5760001540184, + "p99": 185.05599349737167 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -2767,28 +3081,28 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 93.91999989748001, - "p90": 95.83999961614609, - "p95": 98.04800152778625, - "p99": 104.99200224876404 + "p50": 93.08800101280212, + "p90": 98.78399968147278, + "p95": 100.63999891281128, + "p99": 110.17599701881409 }, "combine": { - "p50": 115.35999923944473, - "p90": 115.93600362539291, - "p95": 116.60800129175186, - "p99": 119.45600062608719 + "p50": 115.39199948310852, + "p90": 116.28799885511398, + "p95": 117.21599847078323, + "p99": 126.39999389648438 }, "roundtrip": { - "p50": 192.51200556755066, - "p90": 198.88000190258026, - "p95": 199.48799908161163, - "p99": 209.47200059890747 + "p50": 192.25600361824036, + "p90": 198.2080042362213, + "p95": 198.7839937210083, + "p99": 203.61599326133728 }, "isolatedSum": { - "p50": 209.27999913692474, - "p90": 211.776003241539, - "p95": 214.65600281953812, - "p99": 224.44800287485123 + "p50": 208.48000049591064, + "p90": 215.07199853658676, + "p95": 217.8559973835945, + "p99": 236.57599091529846 }, "roundtripMeasured": true, "dispatchLogicalBytes": 77672448, @@ -2803,50 +3117,51 @@ ] }, { - "id": "cx-dede7717", - "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|decode|normal|none|none|0|normalized|0.18|ffa946582edb500", - "colorKey": "b300_0622d929", - "comparisonKey": "c4ede73885f09b56", + "id": "cx-a995e296", + "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|wide-dynamic-range|none|none|0|tuned||8c8497a77d9085d", + "colorKey": "b300_c9569580", + "comparisonKey": "fe9431c5beaaf675", "schemaVersion": 3, - "generatedAt": 
"2026-06-26T18:12:16.850895+00:00", + "generatedAt": "2026-06-27T00:06:39.072562+00:00", "status": "valid", "publicationStatus": "official", - "runner": "b300-nv_17", + "runner": "b300-nv_03", "sku": "b300", "backend": "deepep", "phase": "decode", "mode": "normal", - "resourceMode": "normalized", - "suite": "resource-constrained", + "resourceMode": "tuned", + "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", "topologyClass": "b300-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep · bf16 (norm) · balanced", + "label": "B300 EP8 · deepep · bf16", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, "experts": 256, - "routing": "balanced", - "routingLabel": "balanced", + "routing": "uniform", + "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, "dispatchDtype": "bf16", - "activationProfile": "normal", + "activationProfile": "wide-dynamic-range", "combineQuantMode": "none" }, "resourceProfile": { - "requestedFraction": 0.18, - "achievedFraction": 0.1824, - "configuredUnits": 27, + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, "deviceUnits": 148, - "resourceClass": "resource-constrained", - "conformanceClass": "resource-conforming", + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", "fixedKernel": false, - "paretoEligible": true + "paretoEligible": false }, "placement": { "kind": "packed", @@ -2855,8 +3170,8 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "ffa946582edb500", - "workloadId": "set:8:7af12818400d6348", + "traceSignature": "8c8497a77d9085d", + "workloadId": "set:4:d8d49658059863f2", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -2864,267 +3179,338 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": 
"SemiAnalysisAI/InferenceX", "run": { - "id": "28254508907", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254508907", - "createdAt": "2026-06-26T17:30:32Z", - "sha": "60dec7d70f554e252fec87709e2be52752947db1" + "id": "28272150514", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272150514", + "createdAt": "2026-06-27T00:06:39.072562+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" }, "rows": [ { "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 57.69599974155426, - "p90": 60.06399914622307, - "p95": 61.664000153541565, - "p99": 77.7600035071373 + "p50": 1758.687973022461, + "p90": 2565.7920837402344, + "p95": 2910.815954208374, + "p99": 3400.576114654541 }, "combine": { - "p50": 68.03199648857117, - "p90": 69.76000219583511, - "p95": 76.92799717187881, - "p99": 78.52800190448761 + "p50": 1759.8719596862793, + "p90": 1907.871961593628, + "p95": 2670.1760292053223, + "p99": 2940.095901489258 }, "roundtrip": { - "p50": 107.80800133943558, - "p90": 110.59200018644333, - "p95": 112.19199746847153, - "p99": 128.76799702644348 + "p50": 1802.39999294281, + "p90": 1987.0719909667969, + "p95": 2666.1760807037354, + "p99": 2924.000024795532 }, "isolatedSum": { - "p50": 125.72799623012543, - "p90": 129.82400134205818, - "p95": 138.59199732542038, - "p99": 156.2880054116249 + "p50": 3518.5599327087402, + "p90": 4473.664045333862, + "p95": 5580.991983413696, + "p99": 6340.672016143799 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 917504, - "combineLogicalBytes": 917504, - "fanoutMean": 8, - "recvTokensMax": 8, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2, - "globalTokens": 16, + "tokensPerRank": 8, + "globalTokens": 64, "dispatch": { - "p50": 58.559998869895935, - "p90": 60.15999987721443, - "p95": 61.664000153541565, - "p99": 
72.76800274848938 + "p50": 1754.8799514770508, + "p90": 2488.703966140747, + "p95": 2823.359966278076, + "p99": 3391.4880752563477 }, "combine": { - "p50": 68.25599819421768, - "p90": 76.86399668455124, - "p95": 77.53600180149078, - "p99": 79.9039974808693 + "p50": 1760.4479789733887, + "p90": 1861.184000968933, + "p95": 2647.264003753662, + "p99": 2955.8401107788086 }, "roundtrip": { - "p50": 116.22399836778641, - "p90": 122.11199849843979, - "p95": 123.07199835777283, - "p99": 127.9039978981018 + "p50": 1819.2960023880005, + "p90": 1958.5280418395996, + "p95": 2686.271905899048, + "p99": 2968.319892883301 }, "isolatedSum": { - "p50": 126.81599706411362, - "p90": 137.02399656176567, - "p95": 139.20000195503235, - "p99": 152.67200022935867 + "p50": 3515.3279304504395, + "p90": 4349.88796710968, + "p95": 5470.623970031738, + "p99": 6347.328186035156 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1835008, - "combineLogicalBytes": 1835008, - "fanoutMean": 8, - "recvTokensMax": 16, - "stragglerRank": 4, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 4, - "globalTokens": 32, + "tokensPerRank": 32, + "globalTokens": 256, "dispatch": { - "p50": 58.59199911355972, - "p90": 60.5119988322258, - "p95": 61.664000153541565, - "p99": 69.66400146484375 + "p50": 1767.3920392990112, + "p90": 2204.767942428589, + "p95": 2829.9520015716553, + "p99": 3398.303985595703 }, "combine": { - "p50": 70.01599669456482, - "p90": 78.40000092983246, - "p95": 78.52800190448761, - "p99": 81.216000020504 + "p50": 1764.0960216522217, + "p90": 1887.1040344238281, + "p95": 2647.615909576416, + "p99": 3015.5839920043945 }, "roundtrip": { - "p50": 121.66400253772736, - "p90": 125.37600100040436, - "p95": 127.20000743865967, - "p99": 135.74400544166565 + "p50": 1835.6800079345703, + "p90": 1997.1840381622314, + "p95": 
2681.3440322875977, + "p99": 2967.072010040283 }, "isolatedSum": { - "p50": 128.60799580812454, - "p90": 138.91199976205826, - "p95": 140.19200205802917, - "p99": 150.88000148534775 + "p50": 3531.488060951233, + "p90": 4091.871976852417, + "p95": 5477.567911148071, + "p99": 6413.887977600098 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 3670016, - "combineLogicalBytes": 3670016, - "fanoutMean": 8, - "recvTokensMax": 32, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 59.61599946022034, - "p90": 61.95199862122536, - "p95": 63.90400230884552, - "p99": 71.52000069618225 + "p50": 1790.7520532608032, + "p90": 2270.848035812378, + "p95": 2845.247983932495, + "p99": 3459.712028503418 }, "combine": { - "p50": 77.40800082683563, - "p90": 78.65600287914276, - "p95": 78.94399762153625, - "p99": 89.28000181913376 + "p50": 1809.7599744796753, + "p90": 1956.9599628448486, + "p95": 2685.7919692993164, + "p99": 3029.952049255371 }, "roundtrip": { - "p50": 119.80800330638885, - "p90": 122.65600264072418, - "p95": 124.83199685811996, - "p99": 136.83199882507324 + "p50": 1890.3039693832397, + "p90": 2169.4719791412354, + "p95": 2888.256072998047, + "p99": 3985.24808883667 }, "isolatedSum": { - "p50": 137.02400028705597, - "p90": 140.60800150036812, - "p95": 142.84799993038177, - "p99": 160.800002515316 + "p50": 3600.5120277404785, + "p90": 4227.807998657227, + "p95": 5531.0399532318115, + "p99": 6489.664077758789 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 7340032, - "combineLogicalBytes": 7340032, - "fanoutMean": 8, - "recvTokensMax": 64, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, "stragglerRank": 4, "correct": true, 
"samplesPooled": 600, "trials": 3 - }, + } + ] + }, + { + "id": "cx-b81422f4", + "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|zeros|none|none|0|tuned||8c8497a77d9085d", + "colorKey": "b300_c9569580", + "comparisonKey": "d97d7a8231265a6c", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:06:13.336317+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "b300-nv_13", + "sku": "b300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "zeros", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "8c8497a77d9085d", + "workloadId": "set:4:d8d49658059863f2", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28272142980", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272142980", + "createdAt": 
"2026-06-27T00:06:13.336317+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ { - "tokensPerRank": 16, - "globalTokens": 128, + "tokensPerRank": 1, + "globalTokens": 8, "dispatch": { - "p50": 73.91999661922455, - "p90": 76.09599828720093, - "p95": 78.04799824953079, - "p99": 85.24800091981888 + "p50": 56.63999915122986, + "p90": 59.26400050520897, + "p95": 62.04799935221672, + "p99": 73.85600358247757 }, "combine": { - "p50": 78.40000092983246, - "p90": 79.1039988398552, - "p95": 79.39200103282928, - "p99": 85.08799970149994 + "p50": 66.43199920654297, + "p90": 67.4239993095398, + "p95": 68.25599819421768, + "p99": 78.04799824953079 }, "roundtrip": { - "p50": 121.44000083208084, - "p90": 126.94400548934937, - "p95": 128.92800569534302, - "p99": 145.31199634075165 + "p50": 106.78400099277496, + "p90": 111.39199882745743, + "p95": 113.34399878978729, + "p99": 117.0239970088005 }, "isolatedSum": { - "p50": 152.319997549057, - "p90": 155.19999712705612, - "p95": 157.43999928236008, - "p99": 170.33600062131882 + "p50": 123.07199835777283, + "p90": 126.68799981474876, + "p95": 130.3039975464344, + "p99": 151.90400183200836 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 14680064, - "combineLogicalBytes": 14680064, - "fanoutMean": 8, - "recvTokensMax": 128, - "stragglerRank": 5, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 32, - "globalTokens": 256, + "tokensPerRank": 8, + "globalTokens": 64, "dispatch": { - "p50": 71.07199728488922, - "p90": 72.86400347948074, - "p95": 73.47200065851212, - "p99": 82.40000158548355 + "p50": 59.55199897289276, + "p90": 61.824001371860504, + "p95": 63.680000603199005, + "p99": 71.07199728488922 }, "combine": { - "p50": 80.06399869918823, - "p90": 81.37600123882294, - "p95": 81.82399719953537, - "p99": 89.88799899816513 + "p50": 68.92800331115723, 
+ "p90": 77.7600035071373, + "p95": 77.95199751853943, + "p99": 78.65600287914276 }, "roundtrip": { - "p50": 134.36800241470337, - "p90": 141.56800508499146, - "p95": 143.99999380111694, - "p99": 148.80000054836273 + "p50": 120.03199756145477, + "p90": 124.4800016283989, + "p95": 125.95200538635254, + "p99": 145.53600549697876 }, "isolatedSum": { - "p50": 151.13599598407745, - "p90": 154.24000471830368, - "p95": 155.29599785804749, - "p99": 172.28800058364868 + "p50": 128.48000228405, + "p90": 139.5840048789978, + "p95": 141.63199812173843, + "p99": 149.72800016403198 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 29360128, - "combineLogicalBytes": 29360128, - "fanoutMean": 8, - "recvTokensMax": 256, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 64, - "globalTokens": 512, + "tokensPerRank": 32, + "globalTokens": 256, "dispatch": { - "p50": 85.34400165081024, - "p90": 89.63199704885483, - "p95": 91.93599969148636, - "p99": 96.57599776983261 + "p50": 68.83200258016586, + "p90": 72.38399982452393, + "p95": 75.16799867153168, + "p99": 78.17599922418594 }, "combine": { - "p50": 93.98400038480759, - "p90": 103.10400277376175, - "p95": 103.29599678516388, - "p99": 105.92000186443329 + "p50": 78.65600287914276, + "p90": 79.71200346946716, + "p95": 80.57600259780884, + "p99": 100.92800110578537 }, "roundtrip": { - "p50": 169.3439930677414, - "p90": 172.89599776268005, - "p95": 175.87199807167053, - "p99": 196.16000354290009 + "p50": 130.72000443935394, + "p90": 134.2719942331314, + "p95": 135.74400544166565, + "p99": 155.7759940624237 }, "isolatedSum": { - "p50": 179.32800203561783, - "p90": 192.73599982261658, - "p95": 195.23199647665024, - "p99": 202.4959996342659 + "p50": 147.48800545930862, + "p90": 152.0960032939911, + "p95": 155.74400126934052, + "p99": 179.1040003299713 }, "roundtripMeasured": 
true, - "dispatchLogicalBytes": 58720256, - "combineLogicalBytes": 58720256, - "fanoutMean": 8, - "recvTokensMax": 512, - "stragglerRank": 6, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -3133,35 +3519,35 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 107.13600367307663, - "p90": 109.79200154542923, - "p95": 111.7120012640953, - "p99": 131.96800649166107 + "p50": 93.34400296211243, + "p90": 95.93600034713745, + "p95": 99.2640033364296, + "p99": 107.61599987745285 }, "combine": { - "p50": 130.49599528312683, - "p90": 139.52000439167023, - "p95": 139.8719996213913, - "p99": 140.54399728775024 + "p50": 115.4559999704361, + "p90": 116.44800007343292, + "p95": 117.0559972524643, + "p99": 126.43200159072876 }, "roundtrip": { - "p50": 231.1680018901825, - "p90": 235.00800132751465, - "p95": 236.7040067911148, - "p99": 257.6960027217865 + "p50": 192.9599940776825, + "p90": 198.81600141525269, + "p95": 199.8080015182495, + "p99": 274.1439938545227 }, "isolatedSum": { - "p50": 237.63199895620346, - "p90": 249.31200593709946, - "p95": 251.5840008854866, - "p99": 272.5120037794113 + "p50": 208.80000293254852, + "p90": 212.38400042057037, + "p95": 216.3200005888939, + "p99": 234.0480014681816 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 117440512, - "combineLogicalBytes": 117440512, - "fanoutMean": 8, - "recvTokensMax": 1024, - "stragglerRank": 4, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 @@ -3169,34 +3555,35 @@ ] }, { - "id": "cx-e56568fe", - "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|decode|normal|none|none|0|normalized|0.18|14ded8461f2636c", - "colorKey": "b300_01ab5b1a", - "comparisonKey": 
"1f56c3705f670037", + "id": "cx-53b3c366", + "identity": "b300|deepep|7168|8|256|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "b300_307ed708", + "comparisonKey": "8f32ac097503699d", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:38:03.696815+00:00", + "generatedAt": "2026-06-27T09:50:59.262697+00:00", "status": "valid", "publicationStatus": "official", - "runner": "b300-nv_07", + "runner": "b300-nv_14", "sku": "b300", "backend": "deepep", "phase": "decode", "mode": "normal", - "resourceMode": "normalized", - "suite": "resource-constrained", + "resourceMode": "tuned", + "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", + "measurementContract": "runtime-visible-v1", "topologyClass": "b300-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep · bf16 (norm) · zipf", + "label": "B300 EP8 · deepep · bf16", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, "experts": 256, - "routing": "zipf", - "routingLabel": "zipf", + "routing": "uniform", + "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, @@ -3205,14 +3592,14 @@ "combineQuantMode": "none" }, "resourceProfile": { - "requestedFraction": 0.18, - "achievedFraction": 0.1824, - "configuredUnits": 27, + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, "deviceUnits": 148, - "resourceClass": "resource-constrained", - "conformanceClass": "resource-conforming", + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", "fixedKernel": false, - "paretoEligible": true + "paretoEligible": false }, "placement": { "kind": "packed", @@ -3221,8 +3608,8 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "14ded8461f2636c", - "workloadId": "set:8:f5576e2b712d38c3", + "traceSignature": "ac583971f94b176", + "workloadId": 
"set:8:d8d49658059863f2", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -3230,45 +3617,45 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271231753", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271231753", - "createdAt": "2026-06-26T23:36:29Z", - "sha": "ee4ffe77871d0200cb4a78c96d3ae9f692e9af02" + "id": "28285677323", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285677323", + "createdAt": "2026-06-27T09:50:59.262697+00:00", + "sha": "149586650dbed5b7579537347e9489d5b41543c1" }, "rows": [ { "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 55.904000997543335, - "p90": 59.776000678539276, - "p95": 65.72800129652023, - "p99": 85.11999994516373 + "p50": 57.50399827957153, + "p90": 59.67999994754791, + "p95": 61.69600039720535, + "p99": 81.7599967122078 }, "combine": { - "p50": 65.60000032186508, - "p90": 66.3679987192154, - "p95": 66.91200286149979, - "p99": 76.86399668455124 + "p50": 67.00800359249115, + "p90": 68.1919977068901, + "p95": 69.5360004901886, + "p99": 77.63200253248215 }, "roundtrip": { - "p50": 105.05600273609161, - "p90": 111.35999858379364, - "p95": 112.96000331640244, - "p99": 121.05599790811539 + "p50": 107.51999914646149, + "p90": 112.92800307273865, + "p95": 114.49600011110306, + "p99": 130.68799674510956 }, "isolatedSum": { - "p50": 121.50400131940842, - "p90": 126.14399939775467, - "p95": 132.64000415802002, - "p99": 161.98399662971497 + "p50": 124.51200187206268, + "p90": 127.87199765443802, + "p95": 131.23200088739395, + "p99": 159.39199924468994 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 444416, - "combineLogicalBytes": 444416, - "fanoutMean": 3.875, - "recvTokensMax": 8, - "stragglerRank": 0, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + 
"stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -3277,35 +3664,35 @@ "tokensPerRank": 2, "globalTokens": 16, "dispatch": { - "p50": 55.84000051021576, - "p90": 57.56799876689911, - "p95": 60.095999389886856, - "p99": 72.4480003118515 + "p50": 57.50399827957153, + "p90": 59.58399921655655, + "p95": 61.983998864889145, + "p99": 74.20799881219864 }, "combine": { - "p50": 65.69600105285645, - "p90": 66.3679987192154, - "p95": 66.84800237417221, - "p99": 69.2799985408783 + "p50": 67.32799857854843, + "p90": 69.43999975919724, + "p95": 76.9599974155426, + "p99": 81.50400221347809 }, "roundtrip": { - "p50": 104.76800054311752, - "p90": 109.40799862146378, - "p95": 112.03200370073318, - "p99": 159.19999778270721 + "p50": 108.06400328874588, + "p90": 110.88000237941742, + "p95": 113.50400000810623, + "p99": 120.51200121641159 }, "isolatedSum": { - "p50": 121.5360015630722, - "p90": 123.9359974861145, - "p95": 126.94400176405907, - "p99": 141.7279988527298 + "p50": 124.83199685811996, + "p90": 129.02399897575378, + "p95": 138.94399628043175, + "p99": 155.71200102567673 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 845824, - "combineLogicalBytes": 845824, - "fanoutMean": 3.6875, - "recvTokensMax": 16, - "stragglerRank": 7, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -3314,35 +3701,35 @@ "tokensPerRank": 4, "globalTokens": 32, "dispatch": { - "p50": 57.37600103020668, - "p90": 60.80000102519989, - "p95": 65.76000154018402, - "p99": 95.8079993724823 + "p50": 59.51999872922897, + "p90": 61.76000088453293, + "p95": 63.64800035953522, + "p99": 68.12799721956253 }, "combine": { - "p50": 66.59200042486191, - "p90": 77.18399912118912, - "p95": 77.82399654388428, - "p99": 79.16799932718277 + "p50": 68.86400282382965, + "p90": 77.37600058317184, + "p95": 78.04799824953079, + "p99": 80.54400235414505 }, 
"roundtrip": { - "p50": 106.91200196743011, - "p90": 112.38399893045425, - "p95": 115.23199826478958, - "p99": 124.22399967908859 + "p50": 123.90399724245071, + "p90": 126.75200402736664, + "p95": 127.20000743865967, + "p99": 130.94399869441986 }, "isolatedSum": { - "p50": 123.96800145506859, - "p90": 137.984000146389, - "p95": 143.5839980840683, - "p99": 174.97599869966507 + "p50": 128.38400155305862, + "p90": 139.13600146770477, + "p95": 141.695998609066, + "p99": 148.67199957370758 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1691648, - "combineLogicalBytes": 1691648, - "fanoutMean": 3.6875, - "recvTokensMax": 32, - "stragglerRank": 7, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -3351,34 +3738,34 @@ "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 58.6559996008873, - "p90": 63.231997191905975, - "p95": 65.60000032186508, - "p99": 69.47200000286102 + "p50": 60.447998344898224, + "p90": 65.5680000782013, + "p95": 67.71200150251389, + "p99": 73.88799637556076 }, "combine": { - "p50": 68.12799721956253, - "p90": 76.48000121116638, - "p95": 77.15199887752533, - "p99": 84.1279998421669 + "p50": 69.023996591568, + "p90": 77.63200253248215, + "p95": 78.27199995517731, + "p99": 79.68000322580338 }, "roundtrip": { - "p50": 122.11199849843979, - "p90": 125.34399330615997, - "p95": 128.4479945898056, - "p99": 151.5520066022873 + "p50": 120.7360029220581, + "p90": 126.11199915409088, + "p95": 127.48800218105316, + "p99": 135.6160044670105 }, "isolatedSum": { - "p50": 126.78399682044983, - "p90": 139.71199840307236, - "p95": 142.7519991993904, - "p99": 153.59999984502792 + "p50": 129.47199493646622, + "p90": 143.20000261068344, + "p95": 145.9840014576912, + "p99": 153.56799960136414 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 3354624, - "combineLogicalBytes": 3354624, - "fanoutMean": 3.65625, 
- "recvTokensMax": 64, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, "stragglerRank": 4, "correct": true, "samplesPooled": 600, @@ -3388,35 +3775,35 @@ "tokensPerRank": 16, "globalTokens": 128, "dispatch": { - "p50": 59.487998485565186, - "p90": 65.24799764156342, - "p95": 67.00800359249115, - "p99": 73.56800138950348 + "p50": 62.55999952554703, + "p90": 69.08799707889557, + "p95": 71.35999947786331, + "p99": 78.23999971151352 }, "combine": { - "p50": 68.12799721956253, - "p90": 77.34400033950806, - "p95": 77.88799703121185, - "p99": 89.53599631786346 + "p50": 77.66400277614594, + "p90": 79.1039988398552, + "p95": 79.45600152015686, + "p99": 81.216000020504 }, "roundtrip": { - "p50": 119.1679984331131, - "p90": 124.67200309038162, - "p95": 125.69600343704224, - "p99": 134.5600038766861 + "p50": 120.25599926710129, + "p90": 122.65600264072418, + "p95": 124.15999919176102, + "p99": 136.63999736309052 }, "isolatedSum": { - "p50": 127.61599570512772, - "p90": 142.59199798107147, - "p95": 144.896000623703, - "p99": 163.10399770736694 + "p50": 140.22400230169296, + "p90": 148.19199591875076, + "p95": 150.81600099802017, + "p99": 159.45599973201752 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 6537216, - "combineLogicalBytes": 6537216, - "fanoutMean": 3.5625, - "recvTokensMax": 127, - "stragglerRank": 4, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 @@ -3425,35 +3812,35 @@ "tokensPerRank": 32, "globalTokens": 256, "dispatch": { - "p50": 74.52800124883652, - "p90": 76.51200145483017, - "p95": 77.18399912118912, - "p99": 81.7599967122078 + "p50": 69.92000341415405, + "p90": 75.13599842786789, + "p95": 75.80800354480743, + "p99": 80.73599636554718 }, "combine": { - "p50": 77.91999727487564, - "p90": 78.78399640321732, - "p95": 79.26400005817413, - 
"p99": 81.85599744319916 + "p50": 78.91199737787247, + "p90": 79.80799674987793, + "p95": 80.35200089216232, + "p99": 83.71199667453766 }, "roundtrip": { - "p50": 132.32000172138214, - "p90": 135.6160044670105, - "p95": 136.31999492645264, - "p99": 141.66399836540222 + "p50": 131.26400113105774, + "p90": 136.06399297714233, + "p95": 137.79200613498688, + "p99": 158.78400206565857 }, "isolatedSum": { - "p50": 152.44799852371216, - "p90": 155.29599785804749, - "p95": 156.44799917936325, - "p99": 163.61599415540695 + "p50": 148.83200079202652, + "p90": 154.94399517774582, + "p95": 156.16000443696976, + "p99": 164.44799304008484 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 12859392, - "combineLogicalBytes": 12859392, - "fanoutMean": 3.50390625, - "recvTokensMax": 255, - "stragglerRank": 7, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -3462,35 +3849,35 @@ "tokensPerRank": 64, "globalTokens": 512, "dispatch": { - "p50": 80.19199967384338, - "p90": 81.88799768686295, - "p95": 83.52000266313553, - "p99": 90.30400216579437 + "p50": 88.16000074148178, + "p90": 91.13600105047226, + "p95": 92.3520028591156, + "p99": 106.46399855613708 }, "combine": { - "p50": 90.59199690818787, - "p90": 91.67999774217606, - "p95": 92.57599711418152, - "p99": 101.21600329875946 + "p50": 92.47999638319016, + "p90": 100.96000134944916, + "p95": 102.04800218343735, + "p99": 116.19199812412262 }, "roundtrip": { - "p50": 155.45600652694702, - "p90": 160.5760008096695, - "p95": 161.98399662971497, - "p99": 169.53599452972412 + "p50": 159.8079949617386, + "p90": 163.42400014400482, + "p95": 164.8319959640503, + "p99": 172.03199863433838 }, "isolatedSum": { - "p50": 170.78399658203125, - "p90": 173.567995429039, - "p95": 176.09599977731705, - "p99": 191.52000546455383 + "p50": 180.63999712467194, + "p90": 192.09600239992142, + "p95": 
194.40000504255295, + "p99": 222.6559966802597 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 25145344, - "combineLogicalBytes": 25145344, - "fanoutMean": 3.42578125, - "recvTokensMax": 510, - "stragglerRank": 7, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -3499,35 +3886,35 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 101.69599950313568, - "p90": 105.15200346708298, - "p95": 106.04800283908844, - "p99": 115.167997777462 + "p50": 94.46399658918381, + "p90": 100.03200173377991, + "p95": 102.04800218343735, + "p99": 111.68000102043152 }, "combine": { - "p50": 126.81600451469421, - "p90": 127.77599692344666, - "p95": 128.12800705432892, - "p99": 131.71200454235077 + "p50": 115.48800021409988, + "p90": 116.5120005607605, + "p95": 117.18399822711945, + "p99": 127.61600315570831 }, "roundtrip": { - "p50": 207.58399367332458, - "p90": 212.41599321365356, - "p95": 215.45599400997162, - "p99": 240.79999327659607 + "p50": 195.23200392723083, + "p90": 199.13600385189056, + "p95": 200.1280039548874, + "p99": 208.25600624084473 }, "isolatedSum": { - "p50": 228.5120040178299, - "p90": 232.92800039052963, - "p95": 234.17600989341736, - "p99": 246.88000231981277 + "p50": 209.9519968032837, + "p90": 216.5440022945404, + "p95": 219.2320004105568, + "p99": 239.29600417613983 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 49946624, - "combineLogicalBytes": 49946624, - "fanoutMean": 3.40234375, - "recvTokensMax": 1022, - "stragglerRank": 7, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -3535,50 +3922,51 @@ ] }, { - "id": "cx-a499b6fe", - "identity": 
"b300|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|decode|normal|none|none|0|normalized|0.18|a8f501af7004836", - "colorKey": "b300_085c12d4", - "comparisonKey": "f41671f558a3c8d2", + "id": "cx-bb4293a3", + "identity": "b300|deepep|7168|8|384|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||d6c49ae98878760", + "colorKey": "b300_c9569580", + "comparisonKey": "9212a9f938273ac4", "schemaVersion": 3, - "generatedAt": "2026-06-26T18:23:15.234137+00:00", + "generatedAt": "2026-06-27T11:14:04.417572+00:00", "status": "valid", "publicationStatus": "official", - "runner": "b300-nv_10", + "runner": "b300-nv_12", "sku": "b300", "backend": "deepep", "phase": "decode", "mode": "normal", - "resourceMode": "normalized", - "suite": "resource-constrained", + "resourceMode": "tuned", + "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", "topologyClass": "b300-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep · bf16 (norm) · zipf+eplb", + "label": "B300 EP8 · deepep · bf16", + "model": "Kimi-K2", "shape": { "hidden": 7168, "topk": 8, - "experts": 288, - "routing": "zipf", - "routingLabel": "zipf+eplb", + "experts": 384, + "routing": "uniform", + "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", - "eplbEnabled": true, + "eplbEnabled": false, "dispatchDtype": "bf16", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { - "requestedFraction": 0.18, - "achievedFraction": 0.1824, - "configuredUnits": 27, + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, "deviceUnits": 148, - "resourceClass": "resource-constrained", - "conformanceClass": "resource-conforming", + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", "fixedKernel": false, - "paretoEligible": true + "paretoEligible": false }, "placement": { "kind": "packed", @@ -3587,54 
+3975,54 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "a8f501af7004836", - "workloadId": "set:8:f5576e2b712d38c3", + "traceSignature": "d6c49ae98878760", + "workloadId": "set:8:9a27d0df4b17fa09", "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": 4.927734375, - "eplbImbalanceAfter": 1.0006103515625, + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, "backendVersion": "1.2.1", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28255311146", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28255311146", - "createdAt": "2026-06-26T17:45:43Z", - "sha": "36d3eb6c3c7386d3220c873d305410219c5c0f17" + "id": "28287503016", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28287503016", + "createdAt": "2026-06-27T11:14:04.417572+00:00", + "sha": "df7fddee0a275156d4c72fa006cd2b73bce72613" }, "rows": [ { "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 56.86400085687637, - "p90": 59.7120001912117, - "p95": 63.32799792289734, - "p99": 72.64000177383423 + "p50": 57.82400071620941, + "p90": 59.90400165319443, + "p95": 62.97600269317627, + "p99": 70.592001080513 }, "combine": { - "p50": 64.83200192451477, - "p90": 66.46399945020676, - "p95": 66.94400310516357, - "p99": 76.51200145483017 + "p50": 66.52799993753433, + "p90": 67.58400052785873, + "p95": 68.9919963479042, + "p99": 78.87999713420868 }, "roundtrip": { - "p50": 105.12000322341919, - "p90": 110.72000116109848, - "p95": 111.7440015077591, - "p99": 122.56000190973282 + "p50": 107.90400207042694, + "p90": 114.20799791812897, + "p95": 114.94400352239609, + "p99": 125.21600723266602 }, "isolatedSum": { - "p50": 121.69600278139114, - "p90": 126.17599964141846, - "p95": 130.2720010280609, - "p99": 149.1520032286644 + "p50": 124.35200065374374, + "p90": 127.48800218105316, + "p95": 131.96799904108047, + "p99": 149.47199821472168 }, 
"roundtripMeasured": true, - "dispatchLogicalBytes": 616448, - "combineLogicalBytes": 616448, - "fanoutMean": 5.375, - "recvTokensMax": 7, - "stragglerRank": 7, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -3643,35 +4031,35 @@ "tokensPerRank": 2, "globalTokens": 16, "dispatch": { - "p50": 57.5999990105629, - "p90": 59.808000922203064, - "p95": 62.07999959588051, - "p99": 71.45600020885468 + "p50": 58.079998940229416, + "p90": 61.3120011985302, + "p95": 64.03200328350067, + "p99": 79.29600030183792 }, "combine": { - "p50": 66.27199798822403, - "p90": 67.00800359249115, - "p95": 67.29599833488464, - "p99": 76.92799717187881 + "p50": 67.10399687290192, + "p90": 68.41599941253662, + "p95": 69.98399645090103, + "p99": 85.50400286912918 }, "roundtrip": { - "p50": 106.27199709415436, - "p90": 108.22399705648422, - "p95": 110.01600325107574, - "p99": 132.54399597644806 + "p50": 108.03200304508209, + "p90": 110.944002866745, + "p95": 113.15199732780457, + "p99": 129.15199995040894 }, "isolatedSum": { - "p50": 123.87199699878693, - "p90": 126.81600451469421, - "p95": 129.37599793076515, - "p99": 148.3839973807335 + "p50": 125.18399581313133, + "p90": 129.72800061106682, + "p95": 134.0159997344017, + "p99": 164.8000031709671 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1204224, - "combineLogicalBytes": 1204224, - "fanoutMean": 5.25, + "dispatchLogicalBytes": 1218560, + "combineLogicalBytes": 1218560, + "fanoutMean": 5.3125, "recvTokensMax": 14, - "stragglerRank": 7, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -3680,35 +4068,35 @@ "tokensPerRank": 4, "globalTokens": 32, "dispatch": { - "p50": 57.53599852323532, - "p90": 59.808000922203064, - "p95": 60.70400029420853, - "p99": 67.87200272083282 + "p50": 59.7120001912117, + "p90": 61.85600161552429, + "p95": 63.1679967045784, + "p99": 
75.39200037717819 }, "combine": { - "p50": 66.43199920654297, - "p90": 67.45599955320358, - "p95": 69.31199878454208, - "p99": 78.78399640321732 + "p50": 68.31999868154526, + "p90": 77.11999863386154, + "p95": 77.7600035071373, + "p99": 89.59999680519104 }, "roundtrip": { - "p50": 106.6880002617836, - "p90": 109.50399935245514, - "p95": 111.87200248241425, - "p99": 125.08800625801086 + "p50": 123.48800152540207, + "p90": 127.61600315570831, + "p95": 128.4479945898056, + "p99": 141.9840008020401 }, "isolatedSum": { - "p50": 123.96799772977829, - "p90": 127.26400047540665, - "p95": 130.0159990787506, - "p99": 146.65599912405014 + "p50": 128.03199887275696, + "p90": 138.97600024938583, + "p95": 140.9280002117157, + "p99": 164.99199718236923 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2394112, - "combineLogicalBytes": 2394112, - "fanoutMean": 5.21875, - "recvTokensMax": 24, - "stragglerRank": 7, + "dispatchLogicalBytes": 2408448, + "combineLogicalBytes": 2408448, + "fanoutMean": 5.25, + "recvTokensMax": 26, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -3717,35 +4105,35 @@ "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 58.848001062870026, - "p90": 61.15199998021126, - "p95": 64.41599875688553, - "p99": 78.14399898052216 + "p50": 59.967998415231705, + "p90": 62.81600147485733, + "p95": 66.39999896287918, + "p99": 73.53600114583969 }, "combine": { - "p50": 68.28799843788147, - "p90": 76.25599950551987, - "p95": 76.92799717187881, - "p99": 79.64800298213959 + "p50": 68.44799965620041, + "p90": 76.92799717187881, + "p95": 77.34400033950806, + "p99": 82.75199681520462 }, "roundtrip": { - "p50": 116.28799885511398, - "p90": 122.8799968957901, - "p95": 124.70400333404541, - "p99": 145.08800208568573 + "p50": 122.17599898576736, + "p90": 127.07200646400452, + "p95": 128.25599312782288, + "p99": 142.68800616264343 }, "isolatedSum": { - "p50": 127.1359995007515, - "p90": 137.40799948573112, - "p95": 141.34399592876434, - 
"p99": 157.79200196266174 + "p50": 128.4159980714321, + "p90": 139.74399864673615, + "p95": 143.74399930238724, + "p99": 156.2879979610443 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4630528, - "combineLogicalBytes": 4630528, - "fanoutMean": 5.046875, - "recvTokensMax": 45, - "stragglerRank": 7, + "dispatchLogicalBytes": 4831232, + "combineLogicalBytes": 4831232, + "fanoutMean": 5.265625, + "recvTokensMax": 48, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -3754,35 +4142,35 @@ "tokensPerRank": 16, "globalTokens": 128, "dispatch": { - "p50": 60.54399907588959, - "p90": 66.14399701356888, - "p95": 68.67200136184692, - "p99": 83.29600095748901 + "p50": 62.880001962184906, + "p90": 68.80000233650208, + "p95": 71.03999704122543, + "p99": 74.65600222349167 }, "combine": { - "p50": 68.64000111818314, - "p90": 77.2159993648529, - "p95": 77.82399654388428, - "p99": 78.91199737787247 + "p50": 69.11999732255936, + "p90": 78.40000092983246, + "p95": 78.97599786520004, + "p99": 82.40000158548355 }, "roundtrip": { - "p50": 123.16799908876419, - "p90": 126.0479986667633, - "p95": 127.16799974441528, - "p99": 131.1040073633194 + "p50": 121.11999839544296, + "p90": 125.34399330615997, + "p95": 127.13600695133209, + "p99": 134.8479986190796 }, "isolatedSum": { - "p50": 129.18400019407272, - "p90": 143.35999637842178, - "p95": 146.4959979057312, - "p99": 162.20799833536148 + "p50": 131.99999928474426, + "p90": 147.20000326633453, + "p95": 150.01599490642548, + "p99": 157.05600380897522 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 9447424, - "combineLogicalBytes": 9447424, - "fanoutMean": 5.1484375, + "dispatchLogicalBytes": 9848832, + "combineLogicalBytes": 9848832, + "fanoutMean": 5.3671875, "recvTokensMax": 91, - "stragglerRank": 7, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -3791,35 +4179,35 @@ "tokensPerRank": 32, "globalTokens": 256, "dispatch": { - "p50": 69.31199878454208, - "p90": 
75.52000135183334, - "p95": 76.4160007238388, - "p99": 83.20000022649765 + "p50": 70.94399631023407, + "p90": 76.48000121116638, + "p95": 77.37600058317184, + "p99": 80.06399869918823 }, "combine": { - "p50": 78.46400141716003, - "p90": 79.26400005817413, - "p95": 79.45600152015686, - "p99": 82.40000158548355 + "p50": 79.3600007891655, + "p90": 80.22399991750717, + "p95": 81.28000050783157, + "p99": 91.90399944782257 }, "roundtrip": { - "p50": 132.192000746727, - "p90": 135.6479972600937, - "p95": 136.3839954137802, - "p99": 147.20000326633453 + "p50": 134.46399569511414, + "p90": 138.20800185203552, + "p95": 139.71200585365295, + "p99": 151.2320041656494 }, "isolatedSum": { - "p50": 147.77600020170212, - "p90": 154.78400141000748, - "p95": 155.87200224399567, - "p99": 165.6000018119812 + "p50": 150.30399709939957, + "p90": 156.70400112867355, + "p95": 158.65600109100342, + "p99": 171.9679981470108 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19023872, - "combineLogicalBytes": 19023872, - "fanoutMean": 5.18359375, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 19496960, + "fanoutMean": 5.3125, "recvTokensMax": 178, - "stragglerRank": 7, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -3828,35 +4216,35 @@ "tokensPerRank": 64, "globalTokens": 512, "dispatch": { - "p50": 87.8399983048439, - "p90": 90.30400216579437, - "p95": 91.87199920415878, - "p99": 100.0640019774437 + "p50": 88.8959988951683, + "p90": 91.61599725484848, + "p95": 93.24800223112106, + "p99": 102.94400155544281 }, "combine": { - "p50": 91.2960022687912, - "p90": 93.08800101280212, - "p95": 93.85599941015244, - "p99": 108.12799632549286 + "p50": 92.38400310277939, + "p90": 100.63999891281128, + "p95": 101.6639992594719, + "p99": 104.73600029945374 }, "roundtrip": { - "p50": 157.44000673294067, - "p90": 162.4639928340912, - "p95": 163.71199488639832, - "p99": 168.89600455760956 + "p50": 161.31199896335602, + "p90": 165.0879979133606, + "p95": 
166.46400094032288, + "p99": 185.7919991016388 }, "isolatedSum": { - "p50": 179.1360005736351, - "p90": 183.3920031785965, - "p95": 185.72799861431122, - "p99": 208.19199830293655 + "p50": 181.2800019979477, + "p90": 192.25599616765976, + "p95": 194.91200149059296, + "p99": 207.68000185489655 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 38148096, - "combineLogicalBytes": 38148096, - "fanoutMean": 5.197265625, - "recvTokensMax": 350, - "stragglerRank": 7, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 38836224, + "fanoutMean": 5.291015625, + "recvTokensMax": 372, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -3865,35 +4253,35 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 93.9520001411438, - "p90": 98.39999675750732, - "p95": 100.00000149011612, - "p99": 105.53599894046783 + "p50": 95.58399766683578, + "p90": 100.67199915647507, + "p95": 101.79200023412704, + "p99": 108.15999656915665 }, "combine": { - "p50": 115.29599875211716, - "p90": 116.12799763679504, - "p95": 116.48000031709671, - "p99": 127.87200510501862 + "p50": 115.64800143241882, + "p90": 116.57600104808807, + "p95": 117.3119992017746, + "p99": 128.00000607967377 }, "roundtrip": { - "p50": 193.08799505233765, - "p90": 199.90399479866028, - "p95": 201.50400698184967, - "p99": 214.1759991645813 + "p50": 197.05599546432495, + "p90": 200.95999538898468, + "p95": 202.84800231456757, + "p99": 227.90400683879852 }, "isolatedSum": { - "p50": 209.24799889326096, - "p90": 214.52799439430237, - "p95": 216.48000180721283, - "p99": 233.40800404548645 + "p50": 211.2319990992546, + "p90": 217.24800020456314, + "p95": 219.10399943590164, + "p99": 236.1600026488304 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 76955648, - "combineLogicalBytes": 76955648, - "fanoutMean": 5.2421875, - "recvTokensMax": 687, - "stragglerRank": 7, + "dispatchLogicalBytes": 77514752, + "combineLogicalBytes": 77514752, + "fanoutMean": 5.2802734375, + 
"recvTokensMax": 707, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -3901,32 +4289,33 @@ ] }, { - "id": "cx-8481f6a4", - "identity": "b300|deepep|7168|8|256|bf16|normal|cached-layout-comm-only-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|ac583971f94b176", - "colorKey": "b300_63f1354f", - "comparisonKey": "63f9b5a5300d4d4b", + "id": "cx-22c8469b", + "identity": "b300|deepep|7168|8|384|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||d6c49ae98878760", + "colorKey": "b300_307ed708", + "comparisonKey": "382d98414c6b61e6", "schemaVersion": 3, - "generatedAt": "2026-06-26T18:09:35.317427+00:00", + "generatedAt": "2026-06-27T09:51:28.371280+00:00", "status": "valid", "publicationStatus": "official", - "runner": "b300-nv_16", + "runner": "b300-nv_02", "sku": "b300", "backend": "deepep", "phase": "decode", "mode": "normal", - "resourceMode": "normalized", - "suite": "resource-constrained", + "resourceMode": "tuned", + "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "cached-layout-comm-only-v1", + "measurementContract": "runtime-visible-v1", "topologyClass": "b300-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep · bf16 (norm) [cl]", + "label": "B300 EP8 · deepep · bf16", + "model": "Kimi-K2", "shape": { "hidden": 7168, "topk": 8, - "experts": 256, + "experts": 384, "routing": "uniform", "routingLabel": "uniform", "routingStep": 0, @@ -3937,14 +4326,14 @@ "combineQuantMode": "none" }, "resourceProfile": { - "requestedFraction": 0.18, - "achievedFraction": 0.1824, - "configuredUnits": 27, + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, "deviceUnits": 148, - "resourceClass": "resource-constrained", - "conformanceClass": "resource-conforming", + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", "fixedKernel": false, - "paretoEligible": true + "paretoEligible": false 
}, "placement": { "kind": "packed", @@ -3953,8 +4342,8 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "ac583971f94b176", - "workloadId": "set:8:d8d49658059863f2", + "traceSignature": "d6c49ae98878760", + "workloadId": "set:8:9a27d0df4b17fa09", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -3962,44 +4351,44 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28254489726", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254489726", - "createdAt": "2026-06-26T17:30:12Z", - "sha": "60dec7d70f554e252fec87709e2be52752947db1" + "id": "28285688277", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285688277", + "createdAt": "2026-06-27T09:51:28.371280+00:00", + "sha": "149586650dbed5b7579537347e9489d5b41543c1" }, "rows": [ { "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 50.303999334573746, - "p90": 52.06400156021118, - "p95": 53.82400006055832, - "p99": 65.05600363016129 + "p50": 57.440001517534256, + "p90": 61.63199990987778, + "p95": 64.64000046253204, + "p99": 82.33600109815598 }, "combine": { - "p50": 66.56000018119812, - "p90": 68.2239979505539, - "p95": 68.76800209283829, - "p99": 77.95199751853943 + "p50": 66.20799750089645, + "p90": 66.880002617836, + "p95": 68.41599941253662, + "p99": 80.32000064849854 }, "roundtrip": { - "p50": 99.84000027179718, - "p90": 103.90400141477585, - "p95": 107.51999914646149, - "p99": 117.11999773979187 + "p50": 107.51999914646149, + "p90": 115.03999680280685, + "p95": 117.40799993276596, + "p99": 124.7360035777092 }, "isolatedSum": { - "p50": 116.86399951577187, - "p90": 120.28799951076508, - "p95": 122.5920021533966, - "p99": 143.0080011487007 + "p50": 123.64799901843071, + "p90": 128.51200252771378, + "p95": 133.05599987506866, + "p99": 162.6560017466545 }, "roundtripMeasured": true, - 
"dispatchLogicalBytes": 630784, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 7, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 8, "stragglerRank": 7, "correct": true, "samplesPooled": 600, @@ -4009,34 +4398,34 @@ "tokensPerRank": 2, "globalTokens": 16, "dispatch": { - "p50": 51.263999193906784, - "p90": 52.89600044488907, - "p95": 55.32800033688545, - "p99": 65.18399715423584 + "p50": 57.631999254226685, + "p90": 60.60799956321716, + "p95": 63.32799792289734, + "p99": 80.70400357246399 }, "combine": { - "p50": 66.97600334882736, - "p90": 68.7360018491745, - "p95": 69.11999732255936, - "p99": 78.11199873685837 + "p50": 66.27199798822403, + "p90": 67.26399809122086, + "p95": 68.12799721956253, + "p99": 78.015998005867 }, "roundtrip": { - "p50": 100.99200159311295, - "p90": 103.26399654150009, - "p95": 105.76000064611435, - "p99": 113.6000007390976 + "p50": 106.81600123643875, + "p90": 109.98400300741196, + "p95": 112.47999966144562, + "p99": 124.79999661445618 }, "isolatedSum": { - "p50": 118.24000254273415, - "p90": 121.63200229406357, - "p95": 124.44799765944481, - "p99": 143.2959958910942 + "p50": 123.90399724245071, + "p90": 127.87199765443802, + "p95": 131.45599514245987, + "p99": 158.720001578331 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1232896, - "combineLogicalBytes": 1232896, - "fanoutMean": 5.375, - "recvTokensMax": 13, + "dispatchLogicalBytes": 1218560, + "combineLogicalBytes": 1218560, + "fanoutMean": 5.3125, + "recvTokensMax": 14, "stragglerRank": 7, "correct": true, "samplesPooled": 600, @@ -4046,35 +4435,35 @@ "tokensPerRank": 4, "globalTokens": 32, "dispatch": { - "p50": 51.4880008995533, - "p90": 53.408000618219376, - "p95": 54.9440011382103, - "p99": 61.63199990987778 + "p50": 57.95200169086456, + "p90": 61.37600168585777, + "p95": 65.50399959087372, + "p99": 79.42400127649307 }, "combine": { - "p50": 67.6800012588501, - "p90": 69.60000097751617, 
- "p95": 76.89599692821503, - "p99": 79.16799932718277 + "p50": 66.97600334882736, + "p90": 69.34399902820587, + "p95": 76.67200267314911, + "p99": 89.63199704885483 }, "roundtrip": { - "p50": 108.73600095510483, - "p90": 115.80800265073776, - "p95": 117.0239970088005, - "p99": 124.35200065374374 + "p50": 111.26399785280228, + "p90": 115.90400338172913, + "p95": 119.00799721479416, + "p99": 129.15199995040894 }, "isolatedSum": { - "p50": 119.1680021584034, - "p90": 123.00800159573555, - "p95": 131.83999806642532, - "p99": 140.79999923706055 + "p50": 124.92800503969193, + "p90": 130.72000071406364, + "p95": 142.17600226402283, + "p99": 169.0559983253479 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2480128, - "combineLogicalBytes": 2480128, - "fanoutMean": 5.40625, - "recvTokensMax": 29, - "stragglerRank": 4, + "dispatchLogicalBytes": 2408448, + "combineLogicalBytes": 2408448, + "fanoutMean": 5.25, + "recvTokensMax": 26, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -4083,34 +4472,34 @@ "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 52.639998495578766, - "p90": 55.64799904823303, - "p95": 59.39200147986412, - "p99": 68.00000369548798 + "p50": 58.9120015501976, + "p90": 61.055999249219894, + "p95": 62.81600147485733, + "p99": 81.88799768686295 }, "combine": { - "p50": 68.25599819421768, - "p90": 77.08799839019775, - "p95": 77.60000228881836, - "p99": 78.94399762153625 + "p50": 67.64800101518631, + "p90": 69.63200122117996, + "p95": 76.9599974155426, + "p99": 78.72000336647034 }, "roundtrip": { - "p50": 113.69600147008896, - "p90": 117.66400188207626, - "p95": 118.72000247240067, - "p99": 121.18399888277054 + "p50": 123.6800029873848, + "p90": 125.98399817943573, + "p95": 126.8479973077774, + "p99": 133.18400084972382 }, "isolatedSum": { - "p50": 120.89599668979645, - "p90": 132.7359974384308, - "p95": 136.99200376868248, - "p99": 146.94400131702423 + "p50": 126.56000256538391, + "p90": 130.68800047039986, + 
"p95": 139.77599889039993, + "p99": 160.60800105333328 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, + "dispatchLogicalBytes": 4831232, + "combineLogicalBytes": 4831232, + "fanoutMean": 5.265625, + "recvTokensMax": 48, "stragglerRank": 7, "correct": true, "samplesPooled": 600, @@ -4120,35 +4509,35 @@ "tokensPerRank": 16, "globalTokens": 128, "dispatch": { - "p50": 61.792001128196716, - "p90": 67.90400296449661, - "p95": 68.67200136184692, - "p99": 71.1359977722168 + "p50": 60.736000537872314, + "p90": 66.91200286149979, + "p95": 68.28799843788147, + "p99": 72.83200323581696 }, "combine": { - "p50": 70.46400010585785, - "p90": 78.40000092983246, - "p95": 78.59200239181519, - "p99": 81.44000172615051 + "p50": 68.76800209283829, + "p90": 77.95199751853943, + "p95": 78.43200117349625, + "p99": 78.78399640321732 }, "roundtrip": { - "p50": 113.18399757146835, - "p90": 115.9679964184761, - "p95": 117.53600090742111, - "p99": 127.87200510501862 + "p50": 119.77600306272507, + "p90": 124.67200309038162, + "p95": 127.10399925708771, + "p99": 141.37600362300873 }, "isolatedSum": { - "p50": 132.25600123405457, - "p90": 146.30400389432907, - "p95": 147.2640037536621, - "p99": 152.5759994983673 + "p50": 129.5040026307106, + "p90": 144.86400038003922, + "p95": 146.71999961137772, + "p99": 151.61599963903427 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 9920512, - "combineLogicalBytes": 9920512, - "fanoutMean": 5.40625, - "recvTokensMax": 92, - "stragglerRank": 4, + "dispatchLogicalBytes": 9848832, + "combineLogicalBytes": 9848832, + "fanoutMean": 5.3671875, + "recvTokensMax": 91, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -4157,34 +4546,34 @@ "tokensPerRank": 32, "globalTokens": 256, "dispatch": { - "p50": 62.65600025653839, - "p90": 64.92800265550613, - "p95": 66.880002617836, - "p99": 73.69600236415863 + "p50": 69.76000219583511, + 
"p90": 71.19999825954437, + "p95": 73.79200309515, + "p99": 84.57600325345993 }, "combine": { - "p50": 78.59200239181519, - "p90": 79.74400371313095, - "p95": 80.64000308513641, - "p99": 85.63199639320374 + "p50": 78.49600166082382, + "p90": 79.45600152015686, + "p95": 80.4160013794899, + "p99": 102.33599692583084 }, "roundtrip": { - "p50": 124.28800016641617, - "p90": 127.93600559234619, - "p95": 130.43199479579926, - "p99": 138.5599970817566 + "p50": 130.97600638866425, + "p90": 135.68000495433807, + "p95": 137.1839940547943, + "p99": 148.41599762439728 }, "isolatedSum": { - "p50": 141.24800264835358, - "p90": 144.67200636863708, - "p95": 147.5200057029724, - "p99": 159.32799875736237 + "p50": 148.25600385665894, + "p90": 150.65599977970123, + "p95": 154.2080044746399, + "p99": 186.91200017929077 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19726336, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 182, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 19496960, + "fanoutMean": 5.3125, + "recvTokensMax": 178, "stragglerRank": 7, "correct": true, "samplesPooled": 600, @@ -4194,34 +4583,34 @@ "tokensPerRank": 64, "globalTokens": 512, "dispatch": { - "p50": 75.77600330114365, - "p90": 83.16799998283386, - "p95": 83.96799862384796, - "p99": 96.3520035147667 + "p50": 86.2400010228157, + "p90": 89.4400030374527, + "p95": 90.59199690818787, + "p99": 99.71199929714203 }, "combine": { - "p50": 91.48799628019333, - "p90": 93.6959981918335, - "p95": 95.90400010347366, - "p99": 104.76800054311752 + "p50": 91.61599725484848, + "p90": 93.56799721717834, + "p95": 95.0080007314682, + "p99": 104.3199971318245 }, "roundtrip": { - "p50": 150.11200308799744, - "p90": 153.28000485897064, - "p95": 154.91199493408203, - "p99": 159.96800363063812 + "p50": 160.288006067276, + "p90": 166.4319932460785, + "p95": 173.34400117397308, + "p99": 184.86399948596954 }, "isolatedSum": { - "p50": 167.26399958133698, - "p90": 176.86399817466736, - 
"p95": 179.87199872732162, - "p99": 201.12000405788422 + "p50": 177.85599827766418, + "p90": 183.00800025463104, + "p95": 185.59999763965607, + "p99": 204.03199642896652 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 38993920, - "combineLogicalBytes": 38993920, - "fanoutMean": 5.3125, - "recvTokensMax": 367, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 38836224, + "fanoutMean": 5.291015625, + "recvTokensMax": 372, "stragglerRank": 7, "correct": true, "samplesPooled": 600, @@ -4231,34 +4620,34 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 87.36000210046768, - "p90": 89.31200206279755, - "p95": 92.3520028591156, - "p99": 98.36799651384354 + "p50": 94.14400160312653, + "p90": 97.50399738550186, + "p95": 99.87200051546097, + "p99": 113.76000195741653 }, "combine": { - "p50": 115.32799899578094, - "p90": 115.9679964184761, - "p95": 117.21599847078323, - "p99": 126.49600207805634 + "p50": 115.26399850845337, + "p90": 115.93600362539291, + "p95": 117.18399822711945, + "p99": 131.20000064373016 }, "roundtrip": { - "p50": 186.14399433135986, - "p90": 191.67999923229218, - "p95": 193.05600225925446, - "p99": 199.072003364563 + "p50": 192.89599359035492, + "p90": 198.68800044059753, + "p95": 200.19200444221497, + "p99": 209.18400585651398 }, "isolatedSum": { - "p50": 202.68800109624863, - "p90": 205.27999848127365, - "p95": 209.56800132989883, - "p99": 224.86399859189987 + "p50": 209.4080001115799, + "p90": 213.44000101089478, + "p95": 217.0559987425804, + "p99": 244.9600026011467 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, + "dispatchLogicalBytes": 77514752, + "combineLogicalBytes": 77514752, + "fanoutMean": 5.2802734375, + "recvTokensMax": 707, "stragglerRank": 7, "correct": true, "samplesPooled": 600, @@ -4267,50 +4656,51 @@ ] }, { - "id": "cx-1911c35d", - "identity": 
"b300|deepep|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|ac583971f94b176", - "colorKey": "b300_eee29686", - "comparisonKey": "37f5e47990ede677", + "id": "cx-a22ca77b", + "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|decode|normal|none|none|0|tuned||2279937619f3971", + "colorKey": "b300_77566238", + "comparisonKey": "08fb0b4fb4077abb", "schemaVersion": 3, - "generatedAt": "2026-06-26T17:41:38.976776+00:00", + "generatedAt": "2026-06-26T23:58:04.079730+00:00", "status": "valid", "publicationStatus": "official", - "runner": "b300-nv_01", + "runner": "b300-nv_02", "sku": "b300", "backend": "deepep", "phase": "decode", "mode": "normal", - "resourceMode": "normalized", - "suite": "resource-constrained", + "resourceMode": "tuned", + "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", "topologyClass": "b300-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep · fp8 (norm)", + "label": "B300 EP8 · deepep · bf16 · balanced", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", + "routing": "balanced", + "routingLabel": "balanced", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, - "dispatchDtype": "fp8", + "dispatchDtype": "bf16", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { - "requestedFraction": 0.18, - "achievedFraction": 0.1824, - "configuredUnits": 27, + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, "deviceUnits": 148, - "resourceClass": "resource-constrained", - "conformanceClass": "resource-conforming", + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", "fixedKernel": false, - "paretoEligible": true + "paretoEligible": false }, "placement": { "kind": "packed", @@ -4319,8 +4709,8 @@ 
"scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "ac583971f94b176", - "workloadId": "set:8:d8d49658059863f2", + "traceSignature": "2279937619f3971", + "workloadId": "set:4:7af12818400d6348", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -4328,118 +4718,44 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28254479346", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254479346", - "createdAt": "2026-06-26T17:30:02Z", - "sha": "60dec7d70f554e252fec87709e2be52752947db1" + "id": "28271873027", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271873027", + "createdAt": "2026-06-26T23:58:04.079730+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" }, "rows": [ { "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 56.03199824690819, - "p90": 58.240000158548355, - "p95": 59.61599946022034, - "p99": 69.56800073385239 + "p50": 56.41600117087364, + "p90": 58.848001062870026, + "p95": 61.216000467538834, + "p99": 80.25600016117096 }, "combine": { - "p50": 61.40799820423126, - "p90": 63.4239986538887, - "p95": 64.35199826955795, - "p99": 77.53600180149078 - }, - "roundtrip": { - "p50": 121.18399888277054, - "p90": 123.4240010380745, - "p95": 124.64000284671783, - "p99": 131.48799538612366 - }, - "isolatedSum": { - "p50": 117.43999645113945, - "p90": 121.66399881243706, - "p95": 123.96799772977829, - "p99": 147.10400253534317 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 315392, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 56.92800134420395, - "p90": 59.23200026154518, - "p95": 60.19200012087822, - "p99": 68.4799998998642 - }, - "combine": { - 
"p50": 62.24000081419945, - "p90": 64.19199705123901, - "p95": 65.05600363016129, - "p99": 69.69600170850754 - }, - "roundtrip": { - "p50": 122.65600264072418, - "p90": 124.79999661445618, - "p95": 125.98399817943573, - "p99": 135.1040005683899 - }, - "isolatedSum": { - "p50": 119.1680021584034, - "p90": 123.4239973127842, - "p95": 125.2480037510395, - "p99": 138.17600160837173 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 616448, - "combineLogicalBytes": 1232896, - "fanoutMean": 5.375, - "recvTokensMax": 13, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 57.631999254226685, - "p90": 60.03199890255928, - "p95": 61.37600168585777, - "p99": 67.16799736022949 - }, - "combine": { - "p50": 63.93600255250931, - "p90": 65.43999910354614, - "p95": 65.88800251483917, - "p99": 69.023996591568 + "p50": 67.6800012588501, + "p90": 69.60000097751617, + "p95": 76.73600316047668, + "p99": 82.62400329113007 }, "roundtrip": { - "p50": 125.50400197505951, - "p90": 128.51199507713318, - "p95": 132.06399977207184, - "p99": 143.10400187969208 + "p50": 106.49599879980087, + "p90": 109.27999764680862, + "p95": 111.13599687814713, + "p99": 124.1919994354248 }, "isolatedSum": { - "p50": 121.56800180673599, - "p90": 125.47199800610542, - "p95": 127.26400420069695, - "p99": 136.19199395179749 + "p50": 124.09600242972374, + "p90": 128.4480020403862, + "p95": 137.95200362801552, + "p99": 162.88000345230103 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1240064, - "combineLogicalBytes": 2480128, - "fanoutMean": 5.40625, - "recvTokensMax": 29, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 8, + "recvTokensMax": 8, "stragglerRank": 7, "correct": true, "samplesPooled": 600, @@ -4449,145 +4765,71 @@ "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 59.487998485565186, - "p90": 61.88800185918808, - "p95": 
62.81600147485733, - "p99": 73.2479989528656 + "p50": 58.43200162053108, + "p90": 60.70400029420853, + "p95": 62.6240000128746, + "p99": 78.65600287914276 }, "combine": { - "p50": 66.46399945020676, - "p90": 67.80800223350525, - "p95": 68.89600306749344, - "p99": 71.71200215816498 + "p50": 77.98399776220322, + "p90": 78.72000336647034, + "p95": 78.84799689054489, + "p99": 81.4720019698143 }, "roundtrip": { - "p50": 128.60800325870514, - "p90": 130.65600395202637, - "p95": 131.80799782276154, - "p99": 144.3520039319992 + "p50": 118.07999759912491, + "p90": 122.91199713945389, + "p95": 124.1919994354248, + "p99": 131.99999928474426 }, "isolatedSum": { - "p50": 125.95199793577194, - "p90": 129.69600409269333, - "p95": 131.71200454235077, - "p99": 144.96000111103058 + "p50": 136.4159993827343, + "p90": 139.42400366067886, + "p95": 141.4719969034195, + "p99": 160.12800484895706 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2487296, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, + "dispatchLogicalBytes": 7340032, + "combineLogicalBytes": 7340032, + "fanoutMean": 8, + "recvTokensMax": 64, "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 60.67200005054474, - "p90": 62.880001962184906, - "p95": 63.74400109052658, - "p99": 69.82400268316269 - }, - "combine": { - "p50": 67.64800101518631, - "p90": 69.63200122117996, - "p95": 70.91200351715088, - "p99": 79.71200346946716 - }, - "roundtrip": { - "p50": 130.87999820709229, - "p90": 133.15199315547943, - "p95": 134.43200290203094, - "p99": 141.88799262046814 - }, - "isolatedSum": { - "p50": 128.32000106573105, - "p90": 132.51200318336487, - "p95": 134.65600460767746, - "p99": 149.53600615262985 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4960256, - "combineLogicalBytes": 9920512, - "fanoutMean": 5.40625, - "recvTokensMax": 92, - "stragglerRank": 4, - "correct": true, - 
"samplesPooled": 600, - "trials": 3 - }, { "tokensPerRank": 32, "globalTokens": 256, "dispatch": { - "p50": 63.61600011587143, - "p90": 65.47199934720993, - "p95": 66.23999774456024, - "p99": 72.54400104284286 - }, - "combine": { - "p50": 72.31999933719635, - "p90": 74.14399832487106, - "p95": 75.23199915885925, - "p99": 79.6160027384758 - }, - "roundtrip": { - "p50": 142.87999272346497, - "p90": 145.85599303245544, - "p95": 147.16799557209015, - "p99": 155.29599785804749 - }, - "isolatedSum": { - "p50": 135.93599945306778, - "p90": 139.615997672081, - "p95": 141.4719969034195, - "p99": 152.16000378131866 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 9863168, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 182, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 64, - "globalTokens": 512, - "dispatch": { - "p50": 72.64000177383423, - "p90": 75.3600001335144, - "p95": 76.51200145483017, - "p99": 82.65600353479385 + "p50": 69.82400268316269, + "p90": 71.87200337648392, + "p95": 73.7600028514862, + "p99": 84.25600081682205 }, "combine": { - "p50": 87.90399879217148, - "p90": 90.08000046014786, - "p95": 90.84799885749817, - "p99": 101.15200281143188 + "p50": 79.16799932718277, + "p90": 81.08799904584885, + "p95": 81.91999793052673, + "p99": 90.71999788284302 }, "roundtrip": { - "p50": 172.83199727535248, - "p90": 175.4239946603775, - "p95": 176.41599476337433, - "p99": 181.43999576568604 + "p50": 133.82400572299957, + "p90": 140.09599387645721, + "p95": 141.92000031471252, + "p99": 145.82400023937225 }, "isolatedSum": { - "p50": 160.5440005660057, - "p90": 165.44000059366226, - "p95": 167.36000031232834, - "p99": 183.80800634622574 + "p50": 148.99200201034546, + "p90": 152.96000242233276, + "p95": 155.68000078201294, + "p99": 174.97599869966507 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19496960, - "combineLogicalBytes": 38993920, - "fanoutMean": 5.3125, - 
"recvTokensMax": 367, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 8, + "recvTokensMax": 256, "stragglerRank": 7, "correct": true, "samplesPooled": 600, @@ -4597,34 +4839,34 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 83.20000022649765, - "p90": 86.07999980449677, - "p95": 87.0399996638298, - "p99": 90.17600119113922 + "p50": 105.76000064611435, + "p90": 107.71200060844421, + "p95": 109.02400314807892, + "p99": 114.78400230407715 }, "combine": { - "p50": 108.70400071144104, - "p90": 110.97600311040878, - "p95": 112.06399649381638, - "p99": 116.41599982976913 + "p50": 130.36799430847168, + "p90": 139.615997672081, + "p95": 140.03199338912964, + "p99": 143.13599467277527 }, "roundtrip": { - "p50": 218.07999908924103, - "p90": 221.343994140625, - "p95": 222.97599911689758, - "p99": 235.52000522613525 + "p50": 230.68800568580627, + "p90": 234.52800512313843, + "p95": 235.55199801921844, + "p99": 240.09600281715393 }, "isolatedSum": { - "p50": 191.9040009379387, - "p90": 197.05600291490555, - "p95": 199.10399615764618, - "p99": 206.59200102090836 + "p50": 236.12799495458603, + "p90": 247.3279982805252, + "p95": 249.05599653720856, + "p99": 257.9199969768524 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 38836224, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 8, + "recvTokensMax": 1024, "stragglerRank": 7, "correct": true, "samplesPooled": 600, @@ -4633,50 +4875,51 @@ ] }, { - "id": "cx-fe6f5351", - "identity": "b300|deepep|7168|8|256|fp8|normal|cached-layout-comm-only-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|ac583971f94b176", - "colorKey": "b300_84b10b26", - "comparisonKey": "abf92acc41d9d301", + "id": "cx-42672aa9", + "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|decode|normal|none|none|0|tuned||ffa946582edb500", + 
"colorKey": "b300_77566238", + "comparisonKey": "3fe3497798f4d1dd", "schemaVersion": 3, - "generatedAt": "2026-06-26T18:10:48.557544+00:00", + "generatedAt": "2026-06-27T09:48:00.348230+00:00", "status": "valid", "publicationStatus": "official", - "runner": "b300-nv_01", + "runner": "b300-nv_08", "sku": "b300", "backend": "deepep", "phase": "decode", "mode": "normal", - "resourceMode": "normalized", - "suite": "resource-constrained", + "resourceMode": "tuned", + "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "cached-layout-comm-only-v1", + "measurementContract": "layout-and-dispatch-v1", "topologyClass": "b300-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep · fp8 (norm) [cl]", + "label": "B300 EP8 · deepep · bf16 · balanced", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", + "routing": "balanced", + "routingLabel": "balanced", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, - "dispatchDtype": "fp8", + "dispatchDtype": "bf16", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { - "requestedFraction": 0.18, - "achievedFraction": 0.1824, - "configuredUnits": 27, + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, "deviceUnits": 148, - "resourceClass": "resource-constrained", - "conformanceClass": "resource-conforming", + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", "fixedKernel": false, - "paretoEligible": true + "paretoEligible": false }, "placement": { "kind": "packed", @@ -4685,8 +4928,8 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "ac583971f94b176", - "workloadId": "set:8:d8d49658059863f2", + "traceSignature": "ffa946582edb500", + "workloadId": "set:8:7af12818400d6348", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, 
"eplbImbalanceAfter": null, @@ -4694,45 +4937,45 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28254499301", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254499301", - "createdAt": "2026-06-26T17:30:22Z", - "sha": "60dec7d70f554e252fec87709e2be52752947db1" + "id": "28285609982", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285609982", + "createdAt": "2026-06-27T09:48:00.348230+00:00", + "sha": "149586650dbed5b7579537347e9489d5b41543c1" }, "rows": [ { "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 49.56800118088722, - "p90": 51.263999193906784, - "p95": 51.83999985456467, - "p99": 57.920001447200775 + "p50": 57.50399827957153, + "p90": 59.20000001788139, + "p95": 61.055999249219894, + "p99": 68.60800087451935 }, "combine": { - "p50": 62.24000081419945, - "p90": 63.680000603199005, - "p95": 64.51199948787689, - "p99": 66.3679987192154 + "p50": 67.74400174617767, + "p90": 69.66400146484375, + "p95": 76.83199644088745, + "p99": 81.40800148248672 }, "roundtrip": { - "p50": 114.78400230407715, - "p90": 116.86400324106216, - "p95": 118.01599711179733, - "p99": 126.68800354003906 + "p50": 107.87200182676315, + "p90": 110.11199653148651, + "p95": 112.28799819946289, + "p99": 123.9359974861145 }, "isolatedSum": { - "p50": 111.80800199508667, - "p90": 114.94399979710579, - "p95": 116.35199934244156, - "p99": 124.28800016641617 + "p50": 125.2480000257492, + "p90": 128.86400148272514, + "p95": 137.88799569010735, + "p99": 150.01600235700607 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 315392, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 7, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 8, + "recvTokensMax": 8, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -4741,35 +4984,35 @@ 
"tokensPerRank": 2, "globalTokens": 16, "dispatch": { - "p50": 49.92000013589859, - "p90": 51.80799961090088, - "p95": 52.76799947023392, - "p99": 58.9120015501976 + "p50": 57.760000228881836, + "p90": 60.095999389886856, + "p95": 62.111999839544296, + "p99": 75.6480023264885 }, "combine": { - "p50": 63.040003180503845, - "p90": 64.89600241184235, - "p95": 65.24799764156342, - "p99": 74.11199808120728 + "p50": 67.96800345182419, + "p90": 76.64000242948532, + "p95": 77.44000107049942, + "p99": 78.72000336647034 }, "roundtrip": { - "p50": 116.64000153541565, - "p90": 119.00799721479416, - "p95": 121.08799815177917, - "p99": 136.57599687576294 + "p50": 117.40799993276596, + "p90": 123.19999933242798, + "p95": 123.87199699878693, + "p99": 141.27999544143677 }, "isolatedSum": { - "p50": 112.96000331640244, - "p90": 116.70400202274323, - "p95": 118.01599711179733, - "p99": 133.02399963140488 + "p50": 125.72800368070602, + "p90": 136.73600181937218, + "p95": 139.55200091004372, + "p99": 154.36800569295883 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 616448, - "combineLogicalBytes": 1232896, - "fanoutMean": 5.375, - "recvTokensMax": 13, - "stragglerRank": 7, + "dispatchLogicalBytes": 1835008, + "combineLogicalBytes": 1835008, + "fanoutMean": 8, + "recvTokensMax": 16, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -4778,35 +5021,35 @@ "tokensPerRank": 4, "globalTokens": 32, "dispatch": { - "p50": 51.42400041222572, - "p90": 53.63199859857559, - "p95": 54.655998945236206, - "p99": 65.76000154018402 + "p50": 59.42400172352791, + "p90": 61.43999844789505, + "p95": 63.29599767923355, + "p99": 74.78400319814682 }, "combine": { - "p50": 63.10400366783142, - "p90": 64.96000289916992, - "p95": 65.63200056552887, - "p99": 75.93599706888199 + "p50": 77.85599678754807, + "p90": 78.68800312280655, + "p95": 78.97599786520004, + "p99": 93.82399916648865 }, "roundtrip": { - "p50": 117.53600090742111, - "p90": 119.87199634313583, - "p95": 
120.86399644613266, - "p99": 132.192000746727 + "p50": 119.61600184440613, + "p90": 124.51200187206268, + "p95": 125.76000392436981, + "p99": 132.1599930524826 }, "isolatedSum": { - "p50": 114.52800408005714, - "p90": 118.59200149774551, - "p95": 120.28799951076508, - "p99": 141.695998609066 + "p50": 137.27999851107597, + "p90": 140.1280015707016, + "p95": 142.2719955444336, + "p99": 168.60800236463547 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1240064, - "combineLogicalBytes": 2480128, - "fanoutMean": 5.40625, - "recvTokensMax": 29, - "stragglerRank": 7, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 8, + "recvTokensMax": 32, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -4815,35 +5058,35 @@ "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 52.41600051522255, - "p90": 54.496001452207565, - "p95": 55.71199953556061, - "p99": 60.5119988322258 + "p50": 59.776000678539276, + "p90": 62.04799935221672, + "p95": 63.10400366783142, + "p99": 72.86400347948074 }, "combine": { - "p50": 65.72800129652023, - "p90": 67.48799979686737, - "p95": 67.9360032081604, - "p99": 73.21599870920181 + "p50": 78.3040001988411, + "p90": 78.72000336647034, + "p95": 78.97599786520004, + "p99": 82.14399963617325 }, "roundtrip": { - "p50": 122.04799801111221, - "p90": 124.38400089740753, - "p95": 126.52799487113953, - "p99": 147.16799557209015 + "p50": 119.84000355005264, + "p90": 122.75200337171555, + "p95": 125.37600100040436, + "p99": 154.40000593662262 }, "isolatedSum": { - "p50": 118.14400181174278, - "p90": 121.98400124907494, - "p95": 123.64800274372101, - "p99": 133.7279975414276 + "p50": 138.08000087738037, + "p90": 140.76800271868706, + "p95": 142.08000153303146, + "p99": 155.008003115654 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2487296, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 6, + "dispatchLogicalBytes": 7340032, + 
"combineLogicalBytes": 7340032, + "fanoutMean": 8, + "recvTokensMax": 64, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -4852,35 +5095,35 @@ "tokensPerRank": 16, "globalTokens": 128, "dispatch": { - "p50": 53.69599908590317, - "p90": 55.84000051021576, - "p95": 56.86400085687637, - "p99": 65.0240033864975 + "p50": 72.67200201749802, + "p90": 76.4480009675026, + "p95": 77.27999985218048, + "p99": 82.75199681520462 }, "combine": { - "p50": 67.16799736022949, - "p90": 68.9919963479042, - "p95": 69.69600170850754, - "p99": 77.98399776220322 + "p50": 78.40000092983246, + "p90": 78.87999713420868, + "p95": 79.26400005817413, + "p99": 82.65600353479385 }, "roundtrip": { - "p50": 123.36000055074692, - "p90": 125.66399574279785, - "p95": 127.16799974441528, - "p99": 140.70400595664978 + "p50": 121.40800058841705, + "p90": 127.03999876976013, + "p95": 128.7039965391159, + "p99": 145.1839953660965 }, "isolatedSum": { - "p50": 120.86399644613266, - "p90": 124.83199685811996, - "p95": 126.56000256538391, - "p99": 143.0080011487007 + "p50": 151.07200294733047, + "p90": 155.32799810171127, + "p95": 156.54399991035461, + "p99": 165.40800034999847 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4960256, - "combineLogicalBytes": 9920512, - "fanoutMean": 5.40625, - "recvTokensMax": 92, - "stragglerRank": 7, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 8, + "recvTokensMax": 128, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 @@ -4889,35 +5132,35 @@ "tokensPerRank": 32, "globalTokens": 256, "dispatch": { - "p50": 57.66399949789047, - "p90": 59.776000678539276, - "p95": 60.63999980688095, - "p99": 65.72800129652023 + "p50": 70.94399631023407, + "p90": 72.76800274848938, + "p95": 74.17599856853485, + "p99": 80.99199831485748 }, "combine": { - "p50": 72.89600372314453, - "p90": 74.14399832487106, - "p95": 75.55200159549713, - "p99": 83.96799862384796 + "p50": 80.06399869918823, + 
"p90": 81.66400343179703, + "p95": 89.24800157546997, + "p99": 106.36799782514572 }, "roundtrip": { - "p50": 138.40000331401825, - "p90": 140.60799777507782, - "p95": 141.66399836540222, - "p99": 149.53599870204926 + "p50": 134.33599472045898, + "p90": 141.4719969034195, + "p95": 143.36000382900238, + "p99": 156.25600516796112 }, "isolatedSum": { - "p50": 130.560003221035, - "p90": 133.91999900341034, - "p95": 136.19200140237808, - "p99": 149.6959999203682 + "p50": 151.0079950094223, + "p90": 154.4320061802864, + "p95": 163.42400014400482, + "p99": 187.3599961400032 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 9863168, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 182, - "stragglerRank": 6, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 8, + "recvTokensMax": 256, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -4926,35 +5169,35 @@ "tokensPerRank": 64, "globalTokens": 512, "dispatch": { - "p50": 66.04799628257751, - "p90": 68.15999746322632, - "p95": 69.88800317049026, - "p99": 74.91199672222137 + "p50": 85.53600311279297, + "p90": 89.05600011348724, + "p95": 92.25600212812424, + "p99": 105.76000064611435 }, "combine": { - "p50": 87.93599903583527, - "p90": 90.08000046014786, - "p95": 91.74399822950363, - "p99": 98.24000298976898 + "p50": 94.08000111579895, + "p90": 103.04000228643417, + "p95": 103.29599678516388, + "p99": 114.30399864912033 }, "roundtrip": { - "p50": 164.76799547672272, - "p90": 167.42399334907532, - "p95": 169.3120002746582, - "p99": 185.92000007629395 + "p50": 169.11999881267548, + "p90": 172.63999581336975, + "p95": 174.75199699401855, + "p99": 194.17600333690643 }, "isolatedSum": { - "p50": 153.98399531841278, - "p90": 158.23999792337418, - "p95": 161.6320013999939, - "p99": 173.15199971199036 + "p50": 179.61600422859192, + "p90": 192.09600239992142, + "p95": 195.55199891328812, + "p99": 220.06399929523468 }, "roundtripMeasured": true, 
- "dispatchLogicalBytes": 19496960, - "combineLogicalBytes": 38993920, - "fanoutMean": 5.3125, - "recvTokensMax": 367, - "stragglerRank": 7, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 8, + "recvTokensMax": 512, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -4963,34 +5206,34 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 77.63200253248215, - "p90": 80.1599994301796, - "p95": 81.50400221347809, - "p99": 98.65599870681763 + "p50": 107.13600367307663, + "p90": 125.88800489902496, + "p95": 139.71200585365295, + "p99": 175.55199563503265 }, "combine": { - "p50": 108.35199803113937, - "p90": 110.78400164842606, - "p95": 111.84000223875046, - "p99": 126.01600587368011 + "p50": 131.3920021057129, + "p90": 139.90400731563568, + "p95": 140.09599387645721, + "p99": 151.61600708961487 }, "roundtrip": { - "p50": 211.42399311065674, - "p90": 214.52799439430237, - "p95": 215.87200462818146, - "p99": 223.1999933719635 + "p50": 231.7119985818863, + "p90": 236.28799617290497, + "p95": 238.75199258327484, + "p99": 258.2719922065735 }, "isolatedSum": { - "p50": 185.98400056362152, - "p90": 190.94400107860565, - "p95": 193.34400445222855, - "p99": 224.67200458049774 + "p50": 238.52800577878952, + "p90": 265.79201221466064, + "p95": 279.80799973011017, + "p99": 327.1680027246475 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 38836224, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 8, + "recvTokensMax": 1024, "stragglerRank": 7, "correct": true, "samplesPooled": 600, @@ -4999,18 +5242,18 @@ ] }, { - "id": "cx-238797ce", - "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||157ca81687ddb63", - "colorKey": "b300_c9569580", - "comparisonKey": "c4fbb2dad9521e3e", + "id": "cx-c5ecae32", + "identity": 
"b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|8|decode|normal|none|none|0|tuned||d02a66236b524b8", + "colorKey": "b300_a314501b", + "comparisonKey": "a145623f8abcc709", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:57:38.465863+00:00", + "generatedAt": "2026-06-26T23:58:12.406102+00:00", "status": "valid", "publicationStatus": "official", - "runner": "b300-nv_13", + "runner": "b300-nv_06", "sku": "b300", "backend": "deepep", - "phase": "prefill", + "phase": "decode", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", @@ -5020,13 +5263,14 @@ "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep · bf16", + "label": "B300 EP8 · deepep · bf16 · balanced-rank-local", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", + "routing": "balanced-rank-local", + "routingLabel": "balanced-rank-local", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, @@ -5051,8 +5295,8 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "157ca81687ddb63", - "workloadId": "set:3:a426d66e479dc893", + "traceSignature": "d02a66236b524b8", + "workloadId": "set:4:2eebbed158fe1320", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -5060,119 +5304,156 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271869301", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271869301", - "createdAt": "2026-06-26T23:56:14Z", + "id": "28271879618", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271879618", + "createdAt": "2026-06-26T23:58:12.406102+00:00", "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" }, "rows": [ { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 1, + "globalTokens": 8, 
"dispatch": { - "p50": 93.66399794816971, - "p90": 99.42399710416794, - "p95": 101.24800354242325, - "p99": 112.15999722480774 + "p50": 62.97600269317627, + "p90": 65.21599739789963, + "p95": 66.01600348949432, + "p99": 75.74400305747986 }, "combine": { - "p50": 115.7120019197464, - "p90": 116.54400080442429, - "p95": 117.47200042009354, - "p99": 128.7039965391159 + "p50": 54.336000233888626, + "p90": 55.26399984955788, + "p95": 56.60799890756607, + "p99": 65.5359998345375 }, "roundtrip": { - "p50": 195.3279972076416, - "p90": 199.072003364563, - "p95": 200.57600736618042, - "p99": 214.1440063714981 + "p50": 94.94400024414062, + "p90": 98.27200323343277, + "p95": 100.63999891281128, + "p99": 111.93600296974182 }, "isolatedSum": { - "p50": 209.3759998679161, - "p90": 215.96799790859222, - "p95": 218.72000396251678, - "p99": 240.86399376392365 + "p50": 117.3120029270649, + "p90": 120.4799972474575, + "p95": 122.6240023970604, + "p99": 141.28000289201736 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 4, + "dispatchLogicalBytes": 114688, + "combineLogicalBytes": 114688, + "fanoutMean": 1, + "recvTokensMax": 4, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 512, - "globalTokens": 4096, + "tokensPerRank": 8, + "globalTokens": 64, "dispatch": { - "p50": 193.7599927186966, - "p90": 200.3519982099533, - "p95": 202.94399559497833, - "p99": 209.75999534130096 + "p50": 59.39200147986412, + "p90": 61.63199990987778, + "p95": 62.65600025653839, + "p99": 71.68000191450119 }, "combine": { - "p50": 272.92799949645996, - "p90": 275.04000067710876, - "p95": 275.6800055503845, - "p99": 289.4720137119293 + "p50": 56.73599988222122, + "p90": 65.34399837255478, + "p95": 65.95200300216675, + "p99": 85.4400023818016 }, "roundtrip": { - "p50": 434.5279932022095, - "p90": 444.95999813079834, - "p95": 448.1920003890991, 
- "p99": 461.37601137161255 + "p50": 108.57599973678589, + "p90": 113.56800049543381, + "p95": 114.84800279140472, + "p99": 120.12799829244614 }, "isolatedSum": { - "p50": 466.68799221515656, - "p90": 475.3919988870621, - "p95": 478.62400114536285, - "p99": 499.2320090532303 + "p50": 116.12800136208534, + "p90": 126.97599828243256, + "p95": 128.60800325870514, + "p99": 157.1200042963028 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 312266752, - "combineLogicalBytes": 312266752, - "fanoutMean": 5.31787109375, - "recvTokensMax": 2779, - "stragglerRank": 4, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 1, + "recvTokensMax": 8, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2048, - "globalTokens": 16384, + "tokensPerRank": 32, + "globalTokens": 256, "dispatch": { - "p50": 577.6960253715515, - "p90": 582.6879739761353, - "p95": 584.6400260925293, - "p99": 595.7120060920715 + "p50": 70.72000205516815, + "p90": 76.57600194215775, + "p95": 77.88799703121185, + "p99": 85.31200140714645 }, "combine": { - "p50": 818.336009979248, - "p90": 828.4479975700378, - "p95": 838.3679986000061, - "p99": 852.6399731636047 + "p50": 66.6240006685257, + "p90": 67.32799857854843, + "p95": 67.61600077152252, + "p99": 78.84799689054489 }, "roundtrip": { - "p50": 1377.7920007705688, - "p90": 1387.3920440673828, - "p95": 1397.2480297088623, - "p99": 1410.4640483856201 + "p50": 120.51200121641159, + "p90": 123.99999797344208, + "p95": 124.64000284671783, + "p99": 130.0159990787506 }, "isolatedSum": { - "p50": 1396.0320353507996, - "p90": 1411.135971546173, - "p95": 1423.0080246925354, - "p99": 1448.3519792556763 + "p50": 137.34400272369385, + "p90": 143.90400052070618, + "p95": 145.50399780273438, + "p99": 164.15999829769135 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1243805696, - "combineLogicalBytes": 1243805696, - "fanoutMean": 5.29547119140625, - "recvTokensMax": 10952, - 
"stragglerRank": 4, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 1, + "recvTokensMax": 32, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 70.11199742555618, + "p90": 71.87200337648392, + "p95": 73.79200309515, + "p99": 79.64800298213959 + }, + "combine": { + "p50": 68.35199892520905, + "p90": 70.04799693822861, + "p95": 76.92799717187881, + "p99": 79.1039988398552 + }, + "roundtrip": { + "p50": 122.23999947309494, + "p90": 129.5360028743744, + "p95": 131.32800161838531, + "p99": 142.87999272346497 + }, + "isolatedSum": { + "p50": 138.46399635076523, + "p90": 141.92000031471252, + "p95": 150.7200002670288, + "p99": 158.75200182199478 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 1, + "recvTokensMax": 128, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -5180,18 +5461,18 @@ ] }, { - "id": "cx-a989dada", - "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|prefill|normal|none|none|0|tuned||9e6ac678a09f7f8", - "colorKey": "b300_77566238", - "comparisonKey": "0cdc743c580a47d3", + "id": "cx-db4e17eb", + "identity": "b300|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|balanced+eplb|8|decode|normal|none|none|0|tuned||f0e66a15078595b", + "colorKey": "b300_592e9a16", + "comparisonKey": "22200746e5037727", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:58:19.169974+00:00", + "generatedAt": "2026-06-27T09:48:06.153274+00:00", "status": "valid", "publicationStatus": "official", - "runner": "b300-nv_01", + "runner": "b300-nv_07", "sku": "b300", "backend": "deepep", - "phase": "prefill", + "phase": "decode", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", @@ -5201,16 +5482,17 @@ "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep · 
bf16 · balanced", + "label": "B300 EP8 · deepep · bf16 · balanced+eplb", + "model": "DeepSeek-V3 (EPLB physical)", "shape": { "hidden": 7168, "topk": 8, - "experts": 256, + "experts": 288, "routing": "balanced", - "routingLabel": "balanced", + "routingLabel": "balanced+eplb", "routingStep": 0, "unevenTokens": "none", - "eplbEnabled": false, + "eplbEnabled": true, "dispatchDtype": "bf16", "activationProfile": "normal", "combineQuantMode": "none" @@ -5232,309 +5514,313 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "9e6ac678a09f7f8", - "workloadId": "set:3:2dad1a73ff872905", + "traceSignature": "f0e66a15078595b", + "workloadId": "set:8:7af12818400d6348", "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, + "eplbImbalanceBefore": 1, + "eplbImbalanceAfter": 1, "backendVersion": "1.2.1", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271876366", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271876366", - "createdAt": "2026-06-26T23:56:28Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28285612438", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285612438", + "createdAt": "2026-06-27T09:48:06.153274+00:00", + "sha": "149586650dbed5b7579537347e9489d5b41543c1" }, "rows": [ { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 1, + "globalTokens": 8, "dispatch": { - "p50": 1816.2239789962769, - "p90": 2297.152042388916, - "p95": 2896.320104598999, - "p99": 3506.6559314727783 + "p50": 57.760000228881836, + "p90": 60.864001512527466, + "p95": 63.13599646091461, + "p99": 69.08799707889557 }, "combine": { - "p50": 1859.1680526733398, - "p90": 2047.4560260772705, - "p95": 2707.1681022644043, - "p99": 3027.2960662841797 + "p50": 55.52000179886818, + "p90": 57.37600103020668, + "p95": 64.44799900054932, + "p99": 
66.17599725723267 }, "roundtrip": { - "p50": 1932.8960180282593, - "p90": 2138.335943222046, - "p95": 2772.9599475860596, - "p99": 3193.279981613159 + "p50": 95.29600292444229, + "p90": 98.14400225877762, + "p95": 99.64799880981445, + "p99": 105.05600273609161 }, "isolatedSum": { - "p50": 3675.3920316696167, - "p90": 4344.6080684661865, - "p95": 5603.488206863403, - "p99": 6533.951997756958 + "p50": 113.28000202775002, + "p90": 118.24000254273415, + "p95": 127.58399546146393, + "p99": 135.26399433612823 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 117440512, - "combineLogicalBytes": 117440512, - "fanoutMean": 8, - "recvTokensMax": 1024, + "dispatchLogicalBytes": 229376, + "combineLogicalBytes": 229376, + "fanoutMean": 2, + "recvTokensMax": 3, "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 512, - "globalTokens": 4096, + "tokensPerRank": 2, + "globalTokens": 16, "dispatch": { - "p50": 2029.6320915222168, - "p90": 2355.0078868865967, - "p95": 3023.6799716949463, - "p99": 3532.543897628784 + "p50": 57.8560009598732, + "p90": 59.84000116586685, + "p95": 61.72800064086914, + "p99": 72.41600006818771 }, "combine": { - "p50": 2128.671884536743, - "p90": 2460.576057434082, - "p95": 3003.5200119018555, - "p99": 3345.4079627990723 + "p50": 56.76800012588501, + "p90": 65.63200056552887, + "p95": 66.17599725723267, + "p99": 66.94400310516357 }, "roundtrip": { - "p50": 2337.8241062164307, - "p90": 2708.159923553467, - "p95": 3375.744104385376, - "p99": 3673.952102661133 + "p50": 105.34399747848511, + "p90": 112.15999722480774, + "p95": 113.40799927711487, + "p99": 127.26399302482605 }, "isolatedSum": { - "p50": 4158.30397605896, - "p90": 4815.583944320679, - "p95": 6027.199983596802, - "p99": 6877.951860427856 + "p50": 114.62400108575821, + "p90": 125.47200173139572, + "p95": 127.9039978981018, + "p99": 139.3600031733513 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 469762048, - "combineLogicalBytes": 
469762048, - "fanoutMean": 8, - "recvTokensMax": 4096, + "dispatchLogicalBytes": 458752, + "combineLogicalBytes": 458752, + "fanoutMean": 2, + "recvTokensMax": 6, "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2048, - "globalTokens": 16384, + "tokensPerRank": 4, + "globalTokens": 32, "dispatch": { - "p50": 2545.1838970184326, - "p90": 2883.19993019104, - "p95": 3424.1280555725098, - "p99": 3852.544069290161 + "p50": 57.82400071620941, + "p90": 59.776000678539276, + "p95": 61.535999178886414, + "p99": 68.12799721956253 }, "combine": { - "p50": 2903.520107269287, - "p90": 3124.959945678711, - "p95": 3718.2400226593018, - "p99": 4377.791881561279 + "p50": 65.60000032186508, + "p90": 66.46399945020676, + "p95": 66.97600334882736, + "p99": 77.504001557827 }, "roundtrip": { - "p50": 3660.6719493865967, - "p90": 3928.3199310302734, - "p95": 4631.743907928467, - "p99": 5148.064136505127 + "p50": 111.29599809646606, + "p90": 114.14399743080139, + "p95": 114.84800279140472, + "p99": 123.45600128173828 }, "isolatedSum": { - "p50": 5448.70400428772, - "p90": 6008.159875869751, - "p95": 7142.3680782318115, - "p99": 8230.33595085144 + "p50": 123.4240010380745, + "p90": 126.24000012874603, + "p95": 128.51200252771378, + "p99": 145.63199877738953 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1879048192, - "combineLogicalBytes": 1879048192, - "fanoutMean": 8, - "recvTokensMax": 16384, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 2, + "recvTokensMax": 12, "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 - } - ] - }, - { - "id": "cx-092ff174", - "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|8|prefill|normal|none|none|0|tuned||7aa44c7b86748b9", - "colorKey": "b300_a314501b", - "comparisonKey": "c51826952291f0ba", - "schemaVersion": 3, - "generatedAt": "2026-06-26T23:57:58.409823+00:00", - "status": "valid", - 
"publicationStatus": "official", - "runner": "b300-nv_14", - "sku": "b300", - "backend": "deepep", - "phase": "prefill", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "B300 EP8 · deepep · bf16 · balanced-rank-local", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "balanced-rank-local", - "routingLabel": "balanced-rank-local", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1351, - "configuredUnits": 20, - "deviceUnits": 148, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "7aa44c7b86748b9", - "workloadId": "set:3:388ff74baef05c72", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28271883343", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271883343", - "createdAt": "2026-06-26T23:56:42Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ + }, { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 8, + "globalTokens": 64, "dispatch": { - "p50": 69.37599927186966, - "p90": 71.03999704122543, - "p95": 73.37599992752075, - "p99": 81.69600367546082 + "p50": 59.23200026154518, + "p90": 60.864001512527466, + 
"p95": 62.55999952554703, + "p99": 69.18399780988693 }, "combine": { - "p50": 67.61600077152252, - "p90": 69.60000097751617, - "p95": 77.02399790287018, - "p99": 83.39200168848038 + "p50": 65.88800251483917, + "p90": 66.59200042486191, + "p95": 66.94400310516357, + "p99": 69.5360004901886 }, "roundtrip": { - "p50": 119.93599683046341, - "p90": 126.01600587368011, - "p95": 128.48000228405, - "p99": 135.55200397968292 + "p50": 107.07200318574905, + "p90": 109.50399935245514, + "p95": 111.29599809646606, + "p99": 122.52800166606903 }, "isolatedSum": { - "p50": 136.99200004339218, - "p90": 140.6399980187416, - "p95": 150.39999783039093, - "p99": 165.0880053639412 + "p50": 125.12000277638435, + "p90": 127.45600193738937, + "p95": 129.5040026307106, + "p99": 138.71999830007553 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 14680064, - "combineLogicalBytes": 14680064, - "fanoutMean": 1, - "recvTokensMax": 128, + "dispatchLogicalBytes": 1835008, + "combineLogicalBytes": 1835008, + "fanoutMean": 2, + "recvTokensMax": 24, "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 512, - "globalTokens": 4096, + "tokensPerRank": 16, + "globalTokens": 128, "dispatch": { - "p50": 93.98400038480759, - "p90": 98.68799895048141, - "p95": 100.28800368309021, - "p99": 105.72800040245056 + "p50": 60.575999319553375, + "p90": 63.64800035953522, + "p95": 66.20799750089645, + "p99": 75.58400183916092 }, "combine": { - "p50": 115.52000045776367, - "p90": 116.5120005607605, - "p95": 116.73600226640701, - "p99": 123.48800152540207 + "p50": 66.17599725723267, + "p90": 66.97600334882736, + "p95": 67.19999760389328, + "p99": 70.14399766921997 }, "roundtrip": { - "p50": 193.08799505233765, - "p90": 197.88800179958344, - "p95": 198.59200716018677, - "p99": 204.0960043668747 + "p50": 108.09600353240967, + "p90": 110.20799726247787, + "p95": 112.2559979557991, + "p99": 118.94399672746658 }, "isolatedSum": { - "p50": 209.50400084257126, - "p90": 
215.1999995112419, - "p95": 217.02400594949722, - "p99": 229.21600192785263 + "p50": 126.75199657678604, + "p90": 130.62400370836258, + "p95": 133.40799510478973, + "p99": 145.7279995083809 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 58720256, - "combineLogicalBytes": 58720256, - "fanoutMean": 1, - "recvTokensMax": 512, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 2, + "recvTokensMax": 48, "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2048, - "globalTokens": 16384, + "tokensPerRank": 32, + "globalTokens": 256, "dispatch": { - "p50": 197.37599790096283, - "p90": 199.96799528598785, - "p95": 200.80000162124634, - "p99": 207.10399746894836 + "p50": 60.67200005054474, + "p90": 62.752000987529755, + "p95": 64.03200328350067, + "p99": 73.95199686288834 }, "combine": { - "p50": 248.1600046157837, - "p90": 249.9839961528778, - "p95": 250.68798661231995, - "p99": 253.79198789596558 + "p50": 66.23999774456024, + "p90": 67.1359971165657, + "p95": 67.61600077152252, + "p99": 78.14399898052216 }, "roundtrip": { - "p50": 429.8880100250244, - "p90": 434.30399894714355, - "p95": 436.2879991531372, - "p99": 442.84799695014954 + "p50": 108.89600217342377, + "p90": 111.39199882745743, + "p95": 113.69600147008896, + "p99": 122.52800166606903 }, "isolatedSum": { - "p50": 445.5360025167465, - "p90": 449.95199143886566, - "p95": 451.4879882335663, - "p99": 460.89598536491394 + "p50": 126.91199779510498, + "p90": 129.88799810409546, + "p95": 131.6480040550232, + "p99": 152.0959958434105 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 234881024, - "combineLogicalBytes": 234881024, - "fanoutMean": 1, - "recvTokensMax": 2048, - "stragglerRank": 7, + "dispatchLogicalBytes": 7340032, + "combineLogicalBytes": 7340032, + "fanoutMean": 2, + "recvTokensMax": 96, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 
512, + "dispatch": { + "p50": 76.64000242948532, + "p90": 78.52800190448761, + "p95": 79.29600030183792, + "p99": 89.37600255012512 + }, + "combine": { + "p50": 68.00000369548798, + "p90": 76.80000364780426, + "p95": 77.47200131416321, + "p99": 79.39200103282928 + }, + "roundtrip": { + "p50": 124.25599992275238, + "p90": 128.9599984884262, + "p95": 129.7920048236847, + "p99": 141.59999787807465 + }, + "isolatedSum": { + "p50": 144.6400061249733, + "p90": 155.32800555229187, + "p95": 156.76800161600113, + "p99": 168.7680035829544 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 2, + "recvTokensMax": 192, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 81.18399977684021, + "p90": 87.55200356245041, + "p95": 89.47200328111649, + "p99": 95.74399888515472 + }, + "combine": { + "p50": 81.98399841785431, + "p90": 90.7839983701706, + "p95": 91.0400003194809, + "p99": 102.78400033712387 + }, + "roundtrip": { + "p50": 146.08000218868256, + "p90": 148.28799664974213, + "p95": 150.81599354743958, + "p99": 159.743994474411 + }, + "isolatedSum": { + "p50": 163.16799819469452, + "p90": 178.336001932621, + "p95": 180.51200360059738, + "p99": 198.5279992222786 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 2, + "recvTokensMax": 384, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -5542,18 +5828,18 @@ ] }, { - "id": "cx-eac6e215", - "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|prefill|normal|none|none|0|tuned||38fd0bcf7109c32", + "id": "cx-72792847", + "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|decode|normal|none|none|0|tuned||2ad5ef98d328fa1", "colorKey": "b300_5b993222", - "comparisonKey": "d3d6cc25fee96bc7", + 
"comparisonKey": "10e590b8f933d382", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:58:52.035249+00:00", + "generatedAt": "2026-06-26T23:58:30.886921+00:00", "status": "valid", "publicationStatus": "official", - "runner": "b300-nv_09", + "runner": "b300-nv_10", "sku": "b300", "backend": "deepep", - "phase": "prefill", + "phase": "decode", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", @@ -5564,6 +5850,7 @@ "worldSize": 8, "epSize": 8, "label": "B300 EP8 · deepep · bf16 · hotspot-single", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, @@ -5594,8 +5881,8 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "38fd0bcf7109c32", - "workloadId": "set:3:b952d4a43d688b50", + "traceSignature": "2ad5ef98d328fa1", + "workloadId": "set:4:286be993cd819ed9", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -5603,119 +5890,156 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271903494", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271903494", - "createdAt": "2026-06-26T23:57:23Z", + "id": "28271900377", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271900377", + "createdAt": "2026-06-26T23:58:30.886921+00:00", "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" }, "rows": [ { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 1, + "globalTokens": 8, "dispatch": { - "p50": 106.08000308275223, - "p90": 107.87200182676315, - "p95": 109.15199667215347, - "p99": 120.28799951076508 + "p50": 56.96000158786774, + "p90": 59.10399928689003, + "p95": 62.272001057863235, + "p99": 71.68000191450119 }, "combine": { - "p50": 127.83999741077423, - "p90": 129.85600531101227, - "p95": 130.97600638866425, - "p99": 139.5840048789978 + "p50": 66.39999896287918, + "p90": 67.07199662923813, + "p95": 67.45599955320358, 
+ "p99": 90.17600119113922 }, "roundtrip": { - "p50": 219.39200162887573, - "p90": 224.16000068187714, - "p95": 225.055992603302, - "p99": 235.35999655723572 + "p50": 106.91200196743011, + "p90": 113.40799927711487, + "p95": 117.18399822711945, + "p99": 195.77600061893463 }, "isolatedSum": { - "p50": 233.92000049352646, - "p90": 237.72800713777542, - "p95": 240.12800306081772, - "p99": 259.8720043897629 + "p50": 123.36000055074692, + "p90": 126.17599591612816, + "p95": 129.72800061106682, + "p99": 161.8560031056404 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 78102528, - "combineLogicalBytes": 78102528, - "fanoutMean": 5.3203125, - "recvTokensMax": 1024, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 8, "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 512, - "globalTokens": 4096, + "tokensPerRank": 8, + "globalTokens": 64, "dispatch": { - "p50": 217.95199811458588, - "p90": 224.03199970722198, - "p95": 228.83200645446777, - "p99": 252.70399451255798 + "p50": 59.58399921655655, + "p90": 62.65600025653839, + "p95": 65.34399837255478, + "p99": 81.85599744319916 }, "combine": { - "p50": 336.38399839401245, - "p90": 338.49599957466125, - "p95": 339.9040102958679, - "p99": 348.4160006046295 + "p50": 68.00000369548798, + "p90": 77.11999863386154, + "p95": 77.79199630022049, + "p99": 79.9039974808693 }, "roundtrip": { - "p50": 535.8399748802185, - "p90": 546.0159778594971, - "p95": 551.3280034065247, - "p99": 558.3680272102356 + "p50": 122.36800044775009, + "p90": 125.791996717453, + "p95": 127.71199643611908, + "p99": 145.82400023937225 }, "isolatedSum": { - "p50": 554.3359965085983, - "p90": 562.5279992818832, - "p95": 568.7360167503357, - "p99": 601.1199951171875 + "p50": 127.58400291204453, + "p90": 139.77599889039993, + "p95": 143.13599467277527, + "p99": 161.75999492406845 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 311091200, - 
"combineLogicalBytes": 311091200, - "fanoutMean": 5.2978515625, - "recvTokensMax": 4096, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 64, "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2048, - "globalTokens": 16384, + "tokensPerRank": 32, + "globalTokens": 256, "dispatch": { - "p50": 685.0559711456299, - "p90": 694.5599913597107, - "p95": 696.3199973106384, - "p99": 705.3760290145874 + "p50": 69.63200122117996, + "p90": 75.32799988985062, + "p95": 77.27999985218048, + "p99": 98.08000177145004 }, "combine": { - "p50": 1085.4400396347046, - "p90": 1086.3360166549683, - "p95": 1087.6480340957642, - "p99": 1096.7680215835571 + "p50": 78.62400263547897, + "p90": 79.26400005817413, + "p95": 79.45600152015686, + "p99": 89.75999802350998 }, "roundtrip": { - "p50": 1752.511978149414, - "p90": 1760.3199481964111, - "p95": 1762.0480060577393, - "p99": 1772.6080417633057 + "p50": 133.53599607944489, + "p90": 137.15200126171112, + "p95": 138.5280042886734, + "p99": 155.10399639606476 }, "isolatedSum": { - "p50": 1770.4960107803345, - "p90": 1780.896008014679, - "p95": 1783.9680314064026, - "p99": 1802.1440505981445 + "p50": 148.25600385665894, + "p90": 154.59199994802475, + "p95": 156.73600137233734, + "p99": 187.83999979496002 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1241511936, - "combineLogicalBytes": 1241511936, - "fanoutMean": 5.28570556640625, - "recvTokensMax": 16384, - "stragglerRank": 4, + "dispatchLogicalBytes": 19525632, + "combineLogicalBytes": 19525632, + "fanoutMean": 5.3203125, + "recvTokensMax": 256, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 103.39199751615524, + "p90": 104.96000200510025, + "p95": 106.62399977445602, + "p99": 110.81600189208984 + }, + "combine": { + "p50": 127.80800461769104, + "p90": 
129.2160004377365, + "p95": 130.5920034646988, + "p99": 150.62400698661804 + }, + "roundtrip": { + "p50": 215.87200462818146, + "p90": 223.07200729846954, + "p95": 224.7679978609085, + "p99": 231.32799565792084 + }, + "isolatedSum": { + "p50": 231.20000213384628, + "p90": 234.17600244283676, + "p95": 237.21600323915482, + "p99": 261.4400088787079 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1024, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -5723,18 +6047,18 @@ ] }, { - "id": "cx-4cb883eb", - "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|prefill|normal|none|none|0|tuned||4caecd33bedf786", - "colorKey": "b300_8d2811e3", - "comparisonKey": "c2361bc487e04e6e", + "id": "cx-f390f28a", + "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|decode|normal|none|none|0|tuned||b6caf944f6bb621", + "colorKey": "b300_5b993222", + "comparisonKey": "82de9b5581f31438", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:58:36.475166+00:00", + "generatedAt": "2026-06-27T09:50:17.677386+00:00", "status": "valid", "publicationStatus": "official", - "runner": "b300-nv_15", + "runner": "b300-nv_03", "sku": "b300", "backend": "deepep", - "phase": "prefill", + "phase": "decode", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", @@ -5744,13 +6068,14 @@ "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep · bf16 · zipf", + "label": "B300 EP8 · deepep · bf16 · hotspot-single", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, "experts": 256, - "routing": "zipf", - "routingLabel": "zipf", + "routing": "hotspot-single", + "routingLabel": "hotspot-single", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, @@ -5775,8 +6100,8 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "4caecd33bedf786", - 
"workloadId": "set:3:830e36e88869e222", + "traceSignature": "b6caf944f6bb621", + "workloadId": "set:8:286be993cd819ed9", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -5784,300 +6109,304 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271889990", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271889990", - "createdAt": "2026-06-26T23:56:56Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28285661360", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285661360", + "createdAt": "2026-06-27T09:50:17.677386+00:00", + "sha": "149586650dbed5b7579537347e9489d5b41543c1" }, "rows": [ { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 1, + "globalTokens": 8, "dispatch": { - "p50": 103.87200117111206, - "p90": 106.88000172376633, - "p95": 109.3439981341362, - "p99": 126.62400305271149 + "p50": 56.48000165820122, + "p90": 58.88000130653381, + "p95": 61.37600168585777, + "p99": 68.89600306749344 }, "combine": { - "p50": 126.91199779510498, - "p90": 128.1919926404953, - "p95": 128.57599556446075, - "p99": 139.615997672081 + "p50": 66.17599725723267, + "p90": 66.78400188684464, + "p95": 67.32799857854843, + "p99": 69.95200365781784 }, "roundtrip": { - "p50": 209.6640020608902, - "p90": 213.95200490951538, - "p95": 215.488001704216, - "p99": 220.47999501228333 + "p50": 105.56799918413162, + "p90": 112.19199746847153, + "p95": 112.70400136709213, + "p99": 120.7360029220581 }, "isolatedSum": { - "p50": 230.78399896621704, - "p90": 235.07199436426163, - "p95": 237.91999369859695, - "p99": 266.2400007247925 + "p50": 122.65599891543388, + "p90": 125.66400319337845, + "p95": 128.7040002644062, + "p99": 138.84800672531128 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 49946624, - "combineLogicalBytes": 49946624, - "fanoutMean": 3.40234375, - 
"recvTokensMax": 1022, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 8, "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 512, - "globalTokens": 4096, + "tokensPerRank": 2, + "globalTokens": 16, "dispatch": { - "p50": 204.73599433898926, - "p90": 212.44800090789795, - "p95": 213.98399770259857, - "p99": 221.02400660514832 + "p50": 56.76800012588501, + "p90": 59.007998555898666, + "p95": 62.04799935221672, + "p99": 67.52000004053116 }, "combine": { - "p50": 325.28001070022583, - "p90": 336.41600608825684, - "p95": 336.70398592948914, - "p99": 340.4799997806549 + "p50": 66.3679987192154, + "p90": 67.64800101518631, + "p95": 68.2239979505539, + "p99": 77.66400277614594 }, "roundtrip": { - "p50": 510.528028011322, - "p90": 517.087996006012, - "p95": 519.1680192947388, - "p99": 526.4639854431152 + "p50": 106.27199709415436, + "p90": 120.60800194740295, + "p95": 129.56799566745758, + "p99": 144.99199390411377 }, "isolatedSum": { - "p50": 530.0160050392151, - "p90": 548.8640069961548, - "p95": 550.6879836320877, - "p99": 561.5040063858032 + "p50": 123.1359988451004, + "p90": 126.65599957108498, + "p95": 130.27199730277061, + "p99": 145.1840028166771 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 201678848, - "combineLogicalBytes": 201678848, - "fanoutMean": 3.4345703125, - "recvTokensMax": 4094, - "stragglerRank": 6, + "dispatchLogicalBytes": 1247232, + "combineLogicalBytes": 1247232, + "fanoutMean": 5.4375, + "recvTokensMax": 16, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2048, - "globalTokens": 16384, + "tokensPerRank": 4, + "globalTokens": 32, "dispatch": { - "p50": 648.1919884681702, - "p90": 659.0080261230469, - "p95": 662.6240015029907, - "p99": 672.5760102272034 + "p50": 57.53599852323532, + "p90": 59.4559982419014, + "p95": 61.69600039720535, + "p99": 68.44799965620041 }, "combine": { - "p50": 
1063.8400316238403, - "p90": 1073.248028755188, - "p95": 1073.6639499664307, - "p99": 1096.60804271698 + "p50": 67.61600077152252, + "p90": 69.50400024652481, + "p95": 77.2159993648529, + "p99": 91.13600105047226 }, "roundtrip": { - "p50": 1698.815941810608, - "p90": 1708.1600427627563, - "p95": 1712.4799489974976, - "p99": 1786.7519855499268 + "p50": 113.76000195741653, + "p90": 118.8800036907196, + "p95": 121.69600278139114, + "p99": 124.9919980764389 }, "isolatedSum": { - "p50": 1712.0320200920105, - "p90": 1732.2560548782349, - "p95": 1736.2879514694214, - "p99": 1769.1840529441833 + "p50": 125.15199929475784, + "p90": 128.9599984884262, + "p95": 138.91199976205826, + "p99": 159.58400070667267 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 808822784, - "combineLogicalBytes": 808822784, - "fanoutMean": 3.44354248046875, - "recvTokensMax": 16380, - "stragglerRank": 6, + "dispatchLogicalBytes": 2451456, + "combineLogicalBytes": 2451456, + "fanoutMean": 5.34375, + "recvTokensMax": 32, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 - } - ] - }, - { - "id": "cx-f7ec6aaf", - "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|prefill|normal|none|none|0|tuned||3dd868cb33839a3", - "colorKey": "b300_2e44c039", - "comparisonKey": "b198376a27b75c7f", - "schemaVersion": 3, - "generatedAt": "2026-06-26T23:58:40.218743+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_11", - "sku": "b300", - "backend": "deepep", - "phase": "prefill", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "B300 EP8 · deepep · bf16 · zipf-heavy", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "zipf-heavy", - "routingLabel": "zipf-heavy", - 
"routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1351, - "configuredUnits": 20, - "deviceUnits": 148, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "3dd868cb33839a3", - "workloadId": "set:3:1ca614e23cc66be1", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28271897134", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271897134", - "createdAt": "2026-06-26T23:57:10Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ + }, { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 8, + "globalTokens": 64, "dispatch": { - "p50": 92.3520028591156, - "p90": 95.90400010347366, - "p95": 98.78399968147278, - "p99": 113.34399878978729 + "p50": 58.04799869656563, + "p90": 60.54399907588959, + "p95": 63.61600011587143, + "p99": 71.16799801588058 }, "combine": { - "p50": 116.19199812412262, - "p90": 120.2239990234375, - "p95": 126.39999389648438, - "p99": 127.68000364303589 + "p50": 67.58400052785873, + "p90": 69.56800073385239, + "p95": 72.89600372314453, + "p99": 80.89599758386612 }, "roundtrip": { - "p50": 194.5279985666275, - "p90": 202.43200659751892, - "p95": 204.22400534152985, - "p99": 214.23999965190887 + "p50": 122.30399996042252, + "p90": 125.11999905109406, + "p95": 126.3359934091568, + "p99": 137.28000223636627 }, "isolatedSum": { - "p50": 208.54400098323822, - 
"p90": 216.12799912691116, - "p95": 225.18399357795715, - "p99": 241.02400243282318 + "p50": 125.63199922442436, + "p90": 130.11199980974197, + "p95": 136.51200383901596, + "p99": 152.0639955997467 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 22650880, - "combineLogicalBytes": 22650880, - "fanoutMean": 1.54296875, - "recvTokensMax": 1024, - "stragglerRank": 7, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 64, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 512, - "globalTokens": 4096, + "tokensPerRank": 16, + "globalTokens": 128, "dispatch": { - "p50": 180.09600043296814, - "p90": 188.6720061302185, - "p95": 190.46400487422943, - "p99": 204.83200252056122 + "p50": 59.776000678539276, + "p90": 67.10399687290192, + "p95": 69.7920024394989, + "p99": 78.11199873685837 }, "combine": { - "p50": 302.94400453567505, - "p90": 311.42398715019226, - "p95": 311.67998909950256, - "p99": 315.16799330711365 + "p50": 68.44799965620041, + "p90": 78.14399898052216, + "p95": 78.46400141716003, + "p99": 79.64800298213959 }, "roundtrip": { - "p50": 473.1520116329193, - "p90": 481.6960096359253, - "p95": 485.0560128688812, - "p99": 493.696004152298 + "p50": 119.55200135707855, + "p90": 124.38400089740753, + "p95": 125.56800246238708, + "p99": 129.18399274349213 }, "isolatedSum": { - "p50": 483.0400049686432, - "p90": 500.09599328041077, - "p95": 502.143993973732, - "p99": 519.9999958276749 + "p50": 128.22400033473969, + "p90": 145.24799585342407, + "p95": 148.25600385665894, + "p99": 157.76000171899796 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 91521024, - "combineLogicalBytes": 91521024, - "fanoutMean": 1.55859375, - "recvTokensMax": 4096, - "stragglerRank": 7, + "dispatchLogicalBytes": 9748480, + "combineLogicalBytes": 9748480, + "fanoutMean": 5.3125, + "recvTokensMax": 128, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 
}, { - "tokensPerRank": 2048, - "globalTokens": 16384, + "tokensPerRank": 32, + "globalTokens": 256, "dispatch": { - "p50": 570.0479745864868, - "p90": 580.4160237312317, - "p95": 583.7439894676208, - "p99": 621.0560202598572 + "p50": 68.9919963479042, + "p90": 74.11199808120728, + "p95": 75.99999755620956, + "p99": 90.87999910116196 }, "combine": { - "p50": 1098.7199544906616, - "p90": 1109.1840267181396, - "p95": 1109.663963317871, - "p99": 1124.4159936904907 + "p50": 78.5600021481514, + "p90": 79.19999957084656, + "p95": 79.77599650621414, + "p99": 91.26400202512741 }, "roundtrip": { - "p50": 1622.8159666061401, - "p90": 1629.3760538101196, - "p95": 1632.2239637374878, - "p99": 1643.3279514312744 + "p50": 130.3360015153885, + "p90": 134.20799374580383, + "p95": 136.00000739097595, + "p99": 146.33600413799286 }, "isolatedSum": { - "p50": 1668.7679290771484, - "p90": 1689.6000504493713, - "p95": 1693.407952785492, - "p99": 1745.472013950348 + "p50": 147.5519984960556, + "p90": 153.31199765205383, + "p95": 155.7759940624237, + "p99": 182.14400112628937 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 368062464, - "combineLogicalBytes": 368062464, - "fanoutMean": 1.5670166015625, - "recvTokensMax": 16384, - "stragglerRank": 7, + "dispatchLogicalBytes": 19525632, + "combineLogicalBytes": 19525632, + "fanoutMean": 5.3203125, + "recvTokensMax": 256, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 83.03999900817871, + "p90": 88.60799670219421, + "p95": 89.56799656152725, + "p99": 95.16800194978714 + }, + "combine": { + "p50": 91.67999774217606, + "p90": 93.9520001411438, + "p95": 94.55999732017517, + "p99": 102.46399790048599 + }, + "roundtrip": { + "p50": 159.19999778270721, + "p90": 164.000004529953, + "p95": 166.24000668525696, + "p99": 175.20000040531158 + }, + "isolatedSum": { + "p50": 174.71999675035477, + "p90": 182.559996843338, + "p95": 
184.12799388170242, + "p99": 197.63199985027313 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38621184, + "combineLogicalBytes": 38621184, + "fanoutMean": 5.26171875, + "recvTokensMax": 512, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 105.3759977221489, + "p90": 106.6880002617836, + "p95": 108.2879975438118, + "p99": 117.76000261306763 + }, + "combine": { + "p50": 127.9039978981018, + "p90": 131.1360001564026, + "p95": 138.20800185203552, + "p99": 144.03200149536133 + }, + "roundtrip": { + "p50": 220.47999501228333, + "p90": 224.41600263118744, + "p95": 225.69599747657776, + "p99": 234.65600609779358 + }, + "isolatedSum": { + "p50": 233.2799956202507, + "p90": 237.8240004181862, + "p95": 246.49599939584732, + "p99": 261.79200410842896 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1024, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -6085,50 +6414,51 @@ ] }, { - "id": "cx-e7727ce9", - "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|normalized|0.18|64d989e2e2a6b31", - "colorKey": "b300_c1ad910f", - "comparisonKey": "9532205a80f3d757", + "id": "cx-6a4bc237", + "identity": "b300|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|hotspot-single+eplb|8|decode|normal|none|none|0|tuned||e41f5099a9733ac", + "colorKey": "b300_39a5906c", + "comparisonKey": "f7e177d587167ca7", "schemaVersion": 3, - "generatedAt": "2026-06-26T17:38:48.516779+00:00", + "generatedAt": "2026-06-27T09:50:24.903917+00:00", "status": "valid", "publicationStatus": "official", - "runner": "b300-nv_15", + "runner": "b300-nv_06", "sku": "b300", "backend": "deepep", - "phase": "prefill", + "phase": "decode", "mode": "normal", - "resourceMode": "normalized", - "suite": 
"resource-constrained", + "resourceMode": "tuned", + "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", "topologyClass": "b300-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep · bf16 (norm)", + "label": "B300 EP8 · deepep · bf16 · hotspot-single+eplb", + "model": "DeepSeek-V3 (EPLB physical)", "shape": { "hidden": 7168, "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", + "experts": 288, + "routing": "hotspot-single", + "routingLabel": "hotspot-single+eplb", "routingStep": 0, "unevenTokens": "none", - "eplbEnabled": false, + "eplbEnabled": true, "dispatchDtype": "bf16", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { - "requestedFraction": 0.18, - "achievedFraction": 0.1824, - "configuredUnits": 27, + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, "deviceUnits": 148, - "resourceClass": "resource-constrained", - "conformanceClass": "resource-conforming", + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", "fixedKernel": false, - "paretoEligible": true + "paretoEligible": false }, "placement": { "kind": "packed", @@ -6137,239 +6467,313 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "64d989e2e2a6b31", - "workloadId": "set:6:a426d66e479dc893", + "traceSignature": "e41f5099a9733ac", + "workloadId": "set:8:286be993cd819ed9", "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, + "eplbImbalanceBefore": 1.830078125, + "eplbImbalanceAfter": 1.0007595486111112, "backendVersion": "1.2.1", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28254469772", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254469772", - "createdAt": "2026-06-26T17:29:52Z", - 
"sha": "60dec7d70f554e252fec87709e2be52752947db1" + "id": "28285664068", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285664068", + "createdAt": "2026-06-27T09:50:24.903917+00:00", + "sha": "149586650dbed5b7579537347e9489d5b41543c1" }, "rows": [ { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 1, + "globalTokens": 8, "dispatch": { - "p50": 94.11200135946274, - "p90": 98.9760011434555, - "p95": 100.54399818181992, - "p99": 116.44800007343292 + "p50": 55.64799904823303, + "p90": 57.37600103020668, + "p95": 59.647999703884125, + "p99": 68.12799721956253 }, "combine": { - "p50": 115.1999980211258, - "p90": 115.9679964184761, - "p95": 116.89600348472595, - "p99": 129.02399897575378 + "p50": 65.60000032186508, + "p90": 66.20799750089645, + "p95": 66.68800115585327, + "p99": 77.27999985218048 }, "roundtrip": { - "p50": 193.2159960269928, - "p90": 198.43199849128723, - "p95": 199.8080015182495, - "p99": 217.50399470329285 + "p50": 104.12800312042236, + "p90": 109.92000252008438, + "p95": 111.35999858379364, + "p99": 116.35199934244156 }, "isolatedSum": { - "p50": 209.31199938058853, - "p90": 214.9439975619316, - "p95": 217.44000166654587, - "p99": 245.4719990491867 + "p50": 121.24799937009811, + "p90": 123.58399853110313, + "p95": 126.3360008597374, + "p99": 145.407997071743 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 4, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 256, - "globalTokens": 2048, + "tokensPerRank": 2, + "globalTokens": 16, "dispatch": { - "p50": 135.42400300502777, - "p90": 138.75199854373932, - "p95": 141.184002161026, - "p99": 151.0079950094223 + "p50": 55.80800026655197, + "p90": 58.111999183893204, + "p95": 60.19200012087822, 
+ "p99": 80.60800284147263 }, "combine": { - "p50": 154.59200739860535, - "p90": 163.90399634838104, - "p95": 164.5440012216568, - "p99": 176.54399573802948 + "p50": 65.92000275850296, + "p90": 67.35999882221222, + "p95": 68.1919977068901, + "p99": 78.27199995517731 }, "roundtrip": { - "p50": 271.67999744415283, - "p90": 277.6319980621338, - "p95": 280.70399165153503, - "p99": 291.3599908351898 + "p50": 104.80000078678131, + "p90": 107.16799646615982, + "p95": 109.56799983978271, + "p99": 119.6800023317337 }, "isolatedSum": { - "p50": 290.0160104036331, - "p90": 302.65599489212036, - "p95": 305.7280033826828, - "p99": 327.5519907474518 + "p50": 121.72800302505493, + "p90": 125.47199800610542, + "p95": 128.38399782776833, + "p99": 158.88000279664993 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 155889664, - "combineLogicalBytes": 155889664, - "fanoutMean": 5.3095703125, - "recvTokensMax": 1422, - "stragglerRank": 4, + "dispatchLogicalBytes": 1175552, + "combineLogicalBytes": 1175552, + "fanoutMean": 5.125, + "recvTokensMax": 12, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 512, - "globalTokens": 4096, + "tokensPerRank": 4, + "globalTokens": 32, "dispatch": { - "p50": 193.24800372123718, - "p90": 199.61600005626678, - "p95": 200.80000162124634, - "p99": 206.68800175189972 + "p50": 56.57599866390228, + "p90": 59.776000678539276, + "p95": 64.7360011935234, + "p99": 73.18399846553802 }, "combine": { - "p50": 265.8880054950714, - "p90": 274.59201216697693, - "p95": 275.2000093460083, - "p99": 286.78399324417114 + "p50": 66.3679987192154, + "p90": 67.77600198984146, + "p95": 68.4799998998642, + "p99": 82.33600109815598 }, "roundtrip": { - "p50": 442.59199500083923, - "p90": 448.96000623703003, - "p95": 455.00800013542175, - "p99": 461.40798926353455 + "p50": 111.84000223875046, + "p90": 116.67200177907944, + "p95": 120.51200121641159, + "p99": 148.15999567508698 }, "isolatedSum": { - "p50": 459.1360092163086, - 
"p90": 474.2080122232437, - "p95": 476.00001096725464, - "p99": 493.47199499607086 + "p50": 122.94399738311768, + "p90": 127.55200266838074, + "p95": 133.2160010933876, + "p99": 155.519999563694 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 312266752, - "combineLogicalBytes": 312266752, - "fanoutMean": 5.31787109375, - "recvTokensMax": 2779, + "dispatchLogicalBytes": 2451456, + "combineLogicalBytes": 2451456, + "fanoutMean": 5.34375, + "recvTokensMax": 23, "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 1024, - "globalTokens": 8192, + "tokensPerRank": 8, + "globalTokens": 64, "dispatch": { - "p50": 326.2079954147339, - "p90": 329.75998520851135, - "p95": 331.6799998283386, - "p99": 341.6000008583069 + "p50": 57.72799998521805, + "p90": 59.808000922203064, + "p95": 61.664000153541565, + "p99": 71.3919997215271 }, "combine": { - "p50": 457.66401290893555, - "p90": 459.77601408958435, - "p95": 469.760000705719, - "p99": 473.7600088119507 + "p50": 67.10399687290192, + "p90": 69.11999732255936, + "p95": 76.31999999284744, + "p99": 80.9599980711937 }, "roundtrip": { - "p50": 762.5920176506042, - "p90": 771.7440128326416, - "p95": 774.2080092430115, - "p99": 789.6320223808289 + "p50": 121.79200351238251, + "p90": 124.4800016283989, + "p95": 125.2480000257492, + "p99": 135.77599823474884 }, "isolatedSum": { - "p50": 783.8720083236694, - "p90": 789.5359992980957, - "p95": 801.4400005340576, - "p99": 815.3600096702576 + "p50": 124.83199685811996, + "p90": 128.92799824476242, + "p95": 137.984000146389, + "p99": 152.3519977927208 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 623443968, - "combineLogicalBytes": 623443968, - "fanoutMean": 5.30859375, - "recvTokensMax": 5505, + "dispatchLogicalBytes": 4730880, + "combineLogicalBytes": 4730880, + "fanoutMean": 5.15625, + "recvTokensMax": 44, "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2048, - "globalTokens": 16384, 
+ "tokensPerRank": 16, + "globalTokens": 128, "dispatch": { - "p50": 577.1200060844421, - "p90": 582.5920104980469, - "p95": 583.5520029067993, - "p99": 591.2960171699524 + "p50": 65.05600363016129, + "p90": 69.95200365781784, + "p95": 73.56800138950348, + "p99": 77.11999863386154 }, "combine": { - "p50": 817.2799944877625, - "p90": 828.4159898757935, - "p95": 831.8719863891602, - "p99": 913.4079813957214 + "p50": 67.48799979686737, + "p90": 77.63200253248215, + "p95": 77.85599678754807, + "p99": 78.49600166082382 }, "roundtrip": { - "p50": 1376.9279718399048, - "p90": 1386.9119882583618, - "p95": 1392.7680253982544, - "p99": 1453.8240432739258 + "p50": 118.9119964838028, + "p90": 122.04799801111221, + "p95": 123.99999797344208, + "p99": 128.86400520801544 }, "isolatedSum": { - "p50": 1394.4000005722046, - "p90": 1411.0080003738403, - "p95": 1415.4239892959595, - "p99": 1504.7039985656738 + "p50": 132.54400342702866, + "p90": 147.5840061903, + "p95": 151.42399817705154, + "p99": 155.61600029468536 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1243805696, - "combineLogicalBytes": 1243805696, - "fanoutMean": 5.29547119140625, - "recvTokensMax": 10952, + "dispatchLogicalBytes": 9691136, + "combineLogicalBytes": 9691136, + "fanoutMean": 5.28125, + "recvTokensMax": 88, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 68.00000369548798, + "p90": 71.03999704122543, + "p95": 73.72800260782242, + "p99": 91.39200299978256 + }, + "combine": { + "p50": 77.95199751853943, + "p90": 78.68800312280655, + "p95": 79.3600007891655, + "p99": 89.63199704885483 + }, + "roundtrip": { + "p50": 128.83199751377106, + "p90": 133.66399705410004, + "p95": 135.0719928741455, + "p99": 143.26399564743042 + }, + "isolatedSum": { + "p50": 145.9520012140274, + "p90": 149.72800016403198, + "p95": 153.08800339698792, + "p99": 181.0240000486374 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 19568640, + "combineLogicalBytes": 19568640, + "fanoutMean": 5.33203125, + "recvTokensMax": 179, "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 4096, - "globalTokens": 32768, + "tokensPerRank": 64, + "globalTokens": 512, "dispatch": { - "p50": 1069.5040225982666, - "p90": 1078.0160427093506, - "p95": 1080.2559852600098, - "p99": 1090.880036354065 + "p50": 81.216000020504, + "p90": 87.93599903583527, + "p95": 89.15200084447861, + "p99": 95.0080007314682 }, "combine": { - "p50": 1528.8959741592407, - "p90": 1540.4479503631592, - "p95": 1542.688012123108, - "p99": 1554.751992225647 + "p50": 91.48799628019333, + "p90": 93.88799965381622, + "p95": 100.96000134944916, + "p99": 101.95200145244598 }, "roundtrip": { - "p50": 2581.9520950317383, - "p90": 2594.6240425109863, - "p95": 2602.303981781006, - "p99": 2637.9199028015137 + "p50": 156.25600516796112, + "p90": 160.25599837303162, + "p95": 161.98399662971497, + "p99": 176.92799866199493 }, "isolatedSum": { - "p50": 2598.3999967575073, - "p90": 2618.4639930725098, - "p95": 2622.9439973831177, - "p99": 2645.632028579712 + "p50": 172.70399630069733, + "p90": 181.8239986896515, + "p95": 190.11200219392776, + "p99": 196.96000218391418 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2487009280, - "combineLogicalBytes": 2487009280, - "fanoutMean": 5.294189453125, - "recvTokensMax": 21781, - "stragglerRank": 6, + "dispatchLogicalBytes": 38750208, + "combineLogicalBytes": 38750208, + "fanoutMean": 5.279296875, + "recvTokensMax": 348, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 92.79999881982803, + "p90": 94.91200000047684, + "p95": 96.6079980134964, + "p99": 106.23999685049057 + }, + "combine": { + "p50": 114.78400230407715, + "p90": 116.41599982976913, + "p95": 117.66400188207626, + "p99": 128.60800325870514 + }, + "roundtrip": { + 
"p50": 190.8479928970337, + "p90": 196.73599302768707, + "p95": 197.82400131225586, + "p99": 204.51200008392334 + }, + "isolatedSum": { + "p50": 207.58400112390518, + "p90": 211.32799983024597, + "p95": 214.27199989557266, + "p99": 234.8480001091957 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77342720, + "combineLogicalBytes": 77342720, + "fanoutMean": 5.2685546875, + "recvTokensMax": 687, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -6377,50 +6781,51 @@ ] }, { - "id": "cx-5fd5a06c", - "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|prefill|normal|none|none|0|normalized|0.18|0a3064a2af0dd39", - "colorKey": "b300_0622d929", - "comparisonKey": "8c83b99af9d27709", + "id": "cx-f11d8dc8", + "identity": "b300|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|uniform+eplb|8|decode|normal|none|none|0|tuned||73351bbcd4d02de", + "colorKey": "b300_e3d449ce", + "comparisonKey": "6570d3a11ae9f14f", "schemaVersion": 3, - "generatedAt": "2026-06-26T18:11:00.153293+00:00", + "generatedAt": "2026-06-27T09:47:37.037332+00:00", "status": "valid", "publicationStatus": "official", - "runner": "b300-nv_10", + "runner": "b300-nv_01", "sku": "b300", "backend": "deepep", - "phase": "prefill", + "phase": "decode", "mode": "normal", - "resourceMode": "normalized", - "suite": "resource-constrained", + "resourceMode": "tuned", + "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", "topologyClass": "b300-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep · bf16 (norm) · balanced", + "label": "B300 EP8 · deepep · bf16 · uniform+eplb", + "model": "DeepSeek-V3 (EPLB physical)", "shape": { "hidden": 7168, "topk": 8, - "experts": 256, - "routing": "balanced", - "routingLabel": "balanced", + "experts": 288, + "routing": "uniform", + "routingLabel": "uniform+eplb", "routingStep": 0, "unevenTokens": "none", - 
"eplbEnabled": false, + "eplbEnabled": true, "dispatchDtype": "bf16", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { - "requestedFraction": 0.18, - "achievedFraction": 0.1824, - "configuredUnits": 27, + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, "deviceUnits": 148, - "resourceClass": "resource-constrained", - "conformanceClass": "resource-conforming", + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", "fixedKernel": false, - "paretoEligible": true + "paretoEligible": false }, "placement": { "kind": "packed", @@ -6429,238 +6834,312 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "0a3064a2af0dd39", - "workloadId": "set:6:2dad1a73ff872905", + "traceSignature": "73351bbcd4d02de", + "workloadId": "set:8:d8d49658059863f2", "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, + "eplbImbalanceBefore": 1.078125, + "eplbImbalanceAfter": 1.00048828125, "backendVersion": "1.2.1", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28254508907", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254508907", - "createdAt": "2026-06-26T17:30:32Z", - "sha": "60dec7d70f554e252fec87709e2be52752947db1" + "id": "28285602756", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285602756", + "createdAt": "2026-06-27T09:47:37.037332+00:00", + "sha": "149586650dbed5b7579537347e9489d5b41543c1" }, "rows": [ { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 1, + "globalTokens": 8, "dispatch": { - "p50": 105.79200088977814, - "p90": 108.83200168609619, - "p95": 111.00800335407257, - "p99": 118.9119964838028 + "p50": 58.400001376867294, + "p90": 59.967998415231705, + "p95": 62.431998550891876, + "p99": 78.65600287914276 }, "combine": { - "p50": 130.0159990787506, - 
"p90": 139.20000195503235, - "p95": 139.74399864673615, - "p99": 150.84800124168396 + "p50": 66.04799628257751, + "p90": 66.59200042486191, + "p95": 67.71200150251389, + "p99": 77.56800204515457 }, "roundtrip": { - "p50": 228.38400304317474, - "p90": 234.65600609779358, - "p95": 235.61599850654602, - "p99": 252.28801369667053 + "p50": 107.19999670982361, + "p90": 113.18399757146835, + "p95": 114.62400108575821, + "p99": 137.5039964914322 }, "isolatedSum": { - "p50": 235.80799996852875, - "p90": 248.03200364112854, - "p95": 250.75200200080872, - "p99": 269.75999772548676 + "p50": 124.44799765944481, + "p90": 126.55999884009361, + "p95": 130.14400005340576, + "p99": 156.22400492429733 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 117440512, - "combineLogicalBytes": 117440512, - "fanoutMean": 8, - "recvTokensMax": 1024, + "dispatchLogicalBytes": 516096, + "combineLogicalBytes": 516096, + "fanoutMean": 4.5, + "recvTokensMax": 6, "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 256, - "globalTokens": 2048, + "tokensPerRank": 2, + "globalTokens": 16, "dispatch": { - "p50": 159.36000645160675, - "p90": 162.56000101566315, - "p95": 163.90399634838104, - "p99": 170.59199512004852 + "p50": 58.75200033187866, + "p90": 60.03199890255928, + "p95": 60.67200005054474, + "p99": 65.60000032186508 }, "combine": { - "p50": 201.34399831295013, - "p90": 203.96800339221954, - "p95": 211.45600080490112, - "p99": 224.86400604248047 + "p50": 66.20799750089645, + "p90": 67.71200150251389, + "p95": 68.60800087451935, + "p99": 88.86399865150452 }, "roundtrip": { - "p50": 334.879994392395, - "p90": 340.03201127052307, - "p95": 342.0479893684387, - "p99": 360.28799414634705 + "p50": 108.0000028014183, + "p90": 112.2559979557991, + "p95": 115.167997777462, + "p99": 124.03199821710587 }, "isolatedSum": { - "p50": 360.7040047645569, - "p90": 366.5280044078827, - "p95": 375.35999715328217, - "p99": 395.456001162529 + "p50": 124.95999783277512, 
+ "p90": 127.74400040507317, + "p95": 129.2800009250641, + "p99": 154.4639989733696 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 234881024, - "combineLogicalBytes": 234881024, - "fanoutMean": 8, - "recvTokensMax": 2048, + "dispatchLogicalBytes": 1089536, + "combineLogicalBytes": 1089536, + "fanoutMean": 4.75, + "recvTokensMax": 11, "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 512, - "globalTokens": 4096, + "tokensPerRank": 4, + "globalTokens": 32, "dispatch": { - "p50": 232.67200589179993, - "p90": 240.76800048351288, - "p95": 244.60799992084503, - "p99": 252.22399830818176 + "p50": 59.167999774217606, + "p90": 61.85600161552429, + "p95": 63.840001821517944, + "p99": 70.3359991312027 }, "combine": { - "p50": 338.01600337028503, - "p90": 347.8719890117645, - "p95": 348.7040102481842, - "p99": 361.407995223999 + "p50": 68.12799721956253, + "p90": 76.48000121116638, + "p95": 76.92799717187881, + "p99": 77.91999727487564 }, "roundtrip": { - "p50": 553.9519786834717, - "p90": 560.2239966392517, - "p95": 564.3839836120605, - "p99": 589.8879766464233 + "p50": 120.44800072908401, + "p90": 124.09599870443344, + "p95": 125.59999525547028, + "p99": 134.33599472045898 }, "isolatedSum": { - "p50": 570.688009262085, - "p90": 588.6399894952774, - "p95": 593.3120101690292, - "p99": 613.6319935321808 + "p50": 127.29599699378014, + "p90": 138.33600282669067, + "p95": 140.76799899339676, + "p99": 148.25599640607834 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 469762048, - "combineLogicalBytes": 469762048, - "fanoutMean": 8, - "recvTokensMax": 4096, + "dispatchLogicalBytes": 2207744, + "combineLogicalBytes": 2207744, + "fanoutMean": 4.8125, + "recvTokensMax": 23, "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 1024, - "globalTokens": 8192, + "tokensPerRank": 8, + "globalTokens": 64, "dispatch": { - "p50": 409.5360040664673, - "p90": 415.0719940662384, - "p95": 
416.76801443099976, - "p99": 433.50398540496826 + "p50": 60.864001512527466, + "p90": 68.31999868154526, + "p95": 70.97599655389786, + "p99": 79.99999821186066 }, "combine": { - "p50": 594.3359732627869, - "p90": 599.7120141983032, - "p95": 606.2399744987488, - "p99": 619.2640066146851 + "p50": 68.31999868154526, + "p90": 77.11999863386154, + "p95": 77.79199630022049, + "p99": 79.42400127649307 }, "roundtrip": { - "p50": 986.1119985580444, - "p90": 993.5680031776428, - "p95": 998.8160133361816, - "p99": 1015.8400535583496 + "p50": 121.60000205039978, + "p90": 125.91999769210815, + "p95": 127.03999876976013, + "p99": 133.08799266815186 }, "isolatedSum": { - "p50": 1003.8719773292542, - "p90": 1014.7840082645416, - "p95": 1023.0079889297485, - "p99": 1052.7679920196533 + "p50": 129.18400019407272, + "p90": 145.4399973154068, + "p95": 148.76799285411835, + "p99": 159.42399948835373 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 939524096, - "combineLogicalBytes": 939524096, - "fanoutMean": 8, - "recvTokensMax": 8192, + "dispatchLogicalBytes": 4558848, + "combineLogicalBytes": 4558848, + "fanoutMean": 4.96875, + "recvTokensMax": 46, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 67.45599955320358, + "p90": 73.37599992752075, + "p95": 74.5600014925003, + "p99": 81.40800148248672 + }, + "combine": { + "p50": 68.9919963479042, + "p90": 78.20799946784973, + "p95": 78.46400141716003, + "p99": 81.15199953317642 + }, + "roundtrip": { + "p50": 121.15199863910675, + "p90": 124.25599992275238, + "p95": 126.01600587368011, + "p99": 138.97599279880524 + }, + "isolatedSum": { + "p50": 136.4479959011078, + "p90": 151.58399939537048, + "p95": 153.02400290966034, + "p99": 162.56000101566315 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9347072, + "combineLogicalBytes": 9347072, + "fanoutMean": 5.09375, + "recvTokensMax": 86, "stragglerRank": 7, 
"correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2048, - "globalTokens": 16384, + "tokensPerRank": 32, + "globalTokens": 256, "dispatch": { - "p50": 756.384015083313, - "p90": 767.3280239105225, - "p95": 769.6639895439148, - "p99": 787.7439856529236 + "p50": 70.43199986219406, + "p90": 71.80800288915634, + "p95": 73.47200065851212, + "p99": 81.44000172615051 }, "combine": { - "p50": 1112.671971321106, - "p90": 1122.8480339050293, - "p95": 1133.6640119552612, - "p99": 1208.4800004959106 + "p50": 78.46400141716003, + "p90": 79.39200103282928, + "p95": 80.99199831485748, + "p99": 92.12800115346909 }, "roundtrip": { - "p50": 1856.0960292816162, - "p90": 1870.6879615783691, - "p95": 1877.087950706482, - "p99": 1941.5040016174316 + "p50": 132.7359974384308, + "p90": 137.56799697875977, + "p95": 138.62399756908417, + "p99": 143.71199905872345 }, "isolatedSum": { - "p50": 1869.055986404419, - "p90": 1890.1760578155518, - "p95": 1903.328001499176, - "p99": 1996.2239861488342 + "p50": 148.8960012793541, + "p90": 151.20000392198563, + "p95": 154.4639989733696, + "p99": 173.5680028796196 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1879048192, - "combineLogicalBytes": 1879048192, - "fanoutMean": 8, - "recvTokensMax": 16384, - "stragglerRank": 6, + "dispatchLogicalBytes": 18995200, + "combineLogicalBytes": 18995200, + "fanoutMean": 5.17578125, + "recvTokensMax": 178, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 4096, - "globalTokens": 32768, + "tokensPerRank": 64, + "globalTokens": 512, "dispatch": { - "p50": 1458.8799476623535, - "p90": 1475.0720262527466, - "p95": 1481.4079999923706, - "p99": 1536.8640422821045 + "p50": 83.16799998283386, + "p90": 88.95999938249588, + "p95": 89.82399851083755, + "p99": 93.85599941015244 }, "combine": { - "p50": 2142.047882080078, - "p90": 2154.560089111328, - "p95": 2158.9438915252686, - "p99": 2215.9039974212646 + "p50": 91.58399701118469, + "p90": 
93.82399916648865, + "p95": 101.1200025677681, + "p99": 114.68800157308578 }, "roundtrip": { - "p50": 3584.160089492798, - "p90": 3605.760097503662, - "p95": 3613.152027130127, - "p99": 3669.503927230835 + "p50": 159.42400693893433, + "p90": 163.10399770736694, + "p95": 164.35199975967407, + "p99": 169.37600076198578 }, "isolatedSum": { - "p50": 3600.9278297424316, - "p90": 3629.6321153640747, - "p95": 3640.351891517639, - "p99": 3752.768039703369 + "p50": 174.75199699401855, + "p90": 182.78399854898453, + "p95": 190.94400107860565, + "p99": 208.54400098323822 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 3758096384, - "combineLogicalBytes": 3758096384, - "fanoutMean": 8, - "recvTokensMax": 32768, + "dispatchLogicalBytes": 38291456, + "combineLogicalBytes": 38291456, + "fanoutMean": 5.216796875, + "recvTokensMax": 348, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 95.2640026807785, + "p90": 97.59999811649323, + "p95": 99.90400075912476, + "p99": 116.28799885511398 + }, + "combine": { + "p50": 115.23199826478958, + "p90": 115.84000289440155, + "p95": 116.38399958610535, + "p99": 126.20800733566284 + }, + "roundtrip": { + "p50": 193.9840018749237, + "p90": 199.64799284934998, + "p95": 200.6399929523468, + "p99": 210.52800118923187 + }, + "isolatedSum": { + "p50": 210.49600094556808, + "p90": 213.44000101089478, + "p95": 216.2880003452301, + "p99": 242.49600619077682 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77113344, + "combineLogicalBytes": 77113344, + "fanoutMean": 5.2529296875, + "recvTokensMax": 685, "stragglerRank": 7, "correct": true, "samplesPooled": 600, @@ -6669,28 +7148,29 @@ ] }, { - "id": "cx-6620cae5", - "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|prefill|normal|none|none|0|normalized|0.18|b5217e990b95f86", - "colorKey": "b300_01ab5b1a", - "comparisonKey": "5702bf02b3927f32", + 
"id": "cx-7d11224e", + "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|decode|normal|none|none|0|tuned||14ded8461f2636c", + "colorKey": "b300_8d2811e3", + "comparisonKey": "801e704d68c28ca9", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:38:15.541333+00:00", + "generatedAt": "2026-06-27T09:48:25.920368+00:00", "status": "valid", "publicationStatus": "official", - "runner": "b300-nv_06", + "runner": "b300-nv_09", "sku": "b300", "backend": "deepep", - "phase": "prefill", + "phase": "decode", "mode": "normal", - "resourceMode": "normalized", - "suite": "resource-constrained", + "resourceMode": "tuned", + "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", "topologyClass": "b300-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep · bf16 (norm) · zipf", + "label": "B300 EP8 · deepep · bf16 · zipf", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, @@ -6705,14 +7185,14 @@ "combineQuantMode": "none" }, "resourceProfile": { - "requestedFraction": 0.18, - "achievedFraction": 0.1824, - "configuredUnits": 27, + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, "deviceUnits": 148, - "resourceClass": "resource-constrained", - "conformanceClass": "resource-conforming", + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", "fixedKernel": false, - "paretoEligible": true + "paretoEligible": false }, "placement": { "kind": "packed", @@ -6721,8 +7201,8 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "b5217e990b95f86", - "workloadId": "set:6:830e36e88869e222", + "traceSignature": "14ded8461f2636c", + "workloadId": "set:8:f5576e2b712d38c3", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -6730,281 +7210,356 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", 
"repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271231753", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271231753", - "createdAt": "2026-06-26T23:36:29Z", - "sha": "ee4ffe77871d0200cb4a78c96d3ae9f692e9af02" + "id": "28285620595", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285620595", + "createdAt": "2026-06-27T09:48:25.920368+00:00", + "sha": "149586650dbed5b7579537347e9489d5b41543c1" }, "rows": [ { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 1, + "globalTokens": 8, "dispatch": { - "p50": 101.3759970664978, - "p90": 104.76800054311752, - "p95": 106.01600259542465, - "p99": 111.90400272607803 + "p50": 56.384000927209854, + "p90": 58.81600081920624, + "p95": 61.37600168585777, + "p99": 80.60800284147263 }, "combine": { - "p50": 126.11199915409088, - "p90": 127.3919939994812, - "p95": 127.83999741077423, - "p99": 129.18399274349213 + "p50": 65.47199934720993, + "p90": 66.3679987192154, + "p95": 66.72000139951706, + "p99": 68.09599697589874 }, "roundtrip": { - "p50": 207.8080028295517, - "p90": 212.6079946756363, - "p95": 213.69600296020508, - "p99": 224.2559939622879 + "p50": 107.42399841547012, + "p90": 111.84000223875046, + "p95": 112.96000331640244, + "p99": 126.14400684833527 }, "isolatedSum": { - "p50": 227.48799622058868, - "p90": 232.15999454259872, - "p95": 233.85600000619888, - "p99": 241.08799546957016 + "p50": 121.85600027441978, + "p90": 125.18399953842163, + "p95": 128.09600308537483, + "p99": 148.70399981737137 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 49946624, - "combineLogicalBytes": 49946624, - "fanoutMean": 3.40234375, - "recvTokensMax": 1022, + "dispatchLogicalBytes": 444416, + "combineLogicalBytes": 444416, + "fanoutMean": 3.875, + "recvTokensMax": 8, "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 256, - "globalTokens": 2048, + "tokensPerRank": 2, + "globalTokens": 16, "dispatch": { - "p50": 
136.31999492645264, - "p90": 139.80799913406372, - "p95": 140.86399972438812, - "p99": 150.43200552463531 + "p50": 56.2559999525547, + "p90": 58.33600088953972, + "p95": 60.447998344898224, + "p99": 72.83200323581696 }, "combine": { - "p50": 176.35199427604675, - "p90": 178.78399789333344, - "p95": 180.03199994564056, - "p99": 188.60800564289093 + "p50": 66.01600348949432, + "p90": 66.68800115585327, + "p95": 67.48799979686737, + "p99": 91.90399944782257 }, "roundtrip": { - "p50": 297.5679934024811, - "p90": 303.45600843429565, - "p95": 306.46398663520813, - "p99": 319.2960023880005 + "p50": 105.02400249242783, + "p90": 112.41599917411804, + "p95": 113.0559965968132, + "p99": 119.64800208806992 }, "isolatedSum": { - "p50": 312.6719892024994, - "p90": 318.59199702739716, - "p95": 320.8959996700287, - "p99": 339.04001116752625 + "p50": 122.27200344204903, + "p90": 125.02400204539299, + "p95": 127.9359981417656, + "p99": 164.73600268363953 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 100509696, - "combineLogicalBytes": 100509696, - "fanoutMean": 3.42333984375, - "recvTokensMax": 2046, + "dispatchLogicalBytes": 845824, + "combineLogicalBytes": 845824, + "fanoutMean": 3.6875, + "recvTokensMax": 16, "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 512, - "globalTokens": 4096, + "tokensPerRank": 4, + "globalTokens": 32, "dispatch": { - "p50": 203.90400290489197, - "p90": 211.58400177955627, - "p95": 212.51200139522552, - "p99": 223.32799434661865 + "p50": 56.73599988222122, + "p90": 59.29600074887276, + "p95": 60.99199876189232, + "p99": 73.11999797821045 }, "combine": { - "p50": 325.1839876174927, - "p90": 335.55200695991516, - "p95": 335.80800890922546, - "p99": 337.8559947013855 + "p50": 66.20799750089645, + "p90": 67.55200028419495, + "p95": 68.80000233650208, + "p99": 79.74400371313095 }, "roundtrip": { - "p50": 506.20800256729126, - "p90": 514.4960284233093, - "p95": 519.7759866714478, - "p99": 
534.0160131454468 + "p50": 105.85600137710571, + "p90": 108.73600095510483, + "p95": 110.43199896812439, + "p99": 124.92799758911133 }, "isolatedSum": { - "p50": 529.0879905223846, - "p90": 547.1360087394714, - "p95": 548.320010304451, - "p99": 561.1839890480042 + "p50": 122.94399738311768, + "p90": 126.8480010330677, + "p95": 129.7920010983944, + "p99": 152.8640016913414 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 201678848, - "combineLogicalBytes": 201678848, - "fanoutMean": 3.4345703125, - "recvTokensMax": 4094, - "stragglerRank": 5, + "dispatchLogicalBytes": 1691648, + "combineLogicalBytes": 1691648, + "fanoutMean": 3.6875, + "recvTokensMax": 32, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 1024, - "globalTokens": 8192, + "tokensPerRank": 8, + "globalTokens": 64, "dispatch": { - "p50": 348.86398911476135, - "p90": 353.40800881385803, - "p95": 354.7520041465759, - "p99": 364.22398686408997 + "p50": 58.30400064587593, + "p90": 64.57599997520447, + "p95": 65.85600227117538, + "p99": 70.88000327348709 }, "combine": { - "p50": 582.4000239372253, - "p90": 585.9519839286804, - "p95": 593.0879712104797, - "p99": 594.5919752120972 + "p50": 66.81600213050842, + "p90": 68.51200014352798, + "p95": 69.023996591568, + "p99": 78.17599922418594 }, "roundtrip": { - "p50": 909.5680117607117, - "p90": 917.2160029411316, - "p95": 918.5600280761719, - "p99": 924.127995967865 + "p50": 114.56000059843063, + "p90": 121.15199863910675, + "p95": 122.5920021533966, + "p99": 138.72000575065613 }, "isolatedSum": { - "p50": 931.2640130519867, - "p90": 939.3599927425385, - "p95": 947.8399753570557, - "p99": 958.8159620761871 + "p50": 125.12000277638435, + "p90": 133.08800011873245, + "p95": 134.87999886274338, + "p99": 149.05600249767303 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 405035008, - "combineLogicalBytes": 405035008, - "fanoutMean": 3.4488525390625, - "recvTokensMax": 8189, + "dispatchLogicalBytes": 
3354624, + "combineLogicalBytes": 3354624, + "fanoutMean": 3.65625, + "recvTokensMax": 64, "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2048, - "globalTokens": 16384, + "tokensPerRank": 16, + "globalTokens": 128, "dispatch": { - "p50": 641.8560147285461, - "p90": 648.639976978302, - "p95": 655.135989189148, - "p99": 660.256028175354 + "p50": 59.29600074887276, + "p90": 63.45599889755249, + "p95": 66.3679987192154, + "p99": 85.82399785518646 }, "combine": { - "p50": 1062.7520084381104, - "p90": 1072.7039575576782, - "p95": 1073.4080076217651, - "p99": 1076.5119791030884 + "p50": 67.03999638557434, + "p90": 69.023996591568, + "p95": 70.3359991312027, + "p99": 79.93599772453308 }, "roundtrip": { - "p50": 1693.343997001648, - "p90": 1700.6080150604248, - "p95": 1702.847957611084, - "p99": 1706.6559791564941 + "p50": 122.6240023970604, + "p90": 125.66399574279785, + "p95": 126.65599584579468, + "p99": 131.9359987974167 }, "isolatedSum": { - "p50": 1704.6080231666565, - "p90": 1721.3439345359802, - "p95": 1728.543996810913, - "p99": 1736.7680072784424 + "p50": 126.3359971344471, + "p90": 132.47999548912048, + "p95": 136.7039978504181, + "p99": 165.75999557971954 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 808822784, - "combineLogicalBytes": 808822784, - "fanoutMean": 3.44354248046875, - "recvTokensMax": 16380, - "stragglerRank": 5, + "dispatchLogicalBytes": 6537216, + "combineLogicalBytes": 6537216, + "fanoutMean": 3.5625, + "recvTokensMax": 127, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 4096, - "globalTokens": 32768, + "tokensPerRank": 32, + "globalTokens": 256, "dispatch": { - "p50": 1252.1920204162598, - "p90": 1262.719988822937, - "p95": 1264.7360563278198, - "p99": 1276.8640518188477 + "p50": 74.14399832487106, + "p90": 76.54400169849396, + "p95": 77.85599678754807, + "p99": 89.4400030374527 }, "combine": { - "p50": 2043.4560775756836, - "p90": 
2045.151948928833, - "p95": 2047.1999645233154, - "p99": 2067.392110824585 + "p50": 77.11999863386154, + "p90": 78.52800190448761, + "p95": 78.68800312280655, + "p99": 89.4400030374527 }, "roundtrip": { - "p50": 3284.6720218658447, - "p90": 3295.1040267944336, - "p95": 3299.0400791168213, - "p99": 3313.3440017700195 + "p50": 127.10399925708771, + "p90": 132.1280002593994, + "p95": 133.760005235672, + "p99": 136.3839954137802 }, "isolatedSum": { - "p50": 3295.6480979919434, - "p90": 3307.87193775177, - "p95": 3311.9360208511353, - "p99": 3344.2561626434326 + "p50": 151.2639969587326, + "p90": 155.07200360298157, + "p95": 156.54399991035461, + "p99": 178.8800060749054 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1619795968, - "combineLogicalBytes": 1619795968, - "fanoutMean": 3.4481201171875, - "recvTokensMax": 32761, + "dispatchLogicalBytes": 12859392, + "combineLogicalBytes": 12859392, + "fanoutMean": 3.50390625, + "recvTokensMax": 255, "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 79.77599650621414, + "p90": 81.53600245714188, + "p95": 82.8159973025322, + "p99": 89.9839997291565 + }, + "combine": { + "p50": 90.87999910116196, + "p90": 102.88000106811523, + "p95": 104.41599786281586, + "p99": 115.58400094509125 + }, + "roundtrip": { + "p50": 157.95199573040009, + "p90": 162.59199380874634, + "p95": 164.19200599193573, + "p99": 182.68799781799316 + }, + "isolatedSum": { + "p50": 170.6559956073761, + "p90": 184.4160035252571, + "p95": 187.23199516534805, + "p99": 205.56800067424774 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 25145344, + "combineLogicalBytes": 25145344, + "fanoutMean": 3.42578125, + "recvTokensMax": 510, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 101.24800354242325, + "p90": 104.22399640083313, + "p95": 
105.3759977221489, + "p99": 124.67200309038162 + }, + "combine": { + "p50": 126.17599964141846, + "p90": 127.71199643611908, + "p95": 128.31999361515045, + "p99": 139.93600010871887 + }, + "roundtrip": { + "p50": 208.92800390720367, + "p90": 213.76000344753265, + "p95": 214.78399634361267, + "p99": 229.0239930152893 + }, + "isolatedSum": { + "p50": 227.4240031838417, + "p90": 231.9359928369522, + "p95": 233.69599133729935, + "p99": 264.6080031991005 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 } ] }, { - "id": "cx-9b7dbfc5", - "identity": "b300|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|prefill|normal|none|none|0|normalized|0.18|2b57a75d27f5b39", - "colorKey": "b300_085c12d4", - "comparisonKey": "afb8d29f702ca3c1", + "id": "cx-cc647506", + "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|decode|normal|none|none|0|tuned||1fa7fe74d0e30a3", + "colorKey": "b300_8d2811e3", + "comparisonKey": "478acd4108c50326", "schemaVersion": 3, - "generatedAt": "2026-06-26T18:21:45.459593+00:00", + "generatedAt": "2026-06-26T23:58:32.426052+00:00", "status": "valid", "publicationStatus": "official", - "runner": "b300-nv_16", + "runner": "b300-nv_05", "sku": "b300", "backend": "deepep", - "phase": "prefill", + "phase": "decode", "mode": "normal", - "resourceMode": "normalized", - "suite": "resource-constrained", + "resourceMode": "tuned", + "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", "topologyClass": "b300-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep · bf16 (norm) · zipf+eplb", + "label": "B300 EP8 · deepep · bf16 · zipf", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, - "experts": 288, + "experts": 256, 
"routing": "zipf", - "routingLabel": "zipf+eplb", + "routingLabel": "zipf", "routingStep": 0, "unevenTokens": "none", - "eplbEnabled": true, + "eplbEnabled": false, "dispatchDtype": "bf16", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { - "requestedFraction": 0.18, - "achievedFraction": 0.1824, - "configuredUnits": 27, + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, "deviceUnits": 148, - "resourceClass": "resource-constrained", - "conformanceClass": "resource-conforming", + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", "fixedKernel": false, - "paretoEligible": true + "paretoEligible": false }, "placement": { "kind": "packed", @@ -7013,238 +7568,164 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "2b57a75d27f5b39", - "workloadId": "set:6:830e36e88869e222", + "traceSignature": "1fa7fe74d0e30a3", + "workloadId": "set:4:f5576e2b712d38c3", "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": 4.895263671875, - "eplbImbalanceAfter": 1.0000902811686199, + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, "backendVersion": "1.2.1", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28255311146", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28255311146", - "createdAt": "2026-06-26T17:45:43Z", - "sha": "36d3eb6c3c7386d3220c873d305410219c5c0f17" + "id": "28271886823", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271886823", + "createdAt": "2026-06-26T23:58:32.426052+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" }, "rows": [ { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 93.28000247478485, - "p90": 96.16000205278397, - "p95": 98.78399968147278, - "p99": 129.2479932308197 - }, - "combine": { - "p50": 114.94400352239609, - "p90": 115.55200070142746, 
- "p95": 115.93600362539291, - "p99": 126.3359934091568 - }, - "roundtrip": { - "p50": 195.6160068511963, - "p90": 199.42399859428406, - "p95": 200.83199441432953, - "p99": 215.16799926757812 - }, - "isolatedSum": { - "p50": 208.22400599718094, - "p90": 211.71200275421143, - "p95": 214.7200033068657, - "p99": 255.5839866399765 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 77385728, - "combineLogicalBytes": 77385728, - "fanoutMean": 5.271484375, - "recvTokensMax": 691, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 256, - "globalTokens": 2048, + "tokensPerRank": 1, + "globalTokens": 8, "dispatch": { - "p50": 136.25599443912506, - "p90": 139.00800049304962, - "p95": 141.50400459766388, - "p99": 155.03999590873718 + "p50": 56.703999638557434, + "p90": 59.90400165319443, + "p95": 62.65600025653839, + "p99": 69.98399645090103 }, "combine": { - "p50": 153.72799336910248, - "p90": 163.2319986820221, - "p95": 163.80800306797028, - "p99": 167.67999529838562 + "p50": 65.88800251483917, + "p90": 66.43199920654297, + "p95": 66.72000139951706, + "p99": 73.7600028514862 }, "roundtrip": { - "p50": 269.9199914932251, - "p90": 275.64799785614014, - "p95": 276.92800760269165, - "p99": 291.77600145339966 + "p50": 107.16799646615982, + "p90": 112.83200234174728, + "p95": 114.14399743080139, + "p99": 120.44800072908401 }, "isolatedSum": { - "p50": 289.98398780822754, - "p90": 302.2399991750717, - "p95": 305.31200766563416, - "p99": 322.7199912071228 + "p50": 122.5920021533966, + "p90": 126.3360008597374, + "p95": 129.37600165605545, + "p99": 143.74399930238724 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 155172864, - "combineLogicalBytes": 155172864, - "fanoutMean": 5.28515625, - "recvTokensMax": 1378, + "dispatchLogicalBytes": 444416, + "combineLogicalBytes": 444416, + "fanoutMean": 3.875, + "recvTokensMax": 8, "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - 
"tokensPerRank": 512, - "globalTokens": 4096, + "tokensPerRank": 8, + "globalTokens": 64, "dispatch": { - "p50": 192.671999335289, - "p90": 200.095996260643, - "p95": 201.1840045452118, - "p99": 211.99999749660492 + "p50": 58.848001062870026, + "p90": 60.80000102519989, + "p95": 62.84800171852112, + "p99": 74.40000027418137 }, "combine": { - "p50": 264.70398902893066, - "p90": 274.2399871349335, - "p95": 274.9119997024536, - "p99": 286.3999903202057 + "p50": 68.00000369548798, + "p90": 70.30399888753891, + "p95": 76.99199765920639, + "p99": 78.5600021481514 }, "roundtrip": { - "p50": 439.7439956665039, - "p90": 445.279985666275, - "p95": 447.519987821579, - "p99": 459.9039852619171 + "p50": 116.54400080442429, + "p90": 123.29600006341934, + "p95": 124.83199685811996, + "p99": 130.46400249004364 }, "isolatedSum": { - "p50": 457.37598836421967, - "p90": 474.3359833955765, - "p95": 476.0960042476654, - "p99": 498.3999878168106 + "p50": 126.848004758358, + "p90": 131.1039999127388, + "p95": 139.8399993777275, + "p99": 152.96000242233276 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 310546432, - "combineLogicalBytes": 310546432, - "fanoutMean": 5.28857421875, - "recvTokensMax": 2745, + "dispatchLogicalBytes": 3354624, + "combineLogicalBytes": 3354624, + "fanoutMean": 3.65625, + "recvTokensMax": 64, "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 1024, - "globalTokens": 8192, + "tokensPerRank": 32, + "globalTokens": 256, "dispatch": { - "p50": 319.90399956703186, - "p90": 325.8560001850128, - "p95": 327.1999955177307, - "p99": 333.44000577926636 + "p50": 75.9039968252182, + "p90": 78.27199995517731, + "p95": 79.52000200748444, + "p99": 87.5839963555336 }, "combine": { - "p50": 450.78399777412415, - "p90": 458.8800072669983, - "p95": 459.77601408958435, - "p99": 482.87999629974365 + "p50": 78.40000092983246, + "p90": 79.19999957084656, + "p95": 79.71200346946716, + "p99": 83.64800363779068 }, "roundtrip": { - "p50": 
756.1600208282471, - "p90": 761.5039944648743, - "p95": 763.5840177536011, - "p99": 783.5519909858704 + "p50": 134.24000144004822, + "p90": 138.20800185203552, + "p95": 139.5840048789978, + "p99": 144.3520039319992 }, "isolatedSum": { - "p50": 770.687997341156, - "p90": 784.7360074520111, - "p95": 786.9760096073151, - "p99": 816.32000207901 + "p50": 154.30399775505066, + "p90": 157.47199952602386, + "p95": 159.2320054769516, + "p99": 171.23199999332428 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 620619776, - "combineLogicalBytes": 620619776, - "fanoutMean": 5.2845458984375, - "recvTokensMax": 5526, + "dispatchLogicalBytes": 12859392, + "combineLogicalBytes": 12859392, + "fanoutMean": 3.50390625, + "recvTokensMax": 255, "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2048, - "globalTokens": 16384, - "dispatch": { - "p50": 573.0559825897217, - "p90": 581.6959738731384, - "p95": 583.7119817733765, - "p99": 671.4879870414734 - }, - "combine": { - "p50": 827.4880051612854, - "p90": 838.6240005493164, - "p95": 839.9040102958679, - "p99": 863.4560108184814 - }, - "roundtrip": { - "p50": 1382.9760551452637, - "p90": 1392.9920196533203, - "p95": 1396.8960046768188, - "p99": 1428.1599521636963 - }, - "isolatedSum": { - "p50": 1400.543987751007, - "p90": 1420.3199744224548, - "p95": 1423.6159920692444, - "p99": 1534.9439978599548 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1239175168, - "combineLogicalBytes": 1239175168, - "fanoutMean": 5.2757568359375, - "recvTokensMax": 11165, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4096, - "globalTokens": 32768, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 1061.8879795074463, - "p90": 1068.7040090560913, - "p95": 1075.9040117263794, - "p99": 1094.048023223877 + "p50": 103.07200253009796, + "p90": 105.98400235176086, + "p95": 107.04000294208527, + "p99": 113.21599781513214 }, 
"combine": { - "p50": 1530.2079916000366, - "p90": 1540.7040119171143, - "p95": 1551.2640476226807, - "p99": 1662.6559495925903 + "p50": 127.13600695133209, + "p90": 128.1599998474121, + "p95": 128.57599556446075, + "p99": 131.04000687599182 }, "roundtrip": { - "p50": 2579.9999237060547, - "p90": 2593.7600135803223, - "p95": 2600.543975830078, - "p99": 2645.440101623535 + "p50": 209.1200053691864, + "p90": 214.30400013923645, + "p95": 216.12800657749176, + "p99": 229.66399788856506 }, "isolatedSum": { - "p50": 2592.095971107483, - "p90": 2609.4080209732056, - "p95": 2627.16805934906, - "p99": 2756.7039728164673 + "p50": 230.20800948143005, + "p90": 234.14400219917297, + "p95": 235.61599850654602, + "p99": 244.25600469112396 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2481604608, - "combineLogicalBytes": 2481604608, - "fanoutMean": 5.282684326171875, - "recvTokensMax": 22165, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, "stragglerRank": 7, "correct": true, "samplesPooled": 600, @@ -7253,34 +7734,35 @@ ] }, { - "id": "cx-07a9b9e5", - "identity": "b300|deepep|7168|8|256|bf16|normal|cached-layout-comm-only-v1|uniform|8|prefill|normal|none|none|0|normalized|0.18|64d989e2e2a6b31", - "colorKey": "b300_63f1354f", - "comparisonKey": "e1e888fe005f12d0", + "id": "cx-17ec2f4d", + "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|decode|normal|none|none|0|tuned||22da8b58646609c", + "colorKey": "b300_2e44c039", + "comparisonKey": "c7065362244c808a", "schemaVersion": 3, - "generatedAt": "2026-06-26T17:43:21.918392+00:00", + "generatedAt": "2026-06-27T09:49:49.525819+00:00", "status": "valid", "publicationStatus": "official", - "runner": "b300-nv_01", + "runner": "b300-nv_09", "sku": "b300", "backend": "deepep", - "phase": "prefill", + "phase": "decode", "mode": "normal", - "resourceMode": "normalized", - "suite": "resource-constrained", + "resourceMode": 
"tuned", + "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "cached-layout-comm-only-v1", + "measurementContract": "layout-and-dispatch-v1", "topologyClass": "b300-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep · bf16 (norm) [cl]", + "label": "B300 EP8 · deepep · bf16 · zipf-heavy", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, @@ -7289,14 +7771,14 @@ "combineQuantMode": "none" }, "resourceProfile": { - "requestedFraction": 0.18, - "achievedFraction": 0.1824, - "configuredUnits": 27, + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, "deviceUnits": 148, - "resourceClass": "resource-constrained", - "conformanceClass": "resource-conforming", + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", "fixedKernel": false, - "paretoEligible": true + "paretoEligible": false }, "placement": { "kind": "packed", @@ -7305,8 +7787,8 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "64d989e2e2a6b31", - "workloadId": "set:6:a426d66e479dc893", + "traceSignature": "22da8b58646609c", + "workloadId": "set:8:6b84350720aa8233", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -7314,229 +7796,303 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28254489726", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254489726", - "createdAt": "2026-06-26T17:30:12Z", - "sha": "60dec7d70f554e252fec87709e2be52752947db1" + "id": "28285651441", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285651441", + "createdAt": 
"2026-06-27T09:49:49.525819+00:00", + "sha": "149586650dbed5b7579537347e9489d5b41543c1" }, "rows": [ { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 1, + "globalTokens": 8, "dispatch": { - "p50": 86.43200248479843, - "p90": 88.95999938249588, - "p95": 91.58399701118469, - "p99": 99.55199807882309 + "p50": 58.62399935722351, + "p90": 66.04799628257751, + "p95": 68.03199648857117, + "p99": 76.19199901819229 }, "combine": { - "p50": 115.35999923944473, - "p90": 116.03199690580368, - "p95": 116.38399958610535, - "p99": 121.56800180673599 + "p50": 56.352000683546066, + "p90": 64.7680014371872, + "p95": 64.99200314283371, + "p99": 68.06399673223495 }, "roundtrip": { - "p50": 186.8479996919632, - "p90": 192.47999787330627, - "p95": 193.31200420856476, - "p99": 215.45599400997162 + "p50": 95.45599669218063, + "p90": 100.96000134944916, + "p95": 103.04000228643417, + "p99": 108.96000266075134 }, "isolatedSum": { - "p50": 201.79200172424316, - "p90": 204.99199628829956, - "p95": 207.96799659729004, - "p99": 221.11999988555908 + "p50": 114.97600004076958, + "p90": 130.8159977197647, + "p95": 133.02399963140488, + "p99": 144.25599575042725 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 4, + "dispatchLogicalBytes": 172032, + "combineLogicalBytes": 172032, + "fanoutMean": 1.5, + "recvTokensMax": 8, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 256, - "globalTokens": 2048, + "tokensPerRank": 2, + "globalTokens": 16, "dispatch": { - "p50": 128.60800325870514, - "p90": 131.48799538612366, - "p95": 132.79999792575836, - "p99": 147.20000326633453 + "p50": 57.24800005555153, + "p90": 59.7120001912117, + "p95": 62.49599903821945, + "p99": 69.72800195217133 }, "combine": { - "p50": 156.19200468063354, - "p90": 164.48000073432922, - "p95": 164.76799547672272, - "p99": 167.71200299263 + "p50": 
56.223999708890915, + "p90": 64.7680014371872, + "p95": 65.0240033864975, + "p99": 66.17599725723267 }, "roundtrip": { - "p50": 264.8000121116638, - "p90": 271.232008934021, - "p95": 274.6239900588989, - "p99": 307.20001459121704 + "p50": 98.04800152778625, + "p90": 104.25599664449692, + "p95": 105.6319996714592, + "p99": 116.03199690580368 }, "isolatedSum": { - "p50": 284.8000079393387, - "p90": 295.9679961204529, - "p95": 297.5679934024811, - "p99": 314.91200625896454 + "p50": 113.47199976444244, + "p90": 124.4800016283989, + "p95": 127.52000242471695, + "p99": 135.903999209404 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 155889664, - "combineLogicalBytes": 155889664, - "fanoutMean": 5.3095703125, - "recvTokensMax": 1422, - "stragglerRank": 4, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 315392, + "fanoutMean": 1.375, + "recvTokensMax": 16, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 512, - "globalTokens": 4096, + "tokensPerRank": 4, + "globalTokens": 32, "dispatch": { - "p50": 186.46399676799774, - "p90": 192.86400079727173, - "p95": 195.360004901886, - "p99": 208.3200067281723 + "p50": 57.37600103020668, + "p90": 59.51999872922897, + "p95": 61.91999837756157, + "p99": 67.87200272083282 }, "combine": { - "p50": 266.6879892349243, - "p90": 274.78399872779846, - "p95": 275.2639949321747, - "p99": 287.1359884738922 + "p50": 56.51199817657471, + "p90": 64.89600241184235, + "p95": 65.5680000782013, + "p99": 68.86400282382965 }, "roundtrip": { - "p50": 437.4080002307892, - "p90": 442.30398535728455, - "p95": 445.6320106983185, - "p99": 468.51199865341187 + "p50": 102.33599692583084, + "p90": 107.39199817180634, + "p95": 110.1439967751503, + "p99": 116.80000275373459 }, "isolatedSum": { - "p50": 453.15198600292206, - "p90": 467.6479995250702, - "p95": 470.62399983406067, - "p99": 495.4559952020645 + "p50": 113.88799920678139, + "p90": 124.41600114107132, + "p95": 127.48799845576286, + "p99": 
136.73600554466248 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 312266752, - "combineLogicalBytes": 312266752, - "fanoutMean": 5.31787109375, - "recvTokensMax": 2779, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 1.34375, + "recvTokensMax": 32, "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 1024, - "globalTokens": 8192, + "tokensPerRank": 8, + "globalTokens": 64, "dispatch": { - "p50": 316.0319924354553, - "p90": 319.16800141334534, - "p95": 320.99199295043945, - "p99": 330.01598715782166 + "p50": 58.04799869656563, + "p90": 60.575999319553375, + "p95": 64.09599632024765, + "p99": 75.16799867153168 }, "combine": { - "p50": 458.8479995727539, - "p90": 461.66399121284485, - "p95": 470.20798921585083, - "p99": 483.39200019836426 + "p50": 56.8000003695488, + "p90": 66.04799628257751, + "p95": 66.3359984755516, + "p99": 67.64800101518631 }, "roundtrip": { - "p50": 752.0639896392822, - "p90": 761.3440155982971, - "p95": 763.6799812316895, - "p99": 787.6480221748352 + "p50": 108.73600095510483, + "p90": 112.99200356006622, + "p95": 113.92000317573547, + "p99": 129.82399761676788 }, "isolatedSum": { - "p50": 774.8799920082092, - "p90": 780.8319926261902, - "p95": 791.1999821662903, - "p99": 813.4079873561859 + "p50": 114.84799906611443, + "p90": 126.62399560213089, + "p95": 130.43199479579926, + "p99": 142.815999686718 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 623443968, - "combineLogicalBytes": 623443968, - "fanoutMean": 5.30859375, - "recvTokensMax": 5505, + "dispatchLogicalBytes": 1376256, + "combineLogicalBytes": 1376256, + "fanoutMean": 1.5, + "recvTokensMax": 64, "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2048, - "globalTokens": 16384, + "tokensPerRank": 16, + "globalTokens": 128, "dispatch": { - "p50": 557.2800040245056, - "p90": 565.0240182876587, - "p95": 566.3679838180542, - "p99": 600.0319719314575 + 
"p50": 58.720000088214874, + "p90": 60.83200126886368, + "p95": 63.040003180503845, + "p99": 79.52000200748444 }, "combine": { - "p50": 817.4399733543396, - "p90": 827.8399705886841, - "p95": 832.0639729499817, - "p99": 854.3999791145325 + "p50": 65.98400324583054, + "p90": 66.81600213050842, + "p95": 66.97600334882736, + "p99": 68.83200258016586 }, "roundtrip": { - "p50": 1359.328031539917, - "p90": 1370.911955833435, - "p95": 1380.5760145187378, - "p99": 1444.640040397644 + "p50": 107.39199817180634, + "p90": 109.47199910879135, + "p95": 111.1999973654747, + "p99": 118.17599833011627 }, "isolatedSum": { - "p50": 1374.7199773788452, - "p90": 1392.8639888763428, - "p95": 1398.431956768036, - "p99": 1454.43195104599 + "p50": 124.70400333404541, + "p90": 127.6480033993721, + "p95": 130.0160065293312, + "p99": 148.3520045876503 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1243805696, - "combineLogicalBytes": 1243805696, - "fanoutMean": 5.29547119140625, - "recvTokensMax": 10952, + "dispatchLogicalBytes": 2781184, + "combineLogicalBytes": 2781184, + "fanoutMean": 1.515625, + "recvTokensMax": 128, "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 4096, - "globalTokens": 32768, + "tokensPerRank": 32, + "globalTokens": 256, "dispatch": { - "p50": 1037.4079942703247, - "p90": 1044.800043106079, - "p95": 1047.4879741668701, - "p99": 1074.3039846420288 + "p50": 70.36799937486649, + "p90": 76.12799853086472, + "p95": 77.47200131416321, + "p99": 94.81599926948547 }, "combine": { - "p50": 1529.6319723129272, - "p90": 1541.375994682312, - "p95": 1552.0639419555664, - "p99": 1577.1199464797974 + "p50": 68.51200014352798, + "p90": 72.7040022611618, + "p95": 76.86399668455124, + "p99": 80.19199967384338 }, "roundtrip": { - "p50": 2550.9119033813477, - "p90": 2564.2240047454834, - "p95": 2571.199893951416, - "p99": 2613.2800579071045 + "p50": 120.99199742078781, + "p90": 125.50400197505951, + "p95": 126.75200402736664, + "p99": 
134.07999277114868 }, "isolatedSum": { - "p50": 2567.039966583252, - "p90": 2586.176037788391, - "p95": 2599.5519161224365, - "p99": 2651.423931121826 + "p50": 138.87999951839447, + "p90": 148.83200079202652, + "p95": 154.33599799871445, + "p99": 175.00799894332886 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2487009280, - "combineLogicalBytes": 2487009280, - "fanoutMean": 5.294189453125, - "recvTokensMax": 21781, + "dispatchLogicalBytes": 5533696, + "combineLogicalBytes": 5533696, + "fanoutMean": 1.5078125, + "recvTokensMax": 256, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 78.49600166082382, + "p90": 80.1599994301796, + "p95": 81.727996468544, + "p99": 87.5839963555336 + }, + "combine": { + "p50": 82.78399705886841, + "p90": 90.81599861383438, + "p95": 91.36000275611877, + "p99": 93.53599697351456 + }, + "roundtrip": { + "p50": 146.2080031633377, + "p90": 154.91199493408203, + "p95": 157.98400342464447, + "p99": 176.06399953365326 + }, + "isolatedSum": { + "p50": 161.27999871969223, + "p90": 170.97599804401398, + "p95": 173.08799922466278, + "p99": 181.11999332904816 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 11210752, + "combineLogicalBytes": 11210752, + "fanoutMean": 1.52734375, + "recvTokensMax": 512, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 91.61599725484848, + "p90": 93.59999746084213, + "p95": 94.59199756383896, + "p99": 101.21600329875946 + }, + "combine": { + "p50": 116.35199934244156, + "p90": 125.56800246238708, + "p95": 126.52799487113953, + "p99": 127.83999741077423 + }, + "roundtrip": { + "p50": 195.39199769496918, + "p90": 202.94399559497833, + "p95": 204.6079933643341, + "p99": 235.83999276161194 + }, + "isolatedSum": { + "p50": 207.96799659729004, + "p90": 219.16799992322922, + "p95": 
221.11999243497849, + "p99": 229.0560007095337 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22650880, + "combineLogicalBytes": 22650880, + "fanoutMean": 1.54296875, + "recvTokensMax": 1024, "stragglerRank": 4, "correct": true, "samplesPooled": 600, @@ -7545,50 +8101,51 @@ ] }, { - "id": "cx-c8d1506e", - "identity": "b300|deepep|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|normalized|0.18|64d989e2e2a6b31", - "colorKey": "b300_eee29686", - "comparisonKey": "efab2d3670b24be2", + "id": "cx-3bfb4348", + "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|decode|normal|none|none|0|tuned||47fddabb3277bec", + "colorKey": "b300_2e44c039", + "comparisonKey": "5c5e6a7ecdec195f", "schemaVersion": 3, - "generatedAt": "2026-06-26T17:42:54.702578+00:00", + "generatedAt": "2026-06-26T23:58:26.448327+00:00", "status": "valid", "publicationStatus": "official", - "runner": "b300-nv_15", + "runner": "b300-nv_16", "sku": "b300", "backend": "deepep", - "phase": "prefill", + "phase": "decode", "mode": "normal", - "resourceMode": "normalized", - "suite": "resource-constrained", + "resourceMode": "tuned", + "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", "topologyClass": "b300-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep · fp8 (norm)", + "label": "B300 EP8 · deepep · bf16 · zipf-heavy", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, - "dispatchDtype": "fp8", + "dispatchDtype": "bf16", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { - "requestedFraction": 0.18, - "achievedFraction": 0.1824, - "configuredUnits": 27, + "requestedFraction": null, + 
"achievedFraction": 0.1351, + "configuredUnits": 20, "deviceUnits": 148, - "resourceClass": "resource-constrained", - "conformanceClass": "resource-conforming", + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", "fixedKernel": false, - "paretoEligible": true + "paretoEligible": false }, "placement": { "kind": "packed", @@ -7597,8 +8154,8 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "64d989e2e2a6b31", - "workloadId": "set:6:a426d66e479dc893", + "traceSignature": "47fddabb3277bec", + "workloadId": "set:4:6b84350720aa8233", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -7606,229 +8163,155 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28254479346", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254479346", - "createdAt": "2026-06-26T17:30:02Z", - "sha": "60dec7d70f554e252fec87709e2be52752947db1" + "id": "28271893428", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271893428", + "createdAt": "2026-06-26T23:58:26.448327+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" }, "rows": [ { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 83.45600217580795, - "p90": 86.14400029182434, - "p95": 87.2960016131401, - "p99": 102.08000242710114 - }, - "combine": { - "p50": 108.38399827480316, - "p90": 110.75200140476227, - "p95": 111.61600053310394, - "p99": 114.9120032787323 - }, - "roundtrip": { - "p50": 218.33600103855133, - "p90": 221.6320037841797, - "p95": 222.84799814224243, - "p99": 235.23199558258057 - }, - "isolatedSum": { - "p50": 191.84000045061111, - "p90": 196.8960016965866, - "p95": 198.91200214624405, - "p99": 216.99200570583344 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 38836224, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - 
"recvTokensMax": 723, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 256, - "globalTokens": 2048, + "tokensPerRank": 1, + "globalTokens": 8, "dispatch": { - "p50": 116.70400202274323, - "p90": 119.64800208806992, - "p95": 121.15199863910675, - "p99": 135.3600025177002 + "p50": 56.832000613212585, + "p90": 63.71200084686279, + "p95": 64.54399973154068, + "p99": 69.88800317049026 }, "combine": { - "p50": 155.29599785804749, - "p90": 167.4560010433197, - "p95": 176.60799622535706, - "p99": 184.1599941253662 + "p50": 55.67999929189682, + "p90": 58.20799991488457, + "p95": 64.86400216817856, + "p99": 68.89600306749344 }, "roundtrip": { - "p50": 324.47999715805054, - "p90": 328.19199562072754, - "p95": 330.04799485206604, - "p99": 345.40799260139465 + "p50": 94.52799707651138, + "p90": 99.2640033364296, + "p95": 101.56799852848053, + "p99": 107.04000294208527 }, "isolatedSum": { - "p50": 271.9999998807907, - "p90": 287.1040031313896, - "p95": 297.7599948644638, - "p99": 319.5199966430664 + "p50": 112.5119999051094, + "p90": 121.92000076174736, + "p95": 129.40800189971924, + "p99": 138.7840062379837 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77944832, - "combineLogicalBytes": 155889664, - "fanoutMean": 5.3095703125, - "recvTokensMax": 1422, + "dispatchLogicalBytes": 172032, + "combineLogicalBytes": 172032, + "fanoutMean": 1.5, + "recvTokensMax": 8, "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 512, - "globalTokens": 4096, - "dispatch": { - "p50": 177.7919977903366, - "p90": 182.27200210094452, - "p95": 183.9040070772171, - "p99": 191.103994846344 - }, - "combine": { - "p50": 267.520010471344, - "p90": 270.81599831581116, - "p95": 272.0640003681183, - "p99": 275.4879891872406 - }, - "roundtrip": { - "p50": 550.8480072021484, - "p90": 556.9599866867065, - "p95": 560.2560043334961, - "p99": 578.3360004425049 - }, - "isolatedSum": { - "p50": 
445.3120082616806, - "p90": 453.0880004167557, - "p95": 455.9680074453354, - "p99": 466.5919840335846 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 156133376, - "combineLogicalBytes": 312266752, - "fanoutMean": 5.31787109375, - "recvTokensMax": 2779, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 1024, - "globalTokens": 8192, + "tokensPerRank": 8, + "globalTokens": 64, "dispatch": { - "p50": 298.0160117149353, - "p90": 302.4959862232208, - "p95": 304.4799864292145, - "p99": 319.07200813293457 + "p50": 56.92800134420395, + "p90": 59.039998799562454, + "p95": 60.5119988322258, + "p99": 66.04799628257751 }, "combine": { - "p50": 452.1920084953308, - "p90": 456.6720128059387, - "p95": 458.624005317688, - "p99": 467.9360091686249 + "p50": 56.63999915122986, + "p90": 66.23999774456024, + "p95": 66.56000018119812, + "p99": 78.91199737787247 }, "roundtrip": { - "p50": 976.5759706497192, - "p90": 983.8719964027405, - "p95": 991.5199875831604, - "p99": 1023.3279466629028 + "p50": 107.80800133943558, + "p90": 113.43999952077866, + "p95": 114.656001329422, + "p99": 124.22399967908859 }, "isolatedSum": { - "p50": 750.2080202102661, - "p90": 759.1679990291595, - "p95": 763.1039917469025, - "p99": 787.0080173015594 + "p50": 113.56800049543381, + "p90": 125.2799965441227, + "p95": 127.07199901342392, + "p99": 144.95999366044998 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 311721984, - "combineLogicalBytes": 623443968, - "fanoutMean": 5.30859375, - "recvTokensMax": 5505, + "dispatchLogicalBytes": 1376256, + "combineLogicalBytes": 1376256, + "fanoutMean": 1.5, + "recvTokensMax": 64, "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2048, - "globalTokens": 16384, + "tokensPerRank": 32, + "globalTokens": 256, "dispatch": { - "p50": 541.4720177650452, - "p90": 546.7519760131836, - "p95": 549.4080185890198, - "p99": 557.7920079231262 + "p50": 
67.90400296449661, + "p90": 74.07999783754349, + "p95": 75.93599706888199, + "p99": 82.2720006108284 }, "combine": { - "p50": 814.7199749946594, - "p90": 820.8320140838623, - "p95": 824.0640163421631, - "p99": 847.2959995269775 + "p50": 67.90400296449661, + "p90": 70.0799971818924, + "p95": 77.05599814653397, + "p99": 79.26400005817413 }, "roundtrip": { - "p50": 1818.0160522460938, - "p90": 1827.712059020996, - "p95": 1832.0000171661377, - "p99": 1889.5679712295532 + "p50": 120.4800009727478, + "p90": 124.89599734544754, + "p95": 126.27199292182922, + "p99": 140.99200069904327 }, "isolatedSum": { - "p50": 1356.1919927597046, - "p90": 1367.583990097046, - "p95": 1373.4720349311829, - "p99": 1405.0880074501038 + "p50": 135.80800592899323, + "p90": 144.15999501943588, + "p95": 152.99199521541595, + "p99": 161.53600066900253 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 621902848, - "combineLogicalBytes": 1243805696, - "fanoutMean": 5.29547119140625, - "recvTokensMax": 10952, + "dispatchLogicalBytes": 5533696, + "combineLogicalBytes": 5533696, + "fanoutMean": 1.5078125, + "recvTokensMax": 256, "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 4096, - "globalTokens": 32768, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 1019.6160078048706, - "p90": 1027.9040336608887, - "p95": 1031.391978263855, - "p99": 1045.2799797058105 + "p50": 91.77599847316742, + "p90": 94.17600184679031, + "p95": 95.74399888515472, + "p99": 114.20799791812897 }, "combine": { - "p50": 1529.4400453567505, - "p90": 1537.2480154037476, - "p95": 1540.8639907836914, - "p99": 1614.6240234375 + "p50": 116.28799885511398, + "p90": 119.19999867677689, + "p95": 126.36800110340118, + "p99": 130.43199479579926 }, "roundtrip": { - "p50": 3477.3120880126953, - "p90": 3490.272045135498, - "p95": 3495.3598976135254, - "p99": 3531.3920974731445 + "p50": 194.0159946680069, + "p90": 201.08799636363983, + "p95": 202.84800231456757, + 
"p99": 212.92799711227417 }, "isolatedSum": { - "p50": 2549.056053161621, - "p90": 2565.1520490646362, - "p95": 2572.2559690475464, - "p99": 2659.9040031433105 + "p50": 208.0639973282814, + "p90": 213.3760005235672, + "p95": 222.1119999885559, + "p99": 244.63999271392822 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1243504640, - "combineLogicalBytes": 2487009280, - "fanoutMean": 5.294189453125, - "recvTokensMax": 21781, + "dispatchLogicalBytes": 22650880, + "combineLogicalBytes": 22650880, + "fanoutMean": 1.54296875, + "recvTokensMax": 1024, "stragglerRank": 4, "correct": true, "samplesPooled": 600, @@ -7837,50 +8320,51 @@ ] }, { - "id": "cx-9971d342", - "identity": "b300|deepep|7168|8|256|fp8|normal|cached-layout-comm-only-v1|uniform|8|prefill|normal|none|none|0|normalized|0.18|64d989e2e2a6b31", - "colorKey": "b300_84b10b26", - "comparisonKey": "1c850249e23e1e8c", + "id": "cx-272778fb", + "identity": "b300|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-heavy+eplb|8|decode|normal|none|none|0|tuned||5a3054422534366", + "colorKey": "b300_6d2e4735", + "comparisonKey": "d0ac3c3db4cc1004", "schemaVersion": 3, - "generatedAt": "2026-06-26T18:09:25.013454+00:00", + "generatedAt": "2026-06-27T09:50:21.321707+00:00", "status": "valid", "publicationStatus": "official", - "runner": "b300-nv_15", + "runner": "b300-nv_12", "sku": "b300", "backend": "deepep", - "phase": "prefill", + "phase": "decode", "mode": "normal", - "resourceMode": "normalized", - "suite": "resource-constrained", + "resourceMode": "tuned", + "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "cached-layout-comm-only-v1", + "measurementContract": "layout-and-dispatch-v1", "topologyClass": "b300-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep · fp8 (norm) [cl]", + "label": "B300 EP8 · deepep · bf16 · zipf-heavy+eplb", + "model": "DeepSeek-V3 (EPLB physical)", "shape": { "hidden": 7168, "topk": 8, - 
"experts": 256, - "routing": "uniform", - "routingLabel": "uniform", + "experts": 288, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy+eplb", "routingStep": 0, "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "fp8", + "eplbEnabled": true, + "dispatchDtype": "bf16", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { - "requestedFraction": 0.18, - "achievedFraction": 0.1824, - "configuredUnits": 27, - "deviceUnits": 148, - "resourceClass": "resource-constrained", - "conformanceClass": "resource-conforming", + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", "fixedKernel": false, - "paretoEligible": true + "paretoEligible": false }, "placement": { "kind": "packed", @@ -7889,238 +8373,312 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "64d989e2e2a6b31", - "workloadId": "set:6:a426d66e479dc893", + "traceSignature": "5a3054422534366", + "workloadId": "set:8:6b84350720aa8233", "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, + "eplbImbalanceBefore": 7.40625, + "eplbImbalanceAfter": 1.0004417782738093, "backendVersion": "1.2.1", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28254499301", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254499301", - "createdAt": "2026-06-26T17:30:22Z", - "sha": "60dec7d70f554e252fec87709e2be52752947db1" + "id": "28285654027", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285654027", + "createdAt": "2026-06-27T09:50:21.321707+00:00", + "sha": "149586650dbed5b7579537347e9489d5b41543c1" }, "rows": [ { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 1, + "globalTokens": 8, "dispatch": { - "p50": 77.02399790287018, 
- "p90": 79.48800176382065, - "p95": 80.89599758386612, - "p99": 85.28000116348267 + "p50": 1764.9600505828857, + "p90": 2543.519973754883, + "p95": 2872.8959560394287, + "p99": 3412.3198986053467 }, "combine": { - "p50": 108.5439994931221, - "p90": 111.29599809646606, - "p95": 112.35199868679047, - "p99": 124.41600114107132 + "p50": 1750.1120567321777, + "p90": 1847.872018814087, + "p95": 2633.280038833618, + "p99": 2927.1678924560547 }, "roundtrip": { - "p50": 211.74399554729462, - "p90": 214.4320011138916, - "p95": 216.0000056028366, - "p99": 233.15200209617615 + "p50": 1795.583963394165, + "p90": 1911.6159677505493, + "p95": 2635.5841159820557, + "p99": 2994.0481185913086 }, "isolatedSum": { - "p50": 185.56799739599228, - "p90": 190.7839998602867, - "p95": 193.24799627065659, - "p99": 209.69600230455399 + "p50": 3515.0721073150635, + "p90": 4391.39199256897, + "p95": 5506.175994873047, + "p99": 6339.487791061401 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 38836224, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, + "dispatchLogicalBytes": 559104, + "combineLogicalBytes": 559104, + "fanoutMean": 4.875, + "recvTokensMax": 6, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 1758.4320306777954, + "p90": 2088.1919860839844, + "p95": 2806.4639568328857, + "p99": 3415.616035461426 + }, + "combine": { + "p50": 1750.3039836883545, + "p90": 1858.9119911193848, + "p95": 2584.0001106262207, + "p99": 2952.4800777435303 + }, + "roundtrip": { + "p50": 1806.7200183868408, + "p90": 1925.663948059082, + "p95": 2728.480100631714, + "p99": 3011.45601272583 + }, + "isolatedSum": { + "p50": 3508.73601436615, + "p90": 3947.103977203369, + "p95": 5390.464067459106, + "p99": 6368.096113204956 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1175552, + "combineLogicalBytes": 1175552, + "fanoutMean": 5.125, + 
"recvTokensMax": 12, "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 256, - "globalTokens": 2048, + "tokensPerRank": 4, + "globalTokens": 32, "dispatch": { - "p50": 110.1439967751503, - "p90": 113.53600025177002, - "p95": 115.90400338172913, - "p99": 132.6719969511032 + "p50": 1758.9119672775269, + "p90": 2487.6160621643066, + "p95": 2937.9520416259766, + "p99": 3416.5759086608887 }, "combine": { - "p50": 153.3759981393814, - "p90": 157.60000050067902, - "p95": 159.32799875736237, - "p99": 173.69599640369415 + "p50": 1761.1839771270752, + "p90": 1895.840048789978, + "p95": 2682.528018951416, + "p99": 3779.8080444335938 }, "roundtrip": { - "p50": 318.30400228500366, - "p90": 322.52800464630127, - "p95": 325.408011674881, - "p99": 346.49598598480225 + "p50": 1816.7680501937866, + "p90": 1913.0879640579224, + "p95": 2590.4319286346436, + "p99": 2941.8559074401855 }, "isolatedSum": { - "p50": 263.5199949145317, - "p90": 271.13600075244904, - "p95": 275.2320021390915, - "p99": 306.36799335479736 + "p50": 3520.095944404602, + "p90": 4383.456110954285, + "p95": 5620.480060577393, + "p99": 7196.383953094482 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77944832, - "combineLogicalBytes": 155889664, - "fanoutMean": 5.3095703125, - "recvTokensMax": 1422, + "dispatchLogicalBytes": 2465792, + "combineLogicalBytes": 2465792, + "fanoutMean": 5.375, + "recvTokensMax": 25, "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 512, - "globalTokens": 4096, + "tokensPerRank": 8, + "globalTokens": 64, "dispatch": { - "p50": 171.26399278640747, - "p90": 176.15999281406403, - "p95": 178.6240041255951, - "p99": 194.815993309021 + "p50": 1764.6080255508423, + "p90": 2651.520013809204, + "p95": 3028.7680625915527, + "p99": 5341.599941253662 }, "combine": { - "p50": 268.2879865169525, - "p90": 273.0560004711151, - "p95": 275.64799785614014, - "p99": 283.58399868011475 + "p50": 
1762.5279426574707, + "p90": 1947.9999542236328, + "p95": 2684.351921081543, + "p99": 13385.215759277344 }, "roundtrip": { - "p50": 543.7120199203491, - "p90": 550.6880283355713, - "p95": 554.1120171546936, - "p99": 576.0639905929565 + "p50": 1826.6880512237549, + "p90": 1935.968041419983, + "p95": 2620.6719875335693, + "p99": 2976.8319129943848 }, "isolatedSum": { - "p50": 439.55197930336, - "p90": 449.21599328517914, - "p95": 454.27200198173523, - "p99": 478.39999198913574 + "p50": 3527.135968208313, + "p90": 4599.519968032837, + "p95": 5713.119983673096, + "p99": 18726.815700531006 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 156133376, - "combineLogicalBytes": 312266752, - "fanoutMean": 5.31787109375, - "recvTokensMax": 2779, - "stragglerRank": 5, + "dispatchLogicalBytes": 4988928, + "combineLogicalBytes": 4988928, + "fanoutMean": 5.4375, + "recvTokensMax": 47, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 1024, - "globalTokens": 8192, + "tokensPerRank": 16, + "globalTokens": 128, "dispatch": { - "p50": 288.92800211906433, - "p90": 294.0160036087036, - "p95": 297.791987657547, - "p99": 315.3280019760132 + "p50": 1762.2079849243164, + "p90": 2232.2559356689453, + "p95": 2812.9279613494873, + "p99": 3426.3999462127686 }, "combine": { - "p50": 452.09598541259766, - "p90": 457.37600326538086, - "p95": 461.7280066013336, - "p99": 471.74400091171265 + "p50": 1772.7359533309937, + "p90": 2522.815942764282, + "p95": 2989.471912384033, + "p99": 6136.096000671387 }, "roundtrip": { - "p50": 967.1040177345276, - "p90": 974.62397813797, - "p95": 977.5360226631165, - "p99": 995.6160187721252 + "p50": 1848.736047744751, + "p90": 2838.3679389953613, + "p95": 3572.223901748657, + "p99": 5888.12780380249 }, "isolatedSum": { - "p50": 741.023987531662, - "p90": 751.3920068740845, - "p95": 759.5199942588806, - "p99": 787.0720028877258 + "p50": 3534.94393825531, + "p90": 4755.0718784332275, + "p95": 5802.3998737335205, + 
"p99": 9562.495946884155 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 311721984, - "combineLogicalBytes": 623443968, - "fanoutMean": 5.30859375, - "recvTokensMax": 5505, + "dispatchLogicalBytes": 9791488, + "combineLogicalBytes": 9791488, + "fanoutMean": 5.3359375, + "recvTokensMax": 94, "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2048, - "globalTokens": 16384, + "tokensPerRank": 32, + "globalTokens": 256, "dispatch": { - "p50": 523.3920216560364, - "p90": 529.2800068855286, - "p95": 533.3439707756042, - "p99": 550.1120090484619 + "p50": 1787.775993347168, + "p90": 3058.4959983825684, + "p95": 4017.824172973633, + "p99": 5667.263984680176 }, "combine": { - "p50": 816.32000207901, - "p90": 824.9599933624268, - "p95": 831.1359882354736, - "p99": 855.135977268219 + "p50": 1784.991979598999, + "p90": 2866.368055343628, + "p95": 3568.7999725341797, + "p99": 5742.527961730957 }, "roundtrip": { - "p50": 1800.096035003662, - "p90": 1811.743974685669, - "p95": 1825.7919549942017, - "p99": 1866.8160438537598 + "p50": 1844.5760011672974, + "p90": 2729.248046875, + "p95": 3046.3039875030518, + "p99": 5490.079879760742 }, "isolatedSum": { - "p50": 1339.7120237350464, - "p90": 1354.2400002479553, - "p95": 1364.4799590110779, - "p99": 1405.247986316681 + "p50": 3572.767972946167, + "p90": 5924.864053726196, + "p95": 7586.6241455078125, + "p99": 11409.791946411133 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 621902848, - "combineLogicalBytes": 1243805696, - "fanoutMean": 5.29547119140625, - "recvTokensMax": 10952, - "stragglerRank": 5, + "dispatchLogicalBytes": 19410944, + "combineLogicalBytes": 19410944, + "fanoutMean": 5.2890625, + "recvTokensMax": 178, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 4096, - "globalTokens": 32768, + "tokensPerRank": 64, + "globalTokens": 512, "dispatch": { - "p50": 984.9280118942261, - "p90": 992.2239780426025, - "p95": 
996.5760111808777, - "p99": 1026.9759893417358 + "p50": 1779.520034790039, + "p90": 2049.920082092285, + "p95": 2758.0161094665527, + "p99": 3381.7598819732666 }, "combine": { - "p50": 1529.312014579773, - "p90": 1539.1039848327637, - "p95": 1548.0320453643799, - "p99": 1564.3839836120605 + "p50": 1785.2799892425537, + "p90": 1907.647967338562, + "p95": 2608.544111251831, + "p99": 2964.8640155792236 }, "roundtrip": { - "p50": 3440.864086151123, - "p90": 3457.6640129089355, - "p95": 3468.832015991211, - "p99": 3514.2080783843994 + "p50": 1863.2320165634155, + "p90": 1987.104058265686, + "p95": 2669.055938720703, + "p99": 3054.5599460601807 }, "isolatedSum": { - "p50": 2514.240026473999, - "p90": 2531.327962875366, - "p95": 2544.6080565452576, - "p99": 2591.3599729537964 + "p50": 3564.800024032593, + "p90": 3957.568049430847, + "p95": 5366.560220718384, + "p99": 6346.62389755249 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1243504640, - "combineLogicalBytes": 2487009280, - "fanoutMean": 5.294189453125, - "recvTokensMax": 21781, + "dispatchLogicalBytes": 38678528, + "combineLogicalBytes": 38678528, + "fanoutMean": 5.26953125, + "recvTokensMax": 360, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 1799.9680042266846, + "p90": 2006.6559314727783, + "p95": 2855.6160926818848, + "p99": 3409.8880290985107 + }, + "combine": { + "p50": 1824.8319625854492, + "p90": 3164.6718978881836, + "p95": 5201.375961303711, + "p99": 6098.78396987915 + }, + "roundtrip": { + "p50": 1909.2479944229126, + "p90": 3033.3759784698486, + "p95": 5025.9199142456055, + "p99": 5985.599994659424 + }, + "isolatedSum": { + "p50": 3624.799966812134, + "p90": 5171.327829360962, + "p95": 8056.992053985596, + "p99": 9508.671998977661 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77285376, + "combineLogicalBytes": 77285376, + "fanoutMean": 5.2646484375, + "recvTokensMax": 
704, "stragglerRank": 4, "correct": true, "samplesPooled": 600, @@ -8129,34 +8687,35 @@ ] }, { - "id": "cx-1c34e3d1", - "identity": "h100|deepep|4096|8|128|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||75530960a30b452", - "colorKey": "h100_ff7906f8", - "comparisonKey": "ad5ebda2342035d4", + "id": "cx-d77e8004", + "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-mild|8|decode|normal|none|none|0|tuned||f3df51be7d5c32b", + "colorKey": "b300_7ab35d34", + "comparisonKey": "9b1abb398e739521", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:51:21.600015+00:00", + "generatedAt": "2026-06-27T09:48:58.943687+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h100-dgxc-slurm_04", - "sku": "h100", + "runner": "b300-nv_17", + "sku": "b300", "backend": "deepep", "phase": "decode", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "runtime-visible-v1", - "topologyClass": "h100-nvlink-island", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · bf16", + "label": "B300 EP8 · deepep · bf16 · zipf-mild", + "model": "DeepSeek-V3/V4", "shape": { - "hidden": 4096, + "hidden": 7168, "topk": 8, - "experts": 128, - "routing": "uniform", - "routingLabel": "uniform", + "experts": 256, + "routing": "zipf-mild", + "routingLabel": "zipf-mild", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, @@ -8166,9 +8725,9 @@ }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1515, + "achievedFraction": 0.1351, "configuredUnits": 20, - "deviceUnits": 132, + "deviceUnits": 148, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -8181,8 +8740,8 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "75530960a30b452", - 
"workloadId": "set:8:d1b92539bddfb570", + "traceSignature": "f3df51be7d5c32b", + "workloadId": "set:8:289b7f9c14292e96", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -8190,45 +8749,45 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271684428", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271684428", - "createdAt": "2026-06-26T23:50:25Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28285630258", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285630258", + "createdAt": "2026-06-27T09:48:58.943687+00:00", + "sha": "149586650dbed5b7579537347e9489d5b41543c1" }, "rows": [ { "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 98.4639972448349, - "p90": 106.52799904346466, - "p95": 128.12800705432892, - "p99": 158.87999534606934 + "p50": 57.40800127387047, + "p90": 59.647999703884125, + "p95": 62.49599903821945, + "p99": 74.07999783754349 }, "combine": { - "p50": 66.52799993753433, - "p90": 73.34399968385696, - "p95": 81.34400099515915, - "p99": 91.96799993515015 + "p50": 66.56000018119812, + "p90": 67.26399809122086, + "p95": 67.64800101518631, + "p99": 78.5600021481514 }, "roundtrip": { - "p50": 139.42399621009827, - "p90": 146.84799313545227, - "p95": 150.56000649929047, - "p99": 186.81600689888 + "p50": 109.6000000834465, + "p90": 113.69600147008896, + "p95": 114.52800035476685, + "p99": 122.65600264072418 }, "isolatedSum": { - "p50": 164.99199718236923, - "p90": 179.87199872732162, - "p95": 209.47200804948807, - "p99": 250.84799528121948 + "p50": 123.96800145506859, + "p90": 126.91199779510498, + "p95": 130.14400005340576, + "p99": 152.63999998569489 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 344064, - "combineLogicalBytes": 344064, - "fanoutMean": 5.25, - "recvTokensMax": 6, - "stragglerRank": 6, + 
"dispatchLogicalBytes": 587776, + "combineLogicalBytes": 587776, + "fanoutMean": 5.125, + "recvTokensMax": 8, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -8237,35 +8796,35 @@ "tokensPerRank": 2, "globalTokens": 16, "dispatch": { - "p50": 71.58400118350983, - "p90": 103.45599800348282, - "p95": 106.39999806880951, - "p99": 124.67200309038162 + "p50": 57.5999990105629, + "p90": 59.07199904322624, + "p95": 61.43999844789505, + "p99": 67.52000004053116 }, "combine": { - "p50": 64.06400352716446, - "p90": 72.92799651622772, - "p95": 73.31199944019318, - "p99": 74.43200051784515 + "p50": 66.84800237417221, + "p90": 67.32799857854843, + "p95": 68.2239979505539, + "p99": 82.2720006108284 }, "roundtrip": { - "p50": 117.53600090742111, - "p90": 144.41600441932678, - "p95": 147.71200716495514, - "p99": 173.5360026359558 + "p50": 107.32799768447876, + "p90": 110.11199653148651, + "p95": 111.51999980211258, + "p99": 119.19999867677689 }, "isolatedSum": { - "p50": 135.6480047106743, - "p90": 176.38399451971054, - "p95": 179.71199750900269, - "p99": 199.10400360822678 + "p50": 124.44800138473511, + "p90": 126.39999762177467, + "p95": 129.66399639844894, + "p99": 149.79200065135956 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 704512, - "combineLogicalBytes": 704512, - "fanoutMean": 5.375, - "recvTokensMax": 12, - "stragglerRank": 1, + "dispatchLogicalBytes": 1103872, + "combineLogicalBytes": 1103872, + "fanoutMean": 4.8125, + "recvTokensMax": 16, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -8274,35 +8833,35 @@ "tokensPerRank": 4, "globalTokens": 32, "dispatch": { - "p50": 92.70399808883667, - "p90": 100.47999769449234, - "p95": 102.75200009346008, - "p99": 106.23999685049057 + "p50": 58.75200033187866, + "p90": 60.70400029420853, + "p95": 61.91999837756157, + "p99": 70.91200351715088 }, "combine": { - "p50": 66.01600348949432, - "p90": 72.38399982452393, - "p95": 72.86400347948074, - "p99": 75.6160020828247 
+ "p50": 68.80000233650208, + "p90": 76.99199765920639, + "p95": 77.27999985218048, + "p99": 78.52800190448761 }, "roundtrip": { - "p50": 134.33599472045898, - "p90": 143.77599954605103, - "p95": 146.08000218868256, - "p99": 149.82399344444275 + "p50": 115.80800265073776, + "p90": 121.8239963054657, + "p95": 123.52000176906586, + "p99": 133.66399705410004 }, "isolatedSum": { - "p50": 158.720001578331, - "p90": 172.86399751901627, - "p95": 175.61600357294083, - "p99": 181.85599893331528 + "p50": 127.55200266838074, + "p90": 137.69599795341492, + "p95": 139.19999822974205, + "p99": 149.4400054216385 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1384448, - "combineLogicalBytes": 1384448, - "fanoutMean": 5.28125, - "recvTokensMax": 26, - "stragglerRank": 6, + "dispatchLogicalBytes": 2250752, + "combineLogicalBytes": 2250752, + "fanoutMean": 4.90625, + "recvTokensMax": 31, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -8311,35 +8870,35 @@ "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 97.75999933481216, - "p90": 105.8880016207695, - "p95": 129.66400384902954, - "p99": 177.44000256061554 + "p50": 60.15999987721443, + "p90": 65.15199691057205, + "p95": 66.97600334882736, + "p99": 73.18399846553802 }, "combine": { - "p50": 71.32799923419952, - "p90": 74.65600222349167, - "p95": 81.53600245714188, - "p99": 92.00000017881393 + "p50": 68.60800087451935, + "p90": 77.15199887752533, + "p95": 77.82399654388428, + "p99": 80.03199845552444 }, "roundtrip": { - "p50": 140.99200069904327, - "p90": 149.6960073709488, - "p95": 159.19999778270721, - "p99": 189.43999707698822 + "p50": 120.4800009727478, + "p90": 125.44000148773193, + "p95": 126.71999633312225, + "p99": 144.3520039319992 }, "isolatedSum": { - "p50": 169.0879985690117, - "p90": 180.54400384426117, - "p95": 211.20000630617142, - "p99": 269.4400027394295 + "p50": 128.76800075173378, + "p90": 142.30399578809738, + "p95": 144.79999989271164, + "p99": 153.21599692106247 }, 
"roundtripMeasured": true, - "dispatchLogicalBytes": 2744320, - "combineLogicalBytes": 2744320, - "fanoutMean": 5.234375, - "recvTokensMax": 49, - "stragglerRank": 6, + "dispatchLogicalBytes": 4472832, + "combineLogicalBytes": 4472832, + "fanoutMean": 4.875, + "recvTokensMax": 62, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -8348,35 +8907,35 @@ "tokensPerRank": 16, "globalTokens": 128, "dispatch": { - "p50": 97.56799787282944, - "p90": 100.99200159311295, - "p95": 104.3199971318245, - "p99": 107.42399841547012 + "p50": 63.551999628543854, + "p90": 69.95200365781784, + "p95": 71.10399752855301, + "p99": 88.128000497818 }, "combine": { - "p50": 71.6480016708374, - "p90": 73.7600028514862, - "p95": 75.00799745321274, - "p99": 80.92799782752991 + "p50": 76.89599692821503, + "p90": 78.87999713420868, + "p95": 79.26400005817413, + "p99": 82.46400207281113 }, "roundtrip": { - "p50": 142.68800616264343, - "p90": 150.30400454998016, - "p95": 154.2080044746399, - "p99": 156.09599649906158 + "p50": 121.05599790811539, + "p90": 124.89599734544754, + "p95": 126.91199779510498, + "p99": 135.68000495433807 }, "isolatedSum": { - "p50": 169.21599954366684, - "p90": 174.75200444459915, - "p95": 179.32799458503723, - "p99": 188.35199624300003 + "p50": 140.44799655675888, + "p90": 148.83200079202652, + "p95": 150.36799758672714, + "p99": 170.59200257062912 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 5464064, - "combineLogicalBytes": 5464064, - "fanoutMean": 5.2109375, - "recvTokensMax": 94, - "stragglerRank": 6, + "dispatchLogicalBytes": 8888320, + "combineLogicalBytes": 8888320, + "fanoutMean": 4.84375, + "recvTokensMax": 124, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -8385,35 +8944,35 @@ "tokensPerRank": 32, "globalTokens": 256, "dispatch": { - "p50": 79.80799674987793, - "p90": 99.55199807882309, - "p95": 101.27999633550644, - "p99": 106.08000308275223 + "p50": 70.68800181150436, + "p90": 
76.51200145483017, + "p95": 77.44000107049942, + "p99": 82.49600231647491 }, "combine": { - "p50": 66.68800115585327, - "p90": 76.03199779987335, - "p95": 80.38400113582611, - "p99": 81.31200075149536 + "p50": 79.19999957084656, + "p90": 79.74400371313095, + "p95": 80.19199967384338, + "p99": 89.82399851083755 }, "roundtrip": { - "p50": 123.87199699878693, - "p90": 150.27199685573578, - "p95": 152.16000378131866, - "p99": 155.4879993200302 + "p50": 133.40799510478973, + "p90": 137.82399892807007, + "p95": 139.0720009803772, + "p99": 155.29599785804749 }, "isolatedSum": { - "p50": 146.4959979057312, - "p90": 175.58399587869644, - "p95": 181.66399747133255, - "p99": 187.3920038342476 + "p50": 149.88800138235092, + "p90": 156.25600516796112, + "p95": 157.6320007443428, + "p99": 172.32000082731247 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 11124736, - "combineLogicalBytes": 11124736, - "fanoutMean": 5.3046875, - "recvTokensMax": 186, - "stragglerRank": 1, + "dispatchLogicalBytes": 17733632, + "combineLogicalBytes": 17733632, + "fanoutMean": 4.83203125, + "recvTokensMax": 248, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -8422,35 +8981,35 @@ "tokensPerRank": 64, "globalTokens": 512, "dispatch": { - "p50": 100.28800368309021, - "p90": 107.96800255775452, - "p95": 109.47199910879135, - "p99": 119.90399658679962 + "p50": 87.5839963555336, + "p90": 90.87999910116196, + "p95": 91.67999774217606, + "p99": 98.9760011434555 }, "combine": { - "p50": 81.11999928951263, - "p90": 87.71199733018875, - "p95": 89.1840010881424, - "p99": 90.14400094747543 + "p50": 92.47999638319016, + "p90": 101.40799731016159, + "p95": 101.98400169610977, + "p99": 115.64800143241882 }, "roundtrip": { - "p50": 151.8079936504364, - "p90": 162.59199380874634, - "p95": 164.06400501728058, - "p99": 168.57600212097168 + "p50": 159.55199301242828, + "p90": 163.29599916934967, + "p95": 165.43999314308167, + "p99": 171.07200622558594 }, "isolatedSum": { - "p50": 
181.40800297260284, - "p90": 195.67999988794327, - "p95": 198.65600019693375, - "p99": 210.04799753427505 + "p50": 180.06399273872375, + "p90": 192.28799641132355, + "p95": 193.66399943828583, + "p99": 214.62400257587433 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 22192128, - "combineLogicalBytes": 22192128, - "fanoutMean": 5.291015625, - "recvTokensMax": 358, - "stragglerRank": 6, + "dispatchLogicalBytes": 35424256, + "combineLogicalBytes": 35424256, + "fanoutMean": 4.826171875, + "recvTokensMax": 492, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -8459,34 +9018,34 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 105.69600015878677, - "p90": 115.99999666213989, - "p95": 118.1119978427887, - "p99": 124.83199685811996 + "p50": 103.67999970912933, + "p90": 105.56799918413162, + "p95": 108.70400071144104, + "p99": 117.91999638080597 }, "combine": { - "p50": 88.3840024471283, - "p90": 97.4079966545105, - "p95": 97.88800030946732, - "p99": 100.38399696350098 + "p50": 127.87200510501862, + "p90": 128.9599984884262, + "p95": 130.0799995660782, + "p99": 138.5280042886734 }, "roundtrip": { - "p50": 161.72799468040466, - "p90": 177.2480010986328, - "p95": 181.15200102329254, - "p99": 415.48800468444824 + "p50": 214.1440063714981, + "p90": 220.47999501228333, + "p95": 222.49600291252136, + "p99": 228.19200158119202 }, "isolatedSum": { - "p50": 194.08000260591507, - "p90": 213.4079933166504, - "p95": 215.999998152256, - "p99": 225.21599382162094 + "p50": 231.55200481414795, + "p90": 234.52799767255783, + "p95": 238.78400027751923, + "p99": 256.44800066947937 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 44564480, - "combineLogicalBytes": 44564480, - "fanoutMean": 5.3125, - "recvTokensMax": 699, + "dispatchLogicalBytes": 70160384, + "combineLogicalBytes": 70160384, + "fanoutMean": 4.779296875, + "recvTokensMax": 987, "stragglerRank": 4, "correct": true, "samplesPooled": 600, @@ -8495,46 +9054,47 @@ ] }, { 
- "id": "cx-8988cd24", - "identity": "h100|deepep|5120|8|160|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||be1b44a963bd4ef", - "colorKey": "h100_ff7906f8", - "comparisonKey": "c91a22e0dde262e4", + "id": "cx-945e07fc", + "identity": "b300|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-mild+eplb|8|decode|normal|none|none|0|tuned||16babcaf4204243", + "colorKey": "b300_5e3d915a", + "comparisonKey": "7cc654cb13c70aa7", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:51:51.137960+00:00", + "generatedAt": "2026-06-27T09:49:00.117687+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h100-dgxc-slurm_18", - "sku": "h100", + "runner": "b300-nv_15", + "sku": "b300", "backend": "deepep", "phase": "decode", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "runtime-visible-v1", - "topologyClass": "h100-nvlink-island", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · bf16", + "label": "B300 EP8 · deepep · bf16 · zipf-mild+eplb", + "model": "DeepSeek-V3 (EPLB physical)", "shape": { - "hidden": 5120, + "hidden": 7168, "topk": 8, - "experts": 160, - "routing": "uniform", - "routingLabel": "uniform", + "experts": 288, + "routing": "zipf-mild", + "routingLabel": "zipf-mild+eplb", "routingStep": 0, "unevenTokens": "none", - "eplbEnabled": false, + "eplbEnabled": true, "dispatchDtype": "bf16", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1515, + "achievedFraction": 0.1351, "configuredUnits": 20, - "deviceUnits": 132, + "deviceUnits": 148, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -8547,54 +9107,54 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": 
"be1b44a963bd4ef", - "workloadId": "set:8:34e5874082f8ea8f", + "traceSignature": "16babcaf4204243", + "workloadId": "set:8:289b7f9c14292e96", "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, + "eplbImbalanceBefore": 2.61328125, + "eplbImbalanceAfter": 1.0009114583333334, "backendVersion": "1.2.1", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271699258", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271699258", - "createdAt": "2026-06-26T23:50:52Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28285632999", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285632999", + "createdAt": "2026-06-27T09:49:00.117687+00:00", + "sha": "149586650dbed5b7579537347e9489d5b41543c1" }, "rows": [ { "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 95.551997423172, - "p90": 107.04000294208527, - "p95": 120.38400024175644, - "p99": 156.00000321865082 + "p50": 55.67999929189682, + "p90": 57.72799998521805, + "p95": 59.87200140953064, + "p99": 69.50400024652481 }, "combine": { - "p50": 71.19999825954437, - "p90": 78.84799689054489, - "p95": 81.15199953317642, - "p99": 97.56799787282944 + "p50": 65.50399959087372, + "p90": 66.17599725723267, + "p95": 66.30399823188782, + "p99": 69.31199878454208 }, "roundtrip": { - "p50": 140.25600254535675, - "p90": 152.319997549057, - "p95": 169.8240041732788, - "p99": 207.68000185489655 + "p50": 104.54399883747101, + "p90": 111.42399907112122, + "p95": 112.28799819946289, + "p99": 117.60000139474869 }, "isolatedSum": { - "p50": 166.75199568271637, - "p90": 185.88799983263016, - "p95": 201.53599977493286, - "p99": 253.56800109148026 + "p50": 121.18399888277054, + "p90": 123.90399724245071, + "p95": 126.17599964141846, + "p99": 138.8159990310669 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 430080, - 
"combineLogicalBytes": 430080, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, "fanoutMean": 5.25, - "recvTokensMax": 8, - "stragglerRank": 5, + "recvTokensMax": 7, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -8603,35 +9163,35 @@ "tokensPerRank": 2, "globalTokens": 16, "dispatch": { - "p50": 68.4799998998642, - "p90": 104.12800312042236, - "p95": 121.69600278139114, - "p99": 155.13600409030914 + "p50": 55.80800026655197, + "p90": 57.66399949789047, + "p95": 58.9120015501976, + "p99": 63.87200206518173 }, "combine": { - "p50": 64.80000168085098, - "p90": 79.00799810886383, - "p95": 88.06400001049042, - "p99": 103.39199751615524 + "p50": 65.85600227117538, + "p90": 67.03999638557434, + "p95": 67.80800223350525, + "p99": 78.52800190448761 }, "roundtrip": { - "p50": 119.6800023317337, - "p90": 147.32800424098969, - "p95": 149.08799529075623, - "p99": 153.888002038002 + "p50": 105.18400371074677, + "p90": 107.26399719715118, + "p95": 108.15999656915665, + "p99": 112.47999966144562 }, "isolatedSum": { - "p50": 133.28000158071518, - "p90": 183.1360012292862, - "p95": 209.76000279188156, - "p99": 258.5280016064644 + "p50": 121.66400253772736, + "p90": 124.70399588346481, + "p95": 126.72000378370285, + "p99": 142.40000396966934 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 880640, - "combineLogicalBytes": 880640, - "fanoutMean": 5.375, - "recvTokensMax": 13, - "stragglerRank": 5, + "dispatchLogicalBytes": 1189888, + "combineLogicalBytes": 1189888, + "fanoutMean": 5.1875, + "recvTokensMax": 12, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -8640,35 +9200,35 @@ "tokensPerRank": 4, "globalTokens": 32, "dispatch": { - "p50": 70.01599669456482, - "p90": 98.27200323343277, - "p95": 101.47199779748917, - "p99": 114.33599889278412 + "p50": 56.51199817657471, + "p90": 58.111999183893204, + "p95": 59.23200026154518, + "p99": 65.18399715423584 }, "combine": { - "p50": 65.08799642324448, - "p90": 
78.8159966468811, - "p95": 79.23199981451035, - "p99": 85.95199882984161 + "p50": 66.23999774456024, + "p90": 67.90400296449661, + "p95": 68.35199892520905, + "p99": 76.48000121116638 }, "roundtrip": { - "p50": 119.03999745845795, - "p90": 149.98400211334229, - "p95": 151.8079936504364, - "p99": 158.33599865436554 + "p50": 106.39999806880951, + "p90": 109.50399935245514, + "p95": 114.3679991364479, + "p99": 124.9919980764389 }, "isolatedSum": { - "p50": 135.1039931178093, - "p90": 177.08799988031387, - "p95": 180.7039976119995, - "p99": 200.28799772262573 + "p50": 122.75199592113495, + "p90": 126.01600214838982, + "p95": 127.58399918675423, + "p99": 141.66399836540222 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1740800, - "combineLogicalBytes": 1740800, - "fanoutMean": 5.3125, - "recvTokensMax": 25, - "stragglerRank": 5, + "dispatchLogicalBytes": 2408448, + "combineLogicalBytes": 2408448, + "fanoutMean": 5.25, + "recvTokensMax": 23, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 @@ -8677,35 +9237,35 @@ "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 70.23999840021133, - "p90": 97.79199957847595, - "p95": 102.01600193977356, - "p99": 116.67200177907944 + "p50": 58.17599967122078, + "p90": 60.575999319553375, + "p95": 61.69600039720535, + "p99": 74.97599720954895 }, "combine": { - "p50": 65.47199934720993, - "p90": 79.0719985961914, - "p95": 79.64800298213959, - "p99": 87.67999708652496 + "p50": 67.03999638557434, + "p90": 69.31199878454208, + "p95": 76.57600194215775, + "p99": 78.91199737787247 }, "roundtrip": { - "p50": 118.367999792099, - "p90": 150.4639983177185, - "p95": 155.68000078201294, - "p99": 188.25599551200867 + "p50": 119.4240003824234, + "p90": 124.79999661445618, + "p95": 125.88800489902496, + "p99": 129.43999469280243 }, "isolatedSum": { - "p50": 135.71199774742126, - "p90": 176.86399817466736, - "p95": 181.66400492191315, - "p99": 204.3519988656044 + "p50": 125.21599605679512, + "p90": 
129.88799810409546, + "p95": 138.2720023393631, + "p99": 153.88799458742142 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 3471360, - "combineLogicalBytes": 3471360, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, "fanoutMean": 5.296875, - "recvTokensMax": 50, - "stragglerRank": 5, + "recvTokensMax": 47, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -8714,33 +9274,33 @@ "tokensPerRank": 16, "globalTokens": 128, "dispatch": { - "p50": 95.10400146245956, - "p90": 101.34399682283401, - "p95": 105.6319996714592, - "p99": 117.11999773979187 + "p50": 59.776000678539276, + "p90": 65.88800251483917, + "p95": 67.03999638557434, + "p99": 71.9359964132309 }, "combine": { - "p50": 69.11999732255936, - "p90": 79.42400127649307, - "p95": 80.03199845552444, - "p99": 86.87999844551086 + "p50": 68.12799721956253, + "p90": 77.44000107049942, + "p95": 77.79199630022049, + "p99": 89.6959975361824 }, "roundtrip": { - "p50": 120.03199756145477, - "p90": 147.039994597435, - "p95": 149.72800016403198, - "p99": 158.55999290943146 + "p50": 118.81600320339203, + "p90": 124.83199685811996, + "p95": 125.91999769210815, + "p99": 131.80799782276154 }, "isolatedSum": { - "p50": 164.22399878501892, - "p90": 180.7679980993271, - "p95": 185.66399812698364, - "p99": 203.99999618530273 + "p50": 127.9039978981018, + "p90": 143.3280035853386, + "p95": 144.83199268579483, + "p99": 161.6319939494133 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 6912000, - "combineLogicalBytes": 6912000, - "fanoutMean": 5.2734375, + "dispatchLogicalBytes": 9605120, + "combineLogicalBytes": 9605120, + "fanoutMean": 5.234375, "recvTokensMax": 93, "stragglerRank": 5, "correct": true, @@ -8751,35 +9311,35 @@ "tokensPerRank": 32, "globalTokens": 256, "dispatch": { - "p50": 77.47200131416321, - "p90": 103.16800326108932, - "p95": 109.72800105810165, - "p99": 237.37600445747375 + "p50": 68.03199648857117, + "p90": 69.2799985408783, + "p95": 71.23199850320816, + 
"p99": 76.22399926185608 }, "combine": { - "p50": 71.99999690055847, - "p90": 87.13600039482117, - "p95": 95.20000219345093, - "p99": 104.16000336408615 + "p50": 77.85599678754807, + "p90": 78.5600021481514, + "p95": 79.32800054550171, + "p99": 90.84799885749817 }, "roundtrip": { - "p50": 146.14400267601013, - "p90": 166.52800142765045, - "p95": 171.1679995059967, - "p99": 366.0160005092621 + "p50": 128.86400520801544, + "p90": 132.35199451446533, + "p95": 134.91199910640717, + "p99": 140.57600498199463 }, "isolatedSum": { - "p50": 149.47199821472168, - "p90": 190.3040036559105, - "p95": 204.92800325155258, - "p99": 341.5360078215599 + "p50": 145.88799327611923, + "p90": 147.8400006890297, + "p95": 150.55999904870987, + "p99": 167.07199811935425 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 13977600, - "combineLogicalBytes": 13977600, - "fanoutMean": 5.33203125, - "recvTokensMax": 179, - "stragglerRank": 5, + "dispatchLogicalBytes": 19367936, + "combineLogicalBytes": 19367936, + "fanoutMean": 5.27734375, + "recvTokensMax": 182, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -8788,34 +9348,34 @@ "tokensPerRank": 64, "globalTokens": 512, "dispatch": { - "p50": 92.67199784517288, - "p90": 111.04000359773636, - "p95": 113.79200220108032, - "p99": 126.68800354003906 + "p50": 86.30400151014328, + "p90": 88.86399865150452, + "p95": 89.82399851083755, + "p99": 103.20000350475311 }, "combine": { - "p50": 81.08799904584885, - "p90": 88.67199718952179, - "p95": 95.45599669218063, - "p99": 96.28800302743912 + "p50": 90.87999910116196, + "p90": 92.8959995508194, + "p95": 94.40000355243683, + "p99": 102.4319976568222 }, "roundtrip": { - "p50": 147.5840061903, - "p90": 168.96000504493713, - "p95": 170.9440052509308, - "p99": 174.9120056629181 + "p50": 157.31200575828552, + "p90": 161.56800091266632, + "p95": 163.03999722003937, + "p99": 172.8000044822693 }, "isolatedSum": { - "p50": 173.75999689102173, - "p90": 199.71200078725815, - "p95": 
209.24799889326096, - "p99": 222.97600656747818 + "p50": 177.18400061130524, + "p90": 181.7599982023239, + "p95": 184.22400206327438, + "p99": 205.63200116157532 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 27975680, - "combineLogicalBytes": 27975680, - "fanoutMean": 5.3359375, - "recvTokensMax": 355, + "dispatchLogicalBytes": 38535168, + "combineLogicalBytes": 38535168, + "fanoutMean": 5.25, + "recvTokensMax": 358, "stragglerRank": 7, "correct": true, "samplesPooled": 600, @@ -8825,35 +9385,35 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 105.15200346708298, - "p90": 120.7680031657219, - "p95": 122.68800288438797, - "p99": 131.29599392414093 + "p50": 92.6079973578453, + "p90": 95.67999839782715, + "p95": 98.94400089979172, + "p99": 109.79200154542923 }, "combine": { - "p50": 95.90400010347366, - "p90": 104.67199981212616, - "p95": 112.60800063610077, - "p99": 267.5839960575104 + "p50": 114.62400108575821, + "p90": 115.35999923944473, + "p95": 116.06399714946747, + "p99": 126.78399682044983 }, "roundtrip": { - "p50": 173.0239987373352, - "p90": 194.17600333690643, - "p95": 195.90400159358978, - "p99": 308.351993560791 + "p50": 194.94399428367615, + "p90": 198.33600521087646, + "p95": 199.39200580120087, + "p99": 228.70400547981262 }, "isolatedSum": { - "p50": 201.05600357055664, - "p90": 225.44000297784805, - "p95": 235.29600352048874, - "p99": 398.8799899816513 + "p50": 207.23199844360352, + "p90": 211.03999763727188, + "p95": 215.0079980492592, + "p99": 236.57599836587906 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 55674880, - "combineLogicalBytes": 55674880, - "fanoutMean": 5.3095703125, - "recvTokensMax": 699, - "stragglerRank": 2, + "dispatchLogicalBytes": 76869632, + "combineLogicalBytes": 76869632, + "fanoutMean": 5.236328125, + "recvTokensMax": 688, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -8861,34 +9421,35 @@ ] }, { - "id": "cx-d5af8f11", - "identity": 
"h100|deepep|6144|8|256|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", - "colorKey": "h100_ff7906f8", - "comparisonKey": "bb40f1d7fb8ef5bf", + "id": "cx-29812ef0", + "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-moderate|8|decode|normal|none|none|0|tuned||14ded8461f2636c", + "colorKey": "b300_fdf55523", + "comparisonKey": "941e1d8581ae6b5b", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:52:15.657129+00:00", + "generatedAt": "2026-06-27T09:49:22.371406+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h100-dgxc-slurm_00", - "sku": "h100", + "runner": "b300-nv_01", + "sku": "b300", "backend": "deepep", "phase": "decode", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "runtime-visible-v1", - "topologyClass": "h100-nvlink-island", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · bf16", + "label": "B300 EP8 · deepep · bf16 · zipf-moderate", + "model": "DeepSeek-V3/V4", "shape": { - "hidden": 6144, + "hidden": 7168, "topk": 8, "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, @@ -8898,9 +9459,9 @@ }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1515, + "achievedFraction": 0.1351, "configuredUnits": 20, - "deviceUnits": 132, + "deviceUnits": 148, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -8913,8 +9474,8 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "ac583971f94b176", - "workloadId": "set:8:2e0df6a62cd0143e", + "traceSignature": "14ded8461f2636c", + "workloadId": "set:8:120a8dc1dba92ca9", "workloadSource": 
"canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -8922,45 +9483,45 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271714089", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271714089", - "createdAt": "2026-06-26T23:51:20Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28285640709", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285640709", + "createdAt": "2026-06-27T09:49:22.371406+00:00", + "sha": "149586650dbed5b7579537347e9489d5b41543c1" }, "rows": [ { "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 95.51999717950821, - "p90": 104.99200224876404, - "p95": 123.16799908876419, - "p99": 153.05599570274353 + "p50": 56.63999915122986, + "p90": 58.27200040221214, + "p95": 60.28800085186958, + "p99": 68.92800331115723 }, "combine": { - "p50": 74.0479975938797, - "p90": 82.36800134181976, - "p95": 90.65599739551544, - "p99": 115.13599753379822 + "p50": 65.47199934720993, + "p90": 66.27199798822403, + "p95": 66.39999896287918, + "p99": 69.21599805355072 }, "roundtrip": { - "p50": 144.73600685596466, - "p90": 151.7760008573532, - "p95": 153.9199948310852, - "p99": 191.74399971961975 + "p50": 106.84800148010254, + "p90": 112.19199746847153, + "p95": 113.08799684047699, + "p99": 127.96799838542938 }, "isolatedSum": { - "p50": 169.5679947733879, - "p90": 187.3600035905838, - "p95": 213.82399648427963, - "p99": 268.19199323654175 + "p50": 122.11199849843979, + "p90": 124.54399839043617, + "p95": 126.68799981474876, + "p99": 138.14400136470795 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 540672, - "combineLogicalBytes": 540672, - "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 7, + "dispatchLogicalBytes": 444416, + "combineLogicalBytes": 444416, + "fanoutMean": 3.875, + "recvTokensMax": 8, + "stragglerRank": 4, "correct": true, 
"samplesPooled": 600, "trials": 3 @@ -8969,35 +9530,35 @@ "tokensPerRank": 2, "globalTokens": 16, "dispatch": { - "p50": 71.26399874687195, - "p90": 102.4319976568222, - "p95": 104.47999835014343, - "p99": 143.48800480365753 + "p50": 56.86400085687637, + "p90": 58.59199911355972, + "p95": 60.99199876189232, + "p99": 64.41599875688553 }, "combine": { - "p50": 67.77600198984146, - "p90": 81.15199953317642, - "p95": 81.727996468544, - "p99": 87.71199733018875 + "p50": 65.88800251483917, + "p90": 66.52799993753433, + "p95": 67.52000004053116, + "p99": 78.015998005867 }, "roundtrip": { - "p50": 124.03199821710587, - "p90": 153.02400290966034, - "p95": 154.94400262832642, - "p99": 158.36800634860992 + "p50": 105.92000186443329, + "p90": 112.76800185441971, + "p95": 113.56800049543381, + "p99": 130.3360015153885 }, "isolatedSum": { - "p50": 139.0400007367134, - "p90": 183.58399718999863, - "p95": 186.20799481868744, - "p99": 231.20000213384628 + "p50": 122.75200337171555, + "p90": 125.11999905109406, + "p95": 128.51199880242348, + "p99": 142.43199676275253 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1056768, - "combineLogicalBytes": 1056768, - "fanoutMean": 5.375, - "recvTokensMax": 13, - "stragglerRank": 3, + "dispatchLogicalBytes": 845824, + "combineLogicalBytes": 845824, + "fanoutMean": 3.6875, + "recvTokensMax": 16, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -9006,34 +9567,34 @@ "tokensPerRank": 4, "globalTokens": 32, "dispatch": { - "p50": 91.58399701118469, - "p90": 103.20000350475311, - "p95": 104.032002389431, - "p99": 107.58399963378906 + "p50": 58.400001376867294, + "p90": 61.08799949288368, + "p95": 64.19199705123901, + "p99": 66.72000139951706 }, "combine": { - "p50": 74.20799881219864, - "p90": 80.64000308513641, - "p95": 81.31200075149536, - "p99": 82.49600231647491 + "p50": 66.17599725723267, + "p90": 67.48799979686737, + "p95": 68.54400038719177, + "p99": 76.31999999284744 }, "roundtrip": { - "p50": 
145.79200744628906, - "p90": 152.38399803638458, - "p95": 154.55999970436096, - "p99": 172.38399386405945 + "p50": 107.96800255775452, + "p90": 113.0559965968132, + "p95": 115.64800143241882, + "p99": 122.01599776744843 }, "isolatedSum": { - "p50": 165.79199582338333, - "p90": 183.84000658988953, - "p95": 185.34400314092636, - "p99": 190.08000195026398 + "p50": 124.57599863409996, + "p90": 128.57599928975105, + "p95": 132.7359974384308, + "p99": 143.0400013923645 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2125824, - "combineLogicalBytes": 2125824, - "fanoutMean": 5.40625, - "recvTokensMax": 29, + "dispatchLogicalBytes": 1691648, + "combineLogicalBytes": 1691648, + "fanoutMean": 3.6875, + "recvTokensMax": 32, "stragglerRank": 4, "correct": true, "samplesPooled": 600, @@ -9043,35 +9604,35 @@ "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 92.6079973578453, - "p90": 103.00800204277039, - "p95": 114.46399986743927, - "p99": 149.98400211334229 + "p50": 58.6559996008873, + "p90": 60.575999319553375, + "p95": 62.144000083208084, + "p99": 69.47200000286102 }, "combine": { - "p50": 76.1599987745285, - "p90": 82.49600231647491, - "p95": 86.68799698352814, - "p99": 95.77599912881851 + "p50": 68.00000369548798, + "p90": 76.28799974918365, + "p95": 76.54400169849396, + "p99": 77.60000228881836 }, "roundtrip": { - "p50": 146.84799313545227, - "p90": 161.40800714492798, - "p95": 192.09599494934082, - "p99": 203.74399423599243 + "p50": 117.95199662446976, + "p90": 123.6800029873848, + "p95": 125.11999905109406, + "p99": 143.2960033416748 }, "isolatedSum": { - "p50": 168.7679961323738, - "p90": 185.5040043592453, - "p95": 201.1519968509674, - "p99": 245.7600012421608 + "p50": 126.65600329637527, + "p90": 136.86399906873703, + "p95": 138.68800178170204, + "p99": 147.07200229167938 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4263936, - "combineLogicalBytes": 4263936, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 0, + 
"dispatchLogicalBytes": 3354624, + "combineLogicalBytes": 3354624, + "fanoutMean": 3.65625, + "recvTokensMax": 64, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -9080,35 +9641,35 @@ "tokensPerRank": 16, "globalTokens": 128, "dispatch": { - "p50": 91.2960022687912, - "p90": 99.16800260543823, - "p95": 101.1200025677681, - "p99": 107.68000036478043 + "p50": 59.93599817156792, + "p90": 65.8240020275116, + "p95": 69.40799951553345, + "p99": 84.54400300979614 }, "combine": { - "p50": 77.37600058317184, - "p90": 81.53600245714188, - "p95": 82.24000036716461, - "p99": 87.13600039482117 + "p50": 68.12799721956253, + "p90": 76.12799853086472, + "p95": 76.7040029168129, + "p99": 77.85599678754807 }, "roundtrip": { - "p50": 150.30400454998016, - "p90": 157.05600380897522, - "p95": 158.9760035276413, - "p99": 162.49600052833557 + "p50": 122.97599762678146, + "p90": 126.43200159072876, + "p95": 127.32799351215363, + "p99": 132.47999548912048 }, "isolatedSum": { - "p50": 168.67200285196304, - "p90": 180.7040050625801, - "p95": 183.3600029349327, - "p99": 194.8160007596016 + "p50": 128.06399539113045, + "p90": 141.9520005583763, + "p95": 146.11200243234634, + "p99": 162.3999997973442 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 8503296, - "combineLogicalBytes": 8503296, - "fanoutMean": 5.40625, - "recvTokensMax": 92, - "stragglerRank": 4, + "dispatchLogicalBytes": 6537216, + "combineLogicalBytes": 6537216, + "fanoutMean": 3.5625, + "recvTokensMax": 127, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -9117,34 +9678,34 @@ "tokensPerRank": 32, "globalTokens": 256, "dispatch": { - "p50": 96.03200107812881, - "p90": 103.90400141477585, - "p95": 107.68000036478043, - "p99": 194.815993309021 + "p50": 74.43200051784515, + "p90": 77.37600058317184, + "p95": 78.17599922418594, + "p99": 81.4720019698143 }, "combine": { - "p50": 80.51200211048126, - "p90": 87.00799942016602, - "p95": 90.55999666452408, - "p99": 
383.7119936943054 + "p50": 77.72800326347351, + "p90": 78.52800190448761, + "p95": 78.68800312280655, + "p99": 90.68799763917923 }, "roundtrip": { - "p50": 134.97599959373474, - "p90": 158.27199816703796, - "p95": 171.36000096797943, - "p99": 204.0960043668747 + "p50": 129.2479932308197, + "p90": 133.59999656677246, + "p95": 134.8160058259964, + "p99": 141.63200557231903 }, "isolatedSum": { - "p50": 176.54400318861008, - "p90": 190.91200083494186, - "p95": 198.2399970293045, - "p99": 578.5279870033264 + "p50": 152.16000378131866, + "p90": 155.90400248765945, + "p95": 156.8640023469925, + "p99": 172.15999960899353 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 16908288, - "combineLogicalBytes": 16908288, - "fanoutMean": 5.375, - "recvTokensMax": 182, + "dispatchLogicalBytes": 12859392, + "combineLogicalBytes": 12859392, + "fanoutMean": 3.50390625, + "recvTokensMax": 255, "stragglerRank": 4, "correct": true, "samplesPooled": 600, @@ -9154,34 +9715,34 @@ "tokensPerRank": 64, "globalTokens": 512, "dispatch": { - "p50": 107.61599987745285, - "p90": 114.49600011110306, - "p95": 116.35199934244156, - "p99": 122.84799665212631 + "p50": 79.52000200748444, + "p90": 84.86399799585342, + "p95": 85.69599688053131, + "p99": 103.00800204277039 }, "combine": { - "p50": 92.06400066614151, - "p90": 98.2080027461052, - "p95": 98.68799895048141, - "p99": 102.46399790048599 + "p50": 90.68799763917923, + "p90": 92.22400188446045, + "p95": 93.72799843549728, + "p99": 102.27199643850327 }, "roundtrip": { - "p50": 167.84000396728516, - "p90": 173.567995429039, - "p95": 175.90400576591492, - "p99": 179.4240027666092 + "p50": 158.07999670505524, + "p90": 162.84799575805664, + "p95": 163.68000209331512, + "p99": 179.80800569057465 }, "isolatedSum": { - "p50": 199.68000054359436, - "p90": 212.70400285720825, - "p95": 215.03999829292297, - "p99": 225.3119945526123 + "p50": 170.20799964666367, + "p90": 177.08799988031387, + "p95": 179.4239953160286, + "p99": 205.27999848127365 }, 
"roundtripMeasured": true, - "dispatchLogicalBytes": 33423360, - "combineLogicalBytes": 33423360, - "fanoutMean": 5.3125, - "recvTokensMax": 367, + "dispatchLogicalBytes": 25145344, + "combineLogicalBytes": 25145344, + "fanoutMean": 3.42578125, + "recvTokensMax": 510, "stragglerRank": 4, "correct": true, "samplesPooled": 600, @@ -9191,35 +9752,35 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 123.26399981975555, - "p90": 130.68799674510956, - "p95": 132.83200562000275, - "p99": 148.0959951877594 + "p50": 102.52799838781357, + "p90": 109.79200154542923, + "p95": 111.68000102043152, + "p99": 126.5919953584671 }, "combine": { - "p50": 106.6880002617836, - "p90": 114.23999816179276, - "p95": 115.23199826478958, - "p99": 137.85600662231445 + "p50": 126.3359934091568, + "p90": 127.55200266838074, + "p95": 127.74400413036346, + "p99": 134.3040019273758 }, "roundtrip": { - "p50": 197.60000705718994, - "p90": 204.8639953136444, - "p95": 207.07200467586517, - "p99": 225.8879989385605 + "p50": 209.50399339199066, + "p90": 217.3759937286377, + "p95": 220.92799842357635, + "p99": 231.55200481414795 }, "isolatedSum": { - "p50": 229.95200008153915, - "p90": 244.9279949069023, - "p95": 248.06400388479233, - "p99": 285.95200181007385 + "p50": 228.86399179697037, + "p90": 237.34400421380997, + "p95": 239.42400515079498, + "p99": 260.8959972858429 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 66576384, - "combineLogicalBytes": 66576384, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 2, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -9227,16 +9788,16 @@ ] }, { - "id": "cx-7171c240", - "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|fp8-saturation|none|none|0|tuned||8c8497a77d9085d", - "colorKey": "h100_42947950", - "comparisonKey": 
"1fe2184d83233e7e", + "id": "cx-b49699d8", + "identity": "b300|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-moderate+eplb|8|decode|normal|none|none|0|tuned||a8f501af7004836", + "colorKey": "b300_4eade0db", + "comparisonKey": "4a0af3f3eb467c05", "schemaVersion": 3, - "generatedAt": "2026-06-27T00:05:32.898956+00:00", + "generatedAt": "2026-06-27T09:49:28.247452+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h100-dgxc-slurm_03", - "sku": "h100", + "runner": "b300-nv_14", + "sku": "b300", "backend": "deepep", "phase": "decode", "mode": "normal", @@ -9244,29 +9805,30 @@ "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", + "topologyClass": "b300-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · bf16", + "label": "B300 EP8 · deepep · bf16 · zipf-moderate+eplb", + "model": "DeepSeek-V3 (EPLB physical)", "shape": { "hidden": 7168, "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", + "experts": 288, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate+eplb", "routingStep": 0, "unevenTokens": "none", - "eplbEnabled": false, + "eplbEnabled": true, "dispatchDtype": "bf16", - "activationProfile": "fp8-saturation", + "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1515, + "achievedFraction": 0.1351, "configuredUnits": 20, - "deviceUnits": 132, + "deviceUnits": 148, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -9279,271 +9841,238 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "8c8497a77d9085d", - "workloadId": "set:4:d8d49658059863f2", + "traceSignature": "a8f501af7004836", + "workloadId": "set:8:120a8dc1dba92ca9", "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - 
"eplbImbalanceAfter": null, + "eplbImbalanceBefore": 4.927734375, + "eplbImbalanceAfter": 1.0006103515625, "backendVersion": "1.2.1", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28272125238", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272125238", - "createdAt": "2026-06-27T00:04:22Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28285643524", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285643524", + "createdAt": "2026-06-27T09:49:28.247452+00:00", + "sha": "149586650dbed5b7579537347e9489d5b41543c1" }, "rows": [ { "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 94.7519987821579, - "p90": 118.49600076675415, - "p95": 129.60000336170197, - "p99": 144.31999623775482 + "p50": 57.8560009598732, + "p90": 60.32000109553337, + "p95": 62.55999952554703, + "p99": 75.26399940252304 }, "combine": { - "p50": 76.64000242948532, - "p90": 87.2960016131401, - "p95": 90.52799642086029, - "p99": 103.10400277376175 + "p50": 66.01600348949432, + "p90": 66.39999896287918, + "p95": 66.56000018119812, + "p99": 69.82400268316269 }, "roundtrip": { - "p50": 147.2640037536621, - "p90": 170.30400037765503, - "p95": 184.89600718021393, - "p99": 195.6160068511963 + "p50": 107.77600109577179, + "p90": 113.98400366306305, + "p95": 115.10399729013443, + "p99": 123.80799651145935 }, "isolatedSum": { - "p50": 171.39200121164322, - "p90": 205.79200237989426, - "p95": 220.12799978256226, - "p99": 247.42399901151657 + "p50": 123.87200444936752, + "p90": 126.72000005841255, + "p95": 129.11999970674515, + "p99": 145.08800208568573 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 630784, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 5.375, "recvTokensMax": 7, - "stragglerRank": 7, + "stragglerRank": 4, "correct": true, 
"samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 2, + "globalTokens": 16, "dispatch": { - "p50": 98.68799895048141, - "p90": 122.17599898576736, - "p95": 138.7840062379837, - "p99": 191.9039934873581 + "p50": 57.95200169086456, + "p90": 59.55199897289276, + "p95": 61.02399900555611, + "p99": 63.35999816656113 }, "combine": { - "p50": 81.31200075149536, - "p90": 89.72799777984619, - "p95": 97.08800166845322, - "p99": 106.62399977445602 + "p50": 66.23999774456024, + "p90": 67.9360032081604, + "p95": 69.11999732255936, + "p99": 78.20799946784973 }, "roundtrip": { - "p50": 152.70400047302246, - "p90": 174.9120056629181, - "p95": 184.03199315071106, - "p99": 195.51999866962433 + "p50": 107.00800269842148, + "p90": 109.21599715948105, + "p95": 111.13599687814713, + "p99": 122.27199971675873 }, "isolatedSum": { - "p50": 179.99999970197678, - "p90": 211.90399676561356, - "p95": 235.87200790643692, - "p99": 298.5279932618141 + "p50": 124.1919994354248, + "p90": 127.48800218105316, + "p95": 130.14399632811546, + "p99": 141.56799763441086 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 6, + "dispatchLogicalBytes": 1204224, + "combineLogicalBytes": 1204224, + "fanoutMean": 5.25, + "recvTokensMax": 14, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 32, - "globalTokens": 256, + "tokensPerRank": 4, + "globalTokens": 32, "dispatch": { - "p50": 101.79200023412704, - "p90": 127.96799838542938, - "p95": 147.42399752140045, - "p99": 195.16800343990326 + "p50": 59.74400043487549, + "p90": 61.85600161552429, + "p95": 63.968002796173096, + "p99": 73.44000041484833 }, "combine": { - "p50": 89.66399729251862, - "p90": 103.4879982471466, - "p95": 113.02399635314941, - "p99": 128.1599998474121 + "p50": 67.55200028419495, + "p90": 69.24799829721451, + "p95": 
71.99999690055847, + "p99": 77.72800326347351 }, "roundtrip": { - "p50": 162.88000345230103, - "p90": 193.53599846363068, - "p95": 214.08000588417053, - "p99": 247.71200120449066 + "p50": 118.27199906110764, + "p90": 124.70400333404541, + "p95": 126.20800733566284, + "p99": 130.11200726032257 }, "isolatedSum": { - "p50": 191.45599752664566, - "p90": 231.455996632576, - "p95": 260.44799387454987, - "p99": 323.32800328731537 + "p50": 127.29600071907043, + "p90": 131.1039999127388, + "p95": 135.96799969673157, + "p99": 151.16800367832184 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19726336, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 182, - "stragglerRank": 7, + "dispatchLogicalBytes": 2394112, + "combineLogicalBytes": 2394112, + "fanoutMean": 5.21875, + "recvTokensMax": 24, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 8, + "globalTokens": 64, "dispatch": { - "p50": 130.49599528312683, - "p90": 143.8719928264618, - "p95": 152.70400047302246, - "p99": 158.9760035276413 + "p50": 60.22400036454201, + "p90": 62.20800057053566, + "p95": 64.19199705123901, + "p99": 81.11999928951263 }, "combine": { - "p50": 114.81600254774094, - "p90": 127.23200023174286, - "p95": 131.071999669075, - "p99": 139.5840048789978 + "p50": 68.06399673223495, + "p90": 70.0799971818924, + "p95": 77.15199887752533, + "p99": 79.16799932718277 }, "roundtrip": { - "p50": 212.70400285720825, - "p90": 226.33600234985352, - "p95": 233.69599878787994, - "p99": 247.8400021791458 + "p50": 123.00799787044525, + "p90": 127.10399925708771, + "p95": 127.68000364303589, + "p99": 130.46400249004364 }, "isolatedSum": { - "p50": 245.31199783086777, - "p90": 271.10399305820465, - "p95": 283.7760001420975, - "p99": 298.5600084066391 + "p50": 128.28799709677696, + "p90": 132.28799775242805, + "p95": 141.34399592876434, + "p99": 160.2879986166954 }, "roundtripMeasured": true, - 
"dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 7, + "dispatchLogicalBytes": 4630528, + "combineLogicalBytes": 4630528, + "fanoutMean": 5.046875, + "recvTokensMax": 45, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 - } - ] - }, - { - "id": "cx-0a4944c1", - "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||2baace5eca64609", - "colorKey": "h100_42947950", - "comparisonKey": "fb346b1019e55bb0", - "schemaVersion": 3, - "generatedAt": "2026-06-26T23:50:20.307571+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_10", - "sku": "h100", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · bf16", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "2baace5eca64609", - "workloadId": "set:2:07d544ac2af401ec", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - 
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28271533135", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271533135", - "createdAt": "2026-06-26T23:45:44Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ + }, { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 16, + "globalTokens": 128, "dispatch": { - "p50": 99.0080013871193, - "p90": 111.00800335407257, - "p95": 115.1999980211258, - "p99": 124.67200309038162 + "p50": 65.95200300216675, + "p90": 69.72800195217133, + "p95": 72.41600006818771, + "p99": 76.09599828720093 }, "combine": { - "p50": 80.1599994301796, - "p90": 88.03199976682663, - "p95": 88.48000317811966, - "p99": 96.3200032711029 + "p50": 68.70400160551071, + "p90": 77.63200253248215, + "p95": 78.3040001988411, + "p99": 92.0960009098053 }, "roundtrip": { - "p50": 159.04000401496887, - "p90": 169.11999881267548, - "p95": 173.69599640369415, - "p99": 179.61600422859192 + "p50": 120.99199742078781, + "p90": 126.08000636100769, + "p95": 127.20000743865967, + "p99": 143.13599467277527 }, "isolatedSum": { - "p50": 179.1680008172989, - "p90": 199.0400031208992, - "p95": 203.68000119924545, - "p99": 220.99200636148453 + "p50": 134.65600460767746, + "p90": 147.36000448465347, + "p95": 150.7200002670288, + "p99": 168.19199919700623 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, + "dispatchLogicalBytes": 9447424, + "combineLogicalBytes": 9447424, + "fanoutMean": 5.1484375, + "recvTokensMax": 91, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 70.11199742555618, + "p90": 74.43200051784515, + "p95": 75.42400062084198, + "p99": 80.25600016117096 + }, + "combine": { + "p50": 78.40000092983246, + 
"p90": 79.1039988398552, + "p95": 80.12799918651581, + "p99": 90.46400338411331 + }, + "roundtrip": { + "p50": 133.37600231170654, + "p90": 137.2160017490387, + "p95": 138.20800185203552, + "p99": 140.86399972438812 + }, + "isolatedSum": { + "p50": 148.51199835538864, + "p90": 153.53599935770035, + "p95": 155.5519998073578, + "p99": 170.72000354528427 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19023872, + "combineLogicalBytes": 19023872, + "fanoutMean": 5.18359375, + "recvTokensMax": 178, "stragglerRank": 4, "correct": true, "samplesPooled": 600, @@ -9553,34 +10082,71 @@ "tokensPerRank": 64, "globalTokens": 512, "dispatch": { - "p50": 113.95200341939926, - "p90": 126.56000256538391, - "p95": 130.46400249004364, - "p99": 137.2479945421219 + "p50": 88.44800293445587, + "p90": 90.7519981265068, + "p95": 91.51999652385712, + "p99": 95.32800316810608 }, "combine": { - "p50": 97.85600006580353, - "p90": 105.40799796581268, - "p95": 112.47999966144562, - "p99": 113.50400000810623 + "p50": 92.3520028591156, + "p90": 101.24800354242325, + "p95": 101.82400047779083, + "p99": 115.07199704647064 }, "roundtrip": { - "p50": 177.95200645923615, - "p90": 187.58399784564972, - "p95": 192.73599982261658, - "p99": 206.43199980258942 + "p50": 161.40800714492798, + "p90": 165.3759926557541, + "p95": 166.72000288963318, + "p99": 173.21600019931793 }, "isolatedSum": { - "p50": 211.8080034852028, - "p90": 231.9680005311966, - "p95": 242.94400215148926, - "p99": 250.75199455022812 + "p50": 180.80000579357147, + "p90": 192.00000166893005, + "p95": 193.34399700164795, + "p99": 210.40000021457672 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 38993920, - "combineLogicalBytes": 38993920, - "fanoutMean": 5.3125, - "recvTokensMax": 367, + "dispatchLogicalBytes": 38148096, + "combineLogicalBytes": 38148096, + "fanoutMean": 5.197265625, + "recvTokensMax": 350, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + 
"globalTokens": 1024, + "dispatch": { + "p50": 94.81599926948547, + "p90": 97.63199836015701, + "p95": 99.80800002813339, + "p99": 106.84800148010254 + }, + "combine": { + "p50": 115.23199826478958, + "p90": 116.12799763679504, + "p95": 117.3119992017746, + "p99": 179.83999848365784 + }, + "roundtrip": { + "p50": 193.53599846363068, + "p90": 199.16799664497375, + "p95": 200.41599869728088, + "p99": 207.48800039291382 + }, + "isolatedSum": { + "p50": 210.04799753427505, + "p90": 213.75999599695206, + "p95": 217.119999229908, + "p99": 286.6879999637604 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 76955648, + "combineLogicalBytes": 76955648, + "fanoutMean": 5.2421875, + "recvTokensMax": 687, "stragglerRank": 6, "correct": true, "samplesPooled": 600, @@ -9589,16 +10155,16 @@ ] }, { - "id": "cx-7c169b4e", - "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||8c8497a77d9085d", - "colorKey": "h100_42947950", - "comparisonKey": "fb346b1019e55bb0", + "id": "cx-686fd558", + "identity": "b300|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|decode|normal|none|none|0|tuned||a8f501af7004836", + "colorKey": "b300_f1ea991b", + "comparisonKey": "72d679cfb4846306", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:54:30.292467+00:00", + "generatedAt": "2026-06-27T09:48:52.585093+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h100-dgxc-slurm_14", - "sku": "h100", + "runner": "b300-nv_02", + "sku": "b300", "backend": "deepep", "phase": "decode", "mode": "normal", @@ -9606,29 +10172,30 @@ "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", + "topologyClass": "b300-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · bf16", + "label": "B300 EP8 · deepep · bf16 · zipf+eplb", + "model": "DeepSeek-V3 (EPLB physical)", 
"shape": { "hidden": 7168, "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", + "experts": 288, + "routing": "zipf", + "routingLabel": "zipf+eplb", "routingStep": 0, "unevenTokens": "none", - "eplbEnabled": false, + "eplbEnabled": true, "dispatchDtype": "bf16", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1515, + "achievedFraction": 0.1351, "configuredUnits": 20, - "deviceUnits": 132, + "deviceUnits": 148, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -9641,52 +10208,52 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "8c8497a77d9085d", - "workloadId": "set:4:d8d49658059863f2", + "traceSignature": "a8f501af7004836", + "workloadId": "set:8:f5576e2b712d38c3", "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, + "eplbImbalanceBefore": 4.927734375, + "eplbImbalanceAfter": 1.0006103515625, "backendVersion": "1.2.1", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271781761", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271781761", - "createdAt": "2026-06-26T23:53:23Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28285622991", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285622991", + "createdAt": "2026-06-27T09:48:52.585093+00:00", + "sha": "149586650dbed5b7579537347e9489d5b41543c1" }, "rows": [ { "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 98.43199700117111, - "p90": 107.32799768447876, - "p95": 114.20799791812897, - "p99": 163.71199488639832 + "p50": 58.01599845290184, + "p90": 59.58399921655655, + "p95": 60.896001756191254, + "p99": 72.35199958086014 }, "combine": { - "p50": 80.4160013794899, - "p90": 83.00799876451492, - "p95": 
87.00799942016602, - "p99": 87.93599903583527 + "p50": 66.17599725723267, + "p90": 66.880002617836, + "p95": 67.4239993095398, + "p99": 80.73599636554718 }, "roundtrip": { - "p50": 153.43999862670898, - "p90": 159.93599593639374, - "p95": 161.82400286197662, - "p99": 166.24000668525696 + "p50": 107.55199939012527, + "p90": 113.56800049543381, + "p95": 114.9120032787323, + "p99": 131.8719983100891 }, "isolatedSum": { - "p50": 178.847998380661, - "p90": 190.33599644899368, - "p95": 201.21599733829498, - "p99": 251.64799392223358 + "p50": 124.1919957101345, + "p90": 126.46400183439255, + "p95": 128.32000106573105, + "p99": 153.08799594640732 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 630784, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 5.375, "recvTokensMax": 7, "stragglerRank": 4, "correct": true, @@ -9694,74 +10261,222 @@ "trials": 3 }, { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 2, + "globalTokens": 16, "dispatch": { - "p50": 98.08000177145004, - "p90": 106.62399977445602, - "p95": 110.30399799346924, - "p99": 118.56000125408173 + "p50": 58.27200040221214, + "p90": 59.99999865889549, + "p95": 60.7680007815361, + "p99": 74.0479975938797 }, "combine": { - "p50": 80.79999685287476, - "p90": 87.3280018568039, - "p95": 87.93599903583527, - "p99": 95.39200365543365 + "p50": 66.59200042486191, + "p90": 67.52000004053116, + "p95": 68.64000111818314, + "p99": 70.88000327348709 }, "roundtrip": { - "p50": 156.2879979610443, - "p90": 163.13600540161133, - "p95": 169.21600699424744, - "p99": 271.2959945201874 + "p50": 107.744000852108, + "p90": 109.79200154542923, + "p95": 111.29599809646606, + "p99": 121.72800302505493 }, "isolatedSum": { - "p50": 178.8799986243248, - "p90": 193.95200163125992, - "p95": 198.2399970293045, - "p99": 213.95200490951538 + "p50": 124.86400082707405, + "p90": 127.51999869942665, + "p95": 129.40800189971924, + "p99": 
144.9280008673668 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, + "dispatchLogicalBytes": 1204224, + "combineLogicalBytes": 1204224, + "fanoutMean": 5.25, + "recvTokensMax": 14, "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 32, - "globalTokens": 256, + "tokensPerRank": 4, + "globalTokens": 32, "dispatch": { - "p50": 102.27199643850327, - "p90": 111.32799834012985, - "p95": 115.80800265073776, - "p99": 124.03199821710587 - }, - "combine": { - "p50": 88.3840024471283, - "p90": 95.58399766683578, - "p95": 96.19200229644775, - "p99": 103.61599922180176 + "p50": 61.664000153541565, + "p90": 82.40000158548355, + "p95": 86.36800199747086, + "p99": 92.92799979448318 + }, + "combine": { + "p50": 68.54400038719177, + "p90": 76.4160007238388, + "p95": 77.15199887752533, + "p99": 79.29600030183792 }, "roundtrip": { - "p50": 164.63999450206757, - "p90": 170.97599804401398, - "p95": 174.52800273895264, - "p99": 185.7600063085556 + "p50": 124.15999919176102, + "p90": 127.36000120639801, + "p95": 128.48000228405, + "p99": 144.57599818706512 }, "isolatedSum": { - "p50": 190.65599888563156, - "p90": 206.91199600696564, - "p95": 212.00000494718552, - "p99": 227.64799743890762 + "p50": 130.20800054073334, + "p90": 158.81600230932236, + "p95": 163.52000087499619, + "p99": 172.2240000963211 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19726336, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 182, + "dispatchLogicalBytes": 2394112, + "combineLogicalBytes": 2394112, + "fanoutMean": 5.21875, + "recvTokensMax": 24, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 60.32000109553337, + "p90": 62.49599903821945, + "p95": 64.00000303983688, + "p99": 79.23199981451035 + }, + "combine": { + "p50": 
68.44799965620041, + "p90": 77.44000107049942, + "p95": 78.23999971151352, + "p99": 79.55200225114822 + }, + "roundtrip": { + "p50": 121.44000083208084, + "p90": 126.43200159072876, + "p95": 127.61600315570831, + "p99": 132.03200697898865 + }, + "isolatedSum": { + "p50": 128.76800075173378, + "p90": 139.93600010871887, + "p95": 142.2400027513504, + "p99": 158.78400206565857 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4630528, + "combineLogicalBytes": 4630528, + "fanoutMean": 5.046875, + "recvTokensMax": 45, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 66.78400188684464, + "p90": 73.11999797821045, + "p95": 74.81600344181061, + "p99": 79.19999957084656 + }, + "combine": { + "p50": 77.40800082683563, + "p90": 78.72000336647034, + "p95": 79.16799932718277, + "p99": 81.50400221347809 + }, + "roundtrip": { + "p50": 120.64000219106674, + "p90": 123.77600371837616, + "p95": 125.95200538635254, + "p99": 143.0719941854477 + }, + "isolatedSum": { + "p50": 144.19200271368027, + "p90": 151.8400013446808, + "p95": 153.98400276899338, + "p99": 160.70400178432465 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9447424, + "combineLogicalBytes": 9447424, + "fanoutMean": 5.1484375, + "recvTokensMax": 91, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 70.20799815654755, + "p90": 72.95999675989151, + "p95": 76.48000121116638, + "p99": 82.94399827718735 + }, + "combine": { + "p50": 78.78399640321732, + "p90": 79.80799674987793, + "p95": 80.99199831485748, + "p99": 89.91999924182892 + }, + "roundtrip": { + "p50": 133.12000036239624, + "p90": 137.7280056476593, + "p95": 138.36799561977386, + "p99": 155.10399639606476 + }, + "isolatedSum": { + "p50": 148.99199455976486, + "p90": 152.76799350976944, + "p95": 157.47199952602386, + "p99": 
172.86399751901627 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19023872, + "combineLogicalBytes": 19023872, + "fanoutMean": 5.18359375, + "recvTokensMax": 178, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 89.66399729251862, + "p90": 91.93599969148636, + "p95": 92.8959995508194, + "p99": 102.30399668216705 + }, + "combine": { + "p50": 92.06400066614151, + "p90": 96.67199850082397, + "p95": 101.72799974679947, + "p99": 103.7760004401207 + }, + "roundtrip": { + "p50": 161.6320013999939, + "p90": 165.43999314308167, + "p95": 166.52800142765045, + "p99": 182.68799781799316 + }, + "isolatedSum": { + "p50": 181.72799795866013, + "p90": 188.60799819231033, + "p95": 194.62399929761887, + "p99": 206.07999712228775 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38148096, + "combineLogicalBytes": 38148096, + "fanoutMean": 5.197265625, + "recvTokensMax": 350, "stragglerRank": 4, "correct": true, "samplesPooled": 600, @@ -9771,34 +10486,34 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 130.0799995660782, - "p90": 136.4160031080246, - "p95": 138.20800185203552, - "p99": 144.896000623703 + "p50": 94.94400024414062, + "p90": 97.4079966545105, + "p95": 100.00000149011612, + "p99": 108.96000266075134 }, "combine": { - "p50": 114.88000303506851, - "p90": 120.70400267839432, - "p95": 121.5360015630722, - "p99": 128.28800082206726 + "p50": 115.58400094509125, + "p90": 117.21599847078323, + "p95": 118.56000125408173, + "p99": 138.3039951324463 }, "roundtrip": { - "p50": 213.18399906158447, - "p90": 219.61599588394165, - "p95": 221.11999988555908, - "p99": 227.03999280929565 + "p50": 197.2160041332245, + "p90": 202.39999890327454, + "p95": 204.0960043668747, + "p99": 209.6640020608902 }, "isolatedSum": { - "p50": 244.9600026011467, - "p90": 257.1200057864189, - "p95": 259.7440034151077, - "p99": 273.18400144577026 + "p50": 
210.52800118923187, + "p90": 214.62399512529373, + "p95": 218.56000274419785, + "p99": 247.26399779319763 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, + "dispatchLogicalBytes": 76955648, + "combineLogicalBytes": 76955648, + "fanoutMean": 5.2421875, + "recvTokensMax": 687, "stragglerRank": 4, "correct": true, "samplesPooled": 600, @@ -9807,28 +10522,29 @@ ] }, { - "id": "cx-7a284f4e", - "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", - "colorKey": "h100_42947950", - "comparisonKey": "fb346b1019e55bb0", + "id": "cx-f0dd83d8", + "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|ac583971f94b176", + "colorKey": "b300_c1ad910f", + "comparisonKey": "80e2eefb7447672f", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:51:32.113885+00:00", + "generatedAt": "2026-06-26T17:41:08.828331+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h100-dgxc-slurm_14", - "sku": "h100", + "runner": "b300-nv_15", + "sku": "b300", "backend": "deepep", "phase": "decode", "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", + "resourceMode": "normalized", + "suite": "resource-constrained", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", + "topologyClass": "b300-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · bf16", + "label": "B300 EP8 · deepep · bf16 (norm)", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, @@ -9843,14 +10559,14 @@ "combineQuantMode": "none" }, "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": 
"backend-default", + "requestedFraction": 0.18, + "achievedFraction": 0.1824, + "configuredUnits": 27, + "deviceUnits": 148, + "resourceClass": "resource-constrained", + "conformanceClass": "resource-conforming", "fixedKernel": false, - "paretoEligible": false + "paretoEligible": true }, "placement": { "kind": "packed", @@ -9868,45 +10584,45 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271543513", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271543513", - "createdAt": "2026-06-26T23:46:04Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28254469772", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254469772", + "createdAt": "2026-06-26T17:41:08.828331+00:00", + "sha": "60dec7d70f554e252fec87709e2be52752947db1" }, "rows": [ { "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 71.00799679756165, - "p90": 100.67199915647507, - "p95": 101.6319990158081, - "p99": 103.74400019645691 + "p50": 56.992001831531525, + "p90": 59.039998799562454, + "p95": 61.824001371860504, + "p99": 73.44000041484833 }, "combine": { - "p50": 73.34399968385696, - "p90": 81.79199695587158, - "p95": 117.47200042009354, - "p99": 304.4799864292145 + "p50": 66.3359984755516, + "p90": 67.4239993095398, + "p95": 68.15999746322632, + "p99": 77.47200131416321 }, "roundtrip": { - "p50": 126.52799487113953, - "p90": 130.3360015153885, - "p95": 131.84000551700592, - "p99": 137.95199990272522 + "p50": 106.81600123643875, + "p90": 113.08799684047699, + "p95": 114.23999816179276, + "p99": 135.6479972600937 }, "isolatedSum": { - "p50": 144.3519964814186, - "p90": 182.46399611234665, - "p95": 219.10399943590164, - "p99": 408.2239866256714 + "p50": 123.32800030708313, + "p90": 126.46399810910225, + "p95": 129.98399883508682, + "p99": 150.91200172901154 }, "roundtripMeasured": true, "dispatchLogicalBytes": 630784, 
"combineLogicalBytes": 630784, "fanoutMean": 5.5, "recvTokensMax": 7, - "stragglerRank": 7, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -9915,35 +10631,35 @@ "tokensPerRank": 2, "globalTokens": 16, "dispatch": { - "p50": 68.57600063085556, - "p90": 76.31999999284744, - "p95": 79.13599908351898, - "p99": 88.32000195980072 + "p50": 56.992001831531525, + "p90": 58.78400057554245, + "p95": 60.92799827456474, + "p99": 73.21599870920181 }, "combine": { - "p50": 72.54400104284286, - "p90": 73.98399710655212, - "p95": 74.36800003051758, - "p99": 78.84799689054489 + "p50": 67.32799857854843, + "p90": 69.11999732255936, + "p95": 70.65600156784058, + "p99": 79.93599772453308 }, "roundtrip": { - "p50": 126.81600451469421, - "p90": 131.1360001564026, - "p95": 134.24000144004822, - "p99": 137.69599795341492 + "p50": 106.9440022110939, + "p90": 109.40799862146378, + "p95": 110.88000237941742, + "p99": 119.39200013875961 }, "isolatedSum": { - "p50": 141.12000167369843, - "p90": 150.30399709939957, - "p95": 153.50399911403656, - "p99": 167.1679988503456 + "p50": 124.32000041007996, + "p90": 127.9039978981018, + "p95": 131.58399984240532, + "p99": 153.1519964337349 }, "roundtripMeasured": true, "dispatchLogicalBytes": 1232896, "combineLogicalBytes": 1232896, "fanoutMean": 5.375, "recvTokensMax": 13, - "stragglerRank": 7, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -9952,35 +10668,35 @@ "tokensPerRank": 4, "globalTokens": 32, "dispatch": { - "p50": 73.05599749088287, - "p90": 103.67999970912933, - "p95": 108.51199924945831, - "p99": 261.34398579597473 + "p50": 57.792000472545624, + "p90": 59.39200147986412, + "p95": 61.28000095486641, + "p99": 68.09599697589874 }, "combine": { - "p50": 73.37599992752075, - "p90": 80.03199845552444, - "p95": 87.0399996638298, - "p99": 87.87199854850769 + "p50": 67.80800223350525, + "p90": 69.66400146484375, + "p95": 76.99199765920639, + "p99": 78.75200361013412 }, "roundtrip": { - "p50": 
130.52800297737122, - "p90": 157.4079990386963, - "p95": 160.76800227165222, - "p99": 164.22399878501892 + "p50": 116.22399836778641, + "p90": 122.68800288438797, + "p95": 124.35200065374374, + "p99": 127.93600559234619 }, "isolatedSum": { - "p50": 146.43199741840363, - "p90": 183.71199816465378, - "p95": 195.55199891328812, - "p99": 349.2159843444824 + "p50": 125.60000270605087, + "p90": 129.05600294470787, + "p95": 138.2719986140728, + "p99": 146.84800058603287 }, "roundtripMeasured": true, "dispatchLogicalBytes": 2480128, "combineLogicalBytes": 2480128, "fanoutMean": 5.40625, "recvTokensMax": 29, - "stragglerRank": 7, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -9989,35 +10705,35 @@ "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 71.45600020885468, - "p90": 98.88000041246414, - "p95": 103.00800204277039, - "p99": 109.69600081443787 + "p50": 59.29600074887276, + "p90": 61.15199998021126, + "p95": 62.39999830722809, + "p99": 68.1919977068901 }, "combine": { - "p50": 73.7600028514862, - "p90": 82.59200304746628, - "p95": 83.99999886751175, - "p99": 88.41600269079208 + "p50": 68.38399916887283, + "p90": 77.31200009584427, + "p95": 77.72800326347351, + "p99": 78.78399640321732 }, "roundtrip": { - "p50": 131.29599392414093, - "p90": 154.59200739860535, - "p95": 157.05600380897522, - "p99": 165.66400229930878 + "p50": 120.25599926710129, + "p90": 125.82400441169739, + "p95": 126.75200402736664, + "p99": 133.44000279903412 }, "isolatedSum": { - "p50": 145.21600306034088, - "p90": 181.47200345993042, - "p95": 187.00800091028214, - "p99": 198.11200350522995 + "p50": 127.67999991774559, + "p90": 138.46400007605553, + "p95": 140.1280015707016, + "p99": 146.97599411010742 }, "roundtripMeasured": true, "dispatchLogicalBytes": 4974592, "combineLogicalBytes": 4974592, "fanoutMean": 5.421875, "recvTokensMax": 47, - "stragglerRank": 7, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -10026,35 +10742,35 @@ 
"tokensPerRank": 16, "globalTokens": 128, "dispatch": { - "p50": 82.97599852085114, - "p90": 100.8640006184578, - "p95": 103.26399654150009, - "p99": 108.44799876213074 + "p50": 62.78400123119354, + "p90": 69.023996591568, + "p95": 71.03999704122543, + "p99": 76.73600316047668 }, "combine": { - "p50": 74.49600100517273, - "p90": 87.10400015115738, - "p95": 87.74399757385254, - "p99": 88.86399865150452 + "p50": 77.2479996085167, + "p90": 78.5600021481514, + "p95": 78.72000336647034, + "p99": 80.86399734020233 }, "roundtrip": { - "p50": 128.1919926404953, - "p90": 158.720001578331, - "p95": 161.53599321842194, - "p99": 164.09599781036377 + "p50": 119.61600184440613, + "p90": 122.72000312805176, + "p95": 124.35200065374374, + "p99": 131.29599392414093 }, "isolatedSum": { - "p50": 157.47199952602386, - "p90": 187.96800076961517, - "p95": 191.00799411535263, - "p99": 197.31199741363525 + "p50": 140.03200083971024, + "p90": 147.5839987397194, + "p95": 149.76000040769577, + "p99": 157.60000050067902 }, "roundtripMeasured": true, "dispatchLogicalBytes": 9920512, "combineLogicalBytes": 9920512, "fanoutMean": 5.40625, "recvTokensMax": 92, - "stragglerRank": 4, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -10063,35 +10779,35 @@ "tokensPerRank": 32, "globalTokens": 256, "dispatch": { - "p50": 90.81599861383438, - "p90": 103.04000228643417, - "p95": 107.87200182676315, - "p99": 111.51999980211258 + "p50": 69.24799829721451, + "p90": 70.91200351715088, + "p95": 73.69600236415863, + "p99": 81.69600367546082 }, "combine": { - "p50": 81.50400221347809, - "p90": 89.9519994854927, - "p95": 90.43200314044952, - "p99": 96.19200229644775 + "p50": 78.59200239181519, + "p90": 79.80799674987793, + "p95": 80.73599636554718, + "p99": 90.94399958848953 }, "roundtrip": { - "p50": 140.47999680042267, - "p90": 163.29599916934967, - "p95": 166.87999665737152, - "p99": 171.03999853134155 + "p50": 130.68799674510956, + "p90": 135.23200154304504, + "p95": 
136.51199638843536, + "p99": 140.47999680042267 }, "isolatedSum": { - "p50": 172.32000082731247, - "p90": 192.99200177192688, - "p95": 198.30400496721268, - "p99": 207.71200209856033 + "p50": 147.8400006890297, + "p90": 150.7200002670288, + "p95": 154.4319987297058, + "p99": 172.64000326395035 }, "roundtripMeasured": true, "dispatchLogicalBytes": 19726336, "combineLogicalBytes": 19726336, "fanoutMean": 5.375, "recvTokensMax": 182, - "stragglerRank": 3, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -10100,28 +10816,28 @@ "tokensPerRank": 64, "globalTokens": 512, "dispatch": { - "p50": 97.31200337409973, - "p90": 119.10399794578552, - "p95": 121.69600278139114, - "p99": 131.26400113105774 + "p50": 82.49600231647491, + "p90": 92.70399808883667, + "p95": 95.0080007314682, + "p99": 99.45599734783173 }, "combine": { - "p50": 90.20800143480301, - "p90": 97.15200215578079, - "p95": 103.93600165843964, - "p99": 104.47999835014343 + "p50": 92.25600212812424, + "p90": 100.09600222110748, + "p95": 102.36799716949463, + "p99": 106.65600001811981 }, "roundtrip": { - "p50": 162.1759980916977, - "p90": 181.7920058965683, - "p95": 184.4799965620041, - "p99": 187.74400651454926 + "p50": 158.65600109100342, + "p90": 163.00800442695618, + "p95": 164.19200599193573, + "p99": 169.50400173664093 }, "isolatedSum": { - "p50": 187.52000480890274, - "p90": 216.25600010156631, - "p95": 225.63200443983078, - "p99": 235.74399948120117 + "p50": 174.75200444459915, + "p90": 192.80000030994415, + "p95": 197.37599790096283, + "p99": 206.11199736595154 }, "roundtripMeasured": true, "dispatchLogicalBytes": 38993920, @@ -10137,35 +10853,35 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 113.3119985461235, - "p90": 132.7359974384308, - "p95": 134.5919966697693, - "p99": 140.35199582576752 + "p50": 93.91999989748001, + "p90": 95.83999961614609, + "p95": 98.04800152778625, + "p99": 104.99200224876404 }, "combine": { - "p50": 108.41599851846695, - "p90": 
120.44800072908401, - "p95": 120.7360029220581, - "p99": 121.47200107574463 + "p50": 115.35999923944473, + "p90": 115.93600362539291, + "p95": 116.60800129175186, + "p99": 119.45600062608719 }, "roundtrip": { - "p50": 198.2080042362213, - "p90": 216.86400473117828, - "p95": 221.24800086021423, - "p99": 223.80800545215607 + "p50": 192.51200556755066, + "p90": 198.88000190258026, + "p95": 199.48799908161163, + "p99": 209.47200059890747 }, "isolatedSum": { - "p50": 221.72799706459045, - "p90": 253.1839981675148, - "p95": 255.3279995918274, - "p99": 261.82399690151215 + "p50": 209.27999913692474, + "p90": 211.776003241539, + "p95": 214.65600281953812, + "p99": 224.44800287485123 }, "roundtripMeasured": true, "dispatchLogicalBytes": 77672448, "combineLogicalBytes": 77672448, "fanoutMean": 5.291015625, "recvTokensMax": 723, - "stragglerRank": 7, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -10173,34 +10889,35 @@ ] }, { - "id": "cx-9a231e73", - "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||b029c1a6fded400", - "colorKey": "h100_42947950", - "comparisonKey": "fb346b1019e55bb0", + "id": "cx-dede7717", + "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|decode|normal|none|none|0|normalized|0.18|ffa946582edb500", + "colorKey": "b300_0622d929", + "comparisonKey": "c4ede73885f09b56", "schemaVersion": 3, - "generatedAt": "2026-06-27T00:13:23.336108+00:00", + "generatedAt": "2026-06-26T18:12:16.850895+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h100-dgxc-slurm_12", - "sku": "h100", + "runner": "b300-nv_17", + "sku": "b300", "backend": "deepep", "phase": "decode", "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", + "resourceMode": "normalized", + "suite": "resource-constrained", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": 
"h100-nvlink-island", + "topologyClass": "b300-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · bf16", + "label": "B300 EP8 · deepep · bf16 (norm) · balanced", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", + "routing": "balanced", + "routingLabel": "balanced", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, @@ -10209,14 +10926,14 @@ "combineQuantMode": "none" }, "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", + "requestedFraction": 0.18, + "achievedFraction": 0.1824, + "configuredUnits": 27, + "deviceUnits": 148, + "resourceClass": "resource-constrained", + "conformanceClass": "resource-conforming", "fixedKernel": false, - "paretoEligible": false + "paretoEligible": true }, "placement": { "kind": "packed", @@ -10225,8 +10942,8 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "b029c1a6fded400", - "workloadId": "set:3:07d544ac2af401ec", + "traceSignature": "ffa946582edb500", + "workloadId": "set:8:7af12818400d6348", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -10234,374 +10951,304 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28272369133", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272369133", - "createdAt": "2026-06-27T00:12:24Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28254508907", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254508907", + "createdAt": "2026-06-26T18:12:16.850895+00:00", + "sha": "60dec7d70f554e252fec87709e2be52752947db1" }, "rows": [ { - "tokensPerRank": 8, - "globalTokens": 64, + 
"tokensPerRank": 1, + "globalTokens": 8, "dispatch": { - "p50": 98.88000041246414, - "p90": 104.8320010304451, - "p95": 107.96800255775452, - "p99": 118.97599697113037 + "p50": 57.69599974155426, + "p90": 60.06399914622307, + "p95": 61.664000153541565, + "p99": 77.7600035071373 }, "combine": { - "p50": 79.93599772453308, - "p90": 87.5839963555336, - "p95": 87.99999952316284, - "p99": 92.28800237178802 + "p50": 68.03199648857117, + "p90": 69.76000219583511, + "p95": 76.92799717187881, + "p99": 78.52800190448761 }, "roundtrip": { - "p50": 154.11199629306793, - "p90": 159.2639982700348, - "p95": 161.43999993801117, - "p99": 167.29600727558136 + "p50": 107.80800133943558, + "p90": 110.59200018644333, + "p95": 112.19199746847153, + "p99": 128.76799702644348 }, "isolatedSum": { - "p50": 178.81599813699722, - "p90": 192.4159973859787, - "p95": 195.96800208091736, - "p99": 211.2639993429184 + "p50": 125.72799623012543, + "p90": 129.82400134205818, + "p95": 138.59199732542038, + "p99": 156.2880054116249 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 5, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 8, + "recvTokensMax": 8, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 32, - "globalTokens": 256, + "tokensPerRank": 2, + "globalTokens": 16, "dispatch": { - "p50": 103.04000228643417, - "p90": 108.22399705648422, - "p95": 110.43199896812439, - "p99": 116.64000153541565 + "p50": 58.559998869895935, + "p90": 60.15999987721443, + "p95": 61.664000153541565, + "p99": 72.76800274848938 }, "combine": { - "p50": 87.93599903583527, - "p90": 94.94400024414062, - "p95": 96.03200107812881, - "p99": 98.49599748849869 + "p50": 68.25599819421768, + "p90": 76.86399668455124, + "p95": 77.53600180149078, + "p99": 79.9039974808693 }, "roundtrip": { - "p50": 162.4639928340912, - "p90": 
170.3999936580658, - "p95": 172.31999337673187, - "p99": 178.9119988679886 + "p50": 116.22399836778641, + "p90": 122.11199849843979, + "p95": 123.07199835777283, + "p99": 127.9039978981018 }, "isolatedSum": { - "p50": 190.97600132226944, - "p90": 203.16799730062485, - "p95": 206.4640000462532, - "p99": 215.13599902391434 + "p50": 126.81599706411362, + "p90": 137.02399656176567, + "p95": 139.20000195503235, + "p99": 152.67200022935867 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19726336, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 182, - "stragglerRank": 5, + "dispatchLogicalBytes": 1835008, + "combineLogicalBytes": 1835008, + "fanoutMean": 8, + "recvTokensMax": 16, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 4, + "globalTokens": 32, "dispatch": { - "p50": 129.40800189971924, - "p90": 137.7599984407425, - "p95": 139.45600390434265, - "p99": 143.48800480365753 + "p50": 58.59199911355972, + "p90": 60.5119988322258, + "p95": 61.664000153541565, + "p99": 69.66400146484375 }, "combine": { - "p50": 114.88000303506851, - "p90": 119.87199634313583, - "p95": 120.4800009727478, - "p99": 123.48800152540207 + "p50": 70.01599669456482, + "p90": 78.40000092983246, + "p95": 78.52800190448761, + "p99": 81.216000020504 }, "roundtrip": { - "p50": 213.0880057811737, - "p90": 217.3759937286377, - "p95": 219.10400688648224, - "p99": 223.23200106620789 + "p50": 121.66400253772736, + "p90": 125.37600100040436, + "p95": 127.20000743865967, + "p99": 135.74400544166565 }, "isolatedSum": { - "p50": 244.28800493478775, - "p90": 257.6319947838783, - "p95": 259.93600487709045, - "p99": 266.9760063290596 + "p50": 128.60799580812454, + "p90": 138.91199976205826, + "p95": 140.19200205802917, + "p99": 150.88000148534775 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - 
"recvTokensMax": 723, - "stragglerRank": 5, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 8, + "recvTokensMax": 32, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 - } - ] - }, - { - "id": "cx-535aa40c", - "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||c774c8e4abb34da", - "colorKey": "h100_42947950", - "comparisonKey": "f31dd87deba90285", - "schemaVersion": 3, - "generatedAt": "2026-06-27T00:53:48.998127+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_03", - "sku": "h100", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · bf16", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "c774c8e4abb34da", - "workloadId": "set:5:d8d49658059863f2", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": 
"SemiAnalysisAI/InferenceX", - "run": { - "id": "28273506790", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28273506790", - "createdAt": "2026-06-27T00:52:45Z", - "sha": "2c15d9415503e9ccb84cd49cf446a122796efc1e" - }, - "rows": [ + }, { - "tokensPerRank": 1, - "globalTokens": 8, + "tokensPerRank": 8, + "globalTokens": 64, "dispatch": { - "p50": 94.4959968328476, - "p90": 100.5759984254837, - "p95": 102.81600058078766, - "p99": 107.42399841547012 + "p50": 59.61599946022034, + "p90": 61.95199862122536, + "p95": 63.90400230884552, + "p99": 71.52000069618225 }, "combine": { - "p50": 76.92799717187881, - "p90": 80.89599758386612, - "p95": 81.37600123882294, - "p99": 85.91999858617783 + "p50": 77.40800082683563, + "p90": 78.65600287914276, + "p95": 78.94399762153625, + "p99": 89.28000181913376 }, "roundtrip": { - "p50": 150.65599977970123, - "p90": 155.35999834537506, - "p95": 157.02399611473083, - "p99": 163.5199934244156 + "p50": 119.80800330638885, + "p90": 122.65600264072418, + "p95": 124.83199685811996, + "p99": 136.83199882507324 }, "isolatedSum": { - "p50": 171.4239940047264, - "p90": 181.47199600934982, - "p95": 184.1920018196106, - "p99": 193.34399700164795 + "p50": 137.02400028705597, + "p90": 140.60800150036812, + "p95": 142.84799993038177, + "p99": 160.800002515316 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 630784, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 1, + "dispatchLogicalBytes": 7340032, + "combineLogicalBytes": 7340032, + "fanoutMean": 8, + "recvTokensMax": 64, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2, - "globalTokens": 16, + "tokensPerRank": 16, + "globalTokens": 128, "dispatch": { - "p50": 90.97599983215332, - "p90": 98.52799773216248, - "p95": 101.02400183677673, - "p99": 107.68000036478043 + "p50": 73.91999661922455, + "p90": 76.09599828720093, + "p95": 78.04799824953079, + "p99": 85.24800091981888 }, 
"combine": { - "p50": 77.11999863386154, - "p90": 81.216000020504, - "p95": 82.71999657154083, - "p99": 87.55200356245041 + "p50": 78.40000092983246, + "p90": 79.1039988398552, + "p95": 79.39200103282928, + "p99": 85.08799970149994 }, "roundtrip": { - "p50": 149.47199821472168, - "p90": 154.91199493408203, - "p95": 157.151997089386, - "p99": 163.80800306797028 + "p50": 121.44000083208084, + "p90": 126.94400548934937, + "p95": 128.92800569534302, + "p99": 145.31199634075165 }, "isolatedSum": { - "p50": 168.09599846601486, - "p90": 179.74399775266647, - "p95": 183.74399840831757, - "p99": 195.23200392723083 + "p50": 152.319997549057, + "p90": 155.19999712705612, + "p95": 157.43999928236008, + "p99": 170.33600062131882 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1232896, - "combineLogicalBytes": 1232896, - "fanoutMean": 5.375, - "recvTokensMax": 13, - "stragglerRank": 1, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 8, + "recvTokensMax": 128, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 4, - "globalTokens": 32, + "tokensPerRank": 32, + "globalTokens": 256, "dispatch": { - "p50": 93.12000125646591, - "p90": 99.64799880981445, - "p95": 102.27199643850327, - "p99": 109.43999886512756 + "p50": 71.07199728488922, + "p90": 72.86400347948074, + "p95": 73.47200065851212, + "p99": 82.40000158548355 }, "combine": { - "p50": 79.3600007891655, - "p90": 83.0719992518425, - "p95": 84.22400057315826, - "p99": 88.54400366544724 + "p50": 80.06399869918823, + "p90": 81.37600123882294, + "p95": 81.82399719953537, + "p99": 89.88799899816513 }, "roundtrip": { - "p50": 151.96800231933594, - "p90": 158.9439958333969, - "p95": 160.25599837303162, - "p99": 163.07200491428375 + "p50": 134.36800241470337, + "p90": 141.56800508499146, + "p95": 143.99999380111694, + "p99": 148.80000054836273 }, "isolatedSum": { - "p50": 172.4800020456314, - "p90": 182.71999806165695, - "p95": 
186.49599701166153, - "p99": 197.9840025305748 + "p50": 151.13599598407745, + "p90": 154.24000471830368, + "p95": 155.29599785804749, + "p99": 172.28800058364868 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2480128, - "combineLogicalBytes": 2480128, - "fanoutMean": 5.40625, - "recvTokensMax": 29, - "stragglerRank": 1, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 8, + "recvTokensMax": 256, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 64, + "globalTokens": 512, "dispatch": { - "p50": 94.27200257778168, - "p90": 100.80000013113022, - "p95": 102.62399911880493, - "p99": 107.80800133943558 + "p50": 85.34400165081024, + "p90": 89.63199704885483, + "p95": 91.93599969148636, + "p99": 96.57599776983261 }, "combine": { - "p50": 78.68800312280655, - "p90": 83.13599973917007, - "p95": 84.25600081682205, - "p99": 86.65599673986435 + "p50": 93.98400038480759, + "p90": 103.10400277376175, + "p95": 103.29599678516388, + "p99": 105.92000186443329 }, "roundtrip": { - "p50": 151.39199793338776, - "p90": 157.79200196266174, - "p95": 160.25599837303162, - "p99": 164.95999693870544 + "p50": 169.3439930677414, + "p90": 172.89599776268005, + "p95": 175.87199807167053, + "p99": 196.16000354290009 }, "isolatedSum": { - "p50": 172.96000570058823, - "p90": 183.9359998703003, - "p95": 186.87999993562698, - "p99": 194.46399807929993 + "p50": 179.32800203561783, + "p90": 192.73599982261658, + "p95": 195.23199647665024, + "p99": 202.4959996342659 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 4, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 8, + "recvTokensMax": 512, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 16, - "globalTokens": 128, + 
"tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 96.16000205278397, - "p90": 100.92800110578537, - "p95": 103.71199995279312, - "p99": 108.06400328874588 + "p50": 107.13600367307663, + "p90": 109.79200154542923, + "p95": 111.7120012640953, + "p99": 131.96800649166107 }, "combine": { - "p50": 81.85599744319916, - "p90": 87.26400136947632, - "p95": 88.8959988951683, - "p99": 90.04800021648407 + "p50": 130.49599528312683, + "p90": 139.52000439167023, + "p95": 139.8719996213913, + "p99": 140.54399728775024 }, "roundtrip": { - "p50": 153.6639928817749, - "p90": 160.35200655460358, - "p95": 161.95200383663177, - "p99": 165.3439998626709 + "p50": 231.1680018901825, + "p90": 235.00800132751465, + "p95": 236.7040067911148, + "p99": 257.6960027217865 }, "isolatedSum": { - "p50": 178.01599949598312, - "p90": 188.1920024752617, - "p95": 192.60799884796143, - "p99": 198.11200350522995 + "p50": 237.63199895620346, + "p90": 249.31200593709946, + "p95": 251.5840008854866, + "p99": 272.5120037794113 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 9920512, - "combineLogicalBytes": 9920512, - "fanoutMean": 5.40625, - "recvTokensMax": 92, - "stragglerRank": 1, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 8, + "recvTokensMax": 1024, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -10609,50 +11256,51 @@ ] }, { - "id": "cx-5a3d925c", - "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|small-amplitude|none|none|0|tuned||8c8497a77d9085d", - "colorKey": "h100_42947950", - "comparisonKey": "da8c4fcc63f5bf6e", + "id": "cx-e56568fe", + "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|decode|normal|none|none|0|normalized|0.18|14ded8461f2636c", + "colorKey": "b300_01ab5b1a", + "comparisonKey": "1f56c3705f670037", "schemaVersion": 3, - "generatedAt": "2026-06-27T00:05:07.028525+00:00", + "generatedAt": 
"2026-06-26T23:38:03.696815+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h100-dgxc-slurm_18", - "sku": "h100", + "runner": "b300-nv_07", + "sku": "b300", "backend": "deepep", "phase": "decode", "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", + "resourceMode": "normalized", + "suite": "resource-constrained", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", + "topologyClass": "b300-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · bf16", + "label": "B300 EP8 · deepep · bf16 (norm) · zipf", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", + "routing": "zipf", + "routingLabel": "zipf", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, "dispatchDtype": "bf16", - "activationProfile": "small-amplitude", + "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", + "requestedFraction": 0.18, + "achievedFraction": 0.1824, + "configuredUnits": 27, + "deviceUnits": 148, + "resourceClass": "resource-constrained", + "conformanceClass": "resource-conforming", "fixedKernel": false, - "paretoEligible": false + "paretoEligible": true }, "placement": { "kind": "packed", @@ -10661,8 +11309,8 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "8c8497a77d9085d", - "workloadId": "set:4:d8d49658059863f2", + "traceSignature": "14ded8461f2636c", + "workloadId": "set:8:f5576e2b712d38c3", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -10670,337 +11318,267 @@ "imageDigest": 
"sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28272117855", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272117855", - "createdAt": "2026-06-27T00:04:08Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28271231753", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271231753", + "createdAt": "2026-06-26T23:38:03.696815+00:00", + "sha": "ee4ffe77871d0200cb4a78c96d3ae9f692e9af02" }, "rows": [ { "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 97.79199957847595, - "p90": 105.02400249242783, - "p95": 107.29599744081497, - "p99": 115.90400338172913 + "p50": 55.904000997543335, + "p90": 59.776000678539276, + "p95": 65.72800129652023, + "p99": 85.11999994516373 }, "combine": { - "p50": 79.77599650621414, - "p90": 82.11199939250946, - "p95": 86.91199868917465, - "p99": 88.79999816417694 + "p50": 65.60000032186508, + "p90": 66.3679987192154, + "p95": 66.91200286149979, + "p99": 76.86399668455124 }, "roundtrip": { - "p50": 152.44799852371216, - "p90": 158.59200060367584, - "p95": 160.44799983501434, - "p99": 165.40800034999847 + "p50": 105.05600273609161, + "p90": 111.35999858379364, + "p95": 112.96000331640244, + "p99": 121.05599790811539 }, "isolatedSum": { - "p50": 177.5679960846901, - "p90": 187.1360018849373, - "p95": 194.20799612998962, - "p99": 204.70400154590607 + "p50": 121.50400131940842, + "p90": 126.14399939775467, + "p95": 132.64000415802002, + "p99": 161.98399662971497 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 630784, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 4, + "dispatchLogicalBytes": 444416, + "combineLogicalBytes": 444416, + "fanoutMean": 3.875, + "recvTokensMax": 8, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 2, + "globalTokens": 16, 
"dispatch": { - "p50": 98.01600128412247, - "p90": 103.87200117111206, - "p95": 106.01600259542465, - "p99": 113.11999708414078 - }, - "combine": { - "p50": 81.02399855852127, - "p90": 87.71199733018875, - "p95": 87.96799927949905, - "p99": 89.50400352478027 + "p50": 55.84000051021576, + "p90": 57.56799876689911, + "p95": 60.095999389886856, + "p99": 72.4480003118515 + }, + "combine": { + "p50": 65.69600105285645, + "p90": 66.3679987192154, + "p95": 66.84800237417221, + "p99": 69.2799985408783 }, "roundtrip": { - "p50": 155.16799688339233, - "p90": 160.38399934768677, - "p95": 162.23999857902527, - "p99": 166.87999665737152 + "p50": 104.76800054311752, + "p90": 109.40799862146378, + "p95": 112.03200370073318, + "p99": 159.19999778270721 }, "isolatedSum": { - "p50": 179.03999984264374, - "p90": 191.5839985013008, - "p95": 193.9840018749237, - "p99": 202.62400060892105 + "p50": 121.5360015630722, + "p90": 123.9359974861145, + "p95": 126.94400176405907, + "p99": 141.7279988527298 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 4, + "dispatchLogicalBytes": 845824, + "combineLogicalBytes": 845824, + "fanoutMean": 3.6875, + "recvTokensMax": 16, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 32, - "globalTokens": 256, + "tokensPerRank": 4, + "globalTokens": 32, "dispatch": { - "p50": 101.98400169610977, - "p90": 106.6880002617836, - "p95": 109.95200276374817, - "p99": 120.35199999809265 + "p50": 57.37600103020668, + "p90": 60.80000102519989, + "p95": 65.76000154018402, + "p99": 95.8079993724823 }, "combine": { - "p50": 88.22400122880936, - "p90": 95.0080007314682, - "p95": 95.93600034713745, - "p99": 96.83199971914291 + "p50": 66.59200042486191, + "p90": 77.18399912118912, + "p95": 77.82399654388428, + "p99": 79.16799932718277 }, "roundtrip": { - "p50": 162.75200247764587, - "p90": 169.63200271129608, - 
"p95": 171.58399522304535, - "p99": 176.28799378871918 + "p50": 106.91200196743011, + "p90": 112.38399893045425, + "p95": 115.23199826478958, + "p99": 124.22399967908859 }, "isolatedSum": { - "p50": 190.20800292491913, - "p90": 201.6960009932518, - "p95": 205.88800311088562, - "p99": 217.18399971723557 + "p50": 123.96800145506859, + "p90": 137.984000146389, + "p95": 143.5839980840683, + "p99": 174.97599869966507 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19726336, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 182, - "stragglerRank": 4, + "dispatchLogicalBytes": 1691648, + "combineLogicalBytes": 1691648, + "fanoutMean": 3.6875, + "recvTokensMax": 32, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 8, + "globalTokens": 64, "dispatch": { - "p50": 129.66400384902954, - "p90": 137.08800077438354, - "p95": 139.0399932861328, - "p99": 142.752006649971 + "p50": 58.6559996008873, + "p90": 63.231997191905975, + "p95": 65.60000032186508, + "p99": 69.47200000286102 }, "combine": { - "p50": 115.00799655914307, - "p90": 120.7680031657219, - "p95": 121.31199985742569, - "p99": 127.83999741077423 + "p50": 68.12799721956253, + "p90": 76.48000121116638, + "p95": 77.15199887752533, + "p99": 84.1279998421669 }, "roundtrip": { - "p50": 212.89600431919098, - "p90": 218.72000396251678, - "p95": 219.9680060148239, - "p99": 224.06400740146637 + "p50": 122.11199849843979, + "p90": 125.34399330615997, + "p95": 128.4479945898056, + "p99": 151.5520066022873 }, "isolatedSum": { - "p50": 244.6720004081726, - "p90": 257.85600394010544, - "p95": 260.3519931435585, - "p99": 270.59200406074524 + "p50": 126.78399682044983, + "p90": 139.71199840307236, + "p95": 142.7519991993904, + "p99": 153.59999984502792 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, + 
"dispatchLogicalBytes": 3354624, + "combineLogicalBytes": 3354624, + "fanoutMean": 3.65625, + "recvTokensMax": 64, "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 - } - ] - }, - { - "id": "cx-49497b06", - "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|wide-dynamic-range|none|none|0|tuned||8c8497a77d9085d", - "colorKey": "h100_42947950", - "comparisonKey": "5ec10556693a8c2b", - "schemaVersion": 3, - "generatedAt": "2026-06-27T00:05:08.113815+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_05", - "sku": "h100", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · bf16", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "wide-dynamic-range", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "8c8497a77d9085d", - "workloadId": "set:4:d8d49658059863f2", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": 
"SemiAnalysisAI/InferenceX", - "run": { - "id": "28272121618", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272121618", - "createdAt": "2026-06-27T00:04:15Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ + }, { - "tokensPerRank": 1, - "globalTokens": 8, + "tokensPerRank": 16, + "globalTokens": 128, "dispatch": { - "p50": 98.24000298976898, - "p90": 105.69600015878677, - "p95": 108.12799632549286, - "p99": 113.37599903345108 + "p50": 59.487998485565186, + "p90": 65.24799764156342, + "p95": 67.00800359249115, + "p99": 73.56800138950348 }, "combine": { - "p50": 79.68000322580338, - "p90": 82.07999914884567, - "p95": 82.97599852085114, - "p99": 87.61599659919739 + "p50": 68.12799721956253, + "p90": 77.34400033950806, + "p95": 77.88799703121185, + "p99": 89.53599631786346 }, "roundtrip": { - "p50": 146.464005112648, - "p90": 152.8320014476776, - "p95": 154.59200739860535, - "p99": 158.84800255298615 + "p50": 119.1679984331131, + "p90": 124.67200309038162, + "p95": 125.69600343704224, + "p99": 134.5600038766861 }, "isolatedSum": { - "p50": 177.92000621557236, - "p90": 187.77599930763245, - "p95": 191.103994846344, - "p99": 200.99199563264847 + "p50": 127.61599570512772, + "p90": 142.59199798107147, + "p95": 144.896000623703, + "p99": 163.10399770736694 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 630784, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 5, + "dispatchLogicalBytes": 6537216, + "combineLogicalBytes": 6537216, + "fanoutMean": 3.5625, + "recvTokensMax": 127, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 32, + "globalTokens": 256, "dispatch": { - "p50": 100.0640019774437, - "p90": 107.32799768447876, - "p95": 110.27199774980545, - "p99": 160.92799603939056 + "p50": 74.52800124883652, + "p90": 76.51200145483017, + "p95": 77.18399912118912, + "p99": 
81.7599967122078 }, "combine": { - "p50": 81.34400099515915, - "p90": 87.16800063848495, - "p95": 87.87199854850769, - "p99": 90.27200192213058 + "p50": 77.91999727487564, + "p90": 78.78399640321732, + "p95": 79.26400005817413, + "p99": 81.85599744319916 }, "roundtrip": { - "p50": 152.92799472808838, - "p90": 160.51200032234192, - "p95": 162.30399906635284, - "p99": 166.24000668525696 + "p50": 132.32000172138214, + "p90": 135.6160044670105, + "p95": 136.31999492645264, + "p99": 141.66399836540222 }, "isolatedSum": { - "p50": 181.40800297260284, - "p90": 194.49599832296371, - "p95": 198.14399629831314, - "p99": 251.19999796152115 + "p50": 152.44799852371216, + "p90": 155.29599785804749, + "p95": 156.44799917936325, + "p99": 163.61599415540695 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 5, + "dispatchLogicalBytes": 12859392, + "combineLogicalBytes": 12859392, + "fanoutMean": 3.50390625, + "recvTokensMax": 255, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 32, - "globalTokens": 256, + "tokensPerRank": 64, + "globalTokens": 512, "dispatch": { - "p50": 103.39199751615524, - "p90": 108.2879975438118, - "p95": 110.6560006737709, - "p99": 119.03999745845795 + "p50": 80.19199967384338, + "p90": 81.88799768686295, + "p95": 83.52000266313553, + "p99": 90.30400216579437 }, "combine": { - "p50": 89.75999802350998, - "p90": 95.20000219345093, - "p95": 95.93600034713745, - "p99": 98.68799895048141 + "p50": 90.59199690818787, + "p90": 91.67999774217606, + "p95": 92.57599711418152, + "p99": 101.21600329875946 }, "roundtrip": { - "p50": 161.6320013999939, - "p90": 169.08800601959229, - "p95": 170.68800330162048, - "p99": 175.64800381660461 + "p50": 155.45600652694702, + "p90": 160.5760008096695, + "p95": 161.98399662971497, + "p99": 169.53599452972412 }, "isolatedSum": { - "p50": 193.15199553966522, - "p90": 
203.48799973726273, - "p95": 206.59200102090836, - "p99": 217.72799640893936 + "p50": 170.78399658203125, + "p90": 173.567995429039, + "p95": 176.09599977731705, + "p99": 191.52000546455383 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19726336, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 182, - "stragglerRank": 5, + "dispatchLogicalBytes": 25145344, + "combineLogicalBytes": 25145344, + "fanoutMean": 3.42578125, + "recvTokensMax": 510, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -11009,35 +11587,35 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 130.46400249004364, - "p90": 136.9280070066452, - "p95": 139.23199474811554, - "p99": 143.5839980840683 + "p50": 101.69599950313568, + "p90": 105.15200346708298, + "p95": 106.04800283908844, + "p99": 115.167997777462 }, "combine": { - "p50": 114.78400230407715, - "p90": 120.83200365304947, - "p95": 122.11199849843979, - "p99": 122.8799968957901 + "p50": 126.81600451469421, + "p90": 127.77599692344666, + "p95": 128.12800705432892, + "p99": 131.71200454235077 }, "roundtrip": { - "p50": 211.71200275421143, - "p90": 219.35999393463135, - "p95": 221.91999852657318, - "p99": 235.00800132751465 + "p50": 207.58399367332458, + "p90": 212.41599321365356, + "p95": 215.45599400997162, + "p99": 240.79999327659607 }, "isolatedSum": { - "p50": 245.2480047941208, - "p90": 257.7600106596947, - "p95": 261.3439932465553, - "p99": 266.4639949798584 + "p50": 228.5120040178299, + "p90": 232.92800039052963, + "p95": 234.17600989341736, + "p99": 246.88000231981277 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 5, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -11045,50 +11623,51 
@@ ] }, { - "id": "cx-3b04d344", - "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|zeros|none|none|0|tuned||8c8497a77d9085d", - "colorKey": "h100_42947950", - "comparisonKey": "8bd0272e65400ebd", + "id": "cx-a499b6fe", + "identity": "b300|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|decode|normal|none|none|0|normalized|0.18|a8f501af7004836", + "colorKey": "b300_085c12d4", + "comparisonKey": "f41671f558a3c8d2", "schemaVersion": 3, - "generatedAt": "2026-06-27T00:05:11.747577+00:00", + "generatedAt": "2026-06-26T18:23:15.234137+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h100-dgxc-slurm_00", - "sku": "h100", + "runner": "b300-nv_10", + "sku": "b300", "backend": "deepep", "phase": "decode", "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", + "resourceMode": "normalized", + "suite": "resource-constrained", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", + "topologyClass": "b300-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · bf16", + "label": "B300 EP8 · deepep · bf16 (norm) · zipf+eplb", + "model": "DeepSeek-V3 (EPLB physical)", "shape": { "hidden": 7168, "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", + "experts": 288, + "routing": "zipf", + "routingLabel": "zipf+eplb", "routingStep": 0, "unevenTokens": "none", - "eplbEnabled": false, + "eplbEnabled": true, "dispatchDtype": "bf16", - "activationProfile": "zeros", + "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", + "requestedFraction": 0.18, + "achievedFraction": 0.1824, + "configuredUnits": 27, + "deviceUnits": 148, + 
"resourceClass": "resource-constrained", + "conformanceClass": "resource-conforming", "fixedKernel": false, - "paretoEligible": false + "paretoEligible": true }, "placement": { "kind": "packed", @@ -11097,91 +11676,202 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "8c8497a77d9085d", - "workloadId": "set:4:d8d49658059863f2", + "traceSignature": "a8f501af7004836", + "workloadId": "set:8:f5576e2b712d38c3", "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, + "eplbImbalanceBefore": 4.927734375, + "eplbImbalanceAfter": 1.0006103515625, "backendVersion": "1.2.1", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28272113941", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272113941", - "createdAt": "2026-06-27T00:04:01Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28255311146", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28255311146", + "createdAt": "2026-06-26T18:23:15.234137+00:00", + "sha": "36d3eb6c3c7386d3220c873d305410219c5c0f17" }, "rows": [ { "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 97.15200215578079, - "p90": 103.67999970912933, - "p95": 105.85600137710571, - "p99": 108.99200290441513 + "p50": 56.86400085687637, + "p90": 59.7120001912117, + "p95": 63.32799792289734, + "p99": 72.64000177383423 }, "combine": { - "p50": 79.64800298213959, - "p90": 82.33600109815598, - "p95": 86.84799820184708, - "p99": 87.96799927949905 + "p50": 64.83200192451477, + "p90": 66.46399945020676, + "p95": 66.94400310516357, + "p99": 76.51200145483017 }, "roundtrip": { - "p50": 151.8400013446808, - "p90": 158.01599621772766, - "p95": 160.76800227165222, - "p99": 165.3120070695877 + "p50": 105.12000322341919, + "p90": 110.72000116109848, + "p95": 111.7440015077591, + "p99": 122.56000190973282 }, "isolatedSum": { - "p50": 
176.80000513792038, - "p90": 186.0160008072853, - "p95": 192.7039995789528, - "p99": 196.96000218391418 + "p50": 121.69600278139114, + "p90": 126.17599964141846, + "p95": 130.2720010280609, + "p99": 149.1520032286644 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 630784, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 5.375, "recvTokensMax": 7, - "stragglerRank": 4, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 2, + "globalTokens": 16, "dispatch": { - "p50": 97.28000313043594, - "p90": 103.71199995279312, - "p95": 106.4319983124733, - "p99": 121.63200229406357 + "p50": 57.5999990105629, + "p90": 59.808000922203064, + "p95": 62.07999959588051, + "p99": 71.45600020885468 }, "combine": { - "p50": 79.93599772453308, - "p90": 87.39200234413147, - "p95": 87.93599903583527, - "p99": 90.04800021648407 + "p50": 66.27199798822403, + "p90": 67.00800359249115, + "p95": 67.29599833488464, + "p99": 76.92799717187881 }, "roundtrip": { - "p50": 153.72799336910248, - "p90": 159.55199301242828, - "p95": 160.7999950647354, - "p99": 165.6000018119812 + "p50": 106.27199709415436, + "p90": 108.22399705648422, + "p95": 110.01600325107574, + "p99": 132.54399597644806 }, "isolatedSum": { - "p50": 177.21600085496902, - "p90": 191.1040022969246, - "p95": 194.36799734830856, - "p99": 211.68000251054764 + "p50": 123.87199699878693, + "p90": 126.81600451469421, + "p95": 129.37599793076515, + "p99": 148.3839973807335 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 4, + "dispatchLogicalBytes": 1204224, + "combineLogicalBytes": 1204224, + "fanoutMean": 5.25, + "recvTokensMax": 14, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + 
"globalTokens": 32, + "dispatch": { + "p50": 57.53599852323532, + "p90": 59.808000922203064, + "p95": 60.70400029420853, + "p99": 67.87200272083282 + }, + "combine": { + "p50": 66.43199920654297, + "p90": 67.45599955320358, + "p95": 69.31199878454208, + "p99": 78.78399640321732 + }, + "roundtrip": { + "p50": 106.6880002617836, + "p90": 109.50399935245514, + "p95": 111.87200248241425, + "p99": 125.08800625801086 + }, + "isolatedSum": { + "p50": 123.96799772977829, + "p90": 127.26400047540665, + "p95": 130.0159990787506, + "p99": 146.65599912405014 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2394112, + "combineLogicalBytes": 2394112, + "fanoutMean": 5.21875, + "recvTokensMax": 24, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 58.848001062870026, + "p90": 61.15199998021126, + "p95": 64.41599875688553, + "p99": 78.14399898052216 + }, + "combine": { + "p50": 68.28799843788147, + "p90": 76.25599950551987, + "p95": 76.92799717187881, + "p99": 79.64800298213959 + }, + "roundtrip": { + "p50": 116.28799885511398, + "p90": 122.8799968957901, + "p95": 124.70400333404541, + "p99": 145.08800208568573 + }, + "isolatedSum": { + "p50": 127.1359995007515, + "p90": 137.40799948573112, + "p95": 141.34399592876434, + "p99": 157.79200196266174 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4630528, + "combineLogicalBytes": 4630528, + "fanoutMean": 5.046875, + "recvTokensMax": 45, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 60.54399907588959, + "p90": 66.14399701356888, + "p95": 68.67200136184692, + "p99": 83.29600095748901 + }, + "combine": { + "p50": 68.64000111818314, + "p90": 77.2159993648529, + "p95": 77.82399654388428, + "p99": 78.91199737787247 + }, + "roundtrip": { + "p50": 123.16799908876419, + "p90": 126.0479986667633, + "p95": 
127.16799974441528, + "p99": 131.1040073633194 + }, + "isolatedSum": { + "p50": 129.18400019407272, + "p90": 143.35999637842178, + "p95": 146.4959979057312, + "p99": 162.20799833536148 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9447424, + "combineLogicalBytes": 9447424, + "fanoutMean": 5.1484375, + "recvTokensMax": 91, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -11190,35 +11880,72 @@ "tokensPerRank": 32, "globalTokens": 256, "dispatch": { - "p50": 104.00000214576721, - "p90": 108.44799876213074, - "p95": 111.68000102043152, - "p99": 126.75200402736664 + "p50": 69.31199878454208, + "p90": 75.52000135183334, + "p95": 76.4160007238388, + "p99": 83.20000022649765 }, "combine": { - "p50": 87.99999952316284, - "p90": 93.44000369310379, - "p95": 95.87199985980988, - "p99": 97.59999811649323 + "p50": 78.46400141716003, + "p90": 79.26400005817413, + "p95": 79.45600152015686, + "p99": 82.40000158548355 }, "roundtrip": { - "p50": 161.8880033493042, - "p90": 168.64000260829926, - "p95": 170.0800061225891, - "p99": 175.99999904632568 + "p50": 132.192000746727, + "p90": 135.6479972600937, + "p95": 136.3839954137802, + "p99": 147.20000326633453 }, "isolatedSum": { - "p50": 192.00000166893005, - "p90": 201.88800245523453, - "p95": 207.5520008802414, - "p99": 224.35200214385986 + "p50": 147.77600020170212, + "p90": 154.78400141000748, + "p95": 155.87200224399567, + "p99": 165.6000018119812 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19726336, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 182, - "stragglerRank": 4, + "dispatchLogicalBytes": 19023872, + "combineLogicalBytes": 19023872, + "fanoutMean": 5.18359375, + "recvTokensMax": 178, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 87.8399983048439, + "p90": 90.30400216579437, + "p95": 91.87199920415878, + "p99": 100.0640019774437 
+ }, + "combine": { + "p50": 91.2960022687912, + "p90": 93.08800101280212, + "p95": 93.85599941015244, + "p99": 108.12799632549286 + }, + "roundtrip": { + "p50": 157.44000673294067, + "p90": 162.4639928340912, + "p95": 163.71199488639832, + "p99": 168.89600455760956 + }, + "isolatedSum": { + "p50": 179.1360005736351, + "p90": 183.3920031785965, + "p95": 185.72799861431122, + "p99": 208.19199830293655 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38148096, + "combineLogicalBytes": 38148096, + "fanoutMean": 5.197265625, + "recvTokensMax": 350, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -11227,35 +11954,35 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 130.0159990787506, - "p90": 137.7280056476593, - "p95": 138.7840062379837, - "p99": 142.2719955444336 + "p50": 93.9520001411438, + "p90": 98.39999675750732, + "p95": 100.00000149011612, + "p99": 105.53599894046783 }, "combine": { - "p50": 115.167997777462, - "p90": 120.54400146007538, - "p95": 120.95999717712402, - "p99": 123.87199699878693 + "p50": 115.29599875211716, + "p90": 116.12799763679504, + "p95": 116.48000031709671, + "p99": 127.87200510501862 }, "roundtrip": { - "p50": 212.47999370098114, - "p90": 216.63999557495117, - "p95": 218.1439995765686, - "p99": 221.47199511528015 + "p50": 193.08799505233765, + "p90": 199.90399479866028, + "p95": 201.50400698184967, + "p99": 214.1759991645813 }, "isolatedSum": { - "p50": 245.18399685621262, - "p90": 258.2720071077347, - "p95": 259.7440034151077, - "p99": 266.1439925432205 + "p50": 209.24799889326096, + "p90": 214.52799439430237, + "p95": 216.48000180721283, + "p99": 233.40800404548645 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 4, + "dispatchLogicalBytes": 76955648, + "combineLogicalBytes": 76955648, + "fanoutMean": 5.2421875, + "recvTokensMax": 687, + "stragglerRank": 7, 
"correct": true, "samplesPooled": 600, "trials": 3 @@ -11263,28 +11990,29 @@ ] }, { - "id": "cx-d0428a76", - "identity": "h100|deepep|7168|8|256|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", - "colorKey": "h100_ff7906f8", - "comparisonKey": "e3488cf5058170e6", + "id": "cx-8481f6a4", + "identity": "b300|deepep|7168|8|256|bf16|normal|cached-layout-comm-only-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|ac583971f94b176", + "colorKey": "b300_63f1354f", + "comparisonKey": "63f9b5a5300d4d4b", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:47:28.813270+00:00", + "generatedAt": "2026-06-26T18:09:35.317427+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h100-dgxc-slurm_07", - "sku": "h100", + "runner": "b300-nv_16", + "sku": "b300", "backend": "deepep", "phase": "decode", "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", + "resourceMode": "normalized", + "suite": "resource-constrained", "comparisonClass": "standardized", - "measurementContract": "runtime-visible-v1", - "topologyClass": "h100-nvlink-island", + "measurementContract": "cached-layout-comm-only-v1", + "topologyClass": "b300-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · bf16", + "label": "B300 EP8 · deepep · bf16 (norm) [cl]", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, @@ -11299,14 +12027,14 @@ "combineQuantMode": "none" }, "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", + "requestedFraction": 0.18, + "achievedFraction": 0.1824, + "configuredUnits": 27, + "deviceUnits": 148, + "resourceClass": "resource-constrained", + "conformanceClass": "resource-conforming", "fixedKernel": false, - "paretoEligible": false + "paretoEligible": true }, "placement": { "kind": "packed", @@ 
-11324,45 +12052,45 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271559607", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271559607", - "createdAt": "2026-06-26T23:46:31Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28254489726", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254489726", + "createdAt": "2026-06-26T18:09:35.317427+00:00", + "sha": "60dec7d70f554e252fec87709e2be52752947db1" }, "rows": [ { "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 96.79999947547913, - "p90": 103.39199751615524, - "p95": 104.80000078678131, - "p99": 109.43999886512756 + "p50": 50.303999334573746, + "p90": 52.06400156021118, + "p95": 53.82400006055832, + "p99": 65.05600363016129 }, "combine": { - "p50": 79.13599908351898, - "p90": 81.40800148248672, - "p95": 86.68799698352814, - "p99": 87.90399879217148 + "p50": 66.56000018119812, + "p90": 68.2239979505539, + "p95": 68.76800209283829, + "p99": 77.95199751853943 }, "roundtrip": { - "p50": 152.12799608707428, - "p90": 159.96800363063812, - "p95": 162.36799955368042, - "p99": 177.69600450992584 + "p50": 99.84000027179718, + "p90": 103.90400141477585, + "p95": 107.51999914646149, + "p99": 117.11999773979187 }, "isolatedSum": { - "p50": 175.9359985589981, - "p90": 184.79999899864197, - "p95": 191.48799777030945, - "p99": 197.34399765729904 + "p50": 116.86399951577187, + "p90": 120.28799951076508, + "p95": 122.5920021533966, + "p99": 143.0080011487007 }, "roundtripMeasured": true, "dispatchLogicalBytes": 630784, "combineLogicalBytes": 630784, "fanoutMean": 5.5, "recvTokensMax": 7, - "stragglerRank": 4, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -11371,35 +12099,35 @@ "tokensPerRank": 2, "globalTokens": 16, "dispatch": { - "p50": 71.23199850320816, - "p90": 101.98400169610977, - "p95": 103.84000092744827, - "p99": 
108.35199803113937 + "p50": 51.263999193906784, + "p90": 52.89600044488907, + "p95": 55.32800033688545, + "p99": 65.18399715423584 }, "combine": { - "p50": 72.54400104284286, - "p90": 81.40800148248672, - "p95": 82.62400329113007, - "p99": 87.77599781751633 + "p50": 66.97600334882736, + "p90": 68.7360018491745, + "p95": 69.11999732255936, + "p99": 78.11199873685837 }, "roundtrip": { - "p50": 129.08799946308136, - "p90": 158.2079976797104, - "p95": 159.58400070667267, - "p99": 165.02399742603302 + "p50": 100.99200159311295, + "p90": 103.26399654150009, + "p95": 105.76000064611435, + "p99": 113.6000007390976 }, "isolatedSum": { - "p50": 143.77599954605103, - "p90": 183.3920031785965, - "p95": 186.46400421857834, - "p99": 196.1279958486557 + "p50": 118.24000254273415, + "p90": 121.63200229406357, + "p95": 124.44799765944481, + "p99": 143.2959958910942 }, "roundtripMeasured": true, "dispatchLogicalBytes": 1232896, "combineLogicalBytes": 1232896, "fanoutMean": 5.375, "recvTokensMax": 13, - "stragglerRank": 4, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -11408,35 +12136,35 @@ "tokensPerRank": 4, "globalTokens": 32, "dispatch": { - "p50": 70.52800059318542, - "p90": 99.84000027179718, - "p95": 105.72800040245056, - "p99": 115.07199704647064 + "p50": 51.4880008995533, + "p90": 53.408000618219376, + "p95": 54.9440011382103, + "p99": 61.63199990987778 }, "combine": { - "p50": 72.9919970035553, - "p90": 80.99199831485748, - "p95": 86.94399893283844, - "p99": 103.55199873447418 + "p50": 67.6800012588501, + "p90": 69.60000097751617, + "p95": 76.89599692821503, + "p99": 79.16799932718277 }, "roundtrip": { - "p50": 129.43999469280243, - "p90": 156.19200468063354, - "p95": 159.07199680805206, - "p99": 162.56000101566315 + "p50": 108.73600095510483, + "p90": 115.80800265073776, + "p95": 117.0239970088005, + "p99": 124.35200065374374 }, "isolatedSum": { - "p50": 143.51999759674072, - "p90": 180.83199858665466, - "p95": 192.671999335289, - "p99": 
218.62399578094482 + "p50": 119.1680021584034, + "p90": 123.00800159573555, + "p95": 131.83999806642532, + "p99": 140.79999923706055 }, "roundtripMeasured": true, "dispatchLogicalBytes": 2480128, "combineLogicalBytes": 2480128, "fanoutMean": 5.40625, "recvTokensMax": 29, - "stragglerRank": 6, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -11445,35 +12173,35 @@ "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 96.54399752616882, - "p90": 101.3759970664978, - "p95": 103.61599922180176, - "p99": 111.26399785280228 + "p50": 52.639998495578766, + "p90": 55.64799904823303, + "p95": 59.39200147986412, + "p99": 68.00000369548798 }, "combine": { - "p50": 79.52000200748444, - "p90": 87.13600039482117, - "p95": 87.64799684286118, - "p99": 88.73599767684937 + "p50": 68.25599819421768, + "p90": 77.08799839019775, + "p95": 77.60000228881836, + "p99": 78.94399762153625 }, "roundtrip": { - "p50": 152.16000378131866, - "p90": 159.39199924468994, - "p95": 161.15200519561768, - "p99": 170.52799463272095 + "p50": 113.69600147008896, + "p90": 117.66400188207626, + "p95": 118.72000247240067, + "p99": 121.18399888277054 }, "isolatedSum": { - "p50": 176.06399953365326, - "p90": 188.51199746131897, - "p95": 191.26399606466293, - "p99": 199.99999552965164 + "p50": 120.89599668979645, + "p90": 132.7359974384308, + "p95": 136.99200376868248, + "p99": 146.94400131702423 }, "roundtripMeasured": true, "dispatchLogicalBytes": 4974592, "combineLogicalBytes": 4974592, "fanoutMean": 5.421875, "recvTokensMax": 47, - "stragglerRank": 4, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -11482,35 +12210,35 @@ "tokensPerRank": 16, "globalTokens": 128, "dispatch": { - "p50": 96.3520035147667, - "p90": 101.75999999046326, - "p95": 104.89600151777267, - "p99": 110.11199653148651 + "p50": 61.792001128196716, + "p90": 67.90400296449661, + "p95": 68.67200136184692, + "p99": 71.1359977722168 }, "combine": { - "p50": 84.48000252246857, - 
"p90": 88.03199976682663, - "p95": 89.21600133180618, - "p99": 95.23200243711472 + "p50": 70.46400010585785, + "p90": 78.40000092983246, + "p95": 78.59200239181519, + "p99": 81.44000172615051 }, "roundtrip": { - "p50": 153.05599570274353, - "p90": 160.288006067276, - "p95": 162.432000041008, - "p99": 171.2000072002411 + "p50": 113.18399757146835, + "p90": 115.9679964184761, + "p95": 117.53600090742111, + "p99": 127.87200510501862 }, "isolatedSum": { - "p50": 180.83200603723526, - "p90": 189.7919997572899, - "p95": 194.11200284957886, - "p99": 205.34399896860123 + "p50": 132.25600123405457, + "p90": 146.30400389432907, + "p95": 147.2640037536621, + "p99": 152.5759994983673 }, "roundtripMeasured": true, "dispatchLogicalBytes": 9920512, "combineLogicalBytes": 9920512, "fanoutMean": 5.40625, "recvTokensMax": 92, - "stragglerRank": 6, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -11519,35 +12247,35 @@ "tokensPerRank": 32, "globalTokens": 256, "dispatch": { - "p50": 89.9519994854927, - "p90": 104.35199737548828, - "p95": 106.65600001811981, - "p99": 117.85600334405899 + "p50": 62.65600025653839, + "p90": 64.92800265550613, + "p95": 66.880002617836, + "p99": 73.69600236415863 }, "combine": { - "p50": 81.216000020504, - "p90": 92.19200164079666, - "p95": 95.39200365543365, - "p99": 96.0640013217926 + "p50": 78.59200239181519, + "p90": 79.74400371313095, + "p95": 80.64000308513641, + "p99": 85.63199639320374 }, "roundtrip": { - "p50": 141.05600118637085, - "p90": 168.2880073785782, - "p95": 169.5680022239685, - "p99": 174.40000176429749 + "p50": 124.28800016641617, + "p90": 127.93600559234619, + "p95": 130.43199479579926, + "p99": 138.5599970817566 }, "isolatedSum": { - "p50": 171.1679995059967, - "p90": 196.54399901628494, - "p95": 202.04800367355347, - "p99": 213.9200046658516 + "p50": 141.24800264835358, + "p90": 144.67200636863708, + "p95": 147.5200057029724, + "p99": 159.32799875736237 }, "roundtripMeasured": true, "dispatchLogicalBytes": 
19726336, "combineLogicalBytes": 19726336, "fanoutMean": 5.375, "recvTokensMax": 182, - "stragglerRank": 0, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -11556,35 +12284,35 @@ "tokensPerRank": 64, "globalTokens": 512, "dispatch": { - "p50": 111.51999980211258, - "p90": 119.00799721479416, - "p95": 121.44000083208084, - "p99": 126.56000256538391 + "p50": 75.77600330114365, + "p90": 83.16799998283386, + "p95": 83.96799862384796, + "p99": 96.3520035147667 }, "combine": { - "p50": 95.0080007314682, - "p90": 103.04000228643417, - "p95": 103.35999727249146, - "p99": 104.92800176143646 + "p50": 91.48799628019333, + "p90": 93.6959981918335, + "p95": 95.90400010347366, + "p99": 104.76800054311752 }, "roundtrip": { - "p50": 164.63999450206757, - "p90": 182.3039948940277, - "p95": 185.12000143527985, - "p99": 188.7039989233017 + "p50": 150.11200308799744, + "p90": 153.28000485897064, + "p95": 154.91199493408203, + "p99": 159.96800363063812 }, "isolatedSum": { - "p50": 206.52800053358078, - "p90": 222.04799950122833, - "p95": 224.7999981045723, - "p99": 231.48800432682037 + "p50": 167.26399958133698, + "p90": 176.86399817466736, + "p95": 179.87199872732162, + "p99": 201.12000405788422 }, "roundtripMeasured": true, "dispatchLogicalBytes": 38993920, "combineLogicalBytes": 38993920, "fanoutMean": 5.3125, "recvTokensMax": 367, - "stragglerRank": 6, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -11593,35 +12321,35 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 116.99199676513672, - "p90": 133.2480013370514, - "p95": 135.51999628543854, - "p99": 140.6400054693222 + "p50": 87.36000210046768, + "p90": 89.31200206279755, + "p95": 92.3520028591156, + "p99": 98.36799651384354 }, "combine": { - "p50": 106.88000172376633, - "p90": 119.55200135707855, - "p95": 120.2239990234375, - "p99": 127.55200266838074 + "p50": 115.32799899578094, + "p90": 115.9679964184761, + "p95": 117.21599847078323, + "p99": 
126.49600207805634 }, "roundtrip": { - "p50": 199.3280053138733, - "p90": 215.45599400997162, - "p95": 217.56799519062042, - "p99": 258.91199707984924 + "p50": 186.14399433135986, + "p90": 191.67999923229218, + "p95": 193.05600225925446, + "p99": 199.072003364563 }, "isolatedSum": { - "p50": 223.87199848890305, - "p90": 252.80000269412994, - "p95": 255.74399530887604, - "p99": 268.19200813770294 + "p50": 202.68800109624863, + "p90": 205.27999848127365, + "p95": 209.56800132989883, + "p99": 224.86399859189987 }, "roundtripMeasured": true, "dispatchLogicalBytes": 77672448, "combineLogicalBytes": 77672448, "fanoutMean": 5.291015625, "recvTokensMax": 723, - "stragglerRank": 6, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -11629,32 +12357,33 @@ ] }, { - "id": "cx-e96d722b", - "identity": "h100|deepep|7168|8|384|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||d6c49ae98878760", - "colorKey": "h100_ff7906f8", - "comparisonKey": "c69daa1ab05193b6", + "id": "cx-00895a92", + "identity": "b300|deepep|7168|8|256|bf16|normal|cached-layout-comm-only-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "b300_33311fdc", + "comparisonKey": "fb96ce98136947bb", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:51:56.132475+00:00", + "generatedAt": "2026-06-27T09:46:23.442699+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h100-dgxc-slurm_17", - "sku": "h100", + "runner": "b300-nv_07", + "sku": "b300", "backend": "deepep", "phase": "decode", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "runtime-visible-v1", - "topologyClass": "h100-nvlink-island", + "measurementContract": "cached-layout-comm-only-v1", + "topologyClass": "b300-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · bf16", + "label": "B300 EP8 · deepep · bf16 [cl]", + "model": 
"DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, - "experts": 384, + "experts": 256, "routing": "uniform", "routingLabel": "uniform", "routingStep": 0, @@ -11666,9 +12395,9 @@ }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1515, + "achievedFraction": 0.1351, "configuredUnits": 20, - "deviceUnits": 132, + "deviceUnits": 148, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -11681,8 +12410,8 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "d6c49ae98878760", - "workloadId": "set:8:9a27d0df4b17fa09", + "traceSignature": "ac583971f94b176", + "workloadId": "set:8:d8d49658059863f2", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -11690,45 +12419,45 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271667766", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271667766", - "createdAt": "2026-06-26T23:49:58Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28285573016", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285573016", + "createdAt": "2026-06-27T09:46:23.442699+00:00", + "sha": "149586650dbed5b7579537347e9489d5b41543c1" }, "rows": [ { "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 96.09600156545639, - "p90": 102.81600058078766, - "p95": 104.54399883747101, - "p99": 110.59200018644333 + "p50": 49.6320016682148, + "p90": 50.97600072622299, + "p95": 52.319999784231186, + "p99": 56.992001831531525 }, "combine": { - "p50": 79.03999835252762, - "p90": 81.50400221347809, - "p95": 82.11199939250946, - "p99": 87.90399879217148 + "p50": 65.66400080919266, + "p90": 66.52799993753433, + "p95": 67.26399809122086, + "p99": 77.2479996085167 }, "roundtrip": { - "p50": 145.56799829006195, - "p90": 153.31199765205383, - "p95": 155.71199357509613, - 
"p99": 159.39199924468994 + "p50": 98.62399846315384, + "p90": 101.31199657917023, + "p95": 103.39199751615524, + "p99": 122.3360002040863 }, "isolatedSum": { - "p50": 175.135999917984, - "p90": 184.32000279426575, - "p95": 186.65599822998047, - "p99": 198.4959989786148 + "p50": 115.29600247740746, + "p90": 117.50400066375732, + "p95": 119.58399787545204, + "p99": 134.24000144004822 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 602112, - "combineLogicalBytes": 602112, - "fanoutMean": 5.25, - "recvTokensMax": 8, - "stragglerRank": 6, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -11737,34 +12466,34 @@ "tokensPerRank": 2, "globalTokens": 16, "dispatch": { - "p50": 71.03999704122543, - "p90": 101.6319990158081, - "p95": 102.65599936246872, - "p99": 106.62399977445602 + "p50": 50.144001841545105, + "p90": 51.872000098228455, + "p95": 53.37600037455559, + "p99": 63.00800293684006 }, "combine": { - "p50": 72.28799909353256, - "p90": 80.54400235414505, - "p95": 81.40800148248672, - "p99": 87.00799942016602 + "p50": 67.07199662923813, + "p90": 68.7360018491745, + "p95": 69.24799829721451, + "p99": 79.1039988398552 }, "roundtrip": { - "p50": 129.18399274349213, - "p90": 152.70400047302246, - "p95": 156.92800283432007, - "p99": 160.76800227165222 + "p50": 99.80800002813339, + "p90": 102.01600193977356, + "p95": 103.7760004401207, + "p99": 110.20799726247787 }, "isolatedSum": { - "p50": 143.327996134758, - "p90": 182.17600136995316, - "p95": 184.06400084495544, - "p99": 193.63199919462204 + "p50": 117.21599847078323, + "p90": 120.60800194740295, + "p95": 122.6239986717701, + "p99": 142.11200177669525 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1218560, - "combineLogicalBytes": 1218560, - "fanoutMean": 5.3125, - "recvTokensMax": 14, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 
5.375, + "recvTokensMax": 13, "stragglerRank": 7, "correct": true, "samplesPooled": 600, @@ -11774,35 +12503,35 @@ "tokensPerRank": 4, "globalTokens": 32, "dispatch": { - "p50": 72.57600128650665, - "p90": 101.02400183677673, - "p95": 103.61599922180176, - "p99": 110.81600189208984 + "p50": 50.97600072622299, + "p90": 52.799999713897705, + "p95": 54.91200089454651, + "p99": 61.11999973654747 }, "combine": { - "p50": 72.25599884986877, - "p90": 79.96799796819687, - "p95": 86.71999722719193, - "p99": 87.64799684286118 + "p50": 68.4799998998642, + "p90": 89.63199704885483, + "p95": 92.28800237178802, + "p99": 102.4319976568222 }, "roundtrip": { - "p50": 129.92000579833984, - "p90": 161.3759994506836, - "p95": 162.30399906635284, - "p99": 166.4319932460785 + "p50": 112.86400258541107, + "p90": 117.08799749612808, + "p95": 118.23999881744385, + "p99": 121.95199728012085 }, "isolatedSum": { - "p50": 144.83200013637543, - "p90": 180.9919998049736, - "p95": 190.33599644899368, - "p99": 198.46399873495102 + "p50": 119.45600062608719, + "p90": 142.43199676275253, + "p95": 147.20000326633453, + "p99": 163.55199739336967 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2408448, - "combineLogicalBytes": 2408448, - "fanoutMean": 5.25, - "recvTokensMax": 26, - "stragglerRank": 5, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -11811,35 +12540,35 @@ "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 96.3200032711029, - "p90": 101.1200025677681, - "p95": 102.52799838781357, - "p99": 109.11999642848969 + "p50": 55.296000093221664, + "p90": 70.30399888753891, + "p95": 75.1039981842041, + "p99": 115.58400094509125 }, "combine": { - "p50": 79.23199981451035, - "p90": 82.11199939250946, - "p95": 87.00799942016602, - "p99": 87.71199733018875 + "p50": 68.89600306749344, + "p90": 77.79199630022049, + "p95": 78.33600044250488, + 
"p99": 82.33600109815598 }, "roundtrip": { - "p50": 151.5199989080429, - "p90": 159.2320054769516, - "p95": 160.60799360275269, - "p99": 165.21599888801575 + "p50": 113.6000007390976, + "p90": 117.91999638080597, + "p95": 118.97599697113037, + "p99": 125.18399953842163 }, "isolatedSum": { - "p50": 175.55200308561325, - "p90": 183.23200196027756, - "p95": 189.53599780797958, - "p99": 196.83199375867844 + "p50": 124.1920031607151, + "p90": 148.0959951877594, + "p95": 153.43999862670898, + "p99": 197.92000204324722 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4831232, - "combineLogicalBytes": 4831232, - "fanoutMean": 5.265625, - "recvTokensMax": 48, - "stragglerRank": 7, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -11848,34 +12577,34 @@ "tokensPerRank": 16, "globalTokens": 128, "dispatch": { - "p50": 96.22400254011154, - "p90": 102.36799716949463, - "p95": 105.05600273609161, - "p99": 110.30399799346924 + "p50": 60.06399914622307, + "p90": 67.19999760389328, + "p95": 68.03199648857117, + "p99": 71.87200337648392 }, "combine": { - "p50": 81.88799768686295, - "p90": 88.28800171613693, - "p95": 89.31200206279755, - "p99": 94.43199634552002 + "p50": 68.89600306749344, + "p90": 77.85599678754807, + "p95": 78.3040001988411, + "p99": 81.4720019698143 }, "roundtrip": { - "p50": 152.48000621795654, - "p90": 160.09600460529327, - "p95": 164.19200599193573, - "p99": 172.83199727535248 + "p50": 112.09599673748016, + "p90": 114.9120032787323, + "p95": 116.54400080442429, + "p99": 128.25599312782288 }, "isolatedSum": { - "p50": 178.1120002269745, - "p90": 190.65599888563156, - "p95": 194.36800479888916, - "p99": 204.73599433898926 + "p50": 128.9600022137165, + "p90": 145.05599439144135, + "p95": 146.33599668741226, + "p99": 153.34400534629822 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 9848832, - 
"combineLogicalBytes": 9848832, - "fanoutMean": 5.3671875, - "recvTokensMax": 91, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, "stragglerRank": 7, "correct": true, "samplesPooled": 600, @@ -11885,35 +12614,35 @@ "tokensPerRank": 32, "globalTokens": 256, "dispatch": { - "p50": 90.36800265312195, - "p90": 102.59199887514114, - "p95": 104.3199971318245, - "p99": 108.03200304508209 + "p50": 61.792001128196716, + "p90": 63.45599889755249, + "p95": 66.49599969387054, + "p99": 72.03199714422226 }, "combine": { - "p50": 80.92799782752991, - "p90": 90.01599997282028, - "p95": 95.13600170612335, - "p99": 96.41599655151367 + "p50": 78.11199873685837, + "p90": 79.42400127649307, + "p95": 80.35200089216232, + "p99": 83.48800241947174 }, "roundtrip": { - "p50": 142.46399700641632, - "p90": 169.95200514793396, - "p95": 174.55999553203583, - "p99": 181.7920058965683 + "p50": 122.81599640846252, + "p90": 124.95999783277512, + "p95": 127.00800597667694, + "p99": 132.76800513267517 }, "isolatedSum": { - "p50": 171.29600048065186, - "p90": 192.60799884796143, - "p95": 199.45599883794785, - "p99": 204.44799959659576 + "p50": 139.90399986505508, + "p90": 142.88000017404556, + "p95": 146.84800058603287, + "p99": 155.519999563694 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19496960, - "combineLogicalBytes": 19496960, - "fanoutMean": 5.3125, - "recvTokensMax": 178, - "stragglerRank": 3, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -11922,35 +12651,35 @@ "tokensPerRank": 64, "globalTokens": 512, "dispatch": { - "p50": 95.71199864149094, - "p90": 116.54400080442429, - "p95": 118.59200149774551, - "p99": 125.63200294971466 + "p50": 74.5920017361641, + "p90": 82.14399963617325, + "p95": 83.20000022649765, + "p99": 97.69599884748459 }, "combine": { - "p50": 
89.72799777984619, - "p90": 103.74400019645691, - "p95": 104.22399640083313, - "p99": 106.04800283908844 + "p50": 91.93599969148636, + "p90": 100.63999891281128, + "p95": 101.75999999046326, + "p99": 108.22399705648422 }, "roundtrip": { - "p50": 165.66400229930878, - "p90": 185.34399569034576, - "p95": 186.97600066661835, - "p99": 190.08000195026398 + "p50": 148.60799908638, + "p90": 151.96800231933594, + "p95": 153.1199961900711, + "p99": 155.93600273132324 }, "isolatedSum": { - "p50": 185.43999642133713, - "p90": 220.2880010008812, - "p95": 222.81599789857864, - "p99": 231.6800057888031 + "p50": 166.52800142765045, + "p90": 182.78399854898453, + "p95": 184.9600002169609, + "p99": 205.9199959039688 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 38836224, - "combineLogicalBytes": 38836224, - "fanoutMean": 5.291015625, - "recvTokensMax": 372, - "stragglerRank": 5, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -11959,35 +12688,35 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 113.11999708414078, - "p90": 133.82400572299957, - "p95": 137.05599308013916, - "p99": 140.28799533843994 + "p50": 86.5280032157898, + "p90": 87.99999952316284, + "p95": 90.11200070381165, + "p99": 99.84000027179718 }, "combine": { - "p50": 106.46399855613708, - "p90": 120.12799829244614, - "p95": 120.51200121641159, - "p99": 120.99199742078781 + "p50": 114.97599631547928, + "p90": 116.28799885511398, + "p95": 117.18399822711945, + "p99": 126.49600207805634 }, "roundtrip": { - "p50": 196.8960016965866, - "p90": 216.99200570583344, - "p95": 218.9120054244995, - "p99": 220.99199891090393 + "p50": 185.2799952030182, + "p90": 191.00800156593323, + "p95": 192.76799261569977, + "p99": 203.23200523853302 }, "isolatedSum": { - "p50": 219.58399564027786, - "p90": 253.9520040154457, - "p95": 257.56799429655075, - "p99": 
261.27999275922775 + "p50": 201.50399953126907, + "p90": 204.28799837827682, + "p95": 207.2959989309311, + "p99": 226.33600234985352 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77514752, - "combineLogicalBytes": 77514752, - "fanoutMean": 5.2802734375, - "recvTokensMax": 707, - "stragglerRank": 5, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -11995,16 +12724,16 @@ ] }, { - "id": "cx-10aeccec", - "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|alternating-groups|8|decode|normal|none|none|0|tuned||3cd13eac5b27759", - "colorKey": "h100_648ede74", - "comparisonKey": "03a9af950bebf5a9", + "id": "cx-34fdfa58", + "identity": "b300|deepep|4096|8|128|fp8|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||75530960a30b452", + "colorKey": "b300_d6fd14c3", + "comparisonKey": "e31dbd692115f689", "schemaVersion": 3, - "generatedAt": "2026-06-27T00:11:55.271848+00:00", + "generatedAt": "2026-06-27T11:14:20.626757+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h100-dgxc-slurm_07", - "sku": "h100", + "runner": "b300-nv_04", + "sku": "b300", "backend": "deepep", "phase": "decode", "mode": "normal", @@ -12012,29 +12741,30 @@ "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", + "topologyClass": "b300-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · bf16 · alternating-groups", + "label": "B300 EP8 · deepep · fp8", + "model": "Qwen3.5", "shape": { - "hidden": 7168, + "hidden": 4096, "topk": 8, - "experts": 256, - "routing": "alternating-groups", - "routingLabel": "alternating-groups", + "experts": 128, + "routing": "uniform", + "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", 
"eplbEnabled": false, - "dispatchDtype": "bf16", + "dispatchDtype": "fp8", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1515, + "achievedFraction": 0.1351, "configuredUnits": 20, - "deviceUnits": 132, + "deviceUnits": 148, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -12047,8 +12777,8 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "3cd13eac5b27759", - "workloadId": "set:3:24add4cb1eb472b4", + "traceSignature": "75530960a30b452", + "workloadId": "set:8:d1b92539bddfb570", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -12056,263 +12786,267 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28272328109", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272328109", - "createdAt": "2026-06-27T00:11:02Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28287509502", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28287509502", + "createdAt": "2026-06-27T11:14:20.626757+00:00", + "sha": "df7fddee0a275156d4c72fa006cd2b73bce72613" }, "rows": [ { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 1, + "globalTokens": 8, "dispatch": { - "p50": 97.28000313043594, - "p90": 104.06400263309479, - "p95": 106.72000050544739, - "p99": 117.34399944543839 + "p50": 54.655998945236206, + "p90": 56.60799890756607, + "p95": 57.40800127387047, + "p99": 63.80800157785416 }, "combine": { - "p50": 78.87999713420868, - "p90": 81.82399719953537, - "p95": 83.29600095748901, - "p99": 88.99199962615967 + "p50": 49.92000013589859, + "p90": 51.16799846291542, + "p95": 52.12799832224846, + "p99": 59.10399928689003 }, "roundtrip": { - "p50": 147.5840061903, - "p90": 155.5519998073578, - "p95": 157.98400342464447, - 
"p99": 164.0319973230362 + "p50": 107.07200318574905, + "p90": 109.6000000834465, + "p95": 111.84000223875046, + "p99": 129.56799566745758 }, "isolatedSum": { - "p50": 176.16000026464462, - "p90": 185.88799983263016, - "p95": 190.0160014629364, - "p99": 206.33599907159805 + "p50": 104.5759990811348, + "p90": 107.77599737048149, + "p95": 109.53599959611893, + "p99": 122.91200086474419 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 3297280, - "combineLogicalBytes": 3297280, - "fanoutMean": 3.59375, - "recvTokensMax": 61, - "stragglerRank": 4, + "dispatchLogicalBytes": 172032, + "combineLogicalBytes": 344064, + "fanoutMean": 5.25, + "recvTokensMax": 6, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 32, - "globalTokens": 256, + "tokensPerRank": 2, + "globalTokens": 16, "dispatch": { - "p50": 102.81600058078766, - "p90": 106.78400099277496, - "p95": 110.46399921178818, - "p99": 137.40800321102142 + "p50": 55.135998874902725, + "p90": 57.24800005555153, + "p95": 58.720000088214874, + "p99": 64.80000168085098 }, "combine": { - "p50": 87.13600039482117, - "p90": 87.93599903583527, - "p95": 88.79999816417694, - "p99": 95.48799693584442 + "p50": 50.75199902057648, + "p90": 52.480001002550125, + "p95": 52.83199995756149, + "p99": 63.90400230884552 }, "roundtrip": { - "p50": 158.6879938840866, - "p90": 165.69599509239197, - "p95": 167.64800250530243, - "p99": 171.80800437927246 + "p50": 108.83200168609619, + "p90": 112.12799698114395, + "p95": 115.26399850845337, + "p99": 229.40799593925476 }, "isolatedSum": { - "p50": 189.95200097560883, - "p90": 194.72000002861023, - "p95": 199.26399737596512, - "p99": 232.89600014686584 + "p50": 105.8879978954792, + "p90": 109.72800105810165, + "p95": 111.55200004577637, + "p99": 128.7040039896965 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 13275136, - "combineLogicalBytes": 13275136, - "fanoutMean": 3.6171875, - "recvTokensMax": 236, - "stragglerRank": 4, + 
"dispatchLogicalBytes": 352256, + "combineLogicalBytes": 704512, + "fanoutMean": 5.375, + "recvTokensMax": 12, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 4, + "globalTokens": 32, "dispatch": { - "p50": 139.615997672081, - "p90": 146.464005112648, - "p95": 149.47199821472168, - "p99": 156.6080003976822 + "p50": 56.671999394893646, + "p90": 76.38400048017502, + "p95": 85.69599688053131, + "p99": 199.10399615764618 }, "combine": { - "p50": 119.87199634313583, - "p90": 121.31199985742569, - "p95": 127.58399546146393, - "p99": 128.92800569534302 + "p50": 53.15199866890907, + "p90": 57.11999908089638, + "p95": 58.62399935722351, + "p99": 63.07200342416763 }, "roundtrip": { - "p50": 225.92000663280487, - "p90": 230.27199506759644, - "p95": 232.06399381160736, - "p99": 238.0480021238327 + "p50": 111.42399907112122, + "p90": 114.33599889278412, + "p95": 116.48000031709671, + "p99": 125.5359947681427 }, "isolatedSum": { - "p50": 259.4879940152168, - "p90": 267.7760049700737, - "p95": 277.0559936761856, - "p99": 285.5360060930252 + "p50": 109.82399806380272, + "p90": 133.5039995610714, + "p95": 144.31999623775482, + "p99": 262.1759995818138 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 53172224, - "combineLogicalBytes": 53172224, - "fanoutMean": 3.6220703125, - "recvTokensMax": 934, + "dispatchLogicalBytes": 692224, + "combineLogicalBytes": 1384448, + "fanoutMean": 5.28125, + "recvTokensMax": 26, "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 - } - ] - }, - { - "id": "cx-62470199", - "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|alternating-groups@s1|8|decode|normal|none|none|1|tuned||f8662de0b3559f9", - "colorKey": "h100_b681a3a4", - "comparisonKey": "03a9af950bebf5a9", - "schemaVersion": 3, - "generatedAt": "2026-06-27T00:12:00.195927+00:00", - "status": "valid", - "publicationStatus": "comparable-experimental", - 
"runner": "h100-dgxc-slurm_17", - "sku": "h100", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · bf16 · alternating-groups@s1", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "alternating-groups", - "routingLabel": "alternating-groups@s1", - "routingStep": 1, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "f8662de0b3559f9", - "workloadId": null, - "workloadSource": "seeded-runtime", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28272331593", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272331593", - "createdAt": "2026-06-27T00:11:09Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ + }, { "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 95.20000219345093, - "p90": 101.24800354242325, - "p95": 103.42399775981903, - "p99": 115.84000289440155 + "p50": 57.760000228881836, + "p90": 59.93599817156792, + "p95": 61.184000223875046, + "p99": 67.26399809122086 }, "combine": { - "p50": 79.29600030183792, - 
"p90": 80.92799782752991, - "p95": 81.79199695587158, - "p99": 88.03199976682663 + "p50": 54.91200089454651, + "p90": 56.96000158786774, + "p95": 57.28000029921532, + "p99": 63.391998410224915 }, "roundtrip": { - "p50": 148.03199470043182, - "p90": 153.24799716472626, - "p95": 156.41599893569946, - "p99": 176.06399953365326 + "p50": 114.78400230407715, + "p90": 116.99199676513672, + "p95": 118.43200027942657, + "p99": 134.94400680065155 }, "isolatedSum": { - "p50": 174.49600249528885, - "p90": 182.17600136995316, - "p95": 185.2159947156906, - "p99": 203.87200266122818 + "p50": 112.67200112342834, + "p90": 116.89599975943565, + "p95": 118.46400052309036, + "p99": 130.65599650144577 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 3297280, - "combineLogicalBytes": 3297280, - "fanoutMean": 3.59375, - "recvTokensMax": 61, + "dispatchLogicalBytes": 1372160, + "combineLogicalBytes": 2744320, + "fanoutMean": 5.234375, + "recvTokensMax": 49, "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 57.8560009598732, + "p90": 59.93599817156792, + "p95": 61.216000467538834, + "p99": 79.80799674987793 + }, + "combine": { + "p50": 54.91200089454651, + "p90": 56.73599988222122, + "p95": 57.023998349905014, + "p99": 60.06399914622307 + }, + "roundtrip": { + "p50": 115.13599753379822, + "p90": 117.34399944543839, + "p95": 118.6240017414093, + "p99": 126.08000636100769 + }, + "isolatedSum": { + "p50": 112.76800185441971, + "p90": 116.67199805378914, + "p95": 118.23999881744385, + "p99": 139.871995896101 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2732032, + "combineLogicalBytes": 5464064, + "fanoutMean": 5.2109375, + "recvTokensMax": 94, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, { "tokensPerRank": 32, "globalTokens": 256, "dispatch": { - "p50": 99.5199978351593, - "p90": 107.39199817180634, - "p95": 118.72000247240067, - "p99": 
229.95199263095856 + "p50": 61.08799949288368, + "p90": 63.26399743556976, + "p95": 64.4799992442131, + "p99": 74.43200051784515 }, "combine": { - "p50": 87.52000331878662, - "p90": 89.34400230646133, - "p95": 92.3520028591156, - "p99": 96.44799679517746 + "p50": 58.27200040221214, + "p90": 60.28800085186958, + "p95": 60.92799827456474, + "p99": 65.0240033864975 }, "roundtrip": { - "p50": 155.5519998073578, - "p90": 160.70400178432465, - "p95": 164.76799547672272, - "p99": 175.07199943065643 + "p50": 122.6240023970604, + "p90": 125.56800246238708, + "p95": 127.26399302482605, + "p99": 150.36800503730774 }, "isolatedSum": { - "p50": 187.04000115394592, - "p90": 196.73600047826767, - "p95": 211.07200533151627, - "p99": 326.399989426136 + "p50": 119.35999989509583, + "p90": 123.55199828743935, + "p95": 125.40799751877785, + "p99": 139.45600390434265 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 13275136, - "combineLogicalBytes": 13275136, - "fanoutMean": 3.6171875, - "recvTokensMax": 236, - "stragglerRank": 5, + "dispatchLogicalBytes": 5562368, + "combineLogicalBytes": 11124736, + "fanoutMean": 5.3046875, + "recvTokensMax": 186, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 67.80800223350525, + "p90": 70.62400132417679, + "p95": 71.3919997215271, + "p99": 76.99199765920639 + }, + "combine": { + "p50": 71.29599899053574, + "p90": 73.40800017118454, + "p95": 74.23999905586243, + "p99": 77.15199887752533 + }, + "roundtrip": { + "p50": 146.91199362277985, + "p90": 150.176003575325, + "p95": 151.90400183200836, + "p99": 180.51199615001678 + }, + "isolatedSum": { + "p50": 139.10400122404099, + "p90": 144.03200149536133, + "p95": 145.63199877738953, + "p99": 154.14399653673172 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 11096064, + "combineLogicalBytes": 22192128, + "fanoutMean": 5.291015625, + "recvTokensMax": 358, + "stragglerRank": 4, 
"correct": true, "samplesPooled": 600, "trials": 3 @@ -12321,35 +13055,35 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 133.82400572299957, - "p90": 141.08799397945404, - "p95": 142.62400567531586, - "p99": 146.40000462532043 + "p50": 77.11999863386154, + "p90": 79.52000200748444, + "p95": 80.70400357246399, + "p99": 101.1200025677681 }, "combine": { - "p50": 120.28799951076508, - "p90": 122.56000190973282, - "p95": 127.10399925708771, - "p99": 136.00000739097595 + "p50": 87.74399757385254, + "p90": 89.82399851083755, + "p95": 90.91199934482574, + "p99": 96.12800180912018 }, "roundtrip": { - "p50": 221.88800573349, - "p90": 225.79200565814972, - "p95": 227.26400196552277, - "p99": 233.024001121521 + "p50": 178.8800060749054, + "p90": 181.5679967403412, + "p95": 182.8799992799759, + "p99": 190.68799912929535 }, "isolatedSum": { - "p50": 254.11200523376465, - "p90": 263.64799588918686, - "p95": 269.72800493240356, - "p99": 282.4000120162964 + "p50": 164.86399620771408, + "p90": 169.344000518322, + "p95": 171.61600291728973, + "p99": 197.24800437688828 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 53172224, - "combineLogicalBytes": 53172224, - "fanoutMean": 3.6220703125, - "recvTokensMax": 934, - "stragglerRank": 4, + "dispatchLogicalBytes": 22282240, + "combineLogicalBytes": 44564480, + "fanoutMean": 5.3125, + "recvTokensMax": 699, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -12357,46 +13091,47 @@ ] }, { - "id": "cx-62dda1f3", - "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|alternating-groups@s2|8|decode|normal|none|none|2|tuned||3cd13eac5b27759", - "colorKey": "h100_b981a85d", - "comparisonKey": "03a9af950bebf5a9", + "id": "cx-3b501b50", + "identity": "b300|deepep|4096|8|128|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||75530960a30b452", + "colorKey": "b300_c4c63f07", + "comparisonKey": "b3fe3e767199861f", "schemaVersion": 3, - "generatedAt": 
"2026-06-27T00:12:08.462042+00:00", + "generatedAt": "2026-06-27T09:51:42.086775+00:00", "status": "valid", - "publicationStatus": "comparable-experimental", - "runner": "h100-dgxc-slurm_04", - "sku": "h100", + "publicationStatus": "official", + "runner": "b300-nv_09", + "sku": "b300", "backend": "deepep", "phase": "decode", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b300-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · bf16 · alternating-groups@s2", + "label": "B300 EP8 · deepep · fp8", + "model": "Qwen3.5", "shape": { - "hidden": 7168, + "hidden": 4096, "topk": 8, - "experts": 256, - "routing": "alternating-groups", - "routingLabel": "alternating-groups@s2", - "routingStep": 2, + "experts": 128, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, - "dispatchDtype": "bf16", + "dispatchDtype": "fp8", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1515, + "achievedFraction": 0.1351, "configuredUnits": 20, - "deviceUnits": 132, + "deviceUnits": 148, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -12409,54 +13144,202 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "3cd13eac5b27759", - "workloadId": null, - "workloadSource": "seeded-runtime", + "traceSignature": "75530960a30b452", + "workloadId": "set:8:d1b92539bddfb570", + "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, "backendVersion": "1.2.1", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", 
"run": { - "id": "28272335347", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272335347", - "createdAt": "2026-06-27T00:11:16Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28285693587", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285693587", + "createdAt": "2026-06-27T09:51:42.086775+00:00", + "sha": "149586650dbed5b7579537347e9489d5b41543c1" }, "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 112.89600282907486, + "p90": 116.22399836778641, + "p95": 118.40000003576279, + "p99": 145.11999487876892 + }, + "combine": { + "p50": 50.71999877691269, + "p90": 52.57600173354149, + "p95": 53.119998425245285, + "p99": 63.07200342416763 + }, + "roundtrip": { + "p50": 155.45600652694702, + "p90": 158.62399339675903, + "p95": 161.05599701404572, + "p99": 178.27199399471283 + }, + "isolatedSum": { + "p50": 163.61600160598755, + "p90": 168.8000001013279, + "p95": 171.51999846100807, + "p99": 208.19199830293655 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 172032, + "combineLogicalBytes": 344064, + "fanoutMean": 5.25, + "recvTokensMax": 6, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 113.82400244474411, + "p90": 117.18399822711945, + "p95": 119.80800330638885, + "p99": 142.62400567531586 + }, + "combine": { + "p50": 52.000001072883606, + "p90": 53.0879981815815, + "p95": 54.11199852824211, + "p99": 62.3680017888546 + }, + "roundtrip": { + "p50": 156.8640023469925, + "p90": 161.85599565505981, + "p95": 168.44800114631653, + "p99": 217.50399470329285 + }, + "isolatedSum": { + "p50": 165.82400351762772, + "p90": 170.27199640870094, + "p95": 173.92000183463097, + "p99": 204.99200746417046 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 352256, + "combineLogicalBytes": 704512, + "fanoutMean": 5.375, + "recvTokensMax": 12, + "stragglerRank": 7, + 
"correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 115.13599753379822, + "p90": 118.75200271606445, + "p95": 120.86399644613266, + "p99": 133.2480013370514 + }, + "combine": { + "p50": 54.75199967622757, + "p90": 56.703999638557434, + "p95": 56.992001831531525, + "p99": 60.127999633550644 + }, + "roundtrip": { + "p50": 161.21600568294525, + "p90": 165.0879979133606, + "p95": 167.00799763202667, + "p99": 186.91200017929077 + }, + "isolatedSum": { + "p50": 169.8879972100258, + "p90": 175.4560023546219, + "p95": 177.85599827766418, + "p99": 193.37600097060204 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 692224, + "combineLogicalBytes": 1384448, + "fanoutMean": 5.28125, + "recvTokensMax": 26, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, { "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 91.96799993515015, - "p90": 101.85600072145462, - "p95": 102.88000106811523, - "p99": 111.00800335407257 + "p50": 116.31999909877777, + "p90": 118.9119964838028, + "p95": 120.70400267839432, + "p99": 126.49600207805634 }, "combine": { - "p50": 76.60800218582153, - "p90": 81.60000294446945, - "p95": 82.17599987983704, - "p99": 85.21600067615509 + "p50": 56.2559999525547, + "p90": 57.88800120353699, + "p95": 58.559998869895935, + "p99": 72.7040022611618 }, "roundtrip": { - "p50": 146.7839926481247, - "p90": 152.6080071926117, - "p95": 154.27200496196747, - "p99": 160.99199652671814 + "p50": 163.00800442695618, + "p90": 166.20799899101257, + "p95": 167.93599724769592, + "p99": 180.86400628089905 }, "isolatedSum": { - "p50": 168.57600212097168, - "p90": 183.45600366592407, - "p95": 185.05600094795227, - "p99": 196.22400403022766 + "p50": 172.57599905133247, + "p90": 176.79999768733978, + "p95": 179.26400154829025, + "p99": 199.20000433921814 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 3297280, - "combineLogicalBytes": 3297280, - 
"fanoutMean": 3.59375, - "recvTokensMax": 61, - "stragglerRank": 0, + "dispatchLogicalBytes": 1372160, + "combineLogicalBytes": 2744320, + "fanoutMean": 5.234375, + "recvTokensMax": 49, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 116.70400202274323, + "p90": 119.58400160074234, + "p95": 121.76000326871872, + "p99": 145.88800072669983 + }, + "combine": { + "p50": 56.832000613212585, + "p90": 58.62399935722351, + "p95": 59.007998555898666, + "p99": 66.880002617836 + }, + "roundtrip": { + "p50": 165.72800278663635, + "p90": 169.21600699424744, + "p95": 170.8800047636032, + "p99": 185.72799861431122 + }, + "isolatedSum": { + "p50": 173.5360026359558, + "p90": 178.20800095796585, + "p95": 180.7680018246174, + "p99": 212.76800334453583 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2732032, + "combineLogicalBytes": 5464064, + "fanoutMean": 5.2109375, + "recvTokensMax": 94, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -12465,35 +13348,72 @@ "tokensPerRank": 32, "globalTokens": 256, "dispatch": { - "p50": 98.88000041246414, - "p90": 104.06400263309479, - "p95": 106.30399733781815, - "p99": 139.42399621009827 + "p50": 119.52000111341476, + "p90": 122.72000312805176, + "p95": 124.60800260305405, + "p99": 160.5439931154251 }, "combine": { - "p50": 84.60800349712372, - "p90": 86.30400151014328, - "p95": 86.81599795818329, - "p99": 92.51199662685394 + "p50": 59.67999994754791, + "p90": 61.63199990987778, + "p95": 62.24000081419945, + "p99": 65.79200178384781 }, "roundtrip": { - "p50": 154.65599298477173, - "p90": 160.64000129699707, - "p95": 162.59199380874634, - "p99": 168.09600591659546 + "p50": 170.68800330162048, + "p90": 173.98400604724884, + "p95": 175.64800381660461, + "p99": 187.6160055398941 }, "isolatedSum": { - "p50": 183.48800390958786, - "p90": 190.36800414323807, - "p95": 193.11999529600143, - "p99": 
231.9359928369522 + "p50": 179.20000106096268, + "p90": 184.35200303792953, + "p95": 186.8480034172535, + "p99": 226.33599489927292 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 13275136, - "combineLogicalBytes": 13275136, - "fanoutMean": 3.6171875, - "recvTokensMax": 236, - "stragglerRank": 0, + "dispatchLogicalBytes": 5562368, + "combineLogicalBytes": 11124736, + "fanoutMean": 5.3046875, + "recvTokensMax": 186, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 130.68799674510956, + "p90": 134.97599959373474, + "p95": 138.5280042886734, + "p99": 150.84800124168396 + }, + "combine": { + "p50": 72.51200079917908, + "p90": 74.5600014925003, + "p95": 75.23199915885925, + "p99": 79.19999957084656 + }, + "roundtrip": { + "p50": 195.10400295257568, + "p90": 199.42399859428406, + "p95": 202.04800367355347, + "p99": 217.95199811458588 + }, + "isolatedSum": { + "p50": 203.19999754428864, + "p90": 209.53600108623505, + "p95": 213.76000344753265, + "p99": 230.04800081253052 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 11096064, + "combineLogicalBytes": 22192128, + "fanoutMean": 5.291015625, + "recvTokensMax": 358, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -12502,35 +13422,35 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 141.50400459766388, - "p90": 146.68799936771393, - "p95": 147.67999947071075, - "p99": 152.41600573062897 + "p50": 145.53600549697876, + "p90": 148.70400726795197, + "p95": 150.07999539375305, + "p99": 157.47199952602386 }, "combine": { - "p50": 118.17599833011627, - "p90": 122.56000190973282, - "p95": 123.58400225639343, - "p99": 125.82400441169739 + "p50": 89.6959975361824, + "p90": 92.12800115346909, + "p95": 93.56799721717834, + "p99": 105.12000322341919 }, "roundtrip": { - "p50": 227.13600099086761, - "p90": 231.23200237751007, - "p95": 232.92799293994904, - "p99": 
237.05600202083588 + "p50": 228.28799486160278, + "p90": 232.35200345516205, + "p95": 234.23999547958374, + "p99": 251.3279914855957 }, "isolatedSum": { - "p50": 259.68000292778015, - "p90": 269.24800127744675, - "p95": 271.2640017271042, - "p99": 278.24001014232635 + "p50": 235.23200303316116, + "p90": 240.83200842142105, + "p95": 243.6479926109314, + "p99": 262.59200274944305 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 53172224, - "combineLogicalBytes": 53172224, - "fanoutMean": 3.6220703125, - "recvTokensMax": 934, - "stragglerRank": 4, + "dispatchLogicalBytes": 22282240, + "combineLogicalBytes": 44564480, + "fanoutMean": 5.3125, + "recvTokensMax": 699, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -12538,46 +13458,47 @@ ] }, { - "id": "cx-f337d9a1", - "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|alternating-groups@s3|8|decode|normal|none|none|3|tuned||f8662de0b3559f9", - "colorKey": "h100_b881a6ca", - "comparisonKey": "03a9af950bebf5a9", + "id": "cx-59d44b57", + "identity": "b300|deepep|5120|8|160|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||be1b44a963bd4ef", + "colorKey": "b300_c4c63f07", + "comparisonKey": "16e666f429329305", "schemaVersion": 3, - "generatedAt": "2026-06-27T00:12:29.724404+00:00", + "generatedAt": "2026-06-27T09:52:10.269764+00:00", "status": "valid", - "publicationStatus": "comparable-experimental", - "runner": "h100-dgxc-slurm_15", - "sku": "h100", + "publicationStatus": "official", + "runner": "b300-nv_10", + "sku": "b300", "backend": "deepep", "phase": "decode", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b300-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · bf16 · 
alternating-groups@s3", + "label": "B300 EP8 · deepep · fp8", + "model": "shape 5120/8/160", "shape": { - "hidden": 7168, + "hidden": 5120, "topk": 8, - "experts": 256, - "routing": "alternating-groups", - "routingLabel": "alternating-groups@s3", - "routingStep": 3, + "experts": 160, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, - "dispatchDtype": "bf16", + "dispatchDtype": "fp8", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1515, + "achievedFraction": 0.1351, "configuredUnits": 20, - "deviceUnits": 132, + "deviceUnits": 148, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -12590,272 +13511,202 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "f8662de0b3559f9", - "workloadId": null, - "workloadSource": "seeded-runtime", + "traceSignature": "be1b44a963bd4ef", + "workloadId": "set:8:34e5874082f8ea8f", + "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, "backendVersion": "1.2.1", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28272338723", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272338723", - "createdAt": "2026-06-27T00:11:23Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28285705053", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285705053", + "createdAt": "2026-06-27T09:52:10.269764+00:00", + "sha": "149586650dbed5b7579537347e9489d5b41543c1" }, "rows": [ { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 1, + "globalTokens": 8, "dispatch": { - "p50": 94.84799951314926, - "p90": 121.37600034475327, - "p95": 148.8959938287735, - "p99": 189.56799805164337 + "p50": 111.7120012640953, + "p90": 
115.52000045776367, + "p95": 118.17599833011627, + "p99": 129.08799946308136 }, "combine": { - "p50": 79.58400249481201, - "p90": 96.6079980134964, - "p95": 113.0559965968132, - "p99": 123.77600371837616 + "p50": 54.62399870157242, + "p90": 55.93600124120712, + "p95": 56.89600110054016, + "p99": 59.74400043487549 }, "roundtrip": { - "p50": 148.44800531864166, - "p90": 183.20000171661377, - "p95": 218.78400444984436, - "p99": 249.79199469089508 + "p50": 156.0640037059784, + "p90": 160.35200655460358, + "p95": 163.13600540161133, + "p99": 179.967999458313 }, "isolatedSum": { - "p50": 174.43200200796127, - "p90": 217.98399835824966, - "p95": 261.9519904255867, - "p99": 313.34400177001953 + "p50": 166.33599996566772, + "p90": 171.4560016989708, + "p95": 175.07199943065643, + "p99": 188.83199989795685 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 3297280, - "combineLogicalBytes": 3297280, - "fanoutMean": 3.59375, - "recvTokensMax": 61, - "stragglerRank": 5, + "dispatchLogicalBytes": 215040, + "combineLogicalBytes": 430080, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 32, - "globalTokens": 256, + "tokensPerRank": 2, + "globalTokens": 16, "dispatch": { - "p50": 100.41599720716476, - "p90": 127.55200266838074, - "p95": 156.5759927034378, - "p99": 182.81599879264832 + "p50": 112.15999722480774, + "p90": 115.7120019197464, + "p95": 117.60000139474869, + "p99": 126.24000012874603 }, "combine": { - "p50": 87.8399983048439, - "p90": 103.93600165843964, - "p95": 120.38400024175644, - "p99": 128.89599800109863 + "p50": 55.64799904823303, + "p90": 57.24800005555153, + "p95": 58.079998940229416, + "p99": 65.11999666690826 }, "roundtrip": { - "p50": 156.99200332164764, - "p90": 193.7599927186966, - "p95": 223.7119972705841, - "p99": 247.23200500011444 + "p50": 158.720001578331, + "p90": 163.07200491428375, + "p95": 167.26399958133698, + "p99": 184.28799510002136 }, 
"isolatedSum": { - "p50": 188.25599551200867, - "p90": 231.48800432682037, - "p95": 276.95999294519424, - "p99": 311.71199679374695 + "p50": 167.80799627304077, + "p90": 172.96000197529793, + "p95": 175.6800003349781, + "p99": 191.3599967956543 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 13275136, - "combineLogicalBytes": 13275136, - "fanoutMean": 3.6171875, - "recvTokensMax": 236, - "stragglerRank": 5, + "dispatchLogicalBytes": 440320, + "combineLogicalBytes": 880640, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 4, + "globalTokens": 32, "dispatch": { - "p50": 134.2719942331314, - "p90": 147.39200472831726, - "p95": 173.567995429039, - "p99": 188.1919950246811 + "p50": 114.43199962377548, + "p90": 120.35199999809265, + "p95": 123.74400347471237, + "p99": 146.68799936771393 }, "combine": { - "p50": 120.44800072908401, - "p90": 138.62399756908417, - "p95": 152.38399803638458, - "p99": 160.96000373363495 + "p50": 60.127999633550644, + "p90": 61.85600161552429, + "p95": 63.07200342416763, + "p99": 65.43999910354614 }, "roundtrip": { - "p50": 222.6880043745041, - "p90": 247.80799448490143, - "p95": 264.6079957485199, - "p99": 279.35999631881714 + "p50": 164.99200463294983, + "p90": 170.1119989156723, + "p95": 173.5360026359558, + "p99": 206.7520022392273 }, "isolatedSum": { - "p50": 254.71999496221542, - "p90": 286.0160022974014, - "p95": 325.9519934654236, - "p99": 349.15199875831604 + "p50": 174.55999925732613, + "p90": 182.20800161361694, + "p95": 186.81600689888, + "p99": 212.12799847126007 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 53172224, - "combineLogicalBytes": 53172224, - "fanoutMean": 3.6220703125, - "recvTokensMax": 934, - "stragglerRank": 5, + "dispatchLogicalBytes": 870400, + "combineLogicalBytes": 1740800, + "fanoutMean": 5.3125, + "recvTokensMax": 25, + "stragglerRank": 4, "correct": 
true, "samplesPooled": 600, "trials": 3 - } - ] - }, - { - "id": "cx-cf5bc26b", - "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|decode|normal|none|none|0|tuned||2279937619f3971", - "colorKey": "h100_16047c28", - "comparisonKey": "64192d9d479bdd44", - "schemaVersion": 3, - "generatedAt": "2026-06-26T23:54:33.118563+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_12", - "sku": "h100", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · bf16 · balanced", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "balanced", - "routingLabel": "balanced", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "2279937619f3971", - "workloadId": "set:4:7af12818400d6348", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28271788376", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271788376", - "createdAt": "2026-06-26T23:53:36Z", 
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ + }, { - "tokensPerRank": 1, - "globalTokens": 8, + "tokensPerRank": 8, + "globalTokens": 64, "dispatch": { - "p50": 94.68799829483032, - "p90": 101.1200025677681, - "p95": 104.41599786281586, - "p99": 111.10399663448334 + "p50": 114.59200084209442, + "p90": 117.72800236940384, + "p95": 119.4240003824234, + "p99": 133.5040032863617 }, "combine": { - "p50": 80.99199831485748, - "p90": 86.84799820184708, - "p95": 87.8399983048439, - "p99": 89.9519994854927 + "p50": 59.58399921655655, + "p90": 61.40799820423126, + "p95": 62.111999839544296, + "p99": 65.18399715423584 }, "roundtrip": { - "p50": 150.30400454998016, - "p90": 156.95999562740326, - "p95": 159.67999398708344, - "p99": 164.15999829769135 + "p50": 166.17600619792938, + "p90": 170.33599317073822, + "p95": 173.21600019931793, + "p99": 191.48799777030945 }, "isolatedSum": { - "p50": 175.6799966096878, - "p90": 187.96800076961517, - "p95": 192.25599616765976, - "p99": 201.05599611997604 + "p50": 174.17600005865097, + "p90": 179.1360005736351, + "p95": 181.5360002219677, + "p99": 198.68800044059753 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 917504, - "combineLogicalBytes": 917504, - "fanoutMean": 8, - "recvTokensMax": 8, - "stragglerRank": 7, + "dispatchLogicalBytes": 1735680, + "combineLogicalBytes": 3471360, + "fanoutMean": 5.296875, + "recvTokensMax": 50, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 16, + "globalTokens": 128, "dispatch": { - "p50": 95.0080007314682, - "p90": 100.00000149011612, - "p95": 102.68799960613251, - "p99": 108.57599973678589 + "p50": 115.55200070142746, + "p90": 119.84000355005264, + "p95": 121.79200351238251, + "p99": 141.50400459766388 }, "combine": { - "p50": 81.727996468544, - "p90": 88.51200342178345, - "p95": 89.37600255012512, - "p99": 90.59199690818787 + "p50": 61.28000095486641, + "p90": 
63.231997191905975, + "p95": 63.840001821517944, + "p99": 69.88800317049026 }, "roundtrip": { - "p50": 150.65599977970123, - "p90": 159.58400070667267, - "p95": 161.50400042533875, - "p99": 167.42399334907532 + "p50": 167.1999990940094, + "p90": 171.55200242996216, + "p95": 174.43199455738068, + "p99": 185.12000143527985 }, "isolatedSum": { - "p50": 176.7359972000122, - "p90": 188.51200491189957, - "p95": 192.06400215625763, - "p99": 199.16799664497375 + "p50": 176.83200165629387, + "p90": 183.07200074195862, + "p95": 185.63200533390045, + "p99": 211.39200776815414 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 7340032, - "combineLogicalBytes": 7340032, - "fanoutMean": 8, - "recvTokensMax": 64, - "stragglerRank": 7, + "dispatchLogicalBytes": 3456000, + "combineLogicalBytes": 6912000, + "fanoutMean": 5.2734375, + "recvTokensMax": 93, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -12864,35 +13715,72 @@ "tokensPerRank": 32, "globalTokens": 256, "dispatch": { - "p50": 104.63999956846237, - "p90": 112.28799819946289, - "p95": 114.14399743080139, - "p99": 119.84000355005264 + "p50": 120.51200121641159, + "p90": 123.6800029873848, + "p95": 125.59999525547028, + "p99": 131.32800161838531 }, "combine": { - "p50": 92.25600212812424, - "p90": 97.69599884748459, - "p95": 98.39999675750732, - "p99": 104.47999835014343 + "p50": 64.96000289916992, + "p90": 66.94400310516357, + "p95": 67.29599833488464, + "p99": 75.39200037717819 }, "roundtrip": { - "p50": 164.000004529953, - "p90": 171.64799571037292, - "p95": 175.4560023546219, - "p99": 228.4799963235855 + "p50": 175.20000040531158, + "p90": 179.32799458503723, + "p95": 183.77600610256195, + "p99": 198.62399995326996 }, "isolatedSum": { - "p50": 196.8960016965866, - "p90": 209.98399704694748, - "p95": 212.54399418830872, - "p99": 224.32000190019608 + "p50": 185.4720041155815, + "p90": 190.62400609254837, + "p95": 192.89599359035492, + "p99": 206.7200019955635 }, "roundtripMeasured": true, - 
"dispatchLogicalBytes": 29360128, - "combineLogicalBytes": 29360128, - "fanoutMean": 8, - "recvTokensMax": 256, - "stragglerRank": 7, + "dispatchLogicalBytes": 6988800, + "combineLogicalBytes": 13977600, + "fanoutMean": 5.33203125, + "recvTokensMax": 179, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 131.71200454235077, + "p90": 136.60800457000732, + "p95": 139.13600146770477, + "p99": 152.96000242233276 + }, + "combine": { + "p50": 77.85599678754807, + "p90": 79.99999821186066, + "p95": 80.64000308513641, + "p99": 85.02399921417236 + }, + "roundtrip": { + "p50": 201.664000749588, + "p90": 206.4639925956726, + "p95": 208.19200575351715, + "p99": 221.98399901390076 + }, + "isolatedSum": { + "p50": 209.56800132989883, + "p90": 216.60800278186798, + "p95": 219.7760045528412, + "p99": 237.98400163650513 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13987840, + "combineLogicalBytes": 27975680, + "fanoutMean": 5.3359375, + "recvTokensMax": 355, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -12901,35 +13789,35 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 143.93599331378937, - "p90": 148.00000190734863, - "p95": 149.79200065135956, - "p99": 155.68000078201294 + "p50": 151.7760008573532, + "p90": 157.05600380897522, + "p95": 159.7760021686554, + "p99": 171.58399522304535 }, "combine": { - "p50": 132.06399977207184, - "p90": 138.75199854373932, - "p95": 139.29599523544312, - "p99": 145.6959992647171 + "p50": 98.39999675750732, + "p90": 104.38399761915207, + "p95": 108.51199924945831, + "p99": 120.38400024175644 }, "roundtrip": { - "p50": 241.2479966878891, - "p90": 247.6480007171631, - "p95": 249.15200471878052, - "p99": 252.76800990104675 + "p50": 242.0479953289032, + "p90": 246.59200012683868, + "p95": 248.51199984550476, + "p99": 264.384001493454 }, "isolatedSum": { - "p50": 275.9999930858612, 
- "p90": 286.75200045108795, - "p95": 289.0879958868027, - "p99": 301.37600004673004 + "p50": 250.17599761486053, + "p90": 261.4400014281273, + "p95": 268.2880014181137, + "p99": 291.9679954648018 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 117440512, - "combineLogicalBytes": 117440512, - "fanoutMean": 8, - "recvTokensMax": 1024, - "stragglerRank": 7, + "dispatchLogicalBytes": 27837440, + "combineLogicalBytes": 55674880, + "fanoutMean": 5.3095703125, + "recvTokensMax": 699, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -12937,16 +13825,16 @@ ] }, { - "id": "cx-4d49fd79", - "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|decode|normal|none|none|0|tuned||ffa946582edb500", - "colorKey": "h100_16047c28", - "comparisonKey": "64192d9d479bdd44", + "id": "cx-a0445944", + "identity": "b300|deepep|6144|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "b300_d6fd14c3", + "comparisonKey": "e7da15664ffcf0f8", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:59:13.030328+00:00", + "generatedAt": "2026-06-27T11:13:45.257215+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h100-dgxc-slurm_03", - "sku": "h100", + "runner": "b300-nv_05", + "sku": "b300", "backend": "deepep", "phase": "decode", "mode": "normal", @@ -12954,29 +13842,30 @@ "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", + "topologyClass": "b300-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · bf16 · balanced", + "label": "B300 EP8 · deepep · fp8", + "model": "MiniMax-M3", "shape": { - "hidden": 7168, + "hidden": 6144, "topk": 8, "experts": 256, - "routing": "balanced", - "routingLabel": "balanced", + "routing": "uniform", + "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", 
"eplbEnabled": false, - "dispatchDtype": "bf16", + "dispatchDtype": "fp8", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1515, + "achievedFraction": 0.1351, "configuredUnits": 20, - "deviceUnits": 132, + "deviceUnits": 148, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -12989,8 +13878,8 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "ffa946582edb500", - "workloadId": "set:8:7af12818400d6348", + "traceSignature": "ac583971f94b176", + "workloadId": "set:8:2e0df6a62cd0143e", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -12998,45 +13887,45 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271931349", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271931349", - "createdAt": "2026-06-26T23:58:18Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28287498289", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28287498289", + "createdAt": "2026-06-27T11:13:45.257215+00:00", + "sha": "df7fddee0a275156d4c72fa006cd2b73bce72613" }, "rows": [ { "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 95.93600034713745, - "p90": 101.56799852848053, - "p95": 103.13600301742554, - "p99": 107.744000852108 + "p50": 55.03999814391136, + "p90": 57.312000542879105, + "p95": 58.78400057554245, + "p99": 68.12799721956253 }, "combine": { - "p50": 80.89599758386612, - "p90": 87.07199990749359, - "p95": 87.8399983048439, - "p99": 89.40800279378891 + "p50": 56.48000165820122, + "p90": 57.920001447200775, + "p95": 58.720000088214874, + "p99": 66.52799993753433 }, "roundtrip": { - "p50": 151.42400562763214, - "p90": 160.12799739837646, - "p95": 172.86400496959686, - "p99": 232.12799429893494 + "p50": 
114.656001329422, + "p90": 116.99199676513672, + "p95": 118.9119964838028, + "p99": 136.19199395179749 }, "isolatedSum": { - "p50": 176.83199793100357, - "p90": 188.63999843597412, - "p95": 190.97600132226944, - "p99": 197.1520036458969 + "p50": 111.51999980211258, + "p90": 115.23200199007988, + "p95": 117.50400066375732, + "p99": 134.65599715709686 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 917504, - "combineLogicalBytes": 917504, - "fanoutMean": 8, - "recvTokensMax": 8, - "stragglerRank": 0, + "dispatchLogicalBytes": 270336, + "combineLogicalBytes": 540672, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -13045,35 +13934,35 @@ "tokensPerRank": 2, "globalTokens": 16, "dispatch": { - "p50": 96.47999703884125, - "p90": 103.42399775981903, - "p95": 107.71200060844421, - "p99": 161.40800714492798 + "p50": 55.96800148487091, + "p90": 58.079998940229416, + "p95": 59.23200026154518, + "p99": 67.07199662923813 }, "combine": { - "p50": 81.11999928951263, - "p90": 87.61599659919739, - "p95": 89.1840010881424, - "p99": 185.5359971523285 + "p50": 58.720000088214874, + "p90": 60.06399914622307, + "p95": 60.70400029420853, + "p99": 68.35199892520905 }, "roundtrip": { - "p50": 153.43999862670898, - "p90": 159.4880074262619, - "p95": 163.71199488639832, - "p99": 313.1200075149536 + "p50": 117.88800358772278, + "p90": 120.19199877977371, + "p95": 122.3360002040863, + "p99": 133.760005235672 }, "isolatedSum": { - "p50": 177.59999632835388, - "p90": 191.03999435901642, - "p95": 196.8960016965866, - "p99": 346.94400429725647 + "p50": 114.68800157308578, + "p90": 118.14399808645248, + "p95": 119.93600055575371, + "p99": 135.42399555444717 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1835008, - "combineLogicalBytes": 1835008, - "fanoutMean": 8, - "recvTokensMax": 16, - "stragglerRank": 0, + "dispatchLogicalBytes": 528384, + "combineLogicalBytes": 1056768, + "fanoutMean": 5.375, + 
"recvTokensMax": 13, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -13082,35 +13971,35 @@ "tokensPerRank": 4, "globalTokens": 32, "dispatch": { - "p50": 95.32800316810608, - "p90": 100.3199964761734, - "p95": 102.1760031580925, - "p99": 106.84800148010254 + "p50": 57.98399820923805, + "p90": 60.15999987721443, + "p95": 61.40799820423126, + "p99": 68.44799965620041 }, "combine": { - "p50": 80.32000064849854, - "p90": 84.22400057315826, - "p95": 88.41600269079208, - "p99": 90.14400094747543 + "p50": 60.896001756191254, + "p90": 62.94400244951248, + "p95": 63.4239986538887, + "p99": 69.023996591568 }, "roundtrip": { - "p50": 150.94399452209473, - "p90": 158.4639996290207, - "p95": 159.90400314331055, - "p99": 163.32800686359406 + "p50": 121.47200107574463, + "p90": 123.87199699878693, + "p95": 125.05599856376648, + "p99": 135.48800349235535 }, "isolatedSum": { - "p50": 175.64800381660461, - "p90": 184.54399704933167, - "p95": 190.59200584888458, - "p99": 196.99200242757797 + "p50": 118.8799999654293, + "p90": 123.10400232672691, + "p95": 124.83199685811996, + "p99": 137.4719962477684 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 3670016, - "combineLogicalBytes": 3670016, - "fanoutMean": 8, - "recvTokensMax": 32, - "stragglerRank": 5, + "dispatchLogicalBytes": 1062912, + "combineLogicalBytes": 2125824, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -13119,35 +14008,35 @@ "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 94.87999975681305, - "p90": 98.43199700117111, - "p95": 100.3199964761734, - "p99": 105.3759977221489 + "p50": 59.039998799562454, + "p90": 61.08799949288368, + "p95": 62.3680017888546, + "p99": 72.89600372314453 }, "combine": { - "p50": 80.54400235414505, - "p90": 87.20000088214874, - "p95": 88.73599767684937, - "p99": 89.82399851083755 + "p50": 62.94400244951248, + "p90": 64.41599875688553, + "p95": 65.05600363016129, + 
"p99": 69.15199756622314 }, "roundtrip": { - "p50": 152.0960032939911, - "p90": 158.65600109100342, - "p95": 160.16000509262085, - "p99": 166.97600483894348 + "p50": 125.08800625801086, + "p90": 127.13600695133209, + "p95": 130.23999333381653, + "p99": 145.9520012140274 }, "isolatedSum": { - "p50": 175.4240021109581, - "p90": 185.63199788331985, - "p95": 189.05599415302277, - "p99": 195.19999623298645 + "p50": 121.98400124907494, + "p90": 125.50399824976921, + "p95": 127.42400541901588, + "p99": 142.04800128936768 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 7340032, - "combineLogicalBytes": 7340032, - "fanoutMean": 8, - "recvTokensMax": 64, - "stragglerRank": 5, + "dispatchLogicalBytes": 2131968, + "combineLogicalBytes": 4263936, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -13156,35 +14045,35 @@ "tokensPerRank": 16, "globalTokens": 128, "dispatch": { - "p50": 95.93600034713745, - "p90": 103.20000350475311, - "p95": 106.20799660682678, - "p99": 168.57600212097168 + "p50": 59.776000678539276, + "p90": 61.91999837756157, + "p95": 63.00800293684006, + "p99": 73.27999919652939 }, "combine": { - "p50": 84.3840017914772, - "p90": 89.40800279378891, - "p95": 89.75999802350998, - "p99": 94.84799951314926 + "p50": 63.551999628543854, + "p90": 65.50399959087372, + "p95": 66.97600334882736, + "p99": 72.03199714422226 }, "roundtrip": { - "p50": 154.84799444675446, - "p90": 161.02400422096252, - "p95": 163.7440025806427, - "p99": 497.50399589538574 + "p50": 126.39999389648438, + "p90": 128.86400520801544, + "p95": 130.3360015153885, + "p99": 143.74400675296783 }, "isolatedSum": { - "p50": 180.32000213861465, - "p90": 192.60800629854202, - "p95": 195.96799463033676, - "p99": 263.42400163412094 + "p50": 123.32800030708313, + "p90": 127.42399796843529, + "p95": 129.98400628566742, + "p99": 145.31199634075165 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 14680064, - 
"combineLogicalBytes": 14680064, - "fanoutMean": 8, - "recvTokensMax": 128, - "stragglerRank": 5, + "dispatchLogicalBytes": 4251648, + "combineLogicalBytes": 8503296, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -13193,35 +14082,35 @@ "tokensPerRank": 32, "globalTokens": 256, "dispatch": { - "p50": 103.16800326108932, - "p90": 109.37599837779999, - "p95": 110.75200140476227, - "p99": 113.43999952077866 + "p50": 63.1679967045784, + "p90": 65.8240020275116, + "p95": 67.03999638557434, + "p99": 75.48800110816956 }, "combine": { - "p50": 88.79999816417694, - "p90": 95.74399888515472, - "p95": 97.120001912117, - "p99": 97.95200079679489 + "p50": 68.41599941253662, + "p90": 70.81600278615952, + "p95": 71.52000069618225, + "p99": 95.04000097513199 }, "roundtrip": { - "p50": 161.6639941930771, - "p90": 167.1999990940094, - "p95": 168.73599588871002, - "p99": 172.89599776268005 + "p50": 135.96799969673157, + "p90": 138.59200477600098, + "p95": 140.25600254535675, + "p99": 151.32799744606018 }, "isolatedSum": { - "p50": 191.96800142526627, - "p90": 205.1199972629547, - "p95": 207.87200331687927, - "p99": 211.39200031757355 + "p50": 131.58399611711502, + "p90": 136.6400048136711, + "p95": 138.5599970817566, + "p99": 170.52800208330154 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 29360128, - "combineLogicalBytes": 29360128, - "fanoutMean": 8, - "recvTokensMax": 256, - "stragglerRank": 5, + "dispatchLogicalBytes": 8454144, + "combineLogicalBytes": 16908288, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -13230,35 +14119,35 @@ "tokensPerRank": 64, "globalTokens": 512, "dispatch": { - "p50": 119.6800023317337, - "p90": 128.00000607967377, - "p95": 129.05600666999817, - "p99": 133.91999900341034 + "p50": 70.52800059318542, + "p90": 73.31199944019318, + "p95": 75.3600001335144, + "p99": 84.83199775218964 }, "combine": 
{ - "p50": 103.16800326108932, - "p90": 106.55999928712845, - "p95": 107.90400207042694, - "p99": 113.63200098276138 + "p50": 82.30400085449219, + "p90": 84.19200032949448, + "p95": 85.28000116348267, + "p99": 99.61599856615067 }, "roundtrip": { - "p50": 186.71999871730804, - "p90": 194.65599954128265, - "p95": 196.31999731063843, - "p99": 199.48799908161163 + "p50": 163.35999965667725, + "p90": 165.8560037612915, + "p95": 167.71200299263, + "p99": 189.11999464035034 }, "isolatedSum": { - "p50": 222.84800559282303, - "p90": 234.56000536680222, - "p95": 236.9600087404251, - "p99": 247.55199998617172 + "p50": 152.8320014476776, + "p90": 157.50399976968765, + "p95": 160.64000129699707, + "p99": 184.4479963183403 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 58720256, - "combineLogicalBytes": 58720256, - "fanoutMean": 8, - "recvTokensMax": 512, - "stragglerRank": 5, + "dispatchLogicalBytes": 16711680, + "combineLogicalBytes": 33423360, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -13267,35 +14156,35 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 137.66400516033173, - "p90": 146.7200070619583, - "p95": 147.8080004453659, - "p99": 151.10400319099426 - }, + "p50": 81.40800148248672, + "p90": 84.09599959850311, + "p95": 85.66399663686752, + "p99": 95.29600292444229 + }, "combine": { - "p50": 131.1360001564026, - "p90": 137.82399892807007, - "p95": 138.46400380134583, - "p99": 145.28000354766846 + "p50": 102.14400291442871, + "p90": 104.3199971318245, + "p95": 105.72800040245056, + "p99": 114.72000181674957 }, "roundtrip": { - "p50": 241.40800535678864, - "p90": 248.60799312591553, - "p95": 250.59199333190918, - "p99": 258.5600018501282 + "p50": 205.9839963912964, + "p90": 208.99200439453125, + "p95": 210.4959934949875, + "p99": 222.04799950122833 }, "isolatedSum": { - "p50": 268.8000053167343, - "p90": 284.5440059900284, - "p95": 286.27200424671173, - "p99": 
296.3840067386627 + "p50": 183.55200439691544, + "p90": 188.4159967303276, + "p95": 191.39199703931808, + "p99": 210.01600474119186 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 117440512, - "combineLogicalBytes": 117440512, - "fanoutMean": 8, - "recvTokensMax": 1024, - "stragglerRank": 0, + "dispatchLogicalBytes": 33288192, + "combineLogicalBytes": 66576384, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -13303,46 +14192,47 @@ ] }, { - "id": "cx-38b8b0c2", - "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|8|decode|normal|none|none|0|tuned||d02a66236b524b8", - "colorKey": "h100_0c515f8b", - "comparisonKey": "47e8e48c891afabb", + "id": "cx-429a4a40", + "identity": "b300|deepep|6144|8|256|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "b300_c4c63f07", + "comparisonKey": "fe452cc5767ffbdd", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:54:43.774495+00:00", + "generatedAt": "2026-06-27T09:52:37.801228+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h100-dgxc-slurm_09", - "sku": "h100", + "runner": "b300-nv_16", + "sku": "b300", "backend": "deepep", "phase": "decode", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b300-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · bf16 · balanced-rank-local", + "label": "B300 EP8 · deepep · fp8", + "model": "MiniMax-M3", "shape": { - "hidden": 7168, + "hidden": 6144, "topk": 8, "experts": 256, - "routing": "balanced-rank-local", - "routingLabel": "balanced-rank-local", + "routing": "uniform", + "routingLabel": "uniform", "routingStep": 0, 
"unevenTokens": "none", "eplbEnabled": false, - "dispatchDtype": "bf16", + "dispatchDtype": "fp8", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1515, + "achievedFraction": 0.1351, "configuredUnits": 20, - "deviceUnits": 132, + "deviceUnits": 148, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -13355,8 +14245,8 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "d02a66236b524b8", - "workloadId": "set:4:2eebbed158fe1320", + "traceSignature": "ac583971f94b176", + "workloadId": "set:8:2e0df6a62cd0143e", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -13364,45 +14254,119 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271795429", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271795429", - "createdAt": "2026-06-26T23:53:50Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28285716223", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285716223", + "createdAt": "2026-06-27T09:52:37.801228+00:00", + "sha": "149586650dbed5b7579537347e9489d5b41543c1" }, "rows": [ { "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 96.03200107812881, - "p90": 102.49599814414978, - "p95": 105.66399991512299, - "p99": 117.88800358772278 + "p50": 114.84800279140472, + "p90": 118.6240017414093, + "p95": 120.51200121641159, + "p99": 149.3760049343109 }, "combine": { - "p50": 71.45600020885468, - "p90": 73.98399710655212, - "p95": 77.18399912118912, - "p99": 81.56800270080566 + "p50": 58.49599838256836, + "p90": 60.22400036454201, + "p95": 60.95999851822853, + "p99": 64.64000046253204 }, "roundtrip": { - "p50": 142.04800128936768, - "p90": 149.98400211334229, - "p95": 151.45599842071533, - "p99": 
159.07199680805206 + "p50": 165.0879979133606, + "p90": 168.2880073785782, + "p95": 170.30400037765503, + "p99": 177.34399437904358 }, "isolatedSum": { - "p50": 167.4880012869835, - "p90": 176.4799952507019, - "p95": 182.8479990363121, - "p99": 199.45600628852844 + "p50": 173.34400117397308, + "p90": 178.8480021059513, + "p95": 181.47199973464012, + "p99": 214.01600539684296 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 114688, - "combineLogicalBytes": 114688, - "fanoutMean": 1, - "recvTokensMax": 4, - "stragglerRank": 6, + "dispatchLogicalBytes": 270336, + "combineLogicalBytes": 540672, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 115.167997777462, + "p90": 118.17599833011627, + "p95": 120.06399780511856, + "p99": 135.16800105571747 + }, + "combine": { + "p50": 59.55199897289276, + "p90": 61.15199998021126, + "p95": 62.04799935221672, + "p99": 64.31999802589417 + }, + "roundtrip": { + "p50": 164.57599401474, + "p90": 168.35199296474457, + "p95": 170.46399414539337, + "p99": 185.47199666500092 + }, + "isolatedSum": { + "p50": 174.71999675035477, + "p90": 179.32799831032753, + "p95": 182.11199715733528, + "p99": 199.48799908161163 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 528384, + "combineLogicalBytes": 1056768, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 118.75200271606445, + "p90": 124.32000041007996, + "p95": 128.80000472068787, + "p99": 145.56799829006195 + }, + "combine": { + "p50": 62.68800050020218, + "p90": 64.41599875688553, + "p95": 65.0240033864975, + "p99": 73.82400333881378 + }, + "roundtrip": { + "p50": 170.6559956073761, + "p90": 174.6560037136078, + "p95": 176.83200538158417, + "p99": 186.88000738620758 + }, + 
"isolatedSum": { + "p50": 181.44000321626663, + "p90": 188.73599916696548, + "p95": 193.82400810718536, + "p99": 219.39200162887573 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1062912, + "combineLogicalBytes": 2125824, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -13411,35 +14375,72 @@ "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 98.9760011434555, - "p90": 106.62399977445602, - "p95": 110.07999628782272, - "p99": 123.00799787044525 + "p50": 118.04799735546112, + "p90": 121.2799996137619, + "p95": 123.61600250005722, + "p99": 135.3600025177002 }, "combine": { - "p50": 71.32799923419952, - "p90": 73.69600236415863, - "p95": 78.52800190448761, - "p99": 80.22399991750717 + "p50": 63.231997191905975, + "p90": 64.99200314283371, + "p95": 65.24799764156342, + "p99": 72.51200079917908 }, "roundtrip": { - "p50": 143.26399564743042, - "p90": 150.14399588108063, - "p95": 153.1520038843155, - "p99": 162.88000345230103 + "p50": 172.54400253295898, + "p90": 176.15999281406403, + "p95": 177.59999632835388, + "p99": 187.51999735832214 }, "isolatedSum": { - "p50": 170.30400037765503, - "p90": 180.32000213861465, - "p95": 188.60799819231033, - "p99": 203.23199778795242 + "p50": 181.2799945473671, + "p90": 186.2720027565956, + "p95": 188.86400014162064, + "p99": 207.87200331687927 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 917504, - "combineLogicalBytes": 917504, - "fanoutMean": 1, - "recvTokensMax": 8, - "stragglerRank": 6, + "dispatchLogicalBytes": 2131968, + "combineLogicalBytes": 4263936, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 118.9119964838028, + "p90": 122.94399738311768, + "p95": 127.42400169372559, + "p99": 143.48800480365753 + }, + "combine": { + "p50": 64.35199826955795, + "p90": 
65.79200178384781, + "p95": 66.68800115585327, + "p99": 70.27199864387512 + }, + "roundtrip": { + "p50": 173.40800166130066, + "p90": 176.83200538158417, + "p95": 178.5919964313507, + "p99": 190.7840073108673 + }, + "isolatedSum": { + "p50": 183.26399475336075, + "p90": 188.73599916696548, + "p95": 194.11200284957886, + "p99": 213.76000344753265 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4251648, + "combineLogicalBytes": 8503296, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -13448,35 +14449,72 @@ "tokensPerRank": 32, "globalTokens": 256, "dispatch": { - "p50": 100.09600222110748, - "p90": 107.61599987745285, - "p95": 112.31999844312668, - "p99": 163.16799819469452 + "p50": 124.9919980764389, + "p90": 128.1599998474121, + "p95": 130.23999333381653, + "p99": 156.5759927034378 }, "combine": { - "p50": 79.71200346946716, - "p90": 87.16800063848495, - "p95": 87.74399757385254, - "p99": 95.8079993724823 + "p50": 68.64000111818314, + "p90": 70.14399766921997, + "p95": 70.78400254249573, + "p99": 75.39200037717819 }, "roundtrip": { - "p50": 154.01600301265717, - "p90": 161.47199273109436, - "p95": 164.5440012216568, - "p99": 176.83200538158417 + "p50": 185.56800484657288, + "p90": 189.18399512767792, + "p95": 191.52000546455383, + "p99": 204.83200252056122 }, "isolatedSum": { - "p50": 179.80800569057465, - "p90": 194.7840005159378, - "p95": 200.06399601697922, - "p99": 258.9759975671768 + "p50": 193.63199919462204, + "p90": 198.30399751663208, + "p95": 201.02399587631226, + "p99": 231.967993080616 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 3670016, - "combineLogicalBytes": 3670016, - "fanoutMean": 1, - "recvTokensMax": 32, - "stragglerRank": 6, + "dispatchLogicalBytes": 8454144, + "combineLogicalBytes": 16908288, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 
64, + "globalTokens": 512, + "dispatch": { + "p50": 135.42400300502777, + "p90": 139.5840048789978, + "p95": 143.45599710941315, + "p99": 153.6960005760193 + }, + "combine": { + "p50": 82.75199681520462, + "p90": 85.02399921417236, + "p95": 85.85599809885025, + "p99": 96.19200229644775 + }, + "roundtrip": { + "p50": 210.207998752594, + "p90": 215.2319997549057, + "p95": 217.6000028848648, + "p99": 234.72000658512115 + }, + "isolatedSum": { + "p50": 218.1759998202324, + "p90": 224.60800409317017, + "p95": 229.3119952082634, + "p99": 249.88800287246704 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 16711680, + "combineLogicalBytes": 33423360, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -13485,35 +14523,35 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 104.38399761915207, - "p90": 108.44799876213074, - "p95": 111.455999314785, - "p99": 119.74400281906128 + "p50": 157.151997089386, + "p90": 161.02400422096252, + "p95": 163.29599916934967, + "p99": 173.0560064315796 }, "combine": { - "p50": 83.26400071382523, - "p90": 88.03199976682663, - "p95": 88.22400122880936, - "p99": 92.83199906349182 + "p50": 102.1760031580925, + "p90": 104.35199737548828, + "p95": 105.43999820947647, + "p99": 116.06399714946747 }, "roundtrip": { - "p50": 154.9759954214096, - "p90": 161.18399798870087, - "p95": 165.0879979133606, - "p99": 170.01600563526154 + "p50": 253.02401185035706, + "p90": 257.60000944137573, + "p95": 260.8320116996765, + "p99": 278.9439857006073 }, "isolatedSum": { - "p50": 187.6479983329773, - "p90": 196.47999852895737, - "p95": 199.68000054359436, - "p99": 212.5760018825531 + "p50": 259.3280002474785, + "p90": 265.3760015964508, + "p95": 268.73599737882614, + "p99": 289.12000358104706 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 14680064, - "combineLogicalBytes": 14680064, - "fanoutMean": 1, - "recvTokensMax": 128, - "stragglerRank": 6, + 
"dispatchLogicalBytes": 33288192, + "combineLogicalBytes": 66576384, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -13521,16 +14559,16 @@ ] }, { - "id": "cx-94696c7b", - "identity": "h100|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|balanced+eplb|8|decode|normal|none|none|0|tuned||f0e66a15078595b", - "colorKey": "h100_c0c0ad86", - "comparisonKey": "00faf19eae8c1230", + "id": "cx-c27e2cad", + "identity": "b300|deepep|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "b300_d6fd14c3", + "comparisonKey": "ac13ebc2bb2c560a", "schemaVersion": 3, - "generatedAt": "2026-06-27T00:00:00.906485+00:00", + "generatedAt": "2026-06-27T10:26:01.213105+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h100-dgxc-slurm_19", - "sku": "h100", + "runner": "b300-nv_10", + "sku": "b300", "backend": "deepep", "phase": "decode", "mode": "normal", @@ -13538,29 +14576,30 @@ "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", + "topologyClass": "b300-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · bf16 · balanced+eplb", + "label": "B300 EP8 · deepep · fp8", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, - "experts": 288, - "routing": "balanced", - "routingLabel": "balanced+eplb", + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", - "eplbEnabled": true, - "dispatchDtype": "bf16", + "eplbEnabled": false, + "dispatchDtype": "fp8", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1515, + "achievedFraction": 0.1351, "configuredUnits": 20, - "deviceUnits": 132, + "deviceUnits": 148, "resourceClass": 
"backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -13573,53 +14612,53 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "f0e66a15078595b", - "workloadId": "set:8:7af12818400d6348", + "traceSignature": "ac583971f94b176", + "workloadId": "set:8:d8d49658059863f2", "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": 1, - "eplbImbalanceAfter": 1, - "backendVersion": "1.2.1", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271935069", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271935069", - "createdAt": "2026-06-26T23:58:25Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28286436120", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28286436120", + "createdAt": "2026-06-27T10:26:01.213105+00:00", + "sha": "91c7acf59a5e524f37742922ec67721d86a03f6b" }, "rows": [ { "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 97.63199836015701, - "p90": 106.36799782514572, - "p95": 109.63200032711029, - "p99": 118.65600198507309 + "p50": 56.2559999525547, + "p90": 58.78400057554245, + "p95": 61.28000095486641, + "p99": 77.69600301980972 }, "combine": { - "p50": 71.45600020885468, - "p90": 78.94399762153625, - "p95": 79.42400127649307, - "p99": 82.24000036716461 + "p50": 61.983998864889145, + "p90": 78.8159966468811, + "p95": 86.87999844551086, + "p99": 95.10400146245956 }, "roundtrip": { - "p50": 145.4080045223236, - "p90": 154.23999726772308, - "p95": 155.64799308776855, - "p99": 157.98400342464447 + "p50": 120.44800072908401, + "p90": 123.19999933242798, + "p95": 125.82400441169739, + "p99": 144.03200149536133 }, "isolatedSum": { - "p50": 169.0879985690117, - "p90": 185.31199544668198, - "p95": 189.05600160360336, - "p99": 200.8960023522377 + 
"p50": 118.23999881744385, + "p90": 137.59999722242355, + "p95": 148.15999940037727, + "p99": 172.8000044822693 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 229376, - "combineLogicalBytes": 229376, - "fanoutMean": 2, - "recvTokensMax": 3, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, "stragglerRank": 7, "correct": true, "samplesPooled": 600, @@ -13629,35 +14668,35 @@ "tokensPerRank": 2, "globalTokens": 16, "dispatch": { - "p50": 77.08799839019775, - "p90": 104.06400263309479, - "p95": 105.66399991512299, - "p99": 111.1999973654747 + "p50": 57.151999324560165, + "p90": 59.039998799562454, + "p95": 59.99999865889549, + "p99": 73.11999797821045 }, "combine": { - "p50": 65.05600363016129, - "p90": 74.5600014925003, - "p95": 79.00799810886383, - "p99": 82.33600109815598 + "p50": 64.54399973154068, + "p90": 66.17599725723267, + "p95": 67.16799736022949, + "p99": 74.23999905586243 }, "roundtrip": { - "p50": 122.8799968957901, - "p90": 151.64799988269806, - "p95": 153.24799716472626, - "p99": 161.50400042533875 + "p50": 124.15999919176102, + "p90": 126.39999389648438, + "p95": 129.60000336170197, + "p99": 138.49599659442902 }, "isolatedSum": { - "p50": 142.14400202035904, - "p90": 178.6240041255951, - "p95": 184.67199802398682, - "p99": 193.53599846363068 + "p50": 121.69599905610085, + "p90": 125.21599605679512, + "p95": 127.16799601912498, + "p99": 147.35999703407288 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 458752, - "combineLogicalBytes": 458752, - "fanoutMean": 2, - "recvTokensMax": 6, - "stragglerRank": 5, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -13666,35 +14705,35 @@ "tokensPerRank": 4, "globalTokens": 32, "dispatch": { - "p50": 97.56799787282944, - "p90": 107.80800133943558, - "p95": 114.04799669981003, - "p99": 120.44800072908401 
+ "p50": 58.88000130653381, + "p90": 61.37600168585777, + "p95": 63.10400366783142, + "p99": 91.10400080680847 }, "combine": { - "p50": 65.69600105285645, - "p90": 78.87999713420868, - "p95": 79.32800054550171, - "p99": 87.13600039482117 + "p50": 67.35999882221222, + "p90": 69.50400024652481, + "p95": 70.14399766921997, + "p99": 86.30400151014328 }, "roundtrip": { - "p50": 123.99999797344208, - "p90": 158.720001578331, - "p95": 165.3439998626709, - "p99": 176.28799378871918 + "p50": 127.68000364303589, + "p90": 130.14400005340576, + "p95": 131.55199587345123, + "p99": 137.08800077438354 }, "isolatedSum": { - "p50": 163.26399892568588, - "p90": 186.68799847364426, - "p95": 193.37599724531174, - "p99": 207.58400112390518 + "p50": 126.24000012874603, + "p90": 130.88000193238258, + "p95": 133.2480013370514, + "p99": 177.40800231695175 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 917504, - "combineLogicalBytes": 917504, - "fanoutMean": 2, - "recvTokensMax": 12, - "stragglerRank": 7, + "dispatchLogicalBytes": 1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -13703,34 +14742,34 @@ "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 98.11200201511383, - "p90": 105.76000064611435, - "p95": 108.64000022411346, - "p99": 122.30399996042252 + "p50": 60.03199890255928, + "p90": 62.30400130152702, + "p95": 63.26399743556976, + "p99": 69.2799985408783 }, "combine": { - "p50": 72.22399860620499, - "p90": 79.1039988398552, - "p95": 80.38400113582611, - "p99": 87.0399996638298 + "p50": 68.76800209283829, + "p90": 70.46400010585785, + "p95": 71.3919997215271, + "p99": 87.74399757385254 }, "roundtrip": { - "p50": 145.28000354766846, - "p90": 152.54400670528412, - "p95": 155.39200603961945, - "p99": 160.38399934768677 + "p50": 130.62399625778198, + "p90": 133.08799266815186, + "p95": 134.94400680065155, + "p99": 141.88799262046814 }, "isolatedSum": 
{ - "p50": 170.33600062131882, - "p90": 184.86399948596954, - "p95": 189.02400135993958, - "p99": 209.34399962425232 + "p50": 128.80000099539757, + "p90": 132.76800140738487, + "p95": 134.65599715709686, + "p99": 157.02399611473083 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1835008, - "combineLogicalBytes": 1835008, - "fanoutMean": 2, - "recvTokensMax": 24, + "dispatchLogicalBytes": 2487296, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, "stragglerRank": 7, "correct": true, "samplesPooled": 600, @@ -13740,34 +14779,34 @@ "tokensPerRank": 16, "globalTokens": 128, "dispatch": { - "p50": 98.52799773216248, - "p90": 109.69600081443787, - "p95": 117.34399944543839, - "p99": 131.45600259304047 + "p50": 61.28000095486641, + "p90": 63.551999628543854, + "p95": 64.89600241184235, + "p99": 75.58400183916092 }, "combine": { - "p50": 78.59200239181519, - "p90": 81.53600245714188, - "p95": 86.91199868917465, - "p99": 88.32000195980072 + "p50": 69.47200000286102, + "p90": 71.45600020885468, + "p95": 72.38399982452393, + "p99": 76.67200267314911 }, "roundtrip": { - "p50": 146.97599411010742, - "p90": 156.47999942302704, - "p95": 161.56800091266632, - "p99": 173.18400740623474 + "p50": 132.9919993877411, + "p90": 135.55200397968292, + "p95": 137.37599551677704, + "p99": 149.63200688362122 }, "isolatedSum": { - "p50": 177.12000012397766, - "p90": 191.23200327157974, - "p95": 204.25599813461304, - "p99": 219.7760045528412 + "p50": 130.75200095772743, + "p90": 135.00799983739853, + "p95": 137.28000223636627, + "p99": 152.25600451231003 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 3670016, - "combineLogicalBytes": 3670016, - "fanoutMean": 2, - "recvTokensMax": 48, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, "stragglerRank": 7, "correct": true, "samplesPooled": 600, @@ -13777,35 +14816,35 @@ "tokensPerRank": 32, "globalTokens": 256, "dispatch": { - "p50": 
84.54400300979614, - "p90": 107.07200318574905, - "p95": 113.40799927711487, - "p99": 126.08000636100769 + "p50": 64.2239972949028, + "p90": 66.39999896287918, + "p95": 67.87200272083282, + "p99": 82.8159973025322 }, "combine": { - "p50": 71.10399752855301, - "p90": 80.57600259780884, - "p95": 87.13600039482117, - "p99": 95.51999717950821 + "p50": 75.39200037717819, + "p90": 77.02399790287018, + "p95": 77.72800326347351, + "p99": 85.82399785518646 }, "roundtrip": { - "p50": 127.93600559234619, - "p90": 151.7760008573532, - "p95": 154.40000593662262, - "p99": 161.56800091266632 + "p50": 145.37599682807922, + "p90": 147.8399932384491, + "p95": 148.83199334144592, + "p99": 160.41600704193115 }, "isolatedSum": { - "p50": 155.64800053834915, - "p90": 187.6480057835579, - "p95": 200.54399967193604, - "p99": 221.6000035405159 + "p50": 139.615997672081, + "p90": 143.42399686574936, + "p95": 145.60000598430634, + "p99": 168.63999515771866 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 7340032, - "combineLogicalBytes": 7340032, - "fanoutMean": 2, - "recvTokensMax": 96, - "stragglerRank": 4, + "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -13814,35 +14853,35 @@ "tokensPerRank": 64, "globalTokens": 512, "dispatch": { - "p50": 100.35199671983719, - "p90": 113.37599903345108, - "p95": 126.49600207805634, - "p99": 162.1759980916977 + "p50": 72.9919970035553, + "p90": 75.6480023264885, + "p95": 76.89599692821503, + "p99": 89.79199826717377 }, "combine": { - "p50": 79.58400249481201, - "p90": 87.16800063848495, - "p95": 87.71199733018875, - "p99": 95.45599669218063 + "p50": 89.24800157546997, + "p90": 91.2960022687912, + "p95": 92.99200028181076, + "p99": 104.76800054311752 }, "roundtrip": { - "p50": 154.62400019168854, - "p90": 165.18400609493256, - "p95": 170.27199268341064, - "p99": 184.7359985113144 + "p50": 
173.92000555992126, + "p90": 176.9919991493225, + "p95": 179.1040003299713, + "p99": 198.08000326156616 }, "isolatedSum": { - "p50": 179.9359992146492, - "p90": 200.54399967193604, - "p95": 214.2079994082451, - "p99": 257.6319947838783 + "p50": 162.23999857902527, + "p90": 166.9440045952797, + "p95": 169.8879972100258, + "p99": 194.5599988102913 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 14680064, - "combineLogicalBytes": 14680064, - "fanoutMean": 2, - "recvTokensMax": 192, - "stragglerRank": 6, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -13851,35 +14890,35 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 98.1760025024414, - "p90": 120.80000340938568, - "p95": 125.56800246238708, - "p99": 134.49600338935852 + "p50": 83.90399813652039, + "p90": 86.65599673986435, + "p95": 87.96799927949905, + "p99": 94.2080020904541 }, "combine": { - "p50": 87.77599781751633, - "p90": 96.0640013217926, - "p95": 97.69599884748459, - "p99": 107.35999792814255 + "p50": 110.20799726247787, + "p90": 112.92800307273865, + "p95": 113.88800293207169, + "p99": 120.92799693346024 }, "roundtrip": { - "p50": 160.70400178432465, - "p90": 178.3680021762848, - "p95": 184.1920018196106, - "p99": 190.62399864196777 + "p50": 220.19200026988983, + "p90": 223.4240025281906, + "p95": 224.99200701713562, + "p99": 245.08799612522125 }, "isolatedSum": { - "p50": 185.95200031995773, - "p90": 216.86400473117828, - "p95": 223.26400130987167, - "p99": 241.85600131750107 + "p50": 194.11199539899826, + "p90": 199.583999812603, + "p95": 201.85600221157074, + "p99": 215.13599902391434 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 29360128, - "combineLogicalBytes": 29360128, - "fanoutMean": 2, - "recvTokensMax": 384, - "stragglerRank": 7, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + 
"fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -13887,46 +14926,47 @@ ] }, { - "id": "cx-b4d89049", - "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-moving|8|decode|normal|none|none|0|tuned||90042e0db6a8297", - "colorKey": "h100_1c83c0b0", - "comparisonKey": "b84a29c0643a5455", + "id": "cx-669dd02d", + "identity": "b300|deepep|7168|8|256|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "b300_c4c63f07", + "comparisonKey": "564ae99a5e9997e8", "schemaVersion": 3, - "generatedAt": "2026-06-27T00:11:39.736162+00:00", + "generatedAt": "2026-06-27T09:50:49.099200+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h100-dgxc-slurm_09", - "sku": "h100", + "runner": "b300-nv_01", + "sku": "b300", "backend": "deepep", "phase": "decode", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b300-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · bf16 · hotspot-moving", + "label": "B300 EP8 · deepep · fp8", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, "experts": 256, - "routing": "hotspot-moving", - "routingLabel": "hotspot-moving", + "routing": "uniform", + "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, - "dispatchDtype": "bf16", + "dispatchDtype": "fp8", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1515, + "achievedFraction": 0.1351, "configuredUnits": 20, - "deviceUnits": 132, + "deviceUnits": 148, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", 
"fixedKernel": false, @@ -13939,8 +14979,8 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "90042e0db6a8297", - "workloadId": "set:3:8fd05d9ebee41064", + "traceSignature": "ac583971f94b176", + "workloadId": "set:8:d8d49658059863f2", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -13948,225 +14988,192 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28272315381", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272315381", - "createdAt": "2026-06-27T00:10:35Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28285671692", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285671692", + "createdAt": "2026-06-27T09:50:49.099200+00:00", + "sha": "149586650dbed5b7579537347e9489d5b41543c1" }, "rows": [ { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 1, + "globalTokens": 8, "dispatch": { - "p50": 98.1760025024414, - "p90": 105.12000322341919, - "p95": 107.4879989027977, - "p99": 114.43199962377548 + "p50": 112.35199868679047, + "p90": 115.87200313806534, + "p95": 118.59200149774551, + "p99": 133.215993642807 }, "combine": { - "p50": 81.216000020504, - "p90": 87.8399983048439, - "p95": 88.19200098514557, - "p99": 89.08800035715103 + "p50": 62.33600154519081, + "p90": 64.35199826955795, + "p95": 64.7680014371872, + "p99": 68.4799998998642 }, "roundtrip": { - "p50": 154.4959992170334, - "p90": 160.99199652671814, - "p95": 162.59199380874634, - "p99": 167.35999286174774 + "p50": 164.92800414562225, + "p90": 168.06399822235107, + "p95": 170.27199268341064, + "p99": 182.6239973306656 }, "isolatedSum": { - "p50": 179.3920025229454, - "p90": 192.9600015282631, - "p95": 195.67999988794327, - "p99": 203.5199999809265 + "p50": 174.68800023198128, + "p90": 180.2240014076233, + "p95": 183.3600029349327, + "p99": 201.6959935426712 }, 
"roundtripMeasured": true, - "dispatchLogicalBytes": 4859904, - "combineLogicalBytes": 4859904, - "fanoutMean": 5.296875, - "recvTokensMax": 64, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 32, - "globalTokens": 256, + "tokensPerRank": 2, + "globalTokens": 16, "dispatch": { - "p50": 104.3199971318245, - "p90": 109.98400300741196, - "p95": 111.77600175142288, - "p99": 118.81600320339203 + "p50": 112.28799819946289, + "p90": 116.09599739313126, + "p95": 119.6800023317337, + "p99": 136.4479959011078 }, "combine": { - "p50": 89.1840010881424, - "p90": 95.58399766683578, - "p95": 96.09600156545639, - "p99": 97.18400239944458 + "p50": 62.39999830722809, + "p90": 64.15999680757523, + "p95": 64.38399851322174, + "p99": 65.92000275850296 }, "roundtrip": { - "p50": 164.2560064792633, - "p90": 169.69600319862366, - "p95": 171.64799571037292, - "p99": 176.64000391960144 + "p50": 167.26399958133698, + "p90": 169.76000368595123, + "p95": 172.4800020456314, + "p99": 185.92000007629395 }, "isolatedSum": { - "p50": 193.5039982199669, - "p90": 205.56800067424774, - "p95": 207.87200331687927, - "p99": 216.0000056028366 + "p50": 174.68799650669098, + "p90": 180.25599420070648, + "p95": 184.06400084495544, + "p99": 202.36799865961075 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19525632, - "combineLogicalBytes": 19525632, - "fanoutMean": 5.3203125, - "recvTokensMax": 256, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 4, + "globalTokens": 32, "dispatch": { - "p50": 137.28000223636627, - "p90": 146.11199498176575, - "p95": 149.6639996767044, - "p99": 152.19199657440186 + "p50": 113.92000317573547, + "p90": 
117.3119992017746, + "p95": 121.21599912643433, + "p99": 127.68000364303589 }, "combine": { - "p50": 128.48000228405, - "p90": 130.14400005340576, - "p95": 130.65600395202637, - "p99": 136.57599687576294 + "p50": 63.680000603199005, + "p90": 65.40799885988235, + "p95": 65.95200300216675, + "p99": 78.78399640321732 }, "roundtrip": { - "p50": 231.10400140285492, - "p90": 236.4799976348877, - "p95": 238.11200261116028, - "p99": 242.88000166416168 + "p50": 168.35199296474457, + "p90": 172.35200107097626, + "p95": 174.43199455738068, + "p99": 184.4480037689209 }, "isolatedSum": { - "p50": 265.76000452041626, - "p90": 276.2559950351715, - "p95": 280.3200036287308, - "p99": 288.7679934501648 + "p50": 177.60000377893448, + "p90": 182.71999806165695, + "p95": 187.16800212860107, + "p99": 206.4640000462532 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 78102528, - "combineLogicalBytes": 78102528, - "fanoutMean": 5.3203125, - "recvTokensMax": 1024, + "dispatchLogicalBytes": 1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 - } - ] - }, - { - "id": "cx-c41b3617", - "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-moving@s1|8|decode|normal|none|none|1|tuned||6288a1aa76c20e7", - "colorKey": "h100_52b1e978", - "comparisonKey": "b84a29c0643a5455", - "schemaVersion": 3, - "generatedAt": "2026-06-27T00:11:37.049465+00:00", - "status": "valid", - "publicationStatus": "comparable-experimental", - "runner": "h100-dgxc-slurm_06", - "sku": "h100", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · bf16 · hotspot-moving@s1", - "shape": { - "hidden": 7168, - 
"topk": 8, - "experts": 256, - "routing": "hotspot-moving", - "routingLabel": "hotspot-moving@s1", - "routingStep": 1, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "6288a1aa76c20e7", - "workloadId": null, - "workloadSource": "seeded-runtime", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28272318481", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272318481", - "createdAt": "2026-06-27T00:10:41Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ + }, { "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 100.28800368309021, - "p90": 105.0880029797554, - "p95": 107.32799768447876, - "p99": 112.67200112342834 + "p50": 115.167997777462, + "p90": 118.9119964838028, + "p95": 121.50400131940842, + "p99": 135.26399433612823 }, "combine": { - "p50": 80.22399991750717, - "p90": 87.10400015115738, - "p95": 87.42400258779526, - "p99": 89.40800279378891 + "p50": 66.27199798822403, + "p90": 67.4239993095398, + "p95": 68.35199892520905, + "p99": 74.17599856853485 }, "roundtrip": { - "p50": 155.5200070142746, - "p90": 160.70400178432465, - "p95": 162.04799711704254, - "p99": 166.30400717258453 + "p50": 172.0000058412552, + "p90": 174.68799650669098, + "p95": 176.83200538158417, + "p99": 191.6159987449646 }, "isolatedSum": { - "p50": 
180.51200360059738, - "p90": 192.19200313091278, - "p95": 194.75200027227402, - "p99": 202.08000391721725 + "p50": 181.43999576568604, + "p90": 186.3359957933426, + "p95": 189.85600024461746, + "p99": 209.4399929046631 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4859904, - "combineLogicalBytes": 4859904, - "fanoutMean": 5.296875, - "recvTokensMax": 64, + "dispatchLogicalBytes": 2487296, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 115.9679964184761, + "p90": 118.97599697113037, + "p95": 120.7360029220581, + "p99": 130.68799674510956 + }, + "combine": { + "p50": 67.1359971165657, + "p90": 68.9919963479042, + "p95": 69.60000097751617, + "p99": 86.75199747085571 + }, + "roundtrip": { + "p50": 174.94399845600128, + "p90": 178.17600071430206, + "p95": 179.77599799633026, + "p99": 183.58400464057922 + }, + "isolatedSum": { + "p50": 183.1039935350418, + "p90": 187.96799331903458, + "p95": 190.33600389957428, + "p99": 217.43999421596527 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, "stragglerRank": 4, "correct": true, "samplesPooled": 600, @@ -14176,34 +15183,71 @@ "tokensPerRank": 32, "globalTokens": 256, "dispatch": { - "p50": 105.24799674749374, - "p90": 198.68800044059753, - "p95": 200.80000162124634, - "p99": 207.68000185489655 + "p50": 122.65600264072418, + "p90": 126.91199779510498, + "p95": 131.42399489879608, + "p99": 145.53600549697876 }, "combine": { - "p50": 89.56799656152725, - "p90": 95.58399766683578, - "p95": 126.39999389648438, - "p99": 150.91200172901154 + "p50": 71.6480016708374, + "p90": 73.44000041484833, + "p95": 73.88799637556076, + "p99": 87.55200356245041 }, "roundtrip": { - "p50": 164.44799304008484, - "p90": 169.53599452972412, - "p95": 
170.8800047636032, - "p99": 176.92799866199493 + "p50": 186.81600689888, + "p90": 191.77600741386414, + "p95": 198.08000326156616, + "p99": 232.44799673557281 }, "isolatedSum": { - "p50": 194.815993309021, - "p90": 294.2719981074333, - "p95": 327.1999955177307, - "p99": 358.5920035839081 + "p50": 194.30400431156158, + "p90": 200.3519982099533, + "p95": 205.31199127435684, + "p99": 233.08800905942917 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19525632, - "combineLogicalBytes": 19525632, - "fanoutMean": 5.3203125, - "recvTokensMax": 256, + "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 136.60800457000732, + "p90": 141.34399592876434, + "p95": 144.57599818706512, + "p99": 157.4079990386963 + }, + "combine": { + "p50": 87.3280018568039, + "p90": 89.4400030374527, + "p95": 89.91999924182892, + "p99": 97.98400104045868 + }, + "roundtrip": { + "p50": 214.4639939069748, + "p90": 220.15999257564545, + "p95": 224.35200214385986, + "p99": 243.23199689388275 + }, + "isolatedSum": { + "p50": 223.93600642681122, + "p90": 230.78399896621704, + "p95": 234.49599742889404, + "p99": 255.39200007915497 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, "stragglerRank": 4, "correct": true, "samplesPooled": 600, @@ -14213,34 +15257,34 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 137.82399892807007, - "p90": 144.99199390411377, - "p95": 146.55999839305878, - "p99": 148.41599762439728 + "p50": 159.84000265598297, + "p90": 163.64799439907074, + "p95": 165.56799411773682, + "p99": 176.15999281406403 }, "combine": { - "p50": 128.00000607967377, - "p90": 131.9359987974167, - "p95": 133.31200182437897, - "p99": 136.51199638843536 + 
"p50": 108.38399827480316, + "p90": 110.68800091743469, + "p95": 111.96800321340561, + "p99": 118.72000247240067 }, "roundtrip": { - "p50": 235.07200181484222, - "p90": 240.86399376392365, - "p95": 242.71999299526215, - "p99": 245.27999758720398 + "p50": 262.08001375198364, + "p90": 266.30398631095886, + "p95": 270.81599831581116, + "p99": 283.6799919605255 }, "isolatedSum": { - "p50": 265.82400500774384, - "p90": 276.92799270153046, - "p95": 279.87200021743774, - "p99": 284.92799401283264 + "p50": 268.22400093078613, + "p90": 274.33599531650543, + "p95": 277.5359973311424, + "p99": 294.8799952864647 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 78102528, - "combineLogicalBytes": 78102528, - "fanoutMean": 5.3203125, - "recvTokensMax": 1024, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, "stragglerRank": 4, "correct": true, "samplesPooled": 600, @@ -14249,16 +15293,16 @@ ] }, { - "id": "cx-595b6f36", - "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-moving@s2|8|decode|normal|none|none|2|tuned||675e15b52e37958", - "colorKey": "h100_55b1ee31", - "comparisonKey": "b84a29c0643a5455", + "id": "cx-67bd51f4", + "identity": "b300|deepep|7168|8|384|fp8|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||d6c49ae98878760", + "colorKey": "b300_d6fd14c3", + "comparisonKey": "df0e0b78e56d7652", "schemaVersion": 3, - "generatedAt": "2026-06-27T00:11:41.163804+00:00", + "generatedAt": "2026-06-27T11:14:03.421071+00:00", "status": "valid", - "publicationStatus": "comparable-experimental", - "runner": "h100-dgxc-slurm_02", - "sku": "h100", + "publicationStatus": "official", + "runner": "b300-nv_17", + "sku": "b300", "backend": "deepep", "phase": "decode", "mode": "normal", @@ -14266,29 +15310,30 @@ "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": 
"h100-nvlink-island", + "topologyClass": "b300-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · bf16 · hotspot-moving@s2", + "label": "B300 EP8 · deepep · fp8", + "model": "Kimi-K2", "shape": { "hidden": 7168, "topk": 8, - "experts": 256, - "routing": "hotspot-moving", - "routingLabel": "hotspot-moving@s2", - "routingStep": 2, + "experts": 384, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, - "dispatchDtype": "bf16", + "dispatchDtype": "fp8", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1515, + "achievedFraction": 0.1351, "configuredUnits": 20, - "deviceUnits": 132, + "deviceUnits": 148, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -14301,235 +15346,202 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "675e15b52e37958", - "workloadId": null, - "workloadSource": "seeded-runtime", + "traceSignature": "d6c49ae98878760", + "workloadId": "set:8:9a27d0df4b17fa09", + "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, "backendVersion": "1.2.1", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28272321917", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272321917", - "createdAt": "2026-06-27T00:10:49Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28287503879", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28287503879", + "createdAt": "2026-06-27T11:14:03.421071+00:00", + "sha": "df7fddee0a275156d4c72fa006cd2b73bce72613" }, "rows": [ { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 1, + "globalTokens": 8, "dispatch": { - "p50": 98.24000298976898, - "p90": 
103.96800190210342, - "p95": 106.30399733781815, - "p99": 111.07199639081955 + "p50": 56.63999915122986, + "p90": 58.43200162053108, + "p95": 59.167999774217606, + "p99": 67.4239993095398 }, "combine": { - "p50": 79.52000200748444, - "p90": 86.87999844551086, - "p95": 87.52000331878662, - "p99": 88.0960002541542 + "p50": 59.67999994754791, + "p90": 61.24800071120262, + "p95": 62.463998794555664, + "p99": 73.27999919652939 }, "roundtrip": { - "p50": 153.28000485897064, - "p90": 161.3759994506836, - "p95": 163.4880006313324, - "p99": 455.80801367759705 + "p50": 119.80800330638885, + "p90": 122.78400361537933, + "p95": 129.63199615478516, + "p99": 147.74399995803833 }, "isolatedSum": { - "p50": 177.76000499725342, - "p90": 190.8480003476143, - "p95": 193.82400065660477, - "p99": 199.16799664497375 + "p50": 116.31999909877777, + "p90": 119.6800023317337, + "p95": 121.63199856877327, + "p99": 140.70399850606918 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4859904, - "combineLogicalBytes": 4859904, - "fanoutMean": 5.296875, - "recvTokensMax": 64, - "stragglerRank": 6, + "dispatchLogicalBytes": 301056, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 32, - "globalTokens": 256, + "tokensPerRank": 2, + "globalTokens": 16, "dispatch": { - "p50": 103.84000092744827, - "p90": 109.0560033917427, - "p95": 110.1439967751503, - "p99": 113.88800293207169 + "p50": 57.24800005555153, + "p90": 59.23200026154518, + "p95": 59.87200140953064, + "p99": 67.55200028419495 }, "combine": { - "p50": 87.87199854850769, - "p90": 95.32800316810608, - "p95": 95.90400010347366, - "p99": 96.25600278377533 + "p50": 60.99199876189232, + "p90": 62.880001962184906, + "p95": 63.26399743556976, + "p99": 65.43999910354614 }, "roundtrip": { - "p50": 161.98399662971497, - "p90": 168.99199783802032, - "p95": 170.56000232696533, - "p99": 175.80799758434296 + "p50": 
121.5360015630722, + "p90": 123.87199699878693, + "p95": 125.44000148773193, + "p99": 139.74399864673615 }, "isolatedSum": { - "p50": 191.71199947595596, - "p90": 204.38400655984879, - "p95": 206.04799687862396, - "p99": 210.14400571584702 + "p50": 118.23999881744385, + "p90": 122.11200222373009, + "p95": 123.1359988451004, + "p99": 132.9919993877411 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19525632, - "combineLogicalBytes": 19525632, - "fanoutMean": 5.3203125, - "recvTokensMax": 256, - "stragglerRank": 6, + "dispatchLogicalBytes": 609280, + "combineLogicalBytes": 1218560, + "fanoutMean": 5.3125, + "recvTokensMax": 14, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 4, + "globalTokens": 32, "dispatch": { - "p50": 136.80000603199005, - "p90": 145.4399973154068, - "p95": 146.68799936771393, - "p99": 149.4079977273941 - }, + "p50": 59.23200026154518, + "p90": 61.43999844789505, + "p95": 62.3680017888546, + "p99": 68.00000369548798 + }, "combine": { - "p50": 123.99999797344208, - "p90": 129.05600666999817, - "p95": 130.36799430847168, - "p99": 136.00000739097595 + "p50": 65.05600363016129, + "p90": 67.07199662923813, + "p95": 67.391999065876, + "p99": 71.55200093984604 }, "roundtrip": { - "p50": 228.7999987602234, - "p90": 236.12800240516663, - "p95": 237.98400163650513, - "p99": 241.5039986371994 + "p50": 128.03199887275696, + "p90": 133.88800621032715, + "p95": 158.36800634860992, + "p99": 190.0160014629364 }, "isolatedSum": { - "p50": 260.80000400543213, - "p90": 274.49600398540497, - "p95": 277.0559936761856, - "p99": 285.40800511837006 + "p50": 124.28800389170647, + "p90": 128.51199507713318, + "p95": 129.7600008547306, + "p99": 139.55200463533401 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 78102528, - "combineLogicalBytes": 78102528, - "fanoutMean": 5.3203125, - "recvTokensMax": 1024, + "dispatchLogicalBytes": 1204224, + 
"combineLogicalBytes": 2408448, + "fanoutMean": 5.25, + "recvTokensMax": 26, "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 - } - ] - }, - { - "id": "cx-f5ba95c3", - "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-moving@s3|8|decode|normal|none|none|3|tuned||82b2963fc322419", - "colorKey": "h100_54b1ec9e", - "comparisonKey": "b84a29c0643a5455", - "schemaVersion": 3, - "generatedAt": "2026-06-27T00:12:09.752348+00:00", - "status": "valid", - "publicationStatus": "comparable-experimental", - "runner": "h100-dgxc-slurm_14", - "sku": "h100", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · bf16 · hotspot-moving@s3", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "hotspot-moving", - "routingLabel": "hotspot-moving@s3", - "routingStep": 3, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "82b2963fc322419", - "workloadId": null, - "workloadSource": "seeded-runtime", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": 
"28272325031", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272325031", - "createdAt": "2026-06-27T00:10:55Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ + }, { "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 97.98400104045868, - "p90": 104.44799810647964, - "p95": 107.84000158309937, - "p99": 116.06399714946747 + "p50": 59.776000678539276, + "p90": 61.85600161552429, + "p95": 62.72000074386597, + "p99": 69.40799951553345 }, "combine": { - "p50": 81.02399855852127, - "p90": 87.64799684286118, - "p95": 88.06400001049042, - "p99": 96.00000083446503 + "p50": 65.60000032186508, + "p90": 67.45599955320358, + "p95": 68.7360018491745, + "p99": 75.80800354480743 }, "roundtrip": { - "p50": 156.41599893569946, - "p90": 162.62400150299072, - "p95": 165.75999557971954, - "p99": 176.7359972000122 + "p50": 128.83199751377106, + "p90": 131.71200454235077, + "p95": 135.42400300502777, + "p99": 151.0079950094223 }, "isolatedSum": { - "p50": 179.00799959897995, - "p90": 192.09599494934082, - "p95": 195.90400159358978, - "p99": 212.0639979839325 + "p50": 125.37600100040436, + "p90": 129.31200116872787, + "p95": 131.45600259304047, + "p99": 145.21600306034088 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4859904, - "combineLogicalBytes": 4859904, - "fanoutMean": 5.296875, - "recvTokensMax": 64, - "stragglerRank": 6, + "dispatchLogicalBytes": 2415616, + "combineLogicalBytes": 4831232, + "fanoutMean": 5.265625, + "recvTokensMax": 48, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 61.184000223875046, + "p90": 63.45599889755249, + "p95": 65.47199934720993, + "p99": 85.85599809885025 + }, + "combine": { + "p50": 67.35999882221222, + "p90": 69.40799951553345, + "p95": 70.46400010585785, + "p99": 73.79200309515 + }, + "roundtrip": { + "p50": 131.58400356769562, + "p90": 134.11200046539307, + "p95": 
135.42400300502777, + "p99": 145.53600549697876 + }, + "isolatedSum": { + "p50": 128.54399904608727, + "p90": 132.86399841308594, + "p95": 135.93599945306778, + "p99": 159.64800119400024 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4924416, + "combineLogicalBytes": 9848832, + "fanoutMean": 5.3671875, + "recvTokensMax": 91, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -14538,35 +15550,72 @@ "tokensPerRank": 32, "globalTokens": 256, "dispatch": { - "p50": 103.20000350475311, - "p90": 107.39199817180634, - "p95": 111.51999980211258, - "p99": 119.00799721479416 + "p50": 64.00000303983688, + "p90": 65.60000032186508, + "p95": 66.65600091218948, + "p99": 75.74400305747986 }, "combine": { - "p50": 88.16000074148178, - "p90": 95.8079993724823, - "p95": 96.16000205278397, - "p99": 98.11200201511383 + "p50": 73.27999919652939, + "p90": 75.23199915885925, + "p95": 75.55200159549713, + "p99": 79.29600030183792 }, "roundtrip": { - "p50": 162.78399527072906, - "p90": 168.73599588871002, - "p95": 170.9440052509308, - "p99": 176.57600343227386 + "p50": 144.48000490665436, + "p90": 147.0080018043518, + "p95": 147.93600142002106, + "p99": 156.89599514007568 }, "isolatedSum": { - "p50": 191.3600042462349, - "p90": 203.19999754428864, - "p95": 207.68000185489655, - "p99": 217.119999229908 + "p50": 137.28000223636627, + "p90": 140.83199948072433, + "p95": 142.20800250768661, + "p99": 155.04000335931778 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19525632, - "combineLogicalBytes": 19525632, - "fanoutMean": 5.3203125, - "recvTokensMax": 256, - "stragglerRank": 6, + "dispatchLogicalBytes": 9748480, + "combineLogicalBytes": 19496960, + "fanoutMean": 5.3125, + "recvTokensMax": 178, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 72.38399982452393, + "p90": 74.36800003051758, + "p95": 75.29599964618683, + "p99": 
83.99999886751175 + }, + "combine": { + "p50": 88.22400122880936, + "p90": 90.11200070381165, + "p95": 91.58399701118469, + "p99": 97.98400104045868 + }, + "roundtrip": { + "p50": 173.8239973783493, + "p90": 176.54399573802948, + "p95": 177.37600207328796, + "p99": 183.67999792099 + }, + "isolatedSum": { + "p50": 160.60800105333328, + "p90": 164.48000073432922, + "p95": 166.87999665737152, + "p99": 181.98399990797043 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19418112, + "combineLogicalBytes": 38836224, + "fanoutMean": 5.291015625, + "recvTokensMax": 372, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -14575,35 +15624,35 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 137.28000223636627, - "p90": 149.1200029850006, - "p95": 151.0079950094223, - "p99": 153.18399667739868 + "p50": 86.36800199747086, + "p90": 89.37600255012512, + "p95": 90.52799642086029, + "p99": 98.11200201511383 }, "combine": { - "p50": 128.86400520801544, - "p90": 131.1360001564026, - "p95": 135.71199774742126, - "p99": 138.3039951324463 + "p50": 108.70400071144104, + "p90": 111.26399785280228, + "p95": 112.35199868679047, + "p99": 118.81600320339203 }, "roundtrip": { - "p50": 234.49599742889404, - "p90": 241.4720058441162, - "p95": 242.65600740909576, - "p99": 247.9040026664734 + "p50": 222.59199619293213, + "p90": 226.52800381183624, + "p95": 229.5680046081543, + "p99": 250.5599856376648 }, "isolatedSum": { - "p50": 266.1440074443817, - "p90": 280.2560031414032, - "p95": 286.71999275684357, - "p99": 291.48799180984497 + "p50": 195.0720027089119, + "p90": 200.6400004029274, + "p95": 202.87999510765076, + "p99": 216.92800521850586 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 78102528, - "combineLogicalBytes": 78102528, - "fanoutMean": 5.3203125, - "recvTokensMax": 1024, - "stragglerRank": 6, + "dispatchLogicalBytes": 38757376, + "combineLogicalBytes": 77514752, + "fanoutMean": 5.2802734375, + "recvTokensMax": 707, + 
"stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -14611,46 +15660,47 @@ ] }, { - "id": "cx-fb3ea9d7", - "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|decode|normal|none|none|0|tuned||2ad5ef98d328fa1", - "colorKey": "h100_b654f9b2", - "comparisonKey": "10b5062b8e23fcad", + "id": "cx-4e513884", + "identity": "b300|deepep|7168|8|384|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||d6c49ae98878760", + "colorKey": "b300_c4c63f07", + "comparisonKey": "cf47e1b064e2e435", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:55:39.087780+00:00", + "generatedAt": "2026-06-27T09:51:14.282258+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h100-dgxc-slurm_00", - "sku": "h100", + "runner": "b300-nv_05", + "sku": "b300", "backend": "deepep", "phase": "decode", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b300-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · bf16 · hotspot-single", + "label": "B300 EP8 · deepep · fp8", + "model": "Kimi-K2", "shape": { "hidden": 7168, "topk": 8, - "experts": 256, - "routing": "hotspot-single", - "routingLabel": "hotspot-single", + "experts": 384, + "routing": "uniform", + "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, - "dispatchDtype": "bf16", + "dispatchDtype": "fp8", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1515, + "achievedFraction": 0.1351, "configuredUnits": 20, - "deviceUnits": 132, + "deviceUnits": 148, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -14663,8 +15713,8 @@ 
"scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "2ad5ef98d328fa1", - "workloadId": "set:4:286be993cd819ed9", + "traceSignature": "d6c49ae98878760", + "workloadId": "set:8:9a27d0df4b17fa09", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -14672,410 +15722,192 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271817166", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271817166", - "createdAt": "2026-06-26T23:54:31Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28285682409", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285682409", + "createdAt": "2026-06-27T09:51:14.282258+00:00", + "sha": "149586650dbed5b7579537347e9489d5b41543c1" }, "rows": [ { "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 97.82399982213974, - "p90": 105.95200210809708, - "p95": 109.95200276374817, - "p99": 121.50400131940842 + "p50": 117.50400066375732, + "p90": 121.0239976644516, + "p95": 123.03999811410904, + "p99": 134.91199910640717 }, "combine": { - "p50": 80.25600016117096, - "p90": 81.88799768686295, - "p95": 83.3280012011528, - "p99": 89.37600255012512 + "p50": 60.19200012087822, + "p90": 61.72800064086914, + "p95": 62.52799928188324, + "p99": 66.94400310516357 }, "roundtrip": { - "p50": 152.12799608707428, - "p90": 158.78400206565857, - "p95": 160.64000129699707, - "p99": 166.81599617004395 + "p50": 167.87199676036835, + "p90": 171.55200242996216, + "p95": 176.09600722789764, + "p99": 186.0799938440323 }, "isolatedSum": { - "p50": 178.0799999833107, - "p90": 187.83999979496002, - "p95": 193.28000396490097, - "p99": 210.88000386953354 + "p50": 177.69600078463554, + "p90": 182.75199830532074, + "p95": 185.56799739599228, + "p99": 201.85600221157074 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 602112, + 
"dispatchLogicalBytes": 301056, "combineLogicalBytes": 602112, "fanoutMean": 5.25, "recvTokensMax": 8, - "stragglerRank": 3, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 2, + "globalTokens": 16, "dispatch": { - "p50": 97.08800166845322, - "p90": 103.39199751615524, - "p95": 107.51999914646149, - "p99": 115.93600362539291 + "p50": 118.9119964838028, + "p90": 122.91199713945389, + "p95": 125.2480000257492, + "p99": 134.5279961824417 }, "combine": { - "p50": 80.89599758386612, - "p90": 84.03199911117554, - "p95": 87.42400258779526, - "p99": 89.47200328111649 + "p50": 62.55999952554703, + "p90": 64.4799992442131, + "p95": 65.05600363016129, + "p99": 74.33599978685379 }, "roundtrip": { - "p50": 153.60000729560852, - "p90": 161.15200519561768, - "p95": 163.83999586105347, - "p99": 171.55200242996216 + "p50": 173.7920045852661, + "p90": 177.0240068435669, + "p95": 179.4240027666092, + "p99": 209.56799387931824 }, "isolatedSum": { - "p50": 177.98399925231934, - "p90": 187.42399662733078, - "p95": 194.94400173425674, - "p99": 205.4080069065094 + "p50": 181.47199600934982, + "p90": 187.391996383667, + "p95": 190.3040036559105, + "p99": 208.8639959692955 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4859904, - "combineLogicalBytes": 4859904, - "fanoutMean": 5.296875, - "recvTokensMax": 64, - "stragglerRank": 3, + "dispatchLogicalBytes": 609280, + "combineLogicalBytes": 1218560, + "fanoutMean": 5.3125, + "recvTokensMax": 14, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 32, - "globalTokens": 256, + "tokensPerRank": 4, + "globalTokens": 32, "dispatch": { - "p50": 103.39199751615524, - "p90": 108.47999900579453, - "p95": 110.55999994277954, - "p99": 117.18399822711945 + "p50": 119.45600062608719, + "p90": 124.38400089740753, + "p95": 127.80800461769104, + "p99": 167.77600347995758 }, "combine": { - "p50": 89.34400230646133, 
- "p90": 95.551997423172, - "p95": 97.34400361776352, - "p99": 99.93600100278854 + "p50": 63.00800293684006, + "p90": 64.86400216817856, + "p95": 65.37599861621857, + "p99": 77.27999985218048 }, "roundtrip": { - "p50": 162.75200247764587, - "p90": 170.43200135231018, - "p95": 172.83199727535248, - "p99": 179.61600422859192 + "p50": 173.98400604724884, + "p90": 177.85599827766418, + "p95": 180.38399517536163, + "p99": 187.99999356269836 }, "isolatedSum": { - "p50": 192.73599982261658, - "p90": 204.03199642896652, - "p95": 207.90400356054306, - "p99": 217.119999229908 + "p50": 182.46400356292725, + "p90": 189.2480030655861, + "p95": 193.1840032339096, + "p99": 245.05600333213806 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19525632, - "combineLogicalBytes": 19525632, - "fanoutMean": 5.3203125, - "recvTokensMax": 256, - "stragglerRank": 3, + "dispatchLogicalBytes": 1204224, + "combineLogicalBytes": 2408448, + "fanoutMean": 5.25, + "recvTokensMax": 26, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 8, + "globalTokens": 64, "dispatch": { - "p50": 137.85600662231445, - "p90": 144.41600441932678, - "p95": 147.0080018043518, - "p99": 151.16800367832184 + "p50": 121.5360015630722, + "p90": 124.7360035777092, + "p95": 126.46399438381195, + "p99": 140.35199582576752 }, "combine": { - "p50": 128.83199751377106, - "p90": 131.23199343681335, - "p95": 131.99999928474426, - "p99": 137.95199990272522 + "p50": 66.68800115585327, + "p90": 68.4799998998642, + "p95": 69.24799829721451, + "p99": 75.71200281381607 }, "roundtrip": { - "p50": 233.75999927520752, - "p90": 239.3919974565506, - "p95": 240.92799425125122, - "p99": 245.1840043067932 + "p50": 179.51999604701996, + "p90": 184.12800133228302, + "p95": 190.528005361557, + "p99": 432.6399862766266 }, "isolatedSum": { - "p50": 266.6880041360855, - "p90": 275.64799785614014, - "p95": 279.00800108909607, - "p99": 
289.12000358104706 + "p50": 188.22400271892548, + "p90": 193.2160034775734, + "p95": 195.71199268102646, + "p99": 216.0639986395836 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 78102528, - "combineLogicalBytes": 78102528, - "fanoutMean": 5.3203125, - "recvTokensMax": 1024, - "stragglerRank": 3, + "dispatchLogicalBytes": 2415616, + "combineLogicalBytes": 4831232, + "fanoutMean": 5.265625, + "recvTokensMax": 48, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 - } - ] - }, - { - "id": "cx-e0ce741a", - "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|decode|normal|none|none|0|tuned||b6caf944f6bb621", - "colorKey": "h100_b654f9b2", - "comparisonKey": "10b5062b8e23fcad", - "schemaVersion": 3, - "generatedAt": "2026-06-27T00:01:31.374180+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_19", - "sku": "h100", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · bf16 · hotspot-single", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "hotspot-single", - "routingLabel": "hotspot-single", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": 
"b6caf944f6bb621", - "workloadId": "set:8:286be993cd819ed9", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28272004392", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272004392", - "createdAt": "2026-06-27T00:00:35Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ + }, { - "tokensPerRank": 1, - "globalTokens": 8, + "tokensPerRank": 16, + "globalTokens": 128, "dispatch": { - "p50": 96.67199850082397, - "p90": 104.60799932479858, - "p95": 106.11200332641602, - "p99": 113.56800049543381 + "p50": 122.43200093507767, + "p90": 126.0479986667633, + "p95": 128.63999605178833, + "p99": 147.71200716495514 }, "combine": { - "p50": 79.00799810886383, - "p90": 82.0159986615181, - "p95": 82.36800134181976, - "p99": 87.67999708652496 + "p50": 67.64800101518631, + "p90": 69.43999975919724, + "p95": 71.29599899053574, + "p99": 89.08800035715103 }, "roundtrip": { - "p50": 147.2640037536621, - "p90": 154.59200739860535, - "p95": 157.3439985513687, - "p99": 161.5999937057495 + "p50": 181.11999332904816, + "p90": 185.12000143527985, + "p95": 187.6160055398941, + "p99": 205.28000593185425 }, "isolatedSum": { - "p50": 175.6799966096878, - "p90": 186.62399798631668, - "p95": 188.48000466823578, - "p99": 201.24799758195877 + "p50": 190.08000195026398, + "p90": 195.48799842596054, + "p95": 199.93599504232407, + "p99": 236.80000752210617 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 602112, - "combineLogicalBytes": 602112, - "fanoutMean": 5.25, - "recvTokensMax": 8, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 99.67999905347824, - "p90": 105.0880029797554, - "p95": 107.16799646615982, - 
"p99": 112.99200356006622 - }, - "combine": { - "p50": 81.11999928951263, - "p90": 82.49600231647491, - "p95": 83.03999900817871, - "p99": 87.2960016131401 - }, - "roundtrip": { - "p50": 147.0080018043518, - "p90": 153.6639928817749, - "p95": 155.71199357509613, - "p99": 159.10400450229645 - }, - "isolatedSum": { - "p50": 180.79999834299088, - "p90": 187.58400529623032, - "p95": 190.20799547433853, - "p99": 200.28800517320633 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1247232, - "combineLogicalBytes": 1247232, - "fanoutMean": 5.4375, - "recvTokensMax": 16, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 97.18400239944458, - "p90": 103.93600165843964, - "p95": 106.30399733781815, - "p99": 122.04799801111221 - }, - "combine": { - "p50": 78.94399762153625, - "p90": 82.43200182914734, - "p95": 86.40000224113464, - "p99": 103.45599800348282 - }, - "roundtrip": { - "p50": 148.15999567508698, - "p90": 158.55999290943146, - "p95": 160.3199988603592, - "p99": 164.09599781036377 - }, - "isolatedSum": { - "p50": 176.12800002098083, - "p90": 186.36800348758698, - "p95": 192.7039995789528, - "p99": 225.50399601459503 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2451456, - "combineLogicalBytes": 2451456, - "fanoutMean": 5.34375, - "recvTokensMax": 32, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 98.91200065612793, - "p90": 104.35199737548828, - "p95": 106.65600001811981, - "p99": 112.47999966144562 - }, - "combine": { - "p50": 81.24800026416779, - "p90": 83.3280012011528, - "p95": 87.0399996638298, - "p99": 87.93599903583527 - }, - "roundtrip": { - "p50": 153.4080058336258, - "p90": 159.61599349975586, - "p95": 161.47199273109436, - "p99": 165.21599888801575 - }, - "isolatedSum": { - "p50": 180.16000092029572, - "p90": 
187.67999857664108, - "p95": 193.69599968194962, - "p99": 200.41599869728088 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4859904, - "combineLogicalBytes": 4859904, - "fanoutMean": 5.296875, - "recvTokensMax": 64, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 96.92800045013428, - "p90": 104.3199971318245, - "p95": 110.55999994277954, - "p99": 161.9199961423874 - }, - "combine": { - "p50": 81.4720019698143, - "p90": 87.2960016131401, - "p95": 87.8399983048439, - "p99": 90.27200192213058 - }, - "roundtrip": { - "p50": 153.43999862670898, - "p90": 160.19199788570404, - "p95": 162.78399527072906, - "p99": 169.98399794101715 - }, - "isolatedSum": { - "p50": 178.40000241994858, - "p90": 191.6159987449646, - "p95": 198.39999824762344, - "p99": 252.19199806451797 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 9748480, - "combineLogicalBytes": 9748480, - "fanoutMean": 5.3125, - "recvTokensMax": 128, + "dispatchLogicalBytes": 4924416, + "combineLogicalBytes": 9848832, + "fanoutMean": 5.3671875, + "recvTokensMax": 91, "stragglerRank": 7, "correct": true, "samplesPooled": 600, @@ -15085,34 +15917,34 @@ "tokensPerRank": 32, "globalTokens": 256, "dispatch": { - "p50": 103.64799946546555, - "p90": 108.31999778747559, - "p95": 110.62400043010712, - "p99": 114.84800279140472 + "p50": 129.2800009250641, + "p90": 132.4159950017929, + "p95": 134.75200533866882, + "p99": 156.80000185966492 }, "combine": { - "p50": 87.5839963555336, - "p90": 91.839998960495, - "p95": 95.39200365543365, - "p99": 96.38399630784988 + "p50": 73.18399846553802, + "p90": 75.19999891519547, + "p95": 76.25599950551987, + "p99": 83.45600217580795 }, "roundtrip": { - "p50": 155.96799552440643, - "p90": 165.50399363040924, - "p95": 168.41599345207214, - "p99": 175.64800381660461 + "p50": 195.26399672031403, + "p90": 199.0399956703186, + "p95": 201.82399451732635, + "p99": 
220.12799978256226 }, "isolatedSum": { - "p50": 191.23199582099915, - "p90": 200.15999674797058, - "p95": 206.01600408554077, - "p99": 211.2319990992546 + "p50": 202.4639993906021, + "p90": 207.61599391698837, + "p95": 211.0080048441887, + "p99": 240.25600403547287 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19525632, - "combineLogicalBytes": 19525632, - "fanoutMean": 5.3203125, - "recvTokensMax": 256, + "dispatchLogicalBytes": 9748480, + "combineLogicalBytes": 19496960, + "fanoutMean": 5.3125, + "recvTokensMax": 178, "stragglerRank": 7, "correct": true, "samplesPooled": 600, @@ -15122,34 +15954,34 @@ "tokensPerRank": 64, "globalTokens": 512, "dispatch": { - "p50": 114.88000303506851, - "p90": 126.11199915409088, - "p95": 127.6479959487915, - "p99": 133.56800377368927 + "p50": 142.4960047006607, + "p90": 146.40000462532043, + "p95": 149.53599870204926, + "p99": 164.32000696659088 }, "combine": { - "p50": 98.43199700117111, - "p90": 103.96800190210342, - "p95": 105.8880016207695, - "p99": 119.71200257539749 + "p50": 88.79999816417694, + "p90": 90.97599983215332, + "p95": 91.839998960495, + "p99": 101.50399804115295 }, "roundtrip": { - "p50": 180.38399517536163, - "p90": 191.39200448989868, - "p95": 194.39999759197235, - "p99": 201.9840031862259 + "p50": 221.37600183486938, + "p90": 225.95199942588806, + "p95": 228.99200022220612, + "p99": 238.17600309848785 }, "isolatedSum": { - "p50": 213.31200003623962, - "p90": 230.0800010561943, - "p95": 233.535997569561, - "p99": 253.28000634908676 + "p50": 231.29600286483765, + "p90": 237.37600445747375, + "p95": 241.37599766254425, + "p99": 265.82400500774384 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 38621184, - "combineLogicalBytes": 38621184, - "fanoutMean": 5.26171875, - "recvTokensMax": 512, + "dispatchLogicalBytes": 19418112, + "combineLogicalBytes": 38836224, + "fanoutMean": 5.291015625, + "recvTokensMax": 372, "stragglerRank": 7, "correct": true, "samplesPooled": 600, @@ -15159,34 +15991,34 @@ 
"tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 141.79199934005737, - "p90": 147.2959965467453, - "p95": 149.82399344444275, - "p99": 153.3759981393814 + "p50": 167.04000532627106, + "p90": 171.23199999332428, + "p95": 174.01599884033203, + "p99": 183.1039935350418 }, "combine": { - "p50": 122.36800044775009, - "p90": 128.4160017967224, - "p95": 129.02399897575378, - "p99": 136.1600011587143 + "p50": 109.21599715948105, + "p90": 111.455999314785, + "p95": 112.70400136709213, + "p99": 125.91999769210815 }, "roundtrip": { - "p50": 231.77599906921387, - "p90": 241.85599386692047, - "p95": 244.9280023574829, - "p99": 248.76800179481506 + "p50": 270.2080011367798, + "p90": 274.2080092430115, + "p95": 276.5119969844818, + "p99": 297.7280020713806 }, "isolatedSum": { - "p50": 264.15999978780746, - "p90": 275.7119983434677, - "p95": 278.84799242019653, - "p99": 289.5359992980957 + "p50": 276.2560024857521, + "p90": 282.6879993081093, + "p95": 286.72000020742416, + "p99": 309.02399122714996 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 78102528, - "combineLogicalBytes": 78102528, - "fanoutMean": 5.3203125, - "recvTokensMax": 1024, + "dispatchLogicalBytes": 38757376, + "combineLogicalBytes": 77514752, + "fanoutMean": 5.2802734375, + "recvTokensMax": 707, "stragglerRank": 7, "correct": true, "samplesPooled": 600, @@ -15195,50 +16027,51 @@ ] }, { - "id": "cx-73951147", - "identity": "h100|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|hotspot-single+eplb|8|decode|normal|none|none|0|tuned||e41f5099a9733ac", - "colorKey": "h100_456a963c", - "comparisonKey": "12dbc31e8daf0a44", + "id": "cx-1911c35d", + "identity": "b300|deepep|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|ac583971f94b176", + "colorKey": "b300_eee29686", + "comparisonKey": "37f5e47990ede677", "schemaVersion": 3, - "generatedAt": "2026-06-27T00:01:37.187210+00:00", + "generatedAt": "2026-06-26T17:41:38.976776+00:00", "status": 
"valid", "publicationStatus": "official", - "runner": "h100-dgxc-slurm_01", - "sku": "h100", + "runner": "b300-nv_01", + "sku": "b300", "backend": "deepep", "phase": "decode", "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", + "resourceMode": "normalized", + "suite": "resource-constrained", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", + "topologyClass": "b300-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · bf16 · hotspot-single+eplb", + "label": "B300 EP8 · deepep · fp8 (norm)", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, - "experts": 288, - "routing": "hotspot-single", - "routingLabel": "hotspot-single+eplb", + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", - "eplbEnabled": true, - "dispatchDtype": "bf16", + "eplbEnabled": false, + "dispatchDtype": "fp8", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", + "requestedFraction": 0.18, + "achievedFraction": 0.1824, + "configuredUnits": 27, + "deviceUnits": 148, + "resourceClass": "resource-constrained", + "conformanceClass": "resource-conforming", "fixedKernel": false, - "paretoEligible": false + "paretoEligible": true }, "placement": { "kind": "packed", @@ -15247,50 +16080,50 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "e41f5099a9733ac", - "workloadId": "set:8:286be993cd819ed9", + "traceSignature": "ac583971f94b176", + "workloadId": "set:8:d8d49658059863f2", "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": 1.830078125, - "eplbImbalanceAfter": 1.0007595486111112, + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": 
null, "backendVersion": "1.2.1", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28272008867", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272008867", - "createdAt": "2026-06-27T00:00:42Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28254479346", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254479346", + "createdAt": "2026-06-26T17:41:38.976776+00:00", + "sha": "60dec7d70f554e252fec87709e2be52752947db1" }, "rows": [ { "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 68.41599941253662, - "p90": 76.1599987745285, - "p95": 77.69600301980972, - "p99": 84.83199775218964 + "p50": 56.03199824690819, + "p90": 58.240000158548355, + "p95": 59.61599946022034, + "p99": 69.56800073385239 }, "combine": { - "p50": 71.07199728488922, - "p90": 73.11999797821045, - "p95": 73.7600028514862, - "p99": 79.74400371313095 + "p50": 61.40799820423126, + "p90": 63.4239986538887, + "p95": 64.35199826955795, + "p99": 77.53600180149078 }, "roundtrip": { - "p50": 126.46399438381195, - "p90": 130.62399625778198, - "p95": 131.55199587345123, - "p99": 136.4479959011078 + "p50": 121.18399888277054, + "p90": 123.4240010380745, + "p95": 124.64000284671783, + "p99": 131.48799538612366 }, "isolatedSum": { - "p50": 139.48799669742584, - "p90": 149.27999675273895, - "p95": 151.45600587129593, - "p99": 164.5760014653206 + "p50": 117.43999645113945, + "p90": 121.66399881243706, + "p95": 123.96799772977829, + "p99": 147.10400253534317 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 630784, + "dispatchLogicalBytes": 315392, "combineLogicalBytes": 630784, "fanoutMean": 5.5, "recvTokensMax": 7, @@ -15303,34 +16136,34 @@ "tokensPerRank": 2, "globalTokens": 16, "dispatch": { - "p50": 68.70400160551071, - "p90": 76.9599974155426, - "p95": 81.727996468544, - "p99": 107.10400342941284 + "p50": 56.92800134420395, + "p90": 
59.23200026154518, + "p95": 60.19200012087822, + "p99": 68.4799998998642 }, "combine": { - "p50": 71.48800045251846, - "p90": 73.15199822187424, - "p95": 73.56800138950348, - "p99": 79.55200225114822 + "p50": 62.24000081419945, + "p90": 64.19199705123901, + "p95": 65.05600363016129, + "p99": 69.69600170850754 }, "roundtrip": { - "p50": 127.77599692344666, - "p90": 131.23199343681335, - "p95": 132.60799646377563, - "p99": 138.7840062379837 + "p50": 122.65600264072418, + "p90": 124.79999661445618, + "p95": 125.98399817943573, + "p99": 135.1040005683899 }, "isolatedSum": { - "p50": 140.19200205802917, - "p90": 150.11199563741684, - "p95": 155.29599785804749, - "p99": 186.65600568056107 + "p50": 119.1680021584034, + "p90": 123.4239973127842, + "p95": 125.2480037510395, + "p99": 138.17600160837173 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1175552, - "combineLogicalBytes": 1175552, - "fanoutMean": 5.125, - "recvTokensMax": 12, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, "stragglerRank": 4, "correct": true, "samplesPooled": 600, @@ -15340,35 +16173,35 @@ "tokensPerRank": 4, "globalTokens": 32, "dispatch": { - "p50": 70.8480030298233, - "p90": 77.79199630022049, - "p95": 80.09599894285202, - "p99": 87.0399996638298 + "p50": 57.631999254226685, + "p90": 60.03199890255928, + "p95": 61.37600168585777, + "p99": 67.16799736022949 }, "combine": { - "p50": 72.4480003118515, - "p90": 73.56800138950348, - "p95": 74.27199929952621, - "p99": 79.80799674987793 + "p50": 63.93600255250931, + "p90": 65.43999910354614, + "p95": 65.88800251483917, + "p99": 69.023996591568 }, "roundtrip": { - "p50": 126.94400548934937, - "p90": 131.77600502967834, - "p95": 133.4719955921173, - "p99": 137.2479945421219 + "p50": 125.50400197505951, + "p90": 128.51199507713318, + "p95": 132.06399977207184, + "p99": 143.10400187969208 }, "isolatedSum": { - "p50": 143.2960033416748, - "p90": 151.35999768972397, - "p95": 
154.36799824237823, - "p99": 166.84799641370773 + "p50": 121.56800180673599, + "p90": 125.47199800610542, + "p95": 127.26400420069695, + "p99": 136.19199395179749 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2451456, - "combineLogicalBytes": 2451456, - "fanoutMean": 5.34375, - "recvTokensMax": 23, - "stragglerRank": 4, + "dispatchLogicalBytes": 1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -15377,35 +16210,35 @@ "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 70.11199742555618, - "p90": 76.9599974155426, - "p95": 79.3600007891655, - "p99": 86.14400029182434 + "p50": 59.487998485565186, + "p90": 61.88800185918808, + "p95": 62.81600147485733, + "p99": 73.2479989528656 }, "combine": { - "p50": 72.64000177383423, - "p90": 73.82400333881378, - "p95": 74.94399696588516, - "p99": 81.08799904584885 + "p50": 66.46399945020676, + "p90": 67.80800223350525, + "p95": 68.89600306749344, + "p99": 71.71200215816498 }, "roundtrip": { - "p50": 125.47199428081512, - "p90": 131.6480040550232, - "p95": 133.66399705410004, - "p99": 139.29599523544312 + "p50": 128.60800325870514, + "p90": 130.65600395202637, + "p95": 131.80799782276154, + "p99": 144.3520039319992 }, "isolatedSum": { - "p50": 142.7519991993904, - "p90": 150.78400075435638, - "p95": 154.30399775505066, - "p99": 167.2319993376732 + "p50": 125.95199793577194, + "p90": 129.69600409269333, + "p95": 131.71200454235077, + "p99": 144.96000111103058 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4730880, - "combineLogicalBytes": 4730880, - "fanoutMean": 5.15625, - "recvTokensMax": 44, - "stragglerRank": 4, + "dispatchLogicalBytes": 2487296, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -15414,34 +16247,34 @@ "tokensPerRank": 16, "globalTokens": 128, "dispatch": { - 
"p50": 81.40800148248672, - "p90": 83.99999886751175, - "p95": 86.33600175380707, - "p99": 91.36000275611877 + "p50": 60.67200005054474, + "p90": 62.880001962184906, + "p95": 63.74400109052658, + "p99": 69.82400268316269 }, "combine": { - "p50": 73.37599992752075, - "p90": 78.75200361013412, - "p95": 79.6160027384758, - "p99": 81.34400099515915 + "p50": 67.64800101518631, + "p90": 69.63200122117996, + "p95": 70.91200351715088, + "p99": 79.71200346946716 }, "roundtrip": { - "p50": 125.95200538635254, + "p50": 130.87999820709229, "p90": 133.15199315547943, - "p95": 134.5919966697693, - "p99": 140.32000303268433 + "p95": 134.43200290203094, + "p99": 141.88799262046814 }, "isolatedSum": { - "p50": 154.78400141000748, - "p90": 162.75200247764587, - "p95": 165.95200449228287, - "p99": 172.70400375127792 + "p50": 128.32000106573105, + "p90": 132.51200318336487, + "p95": 134.65600460767746, + "p99": 149.53600615262985 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 9691136, - "combineLogicalBytes": 9691136, - "fanoutMean": 5.28125, - "recvTokensMax": 88, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, "stragglerRank": 4, "correct": true, "samplesPooled": 600, @@ -15451,34 +16284,34 @@ "tokensPerRank": 32, "globalTokens": 256, "dispatch": { - "p50": 90.08000046014786, - "p90": 92.54399687051773, - "p95": 94.4959968328476, - "p99": 98.52799773216248 + "p50": 63.61600011587143, + "p90": 65.47199934720993, + "p95": 66.23999774456024, + "p99": 72.54400104284286 }, "combine": { - "p50": 80.09599894285202, - "p90": 81.56800270080566, - "p95": 82.07999914884567, - "p99": 87.2960016131401 + "p50": 72.31999933719635, + "p90": 74.14399832487106, + "p95": 75.23199915885925, + "p99": 79.6160027384758 }, "roundtrip": { - "p50": 141.08799397945404, - "p90": 144.96000111103058, - "p95": 146.30399644374847, - "p99": 150.33599734306335 + "p50": 142.87999272346497, + "p90": 145.85599303245544, + "p95": 
147.16799557209015, + "p99": 155.29599785804749 }, "isolatedSum": { - "p50": 170.17599940299988, - "p90": 174.1119995713234, - "p95": 176.57599598169327, - "p99": 185.82399934530258 + "p50": 135.93599945306778, + "p90": 139.615997672081, + "p95": 141.4719969034195, + "p99": 152.16000378131866 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19568640, - "combineLogicalBytes": 19568640, - "fanoutMean": 5.33203125, - "recvTokensMax": 179, + "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, "stragglerRank": 4, "correct": true, "samplesPooled": 600, @@ -15488,35 +16321,35 @@ "tokensPerRank": 64, "globalTokens": 512, "dispatch": { - "p50": 95.23200243711472, - "p90": 113.24799805879593, - "p95": 114.59200084209442, - "p99": 119.10399794578552 + "p50": 72.64000177383423, + "p90": 75.3600001335144, + "p95": 76.51200145483017, + "p99": 82.65600353479385 }, "combine": { - "p50": 89.85599875450134, - "p90": 98.2080027461052, - "p95": 114.3679991364479, - "p99": 130.49599528312683 + "p50": 87.90399879217148, + "p90": 90.08000046014786, + "p95": 90.84799885749817, + "p99": 101.15200281143188 }, "roundtrip": { - "p50": 159.39199924468994, - "p90": 165.53600132465363, - "p95": 167.87199676036835, - "p99": 179.51999604701996 + "p50": 172.83199727535248, + "p90": 175.4239946603775, + "p95": 176.41599476337433, + "p99": 181.43999576568604 }, "isolatedSum": { - "p50": 185.08800119161606, - "p90": 211.45600080490112, - "p95": 228.95999997854233, - "p99": 249.59999322891235 + "p50": 160.5440005660057, + "p90": 165.44000059366226, + "p95": 167.36000031232834, + "p99": 183.80800634622574 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 38750208, - "combineLogicalBytes": 38750208, - "fanoutMean": 5.279296875, - "recvTokensMax": 348, - "stragglerRank": 4, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 7, "correct": true, 
"samplesPooled": 600, "trials": 3 @@ -15525,35 +16358,35 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 111.29599809646606, - "p90": 117.21599847078323, - "p95": 118.43200027942657, - "p99": 122.72000312805176 + "p50": 83.20000022649765, + "p90": 86.07999980449677, + "p95": 87.0399996638298, + "p99": 90.17600119113922 }, "combine": { - "p50": 106.39999806880951, - "p90": 112.28799819946289, - "p95": 113.11999708414078, - "p99": 114.33599889278412 + "p50": 108.70400071144104, + "p90": 110.97600311040878, + "p95": 112.06399649381638, + "p99": 116.41599982976913 }, "roundtrip": { - "p50": 197.63199985027313, - "p90": 202.11200416088104, - "p95": 203.39199900627136, - "p99": 206.9759964942932 + "p50": 218.07999908924103, + "p90": 221.343994140625, + "p95": 222.97599911689758, + "p99": 235.52000522613525 }, "isolatedSum": { - "p50": 217.69599616527557, - "p90": 229.50399667024612, - "p95": 231.55199736356735, - "p99": 237.05600202083588 + "p50": 191.9040009379387, + "p90": 197.05600291490555, + "p95": 199.10399615764618, + "p99": 206.59200102090836 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77342720, - "combineLogicalBytes": 77342720, - "fanoutMean": 5.2685546875, - "recvTokensMax": 687, - "stragglerRank": 4, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -15561,50 +16394,51 @@ ] }, { - "id": "cx-fc133662", - "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform·empty-rank|8|decode|normal|none|empty-rank|0|tuned||5621f0d4899ad7a", - "colorKey": "h100_d54acd03", - "comparisonKey": "fb346b1019e55bb0", + "id": "cx-fe6f5351", + "identity": "b300|deepep|7168|8|256|fp8|normal|cached-layout-comm-only-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|ac583971f94b176", + "colorKey": "b300_84b10b26", + "comparisonKey": "abf92acc41d9d301", "schemaVersion": 3, - 
"generatedAt": "2026-06-27T00:13:31.132134+00:00", + "generatedAt": "2026-06-26T18:10:48.557544+00:00", "status": "valid", - "publicationStatus": "comparable-experimental", - "runner": "h100-dgxc-slurm_01", - "sku": "h100", + "publicationStatus": "official", + "runner": "b300-nv_01", + "sku": "b300", "backend": "deepep", "phase": "decode", "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", + "resourceMode": "normalized", + "suite": "resource-constrained", "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", + "measurementContract": "cached-layout-comm-only-v1", + "topologyClass": "b300-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · bf16 · uniform·empty-rank", + "label": "B300 EP8 · deepep · fp8 (norm) [cl]", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, "experts": 256, "routing": "uniform", - "routingLabel": "uniform·empty-rank", + "routingLabel": "uniform", "routingStep": 0, - "unevenTokens": "empty-rank", + "unevenTokens": "none", "eplbEnabled": false, - "dispatchDtype": "bf16", + "dispatchDtype": "fp8", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", + "requestedFraction": 0.18, + "achievedFraction": 0.1824, + "configuredUnits": 27, + "deviceUnits": 148, + "resourceClass": "resource-constrained", + "conformanceClass": "resource-conforming", "fixedKernel": false, - "paretoEligible": false + "paretoEligible": true }, "placement": { "kind": "packed", @@ -15613,128 +16447,313 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "5621f0d4899ad7a", - "workloadId": null, - "workloadSource": "seeded-runtime", + "traceSignature": "ac583971f94b176", + "workloadId": 
"set:8:d8d49658059863f2", + "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, "backendVersion": "1.2.1", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28272375977", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272375977", - "createdAt": "2026-06-27T00:12:38Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28254499301", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254499301", + "createdAt": "2026-06-26T18:10:48.557544+00:00", + "sha": "60dec7d70f554e252fec87709e2be52752947db1" }, "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 49.56800118088722, + "p90": 51.263999193906784, + "p95": 51.83999985456467, + "p99": 57.920001447200775 + }, + "combine": { + "p50": 62.24000081419945, + "p90": 63.680000603199005, + "p95": 64.51199948787689, + "p99": 66.3679987192154 + }, + "roundtrip": { + "p50": 114.78400230407715, + "p90": 116.86400324106216, + "p95": 118.01599711179733, + "p99": 126.68800354003906 + }, + "isolatedSum": { + "p50": 111.80800199508667, + "p90": 114.94399979710579, + "p95": 116.35199934244156, + "p99": 124.28800016641617 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 49.92000013589859, + "p90": 51.80799961090088, + "p95": 52.76799947023392, + "p99": 58.9120015501976 + }, + "combine": { + "p50": 63.040003180503845, + "p90": 64.89600241184235, + "p95": 65.24799764156342, + "p99": 74.11199808120728 + }, + "roundtrip": { + "p50": 116.64000153541565, + "p90": 119.00799721479416, + "p95": 121.08799815177917, + "p99": 136.57599687576294 + }, + "isolatedSum": { + "p50": 
112.96000331640244, + "p90": 116.70400202274323, + "p95": 118.01599711179733, + "p99": 133.02399963140488 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 51.42400041222572, + "p90": 53.63199859857559, + "p95": 54.655998945236206, + "p99": 65.76000154018402 + }, + "combine": { + "p50": 63.10400366783142, + "p90": 64.96000289916992, + "p95": 65.63200056552887, + "p99": 75.93599706888199 + }, + "roundtrip": { + "p50": 117.53600090742111, + "p90": 119.87199634313583, + "p95": 120.86399644613266, + "p99": 132.192000746727 + }, + "isolatedSum": { + "p50": 114.52800408005714, + "p90": 118.59200149774551, + "p95": 120.28799951076508, + "p99": 141.695998609066 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, { "tokensPerRank": 8, - "globalTokens": 63, + "globalTokens": 64, "dispatch": { - "p50": 98.01600128412247, - "p90": 108.03200304508209, - "p95": 124.22399967908859, - "p99": 164.000004529953 + "p50": 52.41600051522255, + "p90": 54.496001452207565, + "p95": 55.71199953556061, + "p99": 60.5119988322258 }, "combine": { - "p50": 80.73599636554718, - "p90": 89.63199704885483, - "p95": 104.63999956846237, - "p99": 112.5440001487732 + "p50": 65.72800129652023, + "p90": 67.48799979686737, + "p95": 67.9360032081604, + "p99": 73.21599870920181 }, "roundtrip": { - "p50": 154.1759967803955, - "p90": 160.35200655460358, - "p95": 162.08000481128693, - "p99": 175.3920018672943 + "p50": 122.04799801111221, + "p90": 124.38400089740753, + "p95": 126.52799487113953, + "p99": 147.16799557209015 }, "isolatedSum": { - "p50": 178.75199764966965, - "p90": 
197.66400009393692, - "p95": 228.86399924755096, - "p99": 276.5440046787262 + "p50": 118.14400181174278, + "p90": 121.98400124907494, + "p95": 123.64800274372101, + "p99": 133.7279975414276 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4888576, - "combineLogicalBytes": 4888576, - "fanoutMean": 5.412698268890381, - "recvTokensMax": 46, - "stragglerRank": 4, + "dispatchLogicalBytes": 2487296, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 53.69599908590317, + "p90": 55.84000051021576, + "p95": 56.86400085687637, + "p99": 65.0240033864975 + }, + "combine": { + "p50": 67.16799736022949, + "p90": 68.9919963479042, + "p95": 69.69600170850754, + "p99": 77.98399776220322 + }, + "roundtrip": { + "p50": 123.36000055074692, + "p90": 125.66399574279785, + "p95": 127.16799974441528, + "p99": 140.70400595664978 + }, + "isolatedSum": { + "p50": 120.86399644613266, + "p90": 124.83199685811996, + "p95": 126.56000256538391, + "p99": 143.0080011487007 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { "tokensPerRank": 32, - "globalTokens": 252, + "globalTokens": 256, "dispatch": { - "p50": 104.76800054311752, - "p90": 134.0479999780655, - "p95": 136.1279934644699, - "p99": 144.41600441932678 + "p50": 57.66399949789047, + "p90": 59.776000678539276, + "p95": 60.63999980688095, + "p99": 65.72800129652023 }, "combine": { - "p50": 89.02399986982346, - "p90": 104.12800312042236, - "p95": 104.41599786281586, - "p99": 107.90400207042694 + "p50": 72.89600372314453, + "p90": 74.14399832487106, + "p95": 75.55200159549713, + "p99": 83.96799862384796 }, "roundtrip": { - "p50": 166.59200191497803, - "p90": 
189.95200097560883, - "p95": 191.96799397468567, - "p99": 199.5840072631836 + "p50": 138.40000331401825, + "p90": 140.60799777507782, + "p95": 141.66399836540222, + "p99": 149.53599870204926 }, "isolatedSum": { - "p50": 193.79200041294098, - "p90": 238.17600309848785, - "p95": 240.54399132728577, - "p99": 252.32000648975372 + "p50": 130.560003221035, + "p90": 133.91999900341034, + "p95": 136.19200140237808, + "p99": 149.6959999203682 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19396608, - "combineLogicalBytes": 19396608, - "fanoutMean": 5.36904764175415, - "recvTokensMax": 180, - "stragglerRank": 4, + "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 66.04799628257751, + "p90": 68.15999746322632, + "p95": 69.88800317049026, + "p99": 74.91199672222137 + }, + "combine": { + "p50": 87.93599903583527, + "p90": 90.08000046014786, + "p95": 91.74399822950363, + "p99": 98.24000298976898 + }, + "roundtrip": { + "p50": 164.76799547672272, + "p90": 167.42399334907532, + "p95": 169.3120002746582, + "p99": 185.92000007629395 + }, + "isolatedSum": { + "p50": 153.98399531841278, + "p90": 158.23999792337418, + "p95": 161.6320013999939, + "p99": 173.15199971199036 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { "tokensPerRank": 128, - "globalTokens": 1022, + "globalTokens": 1024, "dispatch": { - "p50": 130.52800297737122, - "p90": 139.90400731563568, - "p95": 151.61600708961487, - "p99": 458.5599899291992 + "p50": 77.63200253248215, + "p90": 80.1599994301796, + "p95": 81.50400221347809, + "p99": 98.65599870681763 }, "combine": { - "p50": 120.7680031657219, - "p90": 
127.93600559234619, - "p95": 128.54400277137756, - "p99": 129.50399518013 + "p50": 108.35199803113937, + "p90": 110.78400164842606, + "p95": 111.84000223875046, + "p99": 126.01600587368011 }, "roundtrip": { - "p50": 216.35200083255768, - "p90": 221.98399901390076, - "p95": 224.7679978609085, - "p99": 229.5359969139099 + "p50": 211.42399311065674, + "p90": 214.52799439430237, + "p95": 215.87200462818146, + "p99": 223.1999933719635 }, "isolatedSum": { - "p50": 251.2960061430931, - "p90": 267.8400129079819, - "p95": 280.16000986099243, - "p99": 588.0639851093292 + "p50": 185.98400056362152, + "p90": 190.94400107860565, + "p95": 193.34400445222855, + "p99": 224.67200458049774 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77529088, - "combineLogicalBytes": 77529088, - "fanoutMean": 5.2915849685668945, - "recvTokensMax": 722, - "stragglerRank": 4, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -15742,46 +16761,47 @@ ] }, { - "id": "cx-e7e5caec", - "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform·linear|8|decode|normal|none|linear|0|tuned||b029c1a6fded400", - "colorKey": "h100_f70758a0", - "comparisonKey": "fb346b1019e55bb0", + "id": "cx-83d0a7b9", + "identity": "b300|deepep|7168|8|256|fp8|normal|cached-layout-comm-only-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "b300_49e66a7b", + "comparisonKey": "0abec2edede4ab05", "schemaVersion": 3, - "generatedAt": "2026-06-27T00:13:24.801629+00:00", + "generatedAt": "2026-06-27T09:47:10.185475+00:00", "status": "valid", - "publicationStatus": "comparable-experimental", - "runner": "h100-dgxc-slurm_00", - "sku": "h100", + "publicationStatus": "official", + "runner": "b300-nv_15", + "sku": "b300", "backend": "deepep", "phase": "decode", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", 
"comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", + "measurementContract": "cached-layout-comm-only-v1", + "topologyClass": "b300-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · bf16 · uniform·linear", + "label": "B300 EP8 · deepep · fp8 [cl]", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, "experts": 256, "routing": "uniform", - "routingLabel": "uniform·linear", + "routingLabel": "uniform", "routingStep": 0, - "unevenTokens": "linear", + "unevenTokens": "none", "eplbEnabled": false, - "dispatchDtype": "bf16", + "dispatchDtype": "fp8", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1515, + "achievedFraction": 0.1351, "configuredUnits": 20, - "deviceUnits": 132, + "deviceUnits": 148, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -15794,54 +16814,202 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "b029c1a6fded400", - "workloadId": null, - "workloadSource": "seeded-runtime", + "traceSignature": "ac583971f94b176", + "workloadId": "set:8:d8d49658059863f2", + "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, "backendVersion": "1.2.1", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28272372388", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272372388", - "createdAt": "2026-06-27T00:12:31Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28285590577", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285590577", + "createdAt": "2026-06-27T09:47:10.185475+00:00", + "sha": "149586650dbed5b7579537347e9489d5b41543c1" }, "rows": [ { - "tokensPerRank": 8, - 
"globalTokens": 64, + "tokensPerRank": 1, + "globalTokens": 8, "dispatch": { - "p50": 98.24000298976898, - "p90": 103.64799946546555, - "p95": 106.4319983124733, - "p99": 112.5119999051094 + "p50": 49.247998744249344, + "p90": 51.552001386880875, + "p95": 52.799999713897705, + "p99": 63.35999816656113 }, "combine": { - "p50": 80.73599636554718, - "p90": 87.55200356245041, - "p95": 88.03199976682663, - "p99": 90.08000046014786 + "p50": 61.72800064086914, + "p90": 63.45599889755249, + "p95": 65.47199934720993, + "p99": 80.86399734020233 }, "roundtrip": { - "p50": 154.33600544929504, - "p90": 159.45599973201752, - "p95": 161.6639941930771, - "p99": 166.75199568271637 + "p50": 114.78400230407715, + "p90": 117.40799993276596, + "p95": 120.80000340938568, + "p99": 136.83199882507324 }, "isolatedSum": { - "p50": 178.97599935531616, - "p90": 191.20000302791595, - "p95": 194.46399807929993, - "p99": 202.59200036525726 + "p50": 110.97599938511848, + "p90": 115.00800028443336, + "p95": 118.27199906110764, + "p99": 144.22399550676346 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 50.27199909090996, + "p90": 52.5440014898777, + "p95": 54.46400120854378, + "p99": 61.08799949288368 + }, + "combine": { + "p50": 61.664000153541565, + "p90": 63.551999628543854, + "p95": 65.15199691057205, + "p99": 74.01599735021591 + }, + "roundtrip": { + "p50": 116.12799763679504, + "p90": 118.52800101041794, + "p95": 121.66400253772736, + "p99": 139.26400244235992 + }, + "isolatedSum": { + "p50": 111.93599924445152, + "p90": 116.09600111842155, + "p95": 119.61599811911583, + "p99": 135.1039968430996 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 1232896, + "fanoutMean": 
5.375, + "recvTokensMax": 13, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 51.10400170087814, + "p90": 53.279999643564224, + "p95": 53.98400127887726, + "p99": 61.08799949288368 + }, + "combine": { + "p50": 62.880001962184906, + "p90": 64.86400216817856, + "p95": 65.63200056552887, + "p99": 83.52000266313553 + }, + "roundtrip": { + "p50": 117.91999638080597, + "p90": 121.05599790811539, + "p95": 122.81599640846252, + "p99": 135.16800105571747 + }, + "isolatedSum": { + "p50": 113.98400366306305, + "p90": 118.14400181174278, + "p95": 119.61600184440613, + "p99": 144.6080021560192 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 52.38400027155876, + "p90": 54.687999188899994, + "p95": 55.71199953556061, + "p99": 60.127999633550644 + }, + "combine": { + "p50": 66.91200286149979, + "p90": 73.11999797821045, + "p95": 75.19999891519547, + "p99": 92.16000139713287 + }, + "roundtrip": { + "p50": 121.98399752378464, + "p90": 124.38400089740753, + "p95": 125.76000392436981, + "p99": 145.9520012140274 + }, + "isolatedSum": { + "p50": 119.29600313305855, + "p90": 127.80799716711044, + "p95": 130.91199845075607, + "p99": 152.28800103068352 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487296, "combineLogicalBytes": 4974592, "fanoutMean": 5.421875, "recvTokensMax": 47, - "stragglerRank": 4, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 53.37600037455559, + "p90": 55.615998804569244, + "p95": 56.63999915122986, + "p99": 63.45599889755249 + }, + "combine": { + "p50": 67.87200272083282, + 
"p90": 69.76000219583511, + "p95": 70.52800059318542, + "p99": 75.87199658155441 + }, + "roundtrip": { + "p50": 124.38400089740753, + "p90": 127.10399925708771, + "p95": 128.76799702644348, + "p99": 143.71199905872345 + }, + "isolatedSum": { + "p50": 121.24800309538841, + "p90": 125.37600100040436, + "p95": 127.16799974441528, + "p99": 139.3279954791069 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -15850,35 +17018,72 @@ "tokensPerRank": 32, "globalTokens": 256, "dispatch": { - "p50": 100.09600222110748, - "p90": 105.27999699115753, - "p95": 106.91200196743011, - "p99": 113.37599903345108 + "p50": 57.0559985935688, + "p90": 59.4559982419014, + "p95": 61.47199869155884, + "p99": 80.19199967384338 }, "combine": { - "p50": 89.53599631786346, - "p90": 96.16000205278397, - "p95": 96.73599898815155, - "p99": 98.43199700117111 + "p50": 73.21599870920181, + "p90": 75.32799988985062, + "p95": 76.19199901819229, + "p99": 83.52000266313553 }, "roundtrip": { - "p50": 163.39200735092163, - "p90": 168.99199783802032, - "p95": 170.43200135231018, - "p99": 174.81599748134613 + "p50": 137.56799697875977, + "p90": 140.44800400733948, + "p95": 143.51999759674072, + "p99": 164.0319973230362 }, "isolatedSum": { - "p50": 189.63199853897095, - "p90": 201.4399990439415, - "p95": 203.64800095558167, - "p99": 211.8079960346222 + "p50": 130.27199730277061, + "p90": 134.783998131752, + "p95": 137.66399770975113, + "p99": 163.7120023369789 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19726336, + "dispatchLogicalBytes": 9863168, "combineLogicalBytes": 19726336, "fanoutMean": 5.375, "recvTokensMax": 182, - "stragglerRank": 4, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 66.46399945020676, + 
"p90": 69.05599683523178, + "p95": 70.23999840021133, + "p99": 80.9599980711937 + }, + "combine": { + "p50": 87.20000088214874, + "p90": 88.95999938249588, + "p95": 89.82399851083755, + "p99": 96.6079980134964 + }, + "roundtrip": { + "p50": 165.43999314308167, + "p90": 167.7439957857132, + "p95": 168.70400309562683, + "p99": 188.9919936656952 + }, + "isolatedSum": { + "p50": 153.6640003323555, + "p90": 158.01599621772766, + "p95": 160.0639969110489, + "p99": 177.5679960846901 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -15887,35 +17092,35 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 130.75199723243713, - "p90": 136.99199259281158, - "p95": 138.7840062379837, - "p99": 143.42400431632996 + "p50": 76.73600316047668, + "p90": 79.23199981451035, + "p95": 80.64000308513641, + "p99": 87.52000331878662 }, "combine": { - "p50": 128.1599998474121, - "p90": 130.40000200271606, - "p95": 135.8720064163208, - "p99": 278.6880135536194 + "p50": 108.57599973678589, + "p90": 111.10399663448334, + "p95": 112.86400258541107, + "p99": 119.6800023317337 }, "roundtrip": { - "p50": 225.75999796390533, - "p90": 231.74400627613068, - "p95": 232.80000686645508, - "p99": 235.6480062007904 + "p50": 211.2320065498352, + "p90": 214.27200734615326, + "p95": 216.06400609016418, + "p99": 229.8559993505478 }, "isolatedSum": { - "p50": 258.91199707984924, - "p90": 267.39199459552765, - "p95": 274.6560126543045, - "p99": 422.11201786994934 + "p50": 185.31200289726257, + "p90": 190.33599644899368, + "p95": 193.50400567054749, + "p99": 207.20000565052032 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, + "dispatchLogicalBytes": 38836224, "combineLogicalBytes": 77672448, "fanoutMean": 5.291015625, "recvTokensMax": 723, - "stragglerRank": 4, + "stragglerRank": 5, "correct": true, 
"samplesPooled": 600, "trials": 3 @@ -15923,16 +17128,16 @@ ] }, { - "id": "cx-5fad8218", - "identity": "h100|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|uniform+eplb|8|decode|normal|none|none|0|tuned||73351bbcd4d02de", - "colorKey": "h100_fb5b86de", - "comparisonKey": "bba2bec66db838b4", + "id": "cx-567c4192", + "identity": "b300|deepep|7168|8|256|fp8-directcast|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "b300_8688ff74", + "comparisonKey": "e2dc1b3bb397a94c", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:59:15.450287+00:00", + "generatedAt": "2026-06-27T15:56:03.746973+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h100-dgxc-slurm_17", - "sku": "h100", + "runner": "b300-nv_09", + "sku": "b300", "backend": "deepep", "phase": "decode", "mode": "normal", @@ -15940,29 +17145,30 @@ "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", + "topologyClass": "b300-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · bf16 · uniform+eplb", + "label": "B300 EP8 · deepep · fp8-directcast", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, - "experts": 288, + "experts": 256, "routing": "uniform", - "routingLabel": "uniform+eplb", + "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", - "eplbEnabled": true, - "dispatchDtype": "bf16", + "eplbEnabled": false, + "dispatchDtype": "fp8-directcast", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1515, + "achievedFraction": 0.1351, "configuredUnits": 20, - "deviceUnits": 132, + "deviceUnits": 148, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -15975,54 +17181,54 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - 
"traceSignature": "73351bbcd4d02de", + "traceSignature": "ac583971f94b176", "workloadId": "set:8:d8d49658059863f2", "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": 1.078125, - "eplbImbalanceAfter": 1.00048828125, + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, "backendVersion": "1.2.1", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271923814", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271923814", - "createdAt": "2026-06-26T23:58:04Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28294160895", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28294160895", + "createdAt": "2026-06-27T15:56:03.746973+00:00", + "sha": "42eddb48c3eed35214c5ad50da1aa6527363ff70" }, "rows": [ { "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 96.99200093746185, - "p90": 104.89600151777267, - "p95": 107.04000294208527, - "p99": 111.68000102043152 + "p50": 57.08799883723259, + "p90": 59.20000001788139, + "p95": 61.37600168585777, + "p99": 70.62400132417679 }, "combine": { - "p50": 75.29599964618683, - "p90": 81.28000050783157, - "p95": 81.69600367546082, - "p99": 83.20000022649765 + "p50": 61.63199990987778, + "p90": 63.391998410224915, + "p95": 64.06400352716446, + "p99": 67.61600077152252 }, "roundtrip": { - "p50": 146.27200365066528, - "p90": 154.11199629306793, - "p95": 156.031996011734, - "p99": 158.6879938840866 + "p50": 121.63200229406357, + "p90": 123.77600371837616, + "p95": 125.791996717453, + "p99": 143.39199662208557 }, "isolatedSum": { - "p50": 172.28800058364868, - "p90": 186.17600202560425, - "p95": 188.73600661754608, - "p99": 194.88000124692917 + "p50": 118.71999874711037, + "p90": 122.59199842810631, + "p95": 125.44000521302223, + "p99": 138.2400020956993 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 516096, - "combineLogicalBytes": 516096, - 
"fanoutMean": 4.5, - "recvTokensMax": 6, - "stragglerRank": 5, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -16031,35 +17237,35 @@ "tokensPerRank": 2, "globalTokens": 16, "dispatch": { - "p50": 72.09599763154984, - "p90": 103.87200117111206, - "p95": 106.4319983124733, - "p99": 113.76000195741653 + "p50": 57.5999990105629, + "p90": 60.38400158286095, + "p95": 61.85600161552429, + "p99": 71.96799665689468 }, "combine": { - "p50": 72.67200201749802, - "p90": 81.18399977684021, - "p95": 81.82399719953537, - "p99": 84.28800106048584 + "p50": 63.29599767923355, + "p90": 65.15199691057205, + "p95": 65.60000032186508, + "p99": 68.96000355482101 }, "roundtrip": { - "p50": 127.48800218105316, - "p90": 153.76000106334686, - "p95": 156.3200056552887, - "p99": 158.720001578331 + "p50": 124.86399710178375, + "p90": 127.13600695133209, + "p95": 128.4479945898056, + "p99": 137.15200126171112 }, "isolatedSum": { - "p50": 144.76799964904785, - "p90": 185.05600094795227, - "p95": 188.25599551200867, - "p99": 198.04800301790237 + "p50": 120.89599668979645, + "p90": 125.535998493433, + "p95": 127.45600193738937, + "p99": 140.9280002117157 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1089536, - "combineLogicalBytes": 1089536, - "fanoutMean": 4.75, - "recvTokensMax": 11, - "stragglerRank": 4, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -16068,35 +17274,35 @@ "tokensPerRank": 4, "globalTokens": 32, "dispatch": { - "p50": 71.26399874687195, - "p90": 100.89600086212158, - "p95": 104.19200360774994, - "p99": 112.96000331640244 + "p50": 59.84000116586685, + "p90": 62.30400130152702, + "p95": 63.32799792289734, + "p99": 72.1919983625412 }, "combine": { - "p50": 72.7040022611618, - "p90": 
80.4160013794899, - "p95": 80.6720033288002, - "p99": 87.80799806118011 + "p50": 64.38399851322174, + "p90": 66.68800115585327, + "p95": 67.48799979686737, + "p99": 74.30399954319 }, "roundtrip": { - "p50": 130.0159990787506, - "p90": 154.78399395942688, - "p95": 158.81599485874176, - "p99": 165.53600132465363 + "p50": 127.29600071907043, + "p90": 130.11200726032257, + "p95": 132.47999548912048, + "p99": 155.74400126934052 }, "isolatedSum": { - "p50": 143.96800100803375, - "p90": 181.31200224161148, - "p95": 184.86400693655014, - "p99": 200.76800137758255 + "p50": 124.22399967908859, + "p90": 128.9920024573803, + "p95": 130.8159977197647, + "p99": 146.4959979057312 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2207744, - "combineLogicalBytes": 2207744, - "fanoutMean": 4.8125, - "recvTokensMax": 23, - "stragglerRank": 6, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -16105,35 +17311,35 @@ "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 96.79999947547913, - "p90": 103.16800326108932, - "p95": 105.79200088977814, - "p99": 110.46399921178818 + "p50": 60.5119988322258, + "p90": 63.519999384880066, + "p95": 66.20799750089645, + "p99": 72.64000177383423 }, "combine": { - "p50": 80.73599636554718, - "p90": 81.98399841785431, - "p95": 82.36800134181976, - "p99": 89.75999802350998 + "p50": 67.87200272083282, + "p90": 69.98399645090103, + "p95": 71.42399996519089, + "p99": 86.87999844551086 }, "roundtrip": { - "p50": 150.2400040626526, - "p90": 156.47999942302704, - "p95": 158.91200304031372, - "p99": 168.2240068912506 + "p50": 129.95199859142303, + "p90": 132.22399353981018, + "p95": 133.63200426101685, + "p99": 141.37600362300873 }, "isolatedSum": { - "p50": 177.5359958410263, - "p90": 185.15200167894363, - "p95": 188.1600022315979, - "p99": 200.22399723529816 + "p50": 128.38400155305862, + "p90": 
133.5039958357811, + "p95": 137.63199746608734, + "p99": 159.5200002193451 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4558848, - "combineLogicalBytes": 4558848, - "fanoutMean": 4.96875, - "recvTokensMax": 46, - "stragglerRank": 5, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -16142,35 +17348,35 @@ "tokensPerRank": 16, "globalTokens": 128, "dispatch": { - "p50": 96.83199971914291, - "p90": 102.24000364542007, - "p95": 104.44799810647964, - "p99": 107.77600109577179 + "p50": 61.664000153541565, + "p90": 64.03200328350067, + "p95": 65.34399837255478, + "p99": 70.62400132417679 }, "combine": { - "p50": 81.05599880218506, - "p90": 87.80799806118011, - "p95": 88.70399743318558, - "p99": 89.75999802350998 + "p50": 68.57600063085556, + "p90": 70.23999840021133, + "p95": 71.42399996519089, + "p99": 81.63200318813324 }, "roundtrip": { - "p50": 152.73599326610565, - "p90": 160.73599457740784, - "p95": 162.75200247764587, - "p99": 167.55199432373047 + "p50": 132.38400220870972, + "p90": 135.55200397968292, + "p95": 136.99199259281158, + "p99": 152.0639955997467 }, "isolatedSum": { - "p50": 177.88799852132797, - "p90": 190.0480017066002, - "p95": 193.15199553966522, - "p99": 197.53599911928177 + "p50": 130.24000078439713, + "p90": 134.272001683712, + "p95": 136.76799833774567, + "p99": 152.25600451231003 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 9347072, - "combineLogicalBytes": 9347072, - "fanoutMean": 5.09375, - "recvTokensMax": 86, - "stragglerRank": 5, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 @@ -16179,35 +17385,35 @@ "tokensPerRank": 32, "globalTokens": 256, "dispatch": { - "p50": 91.32800251245499, - "p90": 101.9200012087822, - "p95": 
104.19200360774994, - "p99": 108.57599973678589 + "p50": 64.7360011935234, + "p90": 66.94400310516357, + "p95": 68.06399673223495, + "p99": 75.68000257015228 }, "combine": { - "p50": 81.216000020504, - "p90": 90.01599997282028, - "p95": 90.40000289678574, - "p99": 97.88800030946732 + "p50": 73.66400212049484, + "p90": 75.45600086450577, + "p95": 75.93599706888199, + "p99": 81.79199695587158 }, "roundtrip": { - "p50": 142.2400027513504, - "p90": 161.8880033493042, - "p95": 163.96799683570862, - "p99": 168.67199540138245 + "p50": 143.99999380111694, + "p90": 146.55999839305878, + "p95": 147.93600142002106, + "p99": 157.82399475574493 }, "isolatedSum": { - "p50": 172.54400253295898, - "p90": 191.93600118160248, - "p95": 194.59200650453568, - "p99": 206.4640000462532 + "p50": 138.40000331401825, + "p90": 142.40000396966934, + "p95": 143.99999380111694, + "p99": 157.47199952602386 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 18995200, - "combineLogicalBytes": 18995200, - "fanoutMean": 5.17578125, - "recvTokensMax": 178, - "stragglerRank": 1, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -16216,35 +17422,35 @@ "tokensPerRank": 64, "globalTokens": 512, "dispatch": { - "p50": 108.25599730014801, - "p90": 114.9120032787323, - "p95": 117.08799749612808, - "p99": 121.72800302505493 + "p50": 73.37599992752075, + "p90": 75.99999755620956, + "p95": 77.72800326347351, + "p99": 170.46399414539337 }, "combine": { - "p50": 96.0640013217926, - "p90": 97.85600006580353, - "p95": 102.11200267076492, - "p99": 108.96000266075134 + "p50": 87.99999952316284, + "p90": 90.04800021648407, + "p95": 90.87999910116196, + "p99": 99.96800124645233 }, "roundtrip": { - "p50": 166.46400094032288, - "p90": 181.63199722766876, - "p95": 186.0159933567047, - "p99": 189.91999328136444 + "p50": 171.26399278640747, + "p90": 174.27200078964233, + 
"p95": 175.9359985589981, + "p99": 191.03999435901642 }, "isolatedSum": { - "p50": 204.3199986219406, - "p90": 212.76800334453583, - "p95": 219.200000166893, - "p99": 230.68800568580627 + "p50": 161.3759994506836, + "p90": 166.04799777269363, + "p95": 168.60800236463547, + "p99": 270.4319953918457 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 38291456, - "combineLogicalBytes": 38291456, - "fanoutMean": 5.216796875, - "recvTokensMax": 348, - "stragglerRank": 5, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 @@ -16253,34 +17459,34 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 112.12799698114395, - "p90": 131.26400113105774, - "p95": 135.6479972600937, - "p99": 141.05600118637085 + "p50": 83.96799862384796, + "p90": 86.68799698352814, + "p95": 87.61599659919739, + "p99": 95.87199985980988 }, "combine": { - "p50": 106.36799782514572, - "p90": 117.37599968910217, - "p95": 120.80000340938568, - "p99": 121.8239963054657 + "p50": 109.0560033917427, + "p90": 112.0000034570694, + "p95": 112.89600282907486, + "p99": 120.99199742078781 }, "roundtrip": { - "p50": 195.68000733852386, - "p90": 214.59199488162994, - "p95": 216.60800278186798, - "p99": 221.91999852657318 + "p50": 219.00799870491028, + "p90": 222.08000719547272, + "p95": 224.16000068187714, + "p99": 238.8480007648468 }, "isolatedSum": { - "p50": 218.49599480628967, - "p90": 248.6400008201599, - "p95": 256.44800066947937, - "p99": 262.87999749183655 + "p50": 193.02400201559067, + "p90": 198.68800044059753, + "p95": 200.51199942827225, + "p99": 216.8639972805977 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77113344, - "combineLogicalBytes": 77113344, - "fanoutMean": 5.2529296875, - "recvTokensMax": 685, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, 
"stragglerRank": 7, "correct": true, "samplesPooled": 600, @@ -16289,16 +17495,16 @@ ] }, { - "id": "cx-7f743bfe", - "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|decode|normal|none|none|0|tuned||14ded8461f2636c", - "colorKey": "h100_aa268d13", - "comparisonKey": "791af0af2f802328", + "id": "cx-10314900", + "identity": "b300|deepep|7168|8|256|fp8-pertoken|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "b300_5b8a7672", + "comparisonKey": "facc765e5a3b34b6", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:59:41.322977+00:00", + "generatedAt": "2026-06-27T15:56:09.517904+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h100-dgxc-slurm_18", - "sku": "h100", + "runner": "b300-nv_13", + "sku": "b300", "backend": "deepep", "phase": "decode", "mode": "normal", @@ -16306,29 +17512,30 @@ "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", + "topologyClass": "b300-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · bf16 · zipf", + "label": "B300 EP8 · deepep · fp8-pertoken", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, "experts": 256, - "routing": "zipf", - "routingLabel": "zipf", + "routing": "uniform", + "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, - "dispatchDtype": "bf16", + "dispatchDtype": "fp8-pertoken", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1515, + "achievedFraction": 0.1351, "configuredUnits": 20, - "deviceUnits": 132, + "deviceUnits": 148, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -16341,8 +17548,8 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "14ded8461f2636c", - 
"workloadId": "set:8:f5576e2b712d38c3", + "traceSignature": "ac583971f94b176", + "workloadId": "set:8:d8d49658059863f2", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -16350,45 +17557,45 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271945409", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271945409", - "createdAt": "2026-06-26T23:58:46Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28294164589", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28294164589", + "createdAt": "2026-06-27T15:56:09.517904+00:00", + "sha": "42eddb48c3eed35214c5ad50da1aa6527363ff70" }, "rows": [ { "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 93.37600320577621, - "p90": 101.59999877214432, - "p95": 103.16800326108932, - "p99": 108.15999656915665 + "p50": 57.08799883723259, + "p90": 59.51999872922897, + "p95": 60.864001512527466, + "p99": 74.65600222349167 }, "combine": { - "p50": 73.69600236415863, - "p90": 78.17599922418594, - "p95": 79.99999821186066, - "p99": 82.59200304746628 + "p50": 63.1679967045784, + "p90": 65.11999666690826, + "p95": 66.20799750089645, + "p99": 72.4480003118515 }, "roundtrip": { - "p50": 142.59199798107147, - "p90": 150.62400698661804, - "p95": 152.54400670528412, - "p99": 159.5200002193451 + "p50": 124.83199685811996, + "p90": 126.97599828243256, + "p95": 128.48000228405, + "p99": 136.73600554466248 }, "isolatedSum": { - "p50": 167.07200556993484, - "p90": 179.77599799633026, - "p95": 183.16800147294998, - "p99": 190.75199961662292 + "p50": 120.25599554181099, + "p90": 124.63999539613724, + "p95": 127.07199901342392, + "p99": 147.10400253534317 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 444416, - "combineLogicalBytes": 444416, - "fanoutMean": 3.875, - "recvTokensMax": 8, - "stragglerRank": 0, + 
"dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -16397,35 +17604,35 @@ "tokensPerRank": 2, "globalTokens": 16, "dispatch": { - "p50": 96.0640013217926, - "p90": 100.89600086212158, - "p95": 101.82400047779083, - "p99": 107.07200318574905 + "p50": 57.72799998521805, + "p90": 59.967998415231705, + "p95": 60.99199876189232, + "p99": 68.70400160551071 }, "combine": { - "p50": 74.43200051784515, - "p90": 80.48000186681747, - "p95": 81.216000020504, - "p99": 82.11199939250946 + "p50": 63.58399987220764, + "p90": 65.18399715423584, + "p95": 66.0799965262413, + "p99": 72.25599884986877 }, "roundtrip": { - "p50": 143.39199662208557, - "p90": 147.87200093269348, - "p95": 153.31199765205383, - "p99": 168.60799491405487 + "p50": 124.4800016283989, + "p90": 127.07200646400452, + "p95": 128.86400520801544, + "p99": 151.5520066022873 }, "isolatedSum": { - "p50": 170.49600183963776, - "p90": 181.37600272893906, - "p95": 183.04000049829483, - "p99": 189.18400257825851 + "p50": 121.31199985742569, + "p90": 125.15199556946754, + "p95": 127.07199528813362, + "p99": 140.9600004553795 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 845824, - "combineLogicalBytes": 845824, - "fanoutMean": 3.6875, - "recvTokensMax": 16, - "stragglerRank": 0, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -16434,35 +17641,35 @@ "tokensPerRank": 4, "globalTokens": 32, "dispatch": { - "p50": 95.87199985980988, - "p90": 100.73599964380264, - "p95": 102.81600058078766, - "p99": 109.95200276374817 + "p50": 59.167999774217606, + "p90": 61.02399900555611, + "p95": 62.431998550891876, + "p99": 81.40800148248672 }, "combine": { - "p50": 74.30399954319, - "p90": 80.89599758386612, - "p95": 81.4720019698143, - "p99": 84.19200032949448 
+ "p50": 63.87200206518173, + "p90": 65.5359998345375, + "p95": 66.49599969387054, + "p99": 72.95999675989151 }, "roundtrip": { - "p50": 142.752006649971, - "p90": 153.02400290966034, - "p95": 154.9759954214096, - "p99": 160.0639969110489 + "p50": 125.21600723266602, + "p90": 128.09599936008453, + "p95": 130.23999333381653, + "p99": 148.19200336933136 }, "isolatedSum": { - "p50": 170.17599940299988, - "p90": 181.63199722766876, - "p95": 184.28800255060196, - "p99": 194.14400309324265 + "p50": 123.04000183939934, + "p90": 126.55999884009361, + "p95": 128.92799824476242, + "p99": 154.36799824237823 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1691648, - "combineLogicalBytes": 1691648, - "fanoutMean": 3.6875, - "recvTokensMax": 32, - "stragglerRank": 3, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -16471,35 +17678,35 @@ "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 97.88800030946732, - "p90": 101.82400047779083, - "p95": 103.96800190210342, - "p99": 111.42399907112122 + "p50": 61.08799949288368, + "p90": 63.29599767923355, + "p95": 64.57599997520447, + "p99": 72.95999675989151 }, "combine": { - "p50": 75.6160020828247, - "p90": 81.4720019698143, - "p95": 82.04799890518188, - "p99": 84.03199911117554 + "p50": 67.58400052785873, + "p90": 69.21599805355072, + "p95": 69.63200122117996, + "p99": 79.52000200748444 }, "roundtrip": { - "p50": 146.7519998550415, - "p90": 153.47200632095337, - "p95": 154.9759954214096, - "p99": 167.9680049419403 + "p50": 130.14400005340576, + "p90": 133.18400084972382, + "p95": 136.35200262069702, + "p99": 151.8400013446808 }, "isolatedSum": { - "p50": 173.50400239229202, - "p90": 183.29600244760513, - "p95": 186.0160008072853, - "p99": 195.45599818229675 + "p50": 128.67200002074242, + "p90": 132.51199573278427, + "p95": 134.20800119638443, + "p99": 152.47999876737595 }, 
"roundtripMeasured": true, - "dispatchLogicalBytes": 3354624, - "combineLogicalBytes": 3354624, - "fanoutMean": 3.65625, - "recvTokensMax": 64, - "stragglerRank": 0, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -16508,35 +17715,35 @@ "tokensPerRank": 16, "globalTokens": 128, "dispatch": { - "p50": 97.08800166845322, - "p90": 100.67199915647507, - "p95": 104.25599664449692, - "p99": 110.6560006737709 + "p50": 61.792001128196716, + "p90": 64.51199948787689, + "p95": 65.5680000782013, + "p99": 77.05599814653397 }, "combine": { - "p50": 78.94399762153625, - "p90": 82.04799890518188, - "p95": 82.78399705886841, - "p99": 89.40800279378891 + "p50": 68.80000233650208, + "p90": 70.65600156784058, + "p95": 71.29599899053574, + "p99": 77.66400277614594 }, "roundtrip": { - "p50": 150.7200002670288, - "p90": 159.10400450229645, - "p95": 161.69600188732147, - "p99": 167.07199811935425 + "p50": 133.215993642807, + "p90": 135.3919953107834, + "p95": 136.99199259281158, + "p99": 163.32800686359406 }, "isolatedSum": { - "p50": 176.03199928998947, - "p90": 182.71999806165695, - "p95": 187.03999370336533, - "p99": 200.06400346755981 + "p50": 130.5920034646988, + "p90": 135.16800105571747, + "p95": 136.86399906873703, + "p99": 154.7200009226799 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 6537216, - "combineLogicalBytes": 6537216, - "fanoutMean": 3.5625, - "recvTokensMax": 127, - "stragglerRank": 0, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -16545,35 +17752,35 @@ "tokensPerRank": 32, "globalTokens": 256, "dispatch": { - "p50": 96.47999703884125, - "p90": 101.31199657917023, - "p95": 104.5759990811348, - "p99": 110.62400043010712 + "p50": 64.83200192451477, + "p90": 
66.91200286149979, + "p95": 67.45599955320358, + "p99": 70.78400254249573 }, "combine": { - "p50": 86.46400272846222, - "p90": 90.11200070381165, - "p95": 90.62399715185165, - "p99": 93.18400174379349 + "p50": 73.53600114583969, + "p90": 75.58400183916092, + "p95": 76.4480009675026, + "p99": 98.11200201511383 }, "roundtrip": { - "p50": 158.75199437141418, - "p90": 163.55200111865997, - "p95": 164.89599645137787, - "p99": 169.21600699424744 + "p50": 145.24799585342407, + "p90": 147.64800667762756, + "p95": 148.95999431610107, + "p99": 166.4000004529953 }, "isolatedSum": { - "p50": 182.94399976730347, - "p90": 191.42399728298187, - "p95": 195.19999623298645, - "p99": 203.8080021739006 + "p50": 138.36800307035446, + "p90": 142.4960047006607, + "p95": 143.90400052070618, + "p99": 168.89600455760956 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 12859392, - "combineLogicalBytes": 12859392, - "fanoutMean": 3.50390625, - "recvTokensMax": 255, - "stragglerRank": 0, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -16582,35 +17789,35 @@ "tokensPerRank": 64, "globalTokens": 512, "dispatch": { - "p50": 106.9440022110939, - "p90": 138.36799561977386, - "p95": 143.0400013923645, - "p99": 250.2720057964325 + "p50": 73.53600114583969, + "p90": 75.9039968252182, + "p95": 76.73600316047668, + "p99": 81.34400099515915 }, "combine": { - "p50": 95.0080007314682, - "p90": 98.39999675750732, - "p95": 98.91200065612793, - "p99": 105.59999942779541 + "p50": 87.77599781751633, + "p90": 89.91999924182892, + "p95": 90.7839983701706, + "p99": 105.0880029797554 }, "roundtrip": { - "p50": 176.67199671268463, - "p90": 184.03199315071106, - "p95": 187.3600035905838, - "p99": 190.5599981546402 + "p50": 173.08799922466278, + "p90": 175.6799966096878, + "p95": 177.37600207328796, + "p99": 204.73599433898926 }, "isolatedSum": { - "p50": 
201.9520029425621, - "p90": 236.7679923772812, - "p95": 241.95200204849243, - "p99": 355.8720052242279 + "p50": 161.31199896335602, + "p90": 165.82399606704712, + "p95": 167.52000153064728, + "p99": 186.43200397491455 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 25145344, - "combineLogicalBytes": 25145344, - "fanoutMean": 3.42578125, - "recvTokensMax": 510, - "stragglerRank": 2, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -16619,34 +17826,34 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 128.4160017967224, - "p90": 145.9520012140274, - "p95": 148.83199334144592, - "p99": 151.99999511241913 + "p50": 84.19200032949448, + "p90": 87.0399996638298, + "p95": 88.86399865150452, + "p99": 98.01600128412247 }, "combine": { - "p50": 119.74400281906128, - "p90": 122.56000190973282, - "p95": 123.80799651145935, - "p99": 129.7920048236847 + "p50": 109.40799862146378, + "p90": 111.93600296974182, + "p95": 112.67200112342834, + "p99": 124.44800138473511 }, "roundtrip": { - "p50": 228.2560020685196, - "p90": 233.88800024986267, - "p95": 236.12800240516663, - "p99": 240.28800427913666 + "p50": 219.7760045528412, + "p90": 223.4240025281906, + "p95": 225.2800017595291, + "p99": 244.80000138282776 }, "isolatedSum": { - "p50": 248.1600046157837, - "p90": 268.5120031237602, - "p95": 272.6399898529053, - "p99": 281.7919999361038 + "p50": 193.59999895095825, + "p90": 198.97600263357162, + "p95": 201.53599977493286, + "p99": 222.46400266885757 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 49946624, - "combineLogicalBytes": 49946624, - "fanoutMean": 3.40234375, - "recvTokensMax": 1022, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, "stragglerRank": 4, "correct": true, "samplesPooled": 600, @@ -16655,34 +17862,35 @@ ] }, { - 
"id": "cx-456ed1f6", - "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|decode|normal|none|none|0|tuned||1fa7fe74d0e30a3", - "colorKey": "h100_aa268d13", - "comparisonKey": "791af0af2f802328", + "id": "cx-5fc48052", + "identity": "b300|deepep|4096|8|128|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||dc27c5e0894e569", + "colorKey": "b300_c9569580", + "comparisonKey": "789db7396b5cd7a2", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:55:00.953910+00:00", + "generatedAt": "2026-06-27T11:14:23.346610+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h100-dgxc-slurm_16", - "sku": "h100", + "runner": "b300-nv_02", + "sku": "b300", "backend": "deepep", - "phase": "decode", + "phase": "prefill", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", + "topologyClass": "b300-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · bf16 · zipf", + "label": "B300 EP8 · deepep · bf16", + "model": "Qwen3.5", "shape": { - "hidden": 7168, + "hidden": 4096, "topk": 8, - "experts": 256, - "routing": "zipf", - "routingLabel": "zipf", + "experts": 128, + "routing": "uniform", + "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, @@ -16692,9 +17900,9 @@ }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1515, + "achievedFraction": 0.1351, "configuredUnits": 20, - "deviceUnits": 132, + "deviceUnits": 148, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -16707,8 +17915,8 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "1fa7fe74d0e30a3", - "workloadId": "set:4:f5576e2b712d38c3", + "traceSignature": "dc27c5e0894e569", + "workloadId": "set:6:76d8142d69406335", "workloadSource": 
"canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -16716,156 +17924,230 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271802749", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271802749", - "createdAt": "2026-06-26T23:54:05Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28287508460", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28287508460", + "createdAt": "2026-06-27T11:14:23.346610+00:00", + "sha": "df7fddee0a275156d4c72fa006cd2b73bce72613" }, "rows": [ { - "tokensPerRank": 1, - "globalTokens": 8, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 97.34400361776352, - "p90": 106.33599758148193, - "p95": 108.99200290441513, - "p99": 118.14399808645248 + "p50": 82.56000280380249, + "p90": 85.02399921417236, + "p95": 88.16000074148178, + "p99": 96.3520035147667 }, "combine": { - "p50": 78.72000336647034, - "p90": 81.11999928951263, - "p95": 82.14399963617325, - "p99": 87.42400258779526 + "p50": 91.48799628019333, + "p90": 93.9520001411438, + "p95": 94.55999732017517, + "p99": 102.94400155544281 }, "roundtrip": { - "p50": 148.76799285411835, - "p90": 160.5439931154251, - "p95": 164.73600268363953, - "p99": 172.44799435138702 + "p50": 158.39999914169312, + "p90": 166.24000668525696, + "p95": 167.80799627304077, + "p99": 184.4799965620041 }, "isolatedSum": { - "p50": 176.06400698423386, - "p90": 187.45599687099457, - "p95": 191.13600254058838, - "p99": 205.56800067424774 + "p50": 174.04799908399582, + "p90": 178.97599935531616, + "p95": 182.71999806165695, + "p99": 199.2960050702095 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 444416, - "combineLogicalBytes": 444416, - "fanoutMean": 3.875, - "recvTokensMax": 8, - "stragglerRank": 6, + "dispatchLogicalBytes": 44564480, + "combineLogicalBytes": 44564480, + "fanoutMean": 5.3125, + 
"recvTokensMax": 699, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 256, + "globalTokens": 2048, "dispatch": { - "p50": 97.50399738550186, - "p90": 104.38399761915207, - "p95": 108.99200290441513, - "p99": 137.2479945421219 + "p50": 124.70400333404541, + "p90": 127.23200023174286, + "p95": 128.7360042333603, + "p99": 135.83999872207642 }, "combine": { - "p50": 79.39200103282928, - "p90": 86.68799698352814, - "p95": 87.52000331878662, - "p99": 103.90400141477585 + "p50": 128.48000228405, + "p90": 130.5920034646988, + "p95": 131.45600259304047, + "p99": 141.02399349212646 }, "roundtrip": { - "p50": 152.99199521541595, - "p90": 162.9759967327118, - "p95": 165.69599509239197, - "p99": 171.55200242996216 + "p50": 231.6800057888031, + "p90": 237.95199394226074, + "p95": 239.29600417613983, + "p99": 251.52000784873962 }, "isolatedSum": { - "p50": 176.89599841833115, - "p90": 191.0719946026802, - "p95": 196.51200622320175, - "p99": 241.15199595689774 + "p50": 253.1840056180954, + "p90": 257.82400369644165, + "p95": 260.19200682640076, + "p99": 276.8639922142029 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 3354624, - "combineLogicalBytes": 3354624, - "fanoutMean": 3.65625, - "recvTokensMax": 64, - "stragglerRank": 5, + "dispatchLogicalBytes": 89726976, + "combineLogicalBytes": 89726976, + "fanoutMean": 5.34814453125, + "recvTokensMax": 1385, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 32, - "globalTokens": 256, + "tokensPerRank": 512, + "globalTokens": 4096, "dispatch": { - "p50": 102.33599692583084, - "p90": 111.68000102043152, - "p95": 115.68000167608261, - "p99": 123.74400347471237 + "p50": 174.17599260807037, + "p90": 177.21599340438843, + "p95": 179.07199263572693, + "p99": 195.0400024652481 }, "combine": { - "p50": 87.45600283145905, - "p90": 94.81599926948547, - "p95": 95.32800316810608, - "p99": 
96.3200032711029 + "p50": 191.64800643920898, + "p90": 201.02399587631226, + "p95": 201.56799256801605, + "p99": 213.6639952659607 }, "roundtrip": { - "p50": 160.7999950647354, - "p90": 168.67199540138245, - "p95": 171.29600048065186, - "p99": 178.52799594402313 + "p50": 346.8480110168457, + "p90": 351.26399993896484, + "p95": 352.86399722099304, + "p99": 362.39999532699585 }, "isolatedSum": { - "p50": 189.7919997572899, - "p90": 206.496000289917, - "p95": 211.0080048441887, - "p99": 220.06400674581528 + "p50": 365.82399904727936, + "p90": 378.2399892807007, + "p95": 380.639985203743, + "p99": 408.7039977312088 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 12859392, - "combineLogicalBytes": 12859392, - "fanoutMean": 3.50390625, - "recvTokensMax": 255, + "dispatchLogicalBytes": 179503104, + "combineLogicalBytes": 179503104, + "fanoutMean": 5.349609375, + "recvTokensMax": 2772, "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 1024, + "globalTokens": 8192, "dispatch": { - "p50": 139.39200341701508, - "p90": 145.34400403499603, - "p95": 147.5200057029724, - "p99": 163.71199488639832 + "p50": 289.34401273727417, + "p90": 292.4480140209198, + "p95": 293.88800263404846, + "p99": 305.34398555755615 }, "combine": { - "p50": 120.15999853610992, - "p90": 128.1599998474121, - "p95": 128.86400520801544, - "p99": 129.88799810409546 + "p50": 389.1200125217438, + "p90": 398.5919952392578, + "p95": 400.9599983692169, + "p99": 410.1119935512543 }, "roundtrip": { - "p50": 227.87199914455414, - "p90": 232.7360063791275, - "p95": 235.32800376415253, - "p99": 255.13601303100586 + "p50": 597.5040197372437, + "p90": 608.1600189208984, + "p95": 612.7039790153503, + "p99": 631.8399906158447 }, "isolatedSum": { - "p50": 259.552001953125, - "p90": 273.50400388240814, - "p95": 276.38401091098785, - "p99": 293.5999929904938 + "p50": 678.464025259018, + "p90": 691.0400092601776, + "p95": 
694.8480010032654, + "p99": 715.4559791088104 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 49946624, - "combineLogicalBytes": 49946624, - "fanoutMean": 3.40234375, - "recvTokensMax": 1022, - "stragglerRank": 5, + "dispatchLogicalBytes": 359022592, + "combineLogicalBytes": 359022592, + "fanoutMean": 5.349853515625, + "recvTokensMax": 5558, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 520.6720232963562, + "p90": 525.439977645874, + "p95": 530.9439897537231, + "p99": 536.0640287399292 + }, + "combine": { + "p50": 754.9759745597839, + "p90": 765.7920122146606, + "p95": 766.9119834899902, + "p99": 778.6880135536194 + }, + "roundtrip": { + "p50": 1255.5840015411377, + "p90": 1263.7759447097778, + "p95": 1268.1920528411865, + "p99": 1274.8479843139648 + }, + "isolatedSum": { + "p50": 1275.6479978561401, + "p90": 1291.2319898605347, + "p95": 1297.8559732437134, + "p99": 1314.7520422935486 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 716111872, + "combineLogicalBytes": 716111872, + "fanoutMean": 5.33544921875, + "recvTokensMax": 10982, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 979.5200228691101, + "p90": 990.1760220527649, + "p95": 992.0960068702698, + "p99": 1001.5039443969727 + }, + "combine": { + "p50": 1442.304015159607, + "p90": 1454.1120529174805, + "p95": 1455.1680088043213, + "p99": 1493.7599897384644 + }, + "roundtrip": { + "p50": 2391.200065612793, + "p90": 2402.9760360717773, + "p95": 2407.7439308166504, + "p99": 2476.6080379486084 + }, + "isolatedSum": { + "p50": 2421.824038028717, + "p90": 2444.2880749702454, + "p95": 2447.264015674591, + "p99": 2495.263934135437 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1432395776, + "combineLogicalBytes": 1432395776, + "fanoutMean": 
5.336090087890625, + "recvTokensMax": 21939, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 @@ -16873,34 +18155,35 @@ ] }, { - "id": "cx-db353ddd", - "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|decode|normal|none|none|0|tuned||22da8b58646609c", - "colorKey": "h100_002beb29", - "comparisonKey": "d83561aeea03cdbc", + "id": "cx-65c7aa3e", + "identity": "b300|deepep|4096|8|128|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||dc27c5e0894e569", + "colorKey": "b300_307ed708", + "comparisonKey": "691973c29c59446c", "schemaVersion": 3, - "generatedAt": "2026-06-27T00:01:11.693533+00:00", + "generatedAt": "2026-06-27T09:52:08.477764+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h100-dgxc-slurm_12", - "sku": "h100", + "runner": "b300-nv_06", + "sku": "b300", "backend": "deepep", - "phase": "decode", + "phase": "prefill", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b300-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · bf16 · zipf-heavy", + "label": "B300 EP8 · deepep · bf16", + "model": "Qwen3.5", "shape": { - "hidden": 7168, + "hidden": 4096, "topk": 8, - "experts": 256, - "routing": "zipf-heavy", - "routingLabel": "zipf-heavy", + "experts": 128, + "routing": "uniform", + "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, @@ -16910,9 +18193,9 @@ }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1515, + "achievedFraction": 0.1351, "configuredUnits": 20, - "deviceUnits": 132, + "deviceUnits": 148, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -16925,8 +18208,8 @@ "scaleUpDomain": 
8 }, "routingConsistent": true, - "traceSignature": "22da8b58646609c", - "workloadId": "set:8:6b84350720aa8233", + "traceSignature": "dc27c5e0894e569", + "workloadId": "set:6:76d8142d69406335", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -16934,304 +18217,230 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271987393", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271987393", - "createdAt": "2026-06-27T00:00:08Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28285702163", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285702163", + "createdAt": "2026-06-27T09:52:08.477764+00:00", + "sha": "149586650dbed5b7579537347e9489d5b41543c1" }, "rows": [ { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 94.14400160312653, - "p90": 104.41599786281586, - "p95": 109.8560020327568, - "p99": 133.69600474834442 - }, - "combine": { - "p50": 71.32799923419952, - "p90": 75.03999769687653, - "p95": 80.86399734020233, - "p99": 237.34399676322937 - }, - "roundtrip": { - "p50": 141.2159949541092, - "p90": 150.39999783039093, - "p95": 151.8079936504364, - "p99": 244.73600089550018 - }, - "isolatedSum": { - "p50": 165.47200083732605, - "p90": 179.45599555969238, - "p95": 190.71999937295914, - "p99": 371.0400015115738 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 172032, - "combineLogicalBytes": 172032, - "fanoutMean": 1.5, - "recvTokensMax": 8, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 94.43199634552002, - "p90": 101.50399804115295, - "p95": 103.04000228643417, - "p99": 105.85600137710571 - }, - "combine": { - "p50": 72.03199714422226, - "p90": 73.95199686288834, - "p95": 74.5600014925003, - "p99": 79.80799674987793 - }, 
- "roundtrip": { - "p50": 141.02399349212646, - "p90": 147.77599275112152, - "p95": 150.176003575325, - "p99": 175.6799966096878 - }, - "isolatedSum": { - "p50": 166.46399348974228, - "p90": 175.4559949040413, - "p95": 177.60000377893448, - "p99": 185.66399812698364 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 315392, - "combineLogicalBytes": 315392, - "fanoutMean": 1.375, - "recvTokensMax": 16, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 95.20000219345093, - "p90": 101.47199779748917, - "p95": 103.13600301742554, - "p99": 108.12799632549286 + "p50": 82.68799632787704, + "p90": 86.30400151014328, + "p95": 89.66399729251862, + "p99": 95.29600292444229 }, "combine": { - "p50": 70.8480030298233, - "p90": 78.65600287914276, - "p95": 79.0719985961914, - "p99": 81.53600245714188 + "p50": 92.22400188446045, + "p90": 94.43199634552002, + "p95": 101.6319990158081, + "p99": 103.96800190210342 }, "roundtrip": { - "p50": 143.93599331378937, - "p90": 152.41600573062897, - "p95": 155.61600029468536, - "p99": 564.3519759178162 + "p50": 159.9999964237213, + "p90": 167.90400445461273, + "p95": 170.49600183963776, + "p99": 177.12000012397766 }, "isolatedSum": { - "p50": 166.04800522327423, - "p90": 180.12800067663193, - "p95": 182.20800161361694, - "p99": 189.66399878263474 + "p50": 174.9119982123375, + "p90": 180.7359978556633, + "p95": 191.29599630832672, + "p99": 199.26400482654572 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 616448, - "combineLogicalBytes": 616448, - "fanoutMean": 1.34375, - "recvTokensMax": 32, - "stragglerRank": 4, + "dispatchLogicalBytes": 44564480, + "combineLogicalBytes": 44564480, + "fanoutMean": 5.3125, + "recvTokensMax": 699, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 256, + 
"globalTokens": 2048, "dispatch": { - "p50": 96.25600278377533, - "p90": 103.58399897813797, - "p95": 107.58399963378906, - "p99": 168.09600591659546 + "p50": 123.29600006341934, + "p90": 127.00800597667694, + "p95": 128.22400033473969, + "p99": 140.03199338912964 }, "combine": { - "p50": 75.71200281381607, - "p90": 80.1599994301796, - "p95": 80.83199709653854, - "p99": 82.30400085449219 + "p50": 127.9039978981018, + "p90": 129.82399761676788, + "p95": 131.9359987974167, + "p99": 143.42400431632996 }, "roundtrip": { - "p50": 144.73600685596466, - "p90": 150.81599354743958, - "p95": 152.79999375343323, - "p99": 157.95199573040009 + "p50": 229.5999974012375, + "p90": 235.83999276161194, + "p95": 237.34399676322937, + "p99": 241.60000681877136 }, "isolatedSum": { - "p50": 171.9680055975914, - "p90": 183.74399840831757, - "p95": 188.4159967303276, - "p99": 250.40000677108765 + "p50": 251.19999796152115, + "p90": 256.8320035934448, + "p95": 260.1599991321564, + "p99": 283.4559977054596 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1376256, - "combineLogicalBytes": 1376256, - "fanoutMean": 1.5, - "recvTokensMax": 64, + "dispatchLogicalBytes": 89726976, + "combineLogicalBytes": 89726976, + "fanoutMean": 5.34814453125, + "recvTokensMax": 1385, "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 16, - "globalTokens": 128, + "tokensPerRank": 512, + "globalTokens": 4096, "dispatch": { - "p50": 96.3200032711029, - "p90": 102.39999741315842, - "p95": 104.51199859380722, - "p99": 110.27199774980545 + "p50": 173.75999689102173, + "p90": 177.18400061130524, + "p95": 178.6240041255951, + "p99": 186.97600066661835 }, "combine": { - "p50": 78.65600287914276, - "p90": 81.37600123882294, - "p95": 81.82399719953537, - "p99": 87.0399996638298 + "p50": 191.64800643920898, + "p90": 200.3519982099533, + "p95": 200.8640021085739, + "p99": 212.3199999332428 }, "roundtrip": { - "p50": 146.33600413799286, - "p90": 152.38399803638458, - "p95": 
153.76000106334686, - "p99": 157.82399475574493 + "p50": 345.7599878311157, + "p90": 350.816011428833, + "p95": 352.6400029659271, + "p99": 360.1279854774475 }, "isolatedSum": { - "p50": 174.97600615024567, - "p90": 183.77599865198135, - "p95": 186.3359957933426, - "p99": 197.31199741363525 + "p50": 365.4080033302307, + "p90": 377.53599882125854, + "p95": 379.488006234169, + "p99": 399.29600059986115 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2781184, - "combineLogicalBytes": 2781184, - "fanoutMean": 1.515625, - "recvTokensMax": 128, + "dispatchLogicalBytes": 179503104, + "combineLogicalBytes": 179503104, + "fanoutMean": 5.349609375, + "recvTokensMax": 2772, "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 32, - "globalTokens": 256, + "tokensPerRank": 1024, + "globalTokens": 8192, "dispatch": { - "p50": 96.67199850082397, - "p90": 101.95200145244598, - "p95": 103.87200117111206, - "p99": 109.56799983978271 + "p50": 289.0560030937195, + "p90": 292.86399483680725, + "p95": 294.1119968891144, + "p99": 311.71199679374695 }, "combine": { - "p50": 83.20000022649765, - "p90": 88.639996945858, - "p95": 89.28000181913376, - "p99": 90.27200192213058 + "p50": 397.599995136261, + "p90": 408.9280068874359, + "p95": 410.0160002708435, + "p99": 421.7279851436615 }, "roundtrip": { - "p50": 154.27200496196747, - "p90": 159.90400314331055, - "p95": 161.8880033493042, - "p99": 171.64799571037292 + "p50": 594.3359732627869, + "p90": 600.6079912185669, + "p95": 604.4480204582214, + "p99": 610.5920076370239 }, "isolatedSum": { - "p50": 179.87199872732162, - "p90": 190.59199839830399, - "p95": 193.15200299024582, - "p99": 199.8400017619133 + "p50": 686.6559982299805, + "p90": 701.7920017242432, + "p95": 704.1279971599579, + "p99": 733.4399819374084 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 5533696, - "combineLogicalBytes": 5533696, - "fanoutMean": 1.5078125, - "recvTokensMax": 256, + "dispatchLogicalBytes": 
359022592, + "combineLogicalBytes": 359022592, + "fanoutMean": 5.349853515625, + "recvTokensMax": 5558, "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 64, - "globalTokens": 512, + "tokensPerRank": 2048, + "globalTokens": 16384, "dispatch": { - "p50": 104.80000078678131, - "p90": 112.5440001487732, - "p95": 115.35999923944473, - "p99": 119.64800208806992 + "p50": 521.3119983673096, + "p90": 528.544008731842, + "p95": 534.0480208396912, + "p99": 546.8479990959167 }, "combine": { - "p50": 95.32800316810608, - "p90": 97.6639986038208, - "p95": 98.14400225877762, - "p99": 103.45599800348282 + "p50": 755.2000284194946, + "p90": 765.887975692749, + "p95": 766.6559815406799, + "p99": 781.5039753913879 }, "roundtrip": { - "p50": 173.21600019931793, - "p90": 177.47199535369873, - "p95": 178.97599935531616, - "p99": 184.09599363803864 + "p50": 1255.0400495529175, + "p90": 1264.8320198059082, + "p95": 1271.3279724121094, + "p99": 1316.3199424743652 }, "isolatedSum": { - "p50": 200.1280039548874, - "p90": 210.207998752594, - "p95": 213.50400149822235, - "p99": 223.10400009155273 + "p50": 1276.5120267868042, + "p90": 1294.431984424591, + "p95": 1300.704002380371, + "p99": 1328.3519744873047 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 11210752, - "combineLogicalBytes": 11210752, - "fanoutMean": 1.52734375, - "recvTokensMax": 512, - "stragglerRank": 0, + "dispatchLogicalBytes": 716111872, + "combineLogicalBytes": 716111872, + "fanoutMean": 5.33544921875, + "recvTokensMax": 10982, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 4096, + "globalTokens": 32768, "dispatch": { - "p50": 120.64000219106674, - "p90": 141.9840008020401, - "p95": 143.23200285434723, - "p99": 148.54399859905243 + "p50": 980.0000190734863, + "p90": 991.0719990730286, + "p95": 995.0399994850159, + "p99": 1015.1040554046631 }, "combine": { - "p50": 
119.48800086975098, - "p90": 122.04799801111221, - "p95": 122.56000190973282, - "p99": 123.58400225639343 + "p50": 1441.856026649475, + "p90": 1453.5679817199707, + "p95": 1456.9599628448486, + "p99": 1492.5119876861572 }, "roundtrip": { - "p50": 219.84000504016876, - "p90": 226.17599368095398, - "p95": 227.29599475860596, - "p99": 232.16000199317932 + "p50": 2390.6240463256836, + "p90": 2406.9759845733643, + "p95": 2415.616035461426, + "p99": 2474.3359088897705 }, "isolatedSum": { - "p50": 240.12800306081772, - "p90": 264.0319988131523, - "p95": 265.79200476408005, - "p99": 272.12800085544586 + "p50": 2421.8560457229614, + "p90": 2444.6399807929993, + "p95": 2451.9999623298645, + "p99": 2507.6160430908203 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 22650880, - "combineLogicalBytes": 22650880, - "fanoutMean": 1.54296875, - "recvTokensMax": 1024, - "stragglerRank": 4, + "dispatchLogicalBytes": 1432395776, + "combineLogicalBytes": 1432395776, + "fanoutMean": 5.336090087890625, + "recvTokensMax": 21939, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 @@ -17239,34 +18448,35 @@ ] }, { - "id": "cx-acf36978", - "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|decode|normal|none|none|0|tuned||47fddabb3277bec", - "colorKey": "h100_002beb29", - "comparisonKey": "d83561aeea03cdbc", + "id": "cx-ec7ecdcc", + "identity": "b300|deepep|5120|8|160|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||0c022a63bbcbf42", + "colorKey": "b300_307ed708", + "comparisonKey": "03e634138c74f76f", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:55:11.297271+00:00", + "generatedAt": "2026-06-27T09:52:35.993019+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h100-dgxc-slurm_18", - "sku": "h100", + "runner": "b300-nv_13", + "sku": "b300", "backend": "deepep", - "phase": "decode", + "phase": "prefill", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", 
"comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b300-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · bf16 · zipf-heavy", + "label": "B300 EP8 · deepep · bf16", + "model": "shape 5120/8/160", "shape": { - "hidden": 7168, + "hidden": 5120, "topk": 8, - "experts": 256, - "routing": "zipf-heavy", - "routingLabel": "zipf-heavy", + "experts": 160, + "routing": "uniform", + "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, @@ -17276,9 +18486,9 @@ }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1515, + "achievedFraction": 0.1351, "configuredUnits": 20, - "deviceUnits": 132, + "deviceUnits": 148, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -17291,8 +18501,8 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "47fddabb3277bec", - "workloadId": "set:4:6b84350720aa8233", + "traceSignature": "0c022a63bbcbf42", + "workloadId": "set:6:28c0c09b13ff0acf", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -17300,156 +18510,230 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271810135", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271810135", - "createdAt": "2026-06-26T23:54:18Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28285713494", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285713494", + "createdAt": "2026-06-27T09:52:35.993019+00:00", + "sha": "149586650dbed5b7579537347e9489d5b41543c1" }, "rows": [ { - "tokensPerRank": 1, - "globalTokens": 8, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 
95.83999961614609, - "p90": 101.27999633550644, - "p95": 104.86400127410889, - "p99": 111.51999980211258 + "p50": 92.51199662685394, + "p90": 98.81599992513657, + "p95": 100.12800246477127, + "p99": 117.50400066375732 }, "combine": { - "p50": 71.74400240182877, - "p90": 73.95199686288834, - "p95": 79.03999835252762, - "p99": 81.08799904584885 + "p50": 103.13600301742554, + "p90": 104.22399640083313, + "p95": 104.96000200510025, + "p99": 114.01599645614624 }, "roundtrip": { - "p50": 142.5279974937439, - "p90": 149.79200065135956, - "p95": 151.71200037002563, - "p99": 156.73600137233734 + "p50": 176.60799622535706, + "p90": 182.8799992799759, + "p95": 184.92799997329712, + "p99": 195.5520063638687 }, "isolatedSum": { - "p50": 167.58400201797485, - "p90": 175.23199319839478, - "p95": 183.9039996266365, - "p99": 192.60799884796143 + "p50": 195.64799964427948, + "p90": 203.0399963259697, + "p95": 205.08800446987152, + "p99": 231.51999711990356 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 172032, - "combineLogicalBytes": 172032, - "fanoutMean": 1.5, - "recvTokensMax": 8, - "stragglerRank": 6, + "dispatchLogicalBytes": 55674880, + "combineLogicalBytes": 55674880, + "fanoutMean": 5.3095703125, + "recvTokensMax": 699, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 256, + "globalTokens": 2048, "dispatch": { - "p50": 98.1760025024414, - "p90": 104.96000200510025, - "p95": 106.91200196743011, - "p99": 112.44799941778183 + "p50": 119.52000111341476, + "p90": 124.95999783277512, + "p95": 126.46399438381195, + "p99": 136.25599443912506 }, "combine": { - "p50": 73.34399968385696, - "p90": 79.99999821186066, - "p95": 80.48000186681747, - "p99": 85.08799970149994 + "p50": 139.96799290180206, + "p90": 141.37600362300873, + "p95": 142.7839994430542, + "p99": 151.48800611495972 }, "roundtrip": { - "p50": 146.14400267601013, - "p90": 152.6080071926117, - "p95": 154.7520011663437, - 
"p99": 160.73599457740784 + "p50": 244.54399943351746, + "p90": 249.4720071554184, + "p95": 251.10399723052979, + "p99": 258.08000564575195 }, "isolatedSum": { - "p50": 171.52000218629837, - "p90": 184.9600002169609, - "p95": 187.3920038342476, - "p99": 197.53599911928177 + "p50": 259.4879940152168, + "p90": 266.33600145578384, + "p95": 269.24799382686615, + "p99": 287.7440005540848 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1376256, - "combineLogicalBytes": 1376256, - "fanoutMean": 1.5, - "recvTokensMax": 64, - "stragglerRank": 6, + "dispatchLogicalBytes": 111104000, + "combineLogicalBytes": 111104000, + "fanoutMean": 5.2978515625, + "recvTokensMax": 1387, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 32, - "globalTokens": 256, + "tokensPerRank": 512, + "globalTokens": 4096, "dispatch": { - "p50": 98.91200065612793, - "p90": 105.92000186443329, - "p95": 108.47999900579453, - "p99": 115.93600362539291 + "p50": 185.85599958896637, + "p90": 189.05599415302277, + "p95": 191.48799777030945, + "p99": 201.7280012369156 }, "combine": { - "p50": 82.87999778985977, - "p90": 88.54400366544724, - "p95": 88.92799913883209, - "p99": 90.27200192213058 + "p50": 214.62400257587433, + "p90": 224.48000311851501, + "p95": 225.43999552726746, + "p99": 236.4799976348877 }, "roundtrip": { - "p50": 156.19200468063354, - "p90": 162.84799575805664, - "p95": 165.56799411773682, - "p99": 169.72799599170685 + "p50": 372.76801466941833, + "p90": 379.2319893836975, + "p95": 381.632000207901, + "p99": 400.9599983692169 }, "isolatedSum": { - "p50": 181.7919984459877, - "p90": 194.46400552988052, - "p95": 197.40799814462662, - "p99": 206.2080055475235 + "p50": 400.4800021648407, + "p90": 413.5359972715378, + "p95": 416.9279932975769, + "p99": 438.2079988718033 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 5533696, - "combineLogicalBytes": 5533696, - "fanoutMean": 1.5078125, - "recvTokensMax": 256, - "stragglerRank": 6, + 
"dispatchLogicalBytes": 223098880, + "combineLogicalBytes": 223098880, + "fanoutMean": 5.319091796875, + "recvTokensMax": 2762, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 1024, + "globalTokens": 8192, "dispatch": { - "p50": 121.88799679279327, - "p90": 129.88799810409546, - "p95": 131.16799294948578, - "p99": 136.1279934644699 + "p50": 303.0720055103302, + "p90": 310.65601110458374, + "p95": 313.1519854068756, + "p99": 327.84000039100647 }, "combine": { - "p50": 114.68800157308578, - "p90": 121.18399888277054, - "p95": 122.079998254776, - "p99": 129.2160004377365 + "p50": 436.2240135669708, + "p90": 445.47200202941895, + "p95": 445.8880126476288, + "p99": 458.9439928531647 }, "roundtrip": { - "p50": 219.90400552749634, - "p90": 224.73600506782532, - "p95": 226.623997092247, - "p99": 230.30400276184082 + "p50": 699.4240283966064, + "p90": 707.9359889030457, + "p95": 712.2560143470764, + "p99": 739.520013332367 }, "isolatedSum": { - "p50": 236.57599836587906, - "p90": 251.071996986866, - "p95": 253.24799120426178, - "p99": 265.3439939022064 + "p50": 739.296019077301, + "p90": 756.1280131340027, + "p95": 759.0399980545044, + "p99": 786.7839932441711 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 22650880, - "combineLogicalBytes": 22650880, - "fanoutMean": 1.54296875, - "recvTokensMax": 1024, - "stragglerRank": 6, + "dispatchLogicalBytes": 446730240, + "combineLogicalBytes": 446730240, + "fanoutMean": 5.325439453125, + "recvTokensMax": 5518, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 548.9280223846436, + "p90": 558.0160021781921, + "p95": 559.7119927406311, + "p99": 571.5199708938599 + }, + "combine": { + "p50": 779.3279886245728, + "p90": 790.4639840126038, + "p95": 791.263997554779, + "p99": 803.9360046386719 + }, + "roundtrip": { + "p50": 
1311.1679553985596, + "p90": 1321.3759660720825, + "p95": 1328.3519744873047, + "p99": 1356.0960292816162 + }, + "isolatedSum": { + "p50": 1328.2560110092163, + "p90": 1348.479986190796, + "p95": 1350.9759902954102, + "p99": 1375.4559755325317 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 893634560, + "combineLogicalBytes": 893634560, + "fanoutMean": 5.32647705078125, + "recvTokensMax": 11032, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1032.3200225830078, + "p90": 1042.688012123108, + "p95": 1046.6879606246948, + "p99": 1057.919979095459 + }, + "combine": { + "p50": 1477.4080514907837, + "p90": 1481.4079999923706, + "p95": 1490.9759759902954, + "p99": 1538.9440059661865 + }, + "roundtrip": { + "p50": 2480.6079864501953, + "p90": 2492.9919242858887, + "p95": 2498.624086380005, + "p99": 2541.7280197143555 + }, + "isolatedSum": { + "p50": 2509.7280740737915, + "p90": 2524.0960121154785, + "p95": 2537.6639366149902, + "p99": 2596.8639850616455 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1786265600, + "combineLogicalBytes": 1786265600, + "fanoutMean": 5.323486328125, + "recvTokensMax": 21895, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -17457,46 +18741,47 @@ ] }, { - "id": "cx-18fdfbeb", - "identity": "h100|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-heavy+eplb|8|decode|normal|none|none|0|tuned||5a3054422534366", - "colorKey": "h100_c44978e5", - "comparisonKey": "26b5ab23f62d3389", + "id": "cx-99771256", + "identity": "b300|deepep|6144|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "b300_c9569580", + "comparisonKey": "f9f9af4879f1b5f6", "schemaVersion": 3, - "generatedAt": "2026-06-27T00:01:10.918377+00:00", + "generatedAt": "2026-06-27T11:13:49.871789+00:00", "status": "valid", "publicationStatus": 
"official", - "runner": "h100-dgxc-slurm_11", - "sku": "h100", + "runner": "b300-nv_06", + "sku": "b300", "backend": "deepep", - "phase": "decode", + "phase": "prefill", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", + "topologyClass": "b300-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · bf16 · zipf-heavy+eplb", + "label": "B300 EP8 · deepep · bf16", + "model": "MiniMax-M3", "shape": { - "hidden": 7168, + "hidden": 6144, "topk": 8, - "experts": 288, - "routing": "zipf-heavy", - "routingLabel": "zipf-heavy+eplb", - "routingStep": 0, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, "unevenTokens": "none", - "eplbEnabled": true, + "eplbEnabled": false, "dispatchDtype": "bf16", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1515, + "achievedFraction": 0.1351, "configuredUnits": 20, - "deviceUnits": 132, + "deviceUnits": 148, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -17509,313 +18794,239 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "5a3054422534366", - "workloadId": "set:8:6b84350720aa8233", + "traceSignature": "64d989e2e2a6b31", + "workloadId": "set:6:9f5e1e005a35e937", "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": 7.40625, - "eplbImbalanceAfter": 1.0004417782738093, + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, "backendVersion": "1.2.1", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271992225", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271992225", - "createdAt": "2026-06-27T00:00:15Z", - "sha": 
"45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28287497246", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28287497246", + "createdAt": "2026-06-27T11:13:49.871789+00:00", + "sha": "df7fddee0a275156d4c72fa006cd2b73bce72613" }, "rows": [ { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 94.01600062847137, - "p90": 101.59999877214432, - "p95": 102.68799960613251, - "p99": 107.96800255775452 - }, - "combine": { - "p50": 71.87200337648392, - "p90": 78.87999713420868, - "p95": 79.48800176382065, - "p99": 80.99199831485748 - }, - "roundtrip": { - "p50": 138.72000575065613, - "p90": 147.2640037536621, - "p95": 148.76799285411835, - "p99": 153.08800339698792 - }, - "isolatedSum": { - "p50": 165.8880040049553, - "p90": 180.479995906353, - "p95": 182.17600136995316, - "p99": 188.960000872612 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 559104, - "combineLogicalBytes": 559104, - "fanoutMean": 4.875, - "recvTokensMax": 6, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 69.92000341415405, - "p90": 99.64799880981445, - "p95": 101.43999755382538, - "p99": 106.84800148010254 - }, - "combine": { - "p50": 71.6480016708374, - "p90": 79.71200346946716, - "p95": 80.64000308513641, - "p99": 81.91999793052673 - }, - "roundtrip": { - "p50": 129.34400141239166, - "p90": 143.71199905872345, - "p95": 146.08000218868256, - "p99": 150.39999783039093 - }, - "isolatedSum": { - "p50": 141.56800508499146, - "p90": 179.36000227928162, - "p95": 182.0800006389618, - "p99": 188.76799941062927 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1175552, - "combineLogicalBytes": 1175552, - "fanoutMean": 5.125, - "recvTokensMax": 12, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - 
"p50": 71.74400240182877, - "p90": 99.80800002813339, - "p95": 101.79200023412704, - "p99": 107.96800255775452 + "p50": 99.29600358009338, + "p90": 102.14400291442871, + "p95": 102.9760017991066, + "p99": 110.55999994277954 }, "combine": { - "p50": 72.67200201749802, - "p90": 81.56800270080566, - "p95": 86.43200248479843, - "p99": 88.73599767684937 + "p50": 105.69600015878677, + "p90": 114.20799791812897, + "p95": 114.62400108575821, + "p99": 128.83199751377106 }, "roundtrip": { - "p50": 129.50399518013, - "p90": 156.47999942302704, - "p95": 159.13599729537964, - "p99": 162.6880019903183 + "p50": 184.57600474357605, + "p90": 188.83199989795685, + "p95": 190.17599523067474, + "p99": 198.08000326156616 }, "isolatedSum": { - "p50": 144.41600441932678, - "p90": 181.37600272893906, - "p95": 188.22400271892548, - "p99": 196.70400023460388 + "p50": 204.99200373888016, + "p90": 216.35200083255768, + "p95": 217.6000028848648, + "p99": 239.3919974565506 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2465792, - "combineLogicalBytes": 2465792, - "fanoutMean": 5.375, - "recvTokensMax": 25, + "dispatchLogicalBytes": 66576384, + "combineLogicalBytes": 66576384, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 256, + "globalTokens": 2048, "dispatch": { - "p50": 72.12799787521362, - "p90": 96.16000205278397, - "p95": 98.30400347709656, - "p99": 103.64799946546555 + "p50": 132.38400220870972, + "p90": 137.472003698349, + "p95": 139.42399621009827, + "p99": 147.20000326633453 }, "combine": { - "p50": 72.9919970035553, - "p90": 81.08799904584885, - "p95": 81.60000294446945, - "p99": 87.13600039482117 + "p50": 150.14399588108063, + "p90": 151.61600708961487, + "p95": 151.7760008573532, + "p99": 154.11199629306793 }, "roundtrip": { - "p50": 127.9039978981018, - "p90": 152.16000378131866, - "p95": 155.90399503707886, - "p99": 
157.24800527095795 + "p50": 259.93600487709045, + "p90": 264.0640139579773, + "p95": 265.1520073413849, + "p99": 282.81599283218384 }, "isolatedSum": { - "p50": 145.11999487876892, - "p90": 177.2480010986328, - "p95": 179.904006421566, - "p99": 190.7839998602867 + "p50": 282.52799808979034, + "p90": 289.08801078796387, + "p95": 291.1999970674515, + "p99": 301.31199955940247 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4988928, - "combineLogicalBytes": 4988928, - "fanoutMean": 5.4375, - "recvTokensMax": 47, - "stragglerRank": 4, + "dispatchLogicalBytes": 133619712, + "combineLogicalBytes": 133619712, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 16, - "globalTokens": 128, + "tokensPerRank": 512, + "globalTokens": 4096, "dispatch": { - "p50": 82.0159986615181, - "p90": 98.55999797582626, - "p95": 101.50399804115295, - "p99": 106.33599758148193 + "p50": 197.37599790096283, + "p90": 201.34399831295013, + "p95": 202.36800611019135, + "p99": 210.40000021457672 }, "combine": { - "p50": 73.56800138950348, - "p90": 87.87199854850769, - "p95": 88.8959988951683, - "p99": 89.88799899816513 + "p50": 238.81599307060242, + "p90": 248.79999458789825, + "p95": 249.85599517822266, + "p99": 255.74401021003723 }, "roundtrip": { - "p50": 127.71199643611908, - "p90": 159.32799875736237, - "p95": 160.99199652671814, - "p99": 163.90399634838104 + "p50": 410.4959964752197, + "p90": 417.7919924259186, + "p95": 420.8959937095642, + "p99": 438.01599740982056 }, "isolatedSum": { - "p50": 155.58400005102158, - "p90": 186.43199652433395, - "p95": 190.39999693632126, - "p99": 196.22399657964706 + "p50": 436.19199097156525, + "p90": 450.1439929008484, + "p95": 452.224001288414, + "p99": 466.14401042461395 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 9791488, - "combineLogicalBytes": 9791488, - "fanoutMean": 5.3359375, - "recvTokensMax": 94, - "stragglerRank": 3, + 
"dispatchLogicalBytes": 267657216, + "combineLogicalBytes": 267657216, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 32, - "globalTokens": 256, + "tokensPerRank": 1024, + "globalTokens": 8192, "dispatch": { - "p50": 89.82399851083755, - "p90": 101.27999633550644, - "p95": 102.65599936246872, - "p99": 107.29599744081497 + "p50": 319.42400336265564, + "p90": 327.5519907474518, + "p95": 330.24001121520996, + "p99": 346.94400429725647 }, "combine": { - "p50": 80.73599636554718, - "p90": 89.4400030374527, - "p95": 89.85599875450134, - "p99": 95.42399644851685 + "p50": 444.89601254463196, + "p90": 447.61601090431213, + "p95": 449.0880072116852, + "p99": 458.3680033683777 }, "roundtrip": { - "p50": 141.59999787807465, - "p90": 158.9439958333969, - "p95": 161.18399798870087, - "p99": 167.32800006866455 + "p50": 742.464005947113, + "p90": 748.960018157959, + "p95": 751.6160011291504, + "p99": 762.1440291404724 }, "isolatedSum": { - "p50": 170.55999487638474, - "p90": 190.71999937295914, - "p95": 192.51199811697006, - "p99": 202.71999388933182 + "p50": 764.3200159072876, + "p90": 775.1680016517639, + "p95": 779.3280184268951, + "p99": 805.3120076656342 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19410944, - "combineLogicalBytes": 19410944, - "fanoutMean": 5.2890625, - "recvTokensMax": 178, - "stragglerRank": 7, + "dispatchLogicalBytes": 534380544, + "combineLogicalBytes": 534380544, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 64, - "globalTokens": 512, + "tokensPerRank": 2048, + "globalTokens": 16384, "dispatch": { - "p50": 96.09600156545639, - "p90": 118.1119978427887, - "p95": 120.57600170373917, - "p99": 127.83999741077423 + "p50": 568.2880282402039, + "p90": 572.9600191116333, + "p95": 574.176013469696, + "p99": 603.4560203552246 }, "combine": { - 
"p50": 89.82399851083755, - "p90": 103.20000350475311, - "p95": 103.80800068378448, - "p99": 104.70400005578995 + "p50": 802.4640083312988, + "p90": 813.7279748916626, + "p95": 814.9759769439697, + "p99": 830.847978591919 }, "roundtrip": { - "p50": 160.288006067276, - "p90": 180.95999956130981, - "p95": 185.18400192260742, - "p99": 188.60800564289093 + "p50": 1348.5759496688843, + "p90": 1358.5599660873413, + "p95": 1367.3280477523804, + "p99": 1390.0799751281738 }, "isolatedSum": { - "p50": 185.92000007629395, - "p90": 221.3120013475418, - "p95": 224.38400238752365, - "p99": 232.54399746656418 + "p50": 1370.7520365715027, + "p90": 1386.687994003296, + "p95": 1389.1519904136658, + "p99": 1434.3039989471436 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 38678528, - "combineLogicalBytes": 38678528, - "fanoutMean": 5.26953125, - "recvTokensMax": 360, + "dispatchLogicalBytes": 1066119168, + "combineLogicalBytes": 1066119168, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 4096, + "globalTokens": 32768, "dispatch": { - "p50": 114.52800035476685, - "p90": 135.0719928741455, - "p95": 136.6720050573349, - "p99": 140.00000059604645 + "p50": 1055.3920269012451, + "p90": 1064.5760297775269, + "p95": 1068.1920051574707, + "p99": 1080.191969871521 }, "combine": { - "p50": 106.01600259542465, - "p90": 119.71200257539749, - "p95": 120.35199999809265, - "p99": 122.14399874210358 + "p50": 1502.8799772262573, + "p90": 1514.464020729065, + "p95": 1516.8319940567017, + "p99": 1539.6159887313843 }, "roundtrip": { - "p50": 195.96800208091736, - "p90": 214.33599293231964, - "p95": 216.86400473117828, - "p99": 220.44800221920013 + "p50": 2540.4160022735596, + "p90": 2552.6719093322754, + "p95": 2560.512065887451, + "p99": 2638.6559009552 }, "isolatedSum": { - "p50": 220.5440029501915, - "p90": 254.783995449543, - "p95": 
257.02400505542755, - "p99": 262.14399933815 + "p50": 2558.2720041275024, + "p90": 2579.040050506592, + "p95": 2585.0239992141724, + "p99": 2619.8079586029053 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77285376, - "combineLogicalBytes": 77285376, - "fanoutMean": 5.2646484375, - "recvTokensMax": 704, - "stragglerRank": 5, + "dispatchLogicalBytes": 2131722240, + "combineLogicalBytes": 2131722240, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -17823,34 +19034,35 @@ ] }, { - "id": "cx-efff3174", - "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-mild|8|decode|normal|none|none|0|tuned||f3df51be7d5c32b", - "colorKey": "h100_9aa30544", - "comparisonKey": "c4aa2e0da9446ced", + "id": "cx-46706f1e", + "identity": "b300|deepep|6144|8|256|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "b300_307ed708", + "comparisonKey": "b477f7e33cf027ec", "schemaVersion": 3, - "generatedAt": "2026-06-27T00:00:21.116102+00:00", + "generatedAt": "2026-06-27T09:53:05.143387+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h100-dgxc-slurm_13", - "sku": "h100", + "runner": "b300-nv_05", + "sku": "b300", "backend": "deepep", - "phase": "decode", + "phase": "prefill", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b300-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · bf16 · zipf-mild", + "label": "B300 EP8 · deepep · bf16", + "model": "MiniMax-M3", "shape": { - "hidden": 7168, + "hidden": 6144, "topk": 8, "experts": 256, - "routing": "zipf-mild", - "routingLabel": "zipf-mild", + "routing": "uniform", + "routingLabel": 
"uniform", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, @@ -17860,9 +19072,9 @@ }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1515, + "achievedFraction": 0.1351, "configuredUnits": 20, - "deviceUnits": 132, + "deviceUnits": 148, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -17875,8 +19087,8 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "f3df51be7d5c32b", - "workloadId": "set:8:289b7f9c14292e96", + "traceSignature": "64d989e2e2a6b31", + "workloadId": "set:6:9f5e1e005a35e937", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -17884,304 +19096,230 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271958693", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271958693", - "createdAt": "2026-06-26T23:59:13Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28285723416", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285723416", + "createdAt": "2026-06-27T09:53:05.143387+00:00", + "sha": "149586650dbed5b7579537347e9489d5b41543c1" }, "rows": [ { - "tokensPerRank": 1, - "globalTokens": 8, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 97.28000313043594, - "p90": 104.70400005578995, - "p95": 106.11200332641602, - "p99": 112.73600161075592 + "p50": 99.35999661684036, + "p90": 101.72799974679947, + "p95": 102.59199887514114, + "p99": 109.0560033917427 }, "combine": { - "p50": 79.71200346946716, - "p90": 82.65600353479385, - "p95": 99.13600236177444, - "p99": 275.4560112953186 + "p50": 104.8320010304451, + "p90": 113.88800293207169, + "p95": 114.20799791812897, + "p99": 117.34399944543839 }, "roundtrip": { - "p50": 147.61599898338318, - "p90": 155.32800555229187, - "p95": 156.73600137233734, - "p99": 
162.91199624538422 + "p50": 185.15199422836304, + "p90": 189.28000330924988, + "p95": 191.23199582099915, + "p99": 221.95200622081757 }, "isolatedSum": { - "p50": 176.9920065999031, - "p90": 187.3600035905838, - "p95": 205.24800568819046, - "p99": 388.1920129060745 + "p50": 204.19199764728546, + "p90": 215.61600267887115, + "p95": 216.7999967932701, + "p99": 226.4000028371811 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 587776, - "combineLogicalBytes": 587776, - "fanoutMean": 5.125, - "recvTokensMax": 8, - "stragglerRank": 5, + "dispatchLogicalBytes": 66576384, + "combineLogicalBytes": 66576384, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2, - "globalTokens": 16, + "tokensPerRank": 256, + "globalTokens": 2048, "dispatch": { - "p50": 72.80000299215317, - "p90": 102.7199998497963, - "p95": 104.89600151777267, - "p99": 109.66400057077408 + "p50": 128.54400277137756, + "p90": 136.00000739097595, + "p95": 137.66400516033173, + "p99": 154.08000349998474 }, "combine": { - "p50": 73.15199822187424, - "p90": 81.44000172615051, - "p95": 81.88799768686295, - "p99": 82.91199803352356 + "p50": 142.94399321079254, + "p90": 152.0639955997467, + "p95": 152.41600573062897, + "p99": 176.35199427604675 }, "roundtrip": { - "p50": 129.4720023870468, - "p90": 153.3759981393814, - "p95": 156.15999698638916, - "p99": 164.92800414562225 + "p50": 259.64799523353577, + "p90": 263.5200023651123, + "p95": 265.9519910812378, + "p99": 286.1120104789734 }, "isolatedSum": { - "p50": 145.9520012140274, - "p90": 184.1600015759468, - "p95": 186.78399920463562, - "p99": 192.57599860429764 + "p50": 271.4879959821701, + "p90": 288.06400299072266, + "p95": 290.0800108909607, + "p99": 330.4319977760315 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1103872, - "combineLogicalBytes": 1103872, - "fanoutMean": 4.8125, - "recvTokensMax": 16, - "stragglerRank": 5, + 
"dispatchLogicalBytes": 133619712, + "combineLogicalBytes": 133619712, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 4, - "globalTokens": 32, + "tokensPerRank": 512, + "globalTokens": 4096, "dispatch": { - "p50": 76.25599950551987, - "p90": 102.62399911880493, - "p95": 105.24799674749374, - "p99": 109.47199910879135 + "p50": 196.57599925994873, + "p90": 200.3519982099533, + "p95": 201.63199305534363, + "p99": 223.23200106620789 }, "combine": { - "p50": 73.31199944019318, - "p90": 81.4720019698143, - "p95": 86.20800077915192, - "p99": 89.34400230646133 + "p50": 239.45599794387817, + "p90": 249.34400618076324, + "p95": 250.11199712753296, + "p99": 262.4320089817047 }, "roundtrip": { - "p50": 129.56799566745758, - "p90": 157.9200029373169, - "p95": 160.35200655460358, - "p99": 166.04800522327423 + "p50": 409.40800309181213, + "p90": 418.17599534988403, + "p95": 426.144003868103, + "p99": 449.7919976711273 }, "isolatedSum": { - "p50": 149.56799894571304, - "p90": 184.09600108861923, - "p95": 191.45599752664566, - "p99": 198.81600141525269 + "p50": 436.0319972038269, + "p90": 449.69600439071655, + "p95": 451.7439901828766, + "p99": 485.6640100479126 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2250752, - "combineLogicalBytes": 2250752, - "fanoutMean": 4.90625, - "recvTokensMax": 31, + "dispatchLogicalBytes": 267657216, + "combineLogicalBytes": 267657216, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 1024, + "globalTokens": 8192, "dispatch": { - "p50": 76.83199644088745, - "p90": 101.79200023412704, - "p95": 105.02400249242783, - "p99": 109.31199789047241 + "p50": 316.0000145435333, + "p90": 321.6319978237152, + "p95": 326.55999064445496, + "p99": 339.1680121421814 }, "combine": { - "p50": 73.5040009021759, - 
"p90": 82.04799890518188, - "p95": 86.40000224113464, - "p99": 88.54400366544724 + "p50": 445.15201449394226, + "p90": 446.78398966789246, + "p95": 448.60801100730896, + "p99": 472.03201055526733 }, "roundtrip": { - "p50": 130.23999333381653, - "p90": 159.39199924468994, - "p95": 161.82400286197662, - "p99": 165.98400473594666 + "p50": 743.2000041007996, + "p90": 750.0799894332886, + "p95": 757.5039863586426, + "p99": 775.7120132446289 }, "isolatedSum": { - "p50": 150.33599734306335, - "p90": 183.83999913930893, - "p95": 191.42400473356247, - "p99": 197.85600155591965 + "p50": 761.1520290374756, + "p90": 768.4159874916077, + "p95": 775.1680016517639, + "p99": 811.2000226974487 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4472832, - "combineLogicalBytes": 4472832, - "fanoutMean": 4.875, - "recvTokensMax": 62, - "stragglerRank": 5, + "dispatchLogicalBytes": 534380544, + "combineLogicalBytes": 534380544, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 16, - "globalTokens": 128, + "tokensPerRank": 2048, + "globalTokens": 16384, "dispatch": { - "p50": 96.00000083446503, - "p90": 104.73600029945374, - "p95": 108.51199924945831, - "p99": 115.74400216341019 + "p50": 567.0719742774963, + "p90": 570.9760189056396, + "p95": 573.2799768447876, + "p99": 593.5360193252563 }, "combine": { - "p50": 80.03199845552444, - "p90": 87.23200112581253, - "p95": 88.51200342178345, - "p99": 90.01599997282028 + "p50": 801.7920255661011, + "p90": 805.8239817619324, + "p95": 815.1040077209473, + "p99": 850.6879806518555 }, "roundtrip": { - "p50": 135.1040005683899, - "p90": 161.40800714492798, - "p95": 164.5440012216568, - "p99": 169.50400173664093 + "p50": 1346.336007118225, + "p90": 1356.7359447479248, + "p95": 1364.0960454940796, + "p99": 1429.535984992981 }, "isolatedSum": { - "p50": 176.03199928998947, - "p90": 191.96800142526627, - "p95": 197.02400267124176, - "p99": 
205.76000213623047 + "p50": 1368.8639998435974, + "p90": 1376.800000667572, + "p95": 1388.3839845657349, + "p99": 1444.2239999771118 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 8888320, - "combineLogicalBytes": 8888320, - "fanoutMean": 4.84375, - "recvTokensMax": 124, - "stragglerRank": 5, + "dispatchLogicalBytes": 1066119168, + "combineLogicalBytes": 1066119168, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 32, - "globalTokens": 256, + "tokensPerRank": 4096, + "globalTokens": 32768, "dispatch": { - "p50": 90.65599739551544, - "p90": 102.75200009346008, - "p95": 105.69600015878677, - "p99": 109.37599837779999 + "p50": 1061.4080429077148, + "p90": 1067.039966583252, + "p95": 1075.32799243927, + "p99": 1103.9040088653564 }, "combine": { - "p50": 81.60000294446945, - "p90": 90.59199690818787, - "p95": 95.32800316810608, - "p99": 97.47199714183807 + "p50": 1503.2000541687012, + "p90": 1515.2640342712402, + "p95": 1526.9759893417358, + "p99": 1554.8160076141357 }, "roundtrip": { - "p50": 145.1839953660965, - "p90": 165.56799411773682, - "p95": 168.5439944267273, - "p99": 174.68799650669098 + "p50": 2543.2960987091064, + "p90": 2558.880090713501, + "p95": 2570.847988128662, + "p99": 2619.1680431365967 }, "isolatedSum": { - "p50": 172.2560003399849, - "p90": 193.34399700164795, - "p95": 201.02400332689285, - "p99": 206.84799551963806 + "p50": 2564.608097076416, + "p90": 2582.304000854492, + "p95": 2602.303981781006, + "p99": 2658.720016479492 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 17733632, - "combineLogicalBytes": 17733632, - "fanoutMean": 4.83203125, - "recvTokensMax": 248, - "stragglerRank": 2, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 64, - "globalTokens": 512, - "dispatch": { - "p50": 101.43999755382538, - "p90": 116.89600348472595, - "p95": 119.77600306272507, - "p99": 138.7840062379837 
- }, - "combine": { - "p50": 90.59199690818787, - "p90": 103.35999727249146, - "p95": 104.3199971318245, - "p99": 105.92000186443329 - }, - "roundtrip": { - "p50": 168.7680035829544, - "p90": 185.88800728321075, - "p95": 188.6720061302185, - "p99": 193.37600469589233 - }, - "isolatedSum": { - "p50": 192.03199446201324, - "p90": 220.2560007572174, - "p95": 224.09600019454956, - "p99": 244.704008102417 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 35424256, - "combineLogicalBytes": 35424256, - "fanoutMean": 4.826171875, - "recvTokensMax": 492, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 122.5920021533966, - "p90": 134.91199910640717, - "p95": 136.9280070066452, - "p99": 143.64799857139587 - }, - "combine": { - "p50": 115.07199704647064, - "p90": 128.63999605178833, - "p95": 130.40000200271606, - "p99": 139.71200585365295 - }, - "roundtrip": { - "p50": 215.5199944972992, - "p90": 233.66400599479675, - "p95": 235.35999655723572, - "p99": 240.12799561023712 - }, - "isolatedSum": { - "p50": 237.66399919986725, - "p90": 263.5519951581955, - "p95": 267.32800900936127, - "p99": 283.3600044250488 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 70160384, - "combineLogicalBytes": 70160384, - "fanoutMean": 4.779296875, - "recvTokensMax": 987, - "stragglerRank": 7, + "dispatchLogicalBytes": 2131722240, + "combineLogicalBytes": 2131722240, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -18189,46 +19327,47 @@ ] }, { - "id": "cx-6d1780ec", - "identity": "h100|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-mild+eplb|8|decode|normal|none|none|0|tuned||16babcaf4204243", - "colorKey": "h100_e8b903ea", - "comparisonKey": "0d93a7b7a0fcf6d0", + "id": "cx-238797ce", + "identity": 
"b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||157ca81687ddb63", + "colorKey": "b300_c9569580", + "comparisonKey": "c4fbb2dad9521e3e", "schemaVersion": 3, - "generatedAt": "2026-06-27T00:00:17.527263+00:00", + "generatedAt": "2026-06-26T23:57:38.465863+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h100-dgxc-slurm_01", - "sku": "h100", + "runner": "b300-nv_13", + "sku": "b300", "backend": "deepep", - "phase": "decode", + "phase": "prefill", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", + "topologyClass": "b300-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · bf16 · zipf-mild+eplb", + "label": "B300 EP8 · deepep · bf16", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, - "experts": 288, - "routing": "zipf-mild", - "routingLabel": "zipf-mild+eplb", + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", - "eplbEnabled": true, + "eplbEnabled": false, "dispatchDtype": "bf16", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1515, + "achievedFraction": 0.1351, "configuredUnits": 20, - "deviceUnits": 132, + "deviceUnits": 148, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -18241,312 +19380,420 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "16babcaf4204243", - "workloadId": "set:8:289b7f9c14292e96", + "traceSignature": "157ca81687ddb63", + "workloadId": "set:3:a426d66e479dc893", "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": 2.61328125, - "eplbImbalanceAfter": 1.0009114583333334, + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, 
"backendVersion": "1.2.1", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271962037", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271962037", - "createdAt": "2026-06-26T23:59:20Z", + "id": "28271869301", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271869301", + "createdAt": "2026-06-26T23:57:38.465863+00:00", "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" }, "rows": [ { - "tokensPerRank": 1, - "globalTokens": 8, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 98.55999797582626, - "p90": 106.33599758148193, - "p95": 108.51199924945831, - "p99": 113.21599781513214 + "p50": 93.66399794816971, + "p90": 99.42399710416794, + "p95": 101.24800354242325, + "p99": 112.15999722480774 }, "combine": { - "p50": 79.39200103282928, - "p90": 81.85599744319916, - "p95": 82.56000280380249, - "p99": 87.10400015115738 + "p50": 115.7120019197464, + "p90": 116.54400080442429, + "p95": 117.47200042009354, + "p99": 128.7039965391159 }, "roundtrip": { - "p50": 145.50399780273438, - "p90": 154.7199934720993, - "p95": 156.8640023469925, - "p99": 160.7999950647354 + "p50": 195.3279972076416, + "p90": 199.072003364563, + "p95": 200.57600736618042, + "p99": 214.1440063714981 }, "isolatedSum": { - "p50": 177.95199900865555, - "p90": 188.1919950246811, - "p95": 191.0720020532608, - "p99": 200.31999796628952 + "p50": 209.3759998679161, + "p90": 215.96799790859222, + "p95": 218.72000396251678, + "p99": 240.86399376392365 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 602112, - "combineLogicalBytes": 602112, - "fanoutMean": 5.25, - "recvTokensMax": 7, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2, - "globalTokens": 16, + "tokensPerRank": 
512, + "globalTokens": 4096, "dispatch": { - "p50": 73.60000163316727, - "p90": 108.31999778747559, - "p95": 109.66400057077408, - "p99": 115.13599753379822 + "p50": 193.7599927186966, + "p90": 200.3519982099533, + "p95": 202.94399559497833, + "p99": 209.75999534130096 }, "combine": { - "p50": 72.51200079917908, - "p90": 81.60000294446945, - "p95": 82.36800134181976, - "p99": 87.20000088214874 + "p50": 272.92799949645996, + "p90": 275.04000067710876, + "p95": 275.6800055503845, + "p99": 289.4720137119293 }, "roundtrip": { - "p50": 129.05600666999817, - "p90": 156.47999942302704, - "p95": 160.0639969110489, - "p99": 162.1759980916977 + "p50": 434.5279932022095, + "p90": 444.95999813079834, + "p95": 448.1920003890991, + "p99": 461.37601137161255 }, "isolatedSum": { - "p50": 146.11200243234634, - "p90": 189.92000073194504, - "p95": 192.03200191259384, - "p99": 202.33599841594696 + "p50": 466.68799221515656, + "p90": 475.3919988870621, + "p95": 478.62400114536285, + "p99": 499.2320090532303 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1189888, - "combineLogicalBytes": 1189888, - "fanoutMean": 5.1875, - "recvTokensMax": 12, - "stragglerRank": 7, + "dispatchLogicalBytes": 312266752, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 4, - "globalTokens": 32, + "tokensPerRank": 2048, + "globalTokens": 16384, "dispatch": { - "p50": 97.120001912117, - "p90": 103.87200117111206, - "p95": 105.66399991512299, - "p99": 110.68800091743469 + "p50": 577.6960253715515, + "p90": 582.6879739761353, + "p95": 584.6400260925293, + "p99": 595.7120060920715 }, "combine": { - "p50": 79.55200225114822, - "p90": 82.20800012350082, - "p95": 86.30400151014328, - "p99": 88.3840024471283 + "p50": 818.336009979248, + "p90": 828.4479975700378, + "p95": 838.3679986000061, + "p99": 852.6399731636047 }, "roundtrip": { - "p50": 151.32799744606018, - "p90": 
159.61599349975586, - "p95": 161.15200519561768, - "p99": 167.71200299263 + "p50": 1377.7920007705688, + "p90": 1387.3920440673828, + "p95": 1397.2480297088623, + "p99": 1410.4640483856201 }, "isolatedSum": { - "p50": 176.67200416326523, - "p90": 186.08000129461288, - "p95": 191.96800142526627, - "p99": 199.072003364563 + "p50": 1396.0320353507996, + "p90": 1411.135971546173, + "p95": 1423.0080246925354, + "p99": 1448.3519792556763 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2408448, - "combineLogicalBytes": 2408448, - "fanoutMean": 5.25, - "recvTokensMax": 23, + "dispatchLogicalBytes": 1243805696, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 - }, + } + ] + }, + { + "id": "cx-20a284d3", + "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "b300_c9569580", + "comparisonKey": "0484fdcbaa6c315c", + "schemaVersion": 3, + "generatedAt": "2026-06-27T10:26:05.756924+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "b300-nv_15", + "sku": "b300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + 
"resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": "set:6:a426d66e479dc893", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28286434915", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28286434915", + "createdAt": "2026-06-27T10:26:05.756924+00:00", + "sha": "91c7acf59a5e524f37742922ec67721d86a03f6b" + }, + "rows": [ { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 96.83199971914291, - "p90": 103.07200253009796, - "p95": 104.47999835014343, - "p99": 111.48799955844879 + "p50": 94.33600306510925, + "p90": 98.33600372076035, + "p95": 101.40799731016159, + "p99": 131.1040073633194 }, "combine": { - "p50": 79.48800176382065, - "p90": 82.49600231647491, - "p95": 87.0399996638298, - "p99": 88.76799792051315 + "p50": 115.99999666213989, + "p90": 117.47200042009354, + "p95": 118.6240017414093, + "p99": 131.071999669075 }, "roundtrip": { - "p50": 152.38399803638458, - "p90": 159.96800363063812, - "p95": 162.20800578594208, - "p99": 166.59200191497803 + "p50": 194.4960057735443, + "p90": 200.70399343967438, + "p95": 203.3279985189438, + "p99": 237.34399676322937 }, "isolatedSum": { - "p50": 176.32000148296356, - "p90": 185.56800484657288, - "p95": 191.51999801397324, - "p99": 200.25599747896194 + "p50": 210.33599972724915, + "p90": 215.80800414085388, + "p95": 220.0319990515709, + "p99": 262.1760070323944 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4859904, - 
"combineLogicalBytes": 4859904, - "fanoutMean": 5.296875, - "recvTokensMax": 47, - "stragglerRank": 4, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 16, - "globalTokens": 128, + "tokensPerRank": 256, + "globalTokens": 2048, "dispatch": { - "p50": 96.92800045013428, - "p90": 102.01600193977356, - "p95": 104.76800054311752, - "p99": 113.02399635314941 + "p50": 136.63999736309052, + "p90": 140.1599943637848, + "p95": 141.53599739074707, + "p99": 167.32800006866455 }, "combine": { - "p50": 80.86399734020233, - "p90": 88.3840024471283, - "p95": 89.63199704885483, - "p99": 94.65599805116653 + "p50": 156.70399367809296, + "p90": 165.02399742603302, + "p95": 165.6319946050644, + "p99": 177.50400304794312 }, "roundtrip": { - "p50": 153.21600437164307, - "p90": 159.39199924468994, - "p95": 160.8320027589798, - "p99": 165.3759926557541 + "p50": 273.21600914001465, + "p90": 279.4240117073059, + "p95": 281.2480032444, + "p99": 292.4160063266754 }, "isolatedSum": { - "p50": 177.7919977903366, - "p90": 190.40000438690186, - "p95": 194.39999759197235, - "p99": 207.67999440431595 + "p50": 293.3439910411835, + "p90": 305.1839917898178, + "p95": 307.16799199581146, + "p99": 344.83200311660767 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 9605120, - "combineLogicalBytes": 9605120, - "fanoutMean": 5.234375, - "recvTokensMax": 93, - "stragglerRank": 4, + "dispatchLogicalBytes": 155889664, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 32, - "globalTokens": 256, + "tokensPerRank": 512, + "globalTokens": 4096, "dispatch": { - "p50": 91.5519967675209, - "p90": 105.27999699115753, - "p95": 106.52799904346466, - "p99": 110.55999994277954 + "p50": 194.4960057735443, + 
"p90": 202.81599462032318, + "p95": 204.16000485420227, + "p99": 231.455996632576 }, "combine": { - "p50": 81.216000020504, - "p90": 90.17600119113922, - "p95": 94.33600306510925, - "p99": 96.79999947547913 + "p50": 266.59199595451355, + "p90": 275.519996881485, + "p95": 277.3759961128235, + "p99": 302.3679852485657 }, "roundtrip": { - "p50": 144.1279947757721, - "p90": 167.52000153064728, - "p95": 168.99199783802032, - "p99": 173.567995429039 + "p50": 437.6319944858551, + "p90": 447.9359984397888, + "p95": 454.0480077266693, + "p99": 517.6960229873657 }, "isolatedSum": { - "p50": 172.7679967880249, - "p90": 195.45599818229675, - "p95": 200.8640021085739, - "p99": 207.35999941825867 + "p50": 461.08800172805786, + "p90": 478.33599150180817, + "p95": 481.53600096702576, + "p99": 533.8239818811417 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19367936, - "combineLogicalBytes": 19367936, - "fanoutMean": 5.27734375, - "recvTokensMax": 182, - "stragglerRank": 2, + "dispatchLogicalBytes": 312266752, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 64, - "globalTokens": 512, + "tokensPerRank": 1024, + "globalTokens": 8192, "dispatch": { - "p50": 104.80000078678131, - "p90": 116.35199934244156, - "p95": 118.81600320339203, - "p99": 122.97599762678146 + "p50": 326.55999064445496, + "p90": 330.3360044956207, + "p95": 334.49599146842957, + "p99": 353.15200686454773 }, "combine": { - "p50": 96.38399630784988, - "p90": 104.00000214576721, - "p95": 104.5759990811348, - "p99": 106.4319983124733 + "p50": 459.3279957771301, + "p90": 462.72000670433044, + "p95": 471.0400104522705, + "p99": 533.5680246353149 }, "roundtrip": { - "p50": 177.76000499725342, - "p90": 185.44000387191772, - "p95": 187.16800212860107, - "p99": 190.3039962053299 + "p50": 764.9279832839966, + "p90": 773.1519937515259, + "p95": 777.1520018577576, + "p99": 
811.0399842262268 }, "isolatedSum": { - "p50": 201.1839970946312, - "p90": 220.35200148820877, - "p95": 223.39200228452682, - "p99": 229.40799593925476 + "p50": 785.8879864215851, + "p90": 793.0560111999512, + "p95": 805.5360019207001, + "p99": 886.7200314998627 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 38535168, - "combineLogicalBytes": 38535168, - "fanoutMean": 5.25, - "recvTokensMax": 358, + "dispatchLogicalBytes": 623443968, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 2048, + "globalTokens": 16384, "dispatch": { - "p50": 113.56800049543381, - "p90": 131.58400356769562, - "p95": 133.66399705410004, - "p99": 139.96799290180206 + "p50": 574.9120116233826, + "p90": 586.7840051651001, + "p95": 597.0879793167114, + "p99": 678.4639954566956 }, "combine": { - "p50": 106.55999928712845, - "p90": 119.55200135707855, - "p95": 120.09599804878235, - "p99": 121.05599790811539 + "p50": 818.2399868965149, + "p90": 828.7360072135925, + "p95": 832.7360153198242, + "p99": 879.8080086708069 }, "roundtrip": { - "p50": 198.46400618553162, - "p90": 217.6000028848648, - "p95": 218.75199675559998, - "p99": 224.2880016565323 + "p50": 1376.1279582977295, + "p90": 1384.7039937973022, + "p95": 1398.1120586395264, + "p99": 1485.0879907608032 }, "isolatedSum": { - "p50": 220.12799978256226, - "p90": 251.13600492477417, - "p95": 253.75999510288239, - "p99": 261.02399080991745 + "p50": 1393.1519985198975, + "p90": 1415.5200123786926, + "p95": 1429.8239946365356, + "p99": 1558.2720041275024 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 76869632, - "combineLogicalBytes": 76869632, - "fanoutMean": 5.236328125, - "recvTokensMax": 688, + "dispatchLogicalBytes": 1243805696, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 6, 
+ "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1068.3519840240479, + "p90": 1078.0800580978394, + "p95": 1086.4640474319458, + "p99": 1142.624020576477 + }, + "combine": { + "p50": 1529.47199344635, + "p90": 1541.3119792938232, + "p95": 1551.8079996109009, + "p99": 1614.9120330810547 + }, + "roundtrip": { + "p50": 2586.5280628204346, + "p90": 2602.7839183807373, + "p95": 2617.6319122314453, + "p99": 2691.5199756622314 + }, + "isolatedSum": { + "p50": 2597.823977470398, + "p90": 2619.3920373916626, + "p95": 2638.2720470428467, + "p99": 2757.5360536575317 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487009280, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, "stragglerRank": 6, "correct": true, "samplesPooled": 600, @@ -18555,34 +19802,35 @@ ] }, { - "id": "cx-9d829c00", - "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-moderate|8|decode|normal|none|none|0|tuned||14ded8461f2636c", - "colorKey": "h100_552a4b73", - "comparisonKey": "95c165fc74bc43c0", + "id": "cx-330e7a0b", + "identity": "b300|deepep|7168|8|256|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "b300_307ed708", + "comparisonKey": "669ed990dbfd00e2", "schemaVersion": 3, - "generatedAt": "2026-06-27T00:00:35.674306+00:00", + "generatedAt": "2026-06-27T09:51:13.255714+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h100-dgxc-slurm_17", - "sku": "h100", + "runner": "b300-nv_11", + "sku": "b300", "backend": "deepep", - "phase": "decode", + "phase": "prefill", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b300-nvlink-island", 
"transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · bf16 · zipf-moderate", + "label": "B300 EP8 · deepep · bf16", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, "experts": 256, - "routing": "zipf-moderate", - "routingLabel": "zipf-moderate", + "routing": "uniform", + "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, @@ -18592,9 +19840,9 @@ }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1515, + "achievedFraction": 0.1351, "configuredUnits": 20, - "deviceUnits": 132, + "deviceUnits": 148, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -18607,8 +19855,8 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "14ded8461f2636c", - "workloadId": "set:8:120a8dc1dba92ca9", + "traceSignature": "64d989e2e2a6b31", + "workloadId": "set:6:a426d66e479dc893", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -18616,303 +19864,229 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271971983", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271971983", - "createdAt": "2026-06-26T23:59:40Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28285680003", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285680003", + "createdAt": "2026-06-27T09:51:13.255714+00:00", + "sha": "149586650dbed5b7579537347e9489d5b41543c1" }, "rows": [ { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 95.8079993724823, - "p90": 103.10400277376175, - "p95": 104.16000336408615, - "p99": 110.01600325107574 - }, - "combine": { - "p50": 74.33599978685379, - "p90": 81.56800270080566, - "p95": 81.98399841785431, - "p99": 83.29600095748901 - }, - "roundtrip": { - "p50": 142.2719955444336, - "p90": 
148.67199957370758, - "p95": 150.4639983177185, - "p99": 154.11199629306793 - }, - "isolatedSum": { - "p50": 170.1439991593361, - "p90": 184.6720054745674, - "p95": 186.14400178194046, - "p99": 193.31200420856476 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 444416, - "combineLogicalBytes": 444416, - "fanoutMean": 3.875, - "recvTokensMax": 8, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 70.88000327348709, - "p90": 101.98400169610977, - "p95": 102.94400155544281, - "p99": 106.01600259542465 - }, - "combine": { - "p50": 72.4480003118515, - "p90": 81.40800148248672, - "p95": 81.95199817419052, - "p99": 85.7279971241951 - }, - "roundtrip": { - "p50": 128.7039965391159, - "p90": 147.71200716495514, - "p95": 149.59999918937683, - "p99": 152.79999375343323 - }, - "isolatedSum": { - "p50": 143.3280035853386, - "p90": 183.3920031785965, - "p95": 184.89599972963333, - "p99": 191.74399971961975 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 845824, - "combineLogicalBytes": 845824, - "fanoutMean": 3.6875, - "recvTokensMax": 16, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 73.18399846553802, - "p90": 93.82399916648865, - "p95": 96.41599655151367, - "p99": 104.99200224876404 + "p50": 93.44000369310379, + "p90": 96.54399752616882, + "p95": 100.09600222110748, + "p99": 102.94400155544281 }, "combine": { - "p50": 70.8480030298233, - "p90": 77.82399654388428, - "p95": 78.59200239181519, - "p99": 83.45600217580795 + "p50": 115.26399850845337, + "p90": 116.09599739313126, + "p95": 117.34399944543839, + "p99": 127.77599692344666 }, "roundtrip": { - "p50": 125.44000148773193, - "p90": 151.74399316310883, - "p95": 154.1759967803955, - "p99": 160.09600460529327 + "p50": 192.06400215625763, + 
"p90": 198.7520009279251, + "p95": 199.71199333667755, + "p99": 215.68000316619873 }, "isolatedSum": { - "p50": 144.03200149536133, - "p90": 171.64799571037292, - "p95": 175.00799894332886, - "p99": 188.448004424572 + "p50": 208.70400220155716, + "p90": 212.63999491930008, + "p95": 217.44000166654587, + "p99": 230.71999847888947 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1691648, - "combineLogicalBytes": 1691648, - "fanoutMean": 3.6875, - "recvTokensMax": 32, - "stragglerRank": 5, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 256, + "globalTokens": 2048, "dispatch": { - "p50": 75.83999633789062, - "p90": 100.22400319576263, - "p95": 102.39999741315842, - "p99": 107.4879989027977 + "p50": 136.54400408267975, + "p90": 139.29599523544312, + "p95": 141.12000167369843, + "p99": 151.10400319099426 }, "combine": { - "p50": 73.18399846553802, - "p90": 81.44000172615051, - "p95": 82.24000036716461, - "p99": 87.23200112581253 + "p50": 162.9440039396286, + "p90": 164.60800170898438, + "p95": 165.18400609493256, + "p99": 178.52799594402313 }, "roundtrip": { - "p50": 126.27199292182922, - "p90": 154.88000214099884, - "p95": 157.47199952602386, - "p99": 159.4880074262619 + "p50": 271.84000611305237, + "p90": 277.75999903678894, + "p95": 280.0639867782593, + "p99": 295.48799991607666 }, "isolatedSum": { - "p50": 149.02399480342865, - "p90": 181.66400492191315, - "p95": 184.63999778032303, - "p99": 194.72000002861023 + "p50": 299.48800802230835, + "p90": 303.9039969444275, + "p95": 306.304007768631, + "p99": 329.6319991350174 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 3354624, - "combineLogicalBytes": 3354624, - "fanoutMean": 3.65625, - "recvTokensMax": 64, - "stragglerRank": 5, + "dispatchLogicalBytes": 155889664, + "combineLogicalBytes": 
155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 16, - "globalTokens": 128, + "tokensPerRank": 512, + "globalTokens": 4096, "dispatch": { - "p50": 79.3600007891655, - "p90": 100.0640019774437, - "p95": 123.80799651145935, - "p99": 229.76000607013702 + "p50": 192.7040070295334, + "p90": 198.7520009279251, + "p95": 200.95999538898468, + "p99": 214.27200734615326 }, "combine": { - "p50": 73.88799637556076, - "p90": 82.2720006108284, - "p95": 83.36000144481659, - "p99": 89.28000181913376 + "p50": 264.8960053920746, + "p90": 274.27199482917786, + "p95": 274.87999200820923, + "p99": 286.3039970397949 }, "roundtrip": { - "p50": 130.17599284648895, - "p90": 154.62400019168854, - "p95": 157.3760062456131, - "p99": 162.7199947834015 + "p50": 443.36000084877014, + "p90": 448.86401295661926, + "p95": 453.0560076236725, + "p99": 460.640013217926 }, "isolatedSum": { - "p50": 153.24799716472626, - "p90": 182.3360025882721, - "p95": 207.16799795627594, - "p99": 319.0400078892708 + "p50": 457.60001242160797, + "p90": 473.02399575710297, + "p95": 475.8399873971939, + "p99": 500.5760043859482 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 6537216, - "combineLogicalBytes": 6537216, - "fanoutMean": 3.5625, - "recvTokensMax": 127, - "stragglerRank": 5, + "dispatchLogicalBytes": 312266752, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 32, - "globalTokens": 256, + "tokensPerRank": 1024, + "globalTokens": 8192, "dispatch": { - "p50": 88.44800293445587, - "p90": 103.71199995279312, - "p95": 105.76000064611435, - "p99": 110.1439967751503 + "p50": 326.84800028800964, + "p90": 329.75998520851135, + "p95": 331.36001229286194, + "p99": 340.9599959850311 }, "combine": { - "p50": 81.60000294446945, - "p90": 89.6959975361824, - "p95": 
90.27200192213058, - "p99": 91.80799871683121 + "p50": 458.97600054740906, + "p90": 462.46400475502014, + "p95": 470.335990190506, + "p99": 474.36800599098206 }, "roundtrip": { - "p50": 141.34399592876434, - "p90": 161.98399662971497, - "p95": 163.455992937088, - "p99": 169.24799978733063 + "p50": 764.2880082130432, + "p90": 772.0639705657959, + "p95": 773.5360264778137, + "p99": 783.8079929351807 }, "isolatedSum": { - "p50": 170.04800587892532, - "p90": 193.40799748897552, - "p95": 196.03200256824493, - "p99": 201.9519954919815 + "p50": 785.8240008354187, + "p90": 792.2239899635315, + "p95": 801.6960024833679, + "p99": 815.3280019760132 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 12859392, - "combineLogicalBytes": 12859392, - "fanoutMean": 3.50390625, - "recvTokensMax": 255, - "stragglerRank": 1, + "dispatchLogicalBytes": 623443968, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 64, - "globalTokens": 512, + "tokensPerRank": 2048, + "globalTokens": 16384, "dispatch": { - "p50": 102.36799716949463, - "p90": 119.6800023317337, - "p95": 121.31199985742569, - "p99": 123.77600371837616 + "p50": 575.9680271148682, + "p90": 583.1040143966675, + "p95": 584.6719741821289, + "p99": 595.4880118370056 }, "combine": { - "p50": 89.9839997291565, - "p90": 96.03200107812881, - "p95": 99.48799759149551, - "p99": 102.04800218343735 + "p50": 817.6640272140503, + "p90": 827.7760148048401, + "p95": 828.2240033149719, + "p99": 840.1280045509338 }, "roundtrip": { - "p50": 165.69599509239197, - "p90": 182.43199586868286, - "p95": 184.1599941253662, - "p99": 187.51999735832214 + "p50": 1376.7679929733276, + "p90": 1384.5759630203247, + "p95": 1390.3679847717285, + "p99": 1429.6319484710693 }, "isolatedSum": { - "p50": 192.35199689865112, - "p90": 215.71200340986252, - "p95": 220.7999974489212, - "p99": 225.8240059018135 + "p50": 
1393.6320543289185, + "p90": 1410.8800292015076, + "p95": 1412.8959774971008, + "p99": 1435.6160163879395 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 25145344, - "combineLogicalBytes": 25145344, - "fanoutMean": 3.42578125, - "recvTokensMax": 510, - "stragglerRank": 5, + "dispatchLogicalBytes": 1243805696, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 4096, + "globalTokens": 32768, "dispatch": { - "p50": 125.91999769210815, - "p90": 144.70399916172028, - "p95": 145.9520012140274, - "p99": 148.00000190734863 + "p50": 1069.6320533752441, + "p90": 1077.6959657669067, + "p95": 1080.1600217819214, + "p99": 1091.4560556411743 }, "combine": { - "p50": 114.56000059843063, - "p90": 119.99999731779099, - "p95": 122.30399996042252, - "p99": 126.91199779510498 + "p50": 1529.0240049362183, + "p90": 1540.2239561080933, + "p95": 1541.0560369491577, + "p99": 1551.5199899673462 }, "roundtrip": { - "p50": 218.9760059118271, - "p90": 233.63199830055237, - "p95": 235.1360023021698, - "p99": 238.304004073143 + "p50": 2583.616018295288, + "p90": 2593.696117401123, + "p95": 2599.3599891662598, + "p99": 2626.4960765838623 }, "isolatedSum": { - "p50": 240.4799982905388, - "p90": 264.70399647951126, - "p95": 268.2560011744499, - "p99": 274.9119997024536 + "p50": 2598.6560583114624, + "p90": 2617.919921875, + "p95": 2621.216058731079, + "p99": 2642.9760456085205 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 49946624, - "combineLogicalBytes": 49946624, - "fanoutMean": 3.40234375, - "recvTokensMax": 1022, + "dispatchLogicalBytes": 2487009280, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, "stragglerRank": 6, "correct": true, "samplesPooled": 600, @@ -18921,46 +20095,47 @@ ] }, { - "id": "cx-c61b6088", - "identity": 
"h100|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-moderate+eplb|8|decode|normal|none|none|0|tuned||a8f501af7004836", - "colorKey": "h100_106a51ab", - "comparisonKey": "6643ae5a97d68820", + "id": "cx-d4f1db50", + "identity": "b300|deepep|7168|8|384|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||cd50548525dafdf", + "colorKey": "b300_c9569580", + "comparisonKey": "70142fedc425dd51", "schemaVersion": 3, - "generatedAt": "2026-06-27T00:00:43.354862+00:00", + "generatedAt": "2026-06-27T11:14:26.079004+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h100-dgxc-slurm_07", - "sku": "h100", + "runner": "b300-nv_15", + "sku": "b300", "backend": "deepep", - "phase": "decode", + "phase": "prefill", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", + "topologyClass": "b300-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · bf16 · zipf-moderate+eplb", + "label": "B300 EP8 · deepep · bf16", + "model": "Kimi-K2", "shape": { "hidden": 7168, "topk": 8, - "experts": 288, - "routing": "zipf-moderate", - "routingLabel": "zipf-moderate+eplb", + "experts": 384, + "routing": "uniform", + "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", - "eplbEnabled": true, + "eplbEnabled": false, "dispatchDtype": "bf16", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1515, + "achievedFraction": 0.1351, "configuredUnits": 20, - "deviceUnits": 132, + "deviceUnits": 148, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -18973,313 +20148,239 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "a8f501af7004836", - "workloadId": "set:8:120a8dc1dba92ca9", + "traceSignature": 
"cd50548525dafdf", + "workloadId": "set:6:b23bc0c4b6402c69", "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": 4.927734375, - "eplbImbalanceAfter": 1.0006103515625, + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, "backendVersion": "1.2.1", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271975554", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271975554", - "createdAt": "2026-06-26T23:59:47Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28287503016", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28287503016", + "createdAt": "2026-06-27T11:14:26.079004+00:00", + "sha": "df7fddee0a275156d4c72fa006cd2b73bce72613" }, "rows": [ { - "tokensPerRank": 1, - "globalTokens": 8, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 69.72800195217133, - "p90": 76.7040029168129, - "p95": 82.24000036716461, - "p99": 100.09600222110748 + "p50": 1799.6480464935303, + "p90": 2024.2879390716553, + "p95": 2855.3919792175293, + "p99": 3412.2560024261475 }, "combine": { - "p50": 70.78400254249573, - "p90": 73.11999797821045, - "p95": 73.53600114583969, - "p99": 78.3040001988411 + "p50": 1812.8000497817993, + "p90": 1949.5359659194946, + "p95": 2620.09596824646, + "p99": 2830.048084259033 }, "roundtrip": { - "p50": 124.35200065374374, - "p90": 129.88799810409546, - "p95": 131.20000064373016, - "p99": 137.40800321102142 + "p50": 1900.1920223236084, + "p90": 2016.5760517120361, + "p95": 2611.488103866577, + "p99": 3049.344062805176 }, "isolatedSum": { - "p50": 140.51200449466705, - "p90": 149.82400089502335, - "p95": 155.7760015130043, - "p99": 178.40000241994858 + "p50": 3612.4480962753296, + "p90": 3973.82390499115, + "p95": 5475.487947463989, + "p99": 6242.304086685181 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 616448, - "combineLogicalBytes": 616448, - 
"fanoutMean": 5.375, - "recvTokensMax": 7, - "stragglerRank": 4, + "dispatchLogicalBytes": 77514752, + "combineLogicalBytes": 77514752, + "fanoutMean": 5.2802734375, + "recvTokensMax": 707, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2, - "globalTokens": 16, + "tokensPerRank": 256, + "globalTokens": 2048, "dispatch": { - "p50": 69.92000341415405, - "p90": 77.79199630022049, - "p95": 80.19199967384338, - "p99": 96.19200229644775 + "p50": 1876.1919736862183, + "p90": 2189.120054244995, + "p95": 2922.816038131714, + "p99": 3402.240037918091 }, "combine": { - "p50": 71.16799801588058, - "p90": 73.27999919652939, - "p95": 73.85600358247757, - "p99": 78.94399762153625 + "p50": 1860.6079816818237, + "p90": 1970.52800655365, + "p95": 2403.167963027954, + "p99": 2977.8881072998047 }, "roundtrip": { - "p50": 126.94400548934937, - "p90": 130.91200590133667, - "p95": 132.1280002593994, - "p99": 138.33600282669067 + "p50": 1979.2640209197998, + "p90": 2097.536087036133, + "p95": 2794.1761016845703, + "p99": 3157.9198837280273 }, "isolatedSum": { - "p50": 141.08800143003464, - "p90": 151.07199549674988, - "p95": 154.04800325632095, - "p99": 175.135999917984 + "p50": 3736.799955368042, + "p90": 4159.648060798645, + "p95": 5325.984001159668, + "p99": 6380.1281452178955 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1204224, - "combineLogicalBytes": 1204224, - "fanoutMean": 5.25, - "recvTokensMax": 14, + "dispatchLogicalBytes": 154570752, + "combineLogicalBytes": 154570752, + "fanoutMean": 5.2646484375, + "recvTokensMax": 1391, "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 4, - "globalTokens": 32, + "tokensPerRank": 512, + "globalTokens": 4096, "dispatch": { - "p50": 74.07999783754349, - "p90": 101.34399682283401, - "p95": 103.13600301742554, - "p99": 111.39199882745743 + "p50": 1976.0639667510986, + "p90": 2366.368055343628, + "p95": 2979.0399074554443, + "p99": 
3521.440029144287 }, "combine": { - "p50": 72.9919970035553, - "p90": 82.0159986615181, - "p95": 87.00799942016602, - "p99": 89.31200206279755 + "p50": 1994.1760301589966, + "p90": 2153.6319255828857, + "p95": 2808.351993560791, + "p99": 3210.304021835327 }, "roundtrip": { - "p50": 131.32800161838531, - "p90": 158.59200060367584, - "p95": 163.13600540161133, - "p99": 169.69600319862366 + "p50": 2184.7360134124756, + "p90": 2389.280080795288, + "p95": 3086.7199897766113, + "p99": 3524.319887161255 }, "isolatedSum": { - "p50": 147.07199484109879, - "p90": 183.3599954843521, - "p95": 190.14400243759155, - "p99": 200.70400089025497 + "p50": 3970.239996910095, + "p90": 4519.999980926514, + "p95": 5787.391901016235, + "p99": 6731.744050979614 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2394112, - "combineLogicalBytes": 2394112, - "fanoutMean": 5.21875, - "recvTokensMax": 24, - "stragglerRank": 4, + "dispatchLogicalBytes": 309772288, + "combineLogicalBytes": 309772288, + "fanoutMean": 5.275390625, + "recvTokensMax": 2754, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 1024, + "globalTokens": 8192, "dispatch": { - "p50": 75.6480023264885, - "p90": 100.76799988746643, - "p95": 102.01600193977356, - "p99": 105.95200210809708 + "p50": 2102.2400856018066, + "p90": 2479.5520305633545, + "p95": 3182.1439266204834, + "p99": 4024.6081352233887 }, "combine": { - "p50": 72.9919970035553, - "p90": 79.68000322580338, - "p95": 80.6720033288002, - "p99": 85.88799834251404 + "p50": 2238.5919094085693, + "p90": 2511.5840435028076, + "p95": 3066.6239261627197, + "p99": 3605.247974395752 }, "roundtrip": { - "p50": 129.63199615478516, - "p90": 154.91199493408203, - "p95": 156.47999942302704, - "p99": 159.96800363063812 + "p50": 2536.7679595947266, + "p90": 2645.951986312866, + "p95": 3478.5280227661133, + "p99": 4007.6160430908203 }, "isolatedSum": { - "p50": 148.6399993300438, - "p90": 
180.4480031132698, - "p95": 182.68800526857376, - "p99": 191.84000045061111 + "p50": 4340.831995010376, + "p90": 4991.136074066162, + "p95": 6248.767852783203, + "p99": 7629.856109619141 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4630528, - "combineLogicalBytes": 4630528, - "fanoutMean": 5.046875, - "recvTokensMax": 45, - "stragglerRank": 4, + "dispatchLogicalBytes": 619501568, + "combineLogicalBytes": 619501568, + "fanoutMean": 5.2750244140625, + "recvTokensMax": 5469, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 16, - "globalTokens": 128, + "tokensPerRank": 2048, + "globalTokens": 16384, "dispatch": { - "p50": 82.49600231647491, - "p90": 100.73599964380264, - "p95": 103.04000228643417, - "p99": 106.81600123643875 + "p50": 2352.7679443359375, + "p90": 2601.088047027588, + "p95": 3376.3840198516846, + "p99": 4238.1439208984375 }, "combine": { - "p50": 74.36800003051758, - "p90": 87.0399996638298, - "p95": 87.90399879217148, - "p99": 89.63199704885483 + "p50": 2585.2479934692383, + "p90": 2841.9840335845947, + "p95": 3667.9999828338623, + "p99": 4010.7522010803223 }, "roundtrip": { - "p50": 132.38400220870972, - "p90": 161.02400422096252, - "p95": 162.81600296497345, - "p99": 166.72000288963318 + "p50": 3136.607885360718, + "p90": 3412.1599197387695, + "p95": 4064.095973968506, + "p99": 6203.680038452148 }, "isolatedSum": { - "p50": 156.8640023469925, - "p90": 187.77599930763245, - "p95": 190.94400107860565, - "p99": 196.44799828529358 + "p50": 4938.015937805176, + "p90": 5443.072080612183, + "p95": 7044.384002685547, + "p99": 8248.89612197876 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 9447424, - "combineLogicalBytes": 9447424, - "fanoutMean": 5.1484375, - "recvTokensMax": 91, - "stragglerRank": 3, + "dispatchLogicalBytes": 1239375872, + "combineLogicalBytes": 1239375872, + "fanoutMean": 5.276611328125, + "recvTokensMax": 10883, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, 
"trials": 3 }, { - "tokensPerRank": 32, - "globalTokens": 256, + "tokensPerRank": 4096, + "globalTokens": 32768, "dispatch": { - "p50": 90.30400216579437, - "p90": 103.32799702882767, - "p95": 104.35199737548828, - "p99": 109.6000000834465 + "p50": 2850.5918979644775, + "p90": 3381.5360069274902, + "p95": 3976.288080215454, + "p99": 5621.503829956055 }, "combine": { - "p50": 81.31200075149536, - "p90": 89.75999802350998, - "p95": 90.43200314044952, - "p99": 91.61599725484848 + "p50": 3287.7440452575684, + "p90": 3433.759927749634, + "p95": 3676.8319606781006, + "p99": 4466.11213684082 }, "roundtrip": { - "p50": 142.20799505710602, - "p90": 158.65600109100342, - "p95": 161.50400042533875, - "p99": 167.39200055599213 + "p50": 4338.784217834473, + "p90": 4467.199802398682, + "p95": 4870.207786560059, + "p99": 5583.968162536621 }, "isolatedSum": { - "p50": 171.61600291728973, - "p90": 193.08799505233765, - "p95": 194.7840005159378, - "p99": 201.21599733829498 + "p50": 6138.335943222046, + "p90": 6815.295934677124, + "p95": 7653.120040893555, + "p99": 10087.615966796875 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19023872, - "combineLogicalBytes": 19023872, - "fanoutMean": 5.18359375, - "recvTokensMax": 178, - "stragglerRank": 2, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 64, - "globalTokens": 512, - "dispatch": { - "p50": 96.89600020647049, - "p90": 116.60800129175186, - "p95": 118.43200027942657, - "p99": 124.32000041007996 - }, - "combine": { - "p50": 90.30400216579437, - "p90": 103.32799702882767, - "p95": 103.74400019645691, - "p99": 104.25599664449692 - }, - "roundtrip": { - "p50": 162.08000481128693, - "p90": 178.8800060749054, - "p95": 181.85600638389587, - "p99": 186.49600446224213 - }, - "isolatedSum": { - "p50": 187.20000237226486, - "p90": 219.93599832057953, - "p95": 222.17600047588348, - "p99": 228.57599705457687 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 38148096, - 
"combineLogicalBytes": 38148096, - "fanoutMean": 5.197265625, - "recvTokensMax": 350, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 118.07999759912491, - "p90": 135.3279948234558, - "p95": 138.2399946451187, - "p99": 140.57600498199463 - }, - "combine": { - "p50": 106.84800148010254, - "p90": 119.45600062608719, - "p95": 119.74400281906128, - "p99": 120.54400146007538 - }, - "roundtrip": { - "p50": 198.84799420833588, - "p90": 216.2880003452301, - "p95": 219.67999637126923, - "p99": 221.47199511528015 - }, - "isolatedSum": { - "p50": 224.92799907922745, - "p90": 254.783995449543, - "p95": 257.98399746418, - "p99": 261.12000644207 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 76955648, - "combineLogicalBytes": 76955648, - "fanoutMean": 5.2421875, - "recvTokensMax": 687, - "stragglerRank": 5, + "dispatchLogicalBytes": 2479669248, + "combineLogicalBytes": 2479669248, + "fanoutMean": 5.278564453125, + "recvTokensMax": 21730, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -19287,46 +20388,47 @@ ] }, { - "id": "cx-a38d13e8", - "identity": "h100|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|decode|normal|none|none|0|tuned||a8f501af7004836", - "colorKey": "h100_769b9c4b", - "comparisonKey": "115d84ad1ee38d09", + "id": "cx-0ef62f98", + "identity": "b300|deepep|7168|8|384|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||cd50548525dafdf", + "colorKey": "b300_307ed708", + "comparisonKey": "6ef04ab36d1b6989", "schemaVersion": 3, - "generatedAt": "2026-06-27T00:00:11.807854+00:00", + "generatedAt": "2026-06-27T09:51:40.258532+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h100-dgxc-slurm_02", - "sku": "h100", + "runner": "b300-nv_08", + "sku": "b300", "backend": "deepep", - "phase": "decode", + "phase": "prefill", "mode": "normal", "resourceMode": 
"tuned", "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b300-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · bf16 · zipf+eplb", + "label": "B300 EP8 · deepep · bf16", + "model": "Kimi-K2", "shape": { "hidden": 7168, "topk": 8, - "experts": 288, - "routing": "zipf", - "routingLabel": "zipf+eplb", + "experts": 384, + "routing": "uniform", + "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", - "eplbEnabled": true, + "eplbEnabled": false, "dispatchDtype": "bf16", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1515, + "achievedFraction": 0.1351, "configuredUnits": 20, - "deviceUnits": 132, + "deviceUnits": 148, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -19339,313 +20441,239 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "a8f501af7004836", - "workloadId": "set:8:f5576e2b712d38c3", + "traceSignature": "cd50548525dafdf", + "workloadId": "set:6:b23bc0c4b6402c69", "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": 4.927734375, - "eplbImbalanceAfter": 1.0006103515625, + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, "backendVersion": "1.2.1", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271948775", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271948775", - "createdAt": "2026-06-26T23:58:53Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28285690957", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285690957", + "createdAt": "2026-06-27T09:51:40.258532+00:00", + "sha": 
"149586650dbed5b7579537347e9489d5b41543c1" }, "rows": [ { - "tokensPerRank": 1, - "globalTokens": 8, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 70.39999961853027, - "p90": 100.832000374794, - "p95": 105.56799918413162, - "p99": 192.73599982261658 + "p50": 94.14400160312653, + "p90": 98.4639972448349, + "p95": 102.94400155544281, + "p99": 110.91200262308121 }, "combine": { - "p50": 73.18399846553802, - "p90": 88.44800293445587, - "p95": 188.38399648666382, - "p99": 344.2560136318207 + "p50": 115.26399850845337, + "p90": 116.12799763679504, + "p95": 117.60000139474869, + "p99": 127.23200023174286 }, "roundtrip": { - "p50": 123.77600371837616, - "p90": 133.08799266815186, - "p95": 149.4400054216385, - "p99": 156.12800419330597 + "p50": 192.86400079727173, + "p90": 199.45600628852844, + "p95": 202.07999646663666, + "p99": 214.78399634361267 }, "isolatedSum": { - "p50": 143.5839980840683, - "p90": 189.28000330924988, - "p95": 293.95199567079544, - "p99": 536.9920134544373 + "p50": 209.4080001115799, + "p90": 214.59199488162994, + "p95": 220.5440029501915, + "p99": 238.14400285482407 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 616448, - "combineLogicalBytes": 616448, - "fanoutMean": 5.375, - "recvTokensMax": 7, + "dispatchLogicalBytes": 77514752, + "combineLogicalBytes": 77514752, + "fanoutMean": 5.2802734375, + "recvTokensMax": 707, "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2, - "globalTokens": 16, + "tokensPerRank": 256, + "globalTokens": 2048, "dispatch": { - "p50": 68.38399916887283, - "p90": 75.71200281381607, - "p95": 77.11999863386154, - "p99": 95.61599791049957 + "p50": 131.74399733543396, + "p90": 138.7840062379837, + "p95": 141.184002161026, + "p99": 154.4319987297058 }, "combine": { - "p50": 71.29599899053574, - "p90": 73.44000041484833, - "p95": 74.36800003051758, - "p99": 82.2720006108284 + "p50": 161.85599565505981, + "p90": 164.2560064792633, + "p95": 
164.99200463294983, + "p99": 175.04000663757324 }, "roundtrip": { - "p50": 126.68800354003906, - "p90": 130.87999820709229, - "p95": 133.56800377368927, - "p99": 142.59199798107147 + "p50": 276.5760123729706, + "p90": 284.31999683380127, + "p95": 288.4159982204437, + "p99": 299.80799555778503 }, "isolatedSum": { - "p50": 139.67999815940857, - "p90": 149.1520032286644, - "p95": 151.48799866437912, - "p99": 177.88799852132797 + "p50": 293.5999929904938, + "p90": 303.040012717247, + "p95": 306.17600679397583, + "p99": 329.47200536727905 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1204224, - "combineLogicalBytes": 1204224, - "fanoutMean": 5.25, - "recvTokensMax": 14, + "dispatchLogicalBytes": 154570752, + "combineLogicalBytes": 154570752, + "fanoutMean": 5.2646484375, + "recvTokensMax": 1391, "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 72.54400104284286, - "p90": 99.2640033364296, - "p95": 102.08000242710114, - "p99": 107.39199817180634 - }, - "combine": { - "p50": 72.9919970035553, - "p90": 79.71200346946716, - "p95": 84.22400057315826, - "p99": 87.39200234413147 - }, - "roundtrip": { - "p50": 130.23999333381653, - "p90": 156.41599893569946, - "p95": 160.22400557994843, - "p99": 165.53600132465363 - }, - "isolatedSum": { - "p50": 145.53599804639816, - "p90": 178.97600680589676, - "p95": 186.3040030002594, - "p99": 194.7840005159378 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2394112, - "combineLogicalBytes": 2394112, - "fanoutMean": 5.21875, - "recvTokensMax": 24, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 512, + "globalTokens": 4096, "dispatch": { - "p50": 96.19200229644775, - "p90": 109.56799983978271, - "p95": 112.73600161075592, - "p99": 155.87200224399567 + "p50": 192.60799884796143, + "p90": 199.52000677585602, + "p95": 
202.43200659751892, + "p99": 214.23999965190887 }, "combine": { - "p50": 75.45600086450577, - "p90": 88.06400001049042, - "p95": 89.4400030374527, - "p99": 97.37599641084671 + "p50": 265.28000831604004, + "p90": 274.4640111923218, + "p95": 275.1680016517639, + "p99": 287.1679961681366 }, "roundtrip": { - "p50": 130.94399869441986, - "p90": 154.4319987297058, - "p95": 156.44800662994385, - "p99": 176.67199671268463 + "p50": 434.7200095653534, + "p90": 443.3920085430145, + "p95": 447.1360146999359, + "p99": 463.00798654556274 }, "isolatedSum": { - "p50": 171.64800316095352, - "p90": 197.63199985027313, - "p95": 202.17600464820862, - "p99": 253.24799865484238 + "p50": 457.88800716400146, + "p90": 473.9840179681778, + "p95": 477.60000824928284, + "p99": 501.40799582004547 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4630528, - "combineLogicalBytes": 4630528, - "fanoutMean": 5.046875, - "recvTokensMax": 45, + "dispatchLogicalBytes": 309772288, + "combineLogicalBytes": 309772288, + "fanoutMean": 5.275390625, + "recvTokensMax": 2754, "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 82.97599852085114, - "p90": 100.16000270843506, - "p95": 103.55199873447418, - "p99": 106.72000050544739 - }, - "combine": { - "p50": 74.14399832487106, - "p90": 87.3280018568039, - "p95": 88.95999938249588, - "p99": 89.82399851083755 - }, - "roundtrip": { - "p50": 131.6480040550232, - "p90": 158.9760035276413, - "p95": 161.31199896335602, - "p99": 166.78400337696075 - }, - "isolatedSum": { - "p50": 157.1199968457222, - "p90": 187.48800456523895, - "p95": 192.51199811697006, - "p99": 196.54399901628494 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 9447424, - "combineLogicalBytes": 9447424, - "fanoutMean": 5.1484375, - "recvTokensMax": 91, - "stragglerRank": 3, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, + 
"tokensPerRank": 1024, + "globalTokens": 8192, "dispatch": { - "p50": 90.30400216579437, - "p90": 105.6319996714592, - "p95": 106.6880002617836, - "p99": 111.04000359773636 + "p50": 326.9760012626648, + "p90": 330.27198910713196, + "p95": 331.36001229286194, + "p99": 341.8239951133728 }, "combine": { - "p50": 80.99199831485748, - "p90": 89.15200084447861, - "p95": 89.88799899816513, - "p99": 90.91199934482574 + "p50": 458.3039879798889, + "p90": 462.3039960861206, + "p95": 470.2720046043396, + "p99": 482.7840030193329 }, "roundtrip": { - "p50": 142.17600226402283, - "p90": 157.6640009880066, - "p95": 160.44799983501434, - "p99": 164.8319959640503 + "p50": 764.2560005187988, + "p90": 772.1920013427734, + "p95": 775.4560112953186, + "p99": 788.320004940033 }, "isolatedSum": { - "p50": 171.29600048065186, - "p90": 194.7840005159378, - "p95": 196.57599925994873, - "p99": 201.9520029425621 + "p50": 785.2799892425537, + "p90": 792.5759851932526, + "p95": 801.6320168972015, + "p99": 824.6079981327057 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19023872, - "combineLogicalBytes": 19023872, - "fanoutMean": 5.18359375, - "recvTokensMax": 178, - "stragglerRank": 2, + "dispatchLogicalBytes": 619501568, + "combineLogicalBytes": 619501568, + "fanoutMean": 5.2750244140625, + "recvTokensMax": 5469, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 64, - "globalTokens": 512, + "tokensPerRank": 2048, + "globalTokens": 16384, "dispatch": { - "p50": 95.74399888515472, - "p90": 116.2559986114502, - "p95": 121.98399752378464, - "p99": 398.6560106277466 + "p50": 567.9680109024048, + "p90": 572.2560286521912, + "p95": 577.9520273208618, + "p99": 588.7359976768494 }, "combine": { - "p50": 90.20800143480301, - "p90": 101.1200025677681, - "p95": 104.25599664449692, - "p99": 111.55200004577637 + "p50": 807.4560165405273, + "p90": 816.864013671875, + "p95": 826.2720108032227, + "p99": 877.1520256996155 }, "roundtrip": { - "p50": 
160.76800227165222, - "p90": 181.536003947258, - "p95": 185.37600338459015, - "p99": 188.35200369358063 + "p50": 1359.0079545974731, + "p90": 1367.6799535751343, + "p95": 1373.7280368804932, + "p99": 1425.5039691925049 }, "isolatedSum": { - "p50": 185.95200031995773, - "p90": 217.3760011792183, - "p95": 226.23999416828156, - "p99": 510.20801067352295 + "p50": 1375.4240274429321, + "p90": 1389.1200423240662, + "p95": 1404.2240381240845, + "p99": 1465.8880233764648 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 38148096, - "combineLogicalBytes": 38148096, - "fanoutMean": 5.197265625, - "recvTokensMax": 350, - "stragglerRank": 7, + "dispatchLogicalBytes": 1239375872, + "combineLogicalBytes": 1239375872, + "fanoutMean": 5.276611328125, + "recvTokensMax": 10883, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 4096, + "globalTokens": 32768, "dispatch": { - "p50": 114.3679991364479, - "p90": 133.44000279903412, - "p95": 137.31199502944946, - "p99": 142.7839994430542 + "p50": 1064.0640258789062, + "p90": 1069.1200494766235, + "p95": 1075.103998184204, + "p99": 1101.088047027588 }, "combine": { - "p50": 108.15999656915665, - "p90": 120.2239990234375, - "p95": 121.24799937009811, - "p99": 123.99999797344208 + "p50": 1516.2559747695923, + "p90": 1527.4560451507568, + "p95": 1529.4400453567505, + "p99": 1576.3520002365112 }, "roundtrip": { - "p50": 199.35999810695648, - "p90": 217.31199324131012, - "p95": 220.15999257564545, - "p99": 380.8319866657257 + "p50": 2562.78395652771, + "p90": 2572.5440979003906, + "p95": 2577.984094619751, + "p99": 2608.351945877075 }, "isolatedSum": { - "p50": 222.52799570560455, - "p90": 253.66400182247162, - "p95": 258.5599943995476, - "p99": 266.7839974164963 + "p50": 2580.3200006484985, + "p90": 2596.5760946273804, + "p95": 2604.5440435409546, + "p99": 2677.440047264099 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 76955648, - 
"combineLogicalBytes": 76955648, - "fanoutMean": 5.2421875, - "recvTokensMax": 687, - "stragglerRank": 4, + "dispatchLogicalBytes": 2479669248, + "combineLogicalBytes": 2479669248, + "fanoutMean": 5.278564453125, + "recvTokensMax": 21730, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -19653,48 +20681,49 @@ ] }, { - "id": "cx-4ad32f1a", - "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|fp8-saturation|none|none|0|normalized|0.18|8c8497a77d9085d", - "colorKey": "h100_7b3247bf", - "comparisonKey": "2a087c80bac58077", + "id": "cx-1f1575ee", + "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|prefill|normal|none|none|0|tuned||0a3064a2af0dd39", + "colorKey": "b300_77566238", + "comparisonKey": "89f8d104edbb2508", "schemaVersion": 3, - "generatedAt": "2026-06-26T15:27:59.966964+00:00", + "generatedAt": "2026-06-27T09:48:40.157886+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h100-dgxc-slurm_12", - "sku": "h100", + "runner": "b300-nv_16", + "sku": "b300", "backend": "deepep", - "phase": "decode", + "phase": "prefill", "mode": "normal", - "resourceMode": "normalized", - "suite": "resource-constrained", + "resourceMode": "tuned", + "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", + "topologyClass": "b300-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · bf16 (norm)", + "label": "B300 EP8 · deepep · bf16 · balanced", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", + "routing": "balanced", + "routingLabel": "balanced", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, "dispatchDtype": "bf16", - "activationProfile": "fp8-saturation", + "activationProfile": "normal", "combineQuantMode": "none" }, 
"resourceProfile": { - "requestedFraction": 0.18, - "achievedFraction": 0.1818, - "configuredUnits": 24, - "deviceUnits": 132, - "resourceClass": "unknown", - "conformanceClass": "resource-conforming", + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", "fixedKernel": false, "paretoEligible": false }, @@ -19705,8 +20734,8 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "8c8497a77d9085d", - "workloadId": "set:4:d8d49658059863f2", + "traceSignature": "0a3064a2af0dd39", + "workloadId": "set:6:2dad1a73ff872905", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -19714,155 +20743,229 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28247603308", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28247603308", - "createdAt": "2026-06-26T15:22:55Z", - "sha": "fd23d02b65dba6f1ed963342b188022fc27263d1" + "id": "28285615307", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285615307", + "createdAt": "2026-06-27T09:48:40.157886+00:00", + "sha": "149586650dbed5b7579537347e9489d5b41543c1" }, "rows": [ { - "tokensPerRank": 1, - "globalTokens": 8, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 96.73599898815155, - "p90": 102.49599814414978, - "p95": 104.12800312042236, - "p99": 112.19199746847153 + "p50": 1811.2640380859375, + "p90": 2052.7360439300537, + "p95": 2767.9359912872314, + "p99": 3486.1440658569336 }, "combine": { - "p50": 79.42400127649307, - "p90": 81.4720019698143, - "p95": 82.14399963617325, - "p99": 87.93599903583527 + "p50": 1848.6720323562622, + "p90": 1981.9200038909912, + "p95": 2632.8959465026855, + "p99": 3014.080047607422 }, "roundtrip": { - "p50": 146.84799313545227, - "p90": 
156.15999698638916, - "p95": 159.13599729537964, - "p99": 164.000004529953 + "p50": 1926.3039827346802, + "p90": 2019.2639827728271, + "p95": 2607.0079803466797, + "p99": 3037.4081134796143 }, "isolatedSum": { - "p50": 176.16000026464462, - "p90": 183.96800011396408, - "p95": 186.2720027565956, - "p99": 200.1279965043068 + "p50": 3659.9360704421997, + "p90": 4034.656047821045, + "p95": 5400.831937789917, + "p99": 6500.2241134643555 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 630784, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 7, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 8, + "recvTokensMax": 1024, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 256, + "globalTokens": 2048, "dispatch": { - "p50": 98.33600372076035, - "p90": 103.93600165843964, - "p95": 106.52799904346466, - "p99": 111.58400028944016 + "p50": 1909.9199771881104, + "p90": 2291.3599014282227, + "p95": 2951.96795463562, + "p99": 4049.7918128967285 }, "combine": { - "p50": 80.03199845552444, - "p90": 86.84799820184708, - "p95": 87.61599659919739, - "p99": 88.06400001049042 + "p50": 1909.9839925765991, + "p90": 2116.7359352111816, + "p95": 2735.680103302002, + "p99": 3026.4639854431152 }, "roundtrip": { - "p50": 151.64799988269806, - "p90": 159.16800498962402, - "p95": 160.35200655460358, - "p99": 165.50399363040924 + "p50": 2060.3199005126953, + "p90": 2157.792091369629, + "p95": 2832.7999114990234, + "p99": 3228.3198833465576 }, "isolatedSum": { - "p50": 178.3680021762848, - "p90": 190.7839998602867, - "p95": 194.14399564266205, - "p99": 199.64800029993057 + "p50": 3819.9039697647095, + "p90": 4408.095836639404, + "p95": 5687.648057937622, + "p99": 7076.255798339844 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - 
"recvTokensMax": 47, - "stragglerRank": 7, + "dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 8, + "recvTokensMax": 2048, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 32, - "globalTokens": 256, + "tokensPerRank": 512, + "globalTokens": 4096, "dispatch": { - "p50": 99.90400075912476, - "p90": 105.76000064611435, - "p95": 108.15999656915665, - "p99": 116.60800129175186 + "p50": 2026.7200469970703, + "p90": 2262.399911880493, + "p95": 2992.89608001709, + "p99": 3506.0160160064697 }, "combine": { - "p50": 87.90399879217148, - "p90": 90.55999666452408, - "p95": 95.23200243711472, - "p99": 96.57599776983261 + "p50": 2108.9279651641846, + "p90": 2252.255916595459, + "p95": 2964.672088623047, + "p99": 3763.808012008667 }, "roundtrip": { - "p50": 157.82399475574493, - "p90": 163.7759953737259, - "p95": 166.78400337696075, - "p99": 169.95200514793396 + "p50": 2335.0400924682617, + "p90": 2459.1360092163086, + "p95": 3039.2000675201416, + "p99": 3627.135992050171 }, "isolatedSum": { - "p50": 187.80799955129623, - "p90": 196.31999731063843, - "p95": 203.39199900627136, - "p99": 213.18399906158447 + "p50": 4135.648012161255, + "p90": 4514.655828475952, + "p95": 5957.568168640137, + "p99": 7269.824028015137 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19726336, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 182, - "stragglerRank": 4, + "dispatchLogicalBytes": 469762048, + "combineLogicalBytes": 469762048, + "fanoutMean": 8, + "recvTokensMax": 4096, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 1024, + "globalTokens": 8192, "dispatch": { - "p50": 128.60800325870514, - "p90": 133.53599607944489, - "p95": 135.51999628543854, - "p99": 138.49599659442902 + "p50": 2215.167999267578, + "p90": 2474.047899246216, + "p95": 2963.9999866485596, + "p99": 
3755.0079822540283 }, "combine": { - "p50": 112.57600039243698, - "p90": 120.4800009727478, - "p95": 120.7680031657219, - "p99": 122.40000069141388 + "p50": 2386.8160247802734, + "p90": 2521.951913833618, + "p95": 3310.7199668884277, + "p99": 3616.895914077759 }, "roundtrip": { - "p50": 208.3519995212555, - "p90": 215.71199595928192, - "p95": 217.56799519062042, - "p99": 220.5439954996109 + "p50": 2777.695894241333, + "p90": 2873.3439445495605, + "p95": 3295.2001094818115, + "p99": 4089.024066925049 }, "isolatedSum": { - "p50": 241.18400365114212, - "p90": 254.0159970521927, - "p95": 256.28799945116043, - "p99": 260.8959972858429 + "p50": 4601.984024047852, + "p90": 4995.999813079834, + "p95": 6274.719953536987, + "p99": 7371.903896331787 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, + "dispatchLogicalBytes": 939524096, + "combineLogicalBytes": 939524096, + "fanoutMean": 8, + "recvTokensMax": 8192, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 2534.015893936157, + "p90": 2614.207983016968, + "p95": 3331.199884414673, + "p99": 3946.6240406036377 + }, + "combine": { + "p50": 2894.8159217834473, + "p90": 2969.0239429473877, + "p95": 3296.128034591675, + "p99": 4143.392086029053 + }, + "roundtrip": { + "p50": 3649.6639251708984, + "p90": 3799.5200157165527, + "p95": 4219.871997833252, + "p99": 4852.320194244385 + }, + "isolatedSum": { + "p50": 5428.8318157196045, + "p90": 5583.2319259643555, + "p95": 6627.327919006348, + "p99": 8090.01612663269 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1879048192, + "combineLogicalBytes": 1879048192, + "fanoutMean": 8, + "recvTokensMax": 16384, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + 
"p50": 3252.351999282837, + "p90": 3331.104040145874, + "p95": 3698.4639167785645, + "p99": 4560.927867889404 + }, + "combine": { + "p50": 3938.591957092285, + "p90": 4131.968021392822, + "p95": 4414.432048797607, + "p99": 5301.055908203125 + }, + "roundtrip": { + "p50": 5385.6000900268555, + "p90": 5495.0079917907715, + "p95": 6258.880138397217, + "p99": 6821.216106414795 + }, + "isolatedSum": { + "p50": 7190.943956375122, + "p90": 7463.072061538696, + "p95": 8112.895965576172, + "p99": 9861.98377609253 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3758096384, + "combineLogicalBytes": 3758096384, + "fanoutMean": 8, + "recvTokensMax": 32768, "stragglerRank": 7, "correct": true, "samplesPooled": 600, @@ -19871,34 +20974,35 @@ ] }, { - "id": "cx-b5d97134", - "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|normalized|0.1|8c8497a77d9085d", - "colorKey": "h100_7b3247bf", - "comparisonKey": "b51e047646ec8fac", + "id": "cx-a989dada", + "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|prefill|normal|none|none|0|tuned||9e6ac678a09f7f8", + "colorKey": "b300_77566238", + "comparisonKey": "0cdc743c580a47d3", "schemaVersion": 3, - "generatedAt": "2026-06-26T17:27:16.815311+00:00", + "generatedAt": "2026-06-26T23:58:19.169974+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h100-dgxc-slurm_07", - "sku": "h100", + "runner": "b300-nv_01", + "sku": "b300", "backend": "deepep", - "phase": "decode", + "phase": "prefill", "mode": "normal", - "resourceMode": "normalized", - "suite": "resource-constrained", + "resourceMode": "tuned", + "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", + "topologyClass": "b300-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · bf16 (norm)", + "label": "B300 EP8 · deepep · bf16 
· balanced", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", + "routing": "balanced", + "routingLabel": "balanced", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, @@ -19907,14 +21011,14 @@ "combineQuantMode": "none" }, "resourceProfile": { - "requestedFraction": 0.1, - "achievedFraction": 0.0985, - "configuredUnits": 13, - "deviceUnits": 132, - "resourceClass": "resource-constrained", - "conformanceClass": "resource-conforming", + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", "fixedKernel": false, - "paretoEligible": true + "paretoEligible": false }, "placement": { "kind": "packed", @@ -19923,8 +21027,8 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "8c8497a77d9085d", - "workloadId": "set:4:d8d49658059863f2", + "traceSignature": "9e6ac678a09f7f8", + "workloadId": "set:3:2dad1a73ff872905", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -19932,156 +21036,119 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28254271442", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254271442", - "createdAt": "2026-06-26T17:26:00Z", - "sha": "60dec7d70f554e252fec87709e2be52752947db1" + "id": "28271876366", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271876366", + "createdAt": "2026-06-26T23:58:19.169974+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" }, "rows": [ { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 97.24800288677216, - "p90": 103.39199751615524, - "p95": 105.8880016207695, - "p99": 111.13599687814713 - }, - "combine": { - "p50": 78.84799689054489, - "p90": 81.727996468544, 
- "p95": 85.11999994516373, - "p99": 89.02399986982346 - }, - "roundtrip": { - "p50": 151.36000514030457, - "p90": 157.53600001335144, - "p95": 159.67999398708344, - "p99": 164.63999450206757 - }, - "isolatedSum": { - "p50": 176.09599977731705, - "p90": 185.11999398469925, - "p95": 191.00800156593323, - "p99": 200.15999674797058 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 630784, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 99.29600358009338, - "p90": 104.70400005578995, - "p95": 106.72000050544739, - "p99": 113.53600025177002 + "p50": 1816.2239789962769, + "p90": 2297.152042388916, + "p95": 2896.320104598999, + "p99": 3506.6559314727783 }, "combine": { - "p50": 79.58400249481201, - "p90": 86.97599917650223, - "p95": 87.39200234413147, - "p99": 91.5519967675209 + "p50": 1859.1680526733398, + "p90": 2047.4560260772705, + "p95": 2707.1681022644043, + "p99": 3027.2960662841797 }, "roundtrip": { - "p50": 153.85599434375763, - "p90": 161.28000617027283, - "p95": 162.432000041008, - "p99": 166.07999801635742 + "p50": 1932.8960180282593, + "p90": 2138.335943222046, + "p95": 2772.9599475860596, + "p99": 3193.279981613159 }, "isolatedSum": { - "p50": 178.8800060749054, - "p90": 191.67999923229218, - "p95": 194.11200284957886, - "p99": 205.08799701929092 + "p50": 3675.3920316696167, + "p90": 4344.6080684661865, + "p95": 5603.488206863403, + "p99": 6533.951997756958 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 5, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 8, + "recvTokensMax": 1024, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - 
"tokensPerRank": 32, - "globalTokens": 256, + "tokensPerRank": 512, + "globalTokens": 4096, "dispatch": { - "p50": 103.29599678516388, - "p90": 107.64800012111664, - "p95": 109.98400300741196, - "p99": 121.40800058841705 + "p50": 2029.6320915222168, + "p90": 2355.0078868865967, + "p95": 3023.6799716949463, + "p99": 3532.543897628784 }, "combine": { - "p50": 87.74399757385254, - "p90": 95.20000219345093, - "p95": 95.48799693584442, - "p99": 97.18400239944458 + "p50": 2128.671884536743, + "p90": 2460.576057434082, + "p95": 3003.5200119018555, + "p99": 3345.4079627990723 }, "roundtrip": { - "p50": 161.6639941930771, - "p90": 169.50400173664093, - "p95": 170.9440052509308, - "p99": 175.52000284194946 + "p50": 2337.8241062164307, + "p90": 2708.159923553467, + "p95": 3375.744104385376, + "p99": 3673.952102661133 }, "isolatedSum": { - "p50": 191.03999435901642, - "p90": 202.84800231456757, - "p95": 205.47199994325638, - "p99": 218.59200298786163 + "p50": 4158.30397605896, + "p90": 4815.583944320679, + "p95": 6027.199983596802, + "p99": 6877.951860427856 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19726336, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 182, - "stragglerRank": 5, + "dispatchLogicalBytes": 469762048, + "combineLogicalBytes": 469762048, + "fanoutMean": 8, + "recvTokensMax": 4096, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 2048, + "globalTokens": 16384, "dispatch": { - "p50": 129.66400384902954, - "p90": 137.79200613498688, - "p95": 139.55199718475342, - "p99": 143.93599331378937 + "p50": 2545.1838970184326, + "p90": 2883.19993019104, + "p95": 3424.1280555725098, + "p99": 3852.544069290161 }, "combine": { - "p50": 113.72800171375275, - "p90": 120.15999853610992, - "p95": 120.83200365304947, - "p99": 123.55200201272964 + "p50": 2903.520107269287, + "p90": 3124.959945678711, + "p95": 3718.2400226593018, + "p99": 
4377.791881561279 }, "roundtrip": { - "p50": 211.776003241539, - "p90": 217.21599996089935, - "p95": 218.9439982175827, - "p99": 222.75200486183167 + "p50": 3660.6719493865967, + "p90": 3928.3199310302734, + "p95": 4631.743907928467, + "p99": 5148.064136505127 }, "isolatedSum": { - "p50": 243.3920055627823, - "p90": 257.9520046710968, - "p95": 260.3840008378029, - "p99": 267.487995326519 + "p50": 5448.70400428772, + "p90": 6008.159875869751, + "p95": 7142.3680782318115, + "p99": 8230.33595085144 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 5, + "dispatchLogicalBytes": 1879048192, + "combineLogicalBytes": 1879048192, + "fanoutMean": 8, + "recvTokensMax": 16384, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -20089,34 +21156,35 @@ ] }, { - "id": "cx-2f9f6948", - "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|ac583971f94b176", - "colorKey": "h100_7b3247bf", - "comparisonKey": "b51e047646ec8fac", + "id": "cx-092ff174", + "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|8|prefill|normal|none|none|0|tuned||7aa44c7b86748b9", + "colorKey": "b300_a314501b", + "comparisonKey": "c51826952291f0ba", "schemaVersion": 3, - "generatedAt": "2026-06-26T17:29:02.253264+00:00", + "generatedAt": "2026-06-26T23:57:58.409823+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h100-dgxc-slurm_00", - "sku": "h100", + "runner": "b300-nv_14", + "sku": "b300", "backend": "deepep", - "phase": "decode", + "phase": "prefill", "mode": "normal", - "resourceMode": "normalized", - "suite": "resource-constrained", + "resourceMode": "tuned", + "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", + 
"topologyClass": "b300-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · bf16 (norm)", + "label": "B300 EP8 · deepep · bf16 · balanced-rank-local", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", + "routing": "balanced-rank-local", + "routingLabel": "balanced-rank-local", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, @@ -20125,14 +21193,14 @@ "combineQuantMode": "none" }, "resourceProfile": { - "requestedFraction": 0.18, - "achievedFraction": 0.1818, - "configuredUnits": 24, - "deviceUnits": 132, - "resourceClass": "resource-constrained", - "conformanceClass": "resource-conforming", + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", "fixedKernel": false, - "paretoEligible": true + "paretoEligible": false }, "placement": { "kind": "packed", @@ -20141,8 +21209,8 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "ac583971f94b176", - "workloadId": "set:8:d8d49658059863f2", + "traceSignature": "7aa44c7b86748b9", + "workloadId": "set:3:388ff74baef05c72", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -20150,304 +21218,412 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28254315809", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254315809", - "createdAt": "2026-06-26T17:26:52Z", - "sha": "60dec7d70f554e252fec87709e2be52752947db1" + "id": "28271883343", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271883343", + "createdAt": "2026-06-26T23:57:58.409823+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" }, "rows": [ { - "tokensPerRank": 1, - 
"globalTokens": 8, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 95.74399888515472, - "p90": 102.78400033712387, - "p95": 104.99200224876404, - "p99": 109.37599837779999 + "p50": 69.37599927186966, + "p90": 71.03999704122543, + "p95": 73.37599992752075, + "p99": 81.69600367546082 }, "combine": { - "p50": 79.32800054550171, - "p90": 82.07999914884567, - "p95": 82.87999778985977, - "p99": 88.03199976682663 + "p50": 67.61600077152252, + "p90": 69.60000097751617, + "p95": 77.02399790287018, + "p99": 83.39200168848038 }, "roundtrip": { - "p50": 147.74399995803833, - "p90": 154.6880006790161, - "p95": 157.44000673294067, - "p99": 171.9360053539276 + "p50": 119.93599683046341, + "p90": 126.01600587368011, + "p95": 128.48000228405, + "p99": 135.55200397968292 }, "isolatedSum": { - "p50": 175.07199943065643, - "p90": 184.86399948596954, - "p95": 187.8720000386238, - "p99": 197.40799814462662 + "p50": 136.99200004339218, + "p90": 140.6399980187416, + "p95": 150.39999783039093, + "p99": 165.0880053639412 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 630784, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 7, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 1, + "recvTokensMax": 128, "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2, - "globalTokens": 16, + "tokensPerRank": 512, + "globalTokens": 4096, "dispatch": { - "p50": 71.23199850320816, - "p90": 101.27999633550644, - "p95": 102.52799838781357, - "p99": 107.87200182676315 + "p50": 93.98400038480759, + "p90": 98.68799895048141, + "p95": 100.28800368309021, + "p99": 105.72800040245056 }, "combine": { - "p50": 72.22399860620499, - "p90": 80.92799782752991, - "p95": 81.44000172615051, - "p99": 84.76799726486206 + "p50": 115.52000045776367, + "p90": 116.5120005607605, + "p95": 116.73600226640701, + "p99": 123.48800152540207 }, "roundtrip": { - "p50": 127.45599448680878, - "p90": 
153.02400290966034, - "p95": 155.64799308776855, - "p99": 159.4880074262619 + "p50": 193.08799505233765, + "p90": 197.88800179958344, + "p95": 198.59200716018677, + "p99": 204.0960043668747 }, "isolatedSum": { - "p50": 143.45599710941315, - "p90": 182.20799416303635, - "p95": 183.96800011396408, - "p99": 192.6399990916252 + "p50": 209.50400084257126, + "p90": 215.1999995112419, + "p95": 217.02400594949722, + "p99": 229.21600192785263 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1232896, - "combineLogicalBytes": 1232896, - "fanoutMean": 5.375, - "recvTokensMax": 13, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 1, + "recvTokensMax": 512, "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 4, - "globalTokens": 32, + "tokensPerRank": 2048, + "globalTokens": 16384, "dispatch": { - "p50": 95.23200243711472, - "p90": 102.36799716949463, - "p95": 107.84000158309937, - "p99": 439.64800238609314 + "p50": 197.37599790096283, + "p90": 199.96799528598785, + "p95": 200.80000162124634, + "p99": 207.10399746894836 }, "combine": { - "p50": 72.95999675989151, - "p90": 81.66400343179703, - "p95": 86.81599795818329, - "p99": 88.92799913883209 + "p50": 248.1600046157837, + "p90": 249.9839961528778, + "p95": 250.68798661231995, + "p99": 253.79198789596558 }, "roundtrip": { - "p50": 128.7360042333603, - "p90": 159.19999778270721, - "p95": 161.31199896335602, - "p99": 167.1680063009262 + "p50": 429.8880100250244, + "p90": 434.30399894714355, + "p95": 436.2879991531372, + "p99": 442.84799695014954 }, "isolatedSum": { - "p50": 168.19199919700623, - "p90": 184.03200060129166, - "p95": 194.65599954128265, - "p99": 528.5760015249252 + "p50": 445.5360025167465, + "p90": 449.95199143886566, + "p95": 451.4879882335663, + "p99": 460.89598536491394 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2480128, - "combineLogicalBytes": 2480128, - "fanoutMean": 5.40625, - "recvTokensMax": 29, + 
"dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 1, + "recvTokensMax": 2048, "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 95.42399644851685, - "p90": 102.52799838781357, - "p95": 104.89600151777267, - "p99": 113.53600025177002 - }, - "combine": { - "p50": 79.58400249481201, - "p90": 82.91199803352356, - "p95": 87.07199990749359, - "p99": 87.96799927949905 - }, - "roundtrip": { - "p50": 151.48800611495972, - "p90": 159.90400314331055, - "p95": 162.20800578594208, - "p99": 169.47199404239655 - }, - "isolatedSum": { - "p50": 175.00799894332886, - "p90": 185.43999642133713, - "p95": 191.96800142526627, - "p99": 201.50399953126907 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 4, - "correct": true, + } + ] + }, + { + "id": "cx-91ac2845", + "identity": "b300|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|balanced+eplb|8|prefill|normal|none|none|0|tuned||df54a9510825f71", + "colorKey": "b300_592e9a16", + "comparisonKey": "0a480d3d40419b1c", + "schemaVersion": 3, + "generatedAt": "2026-06-27T09:48:29.790713+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "b300-nv_03", + "sku": "b300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep · bf16 · balanced+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "balanced", + "routingLabel": "balanced+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + 
"dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "df54a9510825f71", + "workloadId": "set:6:2dad1a73ff872905", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": 1, + "eplbImbalanceAfter": 1, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28285617940", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285617940", + "createdAt": "2026-06-27T09:48:29.790713+00:00", + "sha": "149586650dbed5b7579537347e9489d5b41543c1" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 84.25600081682205, + "p90": 88.16000074148178, + "p95": 89.21600133180618, + "p99": 96.03200107812881 + }, + "combine": { + "p50": 82.2720006108284, + "p90": 90.71999788284302, + "p95": 90.97599983215332, + "p99": 102.49599814414978 + }, + "roundtrip": { + "p50": 146.40000462532043, + "p90": 149.1200029850006, + "p95": 150.68799257278442, + "p99": 157.31200575828552 + }, + "isolatedSum": { + "p50": 166.52800142765045, + "p90": 178.8799986243248, + "p95": 180.1920011639595, + "p99": 198.5279992222786 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 2, + "recvTokensMax": 384, + "stragglerRank": 7, + "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 16, - "globalTokens": 128, + "tokensPerRank": 256, + "globalTokens": 2048, "dispatch": { - "p50": 
95.71199864149094, - "p90": 100.8640006184578, - "p95": 102.68799960613251, - "p99": 106.49599879980087 + "p50": 96.12800180912018, + "p90": 98.59199821949005, + "p95": 100.44799745082855, + "p99": 120.12799829244614 }, "combine": { - "p50": 80.64000308513641, - "p90": 87.90399879217148, - "p95": 89.24800157546997, - "p99": 95.23200243711472 + "p50": 104.92800176143646, + "p90": 113.92000317573547, + "p95": 114.43199962377548, + "p99": 116.38399958610535 }, "roundtrip": { - "p50": 152.319997549057, - "p90": 160.19199788570404, - "p95": 162.23999857902527, - "p99": 168.92799735069275 + "p50": 184.28799510002136, + "p90": 191.74399971961975, + "p95": 194.14399564266205, + "p99": 206.01600408554077 }, "isolatedSum": { - "p50": 176.35200172662735, - "p90": 188.76799941062927, - "p95": 191.93600118160248, - "p99": 201.7280012369156 + "p50": 201.05600357055664, + "p90": 212.51200139522552, + "p95": 214.87999707460403, + "p99": 236.51199787855148 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 9920512, - "combineLogicalBytes": 9920512, - "fanoutMean": 5.40625, - "recvTokensMax": 92, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 2, + "recvTokensMax": 768, "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 32, - "globalTokens": 256, + "tokensPerRank": 512, + "globalTokens": 4096, "dispatch": { - "p50": 80.86399734020233, - "p90": 103.26399654150009, - "p95": 105.47199845314026, - "p99": 113.18399757146835 + "p50": 131.55199587345123, + "p90": 138.91200721263885, + "p95": 140.19200205802917, + "p99": 149.85600113868713 }, "combine": { - "p50": 80.35200089216232, - "p90": 89.31200206279755, - "p95": 90.04800021648407, - "p99": 95.74399888515472 + "p50": 142.65599846839905, + "p90": 151.90400183200836, + "p95": 152.41600573062897, + "p99": 164.09599781036377 }, "roundtrip": { - "p50": 136.48000359535217, - "p90": 164.60800170898438, - "p95": 167.10400581359863, - "p99": 
175.10400712490082 + "p50": 258.59200954437256, + "p90": 264.6400034427643, + "p95": 268.38400959968567, + "p99": 282.943993806839 }, "isolatedSum": { - "p50": 161.21599823236465, - "p90": 192.57599860429764, - "p95": 195.51999866962433, - "p99": 208.92799645662308 + "p50": 274.2079943418503, + "p90": 290.8160090446472, + "p95": 292.60800778865814, + "p99": 313.9519989490509 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19726336, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 182, - "stragglerRank": 4, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 2, + "recvTokensMax": 1536, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 64, - "globalTokens": 512, + "tokensPerRank": 1024, + "globalTokens": 8192, "dispatch": { - "p50": 103.4879982471466, - "p90": 112.8000020980835, - "p95": 114.3679991364479, - "p99": 125.72799623012543 + "p50": 199.8399943113327, + "p90": 207.10399746894836, + "p95": 214.36800062656403, + "p99": 236.4799976348877 }, "combine": { - "p50": 96.83199971914291, - "p90": 104.12800312042236, - "p95": 104.99200224876404, - "p99": 106.33599758148193 + "p50": 262.36799359321594, + "p90": 262.9759907722473, + "p95": 263.35999369621277, + "p99": 272.5119888782501 }, "roundtrip": { - "p50": 170.71999609470367, - "p90": 181.21600151062012, - "p95": 182.91200697422028, - "p99": 186.81600689888 + "p50": 435.5199933052063, + "p90": 441.9200122356415, + "p95": 445.4079866409302, + "p99": 463.29599618911743 }, "isolatedSum": { - "p50": 200.31999796628952, - "p90": 216.92800521850586, - "p95": 219.36000138521194, - "p99": 232.06399381160736 + "p50": 462.20798790454865, + "p90": 470.0799882411957, + "p95": 477.7279943227768, + "p99": 508.9919865131378 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 38993920, - "combineLogicalBytes": 38993920, - "fanoutMean": 5.3125, - "recvTokensMax": 367, + "dispatchLogicalBytes": 234881024, + 
"combineLogicalBytes": 234881024, + "fanoutMean": 2, + "recvTokensMax": 3072, "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 2048, + "globalTokens": 16384, "dispatch": { - "p50": 111.29599809646606, - "p90": 130.87999820709229, - "p95": 133.5040032863617, - "p99": 139.93600010871887 + "p50": 345.7599878311157, + "p90": 352.7039885520935, + "p95": 355.9040129184723, + "p99": 390.3999924659729 }, "combine": { - "p50": 106.27199709415436, - "p90": 119.58400160074234, - "p95": 119.99999731779099, - "p99": 122.3360002040863 + "p50": 459.55199003219604, + "p90": 462.911993265152, + "p95": 470.8159863948822, + "p99": 483.6159944534302 }, "roundtrip": { - "p50": 197.56799936294556, - "p90": 215.80800414085388, - "p95": 217.92000532150269, - "p99": 219.80799734592438 + "p50": 786.9439721107483, + "p90": 792.8640246391296, + "p95": 797.5040078163147, + "p99": 829.7920227050781 }, "isolatedSum": { - "p50": 217.56799519062042, - "p90": 250.46399980783463, - "p95": 253.50400060415268, - "p99": 262.2720003128052 + "p50": 805.3119778633118, + "p90": 815.6159818172455, + "p95": 826.7199993133545, + "p99": 874.0159869194031 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 4, + "dispatchLogicalBytes": 469762048, + "combineLogicalBytes": 469762048, + "fanoutMean": 2, + "recvTokensMax": 6144, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 648.5120058059692, + "p90": 655.6479930877686, + "p95": 660.5439782142639, + "p99": 667.7119731903076 + }, + "combine": { + "p50": 828.0959725379944, + "p90": 838.4320139884949, + "p95": 840.6400084495544, + "p99": 855.0400137901306 + }, + "roundtrip": { + "p50": 1455.3279876708984, + "p90": 1466.5919542312622, + "p95": 
1471.0079431533813, + "p99": 1482.4320077896118 + }, + "isolatedSum": { + "p50": 1476.6079783439636, + "p90": 1494.0800070762634, + "p95": 1501.1839866638184, + "p99": 1522.7519869804382 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 939524096, + "combineLogicalBytes": 939524096, + "fanoutMean": 2, + "recvTokensMax": 12288, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 @@ -20455,34 +21631,35 @@ ] }, { - "id": "cx-4d84166e", - "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|c774c8e4abb34da", - "colorKey": "h100_7b3247bf", - "comparisonKey": "b51e047646ec8fac", + "id": "cx-eac6e215", + "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|prefill|normal|none|none|0|tuned||38fd0bcf7109c32", + "colorKey": "b300_5b993222", + "comparisonKey": "d3d6cc25fee96bc7", "schemaVersion": 3, - "generatedAt": "2026-06-26T15:26:50.881953+00:00", + "generatedAt": "2026-06-26T23:58:52.035249+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h100-dgxc-slurm_12", - "sku": "h100", + "runner": "b300-nv_09", + "sku": "b300", "backend": "deepep", - "phase": "decode", + "phase": "prefill", "mode": "normal", - "resourceMode": "normalized", - "suite": "resource-constrained", + "resourceMode": "tuned", + "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", + "topologyClass": "b300-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · bf16 (norm)", + "label": "B300 EP8 · deepep · bf16 · hotspot-single", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", + "routing": "hotspot-single", + "routingLabel": "hotspot-single", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, @@ -20491,12 
+21668,12 @@ "combineQuantMode": "none" }, "resourceProfile": { - "requestedFraction": 0.18, - "achievedFraction": 0.1818, - "configuredUnits": 24, - "deviceUnits": 132, - "resourceClass": "unknown", - "conformanceClass": "resource-conforming", + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", "fixedKernel": false, "paretoEligible": false }, @@ -20507,8 +21684,8 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "c774c8e4abb34da", - "workloadId": "set:5:d8d49658059863f2", + "traceSignature": "38fd0bcf7109c32", + "workloadId": "set:3:b952d4a43d688b50", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -20516,192 +21693,118 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28247565431", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28247565431", - "createdAt": "2026-06-26T15:22:16Z", - "sha": "fd23d02b65dba6f1ed963342b188022fc27263d1" + "id": "28271903494", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271903494", + "createdAt": "2026-06-26T23:58:52.035249+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" }, "rows": [ { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 97.4079966545105, - "p90": 298.2720136642456, - "p95": 307.0400059223175, - "p99": 323.61599802970886 - }, - "combine": { - "p50": 81.08799904584885, - "p90": 171.9360053539276, - "p95": 204.96000349521637, - "p99": 212.8639966249466 - }, - "roundtrip": { - "p50": 150.62400698661804, - "p90": 249.439999461174, - "p95": 253.53598594665527, - "p99": 263.0079984664917 - }, - "isolatedSum": { - "p50": 178.49599570035934, - "p90": 470.2080190181732, - "p95": 512.0000094175339, - "p99": 536.4799946546555 - }, - "roundtripMeasured": 
true, - "dispatchLogicalBytes": 630784, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 97.02400118112564, - "p90": 167.93599724769592, - "p95": 188.51199746131897, - "p99": 195.42400538921356 - }, - "combine": { - "p50": 82.0159986615181, - "p90": 120.12799829244614, - "p95": 129.37599420547485, - "p99": 155.008003115654 - }, - "roundtrip": { - "p50": 151.93599462509155, - "p90": 232.2559952735901, - "p95": 258.9440047740936, - "p99": 280.8319926261902 - }, - "isolatedSum": { - "p50": 179.03999984264374, - "p90": 288.06399554014206, - "p95": 317.8879916667938, - "p99": 350.43200850486755 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1232896, - "combineLogicalBytes": 1232896, - "fanoutMean": 5.375, - "recvTokensMax": 13, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 102.39999741315842, - "p90": 305.4080009460449, - "p95": 323.3279883861542, - "p99": 333.6640000343323 + "p50": 106.08000308275223, + "p90": 107.87200182676315, + "p95": 109.15199667215347, + "p99": 120.28799951076508 }, "combine": { - "p50": 82.07999914884567, - "p90": 138.7840062379837, - "p95": 148.67199957370758, - "p99": 171.77599668502808 + "p50": 127.83999741077423, + "p90": 129.85600531101227, + "p95": 130.97600638866425, + "p99": 139.5840048789978 }, "roundtrip": { - "p50": 155.61600029468536, - "p90": 289.8240089416504, - "p95": 331.6799998283386, - "p99": 397.3439931869507 + "p50": 219.39200162887573, + "p90": 224.16000068187714, + "p95": 225.055992603302, + "p99": 235.35999655723572 }, "isolatedSum": { - "p50": 184.4799965620041, - "p90": 444.1920071840286, - "p95": 471.99998795986176, - "p99": 505.43999671936035 + "p50": 233.92000049352646, 
+ "p90": 237.72800713777542, + "p95": 240.12800306081772, + "p99": 259.8720043897629 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2480128, - "combineLogicalBytes": 2480128, - "fanoutMean": 5.40625, - "recvTokensMax": 29, - "stragglerRank": 4, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1024, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 512, + "globalTokens": 4096, "dispatch": { - "p50": 97.37599641084671, - "p90": 105.12000322341919, - "p95": 109.69600081443787, - "p99": 118.17599833011627 + "p50": 217.95199811458588, + "p90": 224.03199970722198, + "p95": 228.83200645446777, + "p99": 252.70399451255798 }, "combine": { - "p50": 81.727996468544, - "p90": 83.36000144481659, - "p95": 87.45600283145905, - "p99": 90.30400216579437 + "p50": 336.38399839401245, + "p90": 338.49599957466125, + "p95": 339.9040102958679, + "p99": 348.4160006046295 }, "roundtrip": { - "p50": 154.81600165367126, - "p90": 297.88801074028015, - "p95": 379.2319893836975, - "p99": 438.4320080280304 + "p50": 535.8399748802185, + "p90": 546.0159778594971, + "p95": 551.3280034065247, + "p99": 558.3680272102356 }, "isolatedSum": { - "p50": 179.10399287939072, - "p90": 188.48000466823578, - "p95": 197.1520036458969, - "p99": 208.48000049591064 + "p50": 554.3359965085983, + "p90": 562.5279992818832, + "p95": 568.7360167503357, + "p99": 601.1199951171875 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 4, + "dispatchLogicalBytes": 311091200, + "combineLogicalBytes": 311091200, + "fanoutMean": 5.2978515625, + "recvTokensMax": 4096, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 16, - "globalTokens": 128, + "tokensPerRank": 2048, + "globalTokens": 16384, "dispatch": 
{ - "p50": 98.33600372076035, - "p90": 176.70400440692902, - "p95": 189.37599658966064, - "p99": 203.8400024175644 + "p50": 685.0559711456299, + "p90": 694.5599913597107, + "p95": 696.3199973106384, + "p99": 705.3760290145874 }, "combine": { - "p50": 83.10399949550629, - "p90": 112.64000087976456, - "p95": 115.29599875211716, - "p99": 132.51200318336487 + "p50": 1085.4400396347046, + "p90": 1086.3360166549683, + "p95": 1087.6480340957642, + "p99": 1096.7680215835571 }, "roundtrip": { - "p50": 157.98400342464447, - "p90": 187.16800212860107, - "p95": 204.8639953136444, - "p99": 235.9360009431839 + "p50": 1752.511978149414, + "p90": 1760.3199481964111, + "p95": 1762.0480060577393, + "p99": 1772.6080417633057 }, "isolatedSum": { - "p50": 181.44000321626663, - "p90": 289.3440052866936, - "p95": 304.6719953417778, - "p99": 336.35200560092926 + "p50": 1770.4960107803345, + "p90": 1780.896008014679, + "p95": 1783.9680314064026, + "p99": 1802.1440505981445 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 9920512, - "combineLogicalBytes": 9920512, - "fanoutMean": 5.40625, - "recvTokensMax": 92, + "dispatchLogicalBytes": 1241511936, + "combineLogicalBytes": 1241511936, + "fanoutMean": 5.28570556640625, + "recvTokensMax": 16384, "stragglerRank": 4, "correct": true, "samplesPooled": 600, @@ -20710,34 +21813,35 @@ ] }, { - "id": "cx-85608159", - "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|normalized|0.35|8c8497a77d9085d", - "colorKey": "h100_7b3247bf", - "comparisonKey": "b51e047646ec8fac", + "id": "cx-b38b286e", + "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|prefill|normal|none|none|0|tuned||bfbb64a166e9f1c", + "colorKey": "b300_5b993222", + "comparisonKey": "acefe503588b8e8a", "schemaVersion": 3, - "generatedAt": "2026-06-26T17:27:03.132747+00:00", + "generatedAt": "2026-06-27T09:50:40.107682+00:00", "status": "valid", "publicationStatus": "official", - "runner": 
"h100-dgxc-slurm_05", - "sku": "h100", + "runner": "b300-nv_13", + "sku": "b300", "backend": "deepep", - "phase": "decode", + "phase": "prefill", "mode": "normal", - "resourceMode": "normalized", - "suite": "resource-constrained", + "resourceMode": "tuned", + "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", + "topologyClass": "b300-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · bf16 (norm)", + "label": "B300 EP8 · deepep · bf16 · hotspot-single", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", + "routing": "hotspot-single", + "routingLabel": "hotspot-single", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, @@ -20746,14 +21850,14 @@ "combineQuantMode": "none" }, "resourceProfile": { - "requestedFraction": 0.35, - "achievedFraction": 0.3485, - "configuredUnits": 46, - "deviceUnits": 132, - "resourceClass": "resource-constrained", - "conformanceClass": "resource-conforming", + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", "fixedKernel": false, - "paretoEligible": true + "paretoEligible": false }, "placement": { "kind": "packed", @@ -20762,8 +21866,8 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "8c8497a77d9085d", - "workloadId": "set:4:d8d49658059863f2", + "traceSignature": "bfbb64a166e9f1c", + "workloadId": "set:6:b952d4a43d688b50", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -20771,156 +21875,230 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28254279368", - "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254279368", - "createdAt": "2026-06-26T17:26:09Z", - "sha": "60dec7d70f554e252fec87709e2be52752947db1" + "id": "28285666343", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285666343", + "createdAt": "2026-06-27T09:50:40.107682+00:00", + "sha": "149586650dbed5b7579537347e9489d5b41543c1" }, "rows": [ { - "tokensPerRank": 1, - "globalTokens": 8, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 96.63999825716019, - "p90": 102.84800082445145, - "p95": 105.18400371074677, - "p99": 111.13599687814713 + "p50": 104.12800312042236, + "p90": 105.79200088977814, + "p95": 106.65600001811981, + "p99": 124.79999661445618 }, "combine": { - "p50": 79.16799932718277, - "p90": 81.37600123882294, - "p95": 81.85599744319916, - "p99": 88.44800293445587 + "p50": 128.9599984884262, + "p90": 138.59200477600098, + "p95": 139.42399621009827, + "p99": 144.16000247001648 }, "roundtrip": { - "p50": 146.464005112648, - "p90": 155.35999834537506, - "p95": 157.60000050067902, - "p99": 163.35999965667725 + "p50": 217.3759937286377, + "p90": 224.0000069141388, + "p95": 225.055992603302, + "p99": 228.89600694179535 }, "isolatedSum": { - "p50": 175.80799758434296, - "p90": 184.22400206327438, - "p95": 187.04000115394592, - "p99": 199.583999812603 + "p50": 233.08800160884857, + "p90": 244.3840056657791, + "p95": 246.07999622821808, + "p99": 268.95999908447266 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 630784, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 6, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1024, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 256, + "globalTokens": 2048, "dispatch": { - "p50": 100.16000270843506, - "p90": 136.63999736309052, - "p95": 
139.77600634098053, - "p99": 245.728000998497 + "p50": 141.76000654697418, + "p90": 143.61600577831268, + "p95": 145.53600549697876, + "p99": 164.44799304008484 }, "combine": { - "p50": 80.70400357246399, - "p90": 82.62400329113007, - "p95": 86.62399649620056, - "p99": 89.37600255012512 + "p50": 188.38399648666382, + "p90": 190.17599523067474, + "p95": 192.00000166893005, + "p99": 201.9840031862259 }, "roundtrip": { - "p50": 151.71200037002563, - "p90": 158.27199816703796, - "p95": 160.09600460529327, - "p99": 165.47200083732605 + "p50": 318.11198592185974, + "p90": 323.64800572395325, + "p95": 325.0240087509155, + "p99": 335.3919982910156 }, "isolatedSum": { - "p50": 180.86400628089905, - "p90": 219.26400065422058, - "p95": 226.4000028371811, - "p99": 335.10400354862213 + "p50": 330.144003033638, + "p90": 333.7920010089874, + "p95": 337.5360071659088, + "p99": 366.43199622631073 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 0, + "dispatchLogicalBytes": 156090368, + "combineLogicalBytes": 156090368, + "fanoutMean": 5.31640625, + "recvTokensMax": 2048, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 32, - "globalTokens": 256, + "tokensPerRank": 512, + "globalTokens": 4096, "dispatch": { - "p50": 101.85600072145462, - "p90": 138.72000575065613, - "p95": 140.99200069904327, - "p99": 146.7839926481247 + "p50": 216.8319970369339, + "p90": 220.0320065021515, + "p95": 223.32799434661865, + "p99": 231.29600286483765 }, "combine": { - "p50": 88.99199962615967, - "p90": 104.06400263309479, - "p95": 104.25599664449692, - "p99": 111.455999314785 + "p50": 336.5760147571564, + "p90": 338.20798993110657, + "p95": 339.6799862384796, + "p99": 351.23199224472046 }, "roundtrip": { - "p50": 158.49600732326508, - "p90": 194.84800100326538, - "p95": 196.99199497699738, - "p99": 201.37600600719452 + "p50": 
534.6879959106445, + "p90": 541.5040254592896, + "p95": 543.8399910926819, + "p99": 547.327995300293 }, "isolatedSum": { - "p50": 190.8480003476143, - "p90": 242.78400838375092, - "p95": 245.2479973435402, - "p99": 258.2399919629097 + "p50": 553.4080117940903, + "p90": 558.239996433258, + "p95": 563.0079805850983, + "p99": 582.5279951095581 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19726336, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 182, - "stragglerRank": 6, + "dispatchLogicalBytes": 311091200, + "combineLogicalBytes": 311091200, + "fanoutMean": 5.2978515625, + "recvTokensMax": 4096, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 1024, + "globalTokens": 8192, "dispatch": { - "p50": 119.74400281906128, - "p90": 127.3919939994812, - "p95": 128.89599800109863, - "p99": 132.9279989004135 + "p50": 369.1520094871521, + "p90": 377.1199882030487, + "p95": 378.62399220466614, + "p99": 388.35200667381287 }, "combine": { - "p50": 113.18399757146835, - "p90": 116.19199812412262, - "p95": 117.44000017642975, - "p99": 120.67200243473053 + "p50": 580.5119872093201, + "p90": 582.1120142936707, + "p95": 582.5920104980469, + "p99": 585.3760242462158 }, "roundtrip": { - "p50": 202.27199792861938, - "p90": 208.54400098323822, - "p95": 233.95200073719025, - "p99": 249.79199469089508 + "p50": 939.1679763793945, + "p90": 944.2880153656006, + "p95": 945.9840059280396, + "p99": 958.079993724823 }, "isolatedSum": { - "p50": 232.92800039052963, - "p90": 243.58399212360382, - "p95": 246.33599817752838, - "p99": 253.60000133514404 + "p50": 949.6639966964722, + "p90": 959.2320024967194, + "p95": 961.216002702713, + "p99": 973.7280309200287 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 6, + "dispatchLogicalBytes": 620648448, 
+ "combineLogicalBytes": 620648448, + "fanoutMean": 5.2847900390625, + "recvTokensMax": 8192, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 684.1279864311218, + "p90": 693.3119893074036, + "p95": 694.6560144424438, + "p99": 802.5919795036316 + }, + "combine": { + "p50": 1085.15202999115, + "p90": 1086.7520570755005, + "p95": 1087.3279571533203, + "p99": 1098.9760160446167 + }, + "roundtrip": { + "p50": 1750.656008720398, + "p90": 1759.071946144104, + "p95": 1762.7840042114258, + "p99": 1789.2800569534302 + }, + "isolatedSum": { + "p50": 1769.2800164222717, + "p90": 1780.064046382904, + "p95": 1781.9839715957642, + "p99": 1901.5679955482483 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1241511936, + "combineLogicalBytes": 1241511936, + "fanoutMean": 5.28570556640625, + "recvTokensMax": 16384, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1323.3599662780762, + "p90": 1332.1599960327148, + "p95": 1336.5440368652344, + "p99": 1345.3439474105835 + }, + "combine": { + "p50": 2080.22403717041, + "p90": 2082.0159912109375, + "p95": 2084.0959548950195, + "p99": 2094.655990600586 + }, + "roundtrip": { + "p50": 3382.688045501709, + "p90": 3391.9999599456787, + "p95": 3396.4478969573975, + "p99": 3412.480115890503 + }, + "isolatedSum": { + "p50": 3403.5840034484863, + "p90": 3414.1759872436523, + "p95": 3420.639991760254, + "p99": 3439.9999380111694 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2484242432, + "combineLogicalBytes": 2484242432, + "fanoutMean": 5.288299560546875, + "recvTokensMax": 32768, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -20928,50 +22106,51 @@ ] }, { - "id": "cx-3752524d", - "identity": 
"h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|normalized|0.6|8c8497a77d9085d", - "colorKey": "h100_7b3247bf", - "comparisonKey": "b51e047646ec8fac", + "id": "cx-6ace94e5", + "identity": "b300|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|hotspot-single+eplb|8|prefill|normal|none|none|0|tuned||29ae5ace13636f8", + "colorKey": "b300_39a5906c", + "comparisonKey": "4191eeca9b95da96", "schemaVersion": 3, - "generatedAt": "2026-06-26T17:30:39.045176+00:00", + "generatedAt": "2026-06-27T09:50:47.306052+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h100-dgxc-slurm_13", - "sku": "h100", + "runner": "b300-nv_16", + "sku": "b300", "backend": "deepep", - "phase": "decode", + "phase": "prefill", "mode": "normal", - "resourceMode": "normalized", - "suite": "resource-constrained", + "resourceMode": "tuned", + "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", + "topologyClass": "b300-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · bf16 (norm)", + "label": "B300 EP8 · deepep · bf16 · hotspot-single+eplb", + "model": "DeepSeek-V3 (EPLB physical)", "shape": { "hidden": 7168, "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", + "experts": 288, + "routing": "hotspot-single", + "routingLabel": "hotspot-single+eplb", "routingStep": 0, "unevenTokens": "none", - "eplbEnabled": false, + "eplbEnabled": true, "dispatchDtype": "bf16", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { - "requestedFraction": 0.6, - "achievedFraction": 0.5985, - "configuredUnits": 79, - "deviceUnits": 132, - "resourceClass": "resource-constrained", - "conformanceClass": "resource-conforming", + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": 
"backend-tuned", + "conformanceClass": "backend-default", "fixedKernel": false, - "paretoEligible": true + "paretoEligible": false }, "placement": { "kind": "packed", @@ -20980,216 +22159,291 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "8c8497a77d9085d", - "workloadId": "set:4:d8d49658059863f2", + "traceSignature": "29ae5ace13636f8", + "workloadId": "set:6:b952d4a43d688b50", "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, + "eplbImbalanceBefore": 1.8466796875, + "eplbImbalanceAfter": 1.0002700343276514, "backendVersion": "1.2.1", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28254286950", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254286950", - "createdAt": "2026-06-26T17:26:18Z", - "sha": "60dec7d70f554e252fec87709e2be52752947db1" + "id": "28285668831", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285668831", + "createdAt": "2026-06-27T09:50:47.306052+00:00", + "sha": "149586650dbed5b7579537347e9489d5b41543c1" }, "rows": [ { - "tokensPerRank": 1, - "globalTokens": 8, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 96.28800302743912, - "p90": 103.55199873447418, - "p95": 105.66399991512299, - "p99": 108.51199924945831 + "p50": 95.74399888515472, + "p90": 99.84000027179718, + "p95": 101.31199657917023, + "p99": 107.42399841547012 }, "combine": { - "p50": 79.1039988398552, - "p90": 81.37600123882294, - "p95": 84.89599823951721, - "p99": 89.91999924182892 + "p50": 115.26399850845337, + "p90": 116.35199934244156, + "p95": 117.5680011510849, + "p99": 131.77600502967834 }, "roundtrip": { - "p50": 146.27200365066528, - "p90": 156.38400614261627, - "p95": 161.82400286197662, - "p99": 219.2319929599762 + "p50": 194.14399564266205, + "p90": 199.52000677585602, + "p95": 200.54399967193604, + "p99": 206.68800175189972 
}, "isolatedSum": { - "p50": 175.3920018672943, - "p90": 184.92799997329712, - "p95": 190.5599981546402, - "p99": 198.43199849128723 + "p50": 211.0079973936081, + "p90": 216.19199961423874, + "p95": 218.87999773025513, + "p99": 239.20000344514847 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 630784, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 7, + "dispatchLogicalBytes": 77701120, + "combineLogicalBytes": 77701120, + "fanoutMean": 5.29296875, + "recvTokensMax": 697, "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 256, + "globalTokens": 2048, "dispatch": { - "p50": 96.70399874448776, - "p90": 102.30399668216705, - "p95": 104.51199859380722, - "p99": 112.22399771213531 + "p50": 134.5919966697693, + "p90": 139.8400068283081, + "p95": 141.34399592876434, + "p99": 147.77599275112152 }, "combine": { - "p50": 79.58400249481201, - "p90": 87.3280018568039, - "p95": 87.80799806118011, - "p99": 89.9519994854927 + "p50": 155.87200224399567, + "p90": 165.27999937534332, + "p95": 170.43200135231018, + "p99": 176.7680048942566 }, "roundtrip": { - "p50": 153.3759981393814, - "p90": 161.21600568294525, - "p95": 162.56000101566315, - "p99": 166.72000288963318 + "p50": 273.27999472618103, + "p90": 280.5120050907135, + "p95": 281.72799944877625, + "p99": 288.35201263427734 }, "isolatedSum": { - "p50": 176.28800123929977, - "p90": 189.63199853897095, - "p95": 192.31999665498734, - "p99": 202.17599719762802 + "p50": 290.46399891376495, + "p90": 305.1200062036514, + "p95": 311.7759972810745, + "p99": 324.5439976453781 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 4, + "dispatchLogicalBytes": 155187200, + "combineLogicalBytes": 155187200, + "fanoutMean": 5.28564453125, + "recvTokensMax": 1372, + "stragglerRank": 6, "correct": true, 
"samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 32, - "globalTokens": 256, + "tokensPerRank": 512, + "globalTokens": 4096, "dispatch": { - "p50": 102.88000106811523, - "p90": 106.81600123643875, - "p95": 109.0560033917427, - "p99": 114.3679991364479 + "p50": 194.46399807929993, + "p90": 200.83199441432953, + "p95": 203.64800095558167, + "p99": 213.24799954891205 }, "combine": { - "p50": 87.99999952316284, - "p90": 95.48799693584442, - "p95": 96.22400254011154, - "p99": 119.1679984331131 + "p50": 265.3760015964508, + "p90": 274.3679881095886, + "p95": 274.84801411628723, + "p99": 277.75999903678894 }, "roundtrip": { - "p50": 161.95200383663177, - "p90": 170.0800061225891, - "p95": 172.5119948387146, - "p99": 460.7999920845032 + "p50": 444.19199228286743, + "p90": 448.67199659347534, + "p95": 450.27199387550354, + "p99": 476.0960042476654 }, "isolatedSum": { - "p50": 190.88000059127808, - "p90": 202.30399817228317, - "p95": 205.28000593185425, - "p99": 233.535997569561 + "p50": 459.83999967575073, + "p90": 475.19998252391815, + "p95": 478.4960150718689, + "p99": 491.007998585701 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19726336, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 182, - "stragglerRank": 4, + "dispatchLogicalBytes": 311162880, + "combineLogicalBytes": 311162880, + "fanoutMean": 5.299072265625, + "recvTokensMax": 2761, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 1024, + "globalTokens": 8192, "dispatch": { - "p50": 129.08799946308136, - "p90": 135.80800592899323, - "p95": 137.56799697875977, - "p99": 142.14399456977844 + "p50": 326.6560137271881, + "p90": 330.3999900817871, + "p95": 331.29599690437317, + "p99": 342.8800106048584 }, "combine": { - "p50": 113.27999830245972, - "p90": 120.44800072908401, - "p95": 120.67200243473053, - "p99": 123.74400347471237 + "p50": 461.88798546791077, + "p90": 
470.94398736953735, + "p95": 471.45599126815796, + "p99": 483.2639992237091 }, "roundtrip": { - "p50": 211.5200012922287, - "p90": 218.176007270813, - "p95": 219.64800357818604, - "p99": 223.68000447750092 + "p50": 770.4960107803345, + "p90": 775.3599882125854, + "p95": 777.5999903678894, + "p99": 795.9039807319641 }, "isolatedSum": { - "p50": 242.36799776554108, - "p90": 256.25600665807724, - "p95": 258.2399994134903, - "p99": 265.8879980444908 + "p50": 788.5439991950989, + "p90": 801.3439774513245, + "p95": 802.7519881725311, + "p99": 826.1440098285675 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, + "dispatchLogicalBytes": 619974656, + "combineLogicalBytes": 619974656, + "fanoutMean": 5.279052734375, + "recvTokensMax": 5481, "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 570.1760053634644, + "p90": 577.567994594574, + "p95": 579.5199871063232, + "p99": 643.8400149345398 + }, + "combine": { + "p50": 815.8400058746338, + "p90": 826.5600204467773, + "p95": 827.5840282440186, + "p99": 830.8159708976746 + }, + "roundtrip": { + "p50": 1370.9759712219238, + "p90": 1381.0559511184692, + "p95": 1383.8720321655273, + "p99": 1396.672010421753 + }, + "isolatedSum": { + "p50": 1386.0160112380981, + "p90": 1404.1280150413513, + "p95": 1407.1040153503418, + "p99": 1474.6559858322144 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240020992, + "combineLogicalBytes": 1240020992, + "fanoutMean": 5.27935791015625, + "recvTokensMax": 10883, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1064.7039413452148, + "p90": 1068.4479475021362, + "p95": 1071.8079805374146, + "p99": 1093.3760404586792 + }, + "combine": { + "p50": 1526.2080430984497, + 
"p90": 1530.56001663208, + "p95": 1539.3919944763184, + "p99": 1604.8959493637085 + }, + "roundtrip": { + "p50": 2567.7759647369385, + "p90": 2580.415964126587, + "p95": 2587.8400802612305, + "p99": 2656.8961143493652 + }, + "isolatedSum": { + "p50": 2590.9119844436646, + "p90": 2599.0079641342163, + "p95": 2611.199975013733, + "p99": 2698.2719898223877 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480414720, + "combineLogicalBytes": 2480414720, + "fanoutMean": 5.2801513671875, + "recvTokensMax": 21702, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 } ] }, { - "id": "cx-7db267e7", - "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|decode|normal|none|none|0|normalized|0.18|ffa946582edb500", - "colorKey": "h100_716e65b9", - "comparisonKey": "259b0e9f1092ac0e", + "id": "cx-f0a8ca82", + "identity": "b300|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|uniform+eplb|8|prefill|normal|none|none|0|tuned||2225dbbdab9bf2d", + "colorKey": "b300_e3d449ce", + "comparisonKey": "5a2fc26356c2c7bc", "schemaVersion": 3, - "generatedAt": "2026-06-26T17:32:00.320566+00:00", + "generatedAt": "2026-06-27T09:47:59.202782+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h100-dgxc-slurm_15", - "sku": "h100", + "runner": "b300-nv_10", + "sku": "b300", "backend": "deepep", - "phase": "decode", + "phase": "prefill", "mode": "normal", - "resourceMode": "normalized", - "suite": "resource-constrained", + "resourceMode": "tuned", + "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", + "topologyClass": "b300-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · bf16 (norm) · balanced", + "label": "B300 EP8 · deepep · bf16 · uniform+eplb", + "model": "DeepSeek-V3 (EPLB physical)", "shape": { "hidden": 7168, "topk": 8, - "experts": 256, - "routing": 
"balanced", - "routingLabel": "balanced", + "experts": 288, + "routing": "uniform", + "routingLabel": "uniform+eplb", "routingStep": 0, "unevenTokens": "none", - "eplbEnabled": false, + "eplbEnabled": true, "dispatchDtype": "bf16", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { - "requestedFraction": 0.18, - "achievedFraction": 0.1818, - "configuredUnits": 24, - "deviceUnits": 132, - "resourceClass": "resource-constrained", - "conformanceClass": "resource-conforming", + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", "fixedKernel": false, - "paretoEligible": true + "paretoEligible": false }, "placement": { "kind": "packed", @@ -21198,313 +22452,421 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "ffa946582edb500", - "workloadId": "set:8:7af12818400d6348", + "traceSignature": "2225dbbdab9bf2d", + "workloadId": "set:6:a426d66e479dc893", "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, + "eplbImbalanceBefore": 1.006072998046875, + "eplbImbalanceAfter": 1.0000152587890625, "backendVersion": "1.2.1", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28254367516", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254367516", - "createdAt": "2026-06-26T17:27:52Z", - "sha": "60dec7d70f554e252fec87709e2be52752947db1" + "id": "28285607618", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285607618", + "createdAt": "2026-06-27T09:47:59.202782+00:00", + "sha": "149586650dbed5b7579537347e9489d5b41543c1" }, "rows": [ { - "tokensPerRank": 1, - "globalTokens": 8, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 95.93600034713745, - "p90": 103.00800204277039, - "p95": 
104.38399761915207, - "p99": 107.64800012111664 + "p50": 94.46399658918381, + "p90": 100.35199671983719, + "p95": 103.2319962978363, + "p99": 128.4160017967224 }, "combine": { - "p50": 81.08799904584885, - "p90": 87.93599903583527, - "p95": 88.60799670219421, - "p99": 90.36800265312195 + "p50": 115.03999680280685, + "p90": 115.80800265073776, + "p95": 116.7680025100708, + "p99": 120.99199742078781 }, "roundtrip": { - "p50": 151.2639969587326, - "p90": 158.9760035276413, - "p95": 160.73599457740784, - "p99": 164.06400501728058 + "p50": 193.4400051832199, + "p90": 200.1280039548874, + "p95": 201.9840031862259, + "p99": 223.1999933719635 }, "isolatedSum": { - "p50": 177.0239993929863, - "p90": 190.94400107860565, - "p95": 192.99199432134628, - "p99": 198.0160027742386 + "p50": 209.50399339199066, + "p90": 216.15999937057495, + "p95": 219.9999988079071, + "p99": 249.40799921751022 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 917504, - "combineLogicalBytes": 917504, - "fanoutMean": 8, - "recvTokensMax": 8, + "dispatchLogicalBytes": 77041664, + "combineLogicalBytes": 77041664, + "fanoutMean": 5.248046875, + "recvTokensMax": 686, "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2, - "globalTokens": 16, + "tokensPerRank": 256, + "globalTokens": 2048, "dispatch": { - "p50": 74.23999905586243, - "p90": 96.79999947547913, - "p95": 100.00000149011612, - "p99": 103.7760004401207 + "p50": 135.3919953107834, + "p90": 139.80799913406372, + "p95": 141.4719969034195, + "p99": 152.6080071926117 }, "combine": { - "p50": 73.98399710655212, - "p90": 87.64799684286118, - "p95": 88.54400366544724, - "p99": 89.66399729251862 + "p50": 153.9199948310852, + "p90": 163.7440025806427, + "p95": 164.22399878501892, + "p99": 176.67199671268463 }, "roundtrip": { - "p50": 127.32799351215363, - "p90": 158.1439971923828, - "p95": 159.32799875736237, - "p99": 162.52799332141876 + "p50": 270.4319953918457, + "p90": 275.4560112953186, + "p95": 
277.47198939323425, + "p99": 282.4000120162964 }, "isolatedSum": { - "p50": 148.22399616241455, - "p90": 184.4479963183403, - "p95": 188.54400515556335, - "p99": 193.4399977326393 + "p50": 289.3119901418686, + "p90": 303.5520017147064, + "p95": 305.6959956884384, + "p99": 329.2800039052963 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1835008, - "combineLogicalBytes": 1835008, - "fanoutMean": 8, - "recvTokensMax": 16, - "stragglerRank": 4, + "dispatchLogicalBytes": 154542080, + "combineLogicalBytes": 154542080, + "fanoutMean": 5.263671875, + "recvTokensMax": 1365, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 4, - "globalTokens": 32, + "tokensPerRank": 512, + "globalTokens": 4096, "dispatch": { - "p50": 74.87999647855759, - "p90": 99.5199978351593, - "p95": 103.20000350475311, - "p99": 106.62399977445602 + "p50": 195.0719952583313, + "p90": 202.91200280189514, + "p95": 204.76800203323364, + "p99": 211.5519940853119 }, "combine": { - "p50": 73.95199686288834, - "p90": 87.74399757385254, - "p95": 88.06400001049042, - "p99": 88.76799792051315 + "p50": 273.75999093055725, + "p90": 275.4560112953186, + "p95": 276.70401334762573, + "p99": 286.8480086326599 }, "roundtrip": { - "p50": 127.80800461769104, - "p90": 156.3519984483719, - "p95": 158.81599485874176, - "p99": 162.33600676059723 + "p50": 438.33601474761963, + "p90": 447.6799964904785, + "p95": 457.2800099849701, + "p99": 516.0959959030151 }, "isolatedSum": { - "p50": 148.83199334144592, - "p90": 187.26399540901184, - "p95": 191.26400351524353, - "p99": 195.39199769496918 + "p50": 468.83198618888855, + "p90": 478.36801409721375, + "p95": 481.4720153808594, + "p99": 498.4000027179718 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 3670016, - "combineLogicalBytes": 3670016, - "fanoutMean": 8, - "recvTokensMax": 32, - "stragglerRank": 4, + "dispatchLogicalBytes": 310589440, + "combineLogicalBytes": 310589440, + "fanoutMean": 5.289306640625, + 
"recvTokensMax": 2746, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 1024, + "globalTokens": 8192, "dispatch": { - "p50": 94.36800330877304, - "p90": 100.09600222110748, - "p95": 101.95200145244598, - "p99": 107.4879989027977 + "p50": 325.408011674881, + "p90": 328.99200916290283, + "p95": 330.2080035209656, + "p99": 342.0160114765167 }, "combine": { - "p50": 80.92799782752991, - "p90": 88.03199976682663, - "p95": 88.86399865150452, - "p99": 89.79199826717377 + "p50": 459.48800444602966, + "p90": 470.46399116516113, + "p95": 470.94398736953735, + "p99": 482.87999629974365 }, "roundtrip": { - "p50": 149.85600113868713, - "p90": 156.95999562740326, - "p95": 158.1760048866272, - "p99": 161.98399662971497 + "p50": 764.959990978241, + "p90": 773.792028427124, + "p95": 783.456027507782, + "p99": 817.8880214691162 }, "isolatedSum": { - "p50": 175.29600113630295, - "p90": 188.1280019879341, - "p95": 190.8160001039505, - "p99": 197.27999716997147 + "p50": 784.8960161209106, + "p90": 799.456000328064, + "p95": 801.1519908905029, + "p99": 824.8960077762604 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 7340032, - "combineLogicalBytes": 7340032, - "fanoutMean": 8, - "recvTokensMax": 64, + "dispatchLogicalBytes": 619171840, + "combineLogicalBytes": 619171840, + "fanoutMean": 5.272216796875, + "recvTokensMax": 5467, "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 16, - "globalTokens": 128, + "tokensPerRank": 2048, + "globalTokens": 16384, "dispatch": { - "p50": 94.36800330877304, - "p90": 104.80000078678131, - "p95": 106.78400099277496, - "p99": 115.00799655914307 + "p50": 568.9600110054016, + "p90": 572.8960037231445, + "p95": 575.8079886436462, + "p99": 665.9200191497803 }, "combine": { - "p50": 86.59200370311737, - "p90": 88.76799792051315, - "p95": 89.56799656152725, - "p99": 96.83199971914291 + "p50": 814.0159845352173, + 
"p90": 815.6480193138123, + "p95": 817.8880214691162, + "p99": 888.8959884643555 }, "roundtrip": { - "p50": 150.11200308799744, - "p90": 161.50400042533875, - "p95": 166.24000668525696, - "p99": 490.62401056289673 + "p50": 1359.7760200500488, + "p90": 1370.0480461120605, + "p95": 1375.8080005645752, + "p99": 1418.239951133728 }, "isolatedSum": { - "p50": 180.9600070118904, - "p90": 193.56799870729446, - "p95": 196.35199755430222, - "p99": 211.83999627828598 + "p50": 1382.975995540619, + "p90": 1388.5440230369568, + "p95": 1393.6960101127625, + "p99": 1554.8160076141357 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 14680064, - "combineLogicalBytes": 14680064, - "fanoutMean": 8, - "recvTokensMax": 128, - "stragglerRank": 5, + "dispatchLogicalBytes": 1238945792, + "combineLogicalBytes": 1238945792, + "fanoutMean": 5.2747802734375, + "recvTokensMax": 10913, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 32, - "globalTokens": 256, + "tokensPerRank": 4096, + "globalTokens": 32768, "dispatch": { - "p50": 87.0399996638298, - "p90": 106.04800283908844, - "p95": 110.1439967751503, - "p99": 123.83999675512314 + "p50": 1064.2880201339722, + "p90": 1069.823980331421, + "p95": 1076.5119791030884, + "p99": 1097.6639986038208 }, "combine": { - "p50": 82.5280025601387, - "p90": 96.3200032711029, - "p95": 96.73599898815155, - "p99": 97.56799787282944 + "p50": 1516.8960094451904, + "p90": 1527.9040336608887, + "p95": 1529.8240184783936, + "p99": 1575.8399963378906 }, "roundtrip": { - "p50": 143.5839980840683, - "p90": 166.55999422073364, - "p95": 168.7680035829544, - "p99": 175.55199563503265 + "p50": 2567.840099334717, + "p90": 2580.9600353240967, + "p95": 2591.4878845214844, + "p99": 2632.960081100464 }, "isolatedSum": { - "p50": 169.5680022239685, - "p90": 202.36800611019135, - "p95": 206.87999576330185, - "p99": 221.40799462795258 + "p50": 2581.1840295791626, + "p90": 2597.7280139923096, + "p95": 2606.335997581482, + 
"p99": 2673.5039949417114 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 29360128, - "combineLogicalBytes": 29360128, - "fanoutMean": 8, - "recvTokensMax": 256, + "dispatchLogicalBytes": 2481747968, + "combineLogicalBytes": 2481747968, + "fanoutMean": 5.282989501953125, + "recvTokensMax": 21789, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-4cb883eb", + "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|prefill|normal|none|none|0|tuned||4caecd33bedf786", + "colorKey": "b300_8d2811e3", + "comparisonKey": "c2361bc487e04e6e", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:58:36.475166+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "b300-nv_15", + "sku": "b300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep · bf16 · zipf", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf", + "routingLabel": "zipf", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "4caecd33bedf786", + "workloadId": "set:3:830e36e88869e222", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": 
null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271889990", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271889990", + "createdAt": "2026-06-26T23:58:36.475166+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 103.87200117111206, + "p90": 106.88000172376633, + "p95": 109.3439981341362, + "p99": 126.62400305271149 + }, + "combine": { + "p50": 126.91199779510498, + "p90": 128.1919926404953, + "p95": 128.57599556446075, + "p99": 139.615997672081 + }, + "roundtrip": { + "p50": 209.6640020608902, + "p90": 213.95200490951538, + "p95": 215.488001704216, + "p99": 220.47999501228333 + }, + "isolatedSum": { + "p50": 230.78399896621704, + "p90": 235.07199436426163, + "p95": 237.91999369859695, + "p99": 266.2400007247925 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 64, - "globalTokens": 512, + "tokensPerRank": 512, + "globalTokens": 4096, "dispatch": { - "p50": 116.92799627780914, - "p90": 126.3359934091568, - "p95": 128.63999605178833, - "p99": 132.6719969511032 + "p50": 204.73599433898926, + "p90": 212.44800090789795, + "p95": 213.98399770259857, + "p99": 221.02400660514832 }, "combine": { - "p50": 104.19200360774994, - "p90": 112.06399649381638, - "p95": 112.99200356006622, - "p99": 113.76000195741653 + "p50": 325.28001070022583, + "p90": 336.41600608825684, + "p95": 336.70398592948914, + "p99": 340.4799997806549 }, "roundtrip": { - "p50": 190.49599766731262, - "p90": 199.74400103092194, - "p95": 202.36800611019135, - "p99": 204.76800203323364 + "p50": 510.528028011322, 
+ "p90": 517.087996006012, + "p95": 519.1680192947388, + "p99": 526.4639854431152 }, "isolatedSum": { - "p50": 221.11999988555908, - "p90": 238.39998990297318, - "p95": 241.63199961185455, - "p99": 246.43199890851974 + "p50": 530.0160050392151, + "p90": 548.8640069961548, + "p95": 550.6879836320877, + "p99": 561.5040063858032 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 58720256, - "combineLogicalBytes": 58720256, - "fanoutMean": 8, - "recvTokensMax": 512, - "stragglerRank": 4, + "dispatchLogicalBytes": 201678848, + "combineLogicalBytes": 201678848, + "fanoutMean": 3.4345703125, + "recvTokensMax": 4094, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 2048, + "globalTokens": 16384, "dispatch": { - "p50": 129.85600531101227, - "p90": 152.96000242233276, - "p95": 154.78399395942688, - "p99": 158.87999534606934 + "p50": 648.1919884681702, + "p90": 659.0080261230469, + "p95": 662.6240015029907, + "p99": 672.5760102272034 }, "combine": { - "p50": 121.2799996137619, - "p90": 129.43999469280243, - "p95": 130.3360015153885, - "p99": 145.34400403499603 + "p50": 1063.8400316238403, + "p90": 1073.248028755188, + "p95": 1073.6639499664307, + "p99": 1096.60804271698 }, "roundtrip": { - "p50": 226.8799990415573, - "p90": 240.31999707221985, - "p95": 242.01600253582, - "p99": 245.02399563789368 + "p50": 1698.815941810608, + "p90": 1708.1600427627563, + "p95": 1712.4799489974976, + "p99": 1786.7519855499268 }, "isolatedSum": { - "p50": 251.13600492477417, - "p90": 282.3999971151352, - "p95": 285.11999547481537, - "p99": 304.22399938106537 + "p50": 1712.0320200920105, + "p90": 1732.2560548782349, + "p95": 1736.2879514694214, + "p99": 1769.1840529441833 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 117440512, - "combineLogicalBytes": 117440512, - "fanoutMean": 8, - "recvTokensMax": 1024, - "stragglerRank": 4, + "dispatchLogicalBytes": 808822784, + 
"combineLogicalBytes": 808822784, + "fanoutMean": 3.44354248046875, + "recvTokensMax": 16380, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 @@ -21512,28 +22874,29 @@ ] }, { - "id": "cx-c5b168ae", - "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|decode|normal|none|none|0|normalized|0.18|14ded8461f2636c", - "colorKey": "h100_f7ec28aa", - "comparisonKey": "9896b8e4d81bc6a5", + "id": "cx-2d848061", + "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|prefill|normal|none|none|0|tuned||b5217e990b95f86", + "colorKey": "b300_8d2811e3", + "comparisonKey": "572a75005556e63b", "schemaVersion": 3, - "generatedAt": "2026-06-26T17:32:03.917674+00:00", + "generatedAt": "2026-06-27T09:48:48.610470+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h100-dgxc-slurm_11", - "sku": "h100", + "runner": "b300-nv_06", + "sku": "b300", "backend": "deepep", - "phase": "decode", + "phase": "prefill", "mode": "normal", - "resourceMode": "normalized", - "suite": "resource-constrained", + "resourceMode": "tuned", + "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", + "topologyClass": "b300-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · bf16 (norm) · zipf", + "label": "B300 EP8 · deepep · bf16 · zipf", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, @@ -21548,14 +22911,14 @@ "combineQuantMode": "none" }, "resourceProfile": { - "requestedFraction": 0.18, - "achievedFraction": 0.1818, - "configuredUnits": 24, - "deviceUnits": 132, - "resourceClass": "resource-constrained", - "conformanceClass": "resource-conforming", + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", "fixedKernel": false, - 
"paretoEligible": true + "paretoEligible": false }, "placement": { "kind": "packed", @@ -21564,8 +22927,8 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "14ded8461f2636c", - "workloadId": "set:8:f5576e2b712d38c3", + "traceSignature": "b5217e990b95f86", + "workloadId": "set:6:830e36e88869e222", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -21573,304 +22936,230 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28254376151", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254376151", - "createdAt": "2026-06-26T17:28:02Z", - "sha": "60dec7d70f554e252fec87709e2be52752947db1" + "id": "28285625501", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285625501", + "createdAt": "2026-06-27T09:48:48.610470+00:00", + "sha": "149586650dbed5b7579537347e9489d5b41543c1" }, "rows": [ { - "tokensPerRank": 1, - "globalTokens": 8, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 96.89600020647049, - "p90": 104.032002389431, - "p95": 106.04800283908844, - "p99": 111.04000359773636 + "p50": 100.19200295209885, + "p90": 104.2879968881607, + "p95": 107.35999792814255, + "p99": 113.69600147008896 }, "combine": { - "p50": 74.36800003051758, - "p90": 80.03199845552444, - "p95": 81.31200075149536, - "p99": 82.68799632787704 + "p50": 118.43200027942657, + "p90": 127.03999876976013, + "p95": 127.51999497413635, + "p99": 129.2479932308197 }, "roundtrip": { - "p50": 145.82400023937225, - "p90": 153.76000106334686, - "p95": 160.0639969110489, - "p99": 226.30399465560913 + "p50": 207.58399367332458, + "p90": 212.54399418830872, + "p95": 213.82400393486023, + "p99": 217.0879989862442 }, "isolatedSum": { - "p50": 171.26400023698807, - "p90": 184.06400084495544, - "p95": 187.3600035905838, - "p99": 193.7279999256134 + "p50": 218.62400323152542, + "p90": 
231.32799565792084, + "p95": 234.8799929022789, + "p99": 242.94399470090866 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 444416, - "combineLogicalBytes": 444416, - "fanoutMean": 3.875, - "recvTokensMax": 8, - "stragglerRank": 7, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2, - "globalTokens": 16, + "tokensPerRank": 256, + "globalTokens": 2048, "dispatch": { - "p50": 70.72000205516815, - "p90": 103.93600165843964, - "p95": 105.18400371074677, - "p99": 113.63200098276138 + "p50": 130.40000200271606, + "p90": 137.40800321102142, + "p95": 138.84800672531128, + "p99": 147.93600142002106 }, "combine": { - "p50": 71.35999947786331, - "p90": 80.32000064849854, - "p95": 81.18399977684021, - "p99": 88.16000074148178 + "p50": 176.28799378871918, + "p90": 178.3359944820404, + "p95": 179.87200617790222, + "p99": 189.91999328136444 }, "roundtrip": { - "p50": 126.68800354003906, - "p90": 152.5759994983673, - "p95": 155.32800555229187, - "p99": 159.29600596427917 + "p50": 294.5280075073242, + "p90": 299.77598786354065, + "p95": 301.56800150871277, + "p99": 312.22400069236755 }, "isolatedSum": { - "p50": 142.08000153303146, - "p90": 184.25600230693817, - "p95": 186.36800348758698, - "p99": 201.79200172424316 + "p50": 306.68799579143524, + "p90": 315.74399769306183, + "p95": 318.7200129032135, + "p99": 337.8559947013855 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 845824, - "combineLogicalBytes": 845824, - "fanoutMean": 3.6875, - "recvTokensMax": 16, - "stragglerRank": 3, + "dispatchLogicalBytes": 100509696, + "combineLogicalBytes": 100509696, + "fanoutMean": 3.42333984375, + "recvTokensMax": 2046, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 4, - "globalTokens": 32, + "tokensPerRank": 512, + "globalTokens": 4096, "dispatch": { - "p50": 
70.14399766921997, - "p90": 100.28800368309021, - "p95": 102.55999863147736, - "p99": 131.71200454235077 + "p50": 207.13600516319275, + "p90": 211.776003241539, + "p95": 213.24799954891205, + "p99": 220.99199891090393 }, "combine": { - "p50": 71.61600142717361, - "p90": 79.55200225114822, - "p95": 79.74400371313095, - "p99": 84.22400057315826 + "p50": 324.8960077762604, + "p90": 334.9440097808838, + "p95": 335.61599254608154, + "p99": 338.46399188041687 }, "roundtrip": { - "p50": 127.77599692344666, - "p90": 153.50399911403656, - "p95": 155.2640050649643, - "p99": 160.73599457740784 + "p50": 504.12797927856445, + "p90": 511.03997230529785, + "p95": 513.2480263710022, + "p99": 517.5359845161438 }, "isolatedSum": { - "p50": 141.75999909639359, - "p90": 179.84000593423843, - "p95": 182.3040023446083, - "p99": 215.93600511550903 + "p50": 532.0320129394531, + "p90": 546.7200130224228, + "p95": 548.8639920949936, + "p99": 559.4559907913208 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1691648, - "combineLogicalBytes": 1691648, - "fanoutMean": 3.6875, - "recvTokensMax": 32, - "stragglerRank": 3, + "dispatchLogicalBytes": 201678848, + "combineLogicalBytes": 201678848, + "fanoutMean": 3.4345703125, + "recvTokensMax": 4094, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 1024, + "globalTokens": 8192, "dispatch": { - "p50": 94.97600048780441, - "p90": 100.832000374794, - "p95": 102.30399668216705, - "p99": 114.3999993801117 + "p50": 347.6479947566986, + "p90": 353.08799147605896, + "p95": 354.8479974269867, + "p99": 364.4160032272339 }, "combine": { - "p50": 71.52000069618225, - "p90": 81.18399977684021, - "p95": 81.7599967122078, - "p99": 86.94399893283844 + "p50": 582.751989364624, + "p90": 592.8320288658142, + "p95": 593.4399962425232, + "p99": 599.7120141983032 }, "roundtrip": { - "p50": 125.31200051307678, - "p90": 153.05599570274353, - "p95": 156.0640037059784, - 
"p99": 159.42400693893433 + "p50": 909.4719886779785, + "p90": 917.248010635376, + "p95": 919.2320108413696, + "p99": 935.0079894065857 }, "isolatedSum": { - "p50": 166.49600118398666, - "p90": 182.01600015163422, - "p95": 184.06399339437485, - "p99": 201.34399831295013 + "p50": 930.3999841213226, + "p90": 945.9200203418732, + "p95": 948.2879936695099, + "p99": 964.1280174255371 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 3354624, - "combineLogicalBytes": 3354624, - "fanoutMean": 3.65625, - "recvTokensMax": 64, + "dispatchLogicalBytes": 405035008, + "combineLogicalBytes": 405035008, + "fanoutMean": 3.4488525390625, + "recvTokensMax": 8189, "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 16, - "globalTokens": 128, + "tokensPerRank": 2048, + "globalTokens": 16384, "dispatch": { - "p50": 95.551997423172, - "p90": 100.89600086212158, - "p95": 103.26399654150009, - "p99": 112.31999844312668 + "p50": 641.152024269104, + "p90": 652.0000100135803, + "p95": 655.2960276603699, + "p99": 747.6480007171631 }, "combine": { - "p50": 79.48800176382065, - "p90": 86.87999844551086, - "p95": 87.71199733018875, - "p99": 88.22400122880936 + "p50": 1062.0479583740234, + "p90": 1072.0640420913696, + "p95": 1072.6079940795898, + "p99": 1096.5440273284912 }, "roundtrip": { - "p50": 149.79200065135956, - "p90": 158.24000537395477, - "p95": 160.0320041179657, - "p99": 165.69599509239197 + "p50": 1689.9199485778809, + "p90": 1699.0079879760742, + "p95": 1702.5599479675293, + "p99": 1800.9920120239258 }, "isolatedSum": { - "p50": 175.03999918699265, - "p90": 187.77599930763245, - "p95": 190.97599387168884, - "p99": 200.54399967193604 + "p50": 1703.1999826431274, + "p90": 1724.06405210495, + "p95": 1727.9040217399597, + "p99": 1844.1920280456543 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 6537216, - "combineLogicalBytes": 6537216, - "fanoutMean": 3.5625, - "recvTokensMax": 127, - "stragglerRank": 7, + "dispatchLogicalBytes": 
808822784, + "combineLogicalBytes": 808822784, + "fanoutMean": 3.44354248046875, + "recvTokensMax": 16380, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 32, - "globalTokens": 256, + "tokensPerRank": 4096, + "globalTokens": 32768, "dispatch": { - "p50": 83.16799998283386, - "p90": 99.96800124645233, - "p95": 104.96000200510025, - "p99": 109.11999642848969 + "p50": 1252.0320415496826, + "p90": 1263.424038887024, + "p95": 1268.7360048294067, + "p99": 1281.2479734420776 }, "combine": { - "p50": 79.8719972372055, - "p90": 87.93599903583527, - "p95": 89.28000181913376, - "p99": 95.39200365543365 + "p50": 2043.8721179962158, + "p90": 2046.015977859497, + "p95": 2054.464101791382, + "p99": 2093.503952026367 }, "roundtrip": { - "p50": 135.26399433612823, - "p90": 159.19999778270721, - "p95": 161.72799468040466, - "p99": 166.6560024023056 + "p50": 3286.976099014282, + "p90": 3298.5599040985107, + "p95": 3302.432060241699, + "p99": 3373.823881149292 }, "isolatedSum": { - "p50": 163.03999722003937, - "p90": 187.9040002822876, - "p95": 194.240003824234, - "p99": 204.51200008392334 + "p50": 3295.9041595458984, + "p90": 3309.440016746521, + "p95": 3323.2001066207886, + "p99": 3374.751925468445 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 12859392, - "combineLogicalBytes": 12859392, - "fanoutMean": 3.50390625, - "recvTokensMax": 255, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 64, - "globalTokens": 512, - "dispatch": { - "p50": 100.832000374794, - "p90": 114.68800157308578, - "p95": 116.67200177907944, - "p99": 134.91199910640717 - }, - "combine": { - "p50": 90.27200192213058, - "p90": 103.32799702882767, - "p95": 104.16000336408615, - "p99": 152.12799608707428 - }, - "roundtrip": { - "p50": 164.70399498939514, - "p90": 182.8480064868927, - "p95": 186.49600446224213, - "p99": 189.40800428390503 - }, - "isolatedSum": { - "p50": 191.1040022969246, - "p90": 
218.01599860191345, - "p95": 220.8320051431656, - "p99": 287.03999519348145 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 25145344, - "combineLogicalBytes": 25145344, - "fanoutMean": 3.42578125, - "recvTokensMax": 510, - "stragglerRank": 3, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 121.31199985742569, - "p90": 139.67999815940857, - "p95": 144.57599818706512, - "p99": 150.87999403476715 - }, - "combine": { - "p50": 112.99200356006622, - "p90": 120.64000219106674, - "p95": 120.80000340938568, - "p99": 128.51199507713318 - }, - "roundtrip": { - "p50": 212.67199516296387, - "p90": 228.4799963235855, - "p95": 230.0799936056137, - "p99": 235.74399948120117 - }, - "isolatedSum": { - "p50": 234.3040034174919, - "p90": 260.3200003504753, - "p95": 265.3760015964508, - "p99": 279.39198911190033 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 49946624, - "combineLogicalBytes": 49946624, - "fanoutMean": 3.40234375, - "recvTokensMax": 1022, - "stragglerRank": 4, + "dispatchLogicalBytes": 1619795968, + "combineLogicalBytes": 1619795968, + "fanoutMean": 3.4481201171875, + "recvTokensMax": 32761, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -21878,50 +23167,51 @@ ] }, { - "id": "cx-cf899bce", - "identity": "h100|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|decode|normal|none|none|0|normalized|0.18|a8f501af7004836", - "colorKey": "h100_93503624", - "comparisonKey": "74d307ed048ea3b5", + "id": "cx-f7ec6aaf", + "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|prefill|normal|none|none|0|tuned||3dd868cb33839a3", + "colorKey": "b300_2e44c039", + "comparisonKey": "b198376a27b75c7f", "schemaVersion": 3, - "generatedAt": "2026-06-26T17:46:24.194442+00:00", + "generatedAt": "2026-06-26T23:58:40.218743+00:00", "status": "valid", "publicationStatus": "official", - "runner": 
"h100-dgxc-slurm_02", - "sku": "h100", + "runner": "b300-nv_11", + "sku": "b300", "backend": "deepep", - "phase": "decode", + "phase": "prefill", "mode": "normal", - "resourceMode": "normalized", - "suite": "resource-constrained", + "resourceMode": "tuned", + "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", + "topologyClass": "b300-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · bf16 (norm) · zipf+eplb", + "label": "B300 EP8 · deepep · bf16 · zipf-heavy", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, - "experts": 288, - "routing": "zipf", - "routingLabel": "zipf+eplb", + "experts": 256, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy", "routingStep": 0, "unevenTokens": "none", - "eplbEnabled": true, + "eplbEnabled": false, "dispatchDtype": "bf16", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { - "requestedFraction": 0.18, - "achievedFraction": 0.1818, - "configuredUnits": 24, - "deviceUnits": 132, - "resourceClass": "resource-constrained", - "conformanceClass": "resource-conforming", + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", "fixedKernel": false, - "paretoEligible": true + "paretoEligible": false }, "placement": { "kind": "packed", @@ -21930,313 +23220,421 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "a8f501af7004836", - "workloadId": "set:8:f5576e2b712d38c3", + "traceSignature": "3dd868cb33839a3", + "workloadId": "set:3:1ca614e23cc66be1", "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": 4.927734375, - "eplbImbalanceAfter": 1.0006103515625, + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, "backendVersion": "1.2.1", "imageDigest": 
"sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28255296001", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28255296001", - "createdAt": "2026-06-26T17:45:26Z", - "sha": "36d3eb6c3c7386d3220c873d305410219c5c0f17" + "id": "28271897134", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271897134", + "createdAt": "2026-06-26T23:58:40.218743+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" }, "rows": [ { - "tokensPerRank": 1, - "globalTokens": 8, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 69.72800195217133, - "p90": 75.83999633789062, - "p95": 77.85599678754807, - "p99": 83.39200168848038 + "p50": 92.3520028591156, + "p90": 95.90400010347366, + "p95": 98.78399968147278, + "p99": 113.34399878978729 }, "combine": { - "p50": 71.26399874687195, - "p90": 73.40800017118454, - "p95": 74.0479975938797, - "p99": 78.87999713420868 + "p50": 116.19199812412262, + "p90": 120.2239990234375, + "p95": 126.39999389648438, + "p99": 127.68000364303589 }, "roundtrip": { - "p50": 121.85599654912949, - "p90": 128.12800705432892, - "p95": 130.3039938211441, - "p99": 134.71999764442444 + "p50": 194.5279985666275, + "p90": 202.43200659751892, + "p95": 204.22400534152985, + "p99": 214.23999965190887 }, "isolatedSum": { - "p50": 140.99200069904327, - "p90": 149.24799650907516, - "p95": 151.90399438142776, - "p99": 162.27199882268906 + "p50": 208.54400098323822, + "p90": 216.12799912691116, + "p95": 225.18399357795715, + "p99": 241.02400243282318 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 616448, - "combineLogicalBytes": 616448, - "fanoutMean": 5.375, - "recvTokensMax": 7, - "stragglerRank": 6, + "dispatchLogicalBytes": 22650880, + "combineLogicalBytes": 22650880, + "fanoutMean": 1.54296875, + "recvTokensMax": 1024, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2, - 
"globalTokens": 16, + "tokensPerRank": 512, + "globalTokens": 4096, "dispatch": { - "p50": 70.3359991312027, - "p90": 76.25599950551987, - "p95": 78.59200239181519, - "p99": 84.6719965338707 + "p50": 180.09600043296814, + "p90": 188.6720061302185, + "p95": 190.46400487422943, + "p99": 204.83200252056122 }, "combine": { - "p50": 71.16799801588058, - "p90": 73.53600114583969, - "p95": 74.27199929952621, - "p99": 79.80799674987793 + "p50": 302.94400453567505, + "p90": 311.42398715019226, + "p95": 311.67998909950256, + "p99": 315.16799330711365 }, "roundtrip": { - "p50": 127.20000743865967, - "p90": 131.00799918174744, - "p95": 133.27999413013458, - "p99": 138.08000087738037 + "p50": 473.1520116329193, + "p90": 481.6960096359253, + "p95": 485.0560128688812, + "p99": 493.696004152298 }, "isolatedSum": { - "p50": 141.50399714708328, - "p90": 149.79200065135956, - "p95": 152.8640016913414, - "p99": 164.47999328374863 + "p50": 483.0400049686432, + "p90": 500.09599328041077, + "p95": 502.143993973732, + "p99": 519.9999958276749 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1204224, - "combineLogicalBytes": 1204224, - "fanoutMean": 5.25, - "recvTokensMax": 14, - "stragglerRank": 6, + "dispatchLogicalBytes": 91521024, + "combineLogicalBytes": 91521024, + "fanoutMean": 1.55859375, + "recvTokensMax": 4096, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 4, - "globalTokens": 32, + "tokensPerRank": 2048, + "globalTokens": 16384, "dispatch": { - "p50": 73.18399846553802, - "p90": 102.14400291442871, - "p95": 105.50399869680405, - "p99": 108.44799876213074 + "p50": 570.0479745864868, + "p90": 580.4160237312317, + "p95": 583.7439894676208, + "p99": 621.0560202598572 }, "combine": { - "p50": 73.40800017118454, - "p90": 81.82399719953537, - "p95": 87.10400015115738, - "p99": 88.95999938249588 + "p50": 1098.7199544906616, + "p90": 1109.1840267181396, + "p95": 1109.663963317871, + "p99": 1124.4159936904907 }, "roundtrip": { - 
"p50": 131.8719983100891, - "p90": 160.3199988603592, - "p95": 162.88000345230103, - "p99": 167.1680063009262 + "p50": 1622.8159666061401, + "p90": 1629.3760538101196, + "p95": 1632.2239637374878, + "p99": 1643.3279514312744 }, "isolatedSum": { - "p50": 146.59199863672256, - "p90": 183.96800011396408, - "p95": 192.60799884796143, - "p99": 197.40799814462662 + "p50": 1668.7679290771484, + "p90": 1689.6000504493713, + "p95": 1693.407952785492, + "p99": 1745.472013950348 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2394112, - "combineLogicalBytes": 2394112, - "fanoutMean": 5.21875, - "recvTokensMax": 24, - "stragglerRank": 6, + "dispatchLogicalBytes": 368062464, + "combineLogicalBytes": 368062464, + "fanoutMean": 1.5670166015625, + "recvTokensMax": 16384, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-3f3c8c0f", + "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|prefill|normal|none|none|0|tuned||bbcd1d9d8d1e4fe", + "colorKey": "b300_2e44c039", + "comparisonKey": "5c8a1b2520d6dc6d", + "schemaVersion": 3, + "generatedAt": "2026-06-27T09:50:12.421760+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "b300-nv_04", + "sku": "b300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep · bf16 · zipf-heavy", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": 
null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "bbcd1d9d8d1e4fe", + "workloadId": "set:6:1ca614e23cc66be1", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28285656632", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285656632", + "createdAt": "2026-06-27T09:50:12.421760+00:00", + "sha": "149586650dbed5b7579537347e9489d5b41543c1" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 93.47199648618698, + "p90": 95.67999839782715, + "p95": 98.65599870681763, + "p99": 132.1599930524826 + }, + "combine": { + "p50": 116.83200299739838, + "p90": 126.30400061607361, + "p95": 126.88000500202179, + "p99": 138.047993183136 + }, + "roundtrip": { + "p50": 196.51199877262115, + "p90": 204.25599813461304, + "p95": 207.5520008802414, + "p99": 222.71999716758728 + }, + "isolatedSum": { + "p50": 210.30399948358536, + "p90": 221.98399901390076, + "p95": 225.53600370883942, + "p99": 270.2079862356186 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22650880, + "combineLogicalBytes": 22650880, + "fanoutMean": 1.54296875, + "recvTokensMax": 1024, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 256, + "globalTokens": 2048, "dispatch": { - "p50": 70.30399888753891, - "p90": 78.20799946784973, - "p95": 81.02399855852127, - "p99": 89.4400030374527 + "p50": 127.6479959487915, + 
"p90": 131.1040073633194, + "p95": 133.95200669765472, + "p99": 144.57599818706512 }, "combine": { - "p50": 72.7040022611618, - "p90": 73.91999661922455, - "p95": 74.27199929952621, - "p99": 79.58400249481201 + "p50": 174.55999553203583, + "p90": 176.7680048942566, + "p95": 177.279993891716, + "p99": 179.32799458503723 }, "roundtrip": { - "p50": 128.67200374603271, - "p90": 132.83200562000275, - "p95": 135.0719928741455, - "p99": 140.22399485111237 + "p50": 283.29598903656006, + "p90": 288.12798857688904, + "p95": 290.1439964771271, + "p99": 312.73600459098816 }, "isolatedSum": { - "p50": 143.0080011487007, - "p90": 152.12799608707428, - "p95": 155.29599785804749, - "p99": 169.0240055322647 + "p50": 302.20799148082733, + "p90": 307.872012257576, + "p95": 311.2320005893707, + "p99": 323.90399277210236 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4630528, - "combineLogicalBytes": 4630528, - "fanoutMean": 5.046875, - "recvTokensMax": 45, - "stragglerRank": 6, + "dispatchLogicalBytes": 45688832, + "combineLogicalBytes": 45688832, + "fanoutMean": 1.55615234375, + "recvTokensMax": 2048, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 16, - "globalTokens": 128, + "tokensPerRank": 512, + "globalTokens": 4096, "dispatch": { - "p50": 82.97599852085114, - "p90": 102.14400291442871, - "p95": 104.70400005578995, - "p99": 109.56799983978271 + "p50": 186.65599822998047, + "p90": 191.16799533367157, + "p95": 194.97600197792053, + "p99": 212.44800090789795 }, "combine": { - "p50": 74.30399954319, - "p90": 87.87199854850769, - "p95": 89.12000060081482, - "p99": 89.9519994854927 + "p50": 311.3279938697815, + "p90": 313.24800848960876, + "p95": 314.7520124912262, + "p99": 326.911985874176 }, "roundtrip": { - "p50": 132.4480026960373, - "p90": 161.47199273109436, - "p95": 163.26400637626648, - "p99": 166.9120043516159 + "p50": 479.74398732185364, + "p90": 486.7520034313202, + "p95": 488.8960123062134, + "p99": 
497.79200553894043 }, "isolatedSum": { - "p50": 157.27999806404114, - "p90": 190.0160014629364, - "p95": 193.82400065660477, - "p99": 199.51999932527542 + "p50": 497.98399209976196, + "p90": 504.41600382328033, + "p95": 509.72801446914673, + "p99": 539.359986782074 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 9447424, - "combineLogicalBytes": 9447424, - "fanoutMean": 5.1484375, - "recvTokensMax": 91, - "stragglerRank": 5, + "dispatchLogicalBytes": 91521024, + "combineLogicalBytes": 91521024, + "fanoutMean": 1.55859375, + "recvTokensMax": 4096, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 32, - "globalTokens": 256, + "tokensPerRank": 1024, + "globalTokens": 8192, "dispatch": { - "p50": 81.40800148248672, - "p90": 103.80800068378448, - "p95": 105.3759977221489, - "p99": 108.0000028014183 + "p50": 311.42398715019226, + "p90": 318.62398982048035, + "p95": 323.13600182533264, + "p99": 337.15200424194336 }, "combine": { - "p50": 79.77599650621414, - "p90": 90.08000046014786, - "p95": 90.71999788284302, - "p99": 247.67999351024628 + "p50": 583.6480259895325, + "p90": 594.3679809570312, + "p95": 596.671998500824, + "p99": 632.6079964637756 }, "roundtrip": { - "p50": 138.17599415779114, - "p90": 156.3519984483719, - "p95": 159.7760021686554, - "p99": 163.83999586105347 + "p50": 887.4239921569824, + "p90": 891.9680118560791, + "p95": 893.6960101127625, + "p99": 918.4960126876831 }, "isolatedSum": { - "p50": 161.18399798870087, - "p90": 193.88800114393234, - "p95": 196.0959956049919, - "p99": 355.6799963116646 + "p50": 895.0720131397247, + "p90": 912.9919707775116, + "p95": 919.8080003261566, + "p99": 969.760000705719 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19023872, - "combineLogicalBytes": 19023872, - "fanoutMean": 5.18359375, - "recvTokensMax": 178, + "dispatchLogicalBytes": 183916544, + "combineLogicalBytes": 183916544, + "fanoutMean": 1.5660400390625, + "recvTokensMax": 8192, "stragglerRank": 
4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 64, - "globalTokens": 512, + "tokensPerRank": 2048, + "globalTokens": 16384, "dispatch": { - "p50": 99.74399954080582, - "p90": 115.35999923944473, - "p95": 117.37599968910217, - "p99": 125.2799928188324 + "p50": 568.2560205459595, + "p90": 577.0559906959534, + "p95": 585.6639742851257, + "p99": 622.0160126686096 }, "combine": { - "p50": 90.55999666452408, - "p90": 103.61599922180176, - "p95": 104.19200360774994, - "p99": 104.8320010304451 + "p50": 1099.1679430007935, + "p90": 1110.0800037384033, + "p95": 1111.1040115356445, + "p99": 1136.8639469146729 }, "roundtrip": { - "p50": 163.87200355529785, - "p90": 178.0479997396469, - "p95": 180.2240014076233, - "p99": 185.47199666500092 + "p50": 1613.2479906082153, + "p90": 1620.7040548324585, + "p95": 1624.2239475250244, + "p99": 1674.720048904419 }, "isolatedSum": { - "p50": 190.3039962053299, - "p90": 218.9759984612465, - "p95": 221.5680032968521, - "p99": 230.1119938492775 + "p50": 1667.423963546753, + "p90": 1687.1359944343567, + "p95": 1696.7679858207703, + "p99": 1758.8799595832825 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 38148096, - "combineLogicalBytes": 38148096, - "fanoutMean": 5.197265625, - "recvTokensMax": 350, + "dispatchLogicalBytes": 368062464, + "combineLogicalBytes": 368062464, + "fanoutMean": 1.5670166015625, + "recvTokensMax": 16384, "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 4096, + "globalTokens": 32768, "dispatch": { - "p50": 113.66400122642517, - "p90": 132.22399353981018, - "p95": 133.88800621032715, - "p99": 139.64800536632538 + "p50": 1112.2239828109741, + "p90": 1126.8800497055054, + "p95": 1135.7439756393433, + "p99": 1233.247995376587 }, "combine": { - "p50": 106.59199953079224, - "p90": 114.75200206041336, - "p95": 119.99999731779099, - "p99": 121.91999703645706 + "p50": 2068.864107131958, + "p90": 
2072.096109390259, + "p95": 2080.4800987243652, + "p99": 2143.2321071624756 }, "roundtrip": { - "p50": 198.91199469566345, - "p90": 213.69600296020508, - "p95": 216.0319983959198, - "p99": 220.60799598693848 + "p50": 3127.5839805603027, + "p90": 3139.359951019287, + "p95": 3147.6480960845947, + "p99": 3192.70396232605 }, "isolatedSum": { - "p50": 220.2560007572174, - "p90": 246.97599560022354, - "p95": 253.88800352811813, - "p99": 261.56800240278244 + "p50": 3181.088089942932, + "p90": 3198.976159095764, + "p95": 3216.2240743637085, + "p99": 3376.4801025390625 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 76955648, - "combineLogicalBytes": 76955648, - "fanoutMean": 5.2421875, - "recvTokensMax": 687, - "stragglerRank": 6, + "dispatchLogicalBytes": 734720000, + "combineLogicalBytes": 734720000, + "fanoutMean": 1.56402587890625, + "recvTokensMax": 32768, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 @@ -22244,50 +23642,51 @@ ] }, { - "id": "cx-4eb12954", - "identity": "h100|deepep|7168|8|256|bf16|normal|cached-layout-comm-only-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|ac583971f94b176", - "colorKey": "h100_5df912ff", - "comparisonKey": "5074d4febd922e2d", + "id": "cx-861c4f52", + "identity": "b300|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-heavy+eplb|8|prefill|normal|none|none|0|tuned||46855e7fa6754eb", + "colorKey": "b300_6d2e4735", + "comparisonKey": "e4e20084a0948dac", "schemaVersion": 3, - "generatedAt": "2026-06-26T17:28:11.272284+00:00", + "generatedAt": "2026-06-27T09:50:16.626677+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h100-dgxc-slurm_10", - "sku": "h100", + "runner": "b300-nv_10", + "sku": "b300", "backend": "deepep", - "phase": "decode", + "phase": "prefill", "mode": "normal", - "resourceMode": "normalized", - "suite": "resource-constrained", + "resourceMode": "tuned", + "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": 
"cached-layout-comm-only-v1", - "topologyClass": "h100-nvlink-island", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · bf16 (norm) [cl]", + "label": "B300 EP8 · deepep · bf16 · zipf-heavy+eplb", + "model": "DeepSeek-V3 (EPLB physical)", "shape": { "hidden": 7168, "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", + "experts": 288, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy+eplb", "routingStep": 0, "unevenTokens": "none", - "eplbEnabled": false, + "eplbEnabled": true, "dispatchDtype": "bf16", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { - "requestedFraction": 0.18, - "achievedFraction": 0.1818, - "configuredUnits": 24, - "deviceUnits": 132, - "resourceClass": "resource-constrained", - "conformanceClass": "resource-conforming", + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", "fixedKernel": false, - "paretoEligible": true + "paretoEligible": false }, "placement": { "kind": "packed", @@ -22296,312 +23695,238 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "ac583971f94b176", - "workloadId": "set:8:d8d49658059863f2", + "traceSignature": "46855e7fa6754eb", + "workloadId": "set:6:1ca614e23cc66be1", "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, + "eplbImbalanceBefore": 7.38995361328125, + "eplbImbalanceAfter": 1.0000210716610862, "backendVersion": "1.2.1", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28254332840", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254332840", - "createdAt": "2026-06-26T17:27:12Z", - "sha": 
"60dec7d70f554e252fec87709e2be52752947db1" + "id": "28285658973", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285658973", + "createdAt": "2026-06-27T09:50:16.626677+00:00", + "sha": "149586650dbed5b7579537347e9489d5b41543c1" }, "rows": [ { - "tokensPerRank": 1, - "globalTokens": 8, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 78.91199737787247, - "p90": 85.21600067615509, - "p95": 87.20000088214874, - "p99": 93.34400296211243 + "p50": 94.30400282144547, + "p90": 96.03200107812881, + "p95": 97.88800030946732, + "p99": 106.55999928712845 }, "combine": { - "p50": 79.68000322580338, - "p90": 81.60000294446945, - "p95": 86.91199868917465, - "p99": 88.54400366544724 + "p50": 114.75200206041336, + "p90": 115.77600240707397, + "p95": 116.54400080442429, + "p99": 125.98399817943573 }, "roundtrip": { - "p50": 133.69600474834442, - "p90": 141.184002161026, - "p95": 143.2960033416748, - "p99": 151.48800611495972 + "p50": 192.25600361824036, + "p90": 196.1279958486557, + "p95": 198.11199605464935, + "p99": 216.19200706481934 }, "isolatedSum": { - "p50": 158.59200060367584, - "p90": 166.81600362062454, - "p95": 174.1119995713234, - "p99": 181.88800662755966 + "p50": 209.05600488185883, + "p90": 211.8080034852028, + "p95": 214.4320011138916, + "p99": 232.54399746656418 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 630784, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 7, + "dispatchLogicalBytes": 79206400, + "combineLogicalBytes": 79206400, + "fanoutMean": 5.3955078125, + "recvTokensMax": 713, "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 62.39999830722809, - "p90": 84.35200154781342, - "p95": 87.00799942016602, - "p99": 96.57599776983261 - }, - "combine": { - "p50": 71.99999690055847, - "p90": 81.02399855852127, - "p95": 81.44000172615051, - "p99": 87.80799806118011 - }, - "roundtrip": { - 
"p50": 116.7680025100708, - "p90": 140.00000059604645, - "p95": 141.6960060596466, - "p99": 143.96800100803375 - }, - "isolatedSum": { - "p50": 134.39999520778656, - "p90": 165.3760001063347, - "p95": 168.44800114631653, - "p99": 184.38399583101273 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1232896, - "combineLogicalBytes": 1232896, - "fanoutMean": 5.375, - "recvTokensMax": 13, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 62.01599910855293, - "p90": 82.56000280380249, - "p95": 84.76799726486206, - "p99": 91.90399944782257 - }, - "combine": { - "p50": 72.89600372314453, - "p90": 86.94399893283844, - "p95": 87.61599659919739, - "p99": 88.22400122880936 - }, - "roundtrip": { - "p50": 116.57600104808807, - "p90": 143.13599467277527, - "p95": 144.96000111103058, - "p99": 189.40800428390503 - }, - "isolatedSum": { - "p50": 134.91200283169746, - "p90": 169.50400173664093, - "p95": 172.38399386405945, - "p99": 180.12800067663193 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2480128, - "combineLogicalBytes": 2480128, - "fanoutMean": 5.40625, - "recvTokensMax": 29, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 256, + "globalTokens": 2048, "dispatch": { - "p50": 66.78400188684464, - "p90": 82.46400207281113, - "p95": 85.1840004324913, - "p99": 90.65599739551544 + "p50": 132.83200562000275, + "p90": 138.49599659442902, + "p95": 140.6400054693222, + "p99": 152.6080071926117 }, "combine": { - "p50": 73.02399724721909, - "p90": 86.87999844551086, - "p95": 87.55200356245041, - "p99": 88.57599645853043 + "p50": 155.20000457763672, + "p90": 163.83999586105347, + "p95": 164.19200599193573, + "p99": 166.9439971446991 }, "roundtrip": { - "p50": 116.67200177907944, - "p90": 142.4960047006607, - "p95": 143.64799857139587, - "p99": 
149.1200029850006 + "p50": 272.4800109863281, + "p90": 279.87200021743774, + "p95": 287.1040105819702, + "p99": 306.5919876098633 }, "isolatedSum": { - "p50": 139.80799913406372, - "p90": 169.344000518322, - "p95": 172.7360039949417, - "p99": 179.23199385404587 + "p50": 288.03201019763947, + "p90": 302.3359924554825, + "p95": 304.83201146125793, + "p99": 319.5520043373108 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, + "dispatchLogicalBytes": 159330304, + "combineLogicalBytes": 159330304, + "fanoutMean": 5.4267578125, + "recvTokensMax": 1436, "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 16, - "globalTokens": 128, + "tokensPerRank": 512, + "globalTokens": 4096, "dispatch": { - "p50": 78.97599786520004, - "p90": 84.83199775218964, - "p95": 86.94399893283844, - "p99": 90.87999910116196 + "p50": 192.671999335289, + "p90": 199.48799908161163, + "p95": 201.05600357055664, + "p99": 214.27200734615326 }, "combine": { - "p50": 80.4160013794899, - "p90": 87.99999952316284, - "p95": 88.25600147247314, - "p99": 89.75999802350998 + "p50": 274.2080092430115, + "p90": 277.5680124759674, + "p95": 285.95200181007385, + "p99": 298.335999250412 }, "roundtrip": { - "p50": 116.73600226640701, - "p90": 140.00000059604645, - "p95": 143.23200285434723, - "p99": 146.94400131702423 + "p50": 444.0639913082123, + "p90": 448.63998889923096, + "p95": 450.9119987487793, + "p99": 470.91200947761536 }, "isolatedSum": { - "p50": 159.39199924468994, - "p90": 172.83199727535248, - "p95": 175.20000040531158, - "p99": 180.63999712467194 + "p50": 466.8800085783005, + "p90": 477.05601155757904, + "p95": 487.0080053806305, + "p99": 512.6080065965652 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 9920512, - "combineLogicalBytes": 9920512, - "fanoutMean": 5.40625, - "recvTokensMax": 92, - "stragglerRank": 5, + "dispatchLogicalBytes": 319535104, + 
"combineLogicalBytes": 319535104, + "fanoutMean": 5.441650390625, + "recvTokensMax": 2897, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 32, - "globalTokens": 256, + "tokensPerRank": 1024, + "globalTokens": 8192, "dispatch": { - "p50": 75.58400183916092, - "p90": 84.6719965338707, - "p95": 86.20800077915192, - "p99": 90.97599983215332 + "p50": 326.84800028800964, + "p90": 330.6240141391754, + "p95": 334.3679904937744, + "p99": 394.3359851837158 }, "combine": { - "p50": 80.19199967384338, - "p90": 88.51200342178345, - "p95": 95.10400146245956, - "p99": 111.77600175142288 + "p50": 469.63199973106384, + "p90": 471.1039960384369, + "p95": 472.7039933204651, + "p99": 483.13599824905396 }, "roundtrip": { - "p50": 143.16800236701965, - "p90": 153.28000485897064, - "p95": 154.7520011663437, - "p99": 170.6240028142929 + "p50": 772.4480032920837, + "p90": 781.7919850349426, + "p95": 785.2159738540649, + "p99": 801.2480139732361 }, "isolatedSum": { - "p50": 155.7760015130043, - "p90": 173.18399995565414, - "p95": 181.31200224161148, - "p99": 202.7520015835762 + "p50": 796.4800000190735, + "p90": 801.7280101776123, + "p95": 807.0719838142395, + "p99": 877.4719834327698 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19726336, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 182, - "stragglerRank": 7, + "dispatchLogicalBytes": 638410752, + "combineLogicalBytes": 638410752, + "fanoutMean": 5.43603515625, + "recvTokensMax": 5815, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 64, - "globalTokens": 512, + "tokensPerRank": 2048, + "globalTokens": 16384, "dispatch": { - "p50": 85.15200018882751, - "p90": 96.67199850082397, - "p95": 98.30400347709656, - "p99": 158.65600109100342 + "p50": 581.6959738731384, + "p90": 585.9519839286804, + "p95": 589.792013168335, + "p99": 671.8400120735168 }, "combine": { - "p50": 91.20000153779984, - "p90": 
105.02400249242783, - "p95": 106.04800283908844, - "p99": 127.87200510501862 + "p50": 828.2240033149719, + "p90": 838.8159871101379, + "p95": 839.6160006523132, + "p99": 850.8480191230774 }, "roundtrip": { - "p50": 151.8079936504364, - "p90": 167.67999529838562, - "p95": 172.06400632858276, - "p99": 198.2399970293045 + "p50": 1393.4400081634521, + "p90": 1402.4319648742676, + "p95": 1406.6879749298096, + "p99": 1428.1920194625854 }, "isolatedSum": { - "p50": 176.35200172662735, - "p90": 201.6960009932518, - "p95": 204.352006316185, - "p99": 286.52800619602203 + "p50": 1409.9199771881104, + "p90": 1424.7679710388184, + "p95": 1429.4080138206482, + "p99": 1522.6880311965942 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 38993920, - "combineLogicalBytes": 38993920, - "fanoutMean": 5.3125, - "recvTokensMax": 367, + "dispatchLogicalBytes": 1275144192, + "combineLogicalBytes": 1275144192, + "fanoutMean": 5.42889404296875, + "recvTokensMax": 11606, "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 4096, + "globalTokens": 32768, "dispatch": { - "p50": 104.73600029945374, - "p90": 119.64800208806992, - "p95": 128.03199887275696, - "p99": 401.43999457359314 + "p50": 1084.3839645385742, + "p90": 1092.7679538726807, + "p95": 1101.5679836273193, + "p99": 1113.6959791183472 }, "combine": { - "p50": 106.49599879980087, - "p90": 120.83200365304947, - "p95": 121.47200107574463, - "p99": 128.00000607967377 + "p50": 1567.4560070037842, + "p90": 1576.8959522247314, + "p95": 1578.976035118103, + "p99": 1629.3120384216309 }, "roundtrip": { - "p50": 187.45599687099457, - "p90": 201.34399831295013, - "p95": 202.55999267101288, - "p99": 206.68800175189972 + "p50": 2638.4639739990234, + "p90": 2648.47993850708, + "p95": 2653.088092803955, + "p99": 2690.3679370880127 }, "isolatedSum": { - "p50": 211.2319990992546, - "p90": 240.48000574111938, - "p95": 249.5039999485016, - "p99": 
529.4400006532669 + "p50": 2651.8399715423584, + "p90": 2669.663906097412, + "p95": 2680.5440187454224, + "p99": 2743.008017539978 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, + "dispatchLogicalBytes": 2546374656, + "combineLogicalBytes": 2546374656, + "fanoutMean": 5.420562744140625, + "recvTokensMax": 23170, "stragglerRank": 4, "correct": true, "samplesPooled": 600, @@ -22610,34 +23935,35 @@ ] }, { - "id": "cx-76b84ec2", - "identity": "h100|deepep|7168|8|256|bf16|normal|cached-layout-comm-only-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", - "colorKey": "h100_17694d2c", - "comparisonKey": "d31efe4aa43e0223", + "id": "cx-cae00445", + "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-mild|8|prefill|normal|none|none|0|tuned||cf93f8f6b52e428", + "colorKey": "b300_7ab35d34", + "comparisonKey": "d9d28463325111a5", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:47:16.080205+00:00", + "generatedAt": "2026-06-27T09:49:16.226066+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h100-dgxc-slurm_11", - "sku": "h100", + "runner": "b300-nv_11", + "sku": "b300", "backend": "deepep", - "phase": "decode", + "phase": "prefill", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "cached-layout-comm-only-v1", - "topologyClass": "h100-nvlink-island", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · bf16 [cl]", + "label": "B300 EP8 · deepep · bf16 · zipf-mild", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", + "routing": "zipf-mild", + "routingLabel": "zipf-mild", "routingStep": 0, "unevenTokens": "none", 
"eplbEnabled": false, @@ -22647,9 +23973,9 @@ }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1515, + "achievedFraction": 0.1351, "configuredUnits": 20, - "deviceUnits": 132, + "deviceUnits": 148, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -22662,8 +23988,8 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "ac583971f94b176", - "workloadId": "set:8:d8d49658059863f2", + "traceSignature": "cf93f8f6b52e428", + "workloadId": "set:6:a224603e5a1640b8", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -22671,354 +23997,281 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271551406", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271551406", - "createdAt": "2026-06-26T23:46:18Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28285635254", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285635254", + "createdAt": "2026-06-27T09:49:16.226066+00:00", + "sha": "149586650dbed5b7579537347e9489d5b41543c1" }, "rows": [ { - "tokensPerRank": 1, - "globalTokens": 8, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 77.7600035071373, - "p90": 84.25600081682205, - "p95": 86.496002972126, - "p99": 92.57599711418152 + "p50": 102.78400033712387, + "p90": 104.80000078678131, + "p95": 105.66399991512299, + "p99": 110.23999750614166 }, "combine": { - "p50": 75.9039968252182, - "p90": 81.95199817419052, - "p95": 82.40000158548355, - "p99": 87.2960016131401 + "p50": 126.65599584579468, + "p90": 128.09599936008453, + "p95": 128.89599800109863, + "p99": 141.85599982738495 }, "roundtrip": { - "p50": 131.45600259304047, - "p90": 136.25599443912506, - "p95": 138.59200477600098, - "p99": 142.68800616264343 + "p50": 205.85599541664124, + "p90": 
213.15200626850128, + "p95": 215.55200219154358, + "p99": 228.15999388694763 }, "isolatedSum": { - "p50": 153.6640003323555, - "p90": 166.20799899101257, - "p95": 168.89600455760956, - "p99": 179.87199872732162 + "p50": 229.43999618291855, + "p90": 232.89600014686584, + "p95": 234.55999791622162, + "p99": 252.0959973335266 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 630784, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 7, + "dispatchLogicalBytes": 70160384, + "combineLogicalBytes": 70160384, + "fanoutMean": 4.779296875, + "recvTokensMax": 987, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2, - "globalTokens": 16, + "tokensPerRank": 256, + "globalTokens": 2048, "dispatch": { - "p50": 66.23999774456024, - "p90": 80.99199831485748, - "p95": 83.13599973917007, - "p99": 87.52000331878662 + "p50": 139.8719996213913, + "p90": 141.63200557231903, + "p95": 143.23200285434723, + "p99": 152.8320014476776 }, "combine": { - "p50": 72.06399738788605, - "p90": 81.85599744319916, - "p95": 82.11199939250946, - "p99": 85.91999858617783 + "p50": 176.9919991493225, + "p90": 186.8479996919632, + "p95": 187.96800076961517, + "p99": 201.05600357055664 }, "roundtrip": { - "p50": 115.55200070142746, - "p90": 136.06399297714233, - "p95": 137.9839926958084, - "p99": 142.4960047006607 + "p50": 305.5039942264557, + "p90": 311.2640082836151, + "p95": 312.1599853038788, + "p99": 315.8400058746338 }, "isolatedSum": { - "p50": 138.3039951324463, - "p90": 162.84799575805664, - "p95": 165.24799913167953, - "p99": 173.44000190496445 + "p50": 316.8639987707138, + "p90": 328.4800052642822, + "p95": 331.2000036239624, + "p99": 353.88800501823425 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1232896, - "combineLogicalBytes": 1232896, - "fanoutMean": 5.375, - "recvTokensMax": 13, - "stragglerRank": 3, + "dispatchLogicalBytes": 140879872, + "combineLogicalBytes": 140879872, + 
"fanoutMean": 4.79833984375, + "recvTokensMax": 1972, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 4, - "globalTokens": 32, + "tokensPerRank": 512, + "globalTokens": 4096, "dispatch": { - "p50": 77.60000228881836, - "p90": 81.69600367546082, - "p95": 83.93599838018417, - "p99": 89.02399986982346 + "p50": 213.6639952659607, + "p90": 221.88800573349, + "p95": 222.91199862957, + "p99": 224.63999688625336 }, "combine": { - "p50": 79.52000200748444, - "p90": 82.20800012350082, - "p95": 83.16799998283386, - "p99": 87.2960016131401 + "p50": 326.4960050582886, + "p90": 335.55200695991516, + "p95": 336.8639945983887, + "p99": 396.9919979572296 }, "roundtrip": { - "p50": 133.82400572299957, - "p90": 140.86399972438812, - "p95": 143.10400187969208, - "p99": 149.72800016403198 + "p50": 522.2079753875732, + "p90": 529.8879742622375, + "p95": 531.4239859580994, + "p99": 539.2640233039856 }, "isolatedSum": { - "p50": 157.1200042963028, - "p90": 163.90400379896164, - "p95": 167.10399836301804, - "p99": 176.32000148296356 + "p50": 540.1600003242493, + "p90": 557.4400126934052, + "p95": 559.7759932279587, + "p99": 621.631994843483 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2480128, - "combineLogicalBytes": 2480128, - "fanoutMean": 5.40625, - "recvTokensMax": 29, - "stragglerRank": 6, + "dispatchLogicalBytes": 282333184, + "combineLogicalBytes": 282333184, + "fanoutMean": 4.80810546875, + "recvTokensMax": 3936, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 1024, + "globalTokens": 8192, "dispatch": { - "p50": 77.66400277614594, - "p90": 83.13599973917007, - "p95": 87.8399983048439, - "p99": 131.67999684810638 + "p50": 355.6160032749176, + "p90": 362.9760146141052, + "p95": 364.8639917373657, + "p99": 368.3519959449768 }, "combine": { - "p50": 81.216000020504, - "p90": 82.71999657154083, - "p95": 84.03199911117554, - "p99": 
90.20800143480301 + "p50": 569.4720149040222, + "p90": 572.7360248565674, + "p95": 580.7039737701416, + "p99": 594.1759943962097 }, "roundtrip": { - "p50": 134.68800485134125, - "p90": 139.55199718475342, - "p95": 142.752006649971, - "p99": 145.56799829006195 + "p50": 920.2880263328552, + "p90": 929.0239810943604, + "p95": 936.7679953575134, + "p99": 955.5839896202087 }, "isolatedSum": { - "p50": 158.88000279664993, - "p90": 165.8559963107109, - "p95": 171.87199741601944, - "p99": 221.8879982829094 + "p50": 925.0880181789398, + "p90": 935.7120394706726, + "p95": 945.5679655075073, + "p99": 962.5279903411865 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 7, + "dispatchLogicalBytes": 566716416, + "combineLogicalBytes": 566716416, + "fanoutMean": 4.8255615234375, + "recvTokensMax": 7855, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 16, - "globalTokens": 128, + "tokensPerRank": 2048, + "globalTokens": 16384, "dispatch": { - "p50": 77.79199630022049, - "p90": 81.66400343179703, - "p95": 84.73599702119827, - "p99": 87.23200112581253 + "p50": 658.6880087852478, + "p90": 667.9679751396179, + "p95": 669.2479848861694, + "p99": 679.6159744262695 }, "combine": { - "p50": 81.69600367546082, - "p90": 84.79999750852585, - "p95": 88.95999938249588, - "p99": 90.27200192213058 + "p50": 1048.1280088424683, + "p90": 1052.191972732544, + "p95": 1061.1519813537598, + "p99": 1171.712040901184 }, "roundtrip": { - "p50": 135.29600203037262, - "p90": 143.5839980840683, - "p95": 144.96000111103058, - "p99": 150.30400454998016 + "p50": 1691.648006439209, + "p90": 1700.1279592514038, + "p95": 1703.5839557647705, + "p99": 1764.7039890289307 }, "isolatedSum": { - "p50": 159.4879999756813, - "p90": 166.46400094032288, - "p95": 173.69599640369415, - "p99": 177.50400304794312 + "p50": 1706.816017627716, + "p90": 
1720.1599478721619, + "p95": 1730.3999662399292, + "p99": 1851.3280153274536 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 9920512, - "combineLogicalBytes": 9920512, - "fanoutMean": 5.40625, - "recvTokensMax": 92, - "stragglerRank": 6, + "dispatchLogicalBytes": 1132285952, + "combineLogicalBytes": 1132285952, + "fanoutMean": 4.8206787109375, + "recvTokensMax": 15694, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 32, - "globalTokens": 256, + "tokensPerRank": 4096, + "globalTokens": 32768, "dispatch": { - "p50": 83.42400193214417, - "p90": 88.3840024471283, - "p95": 89.28000181913376, - "p99": 95.20000219345093 + "p50": 1285.375952720642, + "p90": 1298.6559867858887, + "p95": 1301.2160062789917, + "p99": 1428.063988685608 }, "combine": { - "p50": 81.44000172615051, - "p90": 89.9839997291565, - "p95": 90.27200192213058, - "p99": 92.47999638319016 + "p50": 2018.496036529541, + "p90": 2022.7839946746826, + "p95": 2031.3599109649658, + "p99": 2082.4639797210693 }, "roundtrip": { - "p50": 129.18399274349213, - "p90": 144.51199769973755, - "p95": 147.0080018043518, - "p99": 152.73599326610565 + "p50": 3294.048070907593, + "p90": 3308.799982070923, + "p95": 3315.9360885620117, + "p99": 3368.2239055633545 }, "isolatedSum": { - "p50": 164.86400365829468, - "p90": 178.3680021762848, - "p95": 179.55200374126434, - "p99": 187.67999857664108 + "p50": 3303.871989250183, + "p90": 3321.4399814605713, + "p95": 3332.5759172439575, + "p99": 3510.5279684066772 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19726336, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 182, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 64, - "globalTokens": 512, - "dispatch": { - "p50": 93.56799721717834, - "p90": 101.40799731016159, - "p95": 102.36799716949463, - "p99": 109.47199910879135 - }, - "combine": { - "p50": 94.81599926948547, - "p90": 
99.61599856615067, - "p95": 102.33599692583084, - "p99": 105.82400113344193 - }, - "roundtrip": { - "p50": 158.78400206565857, - "p90": 165.72800278663635, - "p95": 167.04000532627106, - "p99": 170.01600563526154 - }, - "isolatedSum": { - "p50": 188.38399648666382, - "p90": 201.02399587631226, - "p95": 204.70399409532547, - "p99": 215.29600024223328 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 38993920, - "combineLogicalBytes": 38993920, - "fanoutMean": 5.3125, - "recvTokensMax": 367, + "dispatchLogicalBytes": 2267840512, + "combineLogicalBytes": 2267840512, + "fanoutMean": 4.82763671875, + "recvTokensMax": 31357, "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 112.41599917411804, - "p90": 120.4800009727478, - "p95": 123.48800152540207, - "p99": 303.6800026893616 - }, - "combine": { - "p50": 111.90400272607803, - "p90": 117.34399944543839, - "p95": 120.03199756145477, - "p99": 125.08800625801086 - }, - "roundtrip": { - "p50": 192.80000030994415, - "p90": 199.74400103092194, - "p95": 201.9519954919815, - "p99": 206.9759964942932 - }, - "isolatedSum": { - "p50": 224.32000190019608, - "p90": 237.8240004181862, - "p95": 243.51999908685684, - "p99": 428.76800894737244 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 } ] }, { - "id": "cx-6f4d88a5", - "identity": "h100|deepep|7168|8|256|bf16|ll|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", - "colorKey": "h100_8abde1a9", - "comparisonKey": "a63125ec759ccc03", + "id": "cx-17599843", + "identity": "b300|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-mild+eplb|8|prefill|normal|none|none|0|tuned||27ddc85ded0add9", + "colorKey": "b300_5e3d915a", + "comparisonKey": 
"0397aa2abeee044f", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:48:24.132792+00:00", + "generatedAt": "2026-06-27T09:49:19.827351+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h100-dgxc-slurm_00", - "sku": "h100", + "runner": "b300-nv_05", + "sku": "b300", "backend": "deepep", - "phase": "decode", - "mode": "ll", + "phase": "prefill", + "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", + "topologyClass": "b300-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · bf16 LL", + "label": "B300 EP8 · deepep · bf16 · zipf-mild+eplb", + "model": "DeepSeek-V3 (EPLB physical)", "shape": { "hidden": 7168, "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", + "experts": 288, + "routing": "zipf-mild", + "routingLabel": "zipf-mild+eplb", "routingStep": 0, "unevenTokens": "none", - "eplbEnabled": false, + "eplbEnabled": true, "dispatchDtype": "bf16", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": null, - "configuredUnits": null, - "deviceUnits": 132, - "resourceClass": "fixed-kernel", - "conformanceClass": "not-applicable", - "fixedKernel": true, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, "paretoEligible": false }, "placement": { @@ -23028,313 +24281,239 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "ac583971f94b176", - "workloadId": "set:8:d8d49658059863f2", + "traceSignature": "27ddc85ded0add9", + "workloadId": "set:6:a224603e5a1640b8", "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, + "eplbImbalanceBefore": 2.545684814453125, + 
"eplbImbalanceAfter": 1.0001495361328125, "backendVersion": "1.2.1", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271587010", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271587010", - "createdAt": "2026-06-26T23:47:25Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28285637742", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285637742", + "createdAt": "2026-06-27T09:49:19.827351+00:00", + "sha": "149586650dbed5b7579537347e9489d5b41543c1" }, "rows": [ { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 47.29599878191948, - "p90": 48.70399832725525, - "p95": 49.02400076389313, - "p99": 54.75199967622757 - }, - "combine": { - "p50": 36.57599911093712, - "p90": 37.408001720905304, - "p95": 38.59199956059456, - "p99": 44.60800066590309 - }, - "roundtrip": { - "p50": 58.97599831223488, - "p90": 66.6240006685257, - "p95": 67.1359971165657, - "p99": 67.6800012588501 - }, - "isolatedSum": { - "p50": 83.8719978928566, - "p90": 86.11200004816055, - "p95": 87.61600032448769, - "p99": 99.36000034213066 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 630784, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 14, - "stragglerRank": 2, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 40.32000154256821, - "p90": 48.51200059056282, - "p95": 48.73599857091904, - "p99": 53.82400006055832 + "p50": 94.94400024414062, + "p90": 96.89600020647049, + "p95": 99.16800260543823, + "p99": 108.99200290441513 }, "combine": { - "p50": 35.77600046992302, - "p90": 37.02399879693985, - "p95": 38.94399851560593, - "p99": 44.47999969124794 + "p50": 115.4559999704361, + "p90": 116.80000275373459, + "p95": 117.76000261306763, + "p99": 127.6479959487915 
}, "roundtrip": { - "p50": 56.57599866390228, - "p90": 65.05600363016129, - "p95": 66.27199798822403, - "p99": 67.07199662923813 + "p50": 193.6960071325302, + "p90": 199.2959976196289, + "p95": 201.75999402999878, + "p99": 233.11999440193176 }, "isolatedSum": { - "p50": 76.09600201249123, - "p90": 85.53599938750267, - "p95": 87.67999708652496, - "p99": 98.30399975180626 + "p50": 210.40000021457672, + "p90": 213.69600296020508, + "p95": 216.92800521850586, + "p99": 236.63999885320663 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1232896, - "combineLogicalBytes": 1232896, - "fanoutMean": 5.375, - "recvTokensMax": 21, + "dispatchLogicalBytes": 78159872, + "combineLogicalBytes": 78159872, + "fanoutMean": 5.32421875, + "recvTokensMax": 702, "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 42.27200150489807, - "p90": 48.70399832725525, - "p95": 49.056001007556915, - "p99": 55.39200082421303 - }, - "combine": { - "p50": 36.70400008559227, - "p90": 37.50399872660637, - "p95": 43.07200014591217, - "p99": 45.05600035190582 - }, - "roundtrip": { - "p50": 59.167999774217606, - "p90": 66.880002617836, - "p95": 67.45599955320358, - "p99": 68.57600063085556 - }, - "isolatedSum": { - "p50": 78.97600159049034, - "p90": 86.20799705386162, - "p95": 92.12800115346909, - "p99": 100.44800117611885 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2480128, - "combineLogicalBytes": 2480128, - "fanoutMean": 5.40625, - "recvTokensMax": 39, - "stragglerRank": 2, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 256, + "globalTokens": 2048, "dispatch": { - "p50": 47.359999269247055, - "p90": 48.70399832725525, - "p95": 48.895999789237976, - "p99": 55.26399984955788 + "p50": 132.25600123405457, + "p90": 138.62399756908417, + "p95": 140.06400108337402, + "p99": 148.28799664974213 }, "combine": { - 
"p50": 36.57599911093712, - "p90": 43.2640016078949, - "p95": 43.776001781225204, - "p99": 45.024000108242035 + "p50": 163.4880006313324, + "p90": 164.73600268363953, + "p95": 165.53600132465363, + "p99": 188.48000466823578 }, "roundtrip": { - "p50": 64.67200070619583, - "p90": 67.10399687290192, - "p95": 67.29599833488464, - "p99": 69.47200000286102 + "p50": 273.3759880065918, + "p90": 280.89600801467896, + "p95": 283.4239900112152, + "p99": 295.0400114059448 }, "isolatedSum": { - "p50": 83.93599838018417, - "p90": 91.96799993515015, - "p95": 92.67200157046318, - "p99": 100.28799995779991 + "p50": 295.74400186538696, + "p90": 303.3600002527237, + "p95": 305.60000240802765, + "p99": 336.7680013179779 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 74, - "stragglerRank": 2, + "dispatchLogicalBytes": 156563456, + "combineLogicalBytes": 156563456, + "fanoutMean": 5.33251953125, + "recvTokensMax": 1393, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 16, - "globalTokens": 128, + "tokensPerRank": 512, + "globalTokens": 4096, "dispatch": { - "p50": 48.448000103235245, - "p90": 55.64799904823303, - "p95": 56.2559999525547, - "p99": 56.89600110054016 + "p50": 193.63200664520264, + "p90": 196.83200120925903, + "p95": 202.55999267101288, + "p99": 238.27199637889862 }, "combine": { - "p50": 43.776001781225204, - "p90": 44.73600164055824, - "p95": 44.89599913358688, - "p99": 48.22399839758873 + "p50": 264.384001493454, + "p90": 274.2399871349335, + "p95": 274.9119997024536, + "p99": 299.6160089969635 }, "roundtrip": { - "p50": 66.880002617836, - "p90": 73.82400333881378, - "p95": 74.68800246715546, - "p99": 75.29599964618683 + "p50": 442.78401136398315, + "p90": 448.4800100326538, + "p95": 453.8559913635254, + "p99": 481.1199903488159 }, "isolatedSum": { - "p50": 92.22400188446045, - "p90": 100.38400068879128, - "p95": 
101.15199908614159, - "p99": 105.11999949812889 + "p50": 458.0160081386566, + "p90": 471.0719883441925, + "p95": 477.4719923734665, + "p99": 537.8880053758621 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 9920512, - "combineLogicalBytes": 9920512, - "fanoutMean": 5.40625, - "recvTokensMax": 145, - "stragglerRank": 0, + "dispatchLogicalBytes": 312410112, + "combineLogicalBytes": 312410112, + "fanoutMean": 5.3203125, + "recvTokensMax": 2773, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 32, - "globalTokens": 256, + "tokensPerRank": 1024, + "globalTokens": 8192, "dispatch": { - "p50": 49.60000142455101, - "p90": 56.8000003695488, - "p95": 57.08799883723259, - "p99": 59.167999774217606 + "p50": 326.33599638938904, + "p90": 329.21600341796875, + "p95": 330.1120102405548, + "p99": 349.08801317214966 }, "combine": { - "p50": 51.00800096988678, - "p90": 52.86400020122528, - "p95": 53.0879981815815, - "p99": 53.98400127887726 + "p50": 459.26401019096375, + "p90": 470.43201327323914, + "p95": 471.48799896240234, + "p99": 483.68000984191895 }, "roundtrip": { - "p50": 75.39200037717819, - "p90": 83.26400071382523, - "p95": 83.74399691820145, - "p99": 84.63999629020691 + "p50": 768.5440182685852, + "p90": 775.3919959068298, + "p95": 784.3199968338013, + "p99": 826.6239762306213 }, "isolatedSum": { - "p50": 100.60800239443779, - "p90": 109.66400057077408, - "p95": 110.17599701881409, - "p99": 113.15200105309486 + "p50": 785.6000065803528, + "p90": 799.6480166912079, + "p95": 801.6000092029572, + "p99": 832.7680230140686 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19726336, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 287, - "stragglerRank": 2, + "dispatchLogicalBytes": 622712832, + "combineLogicalBytes": 622712832, + "fanoutMean": 5.3023681640625, + "recvTokensMax": 5498, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 64, - 
"globalTokens": 512, + "tokensPerRank": 2048, + "globalTokens": 16384, "dispatch": { - "p50": 64.92800265550613, - "p90": 67.45599955320358, - "p95": 72.41600006818771, - "p99": 74.0479975938797 + "p50": 565.6960010528564, + "p90": 575.6480097770691, + "p95": 583.6799740791321, + "p99": 610.4320287704468 }, "combine": { - "p50": 61.055999249219894, - "p90": 63.1679967045784, - "p95": 68.54400038719177, - "p99": 77.18399912118912 + "p50": 815.7439827919006, + "p90": 827.8719782829285, + "p95": 830.6559920310974, + "p99": 852.6080250740051 }, "roundtrip": { - "p50": 105.76000064611435, - "p90": 108.67200046777725, - "p95": 109.18399691581726, - "p99": 113.69600147008896 + "p50": 1371.8719482421875, + "p90": 1386.7199420928955, + "p95": 1397.7919816970825, + "p99": 1450.4319429397583 }, "isolatedSum": { - "p50": 125.98400190472603, - "p90": 130.62399625778198, - "p95": 140.9600004553795, - "p99": 151.23199671506882 + "p50": 1381.439983844757, + "p90": 1403.5199880599976, + "p95": 1414.3359661102295, + "p99": 1463.040053844452 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 38993920, - "combineLogicalBytes": 38993920, - "fanoutMean": 5.3125, - "recvTokensMax": 564, - "stragglerRank": 2, + "dispatchLogicalBytes": 1245038592, + "combineLogicalBytes": 1245038592, + "fanoutMean": 5.30072021484375, + "recvTokensMax": 10955, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 4096, + "globalTokens": 32768, "dispatch": { - "p50": 86.04799956083298, - "p90": 91.71199798583984, - "p95": 92.83199906349182, - "p99": 94.62399780750275 + "p50": 1069.599986076355, + "p90": 1080.8639526367188, + "p95": 1088.8639688491821, + "p99": 1109.0879440307617 }, "combine": { - "p50": 94.36800330877304, - "p90": 96.79999947547913, - "p95": 97.82399982213974, - "p99": 218.78400444984436 + "p50": 1531.3600301742554, + "p90": 1552.8000593185425, + "p95": 1564.2240047454834, + "p99": 
1616.8960332870483 }, "roundtrip": { - "p50": 152.8960019350052, - "p90": 158.91200304031372, - "p95": 159.67999398708344, - "p99": 163.2000058889389 + "p50": 2586.0159397125244, + "p90": 2608.6719036102295, + "p95": 2621.151924133301, + "p99": 2671.7441082000732 }, "isolatedSum": { - "p50": 180.41600286960602, - "p90": 188.51199746131897, - "p95": 190.65599888563156, - "p99": 313.4080022573471 + "p50": 2600.9600162506104, + "p90": 2633.6640119552612, + "p95": 2653.0879735946655, + "p99": 2725.98397731781 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 1104, - "stragglerRank": 0, + "dispatchLogicalBytes": 2489460736, + "combineLogicalBytes": 2489460736, + "fanoutMean": 5.299407958984375, + "recvTokensMax": 21864, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 @@ -23342,34 +24521,35 @@ ] }, { - "id": "cx-fecf5035", - "identity": "h100|deepep|7168|8|256|bf16|ll|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", - "colorKey": "h100_45e1ef29", - "comparisonKey": "b17b52153b29fbde", + "id": "cx-4c124953", + "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-moderate|8|prefill|normal|none|none|0|tuned||b5217e990b95f86", + "colorKey": "b300_fdf55523", + "comparisonKey": "61f6ca66d0cc490b", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:48:28.951078+00:00", + "generatedAt": "2026-06-27T09:49:50.578369+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h100-dgxc-slurm_11", - "sku": "h100", + "runner": "b300-nv_07", + "sku": "b300", "backend": "deepep", - "phase": "decode", - "mode": "ll", + "phase": "prefill", + "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "runtime-visible-v1", - "topologyClass": "h100-nvlink-island", + "measurementContract": "layout-and-dispatch-v1", + 
"topologyClass": "b300-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · bf16 LL", + "label": "B300 EP8 · deepep · bf16 · zipf-moderate", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, @@ -23379,12 +24559,12 @@ }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": null, - "configuredUnits": null, - "deviceUnits": 132, - "resourceClass": "fixed-kernel", - "conformanceClass": "not-applicable", - "fixedKernel": true, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, "paretoEligible": false }, "placement": { @@ -23394,8 +24574,8 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "ac583971f94b176", - "workloadId": "set:8:d8d49658059863f2", + "traceSignature": "b5217e990b95f86", + "workloadId": "set:6:6709a02c31933a9f", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -23403,304 +24583,230 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271590306", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271590306", - "createdAt": "2026-06-26T23:47:32Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28285646148", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285646148", + "createdAt": "2026-06-27T09:49:50.578369+00:00", + "sha": "149586650dbed5b7579537347e9489d5b41543c1" }, "rows": [ { - "tokensPerRank": 1, - "globalTokens": 8, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 42.17600077390671, - "p90": 
48.928000032901764, - "p95": 49.8879998922348, - "p99": 51.77599936723709 + "p50": 100.73599964380264, + "p90": 103.16800326108932, + "p95": 105.12000322341919, + "p99": 109.53599959611893 }, "combine": { - "p50": 36.99199855327606, - "p90": 38.176000118255615, - "p95": 38.40000182390213, - "p99": 44.03200000524521 + "p50": 126.20800733566284, + "p90": 127.71199643611908, + "p95": 128.25599312782288, + "p99": 138.65600526332855 }, "roundtrip": { - "p50": 59.42400172352791, - "p90": 61.216000467538834, - "p95": 61.63199990987778, - "p99": 69.31199878454208 + "p50": 208.3200067281723, + "p90": 212.70400285720825, + "p95": 213.50400149822235, + "p99": 231.04000091552734 }, "isolatedSum": { - "p50": 79.16799932718277, - "p90": 87.10400015115738, - "p95": 88.28800171613693, - "p99": 95.8079993724823 + "p50": 226.94400697946548, + "p90": 230.8799996972084, + "p95": 233.37599635124207, + "p99": 248.19200485944748 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 630784, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 14, - "stragglerRank": 4, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2, - "globalTokens": 16, + "tokensPerRank": 256, + "globalTokens": 2048, "dispatch": { - "p50": 42.30400174856186, - "p90": 49.375999718904495, - "p95": 49.95200037956238, - "p99": 51.80799961090088 + "p50": 135.93600690364838, + "p90": 139.39200341701508, + "p95": 140.83200693130493, + "p99": 151.7760008573532 }, "combine": { - "p50": 38.11199963092804, - "p90": 39.0079990029335, - "p95": 39.84000161290169, - "p99": 45.9199994802475 + "p50": 176.86399817466736, + "p90": 179.07199263572693, + "p95": 180.03199994564056, + "p99": 189.63199853897095 }, "roundtrip": { - "p50": 60.47999858856201, - "p90": 61.69600039720535, - "p95": 63.90400230884552, - "p99": 69.21599805355072 + "p50": 
297.63200879096985, + "p90": 303.3599853515625, + "p95": 305.63199520111084, + "p99": 315.71200489997864 }, "isolatedSum": { - "p50": 80.4160013794899, - "p90": 88.383998721838, - "p95": 89.79200199246407, - "p99": 97.72799909114838 + "p50": 312.80000507831573, + "p90": 318.463996052742, + "p95": 320.8640068769455, + "p99": 341.40799939632416 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1232896, - "combineLogicalBytes": 1232896, - "fanoutMean": 5.375, - "recvTokensMax": 21, - "stragglerRank": 4, + "dispatchLogicalBytes": 100509696, + "combineLogicalBytes": 100509696, + "fanoutMean": 3.42333984375, + "recvTokensMax": 2046, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 4, - "globalTokens": 32, + "tokensPerRank": 512, + "globalTokens": 4096, "dispatch": { - "p50": 42.367998510599136, - "p90": 49.855999648571014, - "p95": 50.20799860358238, - "p99": 57.95200169086456 + "p50": 202.4639993906021, + "p90": 210.207998752594, + "p95": 211.2320065498352, + "p99": 216.76799654960632 }, "combine": { - "p50": 37.47199848294258, - "p90": 38.7520007789135, - "p95": 39.03999924659729, - "p99": 46.30399867892265 - }, - "roundtrip": { - "p50": 59.26400050520897, - "p90": 61.983998864889145, - "p95": 63.19999694824219, - "p99": 69.50400024652481 - }, - "isolatedSum": { - "p50": 79.83999699354172, - "p90": 88.60800042748451, - "p95": 89.24799785017967, - "p99": 104.25600036978722 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2480128, - "combineLogicalBytes": 2480128, - "fanoutMean": 5.40625, - "recvTokensMax": 39, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 42.81599819660187, - "p90": 49.247998744249344, - "p95": 49.855999648571014, - "p99": 51.42400041222572 - }, - "combine": { - "p50": 37.9519984126091, - "p90": 38.784001022577286, - "p95": 40.352001786231995, - "p99": 46.39999940991402 - }, - 
"roundtrip": { - "p50": 60.63999980688095, - "p90": 68.35199892520905, - "p95": 68.80000233650208, - "p99": 69.88800317049026 - }, - "isolatedSum": { - "p50": 80.76799660921097, - "p90": 88.03199976682663, - "p95": 90.20800143480301, - "p99": 97.82399982213974 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 74, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 43.74400153756142, - "p90": 50.23999884724617, - "p95": 50.84799975156784, - "p99": 57.18399956822395 - }, - "combine": { - "p50": 38.2080003619194, - "p90": 45.791998505592346, - "p95": 46.08000069856644, - "p99": 49.056001007556915 + "p50": 325.1520097255707, + "p90": 335.07201075553894, + "p95": 335.7760012149811, + "p99": 359.23200845718384 }, "roundtrip": { - "p50": 66.91200286149979, - "p90": 69.15199756622314, - "p95": 69.98399645090103, - "p99": 76.7040029168129 + "p50": 506.84797763824463, + "p90": 513.5999917984009, + "p95": 517.7599787712097, + "p99": 538.4640097618103 }, "isolatedSum": { - "p50": 81.95200189948082, - "p90": 96.03199735283852, - "p95": 96.92800045013428, - "p99": 106.24000057578087 + "p50": 527.6160091161728, + "p90": 545.2800095081329, + "p95": 547.0080077648163, + "p99": 576.0000050067902 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 9920512, - "combineLogicalBytes": 9920512, - "fanoutMean": 5.40625, - "recvTokensMax": 145, - "stragglerRank": 4, + "dispatchLogicalBytes": 201678848, + "combineLogicalBytes": 201678848, + "fanoutMean": 3.4345703125, + "recvTokensMax": 4094, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 32, - "globalTokens": 256, + "tokensPerRank": 1024, + "globalTokens": 8192, "dispatch": { - "p50": 50.464000552892685, - "p90": 52.352000027894974, - "p95": 57.023998349905014, - "p99": 
59.90400165319443 + "p50": 347.55200147628784, + "p90": 352.9599905014038, + "p95": 354.0799915790558, + "p99": 366.2079870700836 }, "combine": { - "p50": 46.68800160288811, - "p90": 48.128001391887665, - "p95": 49.056001007556915, - "p99": 54.84800040721893 + "p50": 582.6560258865356, + "p90": 592.3839807510376, + "p95": 593.4720039367676, + "p99": 617.0560121536255 }, "roundtrip": { - "p50": 76.76800340414047, - "p90": 84.44800227880478, - "p95": 85.21600067615509, - "p99": 86.30400151014328 + "p50": 910.431981086731, + "p90": 917.8879857063293, + "p95": 920.0000166893005, + "p99": 955.6159973144531 }, "isolatedSum": { - "p50": 97.15200215578079, - "p90": 100.48000141978264, - "p95": 106.07999935746193, - "p99": 114.75200206041336 + "p50": 930.2080273628235, + "p90": 945.3439712524414, + "p95": 947.5519955158234, + "p99": 983.2639992237091 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19726336, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 287, - "stragglerRank": 2, + "dispatchLogicalBytes": 405035008, + "combineLogicalBytes": 405035008, + "fanoutMean": 3.4488525390625, + "recvTokensMax": 8189, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 64, - "globalTokens": 512, + "tokensPerRank": 2048, + "globalTokens": 16384, "dispatch": { - "p50": 61.69600039720535, - "p90": 66.6240006685257, - "p95": 67.55200028419495, - "p99": 73.7600028514862 + "p50": 640.7679915428162, + "p90": 647.4559903144836, + "p95": 654.7520160675049, + "p99": 680.7680130004883 }, "combine": { - "p50": 62.17600032687187, - "p90": 63.551999628543854, - "p95": 64.06400352716446, - "p99": 70.49600034952164 + "p50": 1063.1359815597534, + "p90": 1072.8960037231445, + "p95": 1073.6639499664307, + "p99": 1096.384048461914 }, "roundtrip": { - "p50": 102.11200267076492, - "p90": 109.8560020327568, - "p95": 110.27199774980545, - "p99": 111.39199882745743 + "p50": 1693.8879489898682, + "p90": 1702.7519941329956, + 
"p95": 1707.0399522781372, + "p99": 1791.648030281067 }, "isolatedSum": { - "p50": 123.87200072407722, - "p90": 130.17600029706955, - "p95": 131.6160038113594, - "p99": 144.25600320100784 + "p50": 1703.9039731025696, + "p90": 1720.3519940376282, + "p95": 1728.4159660339355, + "p99": 1777.1520614624023 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 38993920, - "combineLogicalBytes": 38993920, - "fanoutMean": 5.3125, - "recvTokensMax": 564, - "stragglerRank": 4, + "dispatchLogicalBytes": 808822784, + "combineLogicalBytes": 808822784, + "fanoutMean": 3.44354248046875, + "recvTokensMax": 16380, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 4096, + "globalTokens": 32768, "dispatch": { - "p50": 85.56800335645676, - "p90": 89.50400352478027, - "p95": 90.14400094747543, - "p99": 95.45599669218063 + "p50": 1251.9999742507935, + "p90": 1263.10396194458, + "p95": 1265.504002571106, + "p99": 1327.9999494552612 }, "combine": { - "p50": 91.45600348711014, - "p90": 99.16800260543823, - "p95": 99.80800002813339, - "p99": 101.05600208044052 + "p50": 2043.5841083526611, + "p90": 2046.623945236206, + "p95": 2055.6159019470215, + "p99": 2118.272066116333 }, "roundtrip": { - "p50": 158.52800011634827, - "p90": 164.60800170898438, - "p95": 166.52800142765045, - "p99": 168.38400065898895 + "p50": 3285.952091217041, + "p90": 3299.0078926086426, + "p95": 3308.896064758301, + "p99": 3355.7119369506836 }, "isolatedSum": { - "p50": 177.0240068435669, - "p90": 188.6720061302185, - "p95": 189.95200097560883, - "p99": 196.51199877262115 + "p50": 3295.5840826034546, + "p90": 3309.727907180786, + "p95": 3321.1199045181274, + "p99": 3446.2720155715942 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 1104, - "stragglerRank": 4, + "dispatchLogicalBytes": 1619795968, + "combineLogicalBytes": 
1619795968, + "fanoutMean": 3.4481201171875, + "recvTokensMax": 32761, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 @@ -23708,49 +24814,50 @@ ] }, { - "id": "cx-f1655975", - "identity": "h100|deepep|7168|8|256|bf16|ll|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|ac583971f94b176", - "colorKey": "h100_81ce2214", - "comparisonKey": "16f06985ac4d7bde", + "id": "cx-5c56d46f", + "identity": "b300|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-moderate+eplb|8|prefill|normal|none|none|0|tuned||2b57a75d27f5b39", + "colorKey": "b300_4eade0db", + "comparisonKey": "0fc5df79c3e0429b", "schemaVersion": 3, - "generatedAt": "2026-06-26T17:31:24.570568+00:00", + "generatedAt": "2026-06-27T09:49:49.297184+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h100-dgxc-slurm_02", - "sku": "h100", + "runner": "b300-nv_08", + "sku": "b300", "backend": "deepep", - "phase": "decode", - "mode": "ll", - "resourceMode": "normalized", - "suite": "resource-constrained", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", + "topologyClass": "b300-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · bf16 LL (norm)", + "label": "B300 EP8 · deepep · bf16 · zipf-moderate+eplb", + "model": "DeepSeek-V3 (EPLB physical)", "shape": { "hidden": 7168, "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", + "experts": 288, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate+eplb", "routingStep": 0, "unevenTokens": "none", - "eplbEnabled": false, + "eplbEnabled": true, "dispatchDtype": "bf16", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { - "requestedFraction": 0.18, - "achievedFraction": null, - "configuredUnits": null, - 
"deviceUnits": 132, - "resourceClass": "fixed-kernel", - "conformanceClass": "not-applicable", - "fixedKernel": true, + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, "paretoEligible": false }, "placement": { @@ -23760,313 +24867,239 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "ac583971f94b176", - "workloadId": "set:8:d8d49658059863f2", + "traceSignature": "2b57a75d27f5b39", + "workloadId": "set:6:6709a02c31933a9f", "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, + "eplbImbalanceBefore": 4.895263671875, + "eplbImbalanceAfter": 1.0000902811686199, "backendVersion": "1.2.1", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28254350430", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254350430", - "createdAt": "2026-06-26T17:27:32Z", - "sha": "60dec7d70f554e252fec87709e2be52752947db1" + "id": "28285648797", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285648797", + "createdAt": "2026-06-27T09:49:49.297184+00:00", + "sha": "149586650dbed5b7579537347e9489d5b41543c1" }, "rows": [ { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 42.49599948525429, - "p90": 50.27199909090996, - "p95": 50.87999999523163, - "p99": 57.920001447200775 - }, - "combine": { - "p50": 37.98399865627289, - "p90": 39.135999977588654, - "p95": 45.3759990632534, - "p99": 46.911999583244324 - }, - "roundtrip": { - "p50": 60.83200126886368, - "p90": 62.272001057863235, - "p95": 67.90400296449661, - "p99": 69.88800317049026 - }, - "isolatedSum": { - "p50": 80.47999814152718, - "p90": 89.40799906849861, - "p95": 96.25599905848503, - "p99": 104.8320010304451 - }, - "roundtripMeasured": true, - 
"dispatchLogicalBytes": 630784, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 14, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 49.02400076389313, - "p90": 50.40000006556511, - "p95": 50.87999999523163, - "p99": 57.11999908089638 - }, - "combine": { - "p50": 38.2080003619194, - "p90": 38.84800150990486, - "p95": 39.64800015091896, - "p99": 45.85599899291992 - }, - "roundtrip": { - "p50": 61.216000467538834, - "p90": 67.84000247716904, - "p95": 68.9919963479042, - "p99": 69.88800317049026 - }, - "isolatedSum": { - "p50": 87.23200112581253, - "p90": 89.24800157546997, - "p95": 90.52800014615059, - "p99": 102.9759980738163 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1232896, - "combineLogicalBytes": 1232896, - "fanoutMean": 5.375, - "recvTokensMax": 21, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 42.75200143456459, - "p90": 50.04800111055374, - "p95": 50.52800104022026, - "p99": 57.88800120353699 + "p50": 95.0080007314682, + "p90": 97.59999811649323, + "p95": 99.61599856615067, + "p99": 111.16799712181091 }, "combine": { - "p50": 37.9519984126091, - "p90": 38.84800150990486, - "p95": 40.44799879193306, - "p99": 46.52800038456917 + "p50": 115.29599875211716, + "p90": 116.95999652147293, + "p95": 118.8800036907196, + "p99": 139.52000439167023 }, "roundtrip": { - "p50": 60.736000537872314, - "p90": 62.431998550891876, - "p95": 67.9360032081604, - "p99": 70.0799971818924 + "p50": 193.24800372123718, + "p90": 199.42399859428406, + "p95": 200.70399343967438, + "p99": 229.08799350261688 }, "isolatedSum": { - "p50": 80.70399984717369, - "p90": 88.8960026204586, - "p95": 90.97599983215332, - "p99": 104.41600158810616 + "p50": 210.30399948358536, + "p90": 
214.55999463796616, + "p95": 218.49600225687027, + "p99": 250.68800151348114 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2480128, - "combineLogicalBytes": 2480128, - "fanoutMean": 5.40625, - "recvTokensMax": 39, - "stragglerRank": 5, + "dispatchLogicalBytes": 77385728, + "combineLogicalBytes": 77385728, + "fanoutMean": 5.271484375, + "recvTokensMax": 691, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 256, + "globalTokens": 2048, "dispatch": { - "p50": 49.12000149488449, - "p90": 50.36799982190132, - "p95": 50.783999264240265, - "p99": 56.44800141453743 + "p50": 137.56799697875977, + "p90": 140.35199582576752, + "p95": 142.2400027513504, + "p99": 159.58400070667267 }, "combine": { - "p50": 38.2080003619194, - "p90": 39.8080013692379, - "p95": 44.89599913358688, - "p99": 46.23999819159508 + "p50": 154.7520011663437, + "p90": 163.93600404262543, + "p95": 164.32000696659088, + "p99": 166.4000004529953 }, "roundtrip": { - "p50": 61.08799949288368, - "p90": 68.54400038719177, - "p95": 69.023996591568, - "p99": 70.01599669456482 + "p50": 272.2559869289398, + "p90": 278.01600098609924, + "p95": 280.64000606536865, + "p99": 293.66400837898254 }, "isolatedSum": { - "p50": 87.3280018568039, - "p90": 90.17600119113922, - "p95": 95.67999839782715, - "p99": 102.68799960613251 + "p50": 292.31999814510345, + "p90": 304.28799986839294, + "p95": 306.5600097179413, + "p99": 325.98400115966797 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 74, - "stragglerRank": 5, + "dispatchLogicalBytes": 155172864, + "combineLogicalBytes": 155172864, + "fanoutMean": 5.28515625, + "recvTokensMax": 1378, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 16, - "globalTokens": 128, + "tokensPerRank": 512, + "globalTokens": 4096, "dispatch": { - "p50": 
49.536000937223434, - "p90": 50.783999264240265, - "p95": 52.73599922657013, - "p99": 58.079998940229416 + "p50": 193.4719979763031, + "p90": 199.23199713230133, + "p95": 200.54399967193604, + "p99": 207.48800039291382 }, "combine": { - "p50": 45.24800181388855, - "p90": 46.431999653577805, - "p95": 46.68800160288811, - "p99": 48.48000034689903 + "p50": 265.79201221466064, + "p90": 274.52799677848816, + "p95": 274.9119997024536, + "p99": 285.8879864215851 }, "roundtrip": { - "p50": 68.67200136184692, - "p90": 70.30399888753891, - "p95": 75.42400062084198, - "p99": 77.504001557827 + "p50": 444.2239999771118, + "p90": 450.5600035190582, + "p95": 459.3920111656189, + "p99": 474.016010761261 }, "isolatedSum": { - "p50": 94.78400275111198, - "p90": 97.21599891781807, - "p95": 99.42400082945824, - "p99": 106.55999928712845 + "p50": 459.26401019096375, + "p90": 473.7599939107895, + "p95": 475.45599937438965, + "p99": 493.3759868144989 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 9920512, - "combineLogicalBytes": 9920512, - "fanoutMean": 5.40625, - "recvTokensMax": 145, - "stragglerRank": 7, + "dispatchLogicalBytes": 310546432, + "combineLogicalBytes": 310546432, + "fanoutMean": 5.28857421875, + "recvTokensMax": 2745, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 32, - "globalTokens": 256, + "tokensPerRank": 1024, + "globalTokens": 8192, "dispatch": { - "p50": 50.52800104022026, - "p90": 57.5999990105629, - "p95": 58.079998940229416, - "p99": 58.97599831223488 + "p50": 325.0240087509155, + "p90": 329.8240005970001, + "p95": 331.0079872608185, + "p99": 350.14399886131287 }, "combine": { - "p50": 46.592000871896744, - "p90": 53.568001836538315, - "p95": 54.207999259233475, - "p99": 55.10399863123894 + "p50": 457.7920138835907, + "p90": 459.4239890575409, + "p95": 461.95200085639954, + "p99": 473.66398572921753 }, "roundtrip": { - "p50": 77.56800204515457, - "p90": 85.34400165081024, - "p95": 85.79199761152267, - 
"p99": 86.496002972126 + "p50": 760.479986667633, + "p90": 767.7119970321655, + "p95": 772.2240090370178, + "p99": 781.9520235061646 }, "isolatedSum": { - "p50": 97.120001912117, - "p90": 111.16800084710121, - "p95": 112.28799819946289, - "p99": 114.07999694347382 + "p50": 782.8160226345062, + "p90": 789.247989654541, + "p95": 792.959988117218, + "p99": 823.8079845905304 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19726336, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 287, - "stragglerRank": 5, + "dispatchLogicalBytes": 620619776, + "combineLogicalBytes": 620619776, + "fanoutMean": 5.2845458984375, + "recvTokensMax": 5526, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 64, - "globalTokens": 512, + "tokensPerRank": 2048, + "globalTokens": 16384, "dispatch": { - "p50": 66.01600348949432, - "p90": 107.35999792814255, - "p95": 108.06400328874588, - "p99": 109.40799862146378 + "p50": 574.9120116233826, + "p90": 582.2719931602478, + "p95": 583.9999914169312, + "p99": 640.3840184211731 }, "combine": { - "p50": 62.52799928188324, - "p90": 63.93600255250931, - "p95": 65.85600227117538, - "p99": 79.29600030183792 + "p50": 830.016016960144, + "p90": 839.9360179901123, + "p95": 840.287983417511, + "p99": 852.4479866027832 }, "roundtrip": { - "p50": 102.39999741315842, - "p90": 110.1439967751503, - "p95": 110.68800091743469, - "p99": 112.89600282907486 + "p50": 1387.8079652786255, + "p90": 1396.7679738998413, + "p95": 1398.9759683609009, + "p99": 1455.1680088043213 }, "isolatedSum": { - "p50": 128.54400277137756, - "p90": 171.29600048065186, - "p95": 173.92000555992126, - "p99": 188.7039989233017 + "p50": 1404.9280285835266, + "p90": 1422.20801115036, + "p95": 1424.2879748344421, + "p99": 1492.8320050239563 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 38993920, - "combineLogicalBytes": 38993920, - "fanoutMean": 5.3125, - "recvTokensMax": 564, - "stragglerRank": 3, + 
"dispatchLogicalBytes": 1239175168, + "combineLogicalBytes": 1239175168, + "fanoutMean": 5.2757568359375, + "recvTokensMax": 11165, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 4096, + "globalTokens": 32768, "dispatch": { - "p50": 87.2960016131401, - "p90": 90.91199934482574, - "p95": 94.08000111579895, - "p99": 95.51999717950821 + "p50": 1065.2480125427246, + "p90": 1072.6079940795898, + "p95": 1079.2319774627686, + "p99": 1102.720022201538 }, "combine": { - "p50": 88.86399865150452, - "p90": 95.64799815416336, - "p95": 96.3520035147667, - "p99": 97.43999689817429 + "p50": 1539.5840406417847, + "p90": 1542.464017868042, + "p95": 1552.2559881210327, + "p99": 1614.7840023040771 }, "roundtrip": { - "p50": 153.21600437164307, - "p90": 159.90400314331055, - "p95": 160.67199409008026, - "p99": 161.95200383663177 + "p50": 2586.3358974456787, + "p90": 2598.720073699951, + "p95": 2605.4399013519287, + "p99": 2665.247917175293 }, "isolatedSum": { - "p50": 176.16000026464462, - "p90": 186.5599974989891, - "p95": 190.43200463056564, - "p99": 192.9599940776825 + "p50": 2604.8320531845093, + "p90": 2615.072011947632, + "p95": 2631.4879655838013, + "p99": 2717.5040245056152 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 1104, - "stragglerRank": 5, + "dispatchLogicalBytes": 2481604608, + "combineLogicalBytes": 2481604608, + "fanoutMean": 5.282684326171875, + "recvTokensMax": 22165, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -24074,46 +25107,47 @@ ] }, { - "id": "cx-1bb82fc0", - "identity": "h100|deepep|4096|8|128|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||75530960a30b452", - "colorKey": "h100_97196257", - "comparisonKey": "efcc4c7d487df84c", + "id": "cx-fb4f7eef", + "identity": 
"b300|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|prefill|normal|none|none|0|tuned||2b57a75d27f5b39", + "colorKey": "b300_f1ea991b", + "comparisonKey": "c5288b3181a71a36", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:51:08.338542+00:00", + "generatedAt": "2026-06-27T09:48:56.789691+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h100-dgxc-slurm_02", - "sku": "h100", + "runner": "b300-nv_13", + "sku": "b300", "backend": "deepep", - "phase": "decode", + "phase": "prefill", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "runtime-visible-v1", - "topologyClass": "h100-nvlink-island", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · fp8", + "label": "B300 EP8 · deepep · bf16 · zipf+eplb", + "model": "DeepSeek-V3 (EPLB physical)", "shape": { - "hidden": 4096, + "hidden": 7168, "topk": 8, - "experts": 128, - "routing": "uniform", - "routingLabel": "uniform", + "experts": 288, + "routing": "zipf", + "routingLabel": "zipf+eplb", "routingStep": 0, "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "fp8", + "eplbEnabled": true, + "dispatchDtype": "bf16", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1515, + "achievedFraction": 0.1351, "configuredUnits": 20, - "deviceUnits": 132, + "deviceUnits": 148, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -24126,313 +25160,239 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "75530960a30b452", - "workloadId": "set:8:d1b92539bddfb570", + "traceSignature": "2b57a75d27f5b39", + "workloadId": "set:6:830e36e88869e222", "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - 
"eplbImbalanceAfter": null, + "eplbImbalanceBefore": 4.895263671875, + "eplbImbalanceAfter": 1.0000902811686199, "backendVersion": "1.2.1", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271676478", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271676478", - "createdAt": "2026-06-26T23:50:12Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28285627928", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285627928", + "createdAt": "2026-06-27T09:48:56.789691+00:00", + "sha": "149586650dbed5b7579537347e9489d5b41543c1" }, "rows": [ { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 184.7359985113144, - "p90": 193.08799505233765, - "p95": 196.86399400234222, - "p99": 204.25599813461304 - }, - "combine": { - "p50": 49.79199916124344, - "p90": 51.96800082921982, - "p95": 53.79199981689453, - "p99": 56.86400085687637 - }, - "roundtrip": { - "p50": 218.9760059118271, - "p90": 226.52800381183624, - "p95": 230.0799936056137, - "p99": 235.6480062007904 - }, - "isolatedSum": { - "p50": 234.52799767255783, - "p90": 245.05599588155746, - "p95": 250.65599381923676, - "p99": 261.1199989914894 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 172032, - "combineLogicalBytes": 344064, - "fanoutMean": 5.25, - "recvTokensMax": 6, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 183.87199938297272, - "p90": 192.19200313091278, - "p95": 195.16800343990326, - "p99": 201.56799256801605 - }, - "combine": { - "p50": 50.87999999523163, - "p90": 54.17599901556969, - "p95": 55.67999929189682, - "p99": 59.328000992536545 - }, - "roundtrip": { - "p50": 220.12799978256226, - "p90": 227.87199914455414, - "p95": 230.43200373649597, - "p99": 237.31200397014618 - }, - "isolatedSum": { - "p50": 
234.75199937820435, - "p90": 246.36800214648247, - "p95": 250.84800273180008, - "p99": 260.8959935605526 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 352256, - "combineLogicalBytes": 704512, - "fanoutMean": 5.375, - "recvTokensMax": 12, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 187.77599930763245, - "p90": 268.0320143699646, - "p95": 271.36000990867615, - "p99": 282.49600529670715 + "p50": 94.43199634552002, + "p90": 96.73599898815155, + "p95": 99.0080013871193, + "p99": 112.5119999051094 }, "combine": { - "p50": 52.44800075888634, - "p90": 63.90400230884552, - "p95": 64.86400216817856, - "p99": 69.76000219583511 + "p50": 115.35999923944473, + "p90": 116.22399836778641, + "p95": 117.37599968910217, + "p99": 128.4160017967224 }, "roundtrip": { - "p50": 225.3440022468567, - "p90": 308.9280128479004, - "p95": 312.48000264167786, - "p99": 320.5440044403076 + "p50": 195.71200013160706, + "p90": 200.51200687885284, + "p95": 201.31200551986694, + "p99": 211.61599457263947 }, "isolatedSum": { - "p50": 240.22400006651878, - "p90": 331.9360166788101, - "p95": 336.2240120768547, - "p99": 352.25600749254227 + "p50": 209.79199558496475, + "p90": 212.95999735593796, + "p95": 216.38400107622147, + "p99": 240.92800170183182 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 692224, - "combineLogicalBytes": 1384448, - "fanoutMean": 5.28125, - "recvTokensMax": 26, - "stragglerRank": 7, + "dispatchLogicalBytes": 77385728, + "combineLogicalBytes": 77385728, + "fanoutMean": 5.271484375, + "recvTokensMax": 691, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 256, + "globalTokens": 2048, "dispatch": { - "p50": 184.03199315071106, - "p90": 193.31200420856476, - "p95": 197.79199361801147, - "p99": 205.9839963912964 + 
"p50": 137.40800321102142, + "p90": 139.90400731563568, + "p95": 140.83200693130493, + "p99": 150.176003575325 }, "combine": { - "p50": 51.7439991235733, - "p90": 55.296000093221664, - "p95": 57.312000542879105, - "p99": 63.19999694824219 + "p50": 153.85599434375763, + "p90": 163.2319986820221, + "p95": 163.7440025806427, + "p99": 175.6799966096878 }, "roundtrip": { - "p50": 220.8320051431656, - "p90": 228.7680059671402, - "p95": 231.455996632576, - "p99": 239.55200612545013 + "p50": 272.09600806236267, + "p90": 277.15200185775757, + "p95": 278.4639894962311, + "p99": 286.46400570869446 }, "isolatedSum": { - "p50": 235.77599227428436, - "p90": 248.60800430178642, - "p95": 255.10399416089058, - "p99": 269.1839933395386 + "p50": 291.26399755477905, + "p90": 303.1360059976578, + "p95": 304.57600951194763, + "p99": 325.8560001850128 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1372160, - "combineLogicalBytes": 2744320, - "fanoutMean": 5.234375, - "recvTokensMax": 49, - "stragglerRank": 7, + "dispatchLogicalBytes": 155172864, + "combineLogicalBytes": 155172864, + "fanoutMean": 5.28515625, + "recvTokensMax": 1378, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 16, - "globalTokens": 128, + "tokensPerRank": 512, + "globalTokens": 4096, "dispatch": { - "p50": 187.96800076961517, - "p90": 273.24798703193665, - "p95": 286.6879999637604, - "p99": 400.06399154663086 + "p50": 193.53599846363068, + "p90": 200.8959949016571, + "p95": 202.2079974412918, + "p99": 214.84799683094025 }, "combine": { - "p50": 53.75999957323074, - "p90": 65.15199691057205, - "p95": 67.45599955320358, - "p99": 75.23199915885925 + "p50": 265.1839852333069, + "p90": 274.1760015487671, + "p95": 274.78399872779846, + "p99": 279.04000878334045 }, "roundtrip": { - "p50": 225.600004196167, - "p90": 310.8479976654053, - "p95": 322.6880133152008, - "p99": 449.7919976711273 + "p50": 440.8000111579895, + "p90": 447.7440118789673, + "p95": 
449.15199279785156, + "p99": 459.03998613357544 }, "isolatedSum": { - "p50": 241.72800034284592, - "p90": 338.3999839425087, - "p95": 354.14399951696396, - "p99": 475.2959907054901 + "p50": 458.71998369693756, + "p90": 475.0719964504242, + "p95": 476.99199616909027, + "p99": 493.8880056142807 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2732032, - "combineLogicalBytes": 5464064, - "fanoutMean": 5.2109375, - "recvTokensMax": 94, - "stragglerRank": 5, + "dispatchLogicalBytes": 310546432, + "combineLogicalBytes": 310546432, + "fanoutMean": 5.28857421875, + "recvTokensMax": 2745, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 32, - "globalTokens": 256, + "tokensPerRank": 1024, + "globalTokens": 8192, "dispatch": { - "p50": 189.11999464035034, - "p90": 271.36000990867615, - "p95": 286.9440019130707, - "p99": 324.0959942340851 + "p50": 325.6959915161133, + "p90": 330.3680121898651, + "p95": 331.84000849723816, + "p99": 342.72000193595886 }, "combine": { - "p50": 56.44800141453743, - "p90": 68.57600063085556, - "p95": 69.11999732255936, - "p99": 73.56800138950348 + "p50": 450.3360092639923, + "p90": 459.3920111656189, + "p95": 460.4479968547821, + "p99": 472.6080000400543 }, "roundtrip": { - "p50": 226.27200186252594, - "p90": 234.14400219917297, - "p95": 238.68800699710846, - "p99": 254.27201390266418 + "p50": 759.4239711761475, + "p90": 766.2720084190369, + "p95": 770.3679800033569, + "p99": 786.6560220718384 }, "isolatedSum": { - "p50": 245.56799605488777, - "p90": 339.9360105395317, - "p95": 356.06399923563004, - "p99": 397.66399562358856 + "p50": 776.0320007801056, + "p90": 789.760023355484, + "p95": 792.2880053520203, + "p99": 815.3280019760132 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 5562368, - "combineLogicalBytes": 11124736, - "fanoutMean": 5.3046875, - "recvTokensMax": 186, - "stragglerRank": 7, + "dispatchLogicalBytes": 620619776, + "combineLogicalBytes": 620619776, + "fanoutMean": 
5.2845458984375, + "recvTokensMax": 5526, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 64, - "globalTokens": 512, + "tokensPerRank": 2048, + "globalTokens": 16384, "dispatch": { - "p50": 189.34400379657745, - "p90": 270.08000016212463, - "p95": 275.2639949321747, - "p99": 289.98398780822754 + "p50": 574.4640231132507, + "p90": 581.9839835166931, + "p95": 583.9359760284424, + "p99": 595.9039926528931 }, "combine": { - "p50": 64.60800021886826, - "p90": 76.89599692821503, - "p95": 78.23999971151352, - "p99": 82.2720006108284 + "p50": 828.9600014686584, + "p90": 839.9680256843567, + "p95": 840.4160141944885, + "p99": 851.9359827041626 }, "roundtrip": { - "p50": 238.3359968662262, - "p90": 318.015992641449, - "p95": 321.4719891548157, - "p99": 329.72800731658936 + "p50": 1387.0079517364502, + "p90": 1396.83198928833, + "p95": 1399.5200395584106, + "p99": 1415.1999950408936 }, "isolatedSum": { - "p50": 253.9520040154457, - "p90": 346.97599709033966, - "p95": 353.5039946436882, - "p99": 372.25598841905594 + "p50": 1403.4240245819092, + "p90": 1421.9520092010498, + "p95": 1424.351990222931, + "p99": 1447.8399753570557 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 11096064, - "combineLogicalBytes": 22192128, - "fanoutMean": 5.291015625, - "recvTokensMax": 358, - "stragglerRank": 5, + "dispatchLogicalBytes": 1239175168, + "combineLogicalBytes": 1239175168, + "fanoutMean": 5.2757568359375, + "recvTokensMax": 11165, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 4096, + "globalTokens": 32768, "dispatch": { - "p50": 192.19200313091278, - "p90": 272.15999364852905, - "p95": 275.7120132446289, - "p99": 291.29600524902344 + "p50": 1063.904047012329, + "p90": 1069.216012954712, + "p95": 1072.543978691101, + "p99": 1081.1200141906738 }, "combine": { - "p50": 78.17599922418594, - "p90": 87.93599903583527, - "p95": 
89.15200084447861, - "p99": 95.20000219345093 + "p50": 1530.303955078125, + "p90": 1540.4800176620483, + "p95": 1541.9520139694214, + "p99": 1576.799988746643 }, "roundtrip": { - "p50": 255.3279995918274, - "p90": 335.6480002403259, - "p95": 343.9359962940216, - "p99": 380.0320029258728 + "p50": 2580.832004547119, + "p90": 2592.2560691833496, + "p95": 2598.8481044769287, + "p99": 2691.8399333953857 }, "isolatedSum": { - "p50": 270.3680023550987, - "p90": 360.0959926843643, - "p95": 364.8640140891075, - "p99": 386.49600744247437 + "p50": 2594.208002090454, + "p90": 2609.6960306167603, + "p95": 2614.4959926605225, + "p99": 2657.920002937317 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 22282240, - "combineLogicalBytes": 44564480, - "fanoutMean": 5.3125, - "recvTokensMax": 699, - "stragglerRank": 7, + "dispatchLogicalBytes": 2481604608, + "combineLogicalBytes": 2481604608, + "fanoutMean": 5.282684326171875, + "recvTokensMax": 22165, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 @@ -24440,50 +25400,51 @@ ] }, { - "id": "cx-c961a187", - "identity": "h100|deepep|5120|8|160|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||be1b44a963bd4ef", - "colorKey": "h100_97196257", - "comparisonKey": "994b6e44326c8d14", + "id": "cx-e7727ce9", + "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|normalized|0.18|64d989e2e2a6b31", + "colorKey": "b300_c1ad910f", + "comparisonKey": "9532205a80f3d757", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:51:36.382828+00:00", + "generatedAt": "2026-06-26T17:38:48.516779+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h100-dgxc-slurm_11", - "sku": "h100", + "runner": "b300-nv_15", + "sku": "b300", "backend": "deepep", - "phase": "decode", + "phase": "prefill", "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", + "resourceMode": "normalized", + "suite": "resource-constrained", 
"comparisonClass": "standardized", - "measurementContract": "runtime-visible-v1", - "topologyClass": "h100-nvlink-island", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · fp8", + "label": "B300 EP8 · deepep · bf16 (norm)", + "model": "DeepSeek-V3/V4", "shape": { - "hidden": 5120, + "hidden": 7168, "topk": 8, - "experts": 160, + "experts": 256, "routing": "uniform", "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, - "dispatchDtype": "fp8", + "dispatchDtype": "bf16", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", + "requestedFraction": 0.18, + "achievedFraction": 0.1824, + "configuredUnits": 27, + "deviceUnits": 148, + "resourceClass": "resource-constrained", + "conformanceClass": "resource-conforming", "fixedKernel": false, - "paretoEligible": false + "paretoEligible": true }, "placement": { "kind": "packed", @@ -24492,8 +25453,8 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "be1b44a963bd4ef", - "workloadId": "set:8:34e5874082f8ea8f", + "traceSignature": "64d989e2e2a6b31", + "workloadId": "set:6:a426d66e479dc893", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -24501,304 +25462,230 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271691858", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271691858", - "createdAt": "2026-06-26T23:50:38Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28254469772", + "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254469772", + "createdAt": "2026-06-26T17:38:48.516779+00:00", + "sha": "60dec7d70f554e252fec87709e2be52752947db1" }, "rows": [ { - "tokensPerRank": 1, - "globalTokens": 8, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 196.03200256824493, - "p90": 203.48800718784332, - "p95": 207.32800662517548, - "p99": 214.9759978055954 + "p50": 94.11200135946274, + "p90": 98.9760011434555, + "p95": 100.54399818181992, + "p99": 116.44800007343292 }, "combine": { - "p50": 53.727999329566956, - "p90": 55.48800155520439, - "p95": 57.760000228881836, - "p99": 60.80000102519989 + "p50": 115.1999980211258, + "p90": 115.9679964184761, + "p95": 116.89600348472595, + "p99": 129.02399897575378 }, "roundtrip": { - "p50": 231.26399517059326, - "p90": 238.91200125217438, - "p95": 242.36799776554108, - "p99": 250.0160038471222 + "p50": 193.2159960269928, + "p90": 198.43199849128723, + "p95": 199.8080015182495, + "p99": 217.50399470329285 }, "isolatedSum": { - "p50": 249.7600018978119, - "p90": 258.9760087430477, - "p95": 265.0880068540573, - "p99": 275.7759988307953 + "p50": 209.31199938058853, + "p90": 214.9439975619316, + "p95": 217.44000166654587, + "p99": 245.4719990491867 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 215040, - "combineLogicalBytes": 430080, - "fanoutMean": 5.25, - "recvTokensMax": 8, - "stragglerRank": 2, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2, - "globalTokens": 16, + "tokensPerRank": 256, + "globalTokens": 2048, "dispatch": { - "p50": 195.80799341201782, - "p90": 202.78400182724, - "p95": 205.1199972629547, - "p99": 212.12799847126007 + "p50": 135.42400300502777, + "p90": 138.75199854373932, + "p95": 141.184002161026, + "p99": 151.0079950094223 }, "combine": { - "p50": 55.93600124120712, - "p90": 
57.53599852323532, - "p95": 59.93599817156792, - "p99": 62.880001962184906 + "p50": 154.59200739860535, + "p90": 163.90399634838104, + "p95": 164.5440012216568, + "p99": 176.54399573802948 }, "roundtrip": { - "p50": 233.60000550746918, - "p90": 240.9600019454956, - "p95": 243.13600361347198, - "p99": 255.10400533676147 + "p50": 271.67999744415283, + "p90": 277.6319980621338, + "p95": 280.70399165153503, + "p99": 291.3599908351898 }, "isolatedSum": { - "p50": 251.74399465322495, - "p90": 260.3200003504753, - "p95": 265.05599543452263, - "p99": 275.008000433445 + "p50": 290.0160104036331, + "p90": 302.65599489212036, + "p95": 305.7280033826828, + "p99": 327.5519907474518 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 440320, - "combineLogicalBytes": 880640, - "fanoutMean": 5.375, - "recvTokensMax": 13, - "stragglerRank": 2, + "dispatchLogicalBytes": 155889664, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 4, - "globalTokens": 32, + "tokensPerRank": 512, + "globalTokens": 4096, "dispatch": { - "p50": 200.15999674797058, - "p90": 287.48801350593567, - "p95": 290.2719974517822, - "p99": 298.17599058151245 + "p50": 193.24800372123718, + "p90": 199.61600005626678, + "p95": 200.80000162124634, + "p99": 206.68800175189972 }, "combine": { - "p50": 57.11999908089638, - "p90": 68.67200136184692, - "p95": 69.56800073385239, - "p99": 75.3600001335144 + "p50": 265.8880054950714, + "p90": 274.59201216697693, + "p95": 275.2000093460083, + "p99": 286.78399324417114 }, "roundtrip": { - "p50": 238.01599442958832, - "p90": 328.5120129585266, - "p95": 332.73598551750183, - "p99": 340.1600122451782 + "p50": 442.59199500083923, + "p90": 448.96000623703003, + "p95": 455.00800013542175, + "p99": 461.40798926353455 }, "isolatedSum": { - "p50": 257.27999582886696, - "p90": 356.1600148677826, - "p95": 359.8399981856346, - "p99": 
373.53599071502686 + "p50": 459.1360092163086, + "p90": 474.2080122232437, + "p95": 476.00001096725464, + "p99": 493.47199499607086 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 870400, - "combineLogicalBytes": 1740800, - "fanoutMean": 5.3125, - "recvTokensMax": 25, - "stragglerRank": 2, + "dispatchLogicalBytes": 312266752, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 1024, + "globalTokens": 8192, "dispatch": { - "p50": 199.072003364563, - "p90": 282.1120023727417, - "p95": 285.8240008354187, - "p99": 292.7359938621521 + "p50": 326.2079954147339, + "p90": 329.75998520851135, + "p95": 331.6799998283386, + "p99": 341.6000008583069 }, "combine": { - "p50": 57.5999990105629, - "p90": 66.14399701356888, - "p95": 66.72000139951706, - "p99": 71.48800045251846 - }, - "roundtrip": { - "p50": 236.32000386714935, - "p90": 315.3280019760132, - "p95": 318.91199946403503, - "p99": 326.2079954147339 - }, - "isolatedSum": { - "p50": 256.6720023751259, - "p90": 348.2559993863106, - "p95": 352.54400223493576, - "p99": 364.22399431467056 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1735680, - "combineLogicalBytes": 3471360, - "fanoutMean": 5.296875, - "recvTokensMax": 50, - "stragglerRank": 2, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 199.71199333667755, - "p90": 288.86398673057556, - "p95": 291.23198986053467, - "p99": 296.4160144329071 - }, - "combine": { - "p50": 58.62399935722351, - "p90": 70.14399766921997, - "p95": 71.03999704122543, - "p99": 74.11199808120728 - }, - "roundtrip": { - "p50": 239.19999599456787, - "p90": 329.75998520851135, - "p95": 332.5439989566803, - "p99": 338.3359909057617 - }, - "isolatedSum": { - "p50": 258.33599269390106, - "p90": 359.00798439979553, - 
"p95": 362.2719869017601, - "p99": 370.5280125141144 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 3456000, - "combineLogicalBytes": 6912000, - "fanoutMean": 5.2734375, - "recvTokensMax": 93, - "stragglerRank": 2, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 200.3519982099533, - "p90": 288.2559895515442, - "p95": 290.49599170684814, - "p99": 295.1360046863556 - }, - "combine": { - "p50": 63.040003180503845, - "p90": 73.44000041484833, - "p95": 73.95199686288834, - "p99": 79.45600152015686 + "p50": 457.66401290893555, + "p90": 459.77601408958435, + "p95": 469.760000705719, + "p99": 473.7600088119507 }, "roundtrip": { - "p50": 244.25600469112396, - "p90": 330.7200074195862, - "p95": 333.24798941612244, - "p99": 339.35999870300293 + "p50": 762.5920176506042, + "p90": 771.7440128326416, + "p95": 774.2080092430115, + "p99": 789.6320223808289 }, "isolatedSum": { - "p50": 263.39200139045715, - "p90": 361.6959899663925, - "p95": 364.4479885697365, - "p99": 374.59200620651245 + "p50": 783.8720083236694, + "p90": 789.5359992980957, + "p95": 801.4400005340576, + "p99": 815.3600096702576 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 6988800, - "combineLogicalBytes": 13977600, - "fanoutMean": 5.33203125, - "recvTokensMax": 179, - "stragglerRank": 2, + "dispatchLogicalBytes": 623443968, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 64, - "globalTokens": 512, + "tokensPerRank": 2048, + "globalTokens": 16384, "dispatch": { - "p50": 199.5519995689392, - "p90": 287.55199909210205, - "p95": 291.6480004787445, - "p99": 305.5360019207001 + "p50": 577.1200060844421, + "p90": 582.5920104980469, + "p95": 583.5520029067993, + "p99": 591.2960171699524 }, "combine": { - "p50": 73.34399968385696, - "p90": 85.02399921417236, - "p95": 
86.5280032157898, - "p99": 89.72799777984619 + "p50": 817.2799944877625, + "p90": 828.4159898757935, + "p95": 831.8719863891602, + "p99": 913.4079813957214 }, "roundtrip": { - "p50": 254.72000241279602, - "p90": 339.83999490737915, - "p95": 342.97600388526917, - "p99": 349.5680093765259 + "p50": 1376.9279718399048, + "p90": 1386.9119882583618, + "p95": 1392.7680253982544, + "p99": 1453.8240432739258 }, "isolatedSum": { - "p50": 272.8959992527962, - "p90": 372.5759983062744, - "p95": 378.1760036945343, - "p99": 395.26399970054626 + "p50": 1394.4000005722046, + "p90": 1411.0080003738403, + "p95": 1415.4239892959595, + "p99": 1504.7039985656738 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 13987840, - "combineLogicalBytes": 27975680, - "fanoutMean": 5.3359375, - "recvTokensMax": 355, - "stragglerRank": 2, + "dispatchLogicalBytes": 1243805696, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 4096, + "globalTokens": 32768, "dispatch": { - "p50": 206.33600652217865, - "p90": 288.32000494003296, - "p95": 292.4480140209198, - "p99": 296.671986579895 + "p50": 1069.5040225982666, + "p90": 1078.0160427093506, + "p95": 1080.2559852600098, + "p99": 1090.880036354065 }, "combine": { - "p50": 86.87999844551086, - "p90": 100.19200295209885, - "p95": 104.63999956846237, - "p99": 326.24000310897827 + "p50": 1528.8959741592407, + "p90": 1540.4479503631592, + "p95": 1542.688012123108, + "p99": 1554.751992225647 }, "roundtrip": { - "p50": 274.944007396698, - "p90": 355.0719916820526, - "p95": 358.8480055332184, - "p99": 364.8959994316101 + "p50": 2581.9520950317383, + "p90": 2594.6240425109863, + "p95": 2602.303981781006, + "p99": 2637.9199028015137 }, "isolatedSum": { - "p50": 293.2160049676895, - "p90": 388.5120078921318, - "p95": 397.0880135893822, - "p99": 622.9119896888733 + "p50": 
2598.3999967575073, + "p90": 2618.4639930725098, + "p95": 2622.9439973831177, + "p99": 2645.632028579712 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 27837440, - "combineLogicalBytes": 55674880, - "fanoutMean": 5.3095703125, - "recvTokensMax": 699, - "stragglerRank": 2, + "dispatchLogicalBytes": 2487009280, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 @@ -24806,50 +25693,51 @@ ] }, { - "id": "cx-55a4c230", - "identity": "h100|deepep|6144|8|256|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", - "colorKey": "h100_97196257", - "comparisonKey": "8ab5124e24ec36ab", + "id": "cx-5fd5a06c", + "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|prefill|normal|none|none|0|normalized|0.18|0a3064a2af0dd39", + "colorKey": "b300_0622d929", + "comparisonKey": "8c83b99af9d27709", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:52:02.860609+00:00", + "generatedAt": "2026-06-26T18:11:00.153293+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h100-dgxc-slurm_19", - "sku": "h100", + "runner": "b300-nv_10", + "sku": "b300", "backend": "deepep", - "phase": "decode", + "phase": "prefill", "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", + "resourceMode": "normalized", + "suite": "resource-constrained", "comparisonClass": "standardized", - "measurementContract": "runtime-visible-v1", - "topologyClass": "h100-nvlink-island", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · fp8", + "label": "B300 EP8 · deepep · bf16 (norm) · balanced", + "model": "DeepSeek-V3/V4", "shape": { - "hidden": 6144, + "hidden": 7168, "topk": 8, "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", + "routing": 
"balanced", + "routingLabel": "balanced", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, - "dispatchDtype": "fp8", + "dispatchDtype": "bf16", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", + "requestedFraction": 0.18, + "achievedFraction": 0.1824, + "configuredUnits": 27, + "deviceUnits": 148, + "resourceClass": "resource-constrained", + "conformanceClass": "resource-conforming", "fixedKernel": false, - "paretoEligible": false + "paretoEligible": true }, "placement": { "kind": "packed", @@ -24858,8 +25746,8 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "ac583971f94b176", - "workloadId": "set:8:2e0df6a62cd0143e", + "traceSignature": "0a3064a2af0dd39", + "workloadId": "set:6:2dad1a73ff872905", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -24867,304 +25755,230 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271706435", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271706435", - "createdAt": "2026-06-26T23:51:06Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28254508907", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254508907", + "createdAt": "2026-06-26T18:11:00.153293+00:00", + "sha": "60dec7d70f554e252fec87709e2be52752947db1" }, "rows": [ { - "tokensPerRank": 1, - "globalTokens": 8, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 196.8960016965866, - "p90": 227.77600586414337, - "p95": 297.40801453590393, - "p99": 503.32802534103394 + "p50": 105.79200088977814, + "p90": 108.83200168609619, + "p95": 111.00800335407257, + "p99": 118.9119964838028 }, "combine": { 
- "p50": 57.920001447200775, - "p90": 62.144000083208084, - "p95": 67.10399687290192, - "p99": 282.0799946784973 + "p50": 130.0159990787506, + "p90": 139.20000195503235, + "p95": 139.74399864673615, + "p99": 150.84800124168396 }, "roundtrip": { - "p50": 237.40799725055695, - "p90": 243.77599358558655, - "p95": 245.31200528144836, - "p99": 250.0160038471222 + "p50": 228.38400304317474, + "p90": 234.65600609779358, + "p95": 235.61599850654602, + "p99": 252.28801369667053 }, "isolatedSum": { - "p50": 254.81600314378738, - "p90": 289.92000594735146, - "p95": 364.51201140880585, - "p99": 785.4080200195312 + "p50": 235.80799996852875, + "p90": 248.03200364112854, + "p95": 250.75200200080872, + "p99": 269.75999772548676 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 270336, - "combineLogicalBytes": 540672, - "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 1, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 8, + "recvTokensMax": 1024, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2, - "globalTokens": 16, + "tokensPerRank": 256, + "globalTokens": 2048, "dispatch": { - "p50": 197.1839964389801, - "p90": 204.92799580097198, - "p95": 207.45599269866943, - "p99": 214.6880030632019 + "p50": 159.36000645160675, + "p90": 162.56000101566315, + "p95": 163.90399634838104, + "p99": 170.59199512004852 }, "combine": { - "p50": 58.49599838256836, - "p90": 60.92799827456474, - "p95": 63.26399743556976, - "p99": 70.65600156784058 + "p50": 201.34399831295013, + "p90": 203.96800339221954, + "p95": 211.45600080490112, + "p99": 224.86400604248047 }, "roundtrip": { - "p50": 237.56800591945648, - "p90": 243.96799504756927, - "p95": 247.29600548744202, - "p99": 255.61600923538208 + "p50": 334.879994392395, + "p90": 340.03201127052307, + "p95": 342.0479893684387, + "p99": 360.28799414634705 }, "isolatedSum": { - "p50": 255.67999482154846, - "p90": 265.8559940755367, - "p95": 
270.7199901342392, - "p99": 285.3440046310425 + "p50": 360.7040047645569, + "p90": 366.5280044078827, + "p95": 375.35999715328217, + "p99": 395.456001162529 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 528384, - "combineLogicalBytes": 1056768, - "fanoutMean": 5.375, - "recvTokensMax": 13, - "stragglerRank": 0, + "dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 8, + "recvTokensMax": 2048, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 4, - "globalTokens": 32, + "tokensPerRank": 512, + "globalTokens": 4096, "dispatch": { - "p50": 200.47999918460846, - "p90": 282.71999955177307, - "p95": 291.20001196861267, - "p99": 401.2480080127716 + "p50": 232.67200589179993, + "p90": 240.76800048351288, + "p95": 244.60799992084503, + "p99": 252.22399830818176 }, "combine": { - "p50": 59.90400165319443, - "p90": 66.84800237417221, - "p95": 69.5360004901886, - "p99": 75.68000257015228 + "p50": 338.01600337028503, + "p90": 347.8719890117645, + "p95": 348.7040102481842, + "p99": 361.407995223999 }, "roundtrip": { - "p50": 243.20000410079956, - "p90": 321.9839930534363, - "p95": 326.7199993133545, - "p99": 334.75199341773987 + "p50": 553.9519786834717, + "p90": 560.2239966392517, + "p95": 564.3839836120605, + "p99": 589.8879766464233 }, "isolatedSum": { - "p50": 260.3840008378029, - "p90": 349.5680019259453, - "p95": 360.73601245880127, - "p99": 476.9280105829239 + "p50": 570.688009262085, + "p90": 588.6399894952774, + "p95": 593.3120101690292, + "p99": 613.6319935321808 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1062912, - "combineLogicalBytes": 2125824, - "fanoutMean": 5.40625, - "recvTokensMax": 29, - "stragglerRank": 4, + "dispatchLogicalBytes": 469762048, + "combineLogicalBytes": 469762048, + "fanoutMean": 8, + "recvTokensMax": 4096, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 8, - "globalTokens": 64, + 
"tokensPerRank": 1024, + "globalTokens": 8192, "dispatch": { - "p50": 200.6399929523468, - "p90": 261.9200050830841, - "p95": 265.6959891319275, - "p99": 275.1680016517639 + "p50": 409.5360040664673, + "p90": 415.0719940662384, + "p95": 416.76801443099976, + "p99": 433.50398540496826 }, "combine": { - "p50": 60.99199876189232, - "p90": 69.2799985408783, - "p95": 69.88800317049026, - "p99": 75.32799988985062 + "p50": 594.3359732627869, + "p90": 599.7120141983032, + "p95": 606.2399744987488, + "p99": 619.2640066146851 }, "roundtrip": { - "p50": 239.9040013551712, - "p90": 296.9599962234497, - "p95": 299.8400032520294, - "p99": 307.5200021266937 + "p50": 986.1119985580444, + "p90": 993.5680031776428, + "p95": 998.8160133361816, + "p99": 1015.8400535583496 }, "isolatedSum": { - "p50": 261.6319917142391, - "p90": 331.2000036239624, - "p95": 335.58399230241776, - "p99": 350.49600154161453 + "p50": 1003.8719773292542, + "p90": 1014.7840082645416, + "p95": 1023.0079889297485, + "p99": 1052.7679920196533 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2131968, - "combineLogicalBytes": 4263936, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 1, + "dispatchLogicalBytes": 939524096, + "combineLogicalBytes": 939524096, + "fanoutMean": 8, + "recvTokensMax": 8192, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 16, - "globalTokens": 128, + "tokensPerRank": 2048, + "globalTokens": 16384, "dispatch": { - "p50": 201.75999402999878, - "p90": 280.3199887275696, - "p95": 284.89598631858826, - "p99": 351.48799419403076 + "p50": 756.384015083313, + "p90": 767.3280239105225, + "p95": 769.6639895439148, + "p99": 787.7439856529236 }, "combine": { - "p50": 61.76000088453293, - "p90": 69.72800195217133, - "p95": 72.92799651622772, - "p99": 133.82400572299957 + "p50": 1112.671971321106, + "p90": 1122.8480339050293, + "p95": 1133.6640119552612, + "p99": 1208.4800004959106 }, "roundtrip": { - "p50": 245.82399427890778, 
- "p90": 325.53601264953613, - "p95": 328.8959860801697, - "p99": 600.3199815750122 + "p50": 1856.0960292816162, + "p90": 1870.6879615783691, + "p95": 1877.087950706482, + "p99": 1941.5040016174316 }, "isolatedSum": { - "p50": 263.5199949145317, - "p90": 350.0479906797409, - "p95": 357.823982834816, - "p99": 485.31199991703033 + "p50": 1869.055986404419, + "p90": 1890.1760578155518, + "p95": 1903.328001499176, + "p99": 1996.2239861488342 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4251648, - "combineLogicalBytes": 8503296, - "fanoutMean": 5.40625, - "recvTokensMax": 92, - "stragglerRank": 1, + "dispatchLogicalBytes": 1879048192, + "combineLogicalBytes": 1879048192, + "fanoutMean": 8, + "recvTokensMax": 16384, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 32, - "globalTokens": 256, + "tokensPerRank": 4096, + "globalTokens": 32768, "dispatch": { - "p50": 200.73600113391876, - "p90": 285.0559949874878, - "p95": 287.9680097103119, - "p99": 303.42400074005127 + "p50": 1458.8799476623535, + "p90": 1475.0720262527466, + "p95": 1481.4079999923706, + "p99": 1536.8640422821045 }, "combine": { - "p50": 66.78400188684464, - "p90": 78.20799946784973, - "p95": 79.93599772453308, - "p99": 83.8719978928566 + "p50": 2142.047882080078, + "p90": 2154.560089111328, + "p95": 2158.9438915252686, + "p99": 2215.9039974212646 }, "roundtrip": { - "p50": 249.9839961528778, - "p90": 319.487988948822, - "p95": 328.8959860801697, - "p99": 336.35199069976807 + "p50": 3584.160089492798, + "p90": 3605.760097503662, + "p95": 3613.152027130127, + "p99": 3669.503927230835 }, "isolatedSum": { - "p50": 267.5200030207634, - "p90": 363.2639944553375, - "p95": 367.90400743484497, - "p99": 387.29599863290787 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 8454144, - "combineLogicalBytes": 16908288, - "fanoutMean": 5.375, - "recvTokensMax": 182, - "stragglerRank": 1, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - 
"tokensPerRank": 64, - "globalTokens": 512, - "dispatch": { - "p50": 200.73600113391876, - "p90": 281.2480032444, - "p95": 289.11998867988586, - "p99": 304.9919903278351 - }, - "combine": { - "p50": 77.11999863386154, - "p90": 84.1279998421669, - "p95": 86.40000224113464, - "p99": 95.77599912881851 - }, - "roundtrip": { - "p50": 259.5840096473694, - "p90": 337.8559947013855, - "p95": 341.3439989089966, - "p99": 350.5280017852783 - }, - "isolatedSum": { - "p50": 277.8559997677803, - "p90": 365.3760030865669, - "p95": 375.5199909210205, - "p99": 400.7679894566536 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 16711680, - "combineLogicalBytes": 33423360, - "fanoutMean": 5.3125, - "recvTokensMax": 367, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 212.5760018825531, - "p90": 282.1759879589081, - "p95": 286.5920066833496, - "p99": 307.96799063682556 - }, - "combine": { - "p50": 92.06400066614151, - "p90": 98.11200201511383, - "p95": 99.48799759149551, - "p99": 103.74400019645691 - }, - "roundtrip": { - "p50": 289.44000601768494, - "p90": 355.3279936313629, - "p95": 359.71200466156006, - "p99": 366.91200733184814 - }, - "isolatedSum": { - "p50": 304.6400025486946, - "p90": 380.2879899740219, - "p95": 386.0800042748451, - "p99": 411.71199083328247 + "p50": 3600.9278297424316, + "p90": 3629.6321153640747, + "p95": 3640.351891517639, + "p99": 3752.768039703369 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 33288192, - "combineLogicalBytes": 66576384, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 4, + "dispatchLogicalBytes": 3758096384, + "combineLogicalBytes": 3758096384, + "fanoutMean": 8, + "recvTokensMax": 32768, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -25172,50 +25986,51 @@ ] }, { - "id": "cx-416fcf7d", - "identity": 
"h100|deepep|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", - "colorKey": "h100_a96c99f3", - "comparisonKey": "2a90693171512d11", + "id": "cx-6620cae5", + "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|prefill|normal|none|none|0|normalized|0.18|b5217e990b95f86", + "colorKey": "b300_01ab5b1a", + "comparisonKey": "5702bf02b3927f32", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:47:58.225003+00:00", + "generatedAt": "2026-06-26T23:38:15.541333+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h100-dgxc-slurm_19", - "sku": "h100", + "runner": "b300-nv_06", + "sku": "b300", "backend": "deepep", - "phase": "decode", + "phase": "prefill", "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", + "resourceMode": "normalized", + "suite": "resource-constrained", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", + "topologyClass": "b300-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · fp8", + "label": "B300 EP8 · deepep · bf16 (norm) · zipf", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", + "routing": "zipf", + "routingLabel": "zipf", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, - "dispatchDtype": "fp8", + "dispatchDtype": "bf16", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", + "requestedFraction": 0.18, + "achievedFraction": 0.1824, + "configuredUnits": 27, + "deviceUnits": 148, + "resourceClass": "resource-constrained", + "conformanceClass": "resource-conforming", "fixedKernel": false, - 
"paretoEligible": false + "paretoEligible": true }, "placement": { "kind": "packed", @@ -25224,8 +26039,8 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "ac583971f94b176", - "workloadId": "set:8:d8d49658059863f2", + "traceSignature": "b5217e990b95f86", + "workloadId": "set:6:830e36e88869e222", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -25233,304 +26048,230 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271567087", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271567087", - "createdAt": "2026-06-26T23:46:45Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28271231753", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271231753", + "createdAt": "2026-06-26T23:38:15.541333+00:00", + "sha": "ee4ffe77871d0200cb4a78c96d3ae9f692e9af02" }, "rows": [ { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 98.9760011434555, - "p90": 106.65600001811981, - "p95": 116.06399714946747, - "p99": 156.5759927034378 - }, - "combine": { - "p50": 72.03199714422226, - "p90": 74.65600222349167, - "p95": 77.60000228881836, - "p99": 90.04800021648407 - }, - "roundtrip": { - "p50": 190.8160001039505, - "p90": 195.2960044145584, - "p95": 198.04799556732178, - "p99": 214.08000588417053 - }, - "isolatedSum": { - "p50": 171.00799828767776, - "p90": 181.31200224161148, - "p95": 193.66399943828583, - "p99": 246.62399291992188 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 315392, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 70.39999961853027, - "p90": 100.3199964761734, - "p95": 
102.20800340175629, - "p99": 106.72000050544739 + "p50": 101.3759970664978, + "p90": 104.76800054311752, + "p95": 106.01600259542465, + "p99": 111.90400272607803 }, "combine": { - "p50": 63.07200342416763, - "p90": 74.87999647855759, - "p95": 75.55200159549713, - "p99": 79.13599908351898 + "p50": 126.11199915409088, + "p90": 127.3919939994812, + "p95": 127.83999741077423, + "p99": 129.18399274349213 }, "roundtrip": { - "p50": 151.296004652977, - "p90": 195.5839991569519, - "p95": 197.79199361801147, - "p99": 202.17600464820862 + "p50": 207.8080028295517, + "p90": 212.6079946756363, + "p95": 213.69600296020508, + "p99": 224.2559939622879 }, "isolatedSum": { - "p50": 133.4720030426979, - "p90": 175.199992954731, - "p95": 177.76000499725342, - "p99": 185.85599958896637 + "p50": 227.48799622058868, + "p90": 232.15999454259872, + "p95": 233.85600000619888, + "p99": 241.08799546957016 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 616448, - "combineLogicalBytes": 1232896, - "fanoutMean": 5.375, - "recvTokensMax": 13, - "stragglerRank": 5, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 4, - "globalTokens": 32, + "tokensPerRank": 256, + "globalTokens": 2048, "dispatch": { - "p50": 71.1359977722168, - "p90": 100.73599964380264, - "p95": 103.58399897813797, - "p99": 112.0000034570694 + "p50": 136.31999492645264, + "p90": 139.80799913406372, + "p95": 140.86399972438812, + "p99": 150.43200552463531 }, "combine": { - "p50": 63.58399987220764, - "p90": 75.13599842786789, - "p95": 76.31999999284744, - "p99": 80.1599994301796 + "p50": 176.35199427604675, + "p90": 178.78399789333344, + "p95": 180.03199994564056, + "p99": 188.60800564289093 }, "roundtrip": { - "p50": 152.96000242233276, - "p90": 197.2160041332245, - "p95": 200.28799772262573, - "p99": 206.7199945449829 + "p50": 297.5679934024811, + 
"p90": 303.45600843429565, + "p95": 306.46398663520813, + "p99": 319.2960023880005 }, "isolatedSum": { - "p50": 134.71999764442444, - "p90": 175.87199807167053, - "p95": 179.9039989709854, - "p99": 192.160002887249 + "p50": 312.6719892024994, + "p90": 318.59199702739716, + "p95": 320.8959996700287, + "p99": 339.04001116752625 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1240064, - "combineLogicalBytes": 2480128, - "fanoutMean": 5.40625, - "recvTokensMax": 29, - "stragglerRank": 6, + "dispatchLogicalBytes": 100509696, + "combineLogicalBytes": 100509696, + "fanoutMean": 3.42333984375, + "recvTokensMax": 2046, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 512, + "globalTokens": 4096, "dispatch": { - "p50": 99.39199686050415, - "p90": 103.61599922180176, - "p95": 106.46399855613708, - "p99": 123.3920007944107 + "p50": 203.90400290489197, + "p90": 211.58400177955627, + "p95": 212.51200139522552, + "p99": 223.32799434661865 }, "combine": { - "p50": 74.14399832487106, - "p90": 75.9039968252182, - "p95": 78.59200239181519, - "p99": 99.48799759149551 + "p50": 325.1839876174927, + "p90": 335.55200695991516, + "p95": 335.80800890922546, + "p99": 337.8559947013855 }, "roundtrip": { - "p50": 194.68800723552704, - "p90": 199.48799908161163, - "p95": 203.0079960823059, - "p99": 231.80800676345825 + "p50": 506.20800256729126, + "p90": 514.4960284233093, + "p95": 519.7759866714478, + "p99": 534.0160131454468 }, "isolatedSum": { - "p50": 173.5359951853752, - "p90": 179.51999604701996, - "p95": 185.05600094795227, - "p99": 222.87999838590622 + "p50": 529.0879905223846, + "p90": 547.1360087394714, + "p95": 548.320010304451, + "p99": 561.1839890480042 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2487296, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, + "dispatchLogicalBytes": 201678848, + "combineLogicalBytes": 201678848, + 
"fanoutMean": 3.4345703125, + "recvTokensMax": 4094, "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 98.39999675750732, - "p90": 103.16800326108932, - "p95": 105.31199723482132, - "p99": 109.98400300741196 - }, - "combine": { - "p50": 76.64000242948532, - "p90": 78.23999971151352, - "p95": 79.45600152015686, - "p99": 81.88799768686295 - }, - "roundtrip": { - "p50": 195.96800208091736, - "p90": 201.6959935426712, - "p95": 204.57600057125092, - "p99": 208.3519995212555 - }, - "isolatedSum": { - "p50": 175.03999918699265, - "p90": 181.40800297260284, - "p95": 184.76799875497818, - "p99": 191.8720006942749 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4960256, - "combineLogicalBytes": 9920512, - "fanoutMean": 5.40625, - "recvTokensMax": 92, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, + "tokensPerRank": 1024, + "globalTokens": 8192, "dispatch": { - "p50": 72.57600128650665, - "p90": 100.63999891281128, - "p95": 102.62399911880493, - "p99": 107.2319969534874 + "p50": 348.86398911476135, + "p90": 353.40800881385803, + "p95": 354.7520041465759, + "p99": 364.22398686408997 }, "combine": { - "p50": 71.42399996519089, - "p90": 83.0719992518425, - "p95": 84.35200154781342, - "p99": 88.73599767684937 + "p50": 582.4000239372253, + "p90": 585.9519839286804, + "p95": 593.0879712104797, + "p99": 594.5919752120972 }, "roundtrip": { - "p50": 160.16000509262085, - "p90": 205.05599677562714, - "p95": 208.3519995212555, - "p99": 231.1680018901825 + "p50": 909.5680117607117, + "p90": 917.2160029411316, + "p95": 918.5600280761719, + "p99": 924.127995967865 }, "isolatedSum": { - "p50": 144.00000125169754, - "p90": 183.71199816465378, - "p95": 186.97600066661835, - "p99": 195.96799463033676 + "p50": 931.2640130519867, + "p90": 939.3599927425385, + "p95": 947.8399753570557, + "p99": 
958.8159620761871 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 9863168, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 182, + "dispatchLogicalBytes": 405035008, + "combineLogicalBytes": 405035008, + "fanoutMean": 3.4488525390625, + "recvTokensMax": 8189, "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 64, - "globalTokens": 512, + "tokensPerRank": 2048, + "globalTokens": 16384, "dispatch": { - "p50": 81.28000050783157, - "p90": 103.13600301742554, - "p95": 105.6319996714592, - "p99": 113.92000317573547 + "p50": 641.8560147285461, + "p90": 648.639976978302, + "p95": 655.135989189148, + "p99": 660.256028175354 }, "combine": { - "p50": 81.11999928951263, - "p90": 92.73599833250046, - "p95": 93.6959981918335, - "p99": 98.91200065612793 + "p50": 1062.7520084381104, + "p90": 1072.7039575576782, + "p95": 1073.4080076217651, + "p99": 1076.5119791030884 }, "roundtrip": { - "p50": 172.5119948387146, - "p90": 216.60800278186798, - "p95": 219.87199783325195, - "p99": 227.48799622058868 + "p50": 1693.343997001648, + "p90": 1700.6080150604248, + "p95": 1702.847957611084, + "p99": 1706.6559791564941 }, "isolatedSum": { - "p50": 162.3999997973442, - "p90": 195.872001349926, - "p95": 199.3279978632927, - "p99": 212.8320038318634 + "p50": 1704.6080231666565, + "p90": 1721.3439345359802, + "p95": 1728.543996810913, + "p99": 1736.7680072784424 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19496960, - "combineLogicalBytes": 38993920, - "fanoutMean": 5.3125, - "recvTokensMax": 367, + "dispatchLogicalBytes": 808822784, + "combineLogicalBytes": 808822784, + "fanoutMean": 3.44354248046875, + "recvTokensMax": 16380, "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 4096, + "globalTokens": 32768, "dispatch": { - "p50": 88.86399865150452, - "p90": 108.96000266075134, - "p95": 111.23199760913849, - "p99": 
116.28799885511398 + "p50": 1252.1920204162598, + "p90": 1262.719988822937, + "p95": 1264.7360563278198, + "p99": 1276.8640518188477 }, "combine": { - "p50": 98.14400225877762, - "p90": 108.96000266075134, - "p95": 110.49599945545197, - "p99": 114.68800157308578 + "p50": 2043.4560775756836, + "p90": 2045.151948928833, + "p95": 2047.1999645233154, + "p99": 2067.392110824585 }, "roundtrip": { - "p50": 216.67200326919556, - "p90": 236.00000143051147, - "p95": 238.49600553512573, - "p99": 242.46400594711304 + "p50": 3284.6720218658447, + "p90": 3295.1040267944336, + "p95": 3299.0400791168213, + "p99": 3313.3440017700195 }, "isolatedSum": { - "p50": 187.00800091028214, - "p90": 217.92000532150269, - "p95": 221.72799706459045, - "p99": 230.97600042819977 + "p50": 3295.6480979919434, + "p90": 3307.87193775177, + "p95": 3311.9360208511353, + "p99": 3344.2561626434326 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 38836224, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 4, + "dispatchLogicalBytes": 1619795968, + "combineLogicalBytes": 1619795968, + "fanoutMean": 3.4481201171875, + "recvTokensMax": 32761, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -25538,50 +26279,51 @@ ] }, { - "id": "cx-d4dbb29d", - "identity": "h100|deepep|7168|8|256|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", - "colorKey": "h100_97196257", - "comparisonKey": "9687217877b9ce9c", + "id": "cx-9b7dbfc5", + "identity": "b300|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|prefill|normal|none|none|0|normalized|0.18|2b57a75d27f5b39", + "colorKey": "b300_085c12d4", + "comparisonKey": "afb8d29f702ca3c1", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:48:10.138934+00:00", + "generatedAt": "2026-06-26T18:21:45.459593+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h100-dgxc-slurm_03", - "sku": "h100", + "runner": 
"b300-nv_16", + "sku": "b300", "backend": "deepep", - "phase": "decode", + "phase": "prefill", "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", + "resourceMode": "normalized", + "suite": "resource-constrained", "comparisonClass": "standardized", - "measurementContract": "runtime-visible-v1", - "topologyClass": "h100-nvlink-island", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · fp8", + "label": "B300 EP8 · deepep · bf16 (norm) · zipf+eplb", + "model": "DeepSeek-V3 (EPLB physical)", "shape": { "hidden": 7168, "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", + "experts": 288, + "routing": "zipf", + "routingLabel": "zipf+eplb", "routingStep": 0, "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "fp8", + "eplbEnabled": true, + "dispatchDtype": "bf16", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", + "requestedFraction": 0.18, + "achievedFraction": 0.1824, + "configuredUnits": 27, + "deviceUnits": 148, + "resourceClass": "resource-constrained", + "conformanceClass": "resource-conforming", "fixedKernel": false, - "paretoEligible": false + "paretoEligible": true }, "placement": { "kind": "packed", @@ -25590,313 +26332,239 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "ac583971f94b176", - "workloadId": "set:8:d8d49658059863f2", + "traceSignature": "2b57a75d27f5b39", + "workloadId": "set:6:830e36e88869e222", "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, + "eplbImbalanceBefore": 4.895263671875, + "eplbImbalanceAfter": 1.0000902811686199, "backendVersion": "1.2.1", "imageDigest": 
"sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271579958", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271579958", - "createdAt": "2026-06-26T23:47:12Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28255311146", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28255311146", + "createdAt": "2026-06-26T18:21:45.459593+00:00", + "sha": "36d3eb6c3c7386d3220c873d305410219c5c0f17" }, "rows": [ { - "tokensPerRank": 1, - "globalTokens": 8, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 193.05600225925446, - "p90": 204.3839991092682, - "p95": 210.52800118923187, - "p99": 277.9200077056885 + "p50": 93.28000247478485, + "p90": 96.16000205278397, + "p95": 98.78399968147278, + "p99": 129.2479932308197 }, "combine": { - "p50": 60.95999851822853, - "p90": 63.29599767923355, - "p95": 65.31199812889099, - "p99": 68.76800209283829 - }, - "roundtrip": { - "p50": 237.63200640678406, - "p90": 244.25600469112396, - "p95": 246.14399671554565, - "p99": 269.4079875946045 - }, - "isolatedSum": { - "p50": 254.016000777483, - "p90": 267.67999678850174, - "p95": 275.83999931812286, - "p99": 346.68800979852676 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 315392, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 192.9280012845993, - "p90": 200.6720006465912, - "p95": 204.79999482631683, - "p99": 264.5759880542755 - }, - "combine": { - "p50": 62.272001057863235, - "p90": 64.7680014371872, - "p95": 67.391999065876, - "p99": 73.08799773454666 - }, - "roundtrip": { - "p50": 235.6480062007904, - "p90": 243.0720031261444, - "p95": 245.60000002384186, - "p99": 259.71201062202454 - }, - "isolatedSum": { - "p50": 255.20000234246254, - 
"p90": 265.4400020837784, - "p95": 272.19199389219284, - "p99": 337.6639857888222 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 616448, - "combineLogicalBytes": 1232896, - "fanoutMean": 5.375, - "recvTokensMax": 13, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 197.24799692630768, - "p90": 286.080002784729, - "p95": 290.71998596191406, - "p99": 302.2400140762329 - }, - "combine": { - "p50": 63.32799792289734, - "p90": 71.32799923419952, - "p95": 75.45600086450577, - "p99": 82.62400329113007 + "p50": 114.94400352239609, + "p90": 115.55200070142746, + "p95": 115.93600362539291, + "p99": 126.3359934091568 }, "roundtrip": { - "p50": 242.94400215148926, - "p90": 349.40800070762634, - "p95": 354.4960021972656, - "p99": 367.13600158691406 + "p50": 195.6160068511963, + "p90": 199.42399859428406, + "p95": 200.83199441432953, + "p99": 215.16799926757812 }, "isolatedSum": { - "p50": 260.575994849205, - "p90": 357.4080020189285, - "p95": 366.17598682641983, - "p99": 384.864017367363 + "p50": 208.22400599718094, + "p90": 211.71200275421143, + "p95": 214.7200033068657, + "p99": 255.5839866399765 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1240064, - "combineLogicalBytes": 2480128, - "fanoutMean": 5.40625, - "recvTokensMax": 29, - "stragglerRank": 2, + "dispatchLogicalBytes": 77385728, + "combineLogicalBytes": 77385728, + "fanoutMean": 5.271484375, + "recvTokensMax": 691, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 256, + "globalTokens": 2048, "dispatch": { - "p50": 196.383997797966, - "p90": 251.583993434906, - "p95": 254.8159956932068, - "p99": 268.15998554229736 + "p50": 136.25599443912506, + "p90": 139.00800049304962, + "p95": 141.50400459766388, + "p99": 155.03999590873718 }, "combine": { - "p50": 63.87200206518173, - "p90": 72.73600250482559, - 
"p95": 73.5040009021759, - "p99": 77.95199751853943 + "p50": 153.72799336910248, + "p90": 163.2319986820221, + "p95": 163.80800306797028, + "p99": 167.67999529838562 }, "roundtrip": { - "p50": 242.11199581623077, - "p90": 299.3920147418976, - "p95": 304.1599988937378, - "p99": 410.8160138130188 + "p50": 269.9199914932251, + "p90": 275.64799785614014, + "p95": 276.92800760269165, + "p99": 291.77600145339966 }, "isolatedSum": { - "p50": 260.25599986314774, - "p90": 324.3199959397316, - "p95": 328.3199965953827, - "p99": 346.1119830608368 + "p50": 289.98398780822754, + "p90": 302.2399991750717, + "p95": 305.31200766563416, + "p99": 322.7199912071228 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2487296, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 0, + "dispatchLogicalBytes": 155172864, + "combineLogicalBytes": 155172864, + "fanoutMean": 5.28515625, + "recvTokensMax": 1378, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 16, - "globalTokens": 128, + "tokensPerRank": 512, + "globalTokens": 4096, "dispatch": { - "p50": 197.63199985027313, - "p90": 288.35201263427734, - "p95": 294.048011302948, - "p99": 322.04800844192505 + "p50": 192.671999335289, + "p90": 200.095996260643, + "p95": 201.1840045452118, + "p99": 211.99999749660492 }, "combine": { - "p50": 66.46399945020676, - "p90": 79.9039974808693, - "p95": 106.33599758148193, - "p99": 204.25599813461304 + "p50": 264.70398902893066, + "p90": 274.2399871349335, + "p95": 274.9119997024536, + "p99": 286.3999903202057 }, "roundtrip": { - "p50": 246.62399291992188, - "p90": 330.24001121520996, - "p95": 333.5359990596771, - "p99": 341.18399024009705 + "p50": 439.7439956665039, + "p90": 445.279985666275, + "p95": 447.519987821579, + "p99": 459.9039852619171 }, "isolatedSum": { - "p50": 264.0959993004799, - "p90": 368.25601011514664, - "p95": 400.38400888442993, - "p99": 526.3040065765381 + "p50": 
457.37598836421967, + "p90": 474.3359833955765, + "p95": 476.0960042476654, + "p99": 498.3999878168106 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4960256, - "combineLogicalBytes": 9920512, - "fanoutMean": 5.40625, - "recvTokensMax": 92, - "stragglerRank": 0, + "dispatchLogicalBytes": 310546432, + "combineLogicalBytes": 310546432, + "fanoutMean": 5.28857421875, + "recvTokensMax": 2745, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 32, - "globalTokens": 256, + "tokensPerRank": 1024, + "globalTokens": 8192, "dispatch": { - "p50": 198.40000569820404, - "p90": 284.35200452804565, - "p95": 288.06400299072266, - "p99": 295.9040105342865 + "p50": 319.90399956703186, + "p90": 325.8560001850128, + "p95": 327.1999955177307, + "p99": 333.44000577926636 }, "combine": { - "p50": 70.97599655389786, - "p90": 79.96799796819687, - "p95": 80.70400357246399, - "p99": 83.52000266313553 + "p50": 450.78399777412415, + "p90": 458.8800072669983, + "p95": 459.77601408958435, + "p99": 482.87999629974365 }, "roundtrip": { - "p50": 250.36799907684326, - "p90": 306.5919876098633, - "p95": 310.2079927921295, - "p99": 368.8639998435974 + "p50": 756.1600208282471, + "p90": 761.5039944648743, + "p95": 763.5840177536011, + "p99": 783.5519909858704 }, "isolatedSum": { - "p50": 269.3760022521019, - "p90": 364.3200024962425, - "p95": 368.76800656318665, - "p99": 379.424013197422 + "p50": 770.687997341156, + "p90": 784.7360074520111, + "p95": 786.9760096073151, + "p99": 816.32000207901 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 9863168, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 182, - "stragglerRank": 0, + "dispatchLogicalBytes": 620619776, + "combineLogicalBytes": 620619776, + "fanoutMean": 5.2845458984375, + "recvTokensMax": 5526, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 64, - "globalTokens": 512, + "tokensPerRank": 2048, + 
"globalTokens": 16384, "dispatch": { - "p50": 198.65599274635315, - "p90": 284.8320007324219, - "p95": 289.69600796699524, - "p99": 304.4480085372925 + "p50": 573.0559825897217, + "p90": 581.6959738731384, + "p95": 583.7119817733765, + "p99": 671.4879870414734 }, "combine": { - "p50": 80.48000186681747, - "p90": 88.83199840784073, - "p95": 90.52799642086029, - "p99": 101.31199657917023 + "p50": 827.4880051612854, + "p90": 838.6240005493164, + "p95": 839.9040102958679, + "p99": 863.4560108184814 }, "roundtrip": { - "p50": 260.96001267433167, - "p90": 351.80801153182983, - "p95": 355.55198788642883, - "p99": 367.0400083065033 + "p50": 1382.9760551452637, + "p90": 1392.9920196533203, + "p95": 1396.8960046768188, + "p99": 1428.1599521636963 }, "isolatedSum": { - "p50": 279.1359946131706, - "p90": 373.6639991402626, - "p95": 380.22400438785553, - "p99": 405.7600051164627 + "p50": 1400.543987751007, + "p90": 1420.3199744224548, + "p95": 1423.6159920692444, + "p99": 1534.9439978599548 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19496960, - "combineLogicalBytes": 38993920, - "fanoutMean": 5.3125, - "recvTokensMax": 367, - "stragglerRank": 0, + "dispatchLogicalBytes": 1239175168, + "combineLogicalBytes": 1239175168, + "fanoutMean": 5.2757568359375, + "recvTokensMax": 11165, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 4096, + "globalTokens": 32768, "dispatch": { - "p50": 216.8319970369339, - "p90": 312.8640055656433, - "p95": 320.73599100112915, - "p99": 336.41600608825684 + "p50": 1061.8879795074463, + "p90": 1068.7040090560913, + "p95": 1075.9040117263794, + "p99": 1094.048023223877 }, "combine": { - "p50": 98.94400089979172, - "p90": 112.83200234174728, - "p95": 113.79200220108032, - "p99": 119.13599818944931 + "p50": 1530.2079916000366, + "p90": 1540.7040119171143, + "p95": 1551.2640476226807, + "p99": 1662.6559495925903 }, "roundtrip": { - "p50": 
303.2959997653961, - "p90": 388.0000114440918, - "p95": 392.2879993915558, - "p99": 401.2480080127716 + "p50": 2579.9999237060547, + "p90": 2593.7600135803223, + "p95": 2600.543975830078, + "p99": 2645.440101623535 }, "isolatedSum": { - "p50": 315.7759979367256, - "p90": 425.6960079073906, - "p95": 434.5279932022095, - "p99": 455.55200427770615 + "p50": 2592.095971107483, + "p90": 2609.4080209732056, + "p95": 2627.16805934906, + "p99": 2756.7039728164673 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 38836224, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 0, + "dispatchLogicalBytes": 2481604608, + "combineLogicalBytes": 2481604608, + "fanoutMean": 5.282684326171875, + "recvTokensMax": 22165, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -25904,50 +26572,51 @@ ] }, { - "id": "cx-8e5c4d34", - "identity": "h100|deepep|7168|8|384|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||d6c49ae98878760", - "colorKey": "h100_97196257", - "comparisonKey": "969c3964291e1270", + "id": "cx-07a9b9e5", + "identity": "b300|deepep|7168|8|256|bf16|normal|cached-layout-comm-only-v1|uniform|8|prefill|normal|none|none|0|normalized|0.18|64d989e2e2a6b31", + "colorKey": "b300_63f1354f", + "comparisonKey": "e1e888fe005f12d0", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:50:43.012530+00:00", + "generatedAt": "2026-06-26T17:43:21.918392+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h100-dgxc-slurm_19", - "sku": "h100", + "runner": "b300-nv_01", + "sku": "b300", "backend": "deepep", - "phase": "decode", + "phase": "prefill", "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", + "resourceMode": "normalized", + "suite": "resource-constrained", "comparisonClass": "standardized", - "measurementContract": "runtime-visible-v1", - "topologyClass": "h100-nvlink-island", + "measurementContract": "cached-layout-comm-only-v1", + 
"topologyClass": "b300-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · fp8", + "label": "B300 EP8 · deepep · bf16 (norm) [cl]", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, - "experts": 384, + "experts": 256, "routing": "uniform", "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, - "dispatchDtype": "fp8", + "dispatchDtype": "bf16", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", + "requestedFraction": 0.18, + "achievedFraction": 0.1824, + "configuredUnits": 27, + "deviceUnits": 148, + "resourceClass": "resource-constrained", + "conformanceClass": "resource-conforming", "fixedKernel": false, - "paretoEligible": false + "paretoEligible": true }, "placement": { "kind": "packed", @@ -25956,8 +26625,8 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "d6c49ae98878760", - "workloadId": "set:8:9a27d0df4b17fa09", + "traceSignature": "64d989e2e2a6b31", + "workloadId": "set:6:a426d66e479dc893", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -25965,304 +26634,230 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271660154", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271660154", - "createdAt": "2026-06-26T23:49:43Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28254489726", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254489726", + "createdAt": "2026-06-26T17:43:21.918392+00:00", + "sha": "60dec7d70f554e252fec87709e2be52752947db1" }, "rows": [ { - "tokensPerRank": 1, - "globalTokens": 8, + 
"tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 198.7520009279251, - "p90": 206.2399983406067, - "p95": 209.56799387931824, - "p99": 221.69600427150726 + "p50": 86.43200248479843, + "p90": 88.95999938249588, + "p95": 91.58399701118469, + "p99": 99.55199807882309 }, "combine": { - "p50": 60.83200126886368, - "p90": 64.31999802589417, - "p95": 65.98400324583054, - "p99": 69.05599683523178 + "p50": 115.35999923944473, + "p90": 116.03199690580368, + "p95": 116.38399958610535, + "p99": 121.56800180673599 }, "roundtrip": { - "p50": 242.71999299526215, - "p90": 250.07998943328857, - "p95": 254.5279860496521, - "p99": 290.0159955024719 + "p50": 186.8479996919632, + "p90": 192.47999787330627, + "p95": 193.31200420856476, + "p99": 215.45599400997162 }, "isolatedSum": { - "p50": 259.5840021967888, - "p90": 270.55999636650085, - "p95": 275.5519971251488, - "p99": 290.75200110673904 + "p50": 201.79200172424316, + "p90": 204.99199628829956, + "p95": 207.96799659729004, + "p99": 221.11999988555908 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 301056, - "combineLogicalBytes": 602112, - "fanoutMean": 5.25, - "recvTokensMax": 8, - "stragglerRank": 0, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2, - "globalTokens": 16, + "tokensPerRank": 256, + "globalTokens": 2048, "dispatch": { - "p50": 205.53599298000336, - "p90": 313.6320114135742, - "p95": 323.8399922847748, - "p99": 375.5840063095093 + "p50": 128.60800325870514, + "p90": 131.48799538612366, + "p95": 132.79999792575836, + "p99": 147.20000326633453 }, "combine": { - "p50": 62.81600147485733, - "p90": 76.1599987745285, - "p95": 79.19999957084656, - "p99": 83.0719992518425 + "p50": 156.19200468063354, + "p90": 164.48000073432922, + "p95": 164.76799547672272, + "p99": 167.71200299263 }, "roundtrip": { - "p50": 
242.49599874019623, - "p90": 250.43201446533203, - "p95": 253.08799743652344, - "p99": 294.1119968891144 + "p50": 264.8000121116638, + "p90": 271.232008934021, + "p95": 274.6239900588989, + "p99": 307.20001459121704 }, "isolatedSum": { - "p50": 268.3519944548607, - "p90": 389.7920101881027, - "p95": 403.03999185562134, - "p99": 458.6560055613518 + "p50": 284.8000079393387, + "p90": 295.9679961204529, + "p95": 297.5679934024811, + "p99": 314.91200625896454 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 609280, - "combineLogicalBytes": 1218560, - "fanoutMean": 5.3125, - "recvTokensMax": 14, - "stragglerRank": 0, + "dispatchLogicalBytes": 155889664, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 4, - "globalTokens": 32, + "tokensPerRank": 512, + "globalTokens": 4096, "dispatch": { - "p50": 203.5519927740097, - "p90": 291.55200719833374, - "p95": 296.09599709510803, - "p99": 303.6159873008728 + "p50": 186.46399676799774, + "p90": 192.86400079727173, + "p95": 195.360004901886, + "p99": 208.3200067281723 }, "combine": { - "p50": 63.26399743556976, - "p90": 73.98399710655212, - "p95": 75.83999633789062, - "p99": 80.09599894285202 + "p50": 266.6879892349243, + "p90": 274.78399872779846, + "p95": 275.2639949321747, + "p99": 287.1359884738922 }, "roundtrip": { - "p50": 247.42400646209717, - "p90": 336.67200803756714, - "p95": 339.4559919834137, - "p99": 346.20800614356995 + "p50": 437.4080002307892, + "p90": 442.30398535728455, + "p95": 445.6320106983185, + "p99": 468.51199865341187 }, "isolatedSum": { - "p50": 266.81599020957947, - "p90": 365.53600430488586, - "p95": 371.93599343299866, - "p99": 383.7119862437248 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1204224, - "combineLogicalBytes": 2408448, - "fanoutMean": 5.25, - "recvTokensMax": 26, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - 
"trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 199.45600628852844, - "p90": 207.8080028295517, - "p95": 213.02400529384613, - "p99": 235.29599606990814 - }, - "combine": { - "p50": 62.72000074386597, - "p90": 67.16799736022949, - "p95": 68.64000111818314, - "p99": 73.60000163316727 - }, - "roundtrip": { - "p50": 245.85600197315216, - "p90": 253.1839907169342, - "p95": 256.9279968738556, - "p99": 269.3119943141937 - }, - "isolatedSum": { - "p50": 262.1760070323944, - "p90": 274.9760001897812, - "p95": 281.66400641202927, - "p99": 308.8959977030754 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2415616, - "combineLogicalBytes": 4831232, - "fanoutMean": 5.265625, - "recvTokensMax": 48, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 204.22400534152985, - "p90": 292.60799288749695, - "p95": 296.3840067386627, - "p99": 434.30399894714355 - }, - "combine": { - "p50": 66.14399701356888, - "p90": 75.55200159549713, - "p95": 76.1599987745285, - "p99": 79.8719972372055 - }, - "roundtrip": { - "p50": 250.59199333190918, - "p90": 335.32801270484924, - "p95": 340.2239978313446, - "p99": 366.5919899940491 - }, - "isolatedSum": { - "p50": 270.3680023550987, - "p90": 368.1599944829941, - "p95": 372.5440055131912, - "p99": 514.1759961843491 + "p50": 453.15198600292206, + "p90": 467.6479995250702, + "p95": 470.62399983406067, + "p99": 495.4559952020645 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4924416, - "combineLogicalBytes": 9848832, - "fanoutMean": 5.3671875, - "recvTokensMax": 91, - "stragglerRank": 0, + "dispatchLogicalBytes": 312266752, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 32, - "globalTokens": 256, + "tokensPerRank": 1024, + "globalTokens": 8192, 
"dispatch": { - "p50": 200.99200308322906, - "p90": 286.3039970397949, - "p95": 293.3120131492615, - "p99": 305.11999130249023 + "p50": 316.0319924354553, + "p90": 319.16800141334534, + "p95": 320.99199295043945, + "p99": 330.01598715782166 }, "combine": { - "p50": 70.88000327348709, - "p90": 75.83999633789062, - "p95": 78.11199873685837, - "p99": 86.84799820184708 + "p50": 458.8479995727539, + "p90": 461.66399121284485, + "p95": 470.20798921585083, + "p99": 483.39200019836426 }, "roundtrip": { - "p50": 253.31199169158936, - "p90": 259.71201062202454, - "p95": 262.4959945678711, - "p99": 270.9439992904663 + "p50": 752.0639896392822, + "p90": 761.3440155982971, + "p95": 763.6799812316895, + "p99": 787.6480221748352 }, "isolatedSum": { - "p50": 271.87200635671616, - "p90": 362.14399337768555, - "p95": 371.42401188611984, - "p99": 391.9679895043373 + "p50": 774.8799920082092, + "p90": 780.8319926261902, + "p95": 791.1999821662903, + "p99": 813.4079873561859 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 9748480, - "combineLogicalBytes": 19496960, - "fanoutMean": 5.3125, - "recvTokensMax": 178, - "stragglerRank": 0, + "dispatchLogicalBytes": 623443968, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 64, - "globalTokens": 512, + "tokensPerRank": 2048, + "globalTokens": 16384, "dispatch": { - "p50": 204.22400534152985, - "p90": 293.8239872455597, - "p95": 299.74400997161865, - "p99": 323.4559893608093 + "p50": 557.2800040245056, + "p90": 565.0240182876587, + "p95": 566.3679838180542, + "p99": 600.0319719314575 }, "combine": { - "p50": 81.82399719953537, - "p90": 93.40800344944, - "p95": 96.63999825716019, - "p99": 99.64799880981445 + "p50": 817.4399733543396, + "p90": 827.8399705886841, + "p95": 832.0639729499817, + "p99": 854.3999791145325 }, "roundtrip": { - "p50": 268.73600482940674, - "p90": 351.6159951686859, - "p95": 
354.4960021972656, - "p99": 361.6639971733093 + "p50": 1359.328031539917, + "p90": 1370.911955833435, + "p95": 1380.5760145187378, + "p99": 1444.640040397644 }, "isolatedSum": { - "p50": 286.0480025410652, - "p90": 387.2319906949997, - "p95": 396.38400822877884, - "p99": 423.1039881706238 + "p50": 1374.7199773788452, + "p90": 1392.8639888763428, + "p95": 1398.431956768036, + "p99": 1454.43195104599 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19418112, - "combineLogicalBytes": 38836224, - "fanoutMean": 5.291015625, - "recvTokensMax": 372, - "stragglerRank": 5, + "dispatchLogicalBytes": 1243805696, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 4096, + "globalTokens": 32768, "dispatch": { - "p50": 224.2240011692047, - "p90": 294.5919930934906, - "p95": 298.4960079193115, - "p99": 310.8159899711609 + "p50": 1037.4079942703247, + "p90": 1044.800043106079, + "p95": 1047.4879741668701, + "p99": 1074.3039846420288 }, "combine": { - "p50": 99.90400075912476, - "p90": 110.33599823713303, - "p95": 111.35999858379364, - "p99": 114.68800157308578 + "p50": 1529.6319723129272, + "p90": 1541.375994682312, + "p95": 1552.0639419555664, + "p99": 1577.1199464797974 }, "roundtrip": { - "p50": 310.88000535964966, - "p90": 375.2320110797882, - "p95": 378.04800271987915, - "p99": 386.46399974823 + "p50": 2550.9119033813477, + "p90": 2564.2240047454834, + "p95": 2571.199893951416, + "p99": 2613.2800579071045 }, "isolatedSum": { - "p50": 324.12800192832947, - "p90": 404.9279913306236, - "p95": 409.85600650310516, - "p99": 425.5039915442467 + "p50": 2567.039966583252, + "p90": 2586.176037788391, + "p95": 2599.5519161224365, + "p99": 2651.423931121826 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 38757376, - "combineLogicalBytes": 77514752, - "fanoutMean": 5.2802734375, - 
"recvTokensMax": 707, - "stragglerRank": 5, + "dispatchLogicalBytes": 2487009280, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -26270,28 +26865,29 @@ ] }, { - "id": "cx-4e4a7f2d", - "identity": "h100|deepep|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|ac583971f94b176", - "colorKey": "h100_91aa6e56", - "comparisonKey": "511cf861d6b2e142", + "id": "cx-179c0247", + "identity": "b300|deepep|7168|8|256|bf16|normal|cached-layout-comm-only-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "b300_33311fdc", + "comparisonKey": "6deb8b087f7b728f", "schemaVersion": 3, - "generatedAt": "2026-06-26T17:28:00.849157+00:00", + "generatedAt": "2026-06-27T09:46:35.384079+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h100-dgxc-slurm_18", - "sku": "h100", + "runner": "b300-nv_09", + "sku": "b300", "backend": "deepep", - "phase": "decode", + "phase": "prefill", "mode": "normal", - "resourceMode": "normalized", - "suite": "resource-constrained", + "resourceMode": "tuned", + "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", + "measurementContract": "cached-layout-comm-only-v1", + "topologyClass": "b300-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · fp8 (norm)", + "label": "B300 EP8 · deepep · bf16 [cl]", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, @@ -26301,19 +26897,19 @@ "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, - "dispatchDtype": "fp8", + "dispatchDtype": "bf16", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { - "requestedFraction": 0.18, - "achievedFraction": 0.1818, - "configuredUnits": 24, - "deviceUnits": 132, - 
"resourceClass": "resource-constrained", - "conformanceClass": "resource-conforming", + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", "fixedKernel": false, - "paretoEligible": true + "paretoEligible": false }, "placement": { "kind": "packed", @@ -26322,8 +26918,8 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "ac583971f94b176", - "workloadId": "set:8:d8d49658059863f2", + "traceSignature": "64d989e2e2a6b31", + "workloadId": "set:6:a426d66e479dc893", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -26331,303 +26927,229 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28254323956", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254323956", - "createdAt": "2026-06-26T17:27:01Z", - "sha": "60dec7d70f554e252fec87709e2be52752947db1" + "id": "28285576352", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285576352", + "createdAt": "2026-06-27T09:46:35.384079+00:00", + "sha": "149586650dbed5b7579537347e9489d5b41543c1" }, "rows": [ { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 97.98400104045868, - "p90": 102.88000106811523, - "p95": 104.38399761915207, - "p99": 110.20799726247787 - }, - "combine": { - "p50": 72.28799909353256, - "p90": 74.14399832487106, - "p95": 75.29599964618683, - "p99": 78.65600287914276 - }, - "roundtrip": { - "p50": 190.65600633621216, - "p90": 195.90400159358978, - "p95": 198.30399751663208, - "p99": 202.72000133991241 - }, - "isolatedSum": { - "p50": 170.27200013399124, - "p90": 177.0239993929863, - "p95": 179.6799972653389, - "p99": 188.86400014162064 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 315392, - "combineLogicalBytes": 630784, - "fanoutMean": 
5.5, - "recvTokensMax": 7, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 72.15999811887741, - "p90": 99.90400075912476, - "p95": 102.52799838781357, - "p99": 105.0880029797554 + "p50": 87.26400136947632, + "p90": 88.8959988951683, + "p95": 91.20000153779984, + "p99": 106.08000308275223 }, "combine": { - "p50": 63.35999816656113, - "p90": 73.18399846553802, - "p95": 73.98399710655212, - "p99": 78.46400141716003 + "p50": 115.55200070142746, + "p90": 116.80000275373459, + "p95": 117.60000139474869, + "p99": 140.32000303268433 }, "roundtrip": { - "p50": 153.82400155067444, - "p90": 194.43200528621674, - "p95": 196.28800451755524, - "p99": 201.05600357055664 + "p50": 186.17600202560425, + "p90": 192.76799261569977, + "p95": 193.82399320602417, + "p99": 217.75999665260315 }, "isolatedSum": { - "p50": 135.51999628543854, - "p90": 173.08799922466278, - "p95": 176.5119954943657, - "p99": 183.55200439691544 + "p50": 202.81600207090378, + "p90": 205.6960016489029, + "p95": 208.80000293254852, + "p99": 246.40000611543655 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 616448, - "combineLogicalBytes": 1232896, - "fanoutMean": 5.375, - "recvTokensMax": 13, - "stragglerRank": 6, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 4, - "globalTokens": 32, + "tokensPerRank": 256, + "globalTokens": 2048, "dispatch": { - "p50": 72.31999933719635, - "p90": 103.4879982471466, - "p95": 107.26399719715118, - "p99": 115.48800021409988 + "p50": 130.20800054073334, + "p90": 132.35199451446533, + "p95": 133.34399461746216, + "p99": 138.7840062379837 }, "combine": { - "p50": 64.03200328350067, - "p90": 76.28799974918365, - "p95": 77.82399654388428, - "p99": 
81.98399841785431 + "p50": 155.45600652694702, + "p90": 164.51199352741241, + "p95": 164.8319959640503, + "p99": 176.83200538158417 }, "roundtrip": { - "p50": 156.09599649906158, - "p90": 202.36800611019135, - "p95": 205.63200116157532, - "p99": 212.51200139522552 + "p50": 266.4639949798584, + "p90": 271.61601185798645, + "p95": 274.59201216697693, + "p99": 283.3600044250488 }, "isolatedSum": { - "p50": 136.35200262069702, - "p90": 179.77599799633026, - "p95": 185.08799374103546, - "p99": 197.4719986319542 + "p50": 285.66400706768036, + "p90": 296.86398804187775, + "p95": 298.17599058151245, + "p99": 315.61601161956787 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1240064, - "combineLogicalBytes": 2480128, - "fanoutMean": 5.40625, - "recvTokensMax": 29, - "stragglerRank": 6, + "dispatchLogicalBytes": 155889664, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 512, + "globalTokens": 4096, "dispatch": { - "p50": 97.50399738550186, - "p90": 102.30399668216705, - "p95": 105.85600137710571, - "p99": 113.40799927711487 + "p50": 186.81600689888, + "p90": 192.83199310302734, + "p95": 195.0400024652481, + "p99": 202.7519941329956 }, "combine": { - "p50": 63.80800157785416, - "p90": 74.94399696588516, - "p95": 76.28799974918365, - "p99": 80.89599758386612 + "p50": 274.52799677848816, + "p90": 275.64799785614014, + "p95": 276.5760123729706, + "p99": 286.624014377594 }, "roundtrip": { - "p50": 154.6880006790161, - "p90": 194.7840005159378, - "p95": 199.0399956703186, - "p99": 203.87199521064758 + "p50": 440.064013004303, + "p90": 445.3119933605194, + "p95": 451.61598920822144, + "p99": 459.77601408958435 }, "isolatedSum": { - "p50": 161.31199896335602, - "p90": 177.24799364805222, - "p95": 182.14400112628937, - "p99": 194.303996860981 + "p50": 461.34400367736816, + "p90": 
468.4799909591675, + "p95": 471.6160148382187, + "p99": 489.3760085105896 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2487296, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 6, + "dispatchLogicalBytes": 312266752, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 16, - "globalTokens": 128, + "tokensPerRank": 1024, + "globalTokens": 8192, "dispatch": { - "p50": 97.08800166845322, - "p90": 104.3199971318245, - "p95": 107.39199817180634, - "p99": 113.43999952077866 + "p50": 316.25598669052124, + "p90": 319.68000531196594, + "p95": 321.1199939250946, + "p99": 329.120010137558 }, "combine": { - "p50": 75.74400305747986, - "p90": 78.49600166082382, - "p95": 80.06399869918823, - "p99": 83.36000144481659 + "p50": 459.00800824165344, + "p90": 461.0239863395691, + "p95": 462.5920057296753, + "p99": 473.66398572921753 }, "roundtrip": { - "p50": 195.2960044145584, - "p90": 205.85599541664124, - "p95": 209.85600352287292, - "p99": 223.83999824523926 + "p50": 752.5119781494141, + "p90": 760.9919905662537, + "p95": 763.3919715881348, + "p99": 770.4640030860901 }, "isolatedSum": { - "p50": 172.83200472593307, - "p90": 182.81599879264832, - "p95": 187.45599687099457, - "p99": 196.80000096559525 + "p50": 775.2639949321747, + "p90": 780.703991651535, + "p95": 783.7119996547699, + "p99": 802.7839958667755 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4960256, - "combineLogicalBytes": 9920512, - "fanoutMean": 5.40625, - "recvTokensMax": 92, - "stragglerRank": 6, + "dispatchLogicalBytes": 623443968, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 32, - "globalTokens": 256, + "tokensPerRank": 2048, + "globalTokens": 16384, "dispatch": { - 
"p50": 73.11999797821045, - "p90": 104.16000336408615, - "p95": 106.84800148010254, - "p99": 112.09599673748016 + "p50": 558.5920214653015, + "p90": 565.3759837150574, + "p95": 566.9119954109192, + "p99": 578.7839889526367 }, "combine": { - "p50": 69.2799985408783, - "p90": 81.88799768686295, - "p95": 82.87999778985977, - "p99": 88.28800171613693 + "p50": 819.0079927444458, + "p90": 828.4800052642822, + "p95": 830.9760093688965, + "p99": 844.8960185050964 }, "roundtrip": { - "p50": 161.21600568294525, - "p90": 206.65599405765533, - "p95": 210.84800362586975, - "p99": 216.22399985790253 + "p50": 1360.640048980713, + "p90": 1367.583990097046, + "p95": 1372.320055961609, + "p99": 1414.1119718551636 }, "isolatedSum": { - "p50": 142.39999651908875, - "p90": 186.0480010509491, - "p95": 189.7279992699623, - "p99": 200.3839984536171 + "p50": 1377.6000142097473, + "p90": 1393.8559889793396, + "p95": 1397.8880047798157, + "p99": 1423.6800074577332 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 9863168, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 182, + "dispatchLogicalBytes": 1243805696, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 64, - "globalTokens": 512, + "tokensPerRank": 4096, + "globalTokens": 32768, "dispatch": { - "p50": 78.65600287914276, - "p90": 106.9440022110939, - "p95": 110.55999994277954, - "p99": 125.44000148773193 + "p50": 1036.255955696106, + "p90": 1045.151948928833, + "p95": 1047.584056854248, + "p99": 1086.5919589996338 }, "combine": { - "p50": 83.64800363779068, - "p90": 96.38399630784988, - "p95": 97.69599884748459, - "p99": 100.00000149011612 + "p50": 1528.480052947998, + "p90": 1540.544033050537, + "p95": 1543.2319641113281, + "p99": 1555.2959442138672 }, "roundtrip": { - "p50": 175.7120043039322, - "p90": 222.6880043745041, - "p95": 225.24799406528473, - "p99": 
231.74400627613068 + "p50": 2546.976089477539, + "p90": 2557.1839809417725, + "p95": 2563.4560585021973, + "p99": 2601.2799739837646 }, "isolatedSum": { - "p50": 162.30400651693344, - "p90": 203.3279985189438, - "p95": 208.25599879026413, - "p99": 225.44000297784805 + "p50": 2564.736008644104, + "p90": 2585.69598197937, + "p95": 2590.816020965576, + "p99": 2641.887903213501 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19496960, - "combineLogicalBytes": 38993920, - "fanoutMean": 5.3125, - "recvTokensMax": 367, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 90.33600240945816, - "p90": 110.84800213575363, - "p95": 113.82400244474411, - "p99": 117.11999773979187 - }, - "combine": { - "p50": 98.78399968147278, - "p90": 111.00800335407257, - "p95": 112.0000034570694, - "p99": 117.21599847078323 - }, - "roundtrip": { - "p50": 216.12800657749176, - "p90": 240.60800671577454, - "p95": 244.25600469112396, - "p99": 250.2720057964325 - }, - "isolatedSum": { - "p50": 189.12000209093094, - "p90": 221.8560054898262, - "p95": 225.8240059018135, - "p99": 234.3359962105751 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 38836224, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, + "dispatchLogicalBytes": 2487009280, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, "stragglerRank": 6, "correct": true, "samplesPooled": 600, @@ -26636,32 +27158,33 @@ ] }, { - "id": "cx-750e874d", - "identity": "h100|deepep|7168|8|256|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|ac583971f94b176", - "colorKey": "h100_7f10961a", - "comparisonKey": "f145cb161a39591f", + "id": "cx-d90a63c5", + "identity": "b300|deepep|4096|8|128|fp8|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||dc27c5e0894e569", + "colorKey": "b300_d6fd14c3", 
+ "comparisonKey": "0a8b502bd3614965", "schemaVersion": 3, - "generatedAt": "2026-06-26T15:23:35.919985+00:00", + "generatedAt": "2026-06-27T11:14:26.432170+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h100-dgxc-slurm_05", - "sku": "h100", + "runner": "b300-nv_01", + "sku": "b300", "backend": "deepep", - "phase": "decode", + "phase": "prefill", "mode": "normal", - "resourceMode": "normalized", - "suite": "resource-constrained", + "resourceMode": "tuned", + "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "runtime-visible-v1", - "topologyClass": "h100-nvlink-island", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · fp8 (norm)", + "label": "B300 EP8 · deepep · fp8", + "model": "Qwen3.5", "shape": { - "hidden": 7168, + "hidden": 4096, "topk": 8, - "experts": 256, + "experts": 128, "routing": "uniform", "routingLabel": "uniform", "routingStep": 0, @@ -26672,12 +27195,12 @@ "combineQuantMode": "none" }, "resourceProfile": { - "requestedFraction": 0.18, - "achievedFraction": 0.1818, - "configuredUnits": 24, - "deviceUnits": 132, - "resourceClass": "unknown", - "conformanceClass": "resource-conforming", + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", "fixedKernel": false, "paretoEligible": false }, @@ -26688,8 +27211,8 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "ac583971f94b176", - "workloadId": "set:8:d8d49658059863f2", + "traceSignature": "dc27c5e0894e569", + "workloadId": "set:6:76d8142d69406335", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -26697,337 +27220,264 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", 
"repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28247584217", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28247584217", - "createdAt": "2026-06-26T15:22:36Z", - "sha": "fd23d02b65dba6f1ed963342b188022fc27263d1" + "id": "28287509502", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28287509502", + "createdAt": "2026-06-27T11:14:26.432170+00:00", + "sha": "df7fddee0a275156d4c72fa006cd2b73bce72613" }, "rows": [ { - "tokensPerRank": 1, - "globalTokens": 8, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 251.93598866462708, - "p90": 260.3839933872223, - "p95": 263.10399174690247, - "p99": 268.5759961605072 + "p50": 80.60800284147263, + "p90": 83.45600217580795, + "p95": 84.6719965338707, + "p99": 93.56799721717834 }, "combine": { - "p50": 68.41599941253662, - "p90": 69.88800317049026, - "p95": 70.8480030298233, - "p99": 76.03199779987335 + "p50": 89.82399851083755, + "p90": 92.12800115346909, + "p95": 93.40800344944, + "p99": 99.84000027179718 }, "roundtrip": { - "p50": 296.51200771331787, - "p90": 304.1279911994934, - "p95": 306.40000104904175, - "p99": 349.15199875831604 + "p50": 183.4239959716797, + "p90": 186.46399676799774, + "p95": 187.68000602722168, + "p99": 198.17599654197693 }, "isolatedSum": { - "p50": 320.3519880771637, - "p90": 330.27199655771255, - "p95": 333.95199477672577, - "p99": 344.60799396038055 + "p50": 170.43200135231018, + "p90": 175.58400332927704, + "p95": 178.0799999833107, + "p99": 193.40799748897552 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 315392, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 7, + "dispatchLogicalBytes": 22282240, + "combineLogicalBytes": 44564480, + "fanoutMean": 5.3125, + "recvTokensMax": 699, "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 200.51200687885284, - "p90": 256.8320035934448, - "p95": 
259.99999046325684, - "p99": 268.0000066757202 - }, - "combine": { - "p50": 63.00800293684006, - "p90": 71.00799679756165, - "p95": 71.84000313282013, - "p99": 74.68800246715546 - }, - "roundtrip": { - "p50": 243.1039959192276, - "p90": 300.1919984817505, - "p95": 303.5840094089508, - "p99": 308.9919984340668 - }, - "isolatedSum": { - "p50": 263.5200098156929, - "p90": 327.84000039100647, - "p95": 331.83999359607697, - "p99": 342.68800914287567 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 616448, - "combineLogicalBytes": 1232896, - "fanoutMean": 5.375, - "recvTokensMax": 13, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, + "tokensPerRank": 256, + "globalTokens": 2048, "dispatch": { - "p50": 199.13600385189056, - "p90": 287.9680097103119, - "p95": 291.1359965801239, - "p99": 298.2720136642456 + "p50": 108.15999656915665, + "p90": 112.19199746847153, + "p95": 116.60800129175186, + "p99": 134.62400436401367 }, "combine": { - "p50": 63.519999384880066, - "p90": 75.1039981842041, - "p95": 76.73600316047668, - "p99": 81.40800148248672 + "p50": 124.92799758911133, + "p90": 128.4479945898056, + "p95": 129.98400628566742, + "p99": 134.91199910640717 }, "roundtrip": { - "p50": 246.17600440979004, - "p90": 330.84800839424133, - "p95": 333.9200019836426, - "p99": 343.6479866504669 + "p50": 259.99999046325684, + "p90": 264.3519937992096, + "p95": 266.07999205589294, + "p99": 281.6320061683655 }, "isolatedSum": { - "p50": 262.65600323677063, - "p90": 363.072007894516, - "p95": 367.8719997406006, - "p99": 379.68001514673233 + "p50": 233.08799415826797, + "p90": 240.63999205827713, + "p95": 246.59200757741928, + "p99": 269.53600347042084 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1240064, - "combineLogicalBytes": 2480128, - "fanoutMean": 5.40625, - "recvTokensMax": 29, + "dispatchLogicalBytes": 44863488, + "combineLogicalBytes": 89726976, + "fanoutMean": 5.34814453125, 
+ "recvTokensMax": 1385, "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 512, + "globalTokens": 4096, "dispatch": { - "p50": 199.16799664497375, - "p90": 258.14399123191833, - "p95": 261.4080011844635, - "p99": 267.16798543930054 + "p50": 152.79999375343323, + "p90": 157.02399611473083, + "p95": 159.2639982700348, + "p99": 168.16000640392303 }, "combine": { - "p50": 63.4239986538887, - "p90": 72.57600128650665, - "p95": 73.18399846553802, - "p99": 76.28799974918365 + "p50": 189.60000574588776, + "p90": 192.7040070295334, + "p95": 194.46399807929993, + "p99": 207.71199464797974 }, "roundtrip": { - "p50": 244.83199417591095, - "p90": 302.3039996623993, - "p95": 305.759996175766, - "p99": 310.94399094581604 + "p50": 395.04000544548035, + "p90": 400.2879858016968, + "p95": 402.3360013961792, + "p99": 415.6799912452698 }, "isolatedSum": { - "p50": 262.59199529886246, - "p90": 330.719992518425, - "p95": 334.5919996500015, - "p99": 343.4559851884842 + "p50": 342.399999499321, + "p90": 349.7280031442642, + "p95": 353.7279963493347, + "p99": 375.87200105190277 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2487296, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, + "dispatchLogicalBytes": 89751552, + "combineLogicalBytes": 179503104, + "fanoutMean": 5.349609375, + "recvTokensMax": 2772, "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 16, - "globalTokens": 128, + "tokensPerRank": 1024, + "globalTokens": 8192, "dispatch": { - "p50": 200.28799772262573, - "p90": 286.5599989891052, - "p95": 290.0800108909607, - "p99": 296.57599329948425 + "p50": 245.92000246047974, + "p90": 251.3599991798401, + "p95": 252.99200415611267, + "p99": 264.0640139579773 }, "combine": { - "p50": 65.5359998345375, - "p90": 76.86399668455124, - "p95": 77.66400277614594, - "p99": 80.76799660921097 + "p50": 390.75198769569397, 
+ "p90": 398.9120125770569, + "p95": 401.66398882865906, + "p99": 409.0240001678467 }, "roundtrip": { - "p50": 248.57600033283234, - "p90": 330.4640054702759, - "p95": 333.6319923400879, - "p99": 344.7360098361969 + "p50": 774.0160226821899, + "p90": 781.9200158119202, + "p95": 786.4320278167725, + "p99": 796.3839769363403 }, "isolatedSum": { - "p50": 265.82399755716324, - "p90": 363.42399567365646, - "p95": 367.7440136671066, - "p99": 377.3439899086952 + "p50": 636.6719901561737, + "p90": 650.272011756897, + "p95": 654.6559929847717, + "p99": 673.088014125824 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4960256, - "combineLogicalBytes": 9920512, - "fanoutMean": 5.40625, - "recvTokensMax": 92, + "dispatchLogicalBytes": 179511296, + "combineLogicalBytes": 359022592, + "fanoutMean": 5.349853515625, + "recvTokensMax": 5558, "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 32, - "globalTokens": 256, + "tokensPerRank": 2048, + "globalTokens": 16384, "dispatch": { - "p50": 198.88000190258026, - "p90": 284.4800055027008, - "p95": 288.12798857688904, - "p99": 293.0240035057068 + "p50": 438.1119906902313, + "p90": 448.09600710868835, + "p95": 452.4799883365631, + "p99": 461.63201332092285 }, "combine": { - "p50": 69.18399780988693, - "p90": 80.54400235414505, - "p95": 81.4720019698143, - "p99": 84.63999629020691 + "p50": 750.6240010261536, + "p90": 756.4160227775574, + "p95": 758.2399845123291, + "p99": 767.0400142669678 }, "roundtrip": { - "p50": 253.12000513076782, - "p90": 334.01599526405334, - "p95": 336.89600229263306, - "p99": 340.31999111175537 + "p50": 1456.3839435577393, + "p90": 1466.4959907531738, + "p95": 1470.3359603881836, + "p99": 1482.3039770126343 }, "isolatedSum": { - "p50": 268.0639997124672, - "p90": 365.02400785684586, - "p95": 369.59999054670334, - "p99": 377.6639997959137 + "p50": 1188.735991716385, + "p90": 1204.5120298862457, + "p95": 1210.7199728488922, + "p99": 1228.6720275878906 }, 
"roundtripMeasured": true, - "dispatchLogicalBytes": 9863168, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 182, + "dispatchLogicalBytes": 358055936, + "combineLogicalBytes": 716111872, + "fanoutMean": 5.33544921875, + "recvTokensMax": 10982, "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 64, - "globalTokens": 512, + "tokensPerRank": 4096, + "globalTokens": 32768, "dispatch": { - "p50": 202.07999646663666, - "p90": 355.00800609588623, - "p95": 361.7280125617981, - "p99": 423.007994890213 + "p50": 854.6559810638428, + "p90": 867.7120208740234, + "p95": 873.2159733772278, + "p99": 887.8080248832703 }, "combine": { - "p50": 82.65600353479385, - "p90": 94.11200135946274, - "p95": 95.8079993724823, - "p99": 99.45599734783173 + "p50": 1436.5119934082031, + "p90": 1444.5120096206665, + "p95": 1448.3519792556763, + "p99": 1471.9359874725342 }, "roundtrip": { - "p50": 266.88000559806824, - "p90": 352.03200578689575, - "p95": 355.3600013256073, - "p99": 361.4720106124878 + "p50": 2809.664011001587, + "p90": 2821.1519718170166, + "p95": 2827.1679878234863, + "p99": 2873.1839656829834 }, "isolatedSum": { - "p50": 284.7360000014305, - "p90": 449.12000745534897, - "p95": 457.5360119342804, - "p99": 522.4639922380447 + "p50": 2291.167974472046, + "p90": 2312.22403049469, + "p95": 2321.567952632904, + "p99": 2359.7440123558044 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19496960, - "combineLogicalBytes": 38993920, - "fanoutMean": 5.3125, - "recvTokensMax": 367, + "dispatchLogicalBytes": 716197888, + "combineLogicalBytes": 1432395776, + "fanoutMean": 5.336090087890625, + "recvTokensMax": 21939, "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 221.79199755191803, - "p90": 289.72798585891724, - "p95": 293.08798909187317, - "p99": 300.9600043296814 - }, - "combine": { - "p50": 
98.27200323343277, - "p90": 108.8000014424324, - "p95": 110.1439967751503, - "p99": 113.88800293207169 - }, - "roundtrip": { - "p50": 303.74398827552795, - "p90": 364.8639917373657, - "p95": 367.45598912239075, - "p99": 371.5519905090332 - }, - "isolatedSum": { - "p50": 320.0640007853508, - "p90": 398.52798730134964, - "p95": 403.23198586702347, - "p99": 414.8480072617531 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 38836224, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 } ] }, { - "id": "cx-b83230a1", - "identity": "h100|deepep|7168|8|256|fp8|normal|cached-layout-comm-only-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|ac583971f94b176", - "colorKey": "h100_eddc3af6", - "comparisonKey": "f291497d6f9ce0d1", + "id": "cx-acd7c4ed", + "identity": "b300|deepep|4096|8|128|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||dc27c5e0894e569", + "colorKey": "b300_c4c63f07", + "comparisonKey": "31714ccd7ce96f8f", "schemaVersion": 3, - "generatedAt": "2026-06-26T17:31:42.999710+00:00", + "generatedAt": "2026-06-27T09:52:16.802838+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h100-dgxc-slurm_12", - "sku": "h100", + "runner": "b300-nv_07", + "sku": "b300", "backend": "deepep", - "phase": "decode", + "phase": "prefill", "mode": "normal", - "resourceMode": "normalized", - "suite": "resource-constrained", + "resourceMode": "tuned", + "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "cached-layout-comm-only-v1", - "topologyClass": "h100-nvlink-island", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b300-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · fp8 (norm) [cl]", + "label": "B300 EP8 · deepep · fp8", + "model": "Qwen3.5", "shape": { - "hidden": 7168, + "hidden": 4096, "topk": 8, 
- "experts": 256, + "experts": 128, "routing": "uniform", "routingLabel": "uniform", "routingStep": 0, @@ -27038,14 +27488,14 @@ "combineQuantMode": "none" }, "resourceProfile": { - "requestedFraction": 0.18, - "achievedFraction": 0.1818, - "configuredUnits": 24, - "deviceUnits": 132, - "resourceClass": "resource-constrained", - "conformanceClass": "resource-conforming", + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", "fixedKernel": false, - "paretoEligible": true + "paretoEligible": false }, "placement": { "kind": "packed", @@ -27054,8 +27504,8 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "ac583971f94b176", - "workloadId": "set:8:d8d49658059863f2", + "traceSignature": "dc27c5e0894e569", + "workloadId": "set:6:76d8142d69406335", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -27063,304 +27513,230 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28254341346", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254341346", - "createdAt": "2026-06-26T17:27:22Z", - "sha": "60dec7d70f554e252fec87709e2be52752947db1" + "id": "28285696261", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285696261", + "createdAt": "2026-06-27T09:52:16.802838+00:00", + "sha": "149586650dbed5b7579537347e9489d5b41543c1" }, "rows": [ { - "tokensPerRank": 1, - "globalTokens": 8, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 81.34400099515915, - "p90": 84.927998483181, - "p95": 86.496002972126, - "p99": 90.14400094747543 + "p50": 1875.615954399109, + "p90": 2675.9040355682373, + "p95": 2804.8319816589355, + "p99": 3161.2160205841064 }, "combine": { - "p50": 71.3919997215271, - "p90": 73.91999661922455, - "p95": 
74.87999647855759, - "p99": 77.98399776220322 + "p50": 1791.424036026001, + "p90": 2183.648109436035, + "p95": 2710.495948791504, + "p99": 2984.6720695495605 }, "roundtrip": { - "p50": 173.15199971199036, - "p90": 178.6240041255951, - "p95": 180.92800676822662, - "p99": 186.5600049495697 + "p50": 1945.6959962844849, + "p90": 2103.775978088379, + "p95": 2727.839946746826, + "p99": 3128.959894180298 }, "isolatedSum": { - "p50": 152.73600071668625, - "p90": 158.84799510240555, - "p95": 161.3759994506836, - "p99": 168.12799870967865 + "p50": 3667.03999042511, + "p90": 4859.5521450042725, + "p95": 5515.327930450439, + "p99": 6145.888090133667 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 315392, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 1, + "dispatchLogicalBytes": 22282240, + "combineLogicalBytes": 44564480, + "fanoutMean": 5.3125, + "recvTokensMax": 699, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2, - "globalTokens": 16, + "tokensPerRank": 256, + "globalTokens": 2048, "dispatch": { - "p50": 58.49599838256836, - "p90": 82.78399705886841, - "p95": 84.3840017914772, - "p99": 90.01599997282028 + "p50": 1994.4000244140625, + "p90": 2822.2079277038574, + "p95": 3089.344024658203, + "p99": 4134.687900543213 }, "combine": { - "p50": 63.07200342416763, - "p90": 74.0479975938797, - "p95": 74.8480036854744, - "p99": 77.44000107049942 + "p50": 1834.3039751052856, + "p90": 2468.640089035034, + "p95": 2714.9438858032227, + "p99": 3004.672050476074 }, "roundtrip": { - "p50": 141.12000167369843, - "p90": 176.54399573802948, - "p95": 178.81600558757782, - "p99": 181.92000687122345 + "p50": 2093.0240154266357, + "p90": 2329.024076461792, + "p95": 2922.7840900421143, + "p99": 3284.0960025787354 }, "isolatedSum": { - "p50": 121.56800180673599, - "p90": 156.8319946527481, - "p95": 159.2320054769516, - "p99": 167.4560010433197 + "p50": 3828.703999519348, + "p90": 
5290.848016738892, + "p95": 5804.287910461426, + "p99": 7139.359951019287 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 616448, - "combineLogicalBytes": 1232896, - "fanoutMean": 5.375, - "recvTokensMax": 13, - "stragglerRank": 0, + "dispatchLogicalBytes": 44863488, + "combineLogicalBytes": 89726976, + "fanoutMean": 5.34814453125, + "recvTokensMax": 1385, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 4, - "globalTokens": 32, + "tokensPerRank": 512, + "globalTokens": 4096, "dispatch": { - "p50": 59.13599953055382, - "p90": 82.68799632787704, - "p95": 85.37600189447403, - "p99": 91.61599725484848 + "p50": 2082.495927810669, + "p90": 2575.5200386047363, + "p95": 3061.8879795074463, + "p99": 3882.4000358581543 }, "combine": { - "p50": 63.64800035953522, - "p90": 74.14399832487106, - "p95": 75.19999891519547, - "p99": 79.32800054550171 + "p50": 1895.7120180130005, + "p90": 2081.5999507904053, + "p95": 2722.0799922943115, + "p99": 3054.0480613708496 }, "roundtrip": { - "p50": 140.83200693130493, - "p90": 178.49600315093994, - "p95": 180.92800676822662, - "p99": 187.45599687099457 + "p50": 2248.447895050049, + "p90": 2507.391929626465, + "p95": 3178.4000396728516, + "p99": 3517.632007598877 }, "isolatedSum": { - "p50": 122.78399989008904, - "p90": 156.8319946527481, - "p95": 160.5760008096695, - "p99": 170.9439978003502 + "p50": 3978.2079458236694, + "p90": 4657.119989395142, + "p95": 5783.967971801758, + "p99": 6936.448097229004 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1240064, - "combineLogicalBytes": 2480128, - "fanoutMean": 5.40625, - "recvTokensMax": 29, - "stragglerRank": 0, + "dispatchLogicalBytes": 89751552, + "combineLogicalBytes": 179503104, + "fanoutMean": 5.349609375, + "recvTokensMax": 2772, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 1024, + "globalTokens": 8192, "dispatch": { - "p50": 
61.792001128196716, - "p90": 83.20000022649765, - "p95": 86.07999980449677, - "p99": 96.00000083446503 + "p50": 2266.335964202881, + "p90": 2846.816062927246, + "p95": 3325.5679607391357, + "p99": 3900.8638858795166 }, "combine": { - "p50": 65.43999910354614, - "p90": 75.93599706888199, - "p95": 78.14399898052216, - "p99": 83.74399691820145 + "p50": 2154.8800468444824, + "p90": 2735.584020614624, + "p95": 3072.096109390259, + "p99": 3418.11203956604 }, "roundtrip": { - "p50": 144.44799721240997, - "p90": 181.15200102329254, - "p95": 184.25600230693817, - "p99": 199.8080015182495 + "p50": 2644.864082336426, + "p90": 3269.08802986145, + "p95": 3706.2718868255615, + "p99": 6074.7199058532715 }, "isolatedSum": { - "p50": 127.23200023174286, - "p90": 159.13599729537964, - "p95": 164.22399878501892, - "p99": 179.74399775266647 + "p50": 4421.216011047363, + "p90": 5582.40008354187, + "p95": 6397.6640701293945, + "p99": 7318.975925445557 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2487296, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 2, + "dispatchLogicalBytes": 179511296, + "combineLogicalBytes": 359022592, + "fanoutMean": 5.349853515625, + "recvTokensMax": 5558, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 16, - "globalTokens": 128, + "tokensPerRank": 2048, + "globalTokens": 16384, "dispatch": { - "p50": 80.38400113582611, - "p90": 83.52000266313553, - "p95": 85.08799970149994, - "p99": 92.38400310277939 + "p50": 2653.8240909576416, + "p90": 2906.4319133758545, + "p95": 3221.951961517334, + "p99": 3759.3278884887695 }, "combine": { - "p50": 75.80800354480743, - "p90": 77.85599678754807, - "p95": 79.03999835252762, - "p99": 80.83199709653854 + "p50": 2523.5838890075684, + "p90": 2799.743890762329, + "p95": 3378.0479431152344, + "p99": 3780.8001041412354 }, "roundtrip": { - "p50": 150.59199929237366, - "p90": 182.49599635601044, - "p95": 
184.60799753665924, - "p99": 194.815993309021 + "p50": 3350.048065185547, + "p90": 3693.056106567383, + "p95": 4236.576080322266, + "p99": 4646.240234375 }, "isolatedSum": { - "p50": 156.19200468063354, - "p90": 161.3759994506836, - "p95": 164.12799805402756, - "p99": 173.21600019931793 + "p50": 5177.40797996521, + "p90": 5706.175804138184, + "p95": 6599.999904632568, + "p99": 7540.127992630005 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4960256, - "combineLogicalBytes": 9920512, - "fanoutMean": 5.40625, - "recvTokensMax": 92, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 61.792001128196716, - "p90": 81.727996468544, - "p95": 84.28800106048584, - "p99": 89.88799899816513 - }, - "combine": { - "p50": 69.34399902820587, - "p90": 79.96799796819687, - "p95": 81.24800026416779, - "p99": 83.99999886751175 - }, - "roundtrip": { - "p50": 146.11199498176575, - "p90": 184.32000279426575, - "p95": 186.52799725532532, - "p99": 192.44800508022308 - }, - "isolatedSum": { - "p50": 131.1360001564026, - "p90": 161.69599443674088, - "p95": 165.53600132465363, - "p99": 173.88799786567688 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 9863168, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 182, - "stragglerRank": 1, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 64, - "globalTokens": 512, - "dispatch": { - "p50": 72.80000299215317, - "p90": 86.43200248479843, - "p95": 92.54399687051773, - "p99": 99.7759997844696 - }, - "combine": { - "p50": 85.08799970149994, - "p90": 95.0080007314682, - "p95": 96.41599655151367, - "p99": 101.21600329875946 - }, - "roundtrip": { - "p50": 182.8799992799759, - "p90": 202.94399559497833, - "p95": 208.3200067281723, - "p99": 218.176007270813 - }, - "isolatedSum": { - "p50": 157.8880026936531, - "p90": 181.44000321626663, - "p95": 188.9599934220314, - 
"p99": 200.99200308322906 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 19496960, - "combineLogicalBytes": 38993920, - "fanoutMean": 5.3125, - "recvTokensMax": 367, - "stragglerRank": 0, + "dispatchLogicalBytes": 358055936, + "combineLogicalBytes": 716111872, + "fanoutMean": 5.33544921875, + "recvTokensMax": 10982, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 4096, + "globalTokens": 32768, "dispatch": { - "p50": 77.79199630022049, - "p90": 92.12800115346909, - "p95": 93.72799843549728, - "p99": 98.24000298976898 + "p50": 3389.280080795288, + "p90": 3919.840097427368, + "p95": 4479.1998863220215, + "p99": 6919.424057006836 }, "combine": { - "p50": 99.55199807882309, - "p90": 109.72800105810165, - "p95": 110.91200262308121, - "p99": 114.46399986743927 + "p50": 3219.4879055023193, + "p90": 3500.704050064087, + "p95": 4088.6402130126953, + "p99": 4587.488174438477 }, "roundtrip": { - "p50": 205.1520049571991, - "p90": 219.200000166893, - "p95": 220.89600563049316, - "p99": 223.4880030155182 + "p50": 4788.127899169922, + "p90": 4992.767810821533, + "p95": 5423.679828643799, + "p99": 6249.695777893066 }, "isolatedSum": { - "p50": 177.34399437904358, - "p90": 201.85600221157074, - "p95": 204.6400010585785, - "p99": 212.70400285720825 + "p50": 6608.767986297607, + "p90": 7420.544147491455, + "p95": 8567.840099334717, + "p99": 11506.912231445312 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 38836224, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 0, + "dispatchLogicalBytes": 716197888, + "combineLogicalBytes": 1432395776, + "fanoutMean": 5.336090087890625, + "recvTokensMax": 21939, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -27368,32 +27744,33 @@ ] }, { - "id": "cx-d8e58489", - "identity": 
"h100|deepep|7168|8|256|fp8|normal|cached-layout-comm-only-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", - "colorKey": "h100_ec72792b", - "comparisonKey": "2bfd4913feb2a935", + "id": "cx-a725beb5", + "identity": "b300|deepep|5120|8|160|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||0c022a63bbcbf42", + "colorKey": "b300_c4c63f07", + "comparisonKey": "9a5b239287748a0a", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:47:54.320638+00:00", + "generatedAt": "2026-06-27T09:52:25.584381+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h100-dgxc-slurm_02", - "sku": "h100", + "runner": "b300-nv_12", + "sku": "b300", "backend": "deepep", - "phase": "decode", + "phase": "prefill", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "cached-layout-comm-only-v1", - "topologyClass": "h100-nvlink-island", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b300-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · fp8 [cl]", + "label": "B300 EP8 · deepep · fp8", + "model": "shape 5120/8/160", "shape": { - "hidden": 7168, + "hidden": 5120, "topk": 8, - "experts": 256, + "experts": 160, "routing": "uniform", "routingLabel": "uniform", "routingStep": 0, @@ -27405,9 +27782,9 @@ }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1515, + "achievedFraction": 0.1351, "configuredUnits": 20, - "deviceUnits": 132, + "deviceUnits": 148, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -27420,8 +27797,8 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "ac583971f94b176", - "workloadId": "set:8:d8d49658059863f2", + "traceSignature": "0c022a63bbcbf42", + "workloadId": "set:6:28c0c09b13ff0acf", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, 
@@ -27429,304 +27806,230 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271573150", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271573150", - "createdAt": "2026-06-26T23:46:58Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28285707789", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285707789", + "createdAt": "2026-06-27T09:52:25.584381+00:00", + "sha": "149586650dbed5b7579537347e9489d5b41543c1" }, "rows": [ { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 78.3040001988411, - "p90": 82.07999914884567, - "p95": 84.44800227880478, - "p99": 88.03199976682663 - }, - "combine": { - "p50": 71.1359977722168, - "p90": 72.86400347948074, - "p95": 73.82400333881378, - "p99": 77.88799703121185 - }, - "roundtrip": { - "p50": 136.63999736309052, - "p90": 174.75199699401855, - "p95": 177.15199291706085, - "p99": 181.08800053596497 - }, - "isolatedSum": { - "p50": 149.4399979710579, - "p90": 154.94400262832642, - "p95": 158.27200561761856, - "p99": 165.91999679803848 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 315392, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 56.832000613212585, - "p90": 79.74400371313095, - "p95": 81.11999928951263, - "p99": 85.69599688053131 + "p50": 155.20000457763672, + "p90": 159.2320054769516, + "p95": 161.9199961423874, + "p99": 180.83199858665466 }, "combine": { - "p50": 62.3680017888546, - "p90": 71.58400118350983, - "p95": 72.25599884986877, - "p99": 75.9039968252182 + "p50": 95.74399888515472, + "p90": 98.14400225877762, + "p95": 99.10400211811066, + "p99": 110.36799848079681 }, "roundtrip": { - "p50": 
138.0160003900528, - "p90": 172.95999825000763, - "p95": 174.30399358272552, - "p99": 179.61600422859192 + "p50": 242.5920069217682, + "p90": 246.20799720287323, + "p95": 248.28800559043884, + "p99": 264.0959918498993 }, "isolatedSum": { - "p50": 119.20000240206718, - "p90": 151.32800489664078, - "p95": 153.3759981393814, - "p99": 161.5999937057495 + "p50": 250.94400346279144, + "p90": 257.3760077357292, + "p95": 261.02399826049805, + "p99": 291.1999970674515 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 616448, - "combineLogicalBytes": 1232896, - "fanoutMean": 5.375, - "recvTokensMax": 13, + "dispatchLogicalBytes": 27837440, + "combineLogicalBytes": 55674880, + "fanoutMean": 5.3095703125, + "recvTokensMax": 699, "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 56.92800134420395, - "p90": 82.0159986615181, - "p95": 85.02399921417236, - "p99": 87.77599781751633 - }, - "combine": { - "p50": 63.07200342416763, - "p90": 74.94399696588516, - "p95": 76.28799974918365, - "p99": 79.99999821186066 - }, - "roundtrip": { - "p50": 138.7840062379837, - "p90": 179.51999604701996, - "p95": 182.01600015163422, - "p99": 187.42400407791138 - }, - "isolatedSum": { - "p50": 120.00000476837158, - "p90": 156.95999562740326, - "p95": 161.31199896335602, - "p99": 167.77599602937698 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1240064, - "combineLogicalBytes": 2480128, - "fanoutMean": 5.40625, - "recvTokensMax": 29, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 256, + "globalTokens": 2048, "dispatch": { - "p50": 56.832000613212585, - "p90": 80.99199831485748, - "p95": 82.94399827718735, - "p99": 87.99999952316284 + "p50": 201.1519968509674, + "p90": 206.2080055475235, + "p95": 208.00000429153442, + "p99": 217.69599616527557 }, "combine": { - "p50": 63.71200084686279, 
- "p90": 74.43200051784515, - "p95": 75.19999891519547, - "p99": 79.52000200748444 + "p50": 131.84000551700592, + "p90": 134.65599715709686, + "p95": 135.77599823474884, + "p99": 142.46399700641632 }, "roundtrip": { - "p50": 139.93600010871887, - "p90": 178.5919964313507, - "p95": 181.98400735855103, - "p99": 185.47199666500092 + "p50": 328.3199965953827, + "p90": 334.56000685691833, + "p95": 336.8000090122223, + "p99": 351.77600383758545 }, "isolatedSum": { - "p50": 120.54400146007538, - "p90": 155.42399883270264, - "p95": 158.1439971923828, - "p99": 167.52000153064728 + "p50": 332.9920023679733, + "p90": 340.86400270462036, + "p95": 343.77600252628326, + "p99": 360.1599931716919 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2487296, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, + "dispatchLogicalBytes": 55552000, + "combineLogicalBytes": 111104000, + "fanoutMean": 5.2978515625, + "recvTokensMax": 1387, "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 16, - "globalTokens": 128, + "tokensPerRank": 512, + "globalTokens": 4096, "dispatch": { - "p50": 57.472001761198044, - "p90": 81.79199695587158, - "p95": 84.28800106048584, - "p99": 87.87199854850769 + "p50": 302.94400453567505, + "p90": 307.2640001773834, + "p95": 309.82398986816406, + "p99": 326.07999444007874 }, "combine": { - "p50": 65.5359998345375, - "p90": 77.37600058317184, - "p95": 79.3600007891655, - "p99": 82.46400207281113 + "p50": 206.4639925956726, + "p90": 211.71200275421143, + "p95": 213.24799954891205, + "p99": 225.8879989385605 }, "roundtrip": { - "p50": 141.184002161026, - "p90": 181.7920058965683, - "p95": 184.9599927663803, - "p99": 191.93600118160248 + "p50": 523.4879851341248, + "p90": 529.8240184783936, + "p95": 533.3120226860046, + "p99": 555.6480288505554 }, "isolatedSum": { - "p50": 123.00800159573555, - "p90": 159.16799753904343, - "p95": 163.64800184965134, - "p99": 170.33600062131882 + "p50": 
509.40799713134766, + "p90": 518.9760029315948, + "p95": 523.0719894170761, + "p99": 551.9679933786392 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4960256, - "combineLogicalBytes": 9920512, - "fanoutMean": 5.40625, - "recvTokensMax": 92, - "stragglerRank": 6, + "dispatchLogicalBytes": 111549440, + "combineLogicalBytes": 223098880, + "fanoutMean": 5.319091796875, + "recvTokensMax": 2762, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 32, - "globalTokens": 256, + "tokensPerRank": 1024, + "globalTokens": 8192, "dispatch": { - "p50": 60.32000109553337, - "p90": 82.0159986615181, - "p95": 84.63999629020691, - "p99": 91.0400003194809 + "p50": 526.5600085258484, + "p90": 532.480001449585, + "p95": 534.8799824714661, + "p99": 544.8639988899231 }, "combine": { - "p50": 70.97599655389786, - "p90": 82.14399963617325, - "p95": 83.20000022649765, - "p99": 88.60799670219421 + "p50": 429.8880100250244, + "p90": 435.232013463974, + "p95": 437.855988740921, + "p99": 454.0480077266693 }, "roundtrip": { - "p50": 147.0080018043518, - "p90": 185.7919991016388, - "p95": 188.06399405002594, - "p99": 192.25600361824036 + "p50": 936.2559914588928, + "p90": 944.0320134162903, + "p95": 946.6879963874817, + "p99": 960.096001625061 }, "isolatedSum": { - "p50": 131.29599764943123, - "p90": 164.15999829769135, - "p95": 167.83999651670456, - "p99": 179.6479970216751 + "p50": 956.4480185508728, + "p90": 967.712014913559, + "p95": 972.7359712123871, + "p99": 998.9120066165924 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 9863168, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 182, - "stragglerRank": 7, + "dispatchLogicalBytes": 223365120, + "combineLogicalBytes": 446730240, + "fanoutMean": 5.325439453125, + "recvTokensMax": 5518, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 64, - "globalTokens": 512, + "tokensPerRank": 2048, + "globalTokens": 
16384, "dispatch": { - "p50": 69.60000097751617, - "p90": 85.69599688053131, - "p95": 87.99999952316284, - "p99": 100.8640006184578 + "p50": 967.9359793663025, + "p90": 977.728009223938, + "p95": 980.7999730110168, + "p99": 989.5679950714111 }, "combine": { - "p50": 80.6720033288002, - "p90": 92.70399808883667, - "p95": 93.66399794816971, - "p99": 97.4079966545105 + "p50": 777.8559923171997, + "p90": 783.9679718017578, + "p95": 787.1999740600586, + "p99": 800.000011920929 }, "roundtrip": { - "p50": 160.70400178432465, - "p90": 200.83199441432953, - "p95": 203.19999754428864, - "p99": 211.5200012922287 + "p50": 1729.024052619934, + "p90": 1740.5760288238525, + "p95": 1744.0320253372192, + "p99": 1758.9759826660156 }, "isolatedSum": { - "p50": 150.27200430631638, - "p90": 178.39999496936798, - "p95": 181.66399747133255, - "p99": 198.2719972729683 + "p50": 1745.7919716835022, + "p90": 1761.6959810256958, + "p95": 1767.9999470710754, + "p99": 1789.56800699234 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19496960, - "combineLogicalBytes": 38993920, - "fanoutMean": 5.3125, - "recvTokensMax": 367, - "stragglerRank": 7, + "dispatchLogicalBytes": 446817280, + "combineLogicalBytes": 893634560, + "fanoutMean": 5.32647705078125, + "recvTokensMax": 11032, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 4096, + "globalTokens": 32768, "dispatch": { - "p50": 77.05599814653397, - "p90": 91.96799993515015, - "p95": 94.43199634552002, - "p99": 99.32799637317657 + "p50": 1878.9119720458984, + "p90": 1891.3919925689697, + "p95": 1897.055983543396, + "p99": 1933.2799911499023 }, "combine": { - "p50": 97.53599762916565, - "p90": 109.37599837779999, - "p95": 110.68800091743469, - "p99": 115.7120019197464 + "p50": 1474.8159646987915, + "p90": 1484.8320484161377, + "p95": 1491.3280010223389, + "p99": 1509.2159509658813 }, "roundtrip": { - "p50": 203.80799472332, - "p90": 
219.9999988079071, - "p95": 222.59199619293213, - "p99": 236.4799976348877 + "p50": 3333.631992340088, + "p90": 3347.424030303955, + "p95": 3355.1039695739746, + "p99": 3383.3279609680176 }, "isolatedSum": { - "p50": 174.59199577569962, - "p90": 201.34399831295013, - "p95": 205.1199972629547, - "p99": 215.03999829292297 + "p50": 3353.72793674469, + "p90": 3376.2240409851074, + "p95": 3388.383984565735, + "p99": 3442.4959421157837 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 38836224, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 6, + "dispatchLogicalBytes": 893132800, + "combineLogicalBytes": 1786265600, + "fanoutMean": 5.323486328125, + "recvTokensMax": 21895, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -27734,30 +28037,31 @@ ] }, { - "id": "cx-f1a3625a", - "identity": "h100|deepep|7168|8|256|fp8|ll|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", - "colorKey": "h100_7720baf2", - "comparisonKey": "800e526f613bc59d", + "id": "cx-a5fb5961", + "identity": "b300|deepep|6144|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "b300_d6fd14c3", + "comparisonKey": "6214ef692f2daf2b", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:49:09.827299+00:00", + "generatedAt": "2026-06-27T11:14:24.890661+00:00", "status": "valid", - "publicationStatus": "diagnostic", - "runner": "h100-dgxc-slurm_04", - "sku": "h100", + "publicationStatus": "official", + "runner": "b300-nv_03", + "sku": "b300", "backend": "deepep", - "phase": "decode", - "mode": "ll", + "phase": "prefill", + "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", + "topologyClass": "b300-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 
· deepep · fp8 LL", + "label": "B300 EP8 · deepep · fp8", + "model": "MiniMax-M3", "shape": { - "hidden": 7168, + "hidden": 6144, "topk": 8, "experts": 256, "routing": "uniform", @@ -27771,12 +28075,12 @@ }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": null, - "configuredUnits": null, - "deviceUnits": 132, - "resourceClass": "fixed-kernel", - "conformanceClass": "not-applicable", - "fixedKernel": true, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, "paretoEligible": false }, "placement": { @@ -27786,8 +28090,8 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "ac583971f94b176", - "workloadId": "set:8:d8d49658059863f2", + "traceSignature": "64d989e2e2a6b31", + "workloadId": "set:6:9f5e1e005a35e937", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -27795,304 +28099,230 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271594334", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271594334", - "createdAt": "2026-06-26T23:47:39Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28287498289", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28287498289", + "createdAt": "2026-06-27T11:14:24.890661+00:00", + "sha": "df7fddee0a275156d4c72fa006cd2b73bce72613" }, "rows": [ { - "tokensPerRank": 1, - "globalTokens": 8, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 30.81599995493889, - "p90": 33.824000507593155, - "p95": 36.67199984192848, - "p99": 41.760001331567764 + "p50": 81.18399977684021, + "p90": 83.83999764919281, + "p95": 84.95999872684479, + "p99": 91.90399944782257 }, "combine": { - "p50": 33.535998314619064, - "p90": 36.06399893760681, - "p95": 
38.656000047922134, - "p99": 94.62399780750275 + "p50": 102.27199643850327, + "p90": 105.40799796581268, + "p95": 106.36799782514572, + "p99": 112.99200356006622 }, "roundtrip": { - "p50": 2063.647985458374, - "p90": 2066.3039684295654, - "p95": 2067.5199031829834, - "p99": 2072.1280574798584 + "p50": 204.96000349521637, + "p90": 208.41600000858307, + "p95": 210.30400693416595, + "p99": 227.743998169899 }, "isolatedSum": { - "p50": 64.35199826955795, - "p90": 69.88799944519997, - "p95": 75.32799988985062, - "p99": 136.3839991390705 + "p50": 183.45599621534348, + "p90": 189.2479956150055, + "p95": 191.3279965519905, + "p99": 204.8960030078888 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 315392, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 14, - "stragglerRank": 3, + "dispatchLogicalBytes": 33288192, + "combineLogicalBytes": 66576384, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2, - "globalTokens": 16, + "tokensPerRank": 256, + "globalTokens": 2048, "dispatch": { - "p50": 30.688000842928886, - "p90": 33.440001308918, - "p95": 35.32800078392029, - "p99": 41.85599833726883 + "p50": 113.40799927711487, + "p90": 116.70400202274323, + "p95": 118.6240017414093, + "p99": 128.80000472068787 }, "combine": { - "p50": 35.10399907827377, - "p90": 39.135999977588654, - "p95": 60.99199876189232, - "p99": 184.2239946126938 + "p50": 140.9599930047989, + "p90": 143.93599331378937, + "p95": 145.31199634075165, + "p99": 153.4080058336258 }, "roundtrip": { - "p50": 2065.023899078369, - "p90": 2067.647933959961, - "p95": 2069.279909133911, - "p99": 2082.5600624084473 + "p50": 306.0159981250763, + "p90": 310.8159899711609, + "p95": 313.2160007953644, + "p99": 340.5759930610657 }, "isolatedSum": { - "p50": 65.79199992120266, - "p90": 72.57600128650665, - "p95": 96.3199995458126, - "p99": 226.07999294996262 + "p50": 254.36799228191376, + "p90": 
260.6399953365326, + "p95": 263.93599808216095, + "p99": 282.20801055431366 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 616448, - "combineLogicalBytes": 1232896, - "fanoutMean": 5.375, - "recvTokensMax": 21, - "stragglerRank": 1, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 30.527999624609947, - "p90": 32.70399942994118, - "p95": 34.33600068092346, - "p99": 38.72000053524971 - }, - "combine": { - "p50": 34.71999987959862, - "p90": 36.896001547575, - "p95": 37.82400116324425, - "p99": 40.672000497579575 - }, - "roundtrip": { - "p50": 2065.7920837402344, - "p90": 2069.4079399108887, - "p95": 2074.079990386963, - "p99": 2120.703935623169 - }, - "isolatedSum": { - "p50": 65.24799950420856, - "p90": 69.60000097751617, - "p95": 72.16000184416771, - "p99": 79.39200103282928 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1240064, - "combineLogicalBytes": 2480128, - "fanoutMean": 5.40625, - "recvTokensMax": 39, - "stragglerRank": 3, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 31.007999554276466, - "p90": 33.24799984693527, - "p95": 35.45600175857544, - "p99": 42.11200028657913 - }, - "combine": { - "p50": 35.74400022625923, - "p90": 38.62399980425835, - "p95": 39.903998374938965, - "p99": 44.12800073623657 - }, - "roundtrip": { - "p50": 2066.240072250366, - "p90": 2069.6959495544434, - "p95": 2070.784091949463, - "p99": 2073.9200115203857 - }, - "isolatedSum": { - "p50": 66.7519997805357, - "p90": 71.87199965119362, - "p95": 75.3600001335144, - "p99": 86.2400010228157 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2487296, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 74, - "stragglerRank": 3, + "dispatchLogicalBytes": 66809856, + "combineLogicalBytes": 133619712, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + 
"stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 16, - "globalTokens": 128, + "tokensPerRank": 512, + "globalTokens": 4096, "dispatch": { - "p50": 32.32000023126602, - "p90": 39.103999733924866, - "p95": 52.799999713897705, - "p99": 55.36000058054924 + "p50": 173.43999445438385, + "p90": 178.30400168895721, + "p95": 179.967999458313, + "p99": 184.54399704933167 }, "combine": { - "p50": 38.656000047922134, - "p90": 41.79200157523155, - "p95": 42.97599941492081, - "p99": 47.520000487565994 + "p50": 239.04000222682953, + "p90": 245.27999758720398, + "p95": 247.23200500011444, + "p99": 258.59200954437256 }, "roundtrip": { - "p50": 2071.9680786132812, - "p90": 2074.592113494873, - "p95": 2075.615882873535, - "p99": 2079.7760486602783 + "p50": 515.4240131378174, + "p90": 521.5039849281311, + "p95": 525.0880122184753, + "p99": 547.4560260772705 }, "isolatedSum": { - "p50": 70.97600027918816, - "p90": 80.89600130915642, - "p95": 95.77599912881851, - "p99": 102.88000106811523 + "p50": 412.4799966812134, + "p90": 423.5839992761612, + "p95": 427.20000445842743, + "p99": 443.1360065937042 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4960256, - "combineLogicalBytes": 9920512, - "fanoutMean": 5.40625, - "recvTokensMax": 145, - "stragglerRank": 3, + "dispatchLogicalBytes": 133828608, + "combineLogicalBytes": 267657216, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 32, - "globalTokens": 256, + "tokensPerRank": 1024, + "globalTokens": 8192, "dispatch": { - "p50": 36.3520011305809, - "p90": 38.11199963092804, - "p95": 40.22400081157684, - "p99": 45.951999723911285 + "p50": 290.52799940109253, + "p90": 296.35199904441833, + "p95": 299.1040050983429, + "p99": 307.93601274490356 }, "combine": { - "p50": 47.968000173568726, - "p90": 50.87999999523163, - "p95": 51.83999985456467, - "p99": 58.04799869656563 + "p50": 
438.6560022830963, + "p90": 443.4239864349365, + "p95": 447.00801372528076, + "p99": 467.3919975757599 }, "roundtrip": { - "p50": 2082.7200412750244, - "p90": 2085.2479934692383, - "p95": 2086.2081050872803, - "p99": 2089.1199111938477 + "p50": 922.2720265388489, + "p90": 931.007981300354, + "p95": 934.719979763031, + "p99": 978.7840247154236 }, "isolatedSum": { - "p50": 84.32000130414963, - "p90": 88.99199962615967, - "p95": 92.06400066614151, - "p99": 103.99999842047691 + "p50": 729.1840016841888, + "p90": 739.7759854793549, + "p95": 746.1120188236237, + "p99": 775.3280103206635 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 9863168, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 287, - "stragglerRank": 3, + "dispatchLogicalBytes": 267190272, + "combineLogicalBytes": 534380544, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 64, - "globalTokens": 512, + "tokensPerRank": 2048, + "globalTokens": 16384, "dispatch": { - "p50": 41.600000113248825, - "p90": 51.00800096988678, - "p95": 52.12799832224846, - "p99": 55.1999993622303 + "p50": 532.3519706726074, + "p90": 539.2640233039856, + "p95": 543.5519814491272, + "p99": 568.0000185966492 }, "combine": { - "p50": 60.67200005054474, - "p90": 68.67200136184692, - "p95": 71.68000191450119, - "p99": 97.08800166845322 + "p50": 796.064019203186, + "p90": 802.4960160255432, + "p95": 809.5679879188538, + "p99": 841.3119912147522 }, "roundtrip": { - "p50": 2101.8240451812744, - "p90": 2108.736038208008, - "p95": 2111.936092376709, - "p99": 2120.1279163360596 + "p50": 1721.9840288162231, + "p90": 1732.4479818344116, + "p95": 1739.743947982788, + "p99": 1767.4560546875 }, "isolatedSum": { - "p50": 102.27200016379356, - "p90": 119.6800023317337, - "p95": 123.80800023674965, - "p99": 152.28800103068352 + "p50": 1328.4159898757935, + "p90": 1341.7600393295288, + "p95": 1353.119969367981, + 
"p99": 1409.3120098114014 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19496960, - "combineLogicalBytes": 38993920, - "fanoutMean": 5.3125, - "recvTokensMax": 564, - "stragglerRank": 1, + "dispatchLogicalBytes": 533059584, + "combineLogicalBytes": 1066119168, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 4096, + "globalTokens": 32768, "dispatch": { - "p50": 54.016001522541046, - "p90": 56.223999708890915, - "p95": 57.312000542879105, - "p99": 60.575999319553375 + "p50": 1001.7600059509277, + "p90": 1012.6080513000488, + "p95": 1018.2720422744751, + "p99": 1040.3200387954712 }, "combine": { - "p50": 88.54400366544724, - "p90": 91.93599969148636, - "p95": 92.70399808883667, - "p99": 114.81600254774094 + "p50": 1498.3359575271606, + "p90": 1507.5520277023315, + "p95": 1513.983964920044, + "p99": 1537.0559692382812 }, "roundtrip": { - "p50": 2143.0718898773193, - "p90": 2146.7199325561523, - "p95": 2147.455930709839, - "p99": 2153.791904449463 + "p50": 3295.1040267944336, + "p90": 3310.1439476013184, + "p95": 3322.4000930786133, + "p99": 3358.4959506988525 }, "isolatedSum": { - "p50": 142.56000518798828, - "p90": 148.15999940037727, - "p95": 150.01599863171577, - "p99": 175.3920018672943 + "p50": 2500.0959634780884, + "p90": 2520.1600790023804, + "p95": 2532.256007194519, + "p99": 2577.3760080337524 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 38836224, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 1104, - "stragglerRank": 1, + "dispatchLogicalBytes": 1065861120, + "combineLogicalBytes": 2131722240, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -28100,30 +28330,31 @@ ] }, { - "id": "cx-73d1725a", - "identity": 
"h100|deepep|7168|8|256|fp8|ll|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", - "colorKey": "h100_3a77ee8e", - "comparisonKey": "93509525aa3f27c6", + "id": "cx-fba134bd", + "identity": "b300|deepep|6144|8|256|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "b300_c4c63f07", + "comparisonKey": "690e54d4fc20f43e", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:49:16.484836+00:00", + "generatedAt": "2026-06-27T09:52:55.540924+00:00", "status": "valid", - "publicationStatus": "diagnostic", - "runner": "h100-dgxc-slurm_13", - "sku": "h100", + "publicationStatus": "official", + "runner": "b300-nv_11", + "sku": "b300", "backend": "deepep", - "phase": "decode", - "mode": "ll", + "phase": "prefill", + "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "runtime-visible-v1", - "topologyClass": "h100-nvlink-island", + "topologyClass": "b300-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · fp8 LL", + "label": "B300 EP8 · deepep · fp8", + "model": "MiniMax-M3", "shape": { - "hidden": 7168, + "hidden": 6144, "topk": 8, "experts": 256, "routing": "uniform", @@ -28137,12 +28368,12 @@ }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": null, - "configuredUnits": null, - "deviceUnits": 132, - "resourceClass": "fixed-kernel", - "conformanceClass": "not-applicable", - "fixedKernel": true, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, "paretoEligible": false }, "placement": { @@ -28152,8 +28383,8 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "ac583971f94b176", - "workloadId": "set:8:d8d49658059863f2", + "traceSignature": "64d989e2e2a6b31", + "workloadId": "set:6:9f5e1e005a35e937", 
"workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -28161,304 +28392,523 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271598000", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271598000", - "createdAt": "2026-06-26T23:47:46Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28285718802", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285718802", + "createdAt": "2026-06-27T09:52:55.540924+00:00", + "sha": "149586650dbed5b7579537347e9489d5b41543c1" }, "rows": [ { - "tokensPerRank": 1, - "globalTokens": 8, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 31.10400028526783, - "p90": 33.376000821590424, - "p95": 34.88000109791756, - "p99": 39.264000952243805 + "p50": 156.67200088500977, + "p90": 160.863995552063, + "p95": 164.48000073432922, + "p99": 179.03999984264374 }, "combine": { - "p50": 32.575998455286026, - "p90": 35.32800078392029, - "p95": 36.928001791238785, - "p99": 40.41599854826927 + "p50": 101.6639992594719, + "p90": 103.67999970912933, + "p95": 104.3199971318245, + "p99": 107.26399719715118 }, "roundtrip": { - "p50": 2062.4639987945557, - "p90": 2065.1841163635254, - "p95": 2067.9678916931152, - "p99": 2091.871976852417 + "p50": 251.3599991798401, + "p90": 255.23200631141663, + "p95": 258.87998938560486, + "p99": 285.7919931411743 }, "isolatedSum": { - "p50": 63.679998740553856, - "p90": 68.70400160551071, - "p95": 71.80800288915634, - "p99": 79.67999950051308 + "p50": 258.33600014448166, + "p90": 264.5439952611923, + "p95": 268.7999978661537, + "p99": 286.3039970397949 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 315392, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 14, - "stragglerRank": 5, + "dispatchLogicalBytes": 33288192, + "combineLogicalBytes": 66576384, + "fanoutMean": 
5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2, - "globalTokens": 16, + "tokensPerRank": 256, + "globalTokens": 2048, "dispatch": { - "p50": 30.719999223947525, - "p90": 32.99200162291527, - "p95": 35.551998764276505, - "p99": 40.64000025391579 + "p50": 210.40000021457672, + "p90": 215.07200598716736, + "p95": 218.9439982175827, + "p99": 230.880007147789 }, "combine": { - "p50": 32.735999673604965, - "p90": 35.00799834728241, - "p95": 36.3520011305809, - "p99": 43.807998299598694 + "p50": 140.86399972438812, + "p90": 144.6080058813095, + "p95": 145.7280069589615, + "p99": 171.29600048065186 }, "roundtrip": { - "p50": 2063.136100769043, - "p90": 2065.376043319702, - "p95": 2067.296028137207, - "p99": 2071.039915084839 + "p50": 349.95201230049133, + "p90": 354.2720079421997, + "p95": 357.91999101638794, + "p99": 378.62399220466614 }, "isolatedSum": { - "p50": 63.45599889755249, - "p90": 67.99999997019768, - "p95": 71.9039998948574, - "p99": 84.44799855351448 + "p50": 351.26399993896484, + "p90": 359.68001186847687, + "p95": 364.6720051765442, + "p99": 402.17600762844086 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 616448, - "combineLogicalBytes": 1232896, - "fanoutMean": 5.375, - "recvTokensMax": 21, - "stragglerRank": 5, + "dispatchLogicalBytes": 66809856, + "combineLogicalBytes": 133619712, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 4, - "globalTokens": 32, + "tokensPerRank": 512, + "globalTokens": 4096, "dispatch": { - "p50": 32.35200047492981, - "p90": 46.65600135922432, - "p95": 47.42399975657463, - "p99": 53.279999643564224 + "p50": 335.58401465415955, + "p90": 340.12800455093384, + "p95": 341.15201234817505, + "p99": 352.28800773620605 }, "combine": { - "p50": 33.824000507593155, - "p90": 36.768000572919846, - "p95": 39.07199949026108, - "p99": 
50.783999264240265 + "p50": 239.1359955072403, + "p90": 245.02399563789368, + "p95": 247.13599681854248, + "p99": 252.70399451255798 }, "roundtrip": { - "p50": 2064.095973968506, - "p90": 2066.9119358062744, - "p95": 2069.567918777466, - "p99": 2080.512046813965 + "p50": 575.872004032135, + "p90": 582.8160047531128, + "p95": 585.4079723358154, + "p99": 596.6399908065796 }, "isolatedSum": { - "p50": 66.17600098252296, - "p90": 83.42400193214417, - "p95": 86.49599924683571, - "p99": 104.06399890780449 + "p50": 574.7200101613998, + "p90": 585.1520001888275, + "p95": 588.2880091667175, + "p99": 604.992002248764 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1240064, - "combineLogicalBytes": 2480128, - "fanoutMean": 5.40625, - "recvTokensMax": 39, - "stragglerRank": 5, + "dispatchLogicalBytes": 133828608, + "combineLogicalBytes": 267657216, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 1024, + "globalTokens": 8192, "dispatch": { - "p50": 31.90400078892708, - "p90": 34.04799848794937, - "p95": 35.74400022625923, - "p99": 39.77600112557411 + "p50": 584.1599702835083, + "p90": 589.8879766464233, + "p95": 592.2240018844604, + "p99": 601.472020149231 }, "combine": { - "p50": 34.17599946260452, - "p90": 36.22400015592575, - "p95": 37.53599897027016, - "p99": 42.208001017570496 + "p50": 437.5999867916107, + "p90": 441.8880045413971, + "p95": 445.43999433517456, + "p99": 456.7039906978607 }, "roundtrip": { - "p50": 2065.279960632324, - "p90": 2068.416118621826, - "p95": 2070.6560611724854, - "p99": 2080.8000564575195 + "p50": 1006.943941116333, + "p90": 1015.2640342712402, + "p95": 1019.10400390625, + "p99": 1030.9120416641235 }, "isolatedSum": { - "p50": 66.0800002515316, - "p90": 70.27199864387512, - "p95": 73.27999919652939, - "p99": 81.98400214314461 + "p50": 1021.759957075119, + "p90": 1031.7759811878204, + "p95": 
1037.663996219635, + "p99": 1058.1760108470917 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2487296, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 74, - "stragglerRank": 5, + "dispatchLogicalBytes": 267190272, + "combineLogicalBytes": 534380544, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 16, - "globalTokens": 128, + "tokensPerRank": 2048, + "globalTokens": 16384, "dispatch": { - "p50": 32.86400064826012, - "p90": 34.432001411914825, - "p95": 36.25600039958954, - "p99": 40.73600098490715 + "p50": 1078.8160562515259, + "p90": 1086.2720012664795, + "p95": 1088.8639688491821, + "p99": 1102.6240587234497 }, "combine": { - "p50": 37.88800165057182, - "p90": 44.67200115323067, - "p95": 46.30399867892265, - "p99": 69.24799829721451 + "p50": 797.0240116119385, + "p90": 804.7360181808472, + "p95": 809.9200129508972, + "p99": 828.2560110092163 }, "roundtrip": { - "p50": 2071.1679458618164, - "p90": 2079.5199871063232, - "p95": 2080.4800987243652, - "p99": 2085.439920425415 + "p50": 1859.071969985962, + "p90": 1870.6560134887695, + "p95": 1876.1919736862183, + "p99": 1887.3920440673828 }, "isolatedSum": { - "p50": 70.75200229883194, - "p90": 79.10400256514549, - "p95": 82.55999907851219, - "p99": 109.98399928212166 + "p50": 1875.8400678634644, + "p90": 1891.0080194473267, + "p95": 1898.7839818000793, + "p99": 1930.880069732666 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4960256, - "combineLogicalBytes": 9920512, - "fanoutMean": 5.40625, - "recvTokensMax": 145, - "stragglerRank": 5, + "dispatchLogicalBytes": 533059584, + "combineLogicalBytes": 1066119168, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 32, - "globalTokens": 256, + "tokensPerRank": 4096, + "globalTokens": 32768, "dispatch": { - "p50": 
35.00799834728241, - "p90": 36.928001791238785, - "p95": 39.07199949026108, - "p99": 41.98399931192398 + "p50": 2078.239917755127, + "p90": 2087.264060974121, + "p95": 2091.3920402526855, + "p99": 2107.840061187744 }, "combine": { - "p50": 43.68000105023384, - "p90": 45.72800174355507, - "p95": 46.879999339580536, - "p99": 52.480001002550125 + "p50": 1500.2559423446655, + "p90": 1509.8240375518799, + "p95": 1514.6880149841309, + "p99": 1528.4160375595093 }, "roundtrip": { - "p50": 2079.263925552368, - "p90": 2081.279993057251, - "p95": 2082.5281143188477, - "p99": 2086.1759185791016 + "p50": 3560.703992843628, + "p90": 3572.9920864105225, + "p95": 3578.847885131836, + "p99": 3600.7680892944336 }, "isolatedSum": { - "p50": 78.68799939751625, - "p90": 82.65600353479385, - "p95": 85.95199882984161, - "p99": 94.4640003144741 + "p50": 3578.4958600997925, + "p90": 3597.088098526001, + "p95": 3606.0800552368164, + "p99": 3636.2560987472534 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 9863168, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 287, - "stragglerRank": 5, + "dispatchLogicalBytes": 1065861120, + "combineLogicalBytes": 2131722240, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 - }, + } + ] + }, + { + "id": "cx-67e5feea", + "identity": "b300|deepep|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "b300_d6fd14c3", + "comparisonKey": "ff71982761f18df0", + "schemaVersion": 3, + "generatedAt": "2026-06-27T10:26:31.663724+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "b300-nv_14", + "sku": "b300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + 
"transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep · fp8", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": "set:6:a426d66e479dc893", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28286436120", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28286436120", + "createdAt": "2026-06-27T10:26:31.663724+00:00", + "sha": "91c7acf59a5e524f37742922ec67721d86a03f6b" + }, + "rows": [ { - "tokensPerRank": 64, - "globalTokens": 512, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 42.11200028657913, - "p90": 65.15199691057205, - "p95": 74.36800003051758, - "p99": 88.99199962615967 + "p50": 1799.5840311050415, + "p90": 2587.9039764404297, + "p95": 2896.159887313843, + "p99": 3459.968090057373 }, "combine": { - "p50": 58.9120015501976, - "p90": 63.87200206518173, - "p95": 64.80000168085098, - "p99": 71.45600020885468 + "p50": 1817.7920579910278, + "p90": 2162.816047668457, + "p95": 2672.192096710205, + "p99": 2924.3199825286865 }, "roundtrip": { - "p50": 
2100.9280681610107, - "p90": 2110.1760864257812, - "p95": 2111.2639904022217, - "p99": 2114.367961883545 + "p50": 1977.4080514907837, + "p90": 2173.4719276428223, + "p95": 2860.5120182037354, + "p99": 3130.8159828186035 }, "isolatedSum": { - "p50": 101.02400183677673, - "p90": 129.02399897575378, - "p95": 139.16800171136856, - "p99": 160.44799983501434 + "p50": 3617.3760890960693, + "p90": 4750.720024108887, + "p95": 5568.351984024048, + "p99": 6384.28807258606 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19496960, - "combineLogicalBytes": 38993920, - "fanoutMean": 5.3125, - "recvTokensMax": 564, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 1846.0479974746704, + "p90": 2604.5761108398438, + "p95": 2895.456075668335, + "p99": 3439.487934112549 + }, + "combine": { + "p50": 1870.6239461898804, + "p90": 2174.5920181274414, + "p95": 2705.2159309387207, + "p99": 3008.8319778442383 + }, + "roundtrip": { + "p50": 2121.920108795166, + "p90": 2273.087978363037, + "p95": 2978.7840843200684, + "p99": 3390.048027038574 + }, + "isolatedSum": { + "p50": 3716.671943664551, + "p90": 4779.168128967285, + "p95": 5600.672006607056, + "p99": 6448.319911956787 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77944832, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 1958.3040475845337, + "p90": 2819.5838928222656, + "p95": 3096.895933151245, + "p99": 5452.991962432861 + }, + "combine": { + "p50": 1994.7839975357056, + "p90": 2250.5600452423096, + "p95": 2893.791913986206, + "p99": 3337.984085083008 + }, + "roundtrip": { + "p50": 
2347.584009170532, + "p90": 2880.44810295105, + "p95": 3284.991979598999, + "p99": 3777.6639461517334 + }, + "isolatedSum": { + "p50": 3953.0880451202393, + "p90": 5070.143938064575, + "p95": 5990.687847137451, + "p99": 8790.97604751587 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156133376, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 1024, + "globalTokens": 8192, "dispatch": { - "p50": 53.18399891257286, - "p90": 54.78399991989136, - "p95": 56.60799890756607, - "p99": 61.535999178886414 + "p50": 2067.199945449829, + "p90": 2887.7758979797363, + "p95": 3118.6559200286865, + "p99": 3810.5599880218506 }, "combine": { - "p50": 85.75999736785889, - "p90": 88.03199976682663, - "p95": 89.12000060081482, - "p99": 95.29600292444229 + "p50": 2245.4400062561035, + "p90": 2792.095899581909, + "p95": 3188.5440349578857, + "p99": 3587.552070617676 }, "roundtrip": { - "p50": 2140.671968460083, - "p90": 2143.5201168060303, - "p95": 2145.632028579712, - "p99": 2288.991928100586 + "p50": 2770.080089569092, + "p90": 2971.872091293335, + "p95": 3523.7441062927246, + "p99": 3988.640069961548 }, "isolatedSum": { - "p50": 138.94399628043175, - "p90": 142.815999686718, - "p95": 145.7279995083809, - "p99": 156.8320021033287 + "p50": 4312.639951705933, + "p90": 5679.8717975616455, + "p95": 6307.199954986572, + "p99": 7398.112058639526 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 38836224, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 1104, - "stragglerRank": 2, + "dispatchLogicalBytes": 311721984, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + 
"p50": 2318.943977355957, + "p90": 2845.599889755249, + "p95": 3288.3200645446777, + "p99": 3567.9359436035156 + }, + "combine": { + "p50": 2601.759910583496, + "p90": 2804.192066192627, + "p95": 3261.3439559936523, + "p99": 3862.2400760650635 + }, + "roundtrip": { + "p50": 3612.5121116638184, + "p90": 4097.760200500488, + "p95": 4626.783847808838, + "p99": 6537.69588470459 + }, + "isolatedSum": { + "p50": 4920.703887939453, + "p90": 5649.791955947876, + "p95": 6549.66402053833, + "p99": 7430.176019668579 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 621902848, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 2815.4239654541016, + "p90": 3583.904027938843, + "p95": 3803.584098815918, + "p99": 4226.624011993408 + }, + "combine": { + "p50": 3305.2799701690674, + "p90": 3407.8400135040283, + "p95": 3562.688112258911, + "p99": 4382.976055145264 + }, + "roundtrip": { + "p50": 5279.6478271484375, + "p90": 5909.920215606689, + "p95": 6326.015949249268, + "p99": 6807.90376663208 + }, + "isolatedSum": { + "p50": 6120.703935623169, + "p90": 6991.744041442871, + "p95": 7366.272211074829, + "p99": 8609.600067138672 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243504640, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -28466,28 +28916,29 @@ ] }, { - "id": "cx-1d30dd2c", - "identity": "h100|deepep|7168|8|256|fp8|ll|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|ac583971f94b176", - "colorKey": "h100_ac25b0a1", - "comparisonKey": "405d06288635d74f", + "id": "cx-45b4616a", + "identity": 
"b300|deepep|7168|8|256|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "b300_c4c63f07", + "comparisonKey": "56fe7b02fd8e6b1a", "schemaVersion": 3, - "generatedAt": "2026-06-26T17:32:59.549027+00:00", + "generatedAt": "2026-06-27T09:51:04.276703+00:00", "status": "valid", - "publicationStatus": "diagnostic", - "runner": "h100-dgxc-slurm_00", - "sku": "h100", + "publicationStatus": "official", + "runner": "b300-nv_15", + "sku": "b300", "backend": "deepep", - "phase": "decode", - "mode": "ll", - "resourceMode": "normalized", - "suite": "resource-constrained", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b300-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · fp8 LL (norm)", + "label": "B300 EP8 · deepep · fp8", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, @@ -28502,13 +28953,13 @@ "combineQuantMode": "none" }, "resourceProfile": { - "requestedFraction": 0.18, - "achievedFraction": null, - "configuredUnits": null, - "deviceUnits": 132, - "resourceClass": "fixed-kernel", - "conformanceClass": "not-applicable", - "fixedKernel": true, + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, "paretoEligible": false }, "placement": { @@ -28518,8 +28969,8 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "ac583971f94b176", - "workloadId": "set:8:d8d49658059863f2", + "traceSignature": "64d989e2e2a6b31", + "workloadId": "set:6:a426d66e479dc893", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": 
null, @@ -28527,304 +28978,230 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28254359089", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254359089", - "createdAt": "2026-06-26T17:27:42Z", - "sha": "60dec7d70f554e252fec87709e2be52752947db1" + "id": "28285674665", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285674665", + "createdAt": "2026-06-27T09:51:04.276703+00:00", + "sha": "149586650dbed5b7579537347e9489d5b41543c1" }, "rows": [ { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 30.432000756263733, - "p90": 32.32000023126602, - "p95": 34.143999218940735, - "p99": 38.015998899936676 - }, - "combine": { - "p50": 32.287999987602234, - "p90": 34.78400036692619, - "p95": 35.87200120091438, - "p99": 40.383998304605484 - }, - "roundtrip": { - "p50": 2063.9359951019287, - "p90": 2065.632104873657, - "p95": 2066.9760704040527, - "p99": 2069.6001052856445 - }, - "isolatedSum": { - "p50": 62.72000074386597, - "p90": 67.10400059819221, - "p95": 70.01600041985512, - "p99": 78.39999720454216 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 315392, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 14, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 30.368000268936157, - "p90": 32.09599852561951, - "p95": 34.01599824428558, - "p99": 37.248000502586365 - }, - "combine": { - "p50": 32.22399950027466, - "p90": 34.46400165557861, - "p95": 35.711999982595444, - "p99": 45.88799923658371 - }, - "roundtrip": { - "p50": 2064.768075942993, - "p90": 2067.13604927063, - "p95": 2069.024085998535, - "p99": 2083.7440490722656 - }, - "isolatedSum": { - "p50": 62.591999769210815, - "p90": 66.56000018119812, - "p95": 69.72799822688103, - "p99": 83.13599973917007 - }, - 
"roundtripMeasured": true, - "dispatchLogicalBytes": 616448, - "combineLogicalBytes": 1232896, - "fanoutMean": 5.375, - "recvTokensMax": 21, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 30.527999624609947, - "p90": 32.54399821162224, - "p95": 35.26400029659271, - "p99": 40.063999593257904 + "p50": 158.01599621772766, + "p90": 162.6559942960739, + "p95": 164.48000073432922, + "p99": 177.88800597190857 }, "combine": { - "p50": 34.2399999499321, - "p90": 37.53599897027016, - "p95": 38.24000060558319, - "p99": 40.031999349594116 + "p50": 108.35199803113937, + "p90": 110.43199896812439, + "p95": 111.455999314785, + "p99": 118.56000125408173 }, "roundtrip": { - "p50": 2065.376043319702, - "p90": 2067.3279762268066, - "p95": 2068.3200359344482, - "p99": 2070.5599784851074 + "p50": 259.39199328422546, + "p90": 266.36800169944763, + "p95": 268.22400093078613, + "p99": 283.55199098587036 }, "isolatedSum": { - "p50": 64.76799957454205, - "p90": 70.0799971818924, - "p95": 73.5040009021759, - "p99": 80.09599894285202 + "p50": 266.36799424886703, + "p90": 273.0879932641983, + "p95": 275.9360000491142, + "p99": 296.4480072259903 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1240064, - "combineLogicalBytes": 2480128, - "fanoutMean": 5.40625, - "recvTokensMax": 39, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 256, + "globalTokens": 2048, "dispatch": { - "p50": 31.231999397277832, - "p90": 33.055998384952545, - "p95": 35.61599925160408, - "p99": 38.94399851560593 + "p50": 219.32800114154816, + "p90": 222.52799570560455, + "p95": 223.55200350284576, + "p99": 230.335995554924 }, "combine": { - "p50": 
33.76000002026558, - "p90": 35.999998450279236, - "p95": 37.76000067591667, - "p99": 53.888000547885895 + "p50": 152.28800475597382, + "p90": 155.74400126934052, + "p95": 157.4079990386963, + "p99": 167.64800250530243 }, "roundtrip": { - "p50": 2066.528081893921, - "p90": 2068.511962890625, - "p95": 2069.6959495544434, - "p99": 2078.07993888855 + "p50": 371.36000394821167, + "p90": 375.5199909210205, + "p95": 376.99198722839355, + "p99": 389.0239894390106 }, "isolatedSum": { - "p50": 64.99199941754341, - "p90": 69.05599683523178, - "p95": 73.37599992752075, - "p99": 92.83199906349182 + "p50": 371.616005897522, + "p90": 378.27199697494507, + "p95": 380.96000254154205, + "p99": 397.98399806022644 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2487296, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 74, + "dispatchLogicalBytes": 77944832, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 16, - "globalTokens": 128, + "tokensPerRank": 512, + "globalTokens": 4096, "dispatch": { - "p50": 32.51200169324875, - "p90": 34.20799970626831, - "p95": 36.86400130391121, - "p99": 40.09599983692169 + "p50": 360.9600067138672, + "p90": 365.1840090751648, + "p95": 367.23199486732483, + "p99": 394.9120044708252 }, "combine": { - "p50": 37.21600025892258, - "p90": 39.45599868893623, - "p95": 40.41599854826927, - "p99": 42.399998754262924 + "p50": 264.5759880542755, + "p90": 268.7680125236511, + "p95": 271.232008934021, + "p99": 281.76000714302063 }, "roundtrip": { - "p50": 2071.392059326172, - "p90": 2074.687957763672, - "p95": 2078.7200927734375, - "p99": 2156.5120220184326 + "p50": 614.7840023040771, + "p90": 620.9279894828796, + "p95": 623.6799955368042, + "p99": 633.4720253944397 }, "isolatedSum": { - "p50": 69.72800195217133, - "p90": 73.66399839520454, - "p95": 77.27999985218048, - "p99": 82.49599859118462 + 
"p50": 625.5359947681427, + "p90": 633.9520215988159, + "p95": 638.4640038013458, + "p99": 676.6720116138458 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4960256, - "combineLogicalBytes": 9920512, - "fanoutMean": 5.40625, - "recvTokensMax": 145, + "dispatchLogicalBytes": 156133376, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 32, - "globalTokens": 256, + "tokensPerRank": 1024, + "globalTokens": 8192, "dispatch": { - "p50": 35.10399907827377, - "p90": 36.38400137424469, - "p95": 38.43199834227562, - "p99": 42.208001017570496 + "p50": 627.9360055923462, + "p90": 633.5999965667725, + "p95": 635.8720064163208, + "p99": 650.111973285675 }, "combine": { - "p50": 42.7200011909008, - "p90": 44.89599913358688, - "p95": 45.66400125622749, - "p99": 48.70399832725525 + "p50": 453.0239999294281, + "p90": 457.66401290893555, + "p95": 460.31999588012695, + "p99": 473.56799244880676 }, "roundtrip": { - "p50": 2080.22403717041, - "p90": 2081.9520950317383, - "p95": 2083.359956741333, - "p99": 2118.4639930725098 + "p50": 1066.5600299835205, + "p90": 1073.6639499664307, + "p95": 1077.5359869003296, + "p99": 1090.1119709014893 }, "isolatedSum": { - "p50": 77.82400026917458, - "p90": 81.28000050783157, - "p95": 84.09599959850311, - "p99": 90.91199934482574 + "p50": 1080.9600055217743, + "p90": 1091.264009475708, + "p95": 1096.1920022964478, + "p99": 1123.6799657344818 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 9863168, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 287, - "stragglerRank": 7, + "dispatchLogicalBytes": 311721984, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 64, - "globalTokens": 512, + "tokensPerRank": 2048, + "globalTokens": 16384, 
"dispatch": { - "p50": 42.047999799251556, - "p90": 47.90399968624115, - "p95": 48.8319993019104, - "p99": 53.119998425245285 + "p50": 1180.3840398788452, + "p90": 1187.1999502182007, + "p95": 1190.4640197753906, + "p99": 1241.3439750671387 }, "combine": { - "p50": 57.40800127387047, - "p90": 62.68800050020218, - "p95": 64.51199948787689, - "p99": 67.03999638557434 + "p50": 815.3600096702576, + "p90": 822.5280046463013, + "p95": 825.8879780769348, + "p99": 834.2080116271973 }, "roundtrip": { - "p50": 2100.5120277404785, - "p90": 2108.383893966675, - "p95": 2109.503984451294, - "p99": 2111.9039058685303 + "p50": 1978.0479669570923, + "p90": 1988.8639450073242, + "p95": 1993.8240051269531, + "p99": 2242.1441078186035 }, "isolatedSum": { - "p50": 99.45600107312202, - "p90": 110.59200018644333, - "p95": 113.34399878978729, - "p99": 120.15999481081963 + "p50": 1995.7440495491028, + "p90": 2009.727954864502, + "p95": 2016.3519978523254, + "p99": 2075.551986694336 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19496960, - "combineLogicalBytes": 38993920, - "fanoutMean": 5.3125, - "recvTokensMax": 564, + "dispatchLogicalBytes": 621902848, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 4096, + "globalTokens": 32768, "dispatch": { - "p50": 53.599998354911804, - "p90": 55.39200082421303, - "p95": 56.41600117087364, - "p99": 61.08799949288368 + "p50": 2262.399911880493, + "p90": 2271.775960922241, + "p95": 2276.8959999084473, + "p99": 2323.2638835906982 }, "combine": { - "p50": 83.5840031504631, - "p90": 86.11200004816055, - "p95": 87.2960016131401, - "p99": 91.51999652385712 + "p50": 1527.232050895691, + "p90": 1535.6800556182861, + "p95": 1539.29603099823, + "p99": 1596.2879657745361 }, "roundtrip": { - "p50": 2139.967918395996, - "p90": 2142.303943634033, - "p95": 
2142.911911010742, - "p99": 2144.831895828247 + "p50": 3780.895948410034, + "p90": 3792.6719188690186, + "p95": 3798.464059829712, + "p99": 3837.4719619750977 }, "isolatedSum": { - "p50": 137.1840015053749, - "p90": 141.50400087237358, - "p95": 143.71200278401375, - "p99": 152.6079960167408 + "p50": 3789.631962776184, + "p90": 3807.4560165405273, + "p95": 3816.1920309066772, + "p99": 3919.5518493652344 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 38836224, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 1104, - "stragglerRank": 7, + "dispatchLogicalBytes": 1243504640, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -28832,46 +29209,47 @@ ] }, { - "id": "cx-8265fe0e", - "identity": "h100|deepep|4096|8|128|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||dc27c5e0894e569", - "colorKey": "h100_ff7906f8", - "comparisonKey": "d0edce95a580d060", + "id": "cx-d208a3bd", + "identity": "b300|deepep|7168|8|384|fp8|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||cd50548525dafdf", + "colorKey": "b300_d6fd14c3", + "comparisonKey": "5ca15c20f75abaa9", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:52:06.777183+00:00", + "generatedAt": "2026-06-27T11:14:09.340656+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h100-dgxc-slurm_13", - "sku": "h100", + "runner": "b300-nv_07", + "sku": "b300", "backend": "deepep", "phase": "prefill", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "runtime-visible-v1", - "topologyClass": "h100-nvlink-island", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · bf16", + "label": "B300 EP8 · deepep · 
fp8", + "model": "Kimi-K2", "shape": { - "hidden": 4096, + "hidden": 7168, "topk": 8, - "experts": 128, + "experts": 384, "routing": "uniform", "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, - "dispatchDtype": "bf16", + "dispatchDtype": "fp8", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1515, + "achievedFraction": 0.1351, "configuredUnits": 20, - "deviceUnits": 132, + "deviceUnits": 148, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -28884,8 +29262,8 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "dc27c5e0894e569", - "workloadId": "set:6:76d8142d69406335", + "traceSignature": "cd50548525dafdf", + "workloadId": "set:6:b23bc0c4b6402c69", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -28893,44 +29271,44 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271688175", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271688175", - "createdAt": "2026-06-26T23:50:32Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28287503879", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28287503879", + "createdAt": "2026-06-27T11:14:09.340656+00:00", + "sha": "df7fddee0a275156d4c72fa006cd2b73bce72613" }, "rows": [ { "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 92.32000261545181, - "p90": 96.41599655151367, - "p95": 98.39999675750732, - "p99": 104.22399640083313 + "p50": 86.27200126647949, + "p90": 89.12000060081482, + "p95": 90.30400216579437, + "p99": 98.30400347709656 }, "combine": { - "p50": 86.97599917650223, - "p90": 88.41600269079208, - "p95": 89.50400352478027, - "p99": 93.31200271844864 + "p50": 108.86400192975998, + "p90": 
110.97600311040878, + "p95": 112.2559979557991, + "p99": 117.76000261306763 }, "roundtrip": { - "p50": 156.73600137233734, - "p90": 160.70400178432465, - "p95": 161.6639941930771, - "p99": 166.04800522327423 + "p50": 221.18400037288666, + "p90": 224.99200701713562, + "p95": 226.68799757957458, + "p99": 240.12799561023712 }, "isolatedSum": { - "p50": 179.29600179195404, - "p90": 184.83199924230576, - "p95": 187.9040002822876, - "p99": 197.53599911928177 + "p50": 195.13600319623947, + "p90": 200.0960037112236, + "p95": 202.56000012159348, + "p99": 216.06400609016418 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 44564480, - "combineLogicalBytes": 44564480, - "fanoutMean": 5.3125, - "recvTokensMax": 699, + "dispatchLogicalBytes": 38757376, + "combineLogicalBytes": 77514752, + "fanoutMean": 5.2802734375, + "recvTokensMax": 707, "stragglerRank": 4, "correct": true, "samplesPooled": 600, @@ -28940,35 +29318,35 @@ "tokensPerRank": 256, "globalTokens": 2048, "dispatch": { - "p50": 111.10399663448334, - "p90": 133.98399949073792, - "p95": 135.96799969673157, - "p99": 139.96799290180206 + "p50": 118.04799735546112, + "p90": 121.08799815177917, + "p95": 122.3360002040863, + "p99": 129.85600531101227 }, "combine": { - "p50": 112.99200356006622, - "p90": 121.47200107574463, - "p95": 122.01599776744843, - "p99": 128.35200130939484 + "p50": 157.05600380897522, + "p90": 161.15200519561768, + "p95": 161.95200383663177, + "p99": 169.8240041732788 }, "roundtrip": { - "p50": 202.72000133991241, - "p90": 217.6000028848648, - "p95": 219.39200162887573, - "p99": 223.7440049648285 + "p50": 329.3440043926239, + "p90": 333.5680067539215, + "p95": 335.32801270484924, + "p99": 343.58400106430054 }, "isolatedSum": { - "p50": 224.09600019454956, - "p90": 255.45600056648254, - "p95": 257.98399746418, - "p99": 268.3199942111969 + "p50": 275.10400116443634, + "p90": 282.24000334739685, + "p95": 284.2880040407181, + "p99": 299.6800094842911 }, "roundtripMeasured": true, - 
"dispatchLogicalBytes": 89726976, - "combineLogicalBytes": 89726976, - "fanoutMean": 5.34814453125, - "recvTokensMax": 1385, - "stragglerRank": 7, + "dispatchLogicalBytes": 77285376, + "combineLogicalBytes": 154570752, + "fanoutMean": 5.2646484375, + "recvTokensMax": 1391, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -28977,34 +29355,34 @@ "tokensPerRank": 512, "globalTokens": 4096, "dispatch": { - "p50": 153.08800339698792, - "p90": 166.9439971446991, - "p95": 168.67199540138245, - "p99": 175.55199563503265 + "p50": 176.92799866199493, + "p90": 180.67200481891632, + "p95": 182.46400356292725, + "p99": 189.60000574588776 }, "combine": { - "p50": 168.92799735069275, - "p90": 181.15200102329254, - "p95": 183.07200074195862, - "p99": 186.0480010509491 + "p50": 266.975998878479, + "p90": 271.87201380729675, + "p95": 273.6319899559021, + "p99": 285.3119969367981 }, "roundtrip": { - "p50": 291.29600524902344, - "p90": 307.45598673820496, - "p95": 309.6959888935089, - "p99": 313.9199912548065 + "p50": 550.2079725265503, + "p90": 556.6719770431519, + "p95": 559.328019618988, + "p99": 570.8479881286621 }, "isolatedSum": { - "p50": 322.01600074768066, - "p90": 348.09599816799164, - "p95": 351.74399614334106, - "p99": 361.59999668598175 + "p50": 443.90399754047394, + "p90": 452.5440186262131, + "p95": 456.09599351882935, + "p99": 474.91200268268585 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 179503104, - "combineLogicalBytes": 179503104, - "fanoutMean": 5.349609375, - "recvTokensMax": 2772, + "dispatchLogicalBytes": 154886144, + "combineLogicalBytes": 309772288, + "fanoutMean": 5.275390625, + "recvTokensMax": 2754, "stragglerRank": 7, "correct": true, "samplesPooled": 600, @@ -29014,35 +29392,35 @@ "tokensPerRank": 1024, "globalTokens": 8192, "dispatch": { - "p50": 219.26400065422058, - "p90": 230.71999847888947, - "p95": 234.9119931459427, - "p99": 238.62400650978088 + "p50": 299.26401376724243, + "p90": 304.57600951194763, + "p95": 
306.40000104904175, + "p99": 312.9279911518097 }, "combine": { - "p50": 274.04800057411194, - "p90": 280.5440127849579, - "p95": 281.69599175453186, - "p99": 284.1919958591461 + "p50": 455.9360146522522, + "p90": 462.0479941368103, + "p95": 467.6479995250702, + "p99": 488.5759949684143 }, "roundtrip": { - "p50": 467.4240052700043, - "p90": 473.2159972190857, - "p95": 475.8079946041107, - "p99": 479.2639911174774 + "p50": 977.5360226631165, + "p90": 984.0959906578064, + "p95": 988.3840084075928, + "p99": 1000.1920461654663 }, "isolatedSum": { - "p50": 493.3120012283325, - "p90": 511.26401126384735, - "p95": 516.6079849004745, - "p99": 522.816002368927 + "p50": 755.2000284194946, + "p90": 766.6240036487579, + "p95": 774.0480005741119, + "p99": 801.503986120224 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 359022592, - "combineLogicalBytes": 359022592, - "fanoutMean": 5.349853515625, - "recvTokensMax": 5558, - "stragglerRank": 5, + "dispatchLogicalBytes": 309750784, + "combineLogicalBytes": 619501568, + "fanoutMean": 5.2750244140625, + "recvTokensMax": 5469, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -29051,35 +29429,35 @@ "tokensPerRank": 2048, "globalTokens": 16384, "dispatch": { - "p50": 360.79999804496765, - "p90": 374.36801195144653, - "p95": 376.5760064125061, - "p99": 380.2880048751831 + "p50": 535.0720286369324, + "p90": 539.3919944763184, + "p95": 542.3679947853088, + "p99": 553.6320209503174 }, "combine": { - "p50": 465.88799357414246, - "p90": 475.77598690986633, - "p95": 478.4319996833801, - "p99": 481.53600096702576 + "p50": 812.1280074119568, + "p90": 818.4319734573364, + "p95": 821.120023727417, + "p99": 830.8799862861633 }, "roundtrip": { - "p50": 799.1999983787537, - "p90": 816.6720271110535, - "p95": 819.8080062866211, - "p99": 824.7680068016052 + "p50": 1807.520031929016, + "p90": 1816.864013671875, + "p95": 1821.1840391159058, + "p99": 1864.832043647766 }, "isolatedSum": { - "p50": 826.6879916191101, - 
"p90": 850.1439988613129, - "p95": 855.0080060958862, - "p99": 861.8240058422089 + "p50": 1347.2000360488892, + "p90": 1357.8239679336548, + "p95": 1363.4880185127258, + "p99": 1384.5120072364807 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 716111872, - "combineLogicalBytes": 716111872, - "fanoutMean": 5.33544921875, - "recvTokensMax": 10982, - "stragglerRank": 0, + "dispatchLogicalBytes": 619687936, + "combineLogicalBytes": 1239375872, + "fanoutMean": 5.276611328125, + "recvTokensMax": 10883, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -29088,35 +29466,35 @@ "tokensPerRank": 4096, "globalTokens": 32768, "dispatch": { - "p50": 638.975977897644, - "p90": 648.1279730796814, - "p95": 652.7040004730225, - "p99": 661.1520051956177 + "p50": 1011.9999647140503, + "p90": 1018.9759731292725, + "p95": 1022.5919485092163, + "p99": 1036.6719961166382 }, "combine": { - "p50": 848.4799861907959, - "p90": 856.8000197410583, - "p95": 859.5520257949829, - "p99": 898.5919952392578 + "p50": 1512.671947479248, + "p90": 1519.5200443267822, + "p95": 1524.0000486373901, + "p99": 1541.6959524154663 }, "roundtrip": { - "p50": 1462.623953819275, - "p90": 1474.079966545105, - "p95": 1478.4959554672241, - "p99": 1489.3120527267456 + "p50": 3455.4879665374756, + "p90": 3466.2721157073975, + "p95": 3470.144033432007, + "p99": 3507.744073867798 }, "isolatedSum": { - "p50": 1487.45596408844, - "p90": 1504.9279928207397, - "p95": 1512.2560262680054, - "p99": 1559.7440004348755 + "p50": 2524.6719121932983, + "p90": 2538.4960174560547, + "p95": 2546.5919971466064, + "p99": 2578.3679485321045 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1432395776, - "combineLogicalBytes": 1432395776, - "fanoutMean": 5.336090087890625, - "recvTokensMax": 21939, - "stragglerRank": 7, + "dispatchLogicalBytes": 1239834624, + "combineLogicalBytes": 2479669248, + "fanoutMean": 5.278564453125, + "recvTokensMax": 21730, + "stragglerRank": 4, "correct": true, 
"samplesPooled": 600, "trials": 3 @@ -29124,16 +29502,16 @@ ] }, { - "id": "cx-2dcc1e5c", - "identity": "h100|deepep|5120|8|160|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||0c022a63bbcbf42", - "colorKey": "h100_ff7906f8", - "comparisonKey": "69b861c40f88be42", + "id": "cx-252efc4d", + "identity": "b300|deepep|7168|8|384|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||cd50548525dafdf", + "colorKey": "b300_c4c63f07", + "comparisonKey": "d0265daf2fea0a3e", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:51:59.492832+00:00", + "generatedAt": "2026-06-27T09:51:32.842462+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h100-dgxc-slurm_06", - "sku": "h100", + "runner": "b300-nv_17", + "sku": "b300", "backend": "deepep", "phase": "prefill", "mode": "normal", @@ -29141,29 +29519,30 @@ "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "runtime-visible-v1", - "topologyClass": "h100-nvlink-island", + "topologyClass": "b300-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · bf16", + "label": "B300 EP8 · deepep · fp8", + "model": "Kimi-K2", "shape": { - "hidden": 5120, + "hidden": 7168, "topk": 8, - "experts": 160, + "experts": 384, "routing": "uniform", "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, - "dispatchDtype": "bf16", + "dispatchDtype": "fp8", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1515, + "achievedFraction": 0.1351, "configuredUnits": 20, - "deviceUnits": 132, + "deviceUnits": 148, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -29176,8 +29555,8 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "0c022a63bbcbf42", - "workloadId": "set:6:28c0c09b13ff0acf", + "traceSignature": "cd50548525dafdf", + 
"workloadId": "set:6:b23bc0c4b6402c69", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -29185,45 +29564,45 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271702702", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271702702", - "createdAt": "2026-06-26T23:50:59Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28285685489", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285685489", + "createdAt": "2026-06-27T09:51:32.842462+00:00", + "sha": "149586650dbed5b7579537347e9489d5b41543c1" }, "rows": [ { "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 99.45599734783173, - "p90": 105.05600273609161, - "p95": 106.04800283908844, - "p99": 110.23999750614166 + "p50": 164.86400365829468, + "p90": 169.855996966362, + "p95": 173.69599640369415, + "p99": 187.26399540901184 }, "combine": { - "p50": 95.58399766683578, - "p90": 97.47199714183807, - "p95": 98.39999675750732, - "p99": 102.9760017991066 + "p50": 108.2879975438118, + "p90": 110.68800091743469, + "p95": 112.15999722480774, + "p99": 124.64000284671783 }, "roundtrip": { - "p50": 170.33599317073822, - "p90": 175.10400712490082, - "p95": 177.85599827766418, - "p99": 179.58399653434753 + "p50": 267.1999931335449, + "p90": 272.0000147819519, + "p95": 274.7200131416321, + "p99": 301.472008228302 }, "isolatedSum": { - "p50": 195.0399950146675, - "p90": 202.5279998779297, - "p95": 204.44799959659576, - "p99": 213.21599930524826 + "p50": 273.1520012021065, + "p90": 280.5439978837967, + "p95": 285.8559936285019, + "p99": 311.9039982557297 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 55674880, - "combineLogicalBytes": 55674880, - "fanoutMean": 5.3095703125, - "recvTokensMax": 699, - "stragglerRank": 0, + "dispatchLogicalBytes": 38757376, + "combineLogicalBytes": 77514752, + 
"fanoutMean": 5.2802734375, + "recvTokensMax": 707, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -29232,35 +29611,35 @@ "tokensPerRank": 256, "globalTokens": 2048, "dispatch": { - "p50": 119.29599940776825, - "p90": 124.22399967908859, - "p95": 126.30400061607361, - "p99": 130.5599957704544 + "p50": 221.95200622081757, + "p90": 225.0880002975464, + "p95": 227.84000635147095, + "p99": 246.11200392246246 }, "combine": { - "p50": 122.079998254776, - "p90": 127.80800461769104, - "p95": 128.67200374603271, - "p99": 132.9919993877411 + "p50": 153.3759981393814, + "p90": 157.0879966020584, + "p95": 158.33599865436554, + "p99": 163.5199934244156 }, "roundtrip": { - "p50": 219.32800114154816, - "p90": 223.1680005788803, - "p95": 224.5440036058426, - "p99": 228.7359982728958 + "p50": 374.87998604774475, + "p90": 379.61599230766296, + "p95": 385.72800159454346, + "p99": 410.2720022201538 }, "isolatedSum": { - "p50": 241.37599766254425, - "p90": 252.03200429677963, - "p95": 254.97600436210632, - "p99": 263.5519951581955 + "p50": 375.328004360199, + "p90": 382.1759968996048, + "p95": 386.1760050058365, + "p99": 409.63199734687805 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 111104000, - "combineLogicalBytes": 111104000, - "fanoutMean": 5.2978515625, - "recvTokensMax": 1387, - "stragglerRank": 0, + "dispatchLogicalBytes": 77285376, + "combineLogicalBytes": 154570752, + "fanoutMean": 5.2646484375, + "recvTokensMax": 1391, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 @@ -29269,35 +29648,35 @@ "tokensPerRank": 512, "globalTokens": 4096, "dispatch": { - "p50": 165.53600132465363, - "p90": 178.1120002269745, - "p95": 180.12799322605133, - "p99": 184.25600230693817 + "p50": 358.271986246109, + "p90": 362.43200302124023, + "p95": 364.8639917373657, + "p99": 389.44000005722046 }, "combine": { - "p50": 190.46400487422943, - "p90": 198.71999323368073, - "p95": 200.9280025959015, - "p99": 213.79199624061584 + "p50": 
265.4399871826172, + "p90": 270.6559896469116, + "p95": 273.8560140132904, + "p99": 306.68801069259644 }, "roundtrip": { - "p50": 325.76000690460205, - "p90": 331.07200264930725, - "p95": 332.73598551750183, - "p99": 336.1920118331909 + "p50": 616.159975528717, + "p90": 622.8799819946289, + "p95": 628.063976764679, + "p99": 656.4800143241882 }, "isolatedSum": { - "p50": 356.00000619888306, - "p90": 376.8319934606552, - "p95": 381.0559958219528, - "p99": 398.047998547554 + "p50": 623.7119734287262, + "p90": 633.0879926681519, + "p95": 638.7200057506561, + "p99": 696.1280107498169 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 223098880, - "combineLogicalBytes": 223098880, - "fanoutMean": 5.319091796875, - "recvTokensMax": 2762, - "stragglerRank": 1, + "dispatchLogicalBytes": 154886144, + "combineLogicalBytes": 309772288, + "fanoutMean": 5.275390625, + "recvTokensMax": 2754, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -29306,35 +29685,35 @@ "tokensPerRank": 1024, "globalTokens": 8192, "dispatch": { - "p50": 244.57600712776184, - "p90": 249.439999461174, - "p95": 253.56799364089966, - "p99": 409.56801176071167 + "p50": 629.1840076446533, + "p90": 634.1760158538818, + "p95": 637.2799873352051, + "p99": 658.3679914474487 }, "combine": { - "p50": 299.1040050983429, - "p90": 303.9360046386719, - "p95": 305.759996175766, - "p99": 311.0719919204712 + "p50": 454.912006855011, + "p90": 460.1280093193054, + "p95": 465.2479887008667, + "p99": 487.61600255966187 }, "roundtrip": { - "p50": 515.7759785652161, - "p90": 522.2399830818176, - "p95": 524.1600275039673, - "p99": 528.8959741592407 + "p50": 1072.5760459899902, + "p90": 1080.7360410690308, + "p95": 1090.3040170669556, + "p99": 1124.351978302002 }, "isolatedSum": { - "p50": 543.6800122261047, - "p90": 553.3760040998459, - "p95": 559.3279898166656, - "p99": 720.6400036811829 + "p50": 1084.0960144996643, + "p90": 1094.3040251731873, + "p95": 1102.5279760360718, + "p99": 
1145.9839940071106 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 446730240, - "combineLogicalBytes": 446730240, - "fanoutMean": 5.325439453125, - "recvTokensMax": 5518, - "stragglerRank": 0, + "dispatchLogicalBytes": 309750784, + "combineLogicalBytes": 619501568, + "fanoutMean": 5.2750244140625, + "recvTokensMax": 5469, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 @@ -29343,35 +29722,35 @@ "tokensPerRank": 2048, "globalTokens": 16384, "dispatch": { - "p50": 413.1520092487335, - "p90": 423.0720102787018, - "p95": 426.2399971485138, - "p99": 432.5760006904602 + "p50": 1168.992042541504, + "p90": 1176.31995677948, + "p95": 1185.5360269546509, + "p99": 1203.6160230636597 }, "combine": { - "p50": 515.7439708709717, - "p90": 523.7119793891907, - "p95": 526.4319777488708, - "p99": 530.3360223770142 + "p50": 810.2719783782959, + "p90": 818.943977355957, + "p95": 826.1759877204895, + "p99": 878.6560297012329 }, "roundtrip": { - "p50": 898.2080221176147, - "p90": 911.0400080680847, - "p95": 915.2960181236267, - "p99": 921.6639995574951 + "p50": 1966.6880369186401, + "p90": 1979.6799421310425, + "p95": 1991.487979888916, + "p99": 2013.6001110076904 }, "isolatedSum": { - "p50": 928.8959801197052, - "p90": 946.7839896678925, - "p95": 952.6719748973846, - "p99": 962.9120230674744 + "p50": 1979.2640209197998, + "p90": 1995.263934135437, + "p95": 2011.7120146751404, + "p99": 2082.2720527648926 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 893634560, - "combineLogicalBytes": 893634560, - "fanoutMean": 5.32647705078125, - "recvTokensMax": 11032, - "stragglerRank": 6, + "dispatchLogicalBytes": 619687936, + "combineLogicalBytes": 1239375872, + "fanoutMean": 5.276611328125, + "recvTokensMax": 10883, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -29380,35 +29759,35 @@ "tokensPerRank": 4096, "globalTokens": 32768, "dispatch": { - "p50": 745.0559735298157, - "p90": 758.2719922065735, - "p95": 762.112021446228, - 
"p99": 772.4159955978394 + "p50": 2255.136013031006, + "p90": 2265.471935272217, + "p95": 2275.2959728240967, + "p99": 2326.7838954925537 }, "combine": { - "p50": 933.247983455658, - "p90": 941.9839978218079, - "p95": 945.1839923858643, - "p99": 951.3279795646667 + "p50": 1510.5600357055664, + "p90": 1526.144027709961, + "p95": 1534.656047821045, + "p99": 1569.7920322418213 }, "roundtrip": { - "p50": 1646.2719440460205, - "p90": 1661.9199514389038, - "p95": 1667.3599481582642, - "p99": 1685.7600212097168 + "p50": 3753.2479763031006, + "p90": 3776.5119075775146, + "p95": 3788.383960723877, + "p99": 3816.6720867156982 }, "isolatedSum": { - "p50": 1678.3039569854736, - "p90": 1700.2559900283813, - "p95": 1707.2960138320923, - "p99": 1723.743975162506 + "p50": 3765.6960487365723, + "p90": 3791.6159629821777, + "p95": 3809.9520206451416, + "p99": 3896.575927734375 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1786265600, - "combineLogicalBytes": 1786265600, - "fanoutMean": 5.323486328125, - "recvTokensMax": 21895, - "stragglerRank": 3, + "dispatchLogicalBytes": 1239834624, + "combineLogicalBytes": 2479669248, + "fanoutMean": 5.278564453125, + "recvTokensMax": 21730, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 @@ -29416,30 +29795,31 @@ ] }, { - "id": "cx-29bbdbee", - "identity": "h100|deepep|6144|8|256|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", - "colorKey": "h100_ff7906f8", - "comparisonKey": "4401899311d5e08c", + "id": "cx-c8d1506e", + "identity": "b300|deepep|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|normalized|0.18|64d989e2e2a6b31", + "colorKey": "b300_eee29686", + "comparisonKey": "efab2d3670b24be2", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:52:30.177352+00:00", + "generatedAt": "2026-06-26T17:42:54.702578+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h100-dgxc-slurm_05", - "sku": "h100", + "runner": 
"b300-nv_15", + "sku": "b300", "backend": "deepep", "phase": "prefill", "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", + "resourceMode": "normalized", + "suite": "resource-constrained", "comparisonClass": "standardized", - "measurementContract": "runtime-visible-v1", - "topologyClass": "h100-nvlink-island", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · bf16", + "label": "B300 EP8 · deepep · fp8 (norm)", + "model": "DeepSeek-V3/V4", "shape": { - "hidden": 6144, + "hidden": 7168, "topk": 8, "experts": 256, "routing": "uniform", @@ -29447,19 +29827,19 @@ "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, - "dispatchDtype": "bf16", + "dispatchDtype": "fp8", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", + "requestedFraction": 0.18, + "achievedFraction": 0.1824, + "configuredUnits": 27, + "deviceUnits": 148, + "resourceClass": "resource-constrained", + "conformanceClass": "resource-conforming", "fixedKernel": false, - "paretoEligible": false + "paretoEligible": true }, "placement": { "kind": "packed", @@ -29469,7 +29849,7 @@ }, "routingConsistent": true, "traceSignature": "64d989e2e2a6b31", - "workloadId": "set:6:9f5e1e005a35e937", + "workloadId": "set:6:a426d66e479dc893", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -29477,45 +29857,45 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271717621", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271717621", - "createdAt": "2026-06-26T23:51:27Z", - "sha": 
"45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28254479346", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254479346", + "createdAt": "2026-06-26T17:42:54.702578+00:00", + "sha": "60dec7d70f554e252fec87709e2be52752947db1" }, "rows": [ { "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 111.42399907112122, - "p90": 114.94400352239609, - "p95": 116.03199690580368, - "p99": 119.61600184440613 + "p50": 83.45600217580795, + "p90": 86.14400029182434, + "p95": 87.2960016131401, + "p99": 102.08000242710114 }, "combine": { - "p50": 98.33600372076035, - "p90": 103.71199995279312, - "p95": 104.67199981212616, - "p99": 106.4319983124733 + "p50": 108.38399827480316, + "p90": 110.75200140476227, + "p95": 111.61600053310394, + "p99": 114.9120032787323 }, "roundtrip": { - "p50": 184.9599927663803, - "p90": 188.63999843597412, - "p95": 189.66400623321533, - "p99": 194.11200284957886 + "p50": 218.33600103855133, + "p90": 221.6320037841797, + "p95": 222.84799814224243, + "p99": 235.23199558258057 }, "isolatedSum": { - "p50": 209.76000279188156, - "p90": 218.6560034751892, - "p95": 220.70399671792984, - "p99": 226.04800015687943 + "p50": 191.84000045061111, + "p90": 196.8960016965866, + "p95": 198.91200214624405, + "p99": 216.99200570583344 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 66576384, - "combineLogicalBytes": 66576384, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, "fanoutMean": 5.291015625, "recvTokensMax": 723, - "stragglerRank": 0, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -29524,35 +29904,35 @@ "tokensPerRank": 256, "globalTokens": 2048, "dispatch": { - "p50": 132.86399841308594, - "p90": 137.7599984407425, - "p95": 139.3280029296875, - "p99": 142.4960047006607 + "p50": 116.70400202274323, + "p90": 119.64800208806992, + "p95": 121.15199863910675, + "p99": 135.3600025177002 }, "combine": { - "p50": 137.69599795341492, - "p90": 140.4159963130951, - "p95": 
141.37600362300873, - "p99": 145.53600549697876 + "p50": 155.29599785804749, + "p90": 167.4560010433197, + "p95": 176.60799622535706, + "p99": 184.1599941253662 }, "roundtrip": { - "p50": 237.2480034828186, - "p90": 242.08000302314758, - "p95": 243.1039959192276, - "p99": 246.24000489711761 + "p50": 324.47999715805054, + "p90": 328.19199562072754, + "p95": 330.04799485206604, + "p99": 345.40799260139465 }, "isolatedSum": { - "p50": 270.55999636650085, - "p90": 278.1759947538376, - "p95": 280.7040065526962, - "p99": 288.03201019763947 + "p50": 271.9999998807907, + "p90": 287.1040031313896, + "p95": 297.7599948644638, + "p99": 319.5199966430664 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 133619712, - "combineLogicalBytes": 133619712, + "dispatchLogicalBytes": 77944832, + "combineLogicalBytes": 155889664, "fanoutMean": 5.3095703125, "recvTokensMax": 1422, - "stragglerRank": 0, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -29561,35 +29941,35 @@ "tokensPerRank": 512, "globalTokens": 4096, "dispatch": { - "p50": 184.00000035762787, - "p90": 197.31199741363525, - "p95": 200.15999674797058, - "p99": 204.12799715995789 + "p50": 177.7919977903366, + "p90": 182.27200210094452, + "p95": 183.9040070772171, + "p99": 191.103994846344 }, "combine": { - "p50": 209.6959948539734, - "p90": 216.86400473117828, - "p95": 217.92000532150269, - "p99": 221.95200622081757 + "p50": 267.520010471344, + "p90": 270.81599831581116, + "p95": 272.0640003681183, + "p99": 275.4879891872406 }, "roundtrip": { - "p50": 365.02400040626526, - "p90": 377.21601128578186, - "p95": 380.5760145187378, - "p99": 388.12801241874695 + "p50": 550.8480072021484, + "p90": 556.9599866867065, + "p95": 560.2560043334961, + "p99": 578.3360004425049 }, "isolatedSum": { - "p50": 393.69599521160126, - "p90": 414.17600214481354, - "p95": 418.08000206947327, - "p99": 426.08000338077545 + "p50": 445.3120082616806, + "p90": 453.0880004167557, + "p95": 455.9680074453354, + "p99": 
466.5919840335846 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 267657216, - "combineLogicalBytes": 267657216, + "dispatchLogicalBytes": 156133376, + "combineLogicalBytes": 312266752, "fanoutMean": 5.31787109375, "recvTokensMax": 2779, - "stragglerRank": 0, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -29598,35 +29978,35 @@ "tokensPerRank": 1024, "globalTokens": 8192, "dispatch": { - "p50": 273.21600914001465, - "p90": 277.44001150131226, - "p95": 279.87200021743774, - "p99": 289.3120050430298 + "p50": 298.0160117149353, + "p90": 302.4959862232208, + "p95": 304.4799864292145, + "p99": 319.07200813293457 }, "combine": { - "p50": 332.41599798202515, - "p90": 337.119996547699, - "p95": 338.20798993110657, - "p99": 341.66398644447327 + "p50": 452.1920084953308, + "p90": 456.6720128059387, + "p95": 458.624005317688, + "p99": 467.9360091686249 }, "roundtrip": { - "p50": 577.6320099830627, - "p90": 582.751989364624, - "p95": 584.7679972648621, - "p99": 588.7680053710938 + "p50": 976.5759706497192, + "p90": 983.8719964027405, + "p95": 991.5199875831604, + "p99": 1023.3279466629028 }, "isolatedSum": { - "p50": 605.6320071220398, - "p90": 614.5600080490112, - "p95": 618.0799901485443, - "p99": 630.975991487503 + "p50": 750.2080202102661, + "p90": 759.1679990291595, + "p95": 763.1039917469025, + "p99": 787.0080173015594 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 534380544, - "combineLogicalBytes": 534380544, + "dispatchLogicalBytes": 311721984, + "combineLogicalBytes": 623443968, "fanoutMean": 5.30859375, "recvTokensMax": 5505, - "stragglerRank": 0, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -29635,32 +30015,32 @@ "tokensPerRank": 2048, "globalTokens": 16384, "dispatch": { - "p50": 464.32000398635864, - "p90": 473.60000014305115, - "p95": 477.3760139942169, - "p99": 648.8320231437683 + "p50": 541.4720177650452, + "p90": 546.7519760131836, + "p95": 549.4080185890198, + "p99": 
557.7920079231262 }, "combine": { - "p50": 584.384024143219, - "p90": 590.9119844436646, - "p95": 593.0560231208801, - "p99": 596.8000292778015 + "p50": 814.7199749946594, + "p90": 820.8320140838623, + "p95": 824.0640163421631, + "p99": 847.2959995269775 }, "roundtrip": { - "p50": 1019.2320346832275, - "p90": 1029.6640396118164, - "p95": 1033.7599515914917, - "p99": 1037.984013557434 + "p50": 1818.0160522460938, + "p90": 1827.712059020996, + "p95": 1832.0000171661377, + "p99": 1889.5679712295532 }, "isolatedSum": { - "p50": 1048.7040281295776, - "p90": 1064.5119845867157, - "p95": 1070.432037115097, - "p99": 1245.6320524215698 + "p50": 1356.1919927597046, + "p90": 1367.583990097046, + "p95": 1373.4720349311829, + "p99": 1405.0880074501038 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1066119168, - "combineLogicalBytes": 1066119168, + "dispatchLogicalBytes": 621902848, + "combineLogicalBytes": 1243805696, "fanoutMean": 5.29547119140625, "recvTokensMax": 10952, "stragglerRank": 4, @@ -29672,35 +30052,35 @@ "tokensPerRank": 4096, "globalTokens": 32768, "dispatch": { - "p50": 879.423975944519, - "p90": 904.6720266342163, - "p95": 913.2480025291443, - "p99": 928.991973400116 + "p50": 1019.6160078048706, + "p90": 1027.9040336608887, + "p95": 1031.391978263855, + "p99": 1045.2799797058105 }, "combine": { - "p50": 1065.6960010528564, - "p90": 1075.3920078277588, - "p95": 1078.3040523529053, - "p99": 1084.2560529708862 + "p50": 1529.4400453567505, + "p90": 1537.2480154037476, + "p95": 1540.8639907836914, + "p99": 1614.6240234375 }, "roundtrip": { - "p50": 1901.9520282745361, - "p90": 1920.7359552383423, - "p95": 1926.5919923782349, - "p99": 1940.1600360870361 + "p50": 3477.3120880126953, + "p90": 3490.272045135498, + "p95": 3495.3598976135254, + "p99": 3531.3920974731445 }, "isolatedSum": { - "p50": 1945.1199769973755, - "p90": 1980.064034461975, - "p95": 1991.5520548820496, - "p99": 2013.2480263710022 + "p50": 2549.056053161621, + "p90": 2565.1520490646362, + 
"p95": 2572.2559690475464, + "p99": 2659.9040031433105 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2131722240, - "combineLogicalBytes": 2131722240, + "dispatchLogicalBytes": 1243504640, + "combineLogicalBytes": 2487009280, "fanoutMean": 5.294189453125, "recvTokensMax": 21781, - "stragglerRank": 0, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -29708,28 +30088,29 @@ ] }, { - "id": "cx-d524fd7e", - "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||157ca81687ddb63", - "colorKey": "h100_42947950", - "comparisonKey": "4c920ba7523ac63b", + "id": "cx-9971d342", + "identity": "b300|deepep|7168|8|256|fp8|normal|cached-layout-comm-only-v1|uniform|8|prefill|normal|none|none|0|normalized|0.18|64d989e2e2a6b31", + "colorKey": "b300_84b10b26", + "comparisonKey": "1c850249e23e1e8c", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:54:28.917588+00:00", + "generatedAt": "2026-06-26T18:09:25.013454+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h100-dgxc-slurm_08", - "sku": "h100", + "runner": "b300-nv_15", + "sku": "b300", "backend": "deepep", "phase": "prefill", "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", + "resourceMode": "normalized", + "suite": "resource-constrained", "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", + "measurementContract": "cached-layout-comm-only-v1", + "topologyClass": "b300-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · bf16", + "label": "B300 EP8 · deepep · fp8 (norm) [cl]", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, @@ -29739,19 +30120,19 @@ "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, - "dispatchDtype": "bf16", + "dispatchDtype": "fp8", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { - 
"requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", + "requestedFraction": 0.18, + "achievedFraction": 0.1824, + "configuredUnits": 27, + "deviceUnits": 148, + "resourceClass": "resource-constrained", + "conformanceClass": "resource-conforming", "fixedKernel": false, - "paretoEligible": false + "paretoEligible": true }, "placement": { "kind": "packed", @@ -29760,8 +30141,8 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "157ca81687ddb63", - "workloadId": "set:3:a426d66e479dc893", + "traceSignature": "64d989e2e2a6b31", + "workloadId": "set:6:a426d66e479dc893", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -29769,45 +30150,82 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271785174", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271785174", - "createdAt": "2026-06-26T23:53:30Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28254499301", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254499301", + "createdAt": "2026-06-26T18:09:25.013454+00:00", + "sha": "60dec7d70f554e252fec87709e2be52752947db1" }, "rows": [ { "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 130.52800297737122, - "p90": 135.55200397968292, - "p95": 138.43199610710144, - "p99": 176.79999768733978 + "p50": 77.02399790287018, + "p90": 79.48800176382065, + "p95": 80.89599758386612, + "p99": 85.28000116348267 }, "combine": { - "p50": 113.8560026884079, - "p90": 120.86399644613266, - "p95": 122.11199849843979, - "p99": 145.50399780273438 + "p50": 108.5439994931221, + "p90": 111.29599809646606, + "p95": 112.35199868679047, + "p99": 124.41600114107132 }, "roundtrip": { - "p50": 209.05600488185883, - "p90": 
217.56799519062042, - "p95": 219.200000166893, - "p99": 275.04000067710876 + "p50": 211.74399554729462, + "p90": 214.4320011138916, + "p95": 216.0000056028366, + "p99": 233.15200209617615 }, "isolatedSum": { - "p50": 244.3840056657791, - "p90": 256.4160004258156, - "p95": 260.54399460554123, - "p99": 322.30399549007416 + "p50": 185.56799739599228, + "p90": 190.7839998602867, + "p95": 193.24799627065659, + "p99": 209.69600230455399 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, + "dispatchLogicalBytes": 38836224, "combineLogicalBytes": 77672448, "fanoutMean": 5.291015625, "recvTokensMax": 723, - "stragglerRank": 7, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 110.1439967751503, + "p90": 113.53600025177002, + "p95": 115.90400338172913, + "p99": 132.6719969511032 + }, + "combine": { + "p50": 153.3759981393814, + "p90": 157.60000050067902, + "p95": 159.32799875736237, + "p99": 173.69599640369415 + }, + "roundtrip": { + "p50": 318.30400228500366, + "p90": 322.52800464630127, + "p95": 325.408011674881, + "p99": 346.49598598480225 + }, + "isolatedSum": { + "p50": 263.5199949145317, + "p90": 271.13600075244904, + "p95": 275.2320021390915, + "p99": 306.36799335479736 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77944832, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -29816,31 +30234,31 @@ "tokensPerRank": 512, "globalTokens": 4096, "dispatch": { - "p50": 210.27199923992157, - "p90": 217.056006193161, - "p95": 220.22399306297302, - "p99": 256.99201226234436 + "p50": 171.26399278640747, + "p90": 176.15999281406403, + "p95": 178.6240041255951, + "p99": 194.815993309021 }, "combine": { - "p50": 234.9119931459427, - "p90": 241.40800535678864, - "p95": 244.9920028448105, - "p99": 262.9759907722473 + "p50": 
268.2879865169525, + "p90": 273.0560004711151, + "p95": 275.64799785614014, + "p99": 283.58399868011475 }, "roundtrip": { - "p50": 412.54401206970215, - "p90": 420.9280014038086, - "p95": 423.0720102787018, - "p99": 427.35999822616577 + "p50": 543.7120199203491, + "p90": 550.6880283355713, + "p95": 554.1120171546936, + "p99": 576.0639905929565 }, "isolatedSum": { - "p50": 445.18399238586426, - "p90": 458.46401154994965, - "p95": 465.2159959077835, - "p99": 519.9680030345917 + "p50": 439.55197930336, + "p90": 449.21599328517914, + "p95": 454.27200198173523, + "p99": 478.39999198913574 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 312266752, + "dispatchLogicalBytes": 156133376, "combineLogicalBytes": 312266752, "fanoutMean": 5.31787109375, "recvTokensMax": 2779, @@ -29849,35 +30267,72 @@ "samplesPooled": 600, "trials": 3 }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 288.92800211906433, + "p90": 294.0160036087036, + "p95": 297.791987657547, + "p99": 315.3280019760132 + }, + "combine": { + "p50": 452.09598541259766, + "p90": 457.37600326538086, + "p95": 461.7280066013336, + "p99": 471.74400091171265 + }, + "roundtrip": { + "p50": 967.1040177345276, + "p90": 974.62397813797, + "p95": 977.5360226631165, + "p99": 995.6160187721252 + }, + "isolatedSum": { + "p50": 741.023987531662, + "p90": 751.3920068740845, + "p95": 759.5199942588806, + "p99": 787.0720028877258 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311721984, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, { "tokensPerRank": 2048, "globalTokens": 16384, "dispatch": { - "p50": 526.5920162200928, - "p90": 541.4720177650452, - "p95": 545.9200143814087, - "p99": 552.3520112037659 + "p50": 523.3920216560364, + "p90": 529.2800068855286, + "p95": 533.3439707756042, + "p99": 550.1120090484619 }, "combine": { - "p50": 637.5679969787598, - 
"p90": 649.6959924697876, - "p95": 652.6079773902893, - "p99": 661.0879898071289 + "p50": 816.32000207901, + "p90": 824.9599933624268, + "p95": 831.1359882354736, + "p99": 855.135977268219 }, "roundtrip": { - "p50": 1134.6240043640137, - "p90": 1146.880030632019, - "p95": 1151.2320041656494, - "p99": 1158.5919857025146 + "p50": 1800.096035003662, + "p90": 1811.743974685669, + "p95": 1825.7919549942017, + "p99": 1866.8160438537598 }, "isolatedSum": { - "p50": 1164.1600131988525, - "p90": 1191.1680102348328, - "p95": 1198.527991771698, - "p99": 1213.4400010108948 + "p50": 1339.7120237350464, + "p90": 1354.2400002479553, + "p95": 1364.4799590110779, + "p99": 1405.247986316681 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1243805696, + "dispatchLogicalBytes": 621902848, "combineLogicalBytes": 1243805696, "fanoutMean": 5.29547119140625, "recvTokensMax": 10952, @@ -29885,32 +30340,70 @@ "correct": true, "samplesPooled": 600, "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 984.9280118942261, + "p90": 992.2239780426025, + "p95": 996.5760111808777, + "p99": 1026.9759893417358 + }, + "combine": { + "p50": 1529.312014579773, + "p90": 1539.1039848327637, + "p95": 1548.0320453643799, + "p99": 1564.3839836120605 + }, + "roundtrip": { + "p50": 3440.864086151123, + "p90": 3457.6640129089355, + "p95": 3468.832015991211, + "p99": 3514.2080783843994 + }, + "isolatedSum": { + "p50": 2514.240026473999, + "p90": 2531.327962875366, + "p95": 2544.6080565452576, + "p99": 2591.3599729537964 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243504640, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 } ] }, { - "id": "cx-efe3a643", - "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", - "colorKey": "h100_42947950", - 
"comparisonKey": "4c920ba7523ac63b", + "id": "cx-3feaa006", + "identity": "b300|deepep|7168|8|256|fp8|normal|cached-layout-comm-only-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "b300_49e66a7b", + "comparisonKey": "5b68240330e760fc", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:47:28.966623+00:00", + "generatedAt": "2026-06-27T09:47:42.062998+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h100-dgxc-slurm_08", - "sku": "h100", + "runner": "b300-nv_11", + "sku": "b300", "backend": "deepep", "phase": "prefill", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", + "measurementContract": "cached-layout-comm-only-v1", + "topologyClass": "b300-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · bf16", + "label": "B300 EP8 · deepep · fp8 [cl]", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, @@ -29920,15 +30413,15 @@ "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, - "dispatchDtype": "bf16", + "dispatchDtype": "fp8", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1515, + "achievedFraction": 0.1351, "configuredUnits": 20, - "deviceUnits": 132, + "deviceUnits": 148, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -29950,45 +30443,45 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271547494", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271547494", - "createdAt": "2026-06-26T23:46:11Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28285593016", + "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285593016", + "createdAt": "2026-06-27T09:47:42.062998+00:00", + "sha": "149586650dbed5b7579537347e9489d5b41543c1" }, "rows": [ { "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 111.84000223875046, - "p90": 124.15999919176102, - "p95": 131.1360001564026, - "p99": 137.66400516033173 + "p50": 76.9599974155426, + "p90": 79.64800298213959, + "p95": 80.92799782752991, + "p99": 92.3520028591156 }, "combine": { - "p50": 106.6880002617836, - "p90": 114.30399864912033, - "p95": 120.09599804878235, - "p99": 123.03999811410904 + "p50": 108.51199924945831, + "p90": 111.42399907112122, + "p95": 112.41599917411804, + "p99": 122.40000069141388 }, "roundtrip": { - "p50": 199.0399956703186, - "p90": 207.58399367332458, - "p95": 216.3199931383133, - "p99": 222.1119999885559 + "p50": 210.78400313854218, + "p90": 213.85599672794342, + "p95": 216.5759950876236, + "p99": 232.9919934272766 }, "isolatedSum": { - "p50": 218.52800250053406, - "p90": 238.46399784088135, - "p95": 251.23199820518494, - "p99": 260.70400327444077 + "p50": 185.47199666500092, + "p90": 191.0720020532608, + "p95": 193.34399700164795, + "p99": 214.75200355052948 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, + "dispatchLogicalBytes": 38836224, "combineLogicalBytes": 77672448, "fanoutMean": 5.291015625, "recvTokensMax": 723, - "stragglerRank": 7, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -29997,31 +30490,31 @@ "tokensPerRank": 256, "globalTokens": 2048, "dispatch": { - "p50": 142.97600090503693, - "p90": 152.3520052433014, - "p95": 161.28000617027283, - "p99": 169.21600699424744 + "p50": 109.95200276374817, + "p90": 113.43999952077866, + "p95": 115.26399850845337, + "p99": 126.62400305271149 }, "combine": { - "p50": 150.176003575325, - "p90": 155.68000078201294, - "p95": 162.36799955368042, - "p99": 171.26399278640747 + "p50": 154.91199493408203, + "p90": 159.04000401496887, + "p95": 
161.18399798870087, + "p99": 169.88800466060638 }, "roundtrip": { - "p50": 263.2319927215576, - "p90": 269.72800493240356, - "p95": 276.0320007801056, - "p99": 290.5920147895813 + "p50": 318.30400228500366, + "p90": 323.3279883861542, + "p95": 325.21599531173706, + "p99": 336.70398592948914 }, "isolatedSum": { - "p50": 293.15200448036194, - "p90": 308.03200602531433, - "p95": 323.64800572395325, - "p99": 340.4799997806549 + "p50": 264.8639976978302, + "p90": 272.4800035357475, + "p95": 276.44799649715424, + "p99": 296.51200771331787 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 155889664, + "dispatchLogicalBytes": 77944832, "combineLogicalBytes": 155889664, "fanoutMean": 5.3095703125, "recvTokensMax": 1422, @@ -30034,31 +30527,31 @@ "tokensPerRank": 512, "globalTokens": 4096, "dispatch": { - "p50": 200.8640021085739, - "p90": 211.39200031757355, - "p95": 214.27200734615326, - "p99": 220.96000611782074 + "p50": 172.09599912166595, + "p90": 176.38400197029114, + "p95": 178.56000363826752, + "p99": 194.07999515533447 }, "combine": { - "p50": 229.72799837589264, - "p90": 236.67199909687042, - "p95": 238.71999979019165, - "p99": 246.2719976902008 + "p50": 265.79201221466064, + "p90": 270.112007856369, + "p95": 272.0319926738739, + "p99": 286.655992269516 }, "roundtrip": { - "p50": 400.86400508880615, - "p90": 413.5040044784546, - "p95": 418.94400119781494, - "p99": 428.51200699806213 + "p50": 542.7200198173523, + "p90": 548.8640069961548, + "p95": 551.967978477478, + "p99": 560.8000159263611 }, "isolatedSum": { - "p50": 430.59200048446655, - "p90": 448.06399941444397, - "p95": 452.9920071363449, - "p99": 467.23200380802155 + "p50": 437.8880113363266, + "p90": 446.49600982666016, + "p95": 450.5919963121414, + "p99": 480.73598742485046 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 312266752, + "dispatchLogicalBytes": 156133376, "combineLogicalBytes": 312266752, "fanoutMean": 5.31787109375, "recvTokensMax": 2779, @@ -30071,31 +30564,31 @@ 
"tokensPerRank": 1024, "globalTokens": 8192, "dispatch": { - "p50": 303.1040132045746, - "p90": 308.9280128479004, - "p95": 311.2959861755371, - "p99": 318.015992641449 + "p50": 286.8160009384155, + "p90": 291.9040024280548, + "p95": 294.65600848197937, + "p99": 305.184006690979 }, "combine": { - "p50": 365.9839928150177, - "p90": 372.8959858417511, - "p95": 375.39198994636536, - "p99": 382.4320137500763 + "p50": 452.2559940814972, + "p90": 458.0160081386566, + "p95": 460.7360064983368, + "p99": 470.5919921398163 }, "roundtrip": { - "p50": 644.8000073432922, - "p90": 654.528021812439, - "p95": 657.8879952430725, - "p99": 668.4799790382385 + "p50": 964.1919732093811, + "p90": 972.320020198822, + "p95": 979.5200228691101, + "p99": 989.8560047149658 }, "isolatedSum": { - "p50": 669.0880060195923, - "p90": 681.8239986896515, - "p95": 686.6879761219025, - "p99": 700.4480063915253 + "p50": 739.0719950199127, + "p90": 749.9200105667114, + "p95": 755.3920149803162, + "p99": 775.7759988307953 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 623443968, + "dispatchLogicalBytes": 311721984, "combineLogicalBytes": 623443968, "fanoutMean": 5.30859375, "recvTokensMax": 5505, @@ -30108,35 +30601,35 @@ "tokensPerRank": 2048, "globalTokens": 16384, "dispatch": { - "p50": 526.8800258636475, - "p90": 540.5759811401367, - "p95": 545.0239777565002, - "p99": 551.6160130500793 + "p50": 523.1040120124817, + "p90": 529.7920107841492, + "p95": 532.2239995002747, + "p99": 541.5999889373779 }, "combine": { - "p50": 638.0159854888916, - "p90": 650.2400040626526, - "p95": 653.1519889831543, - "p99": 660.1920127868652 + "p50": 814.2399787902832, + "p90": 822.5280046463013, + "p95": 828.8639783859253, + "p99": 839.9360179901123 }, "roundtrip": { - "p50": 1135.424017906189, - "p90": 1147.7760076522827, - "p95": 1151.0720252990723, - "p99": 1157.5039625167847 + "p50": 1798.4319925308228, + "p90": 1811.8720054626465, + "p95": 1817.9839849472046, + "p99": 1887.3599767684937 }, "isolatedSum": { 
- "p50": 1164.896011352539, - "p90": 1190.8159852027893, - "p95": 1198.1759667396545, - "p99": 1211.8080258369446 + "p50": 1337.343990802765, + "p90": 1352.3200154304504, + "p95": 1361.0879778862, + "p99": 1381.5360069274902 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1243805696, + "dispatchLogicalBytes": 621902848, "combineLogicalBytes": 1243805696, "fanoutMean": 5.29547119140625, "recvTokensMax": 10952, - "stragglerRank": 5, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -30145,35 +30638,35 @@ "tokensPerRank": 4096, "globalTokens": 32768, "dispatch": { - "p50": 1005.2160024642944, - "p90": 1027.2639989852905, - "p95": 1033.5359573364258, - "p99": 1050.271987915039 + "p50": 989.2160296440125, + "p90": 998.3360171318054, + "p95": 1003.7120580673218, + "p99": 1014.8160457611084 }, "combine": { - "p50": 1168.511986732483, - "p90": 1181.7599534988403, - "p95": 1189.1520023345947, - "p99": 1202.015995979309 + "p50": 1527.8079509735107, + "p90": 1537.376046180725, + "p95": 1542.8800582885742, + "p99": 1555.7119846343994 }, "roundtrip": { - "p50": 2131.455898284912, - "p90": 2150.815963745117, - "p95": 2158.112049102783, - "p99": 2167.3600673675537 + "p50": 3446.592092514038, + "p90": 3460.416078567505, + "p95": 3467.77606010437, + "p99": 3511.4240646362305 }, "isolatedSum": { - "p50": 2173.7279891967773, - "p90": 2209.023952484131, - "p95": 2222.6879596710205, - "p99": 2252.287983894348 + "p50": 2517.023980617523, + "p90": 2535.7120633125305, + "p95": 2546.592116355896, + "p99": 2570.528030395508 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2487009280, + "dispatchLogicalBytes": 1243504640, "combineLogicalBytes": 2487009280, "fanoutMean": 5.294189453125, "recvTokensMax": 21781, - "stragglerRank": 5, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -30181,28 +30674,29 @@ ] }, { - "id": "cx-8a96205b", - "identity": 
"h100|deepep|7168|8|256|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", - "colorKey": "h100_ff7906f8", - "comparisonKey": "6a625438eb544ee8", + "id": "cx-7cddf11f", + "identity": "b300|deepep-hybrid|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "b300_9bdf2cf9", + "comparisonKey": "6cb3f1841938f6d9", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:48:12.079136+00:00", + "generatedAt": "2026-06-28T02:32:30.085872+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h100-dgxc-slurm_06", - "sku": "h100", - "backend": "deepep", - "phase": "prefill", + "runner": "b300-nv_09", + "sku": "b300", + "backend": "deepep-hybrid", + "phase": "decode", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "runtime-visible-v1", - "topologyClass": "h100-nvlink-island", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · bf16", + "label": "B300 EP8 · deepep-hybrid · bf16", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, @@ -30218,12 +30712,12 @@ }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, "paretoEligible": false }, "placement": { @@ -30233,239 +30727,313 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "64d989e2e2a6b31", - "workloadId": "set:6:a426d66e479dc893", + "traceSignature": "ac583971f94b176", + "workloadId": "set:8:d8d49658059863f2", 
"workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", + "backendVersion": "hybrid-e0a5b1d", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271563151", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271563151", - "createdAt": "2026-06-26T23:46:38Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28308873989", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28308873989", + "createdAt": "2026-06-28T02:32:30.085872+00:00", + "sha": "02ef8d2d9b6fd7519504810daae202e88ee66360" }, "rows": [ { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 1, + "globalTokens": 8, "dispatch": { - "p50": 111.61600053310394, - "p90": 117.3119992017746, - "p95": 118.81600320339203, - "p99": 123.74400347471237 + "p50": 119.07199770212173, + "p90": 122.17599898576736, + "p95": 123.36000055074692, + "p99": 126.94400548934937 }, "combine": { - "p50": 105.85600137710571, - "p90": 107.07200318574905, - "p95": 111.16799712181091, - "p99": 113.8560026884079 + "p50": 36.51199862360954, + "p90": 38.015998899936676, + "p95": 38.816001266241074, + "p99": 39.872001856565475 }, "roundtrip": { - "p50": 193.02399456501007, - "p90": 199.52000677585602, - "p95": 200.9280025959015, - "p99": 204.96000349521637 + "p50": 151.48800611495972, + "p90": 155.8080017566681, + "p95": 157.98400342464447, + "p99": 166.52800142765045 }, "isolatedSum": { - "p50": 217.47200191020966, - "p90": 224.38400238752365, - "p95": 229.98400032520294, - "p99": 237.60000616312027 + "p50": 155.58399632573128, + "p90": 160.19199788570404, + "p95": 162.176001816988, + "p99": 166.81600734591484 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 4, + 
"dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 256, - "globalTokens": 2048, + "tokensPerRank": 2, + "globalTokens": 16, "dispatch": { - "p50": 143.23200285434723, - "p90": 147.5200057029724, - "p95": 148.6400067806244, - "p99": 152.28800475597382 + "p50": 122.20799922943115, + "p90": 125.34399330615997, + "p95": 126.75200402736664, + "p99": 132.54399597644806 }, "combine": { - "p50": 148.76799285411835, - "p90": 154.4640064239502, - "p95": 155.29599785804749, - "p99": 156.76799416542053 + "p50": 37.63199970126152, + "p90": 39.5519994199276, + "p95": 39.84000161290169, + "p99": 49.984000623226166 }, "roundtrip": { - "p50": 262.33598589897156, - "p90": 266.431987285614, - "p95": 268.12800765037537, - "p99": 271.1679935455322 + "p50": 154.55999970436096, + "p90": 157.72800147533417, + "p95": 158.65600109100342, + "p99": 166.143998503685 }, "isolatedSum": { - "p50": 291.9999957084656, - "p90": 301.9840121269226, - "p95": 303.9360046386719, - "p99": 309.05599892139435 + "p50": 159.83999893069267, + "p90": 164.89599272608757, + "p95": 166.59200564026833, + "p99": 182.52799659967422 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 155889664, - "combineLogicalBytes": 155889664, - "fanoutMean": 5.3095703125, - "recvTokensMax": 1422, - "stragglerRank": 4, - "correct": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 7, + "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 512, - "globalTokens": 4096, + "tokensPerRank": 4, + "globalTokens": 32, "dispatch": { - "p50": 196.25599682331085, - "p90": 201.1840045452118, - "p95": 202.72000133991241, - "p99": 214.84799683094025 + "p50": 125.02400577068329, + "p90": 128.76799702644348, + "p95": 130.11200726032257, + "p99": 150.39999783039093 }, "combine": { - 
"p50": 230.49600422382355, - "p90": 236.12800240516663, - "p95": 237.2799962759018, - "p99": 241.15200340747833 + "p50": 39.99999910593033, + "p90": 41.88799858093262, + "p95": 42.65600070357323, + "p99": 50.97600072622299 }, "roundtrip": { - "p50": 403.0719995498657, - "p90": 408.3839952945709, - "p95": 410.14400124549866, - "p99": 412.76800632476807 + "p50": 159.67999398708344, + "p90": 162.88000345230103, + "p95": 163.96799683570862, + "p99": 178.5919964313507 }, "isolatedSum": { - "p50": 426.7520010471344, - "p90": 437.3120069503784, - "p95": 439.9999976158142, - "p99": 456.0000002384186 + "p50": 165.02400487661362, + "p90": 170.6559956073761, + "p95": 172.7680079638958, + "p99": 201.37599855661392 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 312266752, - "combineLogicalBytes": 312266752, - "fanoutMean": 5.31787109375, - "recvTokensMax": 2779, - "stragglerRank": 4, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 1024, - "globalTokens": 8192, + "tokensPerRank": 8, + "globalTokens": 64, "dispatch": { - "p50": 301.6960024833679, - "p90": 306.43200874328613, - "p95": 307.9040050506592, - "p99": 312.1280074119568 + "p50": 130.49599528312683, + "p90": 133.7279975414276, + "p95": 135.04000008106232, + "p99": 140.1599943637848 }, "combine": { - "p50": 364.1279935836792, - "p90": 369.4399893283844, - "p95": 372.0319867134094, - "p99": 374.9760091304779 + "p50": 40.863998234272, + "p90": 42.367998510599136, + "p95": 43.327998369932175, + "p99": 46.30399867892265 }, "roundtrip": { - "p50": 640.064001083374, - "p90": 646.8160152435303, - "p95": 648.5120058059692, - "p99": 653.6960005760193 + "p50": 167.4560010433197, + "p90": 170.78399658203125, + "p95": 172.19200730323792, + "p99": 184.51200425624847 }, "isolatedSum": { - "p50": 665.8239960670471, - "p90": 675.8719980716705, - "p95": 
679.9359917640686, - "p99": 687.1040165424347 + "p50": 171.35999351739883, + "p90": 176.09599605202675, + "p95": 178.3679984509945, + "p99": 186.46399304270744 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 623443968, - "combineLogicalBytes": 623443968, - "fanoutMean": 5.30859375, - "recvTokensMax": 5505, - "stragglerRank": 4, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2048, - "globalTokens": 16384, + "tokensPerRank": 16, + "globalTokens": 128, "dispatch": { - "p50": 532.1599841117859, - "p90": 540.7040119171143, - "p95": 544.1280007362366, - "p99": 549.2799878120422 + "p50": 142.65599846839905, + "p90": 145.6959992647171, + "p95": 147.35999703407288, + "p99": 153.60000729560852 }, "combine": { - "p50": 637.503981590271, - "p90": 645.5039978027344, - "p95": 647.7760076522827, - "p99": 653.9520025253296 + "p50": 44.79999840259552, + "p90": 46.720001846551895, + "p95": 47.42399975657463, + "p99": 48.54400083422661 }, "roundtrip": { - "p50": 1141.9199705123901, - "p90": 1154.4320583343506, - "p95": 1160.1920127868652, - "p99": 1180.9600591659546 + "p50": 185.47199666500092, + "p90": 188.960000872612, + "p95": 191.8720006942749, + "p99": 211.29600703716278 }, "isolatedSum": { - "p50": 1169.6639657020569, - "p90": 1186.2080097198486, - "p95": 1191.9040083885193, - "p99": 1203.2319903373718 + "p50": 187.45599687099457, + "p90": 192.416001111269, + "p95": 194.7839967906475, + "p99": 202.14400812983513 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1243805696, - "combineLogicalBytes": 1243805696, - "fanoutMean": 5.29547119140625, - "recvTokensMax": 10952, - "stragglerRank": 4, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 
4096, - "globalTokens": 32768, + "tokensPerRank": 32, + "globalTokens": 256, "dispatch": { - "p50": 993.9200282096863, - "p90": 1017.2799825668335, - "p95": 1023.4240293502808, - "p99": 1036.8319749832153 + "p50": 163.83999586105347, + "p90": 166.52800142765045, + "p95": 167.64800250530243, + "p99": 172.41600155830383 }, "combine": { - "p50": 1165.0559902191162, - "p90": 1175.3599643707275, - "p95": 1177.9520511627197, - "p99": 1283.2640409469604 + "p50": 44.544000178575516, + "p90": 46.33599892258644, + "p95": 46.751998364925385, + "p99": 54.336000233888626 }, "roundtrip": { - "p50": 2117.6319122314453, - "p90": 2134.848117828369, - "p95": 2139.6799087524414, - "p99": 2151.5839099884033 + "p50": 207.7759951353073, + "p90": 211.13599836826324, + "p95": 213.02400529384613, + "p99": 223.4240025281906 }, "isolatedSum": { - "p50": 2158.9760184288025, - "p90": 2192.639946937561, - "p95": 2201.3760805130005, - "p99": 2320.096015930176 + "p50": 208.38399603962898, + "p90": 212.8640003502369, + "p95": 214.4000008702278, + "p99": 226.75200179219246 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2487009280, - "combineLogicalBytes": 2487009280, - "fanoutMean": 5.294189453125, - "recvTokensMax": 21781, - "stragglerRank": 4, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 210.9760046005249, + "p90": 214.65599536895752, + "p95": 216.12800657749176, + "p99": 231.455996632576 + }, + "combine": { + "p50": 48.51200059056282, + "p90": 50.27199909090996, + "p95": 50.84799975156784, + "p99": 52.41600051522255 + }, + "roundtrip": { + "p50": 253.76001000404358, + "p90": 258.0159902572632, + "p95": 260.09601354599, + "p99": 319.2639946937561 + }, + "isolatedSum": { + "p50": 259.4880051910877, + "p90": 264.9279944598675, + "p95": 266.9760063290596, + 
"p99": 283.87199714779854 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 209.6640020608902, + "p90": 212.79999613761902, + "p95": 214.33599293231964, + "p99": 221.69600427150726 + }, + "combine": { + "p50": 56.352000683546066, + "p90": 59.007998555898666, + "p95": 59.967998415231705, + "p99": 61.24800071120262 + }, + "roundtrip": { + "p50": 261.7279887199402, + "p90": 264.70398902893066, + "p95": 266.2079930305481, + "p99": 278.8800001144409 + }, + "isolatedSum": { + "p50": 266.01600274443626, + "p90": 271.8079946935177, + "p95": 274.30399134755135, + "p99": 282.9440049827099 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -30473,32 +31041,33 @@ ] }, { - "id": "cx-32c90de8", - "identity": "h100|deepep|7168|8|384|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||cd50548525dafdf", - "colorKey": "h100_ff7906f8", - "comparisonKey": "db866d0065c2a509", + "id": "cx-4a0e300c", + "identity": "b300|deepep-hybrid|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "b300_9bdf2cf9", + "comparisonKey": "e35b7ffee4d4fef7", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:51:05.825406+00:00", + "generatedAt": "2026-06-28T02:33:00.515887+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h100-dgxc-slurm_05", - "sku": "h100", - "backend": "deepep", + "runner": "b300-nv_03", + "sku": "b300", + "backend": "deepep-hybrid", "phase": "prefill", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", 
"comparisonClass": "standardized", - "measurementContract": "runtime-visible-v1", - "topologyClass": "h100-nvlink-island", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · bf16", + "label": "B300 EP8 · deepep-hybrid · bf16", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, - "experts": 384, + "experts": 256, "routing": "uniform", "routingLabel": "uniform", "routingStep": 0, @@ -30510,12 +31079,12 @@ }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, "paretoEligible": false }, "placement": { @@ -30525,54 +31094,54 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "cd50548525dafdf", - "workloadId": "set:6:b23bc0c4b6402c69", + "traceSignature": "64d989e2e2a6b31", + "workloadId": "set:6:a426d66e479dc893", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", + "backendVersion": "hybrid-e0a5b1d", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271671786", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271671786", - "createdAt": "2026-06-26T23:50:04Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28308873989", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28308873989", + "createdAt": "2026-06-28T02:33:00.515887+00:00", + "sha": "02ef8d2d9b6fd7519504810daae202e88ee66360" }, "rows": [ { "tokensPerRank": 128, "globalTokens": 
1024, "dispatch": { - "p50": 112.96000331640244, - "p90": 118.78400295972824, - "p95": 120.28799951076508, - "p99": 130.40000200271606 + "p50": 207.7759951353073, + "p90": 211.2639993429184, + "p95": 212.96000480651855, + "p99": 220.15999257564545 }, "combine": { - "p50": 106.1440035700798, - "p90": 109.15199667215347, - "p95": 110.30399799346924, - "p99": 114.49600011110306 + "p50": 57.88800120353699, + "p90": 59.23200026154518, + "p95": 59.99999865889549, + "p99": 67.84000247716904 }, "roundtrip": { - "p50": 196.99199497699738, - "p90": 201.34399831295013, - "p95": 202.94399559497833, - "p99": 206.04799687862396 + "p50": 260.44800877571106, + "p90": 263.61599564552307, + "p95": 266.36800169944763, + "p99": 274.7200131416321 }, "isolatedSum": { - "p50": 219.10400688648224, - "p90": 227.9359996318817, - "p95": 230.5919975042343, - "p99": 244.89600211381912 + "p50": 265.6639963388443, + "p90": 270.4959996044636, + "p95": 272.96000346541405, + "p99": 287.9999950528145 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77514752, - "combineLogicalBytes": 77514752, - "fanoutMean": 5.2802734375, - "recvTokensMax": 707, - "stragglerRank": 7, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 @@ -30581,35 +31150,35 @@ "tokensPerRank": 256, "globalTokens": 2048, "dispatch": { - "p50": 149.21599626541138, - "p90": 155.03999590873718, - "p95": 157.05600380897522, - "p99": 159.4880074262619 + "p50": 216.44799411296844, + "p90": 220.60799598693848, + "p95": 222.33599424362183, + "p99": 248.73599410057068 }, "combine": { - "p50": 153.50399911403656, - "p90": 158.62399339675903, - "p95": 160.25599837303162, - "p99": 165.15199840068817 + "p50": 74.23999905586243, + "p90": 76.38400048017502, + "p95": 76.80000364780426, + "p99": 87.20000088214874 }, "roundtrip": { - "p50": 270.3999876976013, - "p90": 284.0000092983246, - "p95": 
285.69599986076355, - "p99": 288.9600098133087 + "p50": 283.87200832366943, + "p90": 287.32800483703613, + "p95": 289.5039916038513, + "p99": 305.27999997138977 }, "isolatedSum": { - "p50": 302.71999537944794, - "p90": 313.6639893054962, - "p95": 317.31200218200684, - "p99": 324.6400058269501 + "p50": 290.6879931688309, + "p90": 296.9919964671135, + "p95": 299.1359978914261, + "p99": 335.9359949827194 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 154570752, - "combineLogicalBytes": 154570752, - "fanoutMean": 5.2646484375, - "recvTokensMax": 1391, - "stragglerRank": 6, + "dispatchLogicalBytes": 155889664, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -30618,34 +31187,34 @@ "tokensPerRank": 512, "globalTokens": 4096, "dispatch": { - "p50": 201.92000269889832, - "p90": 212.5760018825531, - "p95": 214.59199488162994, - "p99": 217.8560048341751 + "p50": 226.3679951429367, + "p90": 230.24000227451324, + "p95": 231.77599906921387, + "p99": 250.2399981021881 }, "combine": { - "p50": 229.5999974012375, - "p90": 237.92000114917755, - "p95": 241.2479966878891, - "p99": 245.2159970998764 + "p50": 107.64800012111664, + "p90": 109.79200154542923, + "p95": 110.62400043010712, + "p99": 118.75200271606445 }, "roundtrip": { - "p50": 404.2240083217621, - "p90": 417.5359904766083, - "p95": 419.3919897079468, - "p99": 424.1600036621094 + "p50": 329.47200536727905, + "p90": 333.6319923400879, + "p95": 335.6800079345703, + "p99": 362.2719943523407 }, "isolatedSum": { - "p50": 431.5200001001358, - "p90": 450.49600303173065, - "p95": 455.83999156951904, - "p99": 463.0720019340515 + "p50": 334.01599526405334, + "p90": 340.0320038199425, + "p95": 342.399999499321, + "p99": 368.99200081825256 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 309772288, - "combineLogicalBytes": 309772288, - "fanoutMean": 5.275390625, - "recvTokensMax": 2754, + 
"dispatchLogicalBytes": 312266752, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, "stragglerRank": 6, "correct": true, "samplesPooled": 600, @@ -30655,35 +31224,35 @@ "tokensPerRank": 1024, "globalTokens": 8192, "dispatch": { - "p50": 304.83201146125793, - "p90": 315.39198756217957, - "p95": 317.6319897174835, - "p99": 320.51199674606323 + "p50": 286.01598739624023, + "p90": 289.34401273727417, + "p95": 290.5600070953369, + "p99": 304.3519854545593 }, "combine": { - "p50": 367.48799681663513, - "p90": 376.96000933647156, - "p95": 381.9200098514557, - "p99": 392.192006111145 + "p50": 185.15199422836304, + "p90": 187.8719925880432, + "p95": 188.54400515556335, + "p99": 190.8160001039505 }, "roundtrip": { - "p50": 644.7039842605591, - "p90": 655.456006526947, - "p95": 677.951991558075, - "p99": 919.8399782180786 + "p50": 467.0400023460388, + "p90": 471.48799896240234, + "p95": 473.91998767852783, + "p99": 505.3759813308716 }, "isolatedSum": { - "p50": 672.3200082778931, - "p90": 692.3519968986511, - "p95": 699.5519995689392, - "p99": 712.7040028572083 + "p50": 471.16798162460327, + "p90": 477.2160053253174, + "p95": 479.10401225090027, + "p99": 495.1679855585098 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 619501568, - "combineLogicalBytes": 619501568, - "fanoutMean": 5.2750244140625, - "recvTokensMax": 5469, - "stragglerRank": 6, + "dispatchLogicalBytes": 623443968, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -30692,35 +31261,35 @@ "tokensPerRank": 2048, "globalTokens": 16384, "dispatch": { - "p50": 521.2799906730652, - "p90": 536.4800095558167, - "p95": 540.224015712738, - "p99": 549.3119955062866 + "p50": 458.40001106262207, + "p90": 461.60000562667847, + "p95": 463.1040096282959, + "p99": 475.93599557876587 }, "combine": { - "p50": 632.4160099029541, - "p90": 640.7679915428162, - "p95": 
643.3600187301636, - "p99": 651.4559984207153 + "p50": 320.76799869537354, + "p90": 323.93598556518555, + "p95": 325.0240087509155, + "p99": 332.5760066509247 }, "roundtrip": { - "p50": 1126.431941986084, - "p90": 1137.8240585327148, - "p95": 1141.5679454803467, - "p99": 1157.6000452041626 + "p50": 776.095986366272, + "p90": 781.216025352478, + "p95": 785.9200239181519, + "p99": 819.5199966430664 }, "isolatedSum": { - "p50": 1153.6960005760193, - "p90": 1177.2480010986328, - "p95": 1183.5840344429016, - "p99": 1200.767993927002 + "p50": 779.1680097579956, + "p90": 785.535991191864, + "p95": 788.1280183792114, + "p99": 808.5120022296906 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1239375872, - "combineLogicalBytes": 1239375872, - "fanoutMean": 5.276611328125, - "recvTokensMax": 10883, - "stragglerRank": 6, + "dispatchLogicalBytes": 1243805696, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -30729,35 +31298,35 @@ "tokensPerRank": 4096, "globalTokens": 32768, "dispatch": { - "p50": 1011.2960338592529, - "p90": 1036.895990371704, - "p95": 1044.3840026855469, - "p99": 1057.088017463684 + "p50": 741.0560250282288, + "p90": 745.2800273895264, + "p95": 748.8639950752258, + "p99": 812.4160170555115 }, "combine": { - "p50": 1154.8160314559937, - "p90": 1163.9360189437866, - "p95": 1166.5279865264893, - "p99": 1172.160029411316 + "p50": 593.1199789047241, + "p90": 595.3599810600281, + "p95": 596.1599946022034, + "p99": 601.5999913215637 }, "roundtrip": { - "p50": 2122.7200031280518, - "p90": 2144.9921131134033, - "p95": 2150.559902191162, - "p99": 2167.6158905029297 + "p50": 1334.2399597167969, + "p90": 1338.528037071228, + "p95": 1340.000033378601, + "p99": 1418.4319972991943 }, "isolatedSum": { - "p50": 2166.1120653152466, - "p90": 2200.8320093154907, - "p95": 2210.911989212036, - "p99": 2229.248046875 + "p50": 1334.1760039329529, + 
"p90": 1340.6400084495544, + "p95": 1345.0239896774292, + "p99": 1414.0160083770752 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2479669248, - "combineLogicalBytes": 2479669248, - "fanoutMean": 5.278564453125, - "recvTokensMax": 21730, - "stragglerRank": 7, + "dispatchLogicalBytes": 2487009280, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -30765,49 +31334,50 @@ ] }, { - "id": "cx-3c52549e", - "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|prefill|normal|none|none|0|tuned||0a3064a2af0dd39", - "colorKey": "h100_16047c28", - "comparisonKey": "987d0ef30063bb5c", + "id": "cx-6136a9d3", + "identity": "b300|flashinfer|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|fp8|none|0|tuned||ac583971f94b176", + "colorKey": "b300_5ec8473f", + "comparisonKey": "be2ec236ee21b030", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:59:36.290170+00:00", + "generatedAt": "2026-06-28T05:40:56.109359+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h100-dgxc-slurm_11", - "sku": "h100", - "backend": "deepep", - "phase": "prefill", + "runner": "b300-nv_07", + "sku": "b300", + "backend": "flashinfer", + "phase": "decode", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", + "topologyClass": "b300-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · bf16 · balanced", + "label": "B300 EP8 · flashinfer · bf16", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, "experts": 256, - "routing": "balanced", - "routingLabel": "balanced", + "routing": "uniform", + "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, "dispatchDtype": "bf16", 
"activationProfile": "normal", - "combineQuantMode": "none" + "combineQuantMode": "fp8" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, "paretoEligible": false }, "placement": { @@ -30817,274 +31387,349 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "0a3064a2af0dd39", - "workloadId": "set:6:2dad1a73ff872905", + "traceSignature": "ac583971f94b176", + "workloadId": "set:8:d8d49658059863f2", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", + "backendVersion": null, "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271938768", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271938768", - "createdAt": "2026-06-26T23:58:32Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28312753674", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28312753674", + "createdAt": "2026-06-28T05:40:56.109359+00:00", + "sha": "85273c67789913421295080d1d06daacdc027a4a" }, "rows": [ { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 1, + "globalTokens": 8, "dispatch": { - "p50": 127.23200023174286, - "p90": 131.52000308036804, - "p95": 133.08799266815186, - "p99": 136.3839954137802 + "p50": 54.59199845790863, + "p90": 56.60799890756607, + "p95": 57.472001761198044, + "p99": 65.66400080919266 }, "combine": { - "p50": 126.11199915409088, - "p90": 130.62399625778198, - "p95": 131.48799538612366, - "p99": 133.98399949073792 + "p50": 54.59199845790863, + "p90": 
56.60799890756607, + "p95": 57.472001761198044, + "p99": 65.66400080919266 }, "roundtrip": { - "p50": 233.43999683856964, - "p90": 236.76800727844238, - "p95": 237.40799725055695, - "p99": 240.4160052537918 + "p50": 54.59199845790863, + "p90": 56.60799890756607, + "p95": 57.472001761198044, + "p99": 65.66400080919266 }, "isolatedSum": { - "p50": 253.34399938583374, - "p90": 262.14399933815, - "p95": 264.5759880542755, - "p99": 270.3679949045181 + "p50": 109.18399691581726, + "p90": 113.21599781513214, + "p95": 114.94400352239609, + "p99": 131.32800161838531 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 117440512, - "combineLogicalBytes": 117440512, - "fanoutMean": 8, - "recvTokensMax": 1024, - "stragglerRank": 6, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 8, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 256, - "globalTokens": 2048, + "tokensPerRank": 2, + "globalTokens": 16, "dispatch": { - "p50": 180.7039976119995, - "p90": 191.3280040025711, - "p95": 193.08799505233765, - "p99": 197.28000462055206 + "p50": 55.36000058054924, + "p90": 57.440001517534256, + "p95": 58.9120015501976, + "p99": 68.15999746322632 }, "combine": { - "p50": 183.26400220394135, - "p90": 190.97599387168884, - "p95": 192.3840045928955, - "p99": 197.66399264335632 + "p50": 55.36000058054924, + "p90": 57.440001517534256, + "p95": 58.9120015501976, + "p99": 68.15999746322632 }, "roundtrip": { - "p50": 332.15999603271484, - "p90": 344.35200691223145, - "p95": 346.3680148124695, - "p99": 348.83201122283936 + "p50": 55.36000058054924, + "p90": 57.440001517534256, + "p95": 58.9120015501976, + "p99": 68.15999746322632 }, "isolatedSum": { - "p50": 363.96799981594086, - "p90": 382.30399787425995, - "p95": 385.47199964523315, - "p99": 394.9439972639084 + "p50": 110.72000116109848, + "p90": 114.88000303506851, + "p95": 117.8240031003952, + "p99": 136.31999492645264 }, 
"roundtripMeasured": true, - "dispatchLogicalBytes": 234881024, - "combineLogicalBytes": 234881024, - "fanoutMean": 8, - "recvTokensMax": 2048, - "stragglerRank": 6, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 8, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 512, - "globalTokens": 4096, + "tokensPerRank": 4, + "globalTokens": 32, "dispatch": { - "p50": 272.41599559783936, - "p90": 284.0000092983246, - "p95": 286.46400570869446, - "p99": 290.1439964771271 + "p50": 57.18399956822395, + "p90": 59.74400043487549, + "p95": 60.736000537872314, + "p99": 74.23999905586243 }, "combine": { - "p50": 276.2239873409271, - "p90": 285.0880026817322, - "p95": 286.8799865245819, - "p99": 294.624000787735 + "p50": 57.18399956822395, + "p90": 59.74400043487549, + "p95": 60.736000537872314, + "p99": 74.23999905586243 }, "roundtrip": { - "p50": 519.648015499115, - "p90": 533.2159996032715, - "p95": 535.1999998092651, - "p99": 538.0480289459229 + "p50": 57.18399956822395, + "p90": 59.74400043487549, + "p95": 60.736000537872314, + "p99": 74.23999905586243 }, "isolatedSum": { - "p50": 548.6399829387665, - "p90": 569.0880119800568, - "p95": 573.3439922332764, - "p99": 584.7679972648621 + "p50": 114.3679991364479, + "p90": 119.48800086975098, + "p95": 121.47200107574463, + "p99": 148.47999811172485 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 469762048, - "combineLogicalBytes": 469762048, - "fanoutMean": 8, - "recvTokensMax": 4096, - "stragglerRank": 6, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 8, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 1024, - "globalTokens": 8192, + "tokensPerRank": 8, + "globalTokens": 64, "dispatch": { - "p50": 450.3679871559143, - "p90": 462.14398741722107, - "p95": 464.2559885978699, - "p99": 469.34399008750916 + 
"p50": 57.50399827957153, + "p90": 60.06399914622307, + "p95": 61.85600161552429, + "p99": 68.7360018491745 }, "combine": { - "p50": 469.11999583244324, - "p90": 477.53599286079407, - "p95": 479.0720045566559, - "p99": 484.0959906578064 + "p50": 57.50399827957153, + "p90": 60.06399914622307, + "p95": 61.85600161552429, + "p99": 68.7360018491745 }, "roundtrip": { - "p50": 892.3839926719666, - "p90": 904.3520092964172, - "p95": 909.0560078620911, - "p99": 1079.967975616455 + "p50": 57.50399827957153, + "p90": 60.06399914622307, + "p95": 61.85600161552429, + "p99": 68.7360018491745 }, "isolatedSum": { - "p50": 919.4879829883575, - "p90": 939.6799802780151, - "p95": 943.3279931545258, - "p99": 953.4399807453156 + "p50": 115.00799655914307, + "p90": 120.12799829244614, + "p95": 123.71200323104858, + "p99": 137.472003698349 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 939524096, - "combineLogicalBytes": 939524096, - "fanoutMean": 8, - "recvTokensMax": 8192, - "stragglerRank": 6, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 8, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2048, - "globalTokens": 16384, + "tokensPerRank": 16, + "globalTokens": 128, "dispatch": { - "p50": 810.7200264930725, - "p90": 828.607976436615, - "p95": 831.3599824905396, - "p99": 837.2480273246765 + "p50": 58.20799991488457, + "p90": 60.35200133919716, + "p95": 61.824001371860504, + "p99": 74.17599856853485 }, "combine": { - "p50": 854.8160195350647, - "p90": 863.6159896850586, - "p95": 865.9840226173401, - "p99": 870.3359961509705 + "p50": 58.20799991488457, + "p90": 60.35200133919716, + "p95": 61.824001371860504, + "p99": 74.17599856853485 }, "roundtrip": { - "p50": 1635.583996772766, - "p90": 1645.0239419937134, - "p95": 1648.095965385437, - "p99": 1656.7679643630981 + "p50": 58.20799991488457, + "p90": 60.35200133919716, + "p95": 61.824001371860504, + "p99": 
74.17599856853485 }, "isolatedSum": { - "p50": 1665.5360460281372, - "p90": 1692.2239661216736, - "p95": 1697.3440051078796, - "p99": 1707.584023475647 + "p50": 116.41599982976913, + "p90": 120.70400267839432, + "p95": 123.64800274372101, + "p99": 148.3519971370697 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1879048192, - "combineLogicalBytes": 1879048192, - "fanoutMean": 8, - "recvTokensMax": 16384, - "stragglerRank": 6, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 8, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 4096, - "globalTokens": 32768, + "tokensPerRank": 32, + "globalTokens": 256, "dispatch": { - "p50": 1546.623945236206, - "p90": 1554.0159940719604, - "p95": 1556.3839673995972, - "p99": 1562.559962272644 + "p50": 59.328000992536545, + "p90": 61.59999966621399, + "p95": 62.72000074386597, + "p99": 75.6160020828247 }, "combine": { - "p50": 1599.552035331726, - "p90": 1609.2480421066284, - "p95": 1612.4800443649292, - "p99": 1621.6000318527222 + "p50": 59.328000992536545, + "p90": 61.59999966621399, + "p95": 62.72000074386597, + "p99": 75.6160020828247 }, "roundtrip": { - "p50": 3122.015953063965, - "p90": 3132.4799060821533, - "p95": 3136.352062225342, - "p99": 3144.4480419158936 + "p50": 59.328000992536545, + "p90": 61.59999966621399, + "p95": 62.72000074386597, + "p99": 75.6160020828247 }, "isolatedSum": { - "p50": 3146.175980567932, - "p90": 3163.264036178589, - "p95": 3168.8640117645264, - "p99": 3184.159994125366 + "p50": 118.65600198507309, + "p90": 123.19999933242798, + "p95": 125.44000148773193, + "p99": 151.2320041656494 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 3758096384, - "combineLogicalBytes": 3758096384, - "fanoutMean": 8, - "recvTokensMax": 32768, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 8, "stragglerRank": 6, "correct": true, 
"samplesPooled": 600, "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 62.78400123119354, + "p90": 65.21599739789963, + "p95": 66.17599725723267, + "p99": 76.67200267314911 + }, + "combine": { + "p50": 62.78400123119354, + "p90": 65.21599739789963, + "p95": 66.17599725723267, + "p99": 76.67200267314911 + }, + "roundtrip": { + "p50": 62.78400123119354, + "p90": 65.21599739789963, + "p95": 66.17599725723267, + "p99": 76.67200267314911 + }, + "isolatedSum": { + "p50": 125.56800246238708, + "p90": 130.43199479579926, + "p95": 132.35199451446533, + "p99": 153.34400534629822 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 71.07199728488922, + "p90": 72.9919970035553, + "p95": 73.63200187683105, + "p99": 87.36000210046768 + }, + "combine": { + "p50": 71.07199728488922, + "p90": 72.9919970035553, + "p95": 73.63200187683105, + "p99": 87.36000210046768 + }, + "roundtrip": { + "p50": 71.07199728488922, + "p90": 72.9919970035553, + "p95": 73.63200187683105, + "p99": 87.36000210046768 + }, + "isolatedSum": { + "p50": 142.14399456977844, + "p90": 145.9839940071106, + "p95": 147.2640037536621, + "p99": 174.72000420093536 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 } ] }, { - "id": "cx-05271e8a", - "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|prefill|normal|none|none|0|tuned||9e6ac678a09f7f8", - "colorKey": "h100_16047c28", - "comparisonKey": "987d0ef30063bb5c", + "id": "cx-4e6a4685", + "identity": 
"b300|flashinfer|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "b300_5ec8473f", + "comparisonKey": "0f567db5f9c07223", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:55:32.762651+00:00", + "generatedAt": "2026-06-27T17:26:58.425220+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h100-dgxc-slurm_15", - "sku": "h100", - "backend": "deepep", - "phase": "prefill", + "runner": "b300-nv_07", + "sku": "b300", + "backend": "flashinfer", + "phase": "decode", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", + "topologyClass": "b300-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · bf16 · balanced", + "label": "B300 EP8 · flashinfer · bf16", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, "experts": 256, - "routing": "balanced", - "routingLabel": "balanced", + "routing": "uniform", + "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, @@ -31094,12 +31739,12 @@ }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, "paretoEligible": false }, "placement": { @@ -31109,178 +31754,364 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "9e6ac678a09f7f8", - "workloadId": "set:3:2dad1a73ff872905", + "traceSignature": "ac583971f94b176", + "workloadId": "set:8:d8d49658059863f2", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": 
null, - "backendVersion": "1.2.1", + "backendVersion": null, "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271791847", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271791847", - "createdAt": "2026-06-26T23:53:43Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28296434249", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28296434249", + "createdAt": "2026-06-27T17:26:58.425220+00:00", + "sha": "2ebeba9134a8c84f7a80ac87742d57f7cdf1cf18" }, "rows": [ { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 1, + "globalTokens": 8, "dispatch": { - "p50": 141.9840008020401, - "p90": 148.15999567508698, - "p95": 150.43200552463531, - "p99": 159.71200168132782 + "p50": 48.576001077890396, + "p90": 49.855999648571014, + "p95": 50.81599950790405, + "p99": 64.4799992442131 }, "combine": { - "p50": 131.77600502967834, - "p90": 138.7840062379837, - "p95": 139.80799913406372, - "p99": 147.07200229167938 + "p50": 48.576001077890396, + "p90": 49.855999648571014, + "p95": 50.81599950790405, + "p99": 64.4799992442131 }, "roundtrip": { - "p50": 243.1039959192276, - "p90": 250.71999430656433, - "p95": 252.03201174736023, - "p99": 257.9840123653412 + "p50": 48.576001077890396, + "p90": 49.855999648571014, + "p95": 50.81599950790405, + "p99": 64.4799992442131 }, "isolatedSum": { - "p50": 273.76000583171844, - "p90": 286.9440019130707, - "p95": 290.24000465869904, - "p99": 306.7840039730072 + "p50": 97.15200215578079, + "p90": 99.71199929714203, + "p95": 101.6319990158081, + "p99": 128.9599984884262 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 117440512, - "combineLogicalBytes": 117440512, - "fanoutMean": 8, - "recvTokensMax": 1024, - "stragglerRank": 6, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 8, + "stragglerRank": 7, "correct": true, 
"samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 512, - "globalTokens": 4096, + "tokensPerRank": 2, + "globalTokens": 16, "dispatch": { - "p50": 282.20799565315247, - "p90": 291.04000329971313, - "p95": 293.3439910411835, - "p99": 299.3920147418976 + "p50": 49.44000020623207, + "p90": 50.912000238895416, + "p95": 51.552001386880875, + "p99": 56.543998420238495 }, "combine": { - "p50": 282.71999955177307, - "p90": 287.4560058116913, - "p95": 288.9600098133087, - "p99": 297.5040078163147 + "p50": 49.44000020623207, + "p90": 50.912000238895416, + "p95": 51.552001386880875, + "p99": 56.543998420238495 }, "roundtrip": { - "p50": 530.239999294281, - "p90": 536.9600057601929, - "p95": 540.0320291519165, - "p99": 549.3119955062866 + "p50": 49.44000020623207, + "p90": 50.912000238895416, + "p95": 51.552001386880875, + "p99": 56.543998420238495 }, "isolatedSum": { - "p50": 564.9279952049255, - "p90": 578.4960091114044, - "p95": 582.3040008544922, - "p99": 596.8960225582123 + "p50": 98.88000041246414, + "p90": 101.82400047779083, + "p95": 103.10400277376175, + "p99": 113.08799684047699 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 469762048, - "combineLogicalBytes": 469762048, - "fanoutMean": 8, - "recvTokensMax": 4096, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 8, "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2048, - "globalTokens": 16384, + "tokensPerRank": 4, + "globalTokens": 32, "dispatch": { - "p50": 815.7439827919006, - "p90": 825.2800107002258, - "p95": 828.5760283470154, - "p99": 835.0080251693726 + "p50": 51.552001386880875, + "p90": 53.888000547885895, + "p95": 54.976001381874084, + "p99": 66.72000139951706 }, "combine": { - "p50": 857.9840064048767, - "p90": 866.27197265625, - "p95": 869.6320056915283, - "p99": 877.8560161590576 + "p50": 51.552001386880875, + "p90": 53.888000547885895, + "p95": 54.976001381874084, + "p99": 
66.72000139951706 }, "roundtrip": { - "p50": 1642.5280570983887, - "p90": 1654.5920372009277, - "p95": 1658.944010734558, - "p99": 1692.7039623260498 + "p50": 51.552001386880875, + "p90": 53.888000547885895, + "p95": 54.976001381874084, + "p99": 66.72000139951706 }, "isolatedSum": { - "p50": 1673.7279891967773, - "p90": 1691.5519833564758, - "p95": 1698.2080340385437, - "p99": 1712.8640413284302 + "p50": 103.10400277376175, + "p90": 107.77600109577179, + "p95": 109.95200276374817, + "p99": 133.44000279903412 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1879048192, - "combineLogicalBytes": 1879048192, - "fanoutMean": 8, - "recvTokensMax": 16384, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 51.7439991235733, + "p90": 54.43200096487999, + "p95": 55.07199838757515, + "p99": 61.664000153541565 + }, + "combine": { + "p50": 51.7439991235733, + "p90": 54.43200096487999, + "p95": 55.07199838757515, + "p99": 61.664000153541565 + }, + "roundtrip": { + "p50": 51.7439991235733, + "p90": 54.43200096487999, + "p95": 55.07199838757515, + "p99": 61.664000153541565 + }, + "isolatedSum": { + "p50": 103.4879982471466, + "p90": 108.86400192975998, + "p95": 110.1439967751503, + "p99": 123.32800030708313 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 54.207999259233475, + "p90": 56.0000017285347, + "p95": 56.703999638557434, + "p99": 63.13599646091461 + }, + "combine": { + "p50": 54.207999259233475, + "p90": 56.0000017285347, + "p95": 56.703999638557434, + "p99": 63.13599646091461 + }, + 
"roundtrip": { + "p50": 54.207999259233475, + "p90": 56.0000017285347, + "p95": 56.703999638557434, + "p99": 63.13599646091461 + }, + "isolatedSum": { + "p50": 108.41599851846695, + "p90": 112.0000034570694, + "p95": 113.40799927711487, + "p99": 126.27199292182922 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 8, "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 54.976001381874084, + "p90": 57.312000542879105, + "p95": 58.04799869656563, + "p99": 60.7680007815361 + }, + "combine": { + "p50": 54.976001381874084, + "p90": 57.312000542879105, + "p95": 58.04799869656563, + "p99": 60.7680007815361 + }, + "roundtrip": { + "p50": 54.976001381874084, + "p90": 57.312000542879105, + "p95": 58.04799869656563, + "p99": 60.7680007815361 + }, + "isolatedSum": { + "p50": 109.95200276374817, + "p90": 114.62400108575821, + "p95": 116.09599739313126, + "p99": 121.5360015630722 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 59.29600074887276, + "p90": 61.47199869155884, + "p95": 62.912002205848694, + "p99": 81.31200075149536 + }, + "combine": { + "p50": 59.29600074887276, + "p90": 61.47199869155884, + "p95": 62.912002205848694, + "p99": 81.31200075149536 + }, + "roundtrip": { + "p50": 59.29600074887276, + "p90": 61.47199869155884, + "p95": 62.912002205848694, + "p99": 81.31200075149536 + }, + "isolatedSum": { + "p50": 118.59200149774551, + "p90": 122.94399738311768, + "p95": 125.82400441169739, + "p99": 162.62400150299072 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + 
"fanoutMean": 5.3125, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 69.2799985408783, + "p90": 71.00799679756165, + "p95": 71.29599899053574, + "p99": 78.97599786520004 + }, + "combine": { + "p50": 69.2799985408783, + "p90": 71.00799679756165, + "p95": 71.29599899053574, + "p99": 78.97599786520004 + }, + "roundtrip": { + "p50": 69.2799985408783, + "p90": 71.00799679756165, + "p95": 71.29599899053574, + "p99": 78.97599786520004 + }, + "isolatedSum": { + "p50": 138.5599970817566, + "p90": 142.0159935951233, + "p95": 142.59199798107147, + "p99": 157.95199573040009 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 } ] }, { - "id": "cx-06b4b084", - "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|8|prefill|normal|none|none|0|tuned||7aa44c7b86748b9", - "colorKey": "h100_0c515f8b", - "comparisonKey": "e2c5b47e428e10b6", + "id": "cx-c4d51897", + "identity": "b300|flashinfer|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|nvfp4|none|0|tuned||ac583971f94b176", + "colorKey": "b300_5ec8473f", + "comparisonKey": "fcbe4c54041214ff", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:54:50.950252+00:00", + "generatedAt": "2026-06-28T06:30:40.335883+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h100-dgxc-slurm_04", - "sku": "h100", - "backend": "deepep", - "phase": "prefill", + "runner": "b300-nv_07", + "sku": "b300", + "backend": "flashinfer", + "phase": "decode", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", + 
"topologyClass": "b300-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · bf16 · balanced-rank-local", + "label": "B300 EP8 · flashinfer · bf16", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, "experts": 256, - "routing": "balanced-rank-local", - "routingLabel": "balanced-rank-local", + "routing": "uniform", + "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, "dispatchDtype": "bf16", "activationProfile": "normal", - "combineQuantMode": "none" + "combineQuantMode": "nvfp4" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, "paretoEligible": false }, "placement": { @@ -31290,420 +32121,313 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "7aa44c7b86748b9", - "workloadId": "set:3:388ff74baef05c72", + "traceSignature": "ac583971f94b176", + "workloadId": "set:8:d8d49658059863f2", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", + "backendVersion": null, "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271798809", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271798809", - "createdAt": "2026-06-26T23:53:57Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28313781903", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28313781903", + "createdAt": "2026-06-28T06:30:40.335883+00:00", + "sha": "0e61ac1009cdb939b811e283f71ad6306241d3dd" }, "rows": [ { - 
"tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 1, + "globalTokens": 8, "dispatch": { - "p50": 102.52799838781357, - "p90": 106.52799904346466, - "p95": 108.31999778747559, - "p99": 112.44799941778183 + "p50": 55.64799904823303, + "p90": 57.88800120353699, + "p95": 60.127999633550644, + "p99": 76.86399668455124 }, "combine": { - "p50": 81.31200075149536, - "p90": 88.128000497818, - "p95": 88.48000317811966, - "p99": 90.4960036277771 - }, - "roundtrip": { - "p50": 155.32800555229187, - "p90": 160.92799603939056, - "p95": 161.79199516773224, - "p99": 165.40800034999847 - }, - "isolatedSum": { - "p50": 183.83999913930893, - "p90": 194.65599954128265, - "p95": 196.80000096559525, - "p99": 202.94400304555893 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 14680064, - "combineLogicalBytes": 14680064, - "fanoutMean": 1, - "recvTokensMax": 128, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 512, - "globalTokens": 4096, - "dispatch": { - "p50": 124.95999783277512, - "p90": 130.36799430847168, - "p95": 131.9040060043335, - "p99": 142.17600226402283 - }, - "combine": { - "p50": 128.7039965391159, - "p90": 130.43199479579926, - "p95": 136.80000603199005, - "p99": 147.67999947071075 + "p50": 55.64799904823303, + "p90": 57.88800120353699, + "p95": 60.127999633550644, + "p99": 76.86399668455124 }, "roundtrip": { - "p50": 216.25599265098572, - "p90": 220.57600319385529, - "p95": 223.4880030155182, - "p99": 267.8399980068207 + "p50": 55.64799904823303, + "p90": 57.88800120353699, + "p95": 60.127999633550644, + "p99": 76.86399668455124 }, "isolatedSum": { - "p50": 253.66399437189102, - "p90": 260.79998910427094, - "p95": 268.70401203632355, - "p99": 289.8560017347336 + "p50": 111.29599809646606, + "p90": 115.77600240707397, + "p95": 120.25599926710129, + "p99": 153.72799336910248 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 58720256, - "combineLogicalBytes": 58720256, - "fanoutMean": 
1, - "recvTokensMax": 512, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 8, "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2048, - "globalTokens": 16384, + "tokensPerRank": 2, + "globalTokens": 16, "dispatch": { - "p50": 205.6639939546585, - "p90": 211.13599836826324, - "p95": 216.48000180721283, - "p99": 269.1200077533722 + "p50": 56.51199817657471, + "p90": 58.52799862623215, + "p95": 59.61599946022034, + "p99": 66.3679987192154 }, "combine": { - "p50": 295.80798745155334, - "p90": 300.54399371147156, - "p95": 305.2160143852234, - "p99": 337.3439908027649 + "p50": 56.51199817657471, + "p90": 58.52799862623215, + "p95": 59.61599946022034, + "p99": 66.3679987192154 }, "roundtrip": { - "p50": 464.4800126552582, - "p90": 471.45599126815796, - "p95": 474.047988653183, - "p99": 503.35997343063354 + "p50": 56.51199817657471, + "p90": 58.52799862623215, + "p95": 59.61599946022034, + "p99": 66.3679987192154 }, "isolatedSum": { - "p50": 501.47198140621185, - "p90": 511.6799920797348, - "p95": 521.6960161924362, - "p99": 606.4639985561371 + "p50": 113.02399635314941, + "p90": 117.0559972524643, + "p95": 119.23199892044067, + "p99": 132.7359974384308 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 234881024, - "combineLogicalBytes": 234881024, - "fanoutMean": 1, - "recvTokensMax": 2048, - "stragglerRank": 4, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 8, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 - } - ] - }, - { - "id": "cx-4058f6f5", - "identity": "h100|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|balanced+eplb|8|prefill|normal|none|none|0|tuned||df54a9510825f71", - "colorKey": "h100_c0c0ad86", - "comparisonKey": "252e0af9287be53d", - "schemaVersion": 3, - "generatedAt": "2026-06-26T23:59:35.979250+00:00", - "status": "valid", - "publicationStatus": 
"official", - "runner": "h100-dgxc-slurm_07", - "sku": "h100", - "backend": "deepep", - "phase": "prefill", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · bf16 · balanced+eplb", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 288, - "routing": "balanced", - "routingLabel": "balanced+eplb", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": true, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "df54a9510825f71", - "workloadId": "set:6:2dad1a73ff872905", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": 1, - "eplbImbalanceAfter": 1, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28271942138", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271942138", - "createdAt": "2026-06-26T23:58:39Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ + }, { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 4, + "globalTokens": 32, "dispatch": { - "p50": 94.14400160312653, - "p90": 98.01600128412247, - "p95": 99.74399954080582, - "p99": 103.29599678516388 + "p50": 57.5999990105629, + "p90": 59.67999994754791, + "p95": 60.92799827456474, + "p99": 
67.1359971165657 }, "combine": { - "p50": 83.03999900817871, - "p90": 88.22400122880936, - "p95": 89.15200084447861, - "p99": 90.81599861383438 + "p50": 57.5999990105629, + "p90": 59.67999994754791, + "p95": 60.92799827456474, + "p99": 67.1359971165657 }, "roundtrip": { - "p50": 157.79200196266174, - "p90": 161.9199961423874, - "p95": 163.5199934244156, - "p99": 167.67999529838562 + "p50": 57.5999990105629, + "p90": 59.67999994754791, + "p95": 60.92799827456474, + "p99": 67.1359971165657 }, "isolatedSum": { - "p50": 177.18400061130524, - "p90": 186.24000251293182, - "p95": 188.89600038528442, - "p99": 194.11199539899826 + "p50": 115.1999980211258, + "p90": 119.35999989509583, + "p95": 121.85599654912949, + "p99": 134.2719942331314 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 29360128, - "combineLogicalBytes": 29360128, - "fanoutMean": 2, - "recvTokensMax": 384, - "stragglerRank": 5, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 8, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 256, - "globalTokens": 2048, + "tokensPerRank": 8, + "globalTokens": 64, "dispatch": { - "p50": 122.40000069141388, - "p90": 129.95199859142303, - "p95": 143.10400187969208, - "p99": 173.95199835300446 + "p50": 57.82400071620941, + "p90": 59.87200140953064, + "p95": 60.7680007815361, + "p99": 70.0799971818924 }, "combine": { - "p50": 104.41599786281586, - "p90": 106.65600001811981, - "p95": 120.51200121641159, - "p99": 144.28800344467163 + "p50": 57.82400071620941, + "p90": 59.87200140953064, + "p95": 60.7680007815361, + "p99": 70.0799971818924 }, "roundtrip": { - "p50": 198.43199849128723, - "p90": 202.36800611019135, - "p95": 205.1839977502823, - "p99": 235.32800376415253 + "p50": 57.82400071620941, + "p90": 59.87200140953064, + "p95": 60.7680007815361, + "p99": 70.0799971818924 }, "isolatedSum": { - "p50": 226.81599855422974, - "p90": 236.60799860954285, - "p95": 
263.61600309610367, - "p99": 318.2400017976761 + "p50": 115.64800143241882, + "p90": 119.74400281906128, + "p95": 121.5360015630722, + "p99": 140.1599943637848 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 58720256, - "combineLogicalBytes": 58720256, - "fanoutMean": 2, - "recvTokensMax": 768, - "stragglerRank": 4, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 8, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 512, - "globalTokens": 4096, + "tokensPerRank": 16, + "globalTokens": 128, "dispatch": { - "p50": 150.43200552463531, - "p90": 154.7199934720993, - "p95": 158.4320068359375, - "p99": 386.1120045185089 + "p50": 59.39200147986412, + "p90": 61.63199990987778, + "p95": 63.35999816656113, + "p99": 71.48800045251846 }, "combine": { - "p50": 141.15199446678162, - "p90": 145.91999351978302, - "p95": 146.55999839305878, - "p99": 147.5200057029724 + "p50": 59.39200147986412, + "p90": 61.63199990987778, + "p95": 63.35999816656113, + "p99": 71.48800045251846 }, "roundtrip": { - "p50": 266.1440074443817, - "p90": 274.9119997024536, - "p95": 278.3679962158203, - "p99": 286.9440019130707 + "p50": 59.39200147986412, + "p90": 61.63199990987778, + "p95": 63.35999816656113, + "p99": 71.48800045251846 }, "isolatedSum": { - "p50": 291.58399999141693, - "p90": 300.6399869918823, - "p95": 304.9920052289963, - "p99": 533.6320102214813 + "p50": 118.78400295972824, + "p90": 123.26399981975555, + "p95": 126.71999633312225, + "p99": 142.97600090503693 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 117440512, - "combineLogicalBytes": 117440512, - "fanoutMean": 2, - "recvTokensMax": 1536, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 8, "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 1024, - "globalTokens": 8192, + "tokensPerRank": 32, + 
"globalTokens": 256, "dispatch": { - "p50": 207.61600136756897, - "p90": 213.44000101089478, - "p95": 217.98400580883026, - "p99": 245.5040067434311 + "p50": 60.35200133919716, + "p90": 62.463998794555664, + "p95": 64.19199705123901, + "p99": 72.35199958086014 }, "combine": { - "p50": 219.93599832057953, - "p90": 225.0880002975464, - "p95": 227.2000014781952, - "p99": 244.86400187015533 + "p50": 60.35200133919716, + "p90": 62.463998794555664, + "p95": 64.19199705123901, + "p99": 72.35199958086014 }, "roundtrip": { - "p50": 405.023992061615, - "p90": 410.0480079650879, - "p95": 412.31998801231384, - "p99": 437.6640021800995 + "p50": 60.35200133919716, + "p90": 62.463998794555664, + "p95": 64.19199705123901, + "p99": 72.35199958086014 }, "isolatedSum": { - "p50": 427.5519996881485, - "p90": 438.52800130844116, - "p95": 445.18400728702545, - "p99": 490.3680086135864 + "p50": 120.70400267839432, + "p90": 124.92799758911133, + "p95": 128.38399410247803, + "p99": 144.70399916172028 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 234881024, - "combineLogicalBytes": 234881024, - "fanoutMean": 2, - "recvTokensMax": 3072, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 8, "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2048, - "globalTokens": 16384, + "tokensPerRank": 64, + "globalTokens": 512, "dispatch": { - "p50": 329.47200536727905, - "p90": 336.60799264907837, - "p95": 339.04001116752625, - "p99": 460.4159891605377 + "p50": 62.97600269317627, + "p90": 65.76000154018402, + "p95": 66.14399701356888, + "p99": 75.03999769687653 }, "combine": { - "p50": 368.3199882507324, - "p90": 375.2639889717102, - "p95": 377.6960074901581, - "p99": 383.07198882102966 + "p50": 62.97600269317627, + "p90": 65.76000154018402, + "p95": 66.14399701356888, + "p99": 75.03999769687653 }, "roundtrip": { - "p50": 670.0159907341003, - "p90": 675.8400201797485, - "p95": 
678.3360242843628, - "p99": 682.3359727859497 + "p50": 62.97600269317627, + "p90": 65.76000154018402, + "p95": 66.14399701356888, + "p99": 75.03999769687653 }, "isolatedSum": { - "p50": 697.7919936180115, - "p90": 711.8719816207886, - "p95": 716.7360186576843, - "p99": 843.4879779815674 + "p50": 125.95200538635254, + "p90": 131.52000308036804, + "p95": 132.28799402713776, + "p99": 150.07999539375305 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 469762048, - "combineLogicalBytes": 469762048, - "fanoutMean": 2, - "recvTokensMax": 6144, - "stragglerRank": 7, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 8, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 4096, - "globalTokens": 32768, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 580.8960199356079, - "p90": 590.1119709014893, - "p95": 592.6079750061035, - "p99": 597.5040197372437 + "p50": 70.8480030298233, + "p90": 72.92799651622772, + "p95": 74.07999783754349, + "p99": 86.27200126647949 }, "combine": { - "p50": 647.9039788246155, - "p90": 655.0719738006592, - "p95": 657.2480201721191, - "p99": 660.863995552063 + "p50": 70.8480030298233, + "p90": 72.92799651622772, + "p95": 74.07999783754349, + "p99": 86.27200126647949 }, "roundtrip": { - "p50": 1207.4559926986694, - "p90": 1217.087984085083, - "p95": 1224.0639925003052, - "p99": 1241.312026977539 + "p50": 70.8480030298233, + "p90": 72.92799651622772, + "p95": 74.07999783754349, + "p99": 86.27200126647949 }, "isolatedSum": { - "p50": 1228.7999987602234, - "p90": 1245.1839447021484, - "p95": 1249.8559951782227, - "p99": 1258.3680152893066 + "p50": 141.6960060596466, + "p90": 145.85599303245544, + "p95": 148.15999567508698, + "p99": 172.54400253295898 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 939524096, - "combineLogicalBytes": 939524096, - "fanoutMean": 2, - "recvTokensMax": 12288, - "stragglerRank": 4, + 
"dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 8, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -31711,49 +32435,50 @@ ] }, { - "id": "cx-b89c63a5", - "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|prefill|normal|none|none|0|tuned||38fd0bcf7109c32", - "colorKey": "h100_b654f9b2", - "comparisonKey": "37db9a5137981152", + "id": "cx-30070070", + "identity": "b300|flashinfer|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "b300_a52edb56", + "comparisonKey": "46230412bf8dc722", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:55:36.358305+00:00", + "generatedAt": "2026-06-28T01:38:11.748195+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h100-dgxc-slurm_13", - "sku": "h100", - "backend": "deepep", - "phase": "prefill", + "runner": "b300-nv_12", + "sku": "b300", + "backend": "flashinfer", + "phase": "decode", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", + "topologyClass": "b300-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · bf16 · hotspot-single", + "label": "B300 EP8 · flashinfer · fp8", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, "experts": 256, - "routing": "hotspot-single", - "routingLabel": "hotspot-single", + "routing": "uniform", + "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, - "dispatchDtype": "bf16", + "dispatchDtype": "fp8", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - 
"conformanceClass": "backend-default", - "fixedKernel": false, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, "paretoEligible": false }, "placement": { @@ -31763,178 +32488,364 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "38fd0bcf7109c32", - "workloadId": "set:3:b952d4a43d688b50", + "traceSignature": "ac583971f94b176", + "workloadId": "set:8:d8d49658059863f2", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", + "backendVersion": null, "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271820121", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271820121", - "createdAt": "2026-06-26T23:54:38Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28307775342", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28307775342", + "createdAt": "2026-06-28T01:38:11.748195+00:00", + "sha": "510fc17001789ae4f32b99b60b9a0a0aa53ab6b5" }, "rows": [ { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 1, + "globalTokens": 8, "dispatch": { - "p50": 123.61600250005722, - "p90": 127.48800218105316, - "p95": 131.1040073633194, - "p99": 136.19199395179749 + "p50": 50.49600079655647, + "p90": 52.000001072883606, + "p95": 53.21599915623665, + "p99": 66.3359984755516 }, "combine": { - "p50": 116.95999652147293, - "p90": 122.46400117874146, - "p95": 124.95999783277512, - "p99": 131.26400113105774 + "p50": 50.49600079655647, + "p90": 52.000001072883606, + "p95": 53.21599915623665, + "p99": 66.3359984755516 }, "roundtrip": { - "p50": 217.72800385951996, - "p90": 224.89599883556366, - "p95": 229.24800217151642, - "p99": 245.37600576877594 + "p50": 50.49600079655647, + "p90": 52.000001072883606, + 
"p95": 53.21599915623665, + "p99": 66.3359984755516 }, "isolatedSum": { - "p50": 240.57599902153015, - "p90": 249.95200335979462, - "p95": 256.0640051960945, - "p99": 267.4559950828552 + "p50": 100.99200159311295, + "p90": 104.00000214576721, + "p95": 106.4319983124733, + "p99": 132.6719969511032 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 78102528, - "combineLogicalBytes": 78102528, - "fanoutMean": 5.3203125, - "recvTokensMax": 1024, - "stragglerRank": 7, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 8, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 512, - "globalTokens": 4096, + "tokensPerRank": 2, + "globalTokens": 16, "dispatch": { - "p50": 238.91200125217438, - "p90": 257.24801421165466, - "p95": 259.2960000038147, - "p99": 261.9520127773285 + "p50": 53.21599915623665, + "p90": 55.23199960589409, + "p95": 56.12799897789955, + "p99": 60.32000109553337 }, "combine": { - "p50": 271.93599939346313, - "p90": 282.1759879589081, - "p95": 284.8320007324219, - "p99": 288.5119915008545 + "p50": 53.21599915623665, + "p90": 55.23199960589409, + "p95": 56.12799897789955, + "p99": 60.32000109553337 }, "roundtrip": { - "p50": 486.04801297187805, - "p90": 500.8959770202637, - "p95": 503.55201959609985, - "p99": 509.2160105705261 + "p50": 53.21599915623665, + "p90": 55.23199960589409, + "p95": 56.12799897789955, + "p99": 60.32000109553337 }, "isolatedSum": { - "p50": 510.8480006456375, - "p90": 539.4240021705627, - "p95": 544.1280007362366, - "p99": 550.464004278183 + "p50": 106.4319983124733, + "p90": 110.46399921178818, + "p95": 112.2559979557991, + "p99": 120.64000219106674 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 311091200, - "combineLogicalBytes": 311091200, - "fanoutMean": 5.2978515625, - "recvTokensMax": 4096, - "stragglerRank": 5, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + 
"recvTokensMax": 8, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2048, - "globalTokens": 16384, + "tokensPerRank": 4, + "globalTokens": 32, "dispatch": { - "p50": 718.3039784431458, - "p90": 732.3840260505676, - "p95": 736.3520264625549, - "p99": 740.4159903526306 + "p50": 54.976001381874084, + "p90": 57.40800127387047, + "p95": 59.20000001788139, + "p99": 64.96000289916992 }, "combine": { - "p50": 829.9520015716553, - "p90": 838.047981262207, - "p95": 840.2559757232666, - "p99": 846.6879725456238 + "p50": 54.976001381874084, + "p90": 57.40800127387047, + "p95": 59.20000001788139, + "p99": 64.96000289916992 }, "roundtrip": { - "p50": 1516.2559747695923, - "p90": 1525.3759622573853, - "p95": 1528.223991394043, - "p99": 1535.2319478988647 + "p50": 54.976001381874084, + "p90": 57.40800127387047, + "p95": 59.20000001788139, + "p99": 64.96000289916992 }, "isolatedSum": { - "p50": 1548.255980014801, - "p90": 1570.4320073127747, - "p95": 1576.6080021858215, - "p99": 1587.1039628982544 + "p50": 109.95200276374817, + "p90": 114.81600254774094, + "p95": 118.40000003576279, + "p99": 129.92000579833984 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1241511936, - "combineLogicalBytes": 1241511936, - "fanoutMean": 5.28570556640625, - "recvTokensMax": 16384, + "dispatchLogicalBytes": 1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 54.9440011382103, + "p90": 58.111999183893204, + "p95": 59.20000001788139, + "p99": 68.86400282382965 + }, + "combine": { + "p50": 54.9440011382103, + "p90": 58.111999183893204, + "p95": 59.20000001788139, + "p99": 68.86400282382965 + }, + "roundtrip": { + "p50": 54.9440011382103, + "p90": 58.111999183893204, + "p95": 59.20000001788139, + "p99": 68.86400282382965 + }, + "isolatedSum": { + "p50": 
109.8880022764206, + "p90": 116.22399836778641, + "p95": 118.40000003576279, + "p99": 137.7280056476593 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487296, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 56.0000017285347, + "p90": 58.33600088953972, + "p95": 59.167999774217606, + "p99": 64.15999680757523 + }, + "combine": { + "p50": 56.0000017285347, + "p90": 58.33600088953972, + "p95": 59.167999774217606, + "p99": 64.15999680757523 + }, + "roundtrip": { + "p50": 56.0000017285347, + "p90": 58.33600088953972, + "p95": 59.167999774217606, + "p99": 64.15999680757523 + }, + "isolatedSum": { + "p50": 112.0000034570694, + "p90": 116.67200177907944, + "p95": 118.33599954843521, + "p99": 128.31999361515045 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 56.76800012588501, + "p90": 58.94400179386139, + "p95": 60.15999987721443, + "p99": 69.69600170850754 + }, + "combine": { + "p50": 56.76800012588501, + "p90": 58.94400179386139, + "p95": 60.15999987721443, + "p99": 69.69600170850754 + }, + "roundtrip": { + "p50": 56.76800012588501, + "p90": 58.94400179386139, + "p95": 60.15999987721443, + "p99": 69.69600170850754 + }, + "isolatedSum": { + "p50": 113.53600025177002, + "p90": 117.88800358772278, + "p95": 120.31999975442886, + "p99": 139.39200341701508 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + 
"globalTokens": 512, + "dispatch": { + "p50": 58.52799862623215, + "p90": 61.24800071120262, + "p95": 62.65600025653839, + "p99": 74.49600100517273 + }, + "combine": { + "p50": 58.52799862623215, + "p90": 61.24800071120262, + "p95": 62.65600025653839, + "p99": 74.49600100517273 + }, + "roundtrip": { + "p50": 58.52799862623215, + "p90": 61.24800071120262, + "p95": 62.65600025653839, + "p99": 74.49600100517273 + }, + "isolatedSum": { + "p50": 117.0559972524643, + "p90": 122.49600142240524, + "p95": 125.31200051307678, + "p99": 148.99200201034546 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 8, "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 65.08799642324448, + "p90": 67.26399809122086, + "p95": 67.87200272083282, + "p99": 74.43200051784515 + }, + "combine": { + "p50": 65.08799642324448, + "p90": 67.26399809122086, + "p95": 67.87200272083282, + "p99": 74.43200051784515 + }, + "roundtrip": { + "p50": 65.08799642324448, + "p90": 67.26399809122086, + "p95": 67.87200272083282, + "p99": 74.43200051784515 + }, + "isolatedSum": { + "p50": 130.17599284648895, + "p90": 134.5279961824417, + "p95": 135.74400544166565, + "p99": 148.8640010356903 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 } ] }, { - "id": "cx-fa73d33e", - "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|prefill|normal|none|none|0|tuned||bfbb64a166e9f1c", - "colorKey": "h100_b654f9b2", - "comparisonKey": "37db9a5137981152", + "id": "cx-9a73b5f5", + "identity": "b300|flashinfer|7168|8|256|mxfp8|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + 
"colorKey": "b300_6af1abcd", + "comparisonKey": "227468e11845c947", "schemaVersion": 3, - "generatedAt": "2026-06-27T00:01:55.460957+00:00", + "generatedAt": "2026-06-28T01:38:16.371741+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h100-dgxc-slurm_13", - "sku": "h100", - "backend": "deepep", - "phase": "prefill", + "runner": "b300-nv_06", + "sku": "b300", + "backend": "flashinfer", + "phase": "decode", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", + "topologyClass": "b300-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · bf16 · hotspot-single", + "label": "B300 EP8 · flashinfer · mxfp8", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, "experts": 256, - "routing": "hotspot-single", - "routingLabel": "hotspot-single", + "routing": "uniform", + "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, - "dispatchDtype": "bf16", + "dispatchDtype": "mxfp8", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, "paretoEligible": false }, "placement": { @@ -31944,238 +32855,312 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "bfbb64a166e9f1c", - "workloadId": "set:6:b952d4a43d688b50", + "traceSignature": "ac583971f94b176", + "workloadId": "set:8:d8d49658059863f2", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, - 
"backendVersion": "1.2.1", + "backendVersion": null, "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28272012738", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272012738", - "createdAt": "2026-06-27T00:00:49Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28307776684", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28307776684", + "createdAt": "2026-06-28T01:38:16.371741+00:00", + "sha": "510fc17001789ae4f32b99b60b9a0a0aa53ab6b5" }, "rows": [ { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 1, + "globalTokens": 8, "dispatch": { - "p50": 129.08799946308136, - "p90": 131.42399489879608, - "p95": 132.03200697898865, - "p99": 135.903999209404 + "p50": 50.464000552892685, + "p90": 52.2879995405674, + "p95": 53.21599915623665, + "p99": 66.81600213050842 }, "combine": { - "p50": 119.87199634313583, - "p90": 121.98399752378464, - "p95": 122.36800044775009, - "p99": 125.72799623012543 + "p50": 50.464000552892685, + "p90": 52.2879995405674, + "p95": 53.21599915623665, + "p99": 66.81600213050842 }, "roundtrip": { - "p50": 219.200000166893, - "p90": 223.80800545215607, - "p95": 224.7679978609085, - "p99": 228.0000001192093 + "p50": 50.464000552892685, + "p90": 52.2879995405674, + "p95": 53.21599915623665, + "p99": 66.81600213050842 }, "isolatedSum": { - "p50": 248.9599958062172, - "p90": 253.40799242258072, - "p95": 254.40000742673874, - "p99": 261.6319954395294 + "p50": 100.92800110578537, + "p90": 104.5759990811348, + "p95": 106.4319983124733, + "p99": 133.63200426101685 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 78102528, - "combineLogicalBytes": 78102528, - "fanoutMean": 5.3203125, - "recvTokensMax": 1024, - "stragglerRank": 7, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 8, + "stragglerRank": 6, "correct": true, 
"samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 256, - "globalTokens": 2048, + "tokensPerRank": 2, + "globalTokens": 16, "dispatch": { - "p50": 163.96799683570862, - "p90": 168.19199919700623, - "p95": 170.43200135231018, - "p99": 173.12000691890717 + "p50": 51.90400034189224, + "p90": 53.568001836538315, + "p95": 54.46400120854378, + "p99": 58.36800113320351 }, "combine": { - "p50": 171.55200242996216, - "p90": 176.83200538158417, - "p95": 178.3680021762848, - "p99": 180.60800433158875 + "p50": 51.90400034189224, + "p90": 53.568001836538315, + "p95": 54.46400120854378, + "p99": 58.36800113320351 }, "roundtrip": { - "p50": 306.7840039730072, - "p90": 310.94399094581604, - "p95": 312.3199939727783, - "p99": 314.7839903831482 + "p50": 51.90400034189224, + "p90": 53.568001836538315, + "p95": 54.46400120854378, + "p99": 58.36800113320351 }, "isolatedSum": { - "p50": 335.5199992656708, - "p90": 345.0240045785904, - "p95": 348.80000352859497, - "p99": 353.7280112504959 + "p50": 103.80800068378448, + "p90": 107.13600367307663, + "p95": 108.92800241708755, + "p99": 116.73600226640701 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 156090368, - "combineLogicalBytes": 156090368, - "fanoutMean": 5.31640625, - "recvTokensMax": 2048, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 8, "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 512, - "globalTokens": 4096, + "tokensPerRank": 4, + "globalTokens": 32, "dispatch": { - "p50": 237.34399676322937, - "p90": 242.11199581623077, - "p95": 244.1920042037964, - "p99": 248.28800559043884 + "p50": 53.63199859857559, + "p90": 55.39200082421303, + "p95": 56.28800019621849, + "p99": 68.80000233650208 }, "combine": { - "p50": 268.22400093078613, - "p90": 273.53599667549133, - "p95": 274.84801411628723, - "p99": 277.69601345062256 + "p50": 53.63199859857559, + "p90": 55.39200082421303, + "p95": 56.28800019621849, + 
"p99": 68.80000233650208 }, "roundtrip": { - "p50": 482.7519953250885, - "p90": 488.44799399375916, - "p95": 490.4319941997528, - "p99": 495.07200717926025 + "p50": 53.63199859857559, + "p90": 55.39200082421303, + "p95": 56.28800019621849, + "p99": 68.80000233650208 }, "isolatedSum": { - "p50": 505.5679976940155, - "p90": 515.6479924917221, - "p95": 519.0400183200836, - "p99": 525.9840190410614 + "p50": 107.26399719715118, + "p90": 110.78400164842606, + "p95": 112.57600039243698, + "p99": 137.60000467300415 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 311091200, - "combineLogicalBytes": 311091200, - "fanoutMean": 5.2978515625, - "recvTokensMax": 4096, + "dispatchLogicalBytes": 1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 53.727999329566956, + "p90": 55.776000022888184, + "p95": 56.89600110054016, + "p99": 65.63200056552887 + }, + "combine": { + "p50": 53.727999329566956, + "p90": 55.776000022888184, + "p95": 56.89600110054016, + "p99": 65.63200056552887 + }, + "roundtrip": { + "p50": 53.727999329566956, + "p90": 55.776000022888184, + "p95": 56.89600110054016, + "p99": 65.63200056552887 + }, + "isolatedSum": { + "p50": 107.45599865913391, + "p90": 111.55200004577637, + "p95": 113.79200220108032, + "p99": 131.26400113105774 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487296, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 8, "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 1024, - "globalTokens": 8192, + "tokensPerRank": 16, + "globalTokens": 128, "dispatch": { - "p50": 391.61598682403564, - "p90": 397.5679874420166, - "p95": 399.9040126800537, - "p99": 407.1039855480194 + "p50": 55.135998874902725, + "p90": 56.92800134420395, + "p95": 57.920001447200775, + "p99": 
66.3359984755516 }, "combine": { - "p50": 455.6480050086975, - "p90": 461.5359902381897, - "p95": 463.0720019340515, - "p99": 466.5600061416626 + "p50": 55.135998874902725, + "p90": 56.92800134420395, + "p95": 57.920001447200775, + "p99": 66.3359984755516 }, "roundtrip": { - "p50": 823.2960104942322, - "p90": 829.5040130615234, - "p95": 831.5839767456055, - "p99": 835.4560136795044 + "p50": 55.135998874902725, + "p90": 56.92800134420395, + "p95": 57.920001447200775, + "p99": 66.3359984755516 }, "isolatedSum": { - "p50": 847.2639918327332, - "p90": 859.1039776802063, - "p95": 862.9760146141052, - "p99": 873.663991689682 + "p50": 110.27199774980545, + "p90": 113.8560026884079, + "p95": 115.84000289440155, + "p99": 132.6719969511032 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 620648448, - "combineLogicalBytes": 620648448, - "fanoutMean": 5.2847900390625, - "recvTokensMax": 8192, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 8, "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2048, - "globalTokens": 16384, + "tokensPerRank": 32, + "globalTokens": 256, "dispatch": { - "p50": 720.6720113754272, - "p90": 733.6320281028748, - "p95": 737.5680208206177, - "p99": 744.9280023574829 + "p50": 55.55199831724167, + "p90": 57.37600103020668, + "p95": 58.04799869656563, + "p99": 64.51199948787689 }, "combine": { - "p50": 825.7279992103577, - "p90": 834.559977054596, - "p95": 837.3759984970093, - "p99": 841.2479758262634 + "p50": 55.55199831724167, + "p90": 57.37600103020668, + "p95": 58.04799869656563, + "p99": 64.51199948787689 }, "roundtrip": { - "p50": 1514.240026473999, - "p90": 1523.7120389938354, - "p95": 1526.6239643096924, - "p99": 1534.3999862670898 + "p50": 55.55199831724167, + "p90": 57.37600103020668, + "p95": 58.04799869656563, + "p99": 64.51199948787689 }, "isolatedSum": { - "p50": 1546.400010585785, - "p90": 1568.1920051574707, - "p95": 
1574.944019317627, - "p99": 1586.1759781837463 + "p50": 111.10399663448334, + "p90": 114.75200206041336, + "p95": 116.09599739313126, + "p99": 129.02399897575378 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1241511936, - "combineLogicalBytes": 1241511936, - "fanoutMean": 5.28570556640625, - "recvTokensMax": 16384, - "stragglerRank": 5, + "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 8, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 4096, - "globalTokens": 32768, + "tokensPerRank": 64, + "globalTokens": 512, "dispatch": { - "p50": 1379.1359663009644, - "p90": 1390.1439905166626, - "p95": 1393.280029296875, - "p99": 1400.480031967163 + "p50": 57.40800127387047, + "p90": 59.51999872922897, + "p95": 60.22400036454201, + "p99": 71.35999947786331 }, "combine": { - "p50": 1540.5759811401367, - "p90": 1547.4879741668701, - "p95": 1549.7599840164185, - "p99": 1553.1519651412964 + "p50": 57.40800127387047, + "p90": 59.51999872922897, + "p95": 60.22400036454201, + "p99": 71.35999947786331 }, "roundtrip": { - "p50": 2893.3119773864746, - "p90": 2902.30393409729, - "p95": 2905.695915222168, - "p99": 2912.480115890503 + "p50": 57.40800127387047, + "p90": 59.51999872922897, + "p95": 60.22400036454201, + "p99": 71.35999947786331 }, "isolatedSum": { - "p50": 2919.711947441101, - "p90": 2937.6319646835327, - "p95": 2943.0400133132935, - "p99": 2953.6319971084595 + "p50": 114.81600254774094, + "p90": 119.03999745845795, + "p95": 120.44800072908401, + "p99": 142.71999895572662 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2484242432, - "combineLogicalBytes": 2484242432, - "fanoutMean": 5.288299560546875, - "recvTokensMax": 32768, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 
128, + "globalTokens": 1024, + "dispatch": { + "p50": 63.840001821517944, + "p90": 65.69600105285645, + "p95": 66.84800237417221, + "p99": 73.79200309515 + }, + "combine": { + "p50": 63.840001821517944, + "p90": 65.69600105285645, + "p95": 66.84800237417221, + "p99": 73.79200309515 + }, + "roundtrip": { + "p50": 63.840001821517944, + "p90": 65.69600105285645, + "p95": 66.84800237417221, + "p99": 73.79200309515 + }, + "isolatedSum": { + "p50": 127.68000364303589, + "p90": 131.3920021057129, + "p95": 133.69600474834442, + "p99": 147.5840061903 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 8, "stragglerRank": 6, "correct": true, "samplesPooled": 600, @@ -32184,49 +33169,50 @@ ] }, { - "id": "cx-e91dfe75", - "identity": "h100|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|hotspot-single+eplb|8|prefill|normal|none|none|0|tuned||29ae5ace13636f8", - "colorKey": "h100_456a963c", - "comparisonKey": "54b53207b090a644", + "id": "cx-1cb033e4", + "identity": "b300|flashinfer|7168|8|256|nvfp4|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "b300_f0247ae6", + "comparisonKey": "0025025816a64ee6", "schemaVersion": 3, - "generatedAt": "2026-06-27T00:01:57.841646+00:00", + "generatedAt": "2026-06-28T01:38:24.335990+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h100-dgxc-slurm_17", - "sku": "h100", - "backend": "deepep", - "phase": "prefill", + "runner": "b300-nv_11", + "sku": "b300", + "backend": "flashinfer", + "phase": "decode", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", + "topologyClass": "b300-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · bf16 · hotspot-single+eplb", + "label": "B300 
EP8 · flashinfer · nvfp4", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, - "experts": 288, - "routing": "hotspot-single", - "routingLabel": "hotspot-single+eplb", + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", - "eplbEnabled": true, - "dispatchDtype": "bf16", + "eplbEnabled": false, + "dispatchDtype": "nvfp4", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, "paretoEligible": false }, "placement": { @@ -32236,239 +33222,313 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "29ae5ace13636f8", - "workloadId": "set:6:b952d4a43d688b50", + "traceSignature": "ac583971f94b176", + "workloadId": "set:8:d8d49658059863f2", "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": 1.8466796875, - "eplbImbalanceAfter": 1.0002700343276514, - "backendVersion": "1.2.1", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28272016505", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272016505", - "createdAt": "2026-06-27T00:00:56Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28307777849", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28307777849", + "createdAt": "2026-06-28T01:38:24.335990+00:00", + "sha": "510fc17001789ae4f32b99b60b9a0a0aa53ab6b5" }, "rows": [ { - "tokensPerRank": 128, - "globalTokens": 1024, + 
"tokensPerRank": 1, + "globalTokens": 8, "dispatch": { - "p50": 109.69600081443787, - "p90": 113.98400366306305, - "p95": 115.77600240707397, - "p99": 122.43200093507767 + "p50": 1746.3040351867676, + "p90": 1957.1199417114258, + "p95": 2725.9199619293213, + "p99": 3350.719928741455 }, "combine": { - "p50": 105.50399869680405, - "p90": 111.10399663448334, - "p95": 112.31999844312668, - "p99": 114.27199840545654 + "p50": 1746.3040351867676, + "p90": 1957.1199417114258, + "p95": 2725.9199619293213, + "p99": 3350.719928741455 }, "roundtrip": { - "p50": 196.6720074415207, - "p90": 203.2960057258606, - "p95": 204.0960043668747, - "p99": 207.64799416065216 + "p50": 1746.3040351867676, + "p90": 1957.1199417114258, + "p95": 2725.9199619293213, + "p99": 3350.719928741455 }, "isolatedSum": { - "p50": 215.1999995112419, - "p90": 225.0880002975464, - "p95": 228.09600085020065, - "p99": 236.7039993405342 + "p50": 3492.608070373535, + "p90": 3914.2398834228516, + "p95": 5451.839923858643, + "p99": 6701.43985748291 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77701120, - "combineLogicalBytes": 77701120, - "fanoutMean": 5.29296875, - "recvTokensMax": 697, - "stragglerRank": 6, + "dispatchLogicalBytes": 157696, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 8, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 256, - "globalTokens": 2048, + "tokensPerRank": 2, + "globalTokens": 16, "dispatch": { - "p50": 144.44799721240997, - "p90": 149.63200688362122, - "p95": 151.2320041656494, - "p99": 155.83999454975128 + "p50": 1747.3280429840088, + "p90": 2064.6719932556152, + "p95": 2780.8001041412354, + "p99": 3415.9998893737793 }, "combine": { - "p50": 152.0639955997467, - "p90": 153.60000729560852, - "p95": 154.4640064239502, - "p99": 158.52800011634827 + "p50": 1747.3280429840088, + "p90": 2064.6719932556152, + "p95": 2780.8001041412354, + "p99": 3415.9998893737793 }, "roundtrip": { - "p50": 
265.0560140609741, - "p90": 268.92799139022827, - "p95": 270.687997341156, - "p99": 273.21600914001465 + "p50": 1747.3280429840088, + "p90": 2064.6719932556152, + "p95": 2780.8001041412354, + "p99": 3415.9998893737793 }, "isolatedSum": { - "p50": 296.5119928121567, - "p90": 303.23201417922974, - "p95": 305.6960105895996, - "p99": 314.36799466609955 + "p50": 3494.6560859680176, + "p90": 4129.3439865112305, + "p95": 5561.600208282471, + "p99": 6831.999778747559 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 155187200, - "combineLogicalBytes": 155187200, - "fanoutMean": 5.28564453125, - "recvTokensMax": 1372, - "stragglerRank": 6, + "dispatchLogicalBytes": 308224, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 8, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 512, - "globalTokens": 4096, + "tokensPerRank": 4, + "globalTokens": 32, "dispatch": { - "p50": 199.64799284934998, - "p90": 203.45599949359894, - "p95": 204.79999482631683, - "p99": 208.76799523830414 + "p50": 1755.903959274292, + "p90": 1934.3680143356323, + "p95": 2666.5918827056885, + "p99": 3387.00795173645 }, "combine": { - "p50": 228.5120040178299, - "p90": 234.23999547958374, - "p95": 235.167995095253, - "p99": 236.95999383926392 + "p50": 1755.903959274292, + "p90": 1934.3680143356323, + "p95": 2666.5918827056885, + "p99": 3387.00795173645 }, "roundtrip": { - "p50": 403.80799770355225, - "p90": 408.35198760032654, - "p95": 410.0799858570099, - "p99": 413.88800740242004 + "p50": 1755.903959274292, + "p90": 1934.3680143356323, + "p95": 2666.5918827056885, + "p99": 3387.00795173645 }, "isolatedSum": { - "p50": 428.15999686717987, - "p90": 437.6959949731827, - "p95": 439.9679899215698, - "p99": 445.72798907756805 + "p50": 3511.807918548584, + "p90": 3868.7360286712646, + "p95": 5333.183765411377, + "p99": 6774.0159034729 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 311162880, - "combineLogicalBytes": 311162880, 
- "fanoutMean": 5.299072265625, - "recvTokensMax": 2761, - "stragglerRank": 6, + "dispatchLogicalBytes": 620032, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 8, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 1024, - "globalTokens": 8192, + "tokensPerRank": 8, + "globalTokens": 64, "dispatch": { - "p50": 305.4080009460449, - "p90": 310.016006231308, - "p95": 311.7760121822357, - "p99": 316.76799058914185 + "p50": 1760.9599828720093, + "p90": 2005.1519870758057, + "p95": 2768.415927886963, + "p99": 3292.3200130462646 }, "combine": { - "p50": 367.19998717308044, - "p90": 374.0159869194031, - "p95": 375.5199909210205, - "p99": 379.2960047721863 + "p50": 1760.9599828720093, + "p90": 2005.1519870758057, + "p95": 2768.415927886963, + "p99": 3292.3200130462646 }, "roundtrip": { - "p50": 649.1199731826782, - "p90": 655.6479930877686, - "p95": 658.4640145301819, - "p99": 661.9840264320374 + "p50": 1760.9599828720093, + "p90": 2005.1519870758057, + "p95": 2768.415927886963, + "p99": 3292.3200130462646 }, "isolatedSum": { - "p50": 672.6079881191254, - "p90": 684.0319931507111, - "p95": 687.2960031032562, - "p99": 696.0639953613281 + "p50": 3521.9199657440186, + "p90": 4010.3039741516113, + "p95": 5536.831855773926, + "p99": 6584.640026092529 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 619974656, - "combineLogicalBytes": 619974656, - "fanoutMean": 5.279052734375, - "recvTokensMax": 5481, + "dispatchLogicalBytes": 1243648, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 8, "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2048, - "globalTokens": 16384, + "tokensPerRank": 16, + "globalTokens": 128, "dispatch": { - "p50": 528.8640260696411, - "p90": 539.3919944763184, - "p95": 543.8079833984375, - "p99": 805.9520125389099 + "p50": 1761.0559463500977, + "p90": 2489.1200065612793, + "p95": 2836.575984954834, + 
"p99": 4053.1201362609863 }, "combine": { - "p50": 633.184015750885, - "p90": 640.9919857978821, - "p95": 643.9039707183838, - "p99": 648.5440135002136 + "p50": 1761.0559463500977, + "p90": 2489.1200065612793, + "p95": 2836.575984954834, + "p99": 4053.1201362609863 }, "roundtrip": { - "p50": 1132.032036781311, - "p90": 1143.8720226287842, - "p95": 1147.3920345306396, - "p99": 1154.8160314559937 + "p50": 1761.0559463500977, + "p90": 2489.1200065612793, + "p95": 2836.575984954834, + "p99": 4053.1201362609863 }, "isolatedSum": { - "p50": 1162.0480418205261, - "p90": 1180.3839802742004, - "p95": 1187.7119541168213, - "p99": 1454.4960260391235 + "p50": 3522.1118927001953, + "p90": 4978.240013122559, + "p95": 5673.151969909668, + "p99": 8106.240272521973 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1240020992, - "combineLogicalBytes": 1240020992, - "fanoutMean": 5.27935791015625, - "recvTokensMax": 10883, - "stragglerRank": 7, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 8, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 4096, - "globalTokens": 32768, + "tokensPerRank": 32, + "globalTokens": 256, "dispatch": { - "p50": 985.8880043029785, - "p90": 1005.5680274963379, - "p95": 1010.9119415283203, - "p99": 1020.5440521240234 + "p50": 1755.552053451538, + "p90": 1923.6479997634888, + "p95": 2723.328113555908, + "p99": 3401.18408203125 }, "combine": { - "p50": 1144.1919803619385, - "p90": 1153.92005443573, - "p95": 1157.439947128296, - "p99": 1163.6799573898315 + "p50": 1755.552053451538, + "p90": 1923.6479997634888, + "p95": 2723.328113555908, + "p99": 3401.18408203125 }, "roundtrip": { - "p50": 2094.464063644409, - "p90": 2109.8880767822266, - "p95": 2115.295886993408, - "p99": 2124.5760917663574 + "p50": 1755.552053451538, + "p90": 1923.6479997634888, + "p95": 2723.328113555908, + "p99": 3401.18408203125 }, "isolatedSum": { - "p50": 
2130.079984664917, - "p90": 2159.488081932068, - "p95": 2168.351888656616, - "p99": 2184.224009513855 + "p50": 3511.104106903076, + "p90": 3847.2959995269775, + "p95": 5446.656227111816, + "p99": 6802.3681640625 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2480414720, - "combineLogicalBytes": 2480414720, - "fanoutMean": 5.2801513671875, - "recvTokensMax": 21702, - "stragglerRank": 5, + "dispatchLogicalBytes": 4931584, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 1759.071946144104, + "p90": 2116.3198947906494, + "p95": 2758.9120864868164, + "p99": 3519.9038982391357 + }, + "combine": { + "p50": 1759.071946144104, + "p90": 2116.3198947906494, + "p95": 2758.9120864868164, + "p99": 3519.9038982391357 + }, + "roundtrip": { + "p50": 1759.071946144104, + "p90": 2116.3198947906494, + "p95": 2758.9120864868164, + "p99": 3519.9038982391357 + }, + "isolatedSum": { + "p50": 3518.143892288208, + "p90": 4232.639789581299, + "p95": 5517.824172973633, + "p99": 7039.8077964782715 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9748480, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 1765.4720544815063, + "p90": 2013.4079456329346, + "p95": 2776.8959999084473, + "p99": 3300.992012023926 + }, + "combine": { + "p50": 1765.4720544815063, + "p90": 2013.4079456329346, + "p95": 2776.8959999084473, + "p99": 3300.992012023926 + }, + "roundtrip": { + "p50": 1765.4720544815063, + "p90": 2013.4079456329346, + "p95": 2776.8959999084473, + "p99": 3300.992012023926 + }, + "isolatedSum": { + "p50": 3530.9441089630127, + "p90": 4026.815891265869, + "p95": 5553.7919998168945, + "p99": 
6601.984024047852 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19418112, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 8, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -32476,49 +33536,50 @@ ] }, { - "id": "cx-f8095d72", - "identity": "h100|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|uniform+eplb|8|prefill|normal|none|none|0|tuned||2225dbbdab9bf2d", - "colorKey": "h100_fb5b86de", - "comparisonKey": "cd6da73322e03923", + "id": "cx-207d8ef2", + "identity": "b300|flashinfer|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "b300_5ec8473f", + "comparisonKey": "01804e6d9a96754e", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:59:17.404659+00:00", + "generatedAt": "2026-06-27T17:26:59.581224+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h100-dgxc-slurm_04", - "sku": "h100", - "backend": "deepep", + "runner": "b300-nv_03", + "sku": "b300", + "backend": "flashinfer", "phase": "prefill", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", + "topologyClass": "b300-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · bf16 · uniform+eplb", + "label": "B300 EP8 · flashinfer · bf16", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, - "experts": 288, + "experts": 256, "routing": "uniform", - "routingLabel": "uniform+eplb", + "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", - "eplbEnabled": true, + "eplbEnabled": false, "dispatchDtype": "bf16", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - 
"conformanceClass": "backend-default", - "fixedKernel": false, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, "paretoEligible": false }, "placement": { @@ -32528,54 +33589,54 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "2225dbbdab9bf2d", + "traceSignature": "64d989e2e2a6b31", "workloadId": "set:6:a426d66e479dc893", "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": 1.006072998046875, - "eplbImbalanceAfter": 1.0000152587890625, - "backendVersion": "1.2.1", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271927356", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271927356", - "createdAt": "2026-06-26T23:58:11Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28296434249", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28296434249", + "createdAt": "2026-06-27T17:26:59.581224+00:00", + "sha": "2ebeba9134a8c84f7a80ac87742d57f7cdf1cf18" }, "rows": [ { "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 113.02399635314941, - "p90": 118.40000003576279, - "p95": 120.12799829244614, - "p99": 126.0479986667633 + "p50": 69.31199878454208, + "p90": 70.91200351715088, + "p95": 71.58400118350983, + "p99": 75.42400062084198 }, "combine": { - "p50": 105.66399991512299, - "p90": 108.89600217342377, - "p95": 112.06399649381638, - "p99": 115.9679964184761 + "p50": 69.31199878454208, + "p90": 70.91200351715088, + "p95": 71.58400118350983, + "p99": 75.42400062084198 }, "roundtrip": { - "p50": 195.8719938993454, - "p90": 201.24800503253937, - "p95": 202.62399315834045, - "p99": 207.39200711250305 + "p50": 69.31199878454208, + "p90": 70.91200351715088, + 
"p95": 71.58400118350983, + "p99": 75.42400062084198 }, "isolatedSum": { - "p50": 218.6879962682724, - "p90": 227.29600220918655, - "p95": 232.1919947862625, - "p99": 242.0159950852394 + "p50": 138.62399756908417, + "p90": 141.82400703430176, + "p95": 143.16800236701965, + "p99": 150.84800124168396 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77041664, - "combineLogicalBytes": 77041664, - "fanoutMean": 5.248046875, - "recvTokensMax": 686, - "stragglerRank": 6, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 8, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -32584,35 +33645,35 @@ "tokensPerRank": 256, "globalTokens": 2048, "dispatch": { - "p50": 146.97599411010742, - "p90": 150.91200172901154, - "p95": 151.7760008573532, - "p99": 155.39200603961945 + "p50": 96.22400254011154, + "p90": 98.2080027461052, + "p95": 99.10400211811066, + "p99": 110.75200140476227 }, "combine": { - "p50": 148.3519971370697, - "p90": 153.82400155067444, - "p95": 154.4959992170334, - "p99": 156.67200088500977 + "p50": 96.22400254011154, + "p90": 98.2080027461052, + "p95": 99.10400211811066, + "p99": 110.75200140476227 }, "roundtrip": { - "p50": 265.9200131893158, - "p90": 270.9760069847107, - "p95": 273.1199860572815, - "p99": 278.4000039100647 + "p50": 96.22400254011154, + "p90": 98.2080027461052, + "p95": 99.10400211811066, + "p99": 110.75200140476227 }, "isolatedSum": { - "p50": 295.3279912471771, - "p90": 304.736003279686, - "p95": 306.2720000743866, - "p99": 312.0640069246292 + "p50": 192.44800508022308, + "p90": 196.4160054922104, + "p95": 198.2080042362213, + "p99": 221.50400280952454 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 154542080, - "combineLogicalBytes": 154542080, - "fanoutMean": 5.263671875, - "recvTokensMax": 1365, - "stragglerRank": 6, + "dispatchLogicalBytes": 155889664, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + 
"recvTokensMax": 8, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -32621,35 +33682,35 @@ "tokensPerRank": 512, "globalTokens": 4096, "dispatch": { - "p50": 200.32000541687012, - "p90": 203.3279985189438, - "p95": 204.57600057125092, - "p99": 208.28799903392792 + "p50": 153.08800339698792, + "p90": 155.42399883270264, + "p95": 156.47999942302704, + "p99": 161.79199516773224 }, "combine": { - "p50": 229.8559993505478, - "p90": 235.4239970445633, - "p95": 236.4480048418045, - "p99": 237.98400163650513 + "p50": 153.08800339698792, + "p90": 155.42399883270264, + "p95": 156.47999942302704, + "p99": 161.79199516773224 }, "roundtrip": { - "p50": 402.46400237083435, - "p90": 407.9360067844391, - "p95": 410.0480079650879, - "p99": 413.1839871406555 + "p50": 153.08800339698792, + "p90": 155.42399883270264, + "p95": 156.47999942302704, + "p99": 161.79199516773224 }, "isolatedSum": { - "p50": 430.1760047674179, - "p90": 438.7519955635071, - "p95": 441.0240054130554, - "p99": 446.27200067043304 + "p50": 306.17600679397583, + "p90": 310.8479976654053, + "p95": 312.9599988460541, + "p99": 323.5839903354645 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 310589440, - "combineLogicalBytes": 310589440, - "fanoutMean": 5.289306640625, - "recvTokensMax": 2746, - "stragglerRank": 6, + "dispatchLogicalBytes": 312266752, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 8, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -32658,35 +33719,35 @@ "tokensPerRank": 1024, "globalTokens": 8192, "dispatch": { - "p50": 303.51999402046204, - "p90": 308.6720108985901, - "p95": 310.2720081806183, - "p99": 315.8400058746338 + "p50": 270.6240117549896, + "p90": 273.6319899559021, + "p95": 275.07200837135315, + "p99": 281.3119888305664 }, "combine": { - "p50": 366.3040101528168, - "p90": 374.33600425720215, - "p95": 375.99998712539673, - "p99": 380.0320029258728 + "p50": 270.6240117549896, + "p90": 
273.6319899559021, + "p95": 275.07200837135315, + "p99": 281.3119888305664 }, "roundtrip": { - "p50": 643.9679861068726, - "p90": 650.9119868278503, - "p95": 653.4720063209534, - "p99": 656.9280028343201 + "p50": 270.6240117549896, + "p90": 273.6319899559021, + "p95": 275.07200837135315, + "p99": 281.3119888305664 }, "isolatedSum": { - "p50": 669.8240041732788, - "p90": 683.0080151557922, - "p95": 686.271995306015, - "p99": 695.8720088005066 + "p50": 541.2480235099792, + "p90": 547.2639799118042, + "p95": 550.1440167427063, + "p99": 562.6239776611328 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 619171840, - "combineLogicalBytes": 619171840, - "fanoutMean": 5.272216796875, - "recvTokensMax": 5467, - "stragglerRank": 6, + "dispatchLogicalBytes": 623443968, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 8, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -32695,34 +33756,34 @@ "tokensPerRank": 2048, "globalTokens": 16384, "dispatch": { - "p50": 526.6559720039368, - "p90": 533.8879823684692, - "p95": 536.0000133514404, - "p99": 542.4000024795532 + "p50": 498.6560046672821, + "p90": 501.6000270843506, + "p95": 502.6879906654358, + "p99": 510.3679895401001 }, "combine": { - "p50": 628.607988357544, - "p90": 636.5759968757629, - "p95": 639.3600106239319, - "p99": 643.455982208252 + "p50": 498.6560046672821, + "p90": 501.6000270843506, + "p95": 502.6879906654358, + "p99": 510.3679895401001 }, "roundtrip": { - "p50": 1128.5760402679443, - "p90": 1137.984037399292, - "p95": 1141.5679454803467, - "p99": 1146.1759805679321 + "p50": 498.6560046672821, + "p90": 501.6000270843506, + "p95": 502.6879906654358, + "p99": 510.3679895401001 }, "isolatedSum": { - "p50": 1155.2639603614807, - "p90": 1170.4639792442322, - "p95": 1175.3600239753723, - "p99": 1185.8559846878052 + "p50": 997.3120093345642, + "p90": 1003.2000541687012, + "p95": 1005.3759813308716, + "p99": 1020.7359790802002 }, "roundtripMeasured": 
true, - "dispatchLogicalBytes": 1238945792, - "combineLogicalBytes": 1238945792, - "fanoutMean": 5.2747802734375, - "recvTokensMax": 10913, + "dispatchLogicalBytes": 1243805696, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 8, "stragglerRank": 6, "correct": true, "samplesPooled": 600, @@ -32732,35 +33793,35 @@ "tokensPerRank": 4096, "globalTokens": 32768, "dispatch": { - "p50": 1018.4320211410522, - "p90": 1046.496033668518, - "p95": 1056.1920404434204, - "p99": 1073.5039710998535 + "p50": 943.9679980278015, + "p90": 947.4560022354126, + "p95": 949.0879774093628, + "p99": 961.6640210151672 }, "combine": { - "p50": 1148.5120058059692, - "p90": 1156.3199758529663, - "p95": 1158.784031867981, - "p99": 1164.031982421875 + "p50": 943.9679980278015, + "p90": 947.4560022354126, + "p95": 949.0879774093628, + "p99": 961.6640210151672 }, "roundtrip": { - "p50": 2113.408088684082, - "p90": 2138.5281085968018, - "p95": 2143.807888031006, - "p99": 2155.679941177368 + "p50": 943.9679980278015, + "p90": 947.4560022354126, + "p95": 949.0879774093628, + "p99": 961.6640210151672 }, "isolatedSum": { - "p50": 2166.9440269470215, - "p90": 2202.8160095214844, - "p95": 2214.9760723114014, - "p99": 2237.5359535217285 + "p50": 1887.935996055603, + "p90": 1894.9120044708252, + "p95": 1898.1759548187256, + "p99": 1923.3280420303345 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2481747968, - "combineLogicalBytes": 2481747968, - "fanoutMean": 5.282989501953125, - "recvTokensMax": 21789, - "stragglerRank": 2, + "dispatchLogicalBytes": 2487009280, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 8, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -32768,49 +33829,50 @@ ] }, { - "id": "cx-ff5c49bb", - "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|prefill|normal|none|none|0|tuned||4caecd33bedf786", - "colorKey": "h100_aa268d13", - "comparisonKey": 
"927a6d7282665742", + "id": "cx-ae942e6d", + "identity": "b300|flashinfer|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "b300_a52edb56", + "comparisonKey": "e30791951192637e", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:55:17.079494+00:00", + "generatedAt": "2026-06-28T01:38:12.335801+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h100-dgxc-slurm_02", - "sku": "h100", - "backend": "deepep", + "runner": "b300-nv_15", + "sku": "b300", + "backend": "flashinfer", "phase": "prefill", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", + "topologyClass": "b300-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · bf16 · zipf", + "label": "B300 EP8 · flashinfer · fp8", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, "experts": 256, - "routing": "zipf", - "routingLabel": "zipf", + "routing": "uniform", + "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, - "dispatchDtype": "bf16", + "dispatchDtype": "fp8", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, "paretoEligible": false }, "placement": { @@ -32820,90 +33882,164 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "4caecd33bedf786", - "workloadId": "set:3:830e36e88869e222", + "traceSignature": "64d989e2e2a6b31", + "workloadId": 
"set:6:a426d66e479dc893", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", + "backendVersion": null, "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271806404", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271806404", - "createdAt": "2026-06-26T23:54:11Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28307775342", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28307775342", + "createdAt": "2026-06-28T01:38:12.335801+00:00", + "sha": "510fc17001789ae4f32b99b60b9a0a0aa53ab6b5" }, "rows": [ { "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 125.37600100040436, - "p90": 131.9040060043335, - "p95": 132.7359974384308, - "p99": 137.08800077438354 + "p50": 63.93600255250931, + "p90": 65.76000154018402, + "p95": 66.6240006685257, + "p99": 72.73600250482559 }, "combine": { - "p50": 113.0559965968132, - "p90": 114.04799669981003, - "p95": 114.56000059843063, - "p99": 120.67200243473053 + "p50": 63.93600255250931, + "p90": 65.76000154018402, + "p95": 66.6240006685257, + "p99": 72.73600250482559 }, "roundtrip": { - "p50": 216.2880003452301, - "p90": 219.67999637126923, - "p95": 221.15199267864227, - "p99": 226.17599368095398 + "p50": 63.93600255250931, + "p90": 65.76000154018402, + "p95": 66.6240006685257, + "p99": 72.73600250482559 }, "isolatedSum": { - "p50": 238.43199759721756, - "p90": 245.95200270414352, - "p95": 247.29599803686142, - "p99": 257.7600032091141 + "p50": 127.87200510501862, + "p90": 131.52000308036804, + "p95": 133.2480013370514, + "p99": 145.47200500965118 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 49946624, - "combineLogicalBytes": 49946624, - "fanoutMean": 3.40234375, - "recvTokensMax": 1022, - "stragglerRank": 4, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, 
+ "fanoutMean": 5.291015625, + "recvTokensMax": 8, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 512, - "globalTokens": 4096, + "tokensPerRank": 256, + "globalTokens": 2048, "dispatch": { - "p50": 239.42400515079498, - "p90": 255.5519938468933, - "p95": 258.14399123191833, - "p99": 261.9200050830841 + "p50": 84.03199911117554, + "p90": 86.11200004816055, + "p95": 86.56000345945358, + "p99": 92.83199906349182 }, "combine": { - "p50": 267.07199215888977, - "p90": 276.63999795913696, - "p95": 277.536004781723, - "p99": 279.90400791168213 + "p50": 84.03199911117554, + "p90": 86.11200004816055, + "p95": 86.56000345945358, + "p99": 92.83199906349182 }, "roundtrip": { - "p50": 476.22400522232056, - "p90": 492.3520088195801, - "p95": 495.03999948501587, - "p99": 499.55201148986816 - }, + "p50": 84.03199911117554, + "p90": 86.11200004816055, + "p95": 86.56000345945358, + "p99": 92.83199906349182 + }, "isolatedSum": { - "p50": 506.49599730968475, - "p90": 532.1919918060303, - "p95": 535.6799960136414, - "p99": 541.8240129947662 + "p50": 168.06399822235107, + "p90": 172.2240000963211, + "p95": 173.12000691890717, + "p99": 185.66399812698364 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 201678848, - "combineLogicalBytes": 201678848, - "fanoutMean": 3.4345703125, - "recvTokensMax": 4094, + "dispatchLogicalBytes": 77944832, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 130.87999820709229, + "p90": 133.56800377368927, + "p95": 135.26399433612823, + "p99": 155.5200070142746 + }, + "combine": { + "p50": 130.87999820709229, + "p90": 133.56800377368927, + "p95": 135.26399433612823, + "p99": 155.5200070142746 + }, + "roundtrip": { + "p50": 130.87999820709229, + "p90": 133.56800377368927, + "p95": 135.26399433612823, + "p99": 
155.5200070142746 + }, + "isolatedSum": { + "p50": 261.75999641418457, + "p90": 267.13600754737854, + "p95": 270.52798867225647, + "p99": 311.0400140285492 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156133376, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 229.40799593925476, + "p90": 233.11999440193176, + "p95": 234.27200317382812, + "p99": 247.48800694942474 + }, + "combine": { + "p50": 229.40799593925476, + "p90": 233.11999440193176, + "p95": 234.27200317382812, + "p99": 247.48800694942474 + }, + "roundtrip": { + "p50": 229.40799593925476, + "p90": 233.11999440193176, + "p95": 234.27200317382812, + "p99": 247.48800694942474 + }, + "isolatedSum": { + "p50": 458.8159918785095, + "p90": 466.2399888038635, + "p95": 468.54400634765625, + "p99": 494.9760138988495 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311721984, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 8, "stragglerRank": 5, "correct": true, "samplesPooled": 600, @@ -32913,35 +34049,72 @@ "tokensPerRank": 2048, "globalTokens": 16384, "dispatch": { - "p50": 677.183985710144, - "p90": 691.3599967956543, - "p95": 694.8800086975098, - "p99": 701.2479901313782 + "p50": 404.992014169693, + "p90": 407.9360067844391, + "p95": 409.63199734687805, + "p99": 412.6720130443573 }, "combine": { - "p50": 816.2879943847656, - "p90": 828.607976436615, - "p95": 832.5759768486023, - "p99": 837.8239870071411 + "p50": 404.992014169693, + "p90": 407.9360067844391, + "p95": 409.63199734687805, + "p99": 412.6720130443573 }, "roundtrip": { - "p50": 1460.4159593582153, - "p90": 1474.176049232483, - "p95": 1478.4640073776245, - "p99": 1485.8880043029785 + "p50": 404.992014169693, + "p90": 407.9360067844391, + "p95": 409.63199734687805, + "p99": 412.6720130443573 }, 
"isolatedSum": { - "p50": 1493.4719800949097, - "p90": 1519.9679732322693, - "p95": 1527.455985546112, - "p99": 1539.0719771385193 + "p50": 809.984028339386, + "p90": 815.8720135688782, + "p95": 819.2639946937561, + "p99": 825.3440260887146 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 808822784, - "combineLogicalBytes": 808822784, - "fanoutMean": 3.44354248046875, - "recvTokensMax": 16380, - "stragglerRank": 5, + "dispatchLogicalBytes": 621902848, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 756.1280131340027, + "p90": 759.6480250358582, + "p95": 760.703980922699, + "p99": 774.944007396698 + }, + "combine": { + "p50": 756.1280131340027, + "p90": 759.6480250358582, + "p95": 760.703980922699, + "p99": 774.944007396698 + }, + "roundtrip": { + "p50": 756.1280131340027, + "p90": 759.6480250358582, + "p95": 760.703980922699, + "p99": 774.944007396698 + }, + "isolatedSum": { + "p50": 1512.2560262680054, + "p90": 1519.2960500717163, + "p95": 1521.407961845398, + "p99": 1549.888014793396 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243504640, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 8, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -32949,49 +34122,50 @@ ] }, { - "id": "cx-f5264491", - "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|prefill|normal|none|none|0|tuned||b5217e990b95f86", - "colorKey": "h100_aa268d13", - "comparisonKey": "927a6d7282665742", + "id": "cx-dede56e2", + "identity": "b300|flashinfer|7168|8|256|mxfp8|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "b300_6af1abcd", + "comparisonKey": "26534c8239f2bdd1", "schemaVersion": 3, - "generatedAt": 
"2026-06-27T00:00:04.176924+00:00", + "generatedAt": "2026-06-28T01:38:47.923344+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h100-dgxc-slurm_08", - "sku": "h100", - "backend": "deepep", + "runner": "b300-nv_14", + "sku": "b300", + "backend": "flashinfer", "phase": "prefill", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", + "topologyClass": "b300-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · bf16 · zipf", + "label": "B300 EP8 · flashinfer · mxfp8", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, "experts": 256, - "routing": "zipf", - "routingLabel": "zipf", + "routing": "uniform", + "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, - "dispatchDtype": "bf16", + "dispatchDtype": "mxfp8", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, "paretoEligible": false }, "placement": { @@ -33001,54 +34175,54 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "b5217e990b95f86", - "workloadId": "set:6:830e36e88869e222", + "traceSignature": "64d989e2e2a6b31", + "workloadId": "set:6:a426d66e479dc893", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", + "backendVersion": null, "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": 
"SemiAnalysisAI/InferenceX", "run": { - "id": "28271951888", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271951888", - "createdAt": "2026-06-26T23:59:00Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28307776684", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28307776684", + "createdAt": "2026-06-28T01:38:47.923344+00:00", + "sha": "510fc17001789ae4f32b99b60b9a0a0aa53ab6b5" }, "rows": [ { "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 124.70400333404541, - "p90": 128.86400520801544, - "p95": 131.071999669075, - "p99": 132.9600065946579 + "p50": 64.7680014371872, + "p90": 66.39999896287918, + "p95": 67.32799857854843, + "p99": 76.4160007238388 }, "combine": { - "p50": 112.5119999051094, - "p90": 114.01599645614624, - "p95": 114.3679991364479, - "p99": 116.5120005607605 + "p50": 64.7680014371872, + "p90": 66.39999896287918, + "p95": 67.32799857854843, + "p99": 76.4160007238388 }, "roundtrip": { - "p50": 216.22399985790253, - "p90": 219.90400552749634, - "p95": 221.02400660514832, - "p99": 223.90399873256683 + "p50": 64.7680014371872, + "p90": 66.39999896287918, + "p95": 67.32799857854843, + "p99": 76.4160007238388 }, "isolatedSum": { - "p50": 237.21600323915482, - "p90": 242.88000166416168, - "p95": 245.43999880552292, - "p99": 249.4720071554184 + "p50": 129.5360028743744, + "p90": 132.79999792575836, + "p95": 134.65599715709686, + "p99": 152.8320014476776 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 49946624, - "combineLogicalBytes": 49946624, - "fanoutMean": 3.40234375, - "recvTokensMax": 1022, - "stragglerRank": 4, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 8, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -33057,35 +34231,35 @@ "tokensPerRank": 256, "globalTokens": 2048, "dispatch": { - "p50": 164.57599401474, - "p90": 167.93599724769592, - "p95": 169.5680022239685, 
- "p99": 229.15199398994446 + "p50": 84.28800106048584, + "p90": 85.40800213813782, + "p95": 86.07999980449677, + "p99": 86.91199868917465 }, "combine": { - "p50": 162.6559942960739, - "p90": 168.64000260829926, - "p95": 169.98399794101715, - "p99": 171.29600048065186 + "p50": 84.28800106048584, + "p90": 85.40800213813782, + "p95": 86.07999980449677, + "p99": 86.91199868917465 }, "roundtrip": { - "p50": 299.80799555778503, - "p90": 305.11999130249023, - "p95": 306.71998858451843, - "p99": 308.9919984340668 + "p50": 84.28800106048584, + "p90": 85.40800213813782, + "p95": 86.07999980449677, + "p99": 86.91199868917465 }, "isolatedSum": { - "p50": 327.2319883108139, - "p90": 336.5759998559952, - "p95": 339.55200016498566, - "p99": 400.4479944705963 + "p50": 168.57600212097168, + "p90": 170.81600427627563, + "p95": 172.15999960899353, + "p99": 173.8239973783493 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 100509696, - "combineLogicalBytes": 100509696, - "fanoutMean": 3.42333984375, - "recvTokensMax": 2046, - "stragglerRank": 5, + "dispatchLogicalBytes": 77944832, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 8, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 @@ -33094,35 +34268,35 @@ "tokensPerRank": 512, "globalTokens": 4096, "dispatch": { - "p50": 237.92000114917755, - "p90": 242.3039972782135, - "p95": 244.4159984588623, - "p99": 250.14400482177734 + "p50": 131.3920021057129, + "p90": 133.91999900341034, + "p95": 134.91199910640717, + "p99": 136.63999736309052 }, "combine": { - "p50": 260.9280049800873, - "p90": 265.6640112400055, - "p95": 267.67998933792114, - "p99": 272.7360129356384 + "p50": 131.3920021057129, + "p90": 133.91999900341034, + "p95": 134.91199910640717, + "p99": 136.63999736309052 }, "roundtrip": { - "p50": 471.77600860595703, - "p90": 476.8959879875183, - "p95": 479.2639911174774, - "p99": 495.2000081539154 + "p50": 131.3920021057129, + "p90": 133.91999900341034, + "p95": 
134.91199910640717, + "p99": 136.63999736309052 }, "isolatedSum": { - "p50": 498.84800612926483, - "p90": 507.968008518219, - "p95": 512.0959877967834, - "p99": 522.8800177574158 + "p50": 262.7840042114258, + "p90": 267.8399980068207, + "p95": 269.82399821281433, + "p99": 273.27999472618103 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 201678848, - "combineLogicalBytes": 201678848, - "fanoutMean": 3.4345703125, - "recvTokensMax": 4094, - "stragglerRank": 5, + "dispatchLogicalBytes": 156133376, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 8, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -33131,35 +34305,35 @@ "tokensPerRank": 1024, "globalTokens": 8192, "dispatch": { - "p50": 377.85598635673523, - "p90": 384.38400626182556, - "p95": 385.8239948749542, - "p99": 390.6880021095276 + "p50": 229.72799837589264, + "p90": 232.7679991722107, + "p95": 233.95200073719025, + "p99": 236.32000386714935 }, "combine": { - "p50": 442.1760141849518, - "p90": 447.80799746513367, - "p95": 449.3120014667511, - "p99": 452.86399126052856 + "p50": 229.72799837589264, + "p90": 232.7679991722107, + "p95": 233.95200073719025, + "p99": 236.32000386714935 }, "roundtrip": { - "p50": 795.6799864768982, - "p90": 803.167998790741, - "p95": 806.3039779663086, - "p99": 813.0559921264648 + "p50": 229.72799837589264, + "p90": 232.7679991722107, + "p95": 233.95200073719025, + "p99": 236.32000386714935 }, "isolatedSum": { - "p50": 820.032000541687, - "p90": 832.1920037269592, - "p95": 835.1359963417053, - "p99": 843.5519933700562 + "p50": 459.4559967517853, + "p90": 465.5359983444214, + "p95": 467.9040014743805, + "p99": 472.6400077342987 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 405035008, - "combineLogicalBytes": 405035008, - "fanoutMean": 3.4488525390625, - "recvTokensMax": 8189, - "stragglerRank": 4, + "dispatchLogicalBytes": 311721984, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + 
"recvTokensMax": 8, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 @@ -33168,35 +34342,35 @@ "tokensPerRank": 2048, "globalTokens": 16384, "dispatch": { - "p50": 675.3919720649719, - "p90": 690.4320120811462, - "p95": 694.1120028495789, - "p99": 700.8320093154907 + "p50": 404.7999978065491, + "p90": 407.1680009365082, + "p95": 407.99999237060547, + "p99": 411.3920032978058 }, "combine": { - "p50": 806.1439990997314, - "p90": 816.5119886398315, - "p95": 818.5279965400696, - "p99": 824.5440125465393 + "p50": 404.7999978065491, + "p90": 407.1680009365082, + "p95": 407.99999237060547, + "p99": 411.3920032978058 }, "roundtrip": { - "p50": 1447.1999406814575, - "p90": 1458.143949508667, - "p95": 1462.5600576400757, - "p99": 1468.991994857788 + "p50": 404.7999978065491, + "p90": 407.1680009365082, + "p95": 407.99999237060547, + "p99": 411.3920032978058 }, "isolatedSum": { - "p50": 1481.5359711647034, - "p90": 1506.9440007209778, - "p95": 1512.6399993896484, - "p99": 1525.37602186203 + "p50": 809.5999956130981, + "p90": 814.3360018730164, + "p95": 815.9999847412109, + "p99": 822.7840065956116 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 808822784, - "combineLogicalBytes": 808822784, - "fanoutMean": 3.44354248046875, - "recvTokensMax": 16380, - "stragglerRank": 6, + "dispatchLogicalBytes": 621902848, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 8, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -33205,34 +34379,34 @@ "tokensPerRank": 4096, "globalTokens": 32768, "dispatch": { - "p50": 1273.7280130386353, - "p90": 1286.1759662628174, - "p95": 1290.2400493621826, - "p99": 1300.3519773483276 + "p50": 755.6480169296265, + "p90": 758.9439749717712, + "p95": 759.7439885139465, + "p99": 764.1919851303101 }, "combine": { - "p50": 1515.6480073928833, - "p90": 1529.1199684143066, - "p95": 1554.6239614486694, - "p99": 1575.2639770507812 + "p50": 755.6480169296265, + "p90": 
758.9439749717712, + "p95": 759.7439885139465, + "p99": 764.1919851303101 }, "roundtrip": { - "p50": 2763.0081176757812, - "p90": 2772.9599475860596, - "p95": 2776.3519287109375, - "p99": 2782.464027404785 + "p50": 755.6480169296265, + "p90": 758.9439749717712, + "p95": 759.7439885139465, + "p99": 764.1919851303101 }, "isolatedSum": { - "p50": 2789.3760204315186, - "p90": 2815.295934677124, - "p95": 2844.864010810852, - "p99": 2875.615954399109 + "p50": 1511.296033859253, + "p90": 1517.8879499435425, + "p95": 1519.487977027893, + "p99": 1528.3839702606201 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1619795968, - "combineLogicalBytes": 1619795968, - "fanoutMean": 3.4481201171875, - "recvTokensMax": 32761, + "dispatchLogicalBytes": 1243504640, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 8, "stragglerRank": 4, "correct": true, "samplesPooled": 600, @@ -33241,49 +34415,50 @@ ] }, { - "id": "cx-f680673f", - "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|prefill|normal|none|none|0|tuned||3dd868cb33839a3", - "colorKey": "h100_002beb29", - "comparisonKey": "3715210183d38757", + "id": "cx-85dec801", + "identity": "b300|flashinfer|7168|8|256|nvfp4|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "b300_f0247ae6", + "comparisonKey": "eb4126aa6cf3bfca", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:55:20.108988+00:00", + "generatedAt": "2026-06-28T01:38:25.905345+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h100-dgxc-slurm_06", - "sku": "h100", - "backend": "deepep", + "runner": "b300-nv_04", + "sku": "b300", + "backend": "flashinfer", "phase": "prefill", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", + "topologyClass": "b300-nvlink-island", 
"transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · bf16 · zipf-heavy", + "label": "B300 EP8 · flashinfer · nvfp4", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, "experts": 256, - "routing": "zipf-heavy", - "routingLabel": "zipf-heavy", + "routing": "uniform", + "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, - "dispatchDtype": "bf16", + "dispatchDtype": "nvfp4", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, "paretoEligible": false }, "placement": { @@ -33293,90 +34468,164 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "3dd868cb33839a3", - "workloadId": "set:3:1ca614e23cc66be1", + "traceSignature": "64d989e2e2a6b31", + "workloadId": "set:6:a426d66e479dc893", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", + "backendVersion": null, "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271813470", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271813470", - "createdAt": "2026-06-26T23:54:25Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28307777849", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28307777849", + "createdAt": "2026-06-28T01:38:25.905345+00:00", + "sha": "510fc17001789ae4f32b99b60b9a0a0aa53ab6b5" }, "rows": [ { "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 
117.63200163841248, - "p90": 122.43200093507767, - "p95": 123.55200201272964, - "p99": 126.5919953584671 + "p50": 62.68800050020218, + "p90": 64.64000046253204, + "p95": 65.66400080919266, + "p99": 75.99999755620956 }, "combine": { - "p50": 106.62399977445602, - "p90": 112.31999844312668, - "p95": 113.27999830245972, - "p99": 115.9679964184761 + "p50": 62.68800050020218, + "p90": 64.64000046253204, + "p95": 65.66400080919266, + "p99": 75.99999755620956 }, "roundtrip": { - "p50": 207.58399367332458, - "p90": 211.84000372886658, - "p95": 213.18399906158447, - "p99": 216.35200083255768 + "p50": 62.68800050020218, + "p90": 64.64000046253204, + "p95": 65.66400080919266, + "p99": 75.99999755620956 }, "isolatedSum": { - "p50": 224.2560014128685, - "p90": 234.75199937820435, - "p95": 236.83200031518936, - "p99": 242.5599917769432 + "p50": 125.37600100040436, + "p90": 129.2800009250641, + "p95": 131.32800161838531, + "p99": 151.99999511241913 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 22650880, - "combineLogicalBytes": 22650880, - "fanoutMean": 1.54296875, - "recvTokensMax": 1024, + "dispatchLogicalBytes": 19418112, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 8, "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 78.36800068616867, + "p90": 80.25600016117096, + "p95": 81.05599880218506, + "p99": 84.95999872684479 + }, + "combine": { + "p50": 78.36800068616867, + "p90": 80.25600016117096, + "p95": 81.05599880218506, + "p99": 84.95999872684479 + }, + "roundtrip": { + "p50": 78.36800068616867, + "p90": 80.25600016117096, + "p95": 81.05599880218506, + "p99": 84.95999872684479 + }, + "isolatedSum": { + "p50": 156.73600137233734, + "p90": 160.51200032234192, + "p95": 162.11199760437012, + "p99": 169.91999745368958 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38972416, + "combineLogicalBytes": 155889664, + 
"fanoutMean": 5.3095703125, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, { "tokensPerRank": 512, "globalTokens": 4096, "dispatch": { - "p50": 236.35199666023254, - "p90": 249.82400238513947, - "p95": 253.88801097869873, - "p99": 257.02399015426636 + "p50": 118.04799735546112, + "p90": 120.60800194740295, + "p95": 121.34400010108948, + "p99": 124.79999661445618 }, "combine": { - "p50": 251.583993434906, - "p90": 259.7759962081909, - "p95": 260.47998666763306, - "p99": 262.2080147266388 + "p50": 118.04799735546112, + "p90": 120.60800194740295, + "p95": 121.34400010108948, + "p99": 124.79999661445618 }, "roundtrip": { - "p50": 459.29598808288574, - "p90": 472.1919894218445, - "p95": 474.88000988960266, - "p99": 478.5279929637909 + "p50": 118.04799735546112, + "p90": 120.60800194740295, + "p95": 121.34400010108948, + "p99": 124.79999661445618 }, "isolatedSum": { - "p50": 487.93599009513855, - "p90": 509.5999985933304, - "p95": 514.3679976463318, - "p99": 519.2320048809052 + "p50": 236.09599471092224, + "p90": 241.2160038948059, + "p95": 242.68800020217896, + "p99": 249.59999322891235 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 91521024, - "combineLogicalBytes": 91521024, - "fanoutMean": 1.55859375, - "recvTokensMax": 4096, + "dispatchLogicalBytes": 78066688, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 204.83200252056122, + "p90": 208.48000049591064, + "p95": 209.1200053691864, + "p99": 221.76000475883484 + }, + "combine": { + "p50": 204.83200252056122, + "p90": 208.48000049591064, + "p95": 209.1200053691864, + "p99": 221.76000475883484 + }, + "roundtrip": { + "p50": 204.83200252056122, + "p90": 208.48000049591064, + "p95": 209.1200053691864, + "p99": 221.76000475883484 + }, + "isolatedSum": { + 
"p50": 409.66400504112244, + "p90": 416.9600009918213, + "p95": 418.2400107383728, + "p99": 443.5200095176697 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155860992, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 8, "stragglerRank": 7, "correct": true, "samplesPooled": 600, @@ -33386,70 +34635,108 @@ "tokensPerRank": 2048, "globalTokens": 16384, "dispatch": { - "p50": 659.3279838562012, - "p90": 669.0239906311035, - "p95": 672.0960140228271, - "p99": 678.4319877624512 + "p50": 359.51998829841614, + "p90": 362.5600039958954, + "p95": 363.77599835395813, + "p99": 371.16798758506775 }, "combine": { - "p50": 783.456027507782, - "p90": 794.6239709854126, - "p95": 799.0720272064209, - "p99": 807.6800107955933 + "p50": 359.51998829841614, + "p90": 362.5600039958954, + "p95": 363.77599835395813, + "p99": 371.16798758506775 }, "roundtrip": { - "p50": 1412.6399755477905, - "p90": 1421.8239784240723, - "p95": 1426.0480403900146, - "p99": 1434.0159893035889 + "p50": 359.51998829841614, + "p90": 362.5600039958954, + "p95": 363.77599835395813, + "p99": 371.16798758506775 }, "isolatedSum": { - "p50": 1442.7840113639832, - "p90": 1463.647961616516, - "p95": 1471.168041229248, - "p99": 1486.1119985580444 + "p50": 719.0399765968323, + "p90": 725.1200079917908, + "p95": 727.5519967079163, + "p99": 742.3359751701355 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 368062464, - "combineLogicalBytes": 368062464, - "fanoutMean": 1.5670166015625, - "recvTokensMax": 16384, - "stragglerRank": 6, + "dispatchLogicalBytes": 310951424, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 8, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 - } - ] - }, - { - "id": "cx-329395ff", - "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|prefill|normal|none|none|0|tuned||bbcd1d9d8d1e4fe", - "colorKey": "h100_002beb29", - "comparisonKey": 
"3715210183d38757", + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 662.0479822158813, + "p90": 664.9919748306274, + "p95": 665.9520268440247, + "p99": 692.6400065422058 + }, + "combine": { + "p50": 662.0479822158813, + "p90": 664.9919748306274, + "p95": 665.9520268440247, + "p99": 692.6400065422058 + }, + "roundtrip": { + "p50": 662.0479822158813, + "p90": 664.9919748306274, + "p95": 665.9520268440247, + "p99": 692.6400065422058 + }, + "isolatedSum": { + "p50": 1324.0959644317627, + "p90": 1329.9839496612549, + "p95": 1331.9040536880493, + "p99": 1385.2800130844116 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 621752320, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-2fdde1de", + "identity": "b300|uccl|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "b300_e952b5c0", + "comparisonKey": "97ed86fe35a5b2af", "schemaVersion": 3, - "generatedAt": "2026-06-27T00:01:29.454209+00:00", + "generatedAt": "2026-06-27T17:36:18.907415+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h100-dgxc-slurm_15", - "sku": "h100", - "backend": "deepep", - "phase": "prefill", + "runner": "b300-nv_16", + "sku": "b300", + "backend": "uccl", + "phase": "decode", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", + "topologyClass": "b300-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · bf16 · zipf-heavy", + "label": "B300 EP8 · uccl · bf16", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, "experts": 256, - "routing": "zipf-heavy", - "routingLabel": "zipf-heavy", + "routing": "uniform", + 
"routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, @@ -33459,9 +34746,9 @@ }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1515, + "achievedFraction": 0.1351, "configuredUnits": 20, - "deviceUnits": 132, + "deviceUnits": 148, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -33474,238 +34761,312 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "bbcd1d9d8d1e4fe", - "workloadId": "set:6:1ca614e23cc66be1", + "traceSignature": "ac583971f94b176", + "workloadId": "set:8:d8d49658059863f2", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", + "backendVersion": null, "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271996602", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271996602", - "createdAt": "2026-06-27T00:00:22Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28296669967", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28296669967", + "createdAt": "2026-06-27T17:36:18.907415+00:00", + "sha": "cfa1ec56258b94b4a173844810a163a832bcb07e" }, "rows": [ { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 1, + "globalTokens": 8, "dispatch": { - "p50": 115.87200313806534, - "p90": 122.14399874210358, - "p95": 123.10399860143661, - "p99": 127.16799974441528 + "p50": 81.24800026416779, + "p90": 87.45600283145905, + "p95": 88.41600269079208, + "p99": 92.99200028181076 }, "combine": { - "p50": 106.72000050544739, - "p90": 111.7120012640953, - "p95": 112.57600039243698, - "p99": 114.46399986743927 + "p50": 74.23999905586243, + "p90": 76.48000121116638, + "p95": 77.11999863386154, + "p99": 85.66399663686752 }, "roundtrip": { - "p50": 207.07200467586517, - "p90": 210.91200411319733, - "p95": 
212.54399418830872, - "p99": 243.52000653743744 + "p50": 140.19200205802917, + "p90": 144.31999623775482, + "p95": 145.37599682807922, + "p99": 151.93599462509155 }, "isolatedSum": { - "p50": 222.59200364351273, - "p90": 233.85600000619888, - "p95": 235.6799989938736, - "p99": 241.63199961185455 + "p50": 155.4879993200302, + "p90": 163.93600404262543, + "p95": 165.53600132465363, + "p99": 178.65599691867828 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 22650880, - "combineLogicalBytes": 22650880, - "fanoutMean": 1.54296875, - "recvTokensMax": 1024, - "stragglerRank": 4, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 256, - "globalTokens": 2048, + "tokensPerRank": 2, + "globalTokens": 16, "dispatch": { - "p50": 159.29600596427917, - "p90": 166.62399470806122, - "p95": 167.4560010433197, - "p99": 169.21600699424744 + "p50": 80.92799782752991, + "p90": 85.37600189447403, + "p95": 86.2400010228157, + "p99": 89.75999802350998 }, "combine": { - "p50": 154.65599298477173, - "p90": 163.10399770736694, - "p95": 163.7759953737259, - "p99": 165.0560051202774 + "p50": 74.30399954319, + "p90": 76.25599950551987, + "p95": 77.18399912118912, + "p99": 84.44800227880478 }, "roundtrip": { - "p50": 289.44000601768494, - "p90": 301.66399478912354, - "p95": 303.5840094089508, - "p99": 308.03200602531433 + "p50": 141.6960060596466, + "p90": 145.50399780273438, + "p95": 146.7200070619583, + "p99": 162.27200627326965 }, "isolatedSum": { - "p50": 313.9519989490509, - "p90": 329.72799241542816, - "p95": 331.2319964170456, - "p99": 334.27201211452484 + "p50": 155.2319973707199, + "p90": 161.6320013999939, + "p95": 163.42400014400482, + "p99": 174.20800030231476 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 45688832, - "combineLogicalBytes": 45688832, - "fanoutMean": 1.55615234375, - "recvTokensMax": 2048, + 
"dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 81.40800148248672, + "p90": 84.44800227880478, + "p95": 87.77599781751633, + "p99": 107.04000294208527 + }, + "combine": { + "p50": 75.6480023264885, + "p90": 84.73599702119827, + "p95": 85.21600067615509, + "p99": 96.38399630784988 + }, + "roundtrip": { + "p50": 138.2720023393631, + "p90": 141.66399836540222, + "p95": 143.51999759674072, + "p99": 149.21599626541138 + }, + "isolatedSum": { + "p50": 157.05600380897522, + "p90": 169.18399930000305, + "p95": 172.99199849367142, + "p99": 203.42399924993515 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 81.40800148248672, + "p90": 83.93599838018417, + "p95": 85.28000116348267, + "p99": 93.28000247478485 + }, + "combine": { + "p50": 84.54400300979614, + "p90": 85.66399663686752, + "p95": 86.01599931716919, + "p99": 88.70399743318558 + }, + "roundtrip": { + "p50": 138.84800672531128, + "p90": 143.99999380111694, + "p95": 147.0080018043518, + "p99": 160.22400557994843 + }, + "isolatedSum": { + "p50": 165.95200449228287, + "p90": 169.5999950170517, + "p95": 171.29600048065186, + "p99": 181.98399990797043 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 512, - "globalTokens": 4096, + "tokensPerRank": 16, + "globalTokens": 128, "dispatch": { - "p50": 231.1680018901825, - "p90": 236.4799976348877, - "p95": 
237.40799725055695, - "p99": 240.7039999961853 + "p50": 82.84799754619598, + "p90": 84.99199897050858, + "p95": 87.10400015115738, + "p99": 97.37599641084671 }, "combine": { - "p50": 252.73600220680237, - "p90": 260.8639895915985, - "p95": 261.8879973888397, - "p99": 263.64800333976746 + "p50": 84.95999872684479, + "p90": 85.88799834251404, + "p95": 86.2400010228157, + "p99": 92.22400188446045 }, "roundtrip": { - "p50": 461.34400367736816, - "p90": 475.39201378822327, - "p95": 476.639986038208, - "p99": 479.45600748062134 + "p50": 140.25600254535675, + "p90": 147.20000326633453, + "p95": 148.5760062932968, + "p99": 155.07200360298157 }, "isolatedSum": { - "p50": 483.90400409698486, - "p90": 497.3439872264862, - "p95": 499.29599463939667, - "p99": 504.35200333595276 + "p50": 167.80799627304077, + "p90": 170.8799973130226, + "p95": 173.34400117397308, + "p99": 189.59999829530716 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 91521024, - "combineLogicalBytes": 91521024, - "fanoutMean": 1.55859375, - "recvTokensMax": 4096, - "stragglerRank": 7, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 1024, - "globalTokens": 8192, + "tokensPerRank": 32, + "globalTokens": 256, "dispatch": { - "p50": 374.2400109767914, - "p90": 379.040002822876, - "p95": 381.98399543762207, - "p99": 387.4559998512268 + "p50": 99.84000027179718, + "p90": 102.11200267076492, + "p95": 103.58399897813797, + "p99": 106.84800148010254 }, "combine": { - "p50": 431.2640130519867, - "p90": 439.8399889469147, - "p95": 443.07199120521545, - "p99": 446.78398966789246 + "p50": 85.85599809885025, + "p90": 87.26400136947632, + "p95": 88.48000317811966, + "p99": 96.41599655151367 }, "roundtrip": { - "p50": 779.2320251464844, - "p90": 791.3600206375122, - "p95": 794.0160036087036, - "p99": 801.0240197181702 + "p50": 150.59199929237366, + 
"p90": 153.56799960136414, + "p95": 155.008003115654, + "p99": 163.93600404262543 }, "isolatedSum": { - "p50": 805.5040240287781, - "p90": 818.8799917697906, - "p95": 825.0559866428375, - "p99": 834.2399895191193 + "p50": 185.69599837064743, + "p90": 189.37600404024124, + "p95": 192.06400215625763, + "p99": 203.2639980316162 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 183916544, - "combineLogicalBytes": 183916544, - "fanoutMean": 1.5660400390625, - "recvTokensMax": 8192, - "stragglerRank": 7, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2048, - "globalTokens": 16384, + "tokensPerRank": 64, + "globalTokens": 512, "dispatch": { - "p50": 661.1520051956177, - "p90": 676.1919856071472, - "p95": 679.6479821205139, - "p99": 685.9520077705383 + "p50": 111.10399663448334, + "p90": 114.30399864912033, + "p95": 115.35999923944473, + "p99": 118.97599697113037 }, "combine": { - "p50": 789.9519801139832, - "p90": 800.0959753990173, - "p95": 803.1359910964966, - "p99": 808.7360262870789 + "p50": 99.04000163078308, + "p90": 102.24000364542007, + "p95": 109.0560033917427, + "p99": 112.35199868679047 }, "roundtrip": { - "p50": 1422.271966934204, - "p90": 1435.1680278778076, - "p95": 1439.1039609909058, - "p99": 1454.367995262146 + "p50": 177.2480010986328, + "p90": 184.35199558734894, + "p95": 186.27199530601501, + "p99": 194.36800479888916 }, "isolatedSum": { - "p50": 1451.1039853096008, - "p90": 1476.2879610061646, - "p95": 1482.7839732170105, - "p99": 1494.6880340576172 + "p50": 210.14399826526642, + "p90": 216.5440022945404, + "p95": 224.41600263118744, + "p99": 231.32799565792084 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 368062464, - "combineLogicalBytes": 368062464, - "fanoutMean": 1.5670166015625, - "recvTokensMax": 16384, - "stragglerRank": 7, + "dispatchLogicalBytes": 38993920, + 
"combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 4096, - "globalTokens": 32768, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 1241.8559789657593, - "p90": 1251.871943473816, - "p95": 1256.4799785614014, - "p99": 1264.0639543533325 + "p50": 124.51200187206268, + "p90": 127.3919939994812, + "p95": 128.54400277137756, + "p99": 138.7840062379837 }, "combine": { - "p50": 1471.4560508728027, - "p90": 1480.1599979400635, - "p95": 1482.6240539550781, - "p99": 1489.8879528045654 + "p50": 122.43200093507767, + "p90": 123.32800030708313, + "p95": 124.54400211572647, + "p99": 133.69600474834442 }, "roundtrip": { - "p50": 2687.9680156707764, - "p90": 2698.848009109497, - "p95": 2703.104019165039, - "p99": 2708.928108215332 + "p50": 212.2880071401596, + "p90": 216.25599265098572, + "p95": 218.46400201320648, + "p99": 222.46399521827698 }, "isolatedSum": { - "p50": 2713.312029838562, - "p90": 2732.0319414138794, - "p95": 2739.1040325164795, - "p99": 2753.951907157898 + "p50": 246.94400280714035, + "p90": 250.71999430656433, + "p95": 253.08800488710403, + "p99": 272.4800109863281 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 734720000, - "combineLogicalBytes": 734720000, - "fanoutMean": 1.56402587890625, - "recvTokensMax": 32768, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, "stragglerRank": 7, "correct": true, "samplesPooled": 600, @@ -33714,46 +35075,47 @@ ] }, { - "id": "cx-c90a67e2", - "identity": "h100|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-heavy+eplb|8|prefill|normal|none|none|0|tuned||46855e7fa6754eb", - "colorKey": "h100_c44978e5", - "comparisonKey": "6c5c69e3474ec552", + "id": "cx-8d828593", + "identity": 
"b300|uccl|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "b300_e952b5c0", + "comparisonKey": "6e0e03618d466091", "schemaVersion": 3, - "generatedAt": "2026-06-27T00:01:29.771027+00:00", + "generatedAt": "2026-06-27T17:36:27.427420+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h100-dgxc-slurm_05", - "sku": "h100", - "backend": "deepep", + "runner": "b300-nv_07", + "sku": "b300", + "backend": "uccl", "phase": "prefill", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", + "topologyClass": "b300-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · bf16 · zipf-heavy+eplb", + "label": "B300 EP8 · uccl · bf16", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, - "experts": 288, - "routing": "zipf-heavy", - "routingLabel": "zipf-heavy+eplb", + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", - "eplbEnabled": true, + "eplbEnabled": false, "dispatchDtype": "bf16", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1515, + "achievedFraction": 0.1351, "configuredUnits": 20, - "deviceUnits": 132, + "deviceUnits": 148, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -33766,54 +35128,54 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "46855e7fa6754eb", - "workloadId": "set:6:1ca614e23cc66be1", + "traceSignature": "64d989e2e2a6b31", + "workloadId": "set:6:a426d66e479dc893", "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": 7.38995361328125, - "eplbImbalanceAfter": 1.0000210716610862, - "backendVersion": "1.2.1", + "eplbImbalanceBefore": null, 
+ "eplbImbalanceAfter": null, + "backendVersion": null, "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28272000459", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272000459", - "createdAt": "2026-06-27T00:00:28Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28296669967", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28296669967", + "createdAt": "2026-06-27T17:36:27.427420+00:00", + "sha": "cfa1ec56258b94b4a173844810a163a832bcb07e" }, "rows": [ { "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 110.75200140476227, - "p90": 114.97599631547928, - "p95": 116.95999652147293, - "p99": 122.01599776744843 + "p50": 125.2799928188324, + "p90": 127.9039978981018, + "p95": 128.9599984884262, + "p99": 135.51999628543854 }, "combine": { - "p50": 105.92000186443329, - "p90": 109.56799983978271, - "p95": 111.23199760913849, - "p99": 114.14399743080139 + "p50": 122.6240023970604, + "p90": 123.52000176906586, + "p95": 124.4800016283989, + "p99": 126.62400305271149 }, "roundtrip": { - "p50": 193.1840032339096, - "p90": 198.7520009279251, - "p95": 200.19200444221497, - "p99": 204.44799959659576 + "p50": 212.6079946756363, + "p90": 216.35200083255768, + "p95": 218.62399578094482, + "p99": 233.72800648212433 }, "isolatedSum": { - "p50": 216.67200326919556, - "p90": 224.543996155262, - "p95": 228.19199413061142, - "p99": 236.15999519824982 + "p50": 247.9039952158928, + "p90": 251.42399966716766, + "p95": 253.4400001168251, + "p99": 262.14399933815 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 79206400, - "combineLogicalBytes": 79206400, - "fanoutMean": 5.3955078125, - "recvTokensMax": 713, - "stragglerRank": 4, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, 
"trials": 3 @@ -33822,35 +35184,35 @@ "tokensPerRank": 256, "globalTokens": 2048, "dispatch": { - "p50": 146.81600034236908, - "p90": 151.48800611495972, - "p95": 152.44799852371216, - "p99": 156.80000185966492 + "p50": 157.3439985513687, + "p90": 162.9759967327118, + "p95": 164.12800550460815, + "p99": 174.43199455738068 }, "combine": { - "p50": 150.62400698661804, - "p90": 154.7520011663437, - "p95": 155.39200603961945, - "p99": 161.31199896335602 + "p50": 160.38399934768677, + "p90": 169.98399794101715, + "p95": 170.81600427627563, + "p99": 174.6560037136078 }, "roundtrip": { - "p50": 266.59199595451355, - "p90": 270.4640030860901, - "p95": 271.64798974990845, - "p99": 274.84801411628723 + "p50": 290.5920147895813, + "p90": 297.5040078163147, + "p95": 300.927996635437, + "p99": 310.91201305389404 }, "isolatedSum": { - "p50": 297.4400073289871, - "p90": 306.2400072813034, - "p95": 307.8400045633316, - "p99": 318.11200082302094 + "p50": 317.7279978990555, + "p90": 332.95999467372894, + "p95": 334.9440097808838, + "p99": 349.08799827098846 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 159330304, - "combineLogicalBytes": 159330304, - "fanoutMean": 5.4267578125, - "recvTokensMax": 1436, - "stragglerRank": 4, + "dispatchLogicalBytes": 155889664, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -33859,34 +35221,34 @@ "tokensPerRank": 512, "globalTokens": 4096, "dispatch": { - "p50": 201.05600357055664, - "p90": 204.70400154590607, - "p95": 205.63200116157532, - "p99": 209.1200053691864 + "p50": 222.3999947309494, + "p90": 224.95999932289124, + "p95": 226.30399465560913, + "p99": 234.68799889087677 }, "combine": { - "p50": 227.64800488948822, - "p90": 231.99999332427979, - "p95": 234.17599499225616, - "p99": 235.83999276161194 + "p50": 272.2240090370178, + "p90": 281.66401386260986, + "p95": 282.24000334739685, + "p99": 296.3840067386627 }, 
"roundtrip": { - "p50": 403.55199575424194, - "p90": 408.160001039505, - "p95": 409.15200114250183, - "p99": 411.77600622177124 + "p50": 466.7840003967285, + "p90": 473.66398572921753, + "p95": 476.73600912094116, + "p99": 491.93599820137024 }, "isolatedSum": { - "p50": 428.70400846004486, - "p90": 436.70399487018585, - "p95": 439.8079961538315, - "p99": 444.95999813079834 + "p50": 494.6240037679672, + "p90": 506.6240131855011, + "p95": 508.543998003006, + "p99": 531.0720056295395 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 319535104, - "combineLogicalBytes": 319535104, - "fanoutMean": 5.441650390625, - "recvTokensMax": 2897, + "dispatchLogicalBytes": 312266752, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, "stragglerRank": 4, "correct": true, "samplesPooled": 600, @@ -33896,35 +35258,35 @@ "tokensPerRank": 1024, "globalTokens": 8192, "dispatch": { - "p50": 309.1840147972107, - "p90": 313.2160007953644, - "p95": 314.62401151657104, - "p99": 317.79199838638306 + "p50": 346.015989780426, + "p90": 350.0800132751465, + "p95": 351.6800105571747, + "p99": 360.76799035072327 }, "combine": { - "p50": 368.5440123081207, - "p90": 374.9440014362335, - "p95": 376.22401118278503, - "p99": 380.7680010795593 + "p50": 466.2719964981079, + "p90": 469.5360064506531, + "p95": 477.63198614120483, + "p99": 491.8079972267151 }, "roundtrip": { - "p50": 652.2560119628906, - "p90": 658.9760184288025, - "p95": 661.3759994506836, - "p99": 665.2479767799377 + "p50": 785.152018070221, + "p90": 791.1360263824463, + "p95": 795.2319979667664, + "p99": 807.9360127449036 }, "isolatedSum": { - "p50": 677.7280271053314, - "p90": 688.1600022315979, - "p95": 690.8480226993561, - "p99": 698.5599994659424 + "p50": 812.2879862785339, + "p90": 819.6160197257996, + "p95": 829.3119966983795, + "p99": 852.5759875774384 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 638410752, - "combineLogicalBytes": 638410752, - "fanoutMean": 5.43603515625, - 
"recvTokensMax": 5815, - "stragglerRank": 7, + "dispatchLogicalBytes": 623443968, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -33933,34 +35295,34 @@ "tokensPerRank": 2048, "globalTokens": 16384, "dispatch": { - "p50": 532.6079726219177, - "p90": 546.5599894523621, - "p95": 550.495982170105, - "p99": 557.7600002288818 + "p50": 592.0000076293945, + "p90": 598.8479852676392, + "p95": 602.3359894752502, + "p99": 609.5679998397827 }, "combine": { - "p50": 642.5279974937439, - "p90": 649.9519944190979, - "p95": 652.2560119628906, - "p99": 658.8159799575806 + "p50": 826.9439935684204, + "p90": 835.9040021896362, + "p95": 838.1119966506958, + "p99": 860.6079816818237 }, "roundtrip": { - "p50": 1146.399974822998, - "p90": 1156.9600105285645, - "p95": 1160.9920263290405, - "p99": 1168.511986732483 + "p50": 1397.760033607483, + "p90": 1407.039999961853, + "p95": 1411.2639427185059, + "p99": 1424.3839979171753 }, "isolatedSum": { - "p50": 1175.1359701156616, - "p90": 1196.51198387146, - "p95": 1202.7519941329956, - "p99": 1216.5759801864624 + "p50": 1418.944001197815, + "p90": 1434.7519874572754, + "p95": 1440.447986125946, + "p99": 1470.1759815216064 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1275144192, - "combineLogicalBytes": 1275144192, - "fanoutMean": 5.42889404296875, - "recvTokensMax": 11606, + "dispatchLogicalBytes": 1243805696, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, "stragglerRank": 4, "correct": true, "samplesPooled": 600, @@ -33970,35 +35332,35 @@ "tokensPerRank": 4096, "globalTokens": 32768, "dispatch": { - "p50": 1024.351954460144, - "p90": 1048.5440492630005, - "p95": 1056.9599866867065, - "p99": 1069.3119764328003 + "p50": 1092.576026916504, + "p90": 1101.0559797286987, + "p95": 1106.4640283584595, + "p99": 1123.5840320587158 }, "combine": { - "p50": 1185.9840154647827, - 
"p90": 1194.1759586334229, - "p95": 1196.5759992599487, - "p99": 1201.5680074691772 + "p50": 1536.7679595947266, + "p90": 1549.2479801177979, + "p95": 1559.775948524475, + "p99": 1572.0000267028809 }, "roundtrip": { - "p50": 2167.520046234131, - "p90": 2183.3600997924805, - "p95": 2188.8959407806396, - "p99": 2197.727918624878 + "p50": 2607.2959899902344, + "p90": 2621.471881866455, + "p95": 2628.511905670166, + "p99": 2641.5040493011475 }, "isolatedSum": { - "p50": 2210.3359699249268, - "p90": 2242.7200078964233, - "p95": 2253.5359859466553, - "p99": 2270.8799839019775 + "p50": 2629.3439865112305, + "p90": 2650.3039598464966, + "p95": 2666.2399768829346, + "p99": 2695.5840587615967 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2546374656, - "combineLogicalBytes": 2546374656, - "fanoutMean": 5.420562744140625, - "recvTokensMax": 23170, - "stragglerRank": 3, + "dispatchLogicalBytes": 2487009280, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 @@ -34006,18 +35368,18 @@ ] }, { - "id": "cx-fe520015", - "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-mild|8|prefill|normal|none|none|0|tuned||cf93f8f6b52e428", - "colorKey": "h100_9aa30544", - "comparisonKey": "212a6f0661f5d2d6", + "id": "cx-32323f85", + "identity": "h100|deepep|4096|8|128|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||75530960a30b452", + "colorKey": "h100_42947950", + "comparisonKey": "13b620ce9b7928e9", "schemaVersion": 3, - "generatedAt": "2026-06-27T00:00:29.937355+00:00", + "generatedAt": "2026-06-27T11:16:12.750378+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h100-dgxc-slurm_09", + "runner": "h100-dgxc-slurm_00", "sku": "h100", "backend": "deepep", - "phase": "prefill", + "phase": "decode", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", @@ -34027,13 +35389,14 @@ 
"transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · bf16 · zipf-mild", + "label": "H100 EP8 · deepep · bf16", + "model": "Qwen3.5", "shape": { - "hidden": 7168, + "hidden": 4096, "topk": 8, - "experts": 256, - "routing": "zipf-mild", - "routingLabel": "zipf-mild", + "experts": 128, + "routing": "uniform", + "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, @@ -34058,8 +35421,8 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "cf93f8f6b52e428", - "workloadId": "set:6:a224603e5a1640b8", + "traceSignature": "75530960a30b452", + "workloadId": "set:8:d1b92539bddfb570", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -34067,229 +35430,303 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271965088", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271965088", - "createdAt": "2026-06-26T23:59:26Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28287504962", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28287504962", + "createdAt": "2026-06-27T11:16:12.750378+00:00", + "sha": "df7fddee0a275156d4c72fa006cd2b73bce72613" }, "rows": [ { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 1, + "globalTokens": 8, "dispatch": { - "p50": 123.71200323104858, - "p90": 127.6479959487915, - "p95": 131.20000064373016, - "p99": 133.7279975414276 + "p50": 96.44799679517746, + "p90": 104.54399883747101, + "p95": 106.27199709415436, + "p99": 110.07999628782272 }, "combine": { - "p50": 113.76000195741653, - "p90": 115.13599753379822, - "p95": 119.48800086975098, - "p99": 121.56800180673599 + "p50": 71.32799923419952, + "p90": 73.34399968385696, + "p95": 73.88799637556076, + "p99": 79.68000322580338 }, "roundtrip": { - "p50": 214.65599536895752, - "p90": 
219.29599344730377, - "p95": 220.12799978256226, - "p99": 223.61600399017334 + "p50": 136.80000603199005, + "p90": 143.74400675296783, + "p95": 145.50399780273438, + "p99": 150.78400075435638 }, "isolatedSum": { - "p50": 237.47200518846512, - "p90": 242.78399348258972, - "p95": 250.68800151348114, - "p99": 255.2959993481636 + "p50": 167.77599602937698, + "p90": 177.88799852132797, + "p95": 180.15999346971512, + "p99": 189.7599995136261 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 70160384, - "combineLogicalBytes": 70160384, - "fanoutMean": 4.779296875, - "recvTokensMax": 987, - "stragglerRank": 2, + "dispatchLogicalBytes": 344064, + "combineLogicalBytes": 344064, + "fanoutMean": 5.25, + "recvTokensMax": 6, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 256, - "globalTokens": 2048, + "tokensPerRank": 2, + "globalTokens": 16, "dispatch": { - "p50": 160.19199788570404, - "p90": 166.4000004529953, - "p95": 167.61599481105804, - "p99": 170.43200135231018 + "p50": 68.9919963479042, + "p90": 101.6319990158081, + "p95": 103.5199984908104, + "p99": 110.91200262308121 }, "combine": { - "p50": 169.37600076198578, - "p90": 172.5119948387146, - "p95": 173.40800166130066, - "p99": 177.50400304794312 + "p50": 63.45599889755249, + "p90": 72.95999675989151, + "p95": 73.27999919652939, + "p99": 78.87999713420868 }, "roundtrip": { - "p50": 299.5840013027191, - "p90": 303.42400074005127, - "p95": 305.1519989967346, - "p99": 310.8479976654053 + "p50": 116.28799885511398, + "p90": 142.2719955444336, + "p95": 144.57599818706512, + "p99": 150.43200552463531 }, "isolatedSum": { - "p50": 329.5679986476898, - "p90": 338.9119952917099, - "p95": 341.0239964723587, - "p99": 347.9360044002533 + "p50": 132.4479952454567, + "p90": 174.59199577569962, + "p95": 176.79999768733978, + "p99": 189.7919997572899 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 140879872, - "combineLogicalBytes": 140879872, - "fanoutMean": 4.79833984375, 
- "recvTokensMax": 1972, - "stragglerRank": 2, + "dispatchLogicalBytes": 704512, + "combineLogicalBytes": 704512, + "fanoutMean": 5.375, + "recvTokensMax": 12, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 512, - "globalTokens": 4096, + "tokensPerRank": 4, + "globalTokens": 32, "dispatch": { - "p50": 233.18399488925934, - "p90": 239.26399648189545, - "p95": 240.28800427913666, - "p99": 242.94400215148926 + "p50": 69.21599805355072, + "p90": 134.20799374580383, + "p95": 137.43999600410461, + "p99": 141.34399592876434 }, "combine": { - "p50": 263.5839879512787, - "p90": 268.70399713516235, - "p95": 270.27198672294617, - "p99": 274.1760015487671 + "p50": 63.58399987220764, + "p90": 86.97599917650223, + "p95": 87.8399983048439, + "p99": 162.36799955368042 }, "roundtrip": { - "p50": 471.71199321746826, - "p90": 476.639986038208, - "p95": 478.5600006580353, - "p99": 481.3440144062042 + "p50": 116.80000275373459, + "p90": 144.28800344467163, + "p95": 147.10399508476257, + "p99": 151.39199793338776 }, "isolatedSum": { - "p50": 496.767982840538, - "p90": 507.9679936170578, - "p95": 510.5599910020828, - "p99": 517.1200037002563 + "p50": 132.79999792575836, + "p90": 221.18399292230606, + "p95": 225.27999430894852, + "p99": 303.71199548244476 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 282333184, - "combineLogicalBytes": 282333184, - "fanoutMean": 4.80810546875, - "recvTokensMax": 3936, - "stragglerRank": 1, + "dispatchLogicalBytes": 1384448, + "combineLogicalBytes": 1384448, + "fanoutMean": 5.28125, + "recvTokensMax": 26, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 1024, - "globalTokens": 8192, + "tokensPerRank": 8, + "globalTokens": 64, "dispatch": { - "p50": 377.27999687194824, - "p90": 383.35999846458435, - "p95": 385.18399000167847, - "p99": 387.84000277519226 + "p50": 83.52000266313553, + "p90": 104.60799932479858, + "p95": 113.21599781513214, + "p99": 
352.54400968551636 }, "combine": { - "p50": 446.30399346351624, - "p90": 453.44001054763794, - "p95": 455.52000403404236, - "p99": 460.89598536491394 + "p50": 64.64000046253204, + "p90": 72.83200323581696, + "p95": 73.18399846553802, + "p99": 77.98399776220322 }, "roundtrip": { - "p50": 797.0240116119385, - "p90": 804.4800162315369, - "p95": 807.1039915084839, - "p99": 811.6480112075806 + "p50": 117.53600090742111, + "p90": 146.30399644374847, + "p95": 149.34399724006653, + "p99": 153.60000729560852 }, "isolatedSum": { - "p50": 823.5839903354645, - "p90": 836.8000090122223, - "p95": 840.7039940357208, - "p99": 848.7359881401062 + "p50": 148.16000312566757, + "p90": 177.44000256061554, + "p95": 186.39999628067017, + "p99": 430.5280074477196 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 566716416, - "combineLogicalBytes": 566716416, - "fanoutMean": 4.8255615234375, - "recvTokensMax": 7855, - "stragglerRank": 2, + "dispatchLogicalBytes": 2744320, + "combineLogicalBytes": 2744320, + "fanoutMean": 5.234375, + "recvTokensMax": 49, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2048, - "globalTokens": 16384, + "tokensPerRank": 16, + "globalTokens": 128, "dispatch": { - "p50": 676.4479875564575, - "p90": 686.8799924850464, - "p95": 690.5279755592346, - "p99": 791.9679880142212 + "p50": 96.3200032711029, + "p90": 102.14400291442871, + "p95": 104.47999835014343, + "p99": 109.56799983978271 }, "combine": { - "p50": 796.3520288467407, - "p90": 808.4160089492798, - "p95": 811.3920092582703, - "p99": 820.5440044403076 + "p50": 71.80800288915634, + "p90": 74.11199808120728, + "p95": 78.8159966468811, + "p99": 80.19199967384338 }, "roundtrip": { - "p50": 1445.5360174179077, - "p90": 1457.311987876892, - "p95": 1460.6399536132812, - "p99": 1468.2879447937012 + "p50": 143.71199905872345, + "p90": 151.39199793338776, + "p95": 153.02400290966034, + "p99": 157.95199573040009 }, "isolatedSum": { - "p50": 1472.8000164031982, - 
"p90": 1495.2960014343262, - "p95": 1501.9199848175049, - "p99": 1612.5119924545288 + "p50": 168.12800616025925, + "p90": 176.256000995636, + "p95": 183.29599499702454, + "p99": 189.7599995136261 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1132285952, - "combineLogicalBytes": 1132285952, - "fanoutMean": 4.8206787109375, - "recvTokensMax": 15694, + "dispatchLogicalBytes": 5464064, + "combineLogicalBytes": 5464064, + "fanoutMean": 5.2109375, + "recvTokensMax": 94, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 78.11199873685837, + "p90": 98.65599870681763, + "p95": 103.32799702882767, + "p99": 114.3999993801117 + }, + "combine": { + "p50": 65.92000275850296, + "p90": 79.29600030183792, + "p95": 80.44800162315369, + "p99": 81.31200075149536 + }, + "roundtrip": { + "p50": 117.53600090742111, + "p90": 150.36800503730774, + "p95": 152.63999998569489, + "p99": 155.7759940624237 + }, + "isolatedSum": { + "p50": 144.03200149536133, + "p90": 177.95199900865555, + "p95": 183.77599865198135, + "p99": 195.71200013160706 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 11124736, + "combineLogicalBytes": 11124736, + "fanoutMean": 5.3046875, + "recvTokensMax": 186, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 88.35200220346451, + "p90": 136.1279934644699, + "p95": 138.91200721263885, + "p99": 147.2959965467453 + }, + "combine": { + "p50": 74.0479975938797, + "p90": 96.3200032711029, + "p95": 102.11200267076492, + "p99": 104.3199971318245 + }, + "roundtrip": { + "p50": 133.760005235672, + "p90": 191.16799533367157, + "p95": 192.73599982261658, + "p99": 197.9839950799942 + }, + "isolatedSum": { + "p50": 162.3999997973442, + "p90": 232.44799673557281, + "p95": 241.02400988340378, + "p99": 251.6159936785698 + }, + "roundtripMeasured": true, 
+ "dispatchLogicalBytes": 22192128, + "combineLogicalBytes": 22192128, + "fanoutMean": 5.291015625, + "recvTokensMax": 358, "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 4096, - "globalTokens": 32768, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 1284.8639488220215, - "p90": 1296.3199615478516, - "p95": 1299.7759580612183, - "p99": 1306.5279722213745 + "p50": 95.83999961614609, + "p90": 113.6000007390976, + "p95": 117.88800358772278, + "p99": 121.79200351238251 }, "combine": { - "p50": 1503.5840272903442, - "p90": 1517.2799825668335, - "p95": 1524.2880582809448, - "p99": 1540.0960445404053 + "p50": 88.28800171613693, + "p90": 96.16000205278397, + "p95": 96.6079980134964, + "p99": 104.09600287675858 }, "roundtrip": { - "p50": 2760.960102081299, - "p90": 2775.10404586792, - "p95": 2783.936023712158, - "p99": 2810.0481033325195 + "p50": 159.42400693893433, + "p90": 173.3119934797287, + "p95": 175.135999917984, + "p99": 178.01600694656372 }, "isolatedSum": { - "p50": 2788.4479761123657, - "p90": 2813.599944114685, - "p95": 2824.064016342163, - "p99": 2846.62401676178 + "p50": 184.12800133228302, + "p90": 209.76000279188156, + "p95": 214.49600160121918, + "p99": 225.88800638914108 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2267840512, - "combineLogicalBytes": 2267840512, - "fanoutMean": 4.82763671875, - "recvTokensMax": 31357, + "dispatchLogicalBytes": 44564480, + "combineLogicalBytes": 44564480, + "fanoutMean": 5.3125, + "recvTokensMax": 699, "stragglerRank": 2, "correct": true, "samplesPooled": 600, @@ -34298,37 +35735,38 @@ ] }, { - "id": "cx-2b98c773", - "identity": "h100|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-mild+eplb|8|prefill|normal|none|none|0|tuned||27ddc85ded0add9", - "colorKey": "h100_e8b903ea", - "comparisonKey": "5961b4bc09451ca4", + "id": "cx-1c34e3d1", + "identity": 
"h100|deepep|4096|8|128|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||75530960a30b452", + "colorKey": "h100_ff7906f8", + "comparisonKey": "ad5ebda2342035d4", "schemaVersion": 3, - "generatedAt": "2026-06-27T00:00:35.470349+00:00", + "generatedAt": "2026-06-26T23:51:21.600015+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h100-dgxc-slurm_16", + "runner": "h100-dgxc-slurm_04", "sku": "h100", "backend": "deepep", - "phase": "prefill", + "phase": "decode", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", + "measurementContract": "runtime-visible-v1", "topologyClass": "h100-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · bf16 · zipf-mild+eplb", + "label": "H100 EP8 · deepep · bf16", + "model": "Qwen3.5", "shape": { - "hidden": 7168, + "hidden": 4096, "topk": 8, - "experts": 288, - "routing": "zipf-mild", - "routingLabel": "zipf-mild+eplb", + "experts": 128, + "routing": "uniform", + "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", - "eplbEnabled": true, + "eplbEnabled": false, "dispatchDtype": "bf16", "activationProfile": "normal", "combineQuantMode": "none" @@ -34350,239 +35788,313 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "27ddc85ded0add9", - "workloadId": "set:6:a224603e5a1640b8", + "traceSignature": "75530960a30b452", + "workloadId": "set:8:d1b92539bddfb570", "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": 2.545684814453125, - "eplbImbalanceAfter": 1.0001495361328125, + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, "backendVersion": "1.2.1", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271968791", - "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271968791", - "createdAt": "2026-06-26T23:59:34Z", + "id": "28271684428", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271684428", + "createdAt": "2026-06-26T23:51:21.600015+00:00", "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" }, "rows": [ { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 1, + "globalTokens": 8, "dispatch": { - "p50": 112.41599917411804, - "p90": 117.18399822711945, - "p95": 118.9119964838028, - "p99": 122.91199713945389 + "p50": 98.4639972448349, + "p90": 106.52799904346466, + "p95": 128.12800705432892, + "p99": 158.87999534606934 }, "combine": { - "p50": 106.33599758148193, - "p90": 112.12799698114395, - "p95": 113.0559965968132, - "p99": 114.43199962377548 + "p50": 66.52799993753433, + "p90": 73.34399968385696, + "p95": 81.34400099515915, + "p99": 91.96799993515015 }, "roundtrip": { - "p50": 198.81600141525269, - "p90": 204.03200387954712, - "p95": 205.4080069065094, - "p99": 207.58399367332458 + "p50": 139.42399621009827, + "p90": 146.84799313545227, + "p95": 150.56000649929047, + "p99": 186.81600689888 }, "isolatedSum": { - "p50": 218.75199675559998, - "p90": 229.3119952082634, - "p95": 231.967993080616, - "p99": 237.34399676322937 + "p50": 164.99199718236923, + "p90": 179.87199872732162, + "p95": 209.47200804948807, + "p99": 250.84799528121948 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 78159872, - "combineLogicalBytes": 78159872, - "fanoutMean": 5.32421875, - "recvTokensMax": 702, - "stragglerRank": 5, + "dispatchLogicalBytes": 344064, + "combineLogicalBytes": 344064, + "fanoutMean": 5.25, + "recvTokensMax": 6, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 256, - "globalTokens": 2048, + "tokensPerRank": 2, + "globalTokens": 16, "dispatch": { - "p50": 148.44800531864166, - "p90": 151.99999511241913, - "p95": 153.3759981393814, - "p99": 156.3519984483719 + "p50": 
71.58400118350983, + "p90": 103.45599800348282, + "p95": 106.39999806880951, + "p99": 124.67200309038162 }, "combine": { - "p50": 149.47199821472168, - "p90": 155.39200603961945, - "p95": 159.39199924468994, - "p99": 164.06400501728058 + "p50": 64.06400352716446, + "p90": 72.92799651622772, + "p95": 73.31199944019318, + "p99": 74.43200051784515 }, "roundtrip": { - "p50": 267.4880027770996, - "p90": 272.2879946231842, - "p95": 274.04800057411194, - "p99": 279.4879972934723 + "p50": 117.53600090742111, + "p90": 144.41600441932678, + "p95": 147.71200716495514, + "p99": 173.5360026359558 }, "isolatedSum": { - "p50": 297.92000353336334, - "p90": 307.3920011520386, - "p95": 312.76799738407135, - "p99": 320.41600346565247 + "p50": 135.6480047106743, + "p90": 176.38399451971054, + "p95": 179.71199750900269, + "p99": 199.10400360822678 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 156563456, - "combineLogicalBytes": 156563456, - "fanoutMean": 5.33251953125, - "recvTokensMax": 1393, - "stragglerRank": 5, + "dispatchLogicalBytes": 704512, + "combineLogicalBytes": 704512, + "fanoutMean": 5.375, + "recvTokensMax": 12, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 512, - "globalTokens": 4096, + "tokensPerRank": 4, + "globalTokens": 32, "dispatch": { - "p50": 201.12000405788422, - "p90": 204.48000729084015, - "p95": 206.04799687862396, - "p99": 212.22400665283203 + "p50": 92.70399808883667, + "p90": 100.47999769449234, + "p95": 102.75200009346008, + "p99": 106.23999685049057 }, "combine": { - "p50": 229.0239930152893, - "p90": 233.95200073719025, - "p95": 236.4480048418045, - "p99": 238.52799832820892 + "p50": 66.01600348949432, + "p90": 72.38399982452393, + "p95": 72.86400347948074, + "p99": 75.6160020828247 }, "roundtrip": { - "p50": 404.06399965286255, - "p90": 408.86399149894714, - "p95": 411.0719859600067, - "p99": 431.5840005874634 + "p50": 134.33599472045898, + "p90": 143.77599954605103, + "p95": 
146.08000218868256, + "p99": 149.82399344444275 }, "isolatedSum": { - "p50": 430.1439970731735, - "p90": 438.4320080280304, - "p95": 442.49600172042847, - "p99": 450.75200498104095 + "p50": 158.720001578331, + "p90": 172.86399751901627, + "p95": 175.61600357294083, + "p99": 181.85599893331528 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 312410112, - "combineLogicalBytes": 312410112, - "fanoutMean": 5.3203125, - "recvTokensMax": 2773, - "stragglerRank": 5, + "dispatchLogicalBytes": 1384448, + "combineLogicalBytes": 1384448, + "fanoutMean": 5.28125, + "recvTokensMax": 26, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 1024, - "globalTokens": 8192, + "tokensPerRank": 8, + "globalTokens": 64, "dispatch": { - "p50": 304.0960133075714, - "p90": 309.28000807762146, - "p95": 311.64801120758057, - "p99": 479.5520007610321 + "p50": 97.75999933481216, + "p90": 105.8880016207695, + "p95": 129.66400384902954, + "p99": 177.44000256061554 }, "combine": { - "p50": 366.11199378967285, - "p90": 372.8959858417511, - "p95": 374.55999851226807, - "p99": 383.4559917449951 + "p50": 71.32799923419952, + "p90": 74.65600222349167, + "p95": 81.53600245714188, + "p99": 92.00000017881393 }, "roundtrip": { - "p50": 644.0640091896057, - "p90": 650.1439809799194, - "p95": 652.1919965744019, - "p99": 656.5120220184326 + "p50": 140.99200069904327, + "p90": 149.6960073709488, + "p95": 159.19999778270721, + "p99": 189.43999707698822 }, "isolatedSum": { - "p50": 670.2080070972443, - "p90": 682.1759939193726, - "p95": 686.2080097198486, - "p99": 863.0079925060272 + "p50": 169.0879985690117, + "p90": 180.54400384426117, + "p95": 211.20000630617142, + "p99": 269.4400027394295 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 622712832, - "combineLogicalBytes": 622712832, - "fanoutMean": 5.3023681640625, - "recvTokensMax": 5498, - "stragglerRank": 5, + "dispatchLogicalBytes": 2744320, + "combineLogicalBytes": 2744320, + "fanoutMean": 
5.234375, + "recvTokensMax": 49, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2048, - "globalTokens": 16384, + "tokensPerRank": 16, + "globalTokens": 128, "dispatch": { - "p50": 521.5039849281311, - "p90": 530.1120281219482, - "p95": 533.3759784698486, - "p99": 540.5120253562927 + "p50": 97.56799787282944, + "p90": 100.99200159311295, + "p95": 104.3199971318245, + "p99": 107.42399841547012 }, "combine": { - "p50": 632.1920156478882, - "p90": 639.3280029296875, - "p95": 640.9599781036377, - "p99": 647.2960114479065 + "p50": 71.6480016708374, + "p90": 73.7600028514862, + "p95": 75.00799745321274, + "p99": 80.92799782752991 }, "roundtrip": { - "p50": 1123.9999532699585, - "p90": 1132.8959465026855, - "p95": 1135.807991027832, - "p99": 1143.5840129852295 + "p50": 142.68800616264343, + "p90": 150.30400454998016, + "p95": 154.2080044746399, + "p99": 156.09599649906158 }, "isolatedSum": { - "p50": 1153.6960005760193, - "p90": 1169.4400310516357, - "p95": 1174.3359565734863, - "p99": 1187.8080368041992 + "p50": 169.21599954366684, + "p90": 174.75200444459915, + "p95": 179.32799458503723, + "p99": 188.35199624300003 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1245038592, - "combineLogicalBytes": 1245038592, - "fanoutMean": 5.30072021484375, - "recvTokensMax": 10955, + "dispatchLogicalBytes": 5464064, + "combineLogicalBytes": 5464064, + "fanoutMean": 5.2109375, + "recvTokensMax": 94, "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 4096, - "globalTokens": 32768, + "tokensPerRank": 32, + "globalTokens": 256, "dispatch": { - "p50": 990.2399778366089, - "p90": 1009.4720125198364, - "p95": 1016.1279439926147, - "p99": 1026.8160104751587 + "p50": 79.80799674987793, + "p90": 99.55199807882309, + "p95": 101.27999633550644, + "p99": 106.08000308275223 }, "combine": { - "p50": 1164.736032485962, - "p90": 1174.015998840332, - "p95": 1177.2799491882324, - "p99": 
1183.9359998703003 + "p50": 66.68800115585327, + "p90": 76.03199779987335, + "p95": 80.38400113582611, + "p99": 81.31200075149536 }, "roundtrip": { - "p50": 2116.895914077759, - "p90": 2137.7599239349365, - "p95": 2143.712043762207, - "p99": 2157.8240394592285 + "p50": 123.87199699878693, + "p90": 150.27199685573578, + "p95": 152.16000378131866, + "p99": 155.4879993200302 }, "isolatedSum": { - "p50": 2154.976010322571, - "p90": 2183.4880113601685, - "p95": 2193.407893180847, - "p99": 2210.752010345459 + "p50": 146.4959979057312, + "p90": 175.58399587869644, + "p95": 181.66399747133255, + "p99": 187.3920038342476 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2489460736, - "combineLogicalBytes": 2489460736, - "fanoutMean": 5.299407958984375, - "recvTokensMax": 21864, - "stragglerRank": 5, + "dispatchLogicalBytes": 11124736, + "combineLogicalBytes": 11124736, + "fanoutMean": 5.3046875, + "recvTokensMax": 186, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 100.28800368309021, + "p90": 107.96800255775452, + "p95": 109.47199910879135, + "p99": 119.90399658679962 + }, + "combine": { + "p50": 81.11999928951263, + "p90": 87.71199733018875, + "p95": 89.1840010881424, + "p99": 90.14400094747543 + }, + "roundtrip": { + "p50": 151.8079936504364, + "p90": 162.59199380874634, + "p95": 164.06400501728058, + "p99": 168.57600212097168 + }, + "isolatedSum": { + "p50": 181.40800297260284, + "p90": 195.67999988794327, + "p95": 198.65600019693375, + "p99": 210.04799753427505 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22192128, + "combineLogicalBytes": 22192128, + "fanoutMean": 5.291015625, + "recvTokensMax": 358, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 105.69600015878677, + "p90": 115.99999666213989, + "p95": 118.1119978427887, + 
"p99": 124.83199685811996 + }, + "combine": { + "p50": 88.3840024471283, + "p90": 97.4079966545105, + "p95": 97.88800030946732, + "p99": 100.38399696350098 + }, + "roundtrip": { + "p50": 161.72799468040466, + "p90": 177.2480010986328, + "p95": 181.15200102329254, + "p99": 415.48800468444824 + }, + "isolatedSum": { + "p50": 194.08000260591507, + "p90": 213.4079933166504, + "p95": 215.999998152256, + "p99": 225.21599382162094 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 44564480, + "combineLogicalBytes": 44564480, + "fanoutMean": 5.3125, + "recvTokensMax": 699, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -34590,34 +36102,35 @@ ] }, { - "id": "cx-0a66c8a3", - "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-moderate|8|prefill|normal|none|none|0|tuned||b5217e990b95f86", - "colorKey": "h100_552a4b73", - "comparisonKey": "44cbfb11e1668dc5", + "id": "cx-8988cd24", + "identity": "h100|deepep|5120|8|160|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||be1b44a963bd4ef", + "colorKey": "h100_ff7906f8", + "comparisonKey": "c91a22e0dde262e4", "schemaVersion": 3, - "generatedAt": "2026-06-27T00:01:00.044863+00:00", + "generatedAt": "2026-06-26T23:51:51.137960+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h100-dgxc-slurm_03", + "runner": "h100-dgxc-slurm_18", "sku": "h100", "backend": "deepep", - "phase": "prefill", + "phase": "decode", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", + "measurementContract": "runtime-visible-v1", "topologyClass": "h100-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · bf16 · zipf-moderate", + "label": "H100 EP8 · deepep · bf16", + "model": "shape 5120/8/160", "shape": { - "hidden": 7168, + "hidden": 5120, "topk": 8, - "experts": 256, - "routing": "zipf-moderate", - 
"routingLabel": "zipf-moderate", + "experts": 160, + "routing": "uniform", + "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, @@ -34642,8 +36155,8 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "b5217e990b95f86", - "workloadId": "set:6:6709a02c31933a9f", + "traceSignature": "be1b44a963bd4ef", + "workloadId": "set:8:34e5874082f8ea8f", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -34651,249 +36164,323 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271978834", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271978834", - "createdAt": "2026-06-26T23:59:54Z", + "id": "28271699258", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271699258", + "createdAt": "2026-06-26T23:51:51.137960+00:00", "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" }, "rows": [ { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 1, + "globalTokens": 8, "dispatch": { - "p50": 124.7360035777092, - "p90": 130.68799674510956, - "p95": 132.03200697898865, - "p99": 136.4479959011078 + "p50": 95.551997423172, + "p90": 107.04000294208527, + "p95": 120.38400024175644, + "p99": 156.00000321865082 }, "combine": { - "p50": 112.5119999051094, - "p90": 114.17599767446518, - "p95": 115.07199704647064, - "p99": 120.67200243473053 + "p50": 71.19999825954437, + "p90": 78.84799689054489, + "p95": 81.15199953317642, + "p99": 97.56799787282944 }, "roundtrip": { - "p50": 215.16799926757812, - "p90": 219.35999393463135, - "p95": 221.11999988555908, - "p99": 229.18400168418884 + "p50": 140.25600254535675, + "p90": 152.319997549057, + "p95": 169.8240041732788, + "p99": 207.68000185489655 }, "isolatedSum": { - "p50": 237.2480034828186, - "p90": 244.86399441957474, - "p95": 247.1040040254593, - "p99": 257.1199983358383 + "p50": 
166.75199568271637, + "p90": 185.88799983263016, + "p95": 201.53599977493286, + "p99": 253.56800109148026 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 49946624, - "combineLogicalBytes": 49946624, - "fanoutMean": 3.40234375, - "recvTokensMax": 1022, - "stragglerRank": 4, + "dispatchLogicalBytes": 430080, + "combineLogicalBytes": 430080, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 256, - "globalTokens": 2048, + "tokensPerRank": 2, + "globalTokens": 16, "dispatch": { - "p50": 163.7440025806427, - "p90": 167.26399958133698, - "p95": 168.44800114631653, - "p99": 174.6560037136078 + "p50": 68.4799998998642, + "p90": 104.12800312042236, + "p95": 121.69600278139114, + "p99": 155.13600409030914 }, "combine": { - "p50": 164.51199352741241, - "p90": 169.50400173664093, - "p95": 170.1440066099167, - "p99": 174.14399981498718 + "p50": 64.80000168085098, + "p90": 79.00799810886383, + "p95": 88.06400001049042, + "p99": 103.39199751615524 }, "roundtrip": { - "p50": 297.91998863220215, - "p90": 302.72001028060913, - "p95": 304.32000756263733, - "p99": 306.5600097179413 + "p50": 119.6800023317337, + "p90": 147.32800424098969, + "p95": 149.08799529075623, + "p99": 153.888002038002 }, "isolatedSum": { - "p50": 328.2559961080551, - "p90": 336.7680013179779, - "p95": 338.5920077562332, - "p99": 348.80000352859497 + "p50": 133.28000158071518, + "p90": 183.1360012292862, + "p95": 209.76000279188156, + "p99": 258.5280016064644 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 100509696, - "combineLogicalBytes": 100509696, - "fanoutMean": 3.42333984375, - "recvTokensMax": 2046, - "stragglerRank": 4, + "dispatchLogicalBytes": 880640, + "combineLogicalBytes": 880640, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 512, - "globalTokens": 4096, + "tokensPerRank": 4, + "globalTokens": 32, 
"dispatch": { - "p50": 237.44000494480133, - "p90": 241.82400107383728, - "p95": 243.0720031261444, - "p99": 247.74399399757385 + "p50": 70.01599669456482, + "p90": 98.27200323343277, + "p95": 101.47199779748917, + "p99": 114.33599889278412 }, "combine": { - "p50": 264.51200246810913, - "p90": 268.41598749160767, - "p95": 271.5519964694977, - "p99": 281.6320061683655 + "p50": 65.08799642324448, + "p90": 78.8159966468811, + "p95": 79.23199981451035, + "p99": 85.95199882984161 }, "roundtrip": { - "p50": 475.5840003490448, - "p90": 482.59198665618896, - "p95": 490.30399322509766, - "p99": 504.96000051498413 + "p50": 119.03999745845795, + "p90": 149.98400211334229, + "p95": 151.8079936504364, + "p99": 158.33599865436554 }, "isolatedSum": { - "p50": 501.95200741291046, - "p90": 510.23998856544495, - "p95": 514.6239995956421, - "p99": 529.3760001659393 + "p50": 135.1039931178093, + "p90": 177.08799988031387, + "p95": 180.7039976119995, + "p99": 200.28799772262573 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 201678848, - "combineLogicalBytes": 201678848, - "fanoutMean": 3.4345703125, - "recvTokensMax": 4094, + "dispatchLogicalBytes": 1740800, + "combineLogicalBytes": 1740800, + "fanoutMean": 5.3125, + "recvTokensMax": 25, "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 1024, - "globalTokens": 8192, + "tokensPerRank": 8, + "globalTokens": 64, "dispatch": { - "p50": 379.040002822876, - "p90": 385.72800159454346, - "p95": 388.2240056991577, - "p99": 414.3359959125519 + "p50": 70.23999840021133, + "p90": 97.79199957847595, + "p95": 102.01600193977356, + "p99": 116.67200177907944 }, "combine": { - "p50": 447.00801372528076, - "p90": 452.4799883365631, - "p95": 453.5039961338043, - "p99": 456.89600706100464 + "p50": 65.47199934720993, + "p90": 79.0719985961914, + "p95": 79.64800298213959, + "p99": 87.67999708652496 }, "roundtrip": { - "p50": 800.2240061759949, - "p90": 805.791974067688, - "p95": 807.744026184082, - 
"p99": 811.680018901825 + "p50": 118.367999792099, + "p90": 150.4639983177185, + "p95": 155.68000078201294, + "p99": 188.25599551200867 }, "isolatedSum": { - "p50": 826.0480165481567, - "p90": 838.2079899311066, - "p95": 841.728001832962, - "p99": 871.2320029735565 + "p50": 135.71199774742126, + "p90": 176.86399817466736, + "p95": 181.66400492191315, + "p99": 204.3519988656044 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 405035008, - "combineLogicalBytes": 405035008, - "fanoutMean": 3.4488525390625, - "recvTokensMax": 8189, - "stragglerRank": 4, + "dispatchLogicalBytes": 3471360, + "combineLogicalBytes": 3471360, + "fanoutMean": 5.296875, + "recvTokensMax": 50, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2048, - "globalTokens": 16384, + "tokensPerRank": 16, + "globalTokens": 128, "dispatch": { - "p50": 675.3919720649719, - "p90": 695.6800222396851, - "p95": 707.8400254249573, - "p99": 910.8160138130188 + "p50": 95.10400146245956, + "p90": 101.34399682283401, + "p95": 105.6319996714592, + "p99": 117.11999773979187 }, "combine": { - "p50": 819.2319869995117, - "p90": 829.6639919281006, - "p95": 833.2160115242004, - "p99": 841.3439989089966 + "p50": 69.11999732255936, + "p90": 79.42400127649307, + "p95": 80.03199845552444, + "p99": 86.87999844551086 }, "roundtrip": { - "p50": 1459.9679708480835, - "p90": 1476.9599437713623, - "p95": 1481.8559885025024, - "p99": 1501.2799501419067 + "p50": 120.03199756145477, + "p90": 147.039994597435, + "p95": 149.72800016403198, + "p99": 158.55999290943146 }, "isolatedSum": { - "p50": 1494.6239590644836, - "p90": 1525.3440141677856, - "p95": 1541.0560369491577, - "p99": 1752.1600127220154 + "p50": 164.22399878501892, + "p90": 180.7679980993271, + "p95": 185.66399812698364, + "p99": 203.99999618530273 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 808822784, - "combineLogicalBytes": 808822784, - "fanoutMean": 3.44354248046875, - "recvTokensMax": 16380, - 
"stragglerRank": 2, + "dispatchLogicalBytes": 6912000, + "combineLogicalBytes": 6912000, + "fanoutMean": 5.2734375, + "recvTokensMax": 93, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 4096, - "globalTokens": 32768, + "tokensPerRank": 32, + "globalTokens": 256, "dispatch": { - "p50": 1275.10404586792, - "p90": 1287.5200510025024, - "p95": 1291.8399572372437, - "p99": 1346.0479974746704 + "p50": 77.47200131416321, + "p90": 103.16800326108932, + "p95": 109.72800105810165, + "p99": 237.37600445747375 }, "combine": { - "p50": 1538.7200117111206, - "p90": 1550.3679513931274, - "p95": 1555.232048034668, - "p99": 1607.9360246658325 + "p50": 71.99999690055847, + "p90": 87.13600039482117, + "p95": 95.20000219345093, + "p99": 104.16000336408615 }, "roundtrip": { - "p50": 2787.168025970459, - "p90": 2798.784017562866, - "p95": 2802.9439449310303, - "p99": 2818.4640407562256 + "p50": 146.14400267601013, + "p90": 166.52800142765045, + "p95": 171.1679995059967, + "p99": 366.0160005092621 }, "isolatedSum": { - "p50": 2813.8240575790405, - "p90": 2837.88800239563, - "p95": 2847.0720052719116, - "p99": 2953.984022140503 + "p50": 149.47199821472168, + "p90": 190.3040036559105, + "p95": 204.92800325155258, + "p99": 341.5360078215599 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1619795968, - "combineLogicalBytes": 1619795968, - "fanoutMean": 3.4481201171875, - "recvTokensMax": 32761, - "stragglerRank": 6, + "dispatchLogicalBytes": 13977600, + "combineLogicalBytes": 13977600, + "fanoutMean": 5.33203125, + "recvTokensMax": 179, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 - } + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 92.67199784517288, + "p90": 111.04000359773636, + "p95": 113.79200220108032, + "p99": 126.68800354003906 + }, + "combine": { + "p50": 81.08799904584885, + "p90": 88.67199718952179, + "p95": 95.45599669218063, + "p99": 96.28800302743912 + }, + 
"roundtrip": { + "p50": 147.5840061903, + "p90": 168.96000504493713, + "p95": 170.9440052509308, + "p99": 174.9120056629181 + }, + "isolatedSum": { + "p50": 173.75999689102173, + "p90": 199.71200078725815, + "p95": 209.24799889326096, + "p99": 222.97600656747818 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 27975680, + "combineLogicalBytes": 27975680, + "fanoutMean": 5.3359375, + "recvTokensMax": 355, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 105.15200346708298, + "p90": 120.7680031657219, + "p95": 122.68800288438797, + "p99": 131.29599392414093 + }, + "combine": { + "p50": 95.90400010347366, + "p90": 104.67199981212616, + "p95": 112.60800063610077, + "p99": 267.5839960575104 + }, + "roundtrip": { + "p50": 173.0239987373352, + "p90": 194.17600333690643, + "p95": 195.90400159358978, + "p99": 308.351993560791 + }, + "isolatedSum": { + "p50": 201.05600357055664, + "p90": 225.44000297784805, + "p95": 235.29600352048874, + "p99": 398.8799899816513 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 55674880, + "combineLogicalBytes": 55674880, + "fanoutMean": 5.3095703125, + "recvTokensMax": 699, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } ] }, { - "id": "cx-7114a01f", - "identity": "h100|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-moderate+eplb|8|prefill|normal|none|none|0|tuned||2b57a75d27f5b39", - "colorKey": "h100_106a51ab", - "comparisonKey": "80b7db884aaf5a8c", + "id": "cx-1d6bf339", + "identity": "h100|deepep|6144|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h100_42947950", + "comparisonKey": "4f849813bdf740d5", "schemaVersion": 3, - "generatedAt": "2026-06-27T00:01:17.822701+00:00", + "generatedAt": "2026-06-27T11:13:11.578821+00:00", "status": "valid", "publicationStatus": "official", - "runner": 
"h100-dgxc-slurm_10", + "runner": "h100-dgxc-slurm_11", "sku": "h100", "backend": "deepep", - "phase": "prefill", + "phase": "decode", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", @@ -34903,16 +36490,17 @@ "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · bf16 · zipf-moderate+eplb", + "label": "H100 EP8 · deepep · bf16", + "model": "MiniMax-M3", "shape": { - "hidden": 7168, + "hidden": 6144, "topk": 8, - "experts": 288, - "routing": "zipf-moderate", - "routingLabel": "zipf-moderate+eplb", + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", - "eplbEnabled": true, + "eplbEnabled": false, "dispatchDtype": "bf16", "activationProfile": "normal", "combineQuantMode": "none" @@ -34934,238 +36522,312 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "2b57a75d27f5b39", - "workloadId": "set:6:6709a02c31933a9f", + "traceSignature": "ac583971f94b176", + "workloadId": "set:8:2e0df6a62cd0143e", "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": 4.895263671875, - "eplbImbalanceAfter": 1.0000902811686199, + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, "backendVersion": "1.2.1", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271982260", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271982260", - "createdAt": "2026-06-27T00:00:01Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28287492752", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28287492752", + "createdAt": "2026-06-27T11:13:11.578821+00:00", + "sha": "df7fddee0a275156d4c72fa006cd2b73bce72613" }, "rows": [ { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 1, + "globalTokens": 8, "dispatch": { - "p50": 114.30399864912033, - "p90": 120.31999975442886, - "p95": 
121.56800180673599, - "p99": 125.02400577068329 + "p50": 96.47999703884125, + "p90": 105.56799918413162, + "p95": 109.15199667215347, + "p99": 129.66400384902954 }, "combine": { - "p50": 106.27199709415436, - "p90": 111.48799955844879, - "p95": 111.77600175142288, - "p99": 114.1119971871376 + "p50": 74.46400076150894, + "p90": 80.73599636554718, + "p95": 81.37600123882294, + "p99": 85.05599945783615 }, "roundtrip": { - "p50": 198.0160027742386, - "p90": 201.82399451732635, - "p95": 203.36000621318817, - "p99": 207.35999941825867 + "p50": 146.40000462532043, + "p90": 152.73599326610565, + "p95": 154.52800691127777, + "p99": 157.79200196266174 }, "isolatedSum": { - "p50": 220.5759957432747, - "p90": 231.80799931287766, - "p95": 233.34400355815887, - "p99": 239.1360029578209 + "p50": 170.9439978003502, + "p90": 186.3039955496788, + "p95": 190.5279979109764, + "p99": 214.7200033068657 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77385728, - "combineLogicalBytes": 77385728, - "fanoutMean": 5.271484375, - "recvTokensMax": 691, - "stragglerRank": 5, + "dispatchLogicalBytes": 540672, + "combineLogicalBytes": 540672, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 256, - "globalTokens": 2048, + "tokensPerRank": 2, + "globalTokens": 16, "dispatch": { - "p50": 144.9279934167862, - "p90": 149.85600113868713, - "p95": 151.45599842071533, - "p99": 155.87200224399567 + "p50": 97.31200337409973, + "p90": 103.32799702882767, + "p95": 104.89600151777267, + "p99": 109.27999764680862 }, "combine": { - "p50": 151.19999647140503, - "p90": 154.84799444675446, - "p95": 156.63999319076538, - "p99": 160.73599457740784 + "p50": 74.8480036854744, + "p90": 80.28800040483475, + "p95": 81.69600367546082, + "p99": 86.33600175380707 }, "roundtrip": { - "p50": 266.11199975013733, - "p90": 271.5519964694977, - "p95": 273.6000120639801, - "p99": 277.1199941635132 + "p50": 144.16000247001648, + 
"p90": 152.19199657440186, + "p95": 154.52800691127777, + "p99": 164.8319959640503 }, "isolatedSum": { - "p50": 296.1279898881912, - "p90": 304.7039955854416, - "p95": 308.0959916114807, - "p99": 316.6079968214035 + "p50": 172.16000705957413, + "p90": 183.61599743366241, + "p95": 186.5920051932335, + "p99": 195.6159994006157 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 155172864, - "combineLogicalBytes": 155172864, - "fanoutMean": 5.28515625, - "recvTokensMax": 1378, + "dispatchLogicalBytes": 1056768, + "combineLogicalBytes": 1056768, + "fanoutMean": 5.375, + "recvTokensMax": 13, "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 512, - "globalTokens": 4096, + "tokensPerRank": 4, + "globalTokens": 32, "dispatch": { - "p50": 200.32000541687012, - "p90": 204.12799715995789, - "p95": 205.4399996995926, - "p99": 208.38400721549988 + "p50": 95.96800059080124, + "p90": 102.39999741315842, + "p95": 104.8320010304451, + "p99": 110.88000237941742 }, "combine": { - "p50": 227.58400440216064, - "p90": 233.75999927520752, - "p95": 234.55999791622162, - "p99": 238.3359968662262 + "p50": 74.46400076150894, + "p90": 79.77599650621414, + "p95": 81.216000020504, + "p99": 83.96799862384796 }, "roundtrip": { - "p50": 402.0479917526245, - "p90": 407.1039855480194, - "p95": 408.735990524292, - "p99": 412.06398606300354 + "p50": 144.16000247001648, + "p90": 152.28800475597382, + "p95": 155.03999590873718, + "p99": 161.40800714492798 }, "isolatedSum": { - "p50": 427.90400981903076, - "p90": 437.8879964351654, - "p95": 439.9999976158142, - "p99": 446.7200040817261 + "p50": 170.43200135231018, + "p90": 182.17599391937256, + "p95": 186.0480010509491, + "p99": 194.84800100326538 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 310546432, - "combineLogicalBytes": 310546432, - "fanoutMean": 5.28857421875, - "recvTokensMax": 2745, + "dispatchLogicalBytes": 2125824, + "combineLogicalBytes": 2125824, + "fanoutMean": 5.40625, + 
"recvTokensMax": 29, "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 1024, - "globalTokens": 8192, + "tokensPerRank": 8, + "globalTokens": 64, "dispatch": { - "p50": 303.16799879074097, - "p90": 307.3920011520386, - "p95": 308.76800417900085, - "p99": 313.27998638153076 + "p50": 95.58399766683578, + "p90": 103.58399897813797, + "p95": 106.49599879980087, + "p99": 112.2559979557991 }, "combine": { - "p50": 362.2399866580963, - "p90": 368.76800656318665, - "p95": 370.3039884567261, - "p99": 372.70399928092957 + "p50": 75.6160020828247, + "p90": 81.60000294446945, + "p95": 82.65600353479385, + "p99": 87.00799942016602 }, "roundtrip": { - "p50": 641.1839723587036, - "p90": 647.9359865188599, - "p95": 650.7520079612732, - "p99": 656.6399931907654 + "p50": 147.2959965467453, + "p90": 154.23999726772308, + "p95": 156.44800662994385, + "p99": 160.73599457740784 }, "isolatedSum": { - "p50": 665.4079854488373, - "p90": 676.1600077152252, - "p95": 679.0719926357269, - "p99": 685.9839856624603 + "p50": 171.1999997496605, + "p90": 185.18400192260742, + "p95": 189.15200233459473, + "p99": 199.26399737596512 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 620619776, - "combineLogicalBytes": 620619776, - "fanoutMean": 5.2845458984375, - "recvTokensMax": 5526, + "dispatchLogicalBytes": 4263936, + "combineLogicalBytes": 4263936, + "fanoutMean": 5.421875, + "recvTokensMax": 47, "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2048, - "globalTokens": 16384, + "tokensPerRank": 16, + "globalTokens": 128, "dispatch": { - "p50": 520.9919810295105, - "p90": 531.4239859580994, - "p95": 534.4640016555786, - "p99": 541.1840081214905 + "p50": 96.0640013217926, + "p90": 102.30399668216705, + "p95": 105.59999942779541, + "p99": 112.47999966144562 }, "combine": { - "p50": 639.3600106239319, - "p90": 650.592029094696, - "p95": 654.5600295066833, - "p99": 660.4800224304199 + "p50": 
78.68800312280655, + "p90": 81.95199817419052, + "p95": 83.71199667453766, + "p99": 89.47200328111649 }, "roundtrip": { - "p50": 1128.864049911499, - "p90": 1138.2720470428467, - "p95": 1141.2479877471924, - "p99": 1146.3040113449097 + "p50": 149.05600249767303, + "p90": 155.42399883270264, + "p95": 158.84800255298615, + "p99": 165.6319946050644 }, "isolatedSum": { - "p50": 1160.3519916534424, - "p90": 1182.0160150527954, - "p95": 1189.024031162262, - "p99": 1201.6640305519104 + "p50": 174.75200444459915, + "p90": 184.25599485635757, + "p95": 189.31199610233307, + "p99": 201.9520029425621 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1239175168, - "combineLogicalBytes": 1239175168, - "fanoutMean": 5.2757568359375, - "recvTokensMax": 11165, - "stragglerRank": 5, + "dispatchLogicalBytes": 8503296, + "combineLogicalBytes": 8503296, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 4096, - "globalTokens": 32768, + "tokensPerRank": 32, + "globalTokens": 256, "dispatch": { - "p50": 1005.5999755859375, - "p90": 1031.7120552062988, - "p95": 1038.3360385894775, - "p99": 1051.103949546814 + "p50": 96.0640013217926, + "p90": 104.16000336408615, + "p95": 105.6319996714592, + "p99": 113.27999830245972 }, "combine": { - "p50": 1158.9759588241577, - "p90": 1167.8719520568848, - "p95": 1169.9199676513672, - "p99": 1174.6560335159302 + "p50": 82.71999657154083, + "p90": 87.99999952316284, + "p95": 88.99199962615967, + "p99": 91.20000153779984 }, "roundtrip": { - "p50": 2121.5360164642334, - "p90": 2138.2720470428467, - "p95": 2142.6239013671875, - "p99": 2150.0160694122314 + "p50": 150.27199685573578, + "p90": 159.8079949617386, + "p95": 162.08000481128693, + "p99": 168.92799735069275 }, "isolatedSum": { - "p50": 2164.575934410095, - "p90": 2199.5840072631836, - "p95": 2208.2560062408447, - "p99": 2225.759983062744 + "p50": 178.78399789333344, + "p90": 192.160002887249, + "p95": 
194.62399929761887, + "p99": 204.47999984025955 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2481604608, - "combineLogicalBytes": 2481604608, - "fanoutMean": 5.282684326171875, - "recvTokensMax": 22165, + "dispatchLogicalBytes": 16908288, + "combineLogicalBytes": 16908288, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 107.744000852108, + "p90": 114.9120032787323, + "p95": 116.22399836778641, + "p99": 124.06399846076965 + }, + "combine": { + "p50": 91.96799993515015, + "p90": 96.3520035147667, + "p95": 97.6639986038208, + "p99": 103.61599922180176 + }, + "roundtrip": { + "p50": 164.60800170898438, + "p90": 177.40799486637115, + "p95": 179.26399409770966, + "p99": 182.3360025882721 + }, + "isolatedSum": { + "p50": 199.71200078725815, + "p90": 211.264006793499, + "p95": 213.8879969716072, + "p99": 227.6799976825714 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 33423360, + "combineLogicalBytes": 33423360, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 120.86399644613266, + "p90": 128.92800569534302, + "p95": 130.65600395202637, + "p99": 136.7039978504181 + }, + "combine": { + "p50": 107.16799646615982, + "p90": 112.12799698114395, + "p95": 112.99200356006622, + "p99": 115.29599875211716 + }, + "roundtrip": { + "p50": 197.76000082492828, + "p90": 205.4080069065094, + "p95": 210.4959934949875, + "p99": 479.45600748062134 + }, + "isolatedSum": { + "p50": 228.03199291229248, + "p90": 241.05600267648697, + "p95": 243.6480075120926, + "p99": 251.99999660253525 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 66576384, + "combineLogicalBytes": 66576384, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, "stragglerRank": 7, 
"correct": true, "samplesPooled": 600, @@ -35174,37 +36836,38 @@ ] }, { - "id": "cx-71b6107f", - "identity": "h100|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|prefill|normal|none|none|0|tuned||2b57a75d27f5b39", - "colorKey": "h100_769b9c4b", - "comparisonKey": "24fc2cc385891299", + "id": "cx-d5af8f11", + "identity": "h100|deepep|6144|8|256|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h100_ff7906f8", + "comparisonKey": "bb40f1d7fb8ef5bf", "schemaVersion": 3, - "generatedAt": "2026-06-27T00:00:08.090138+00:00", + "generatedAt": "2026-06-26T23:52:15.657129+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h100-dgxc-slurm_05", + "runner": "h100-dgxc-slurm_00", "sku": "h100", "backend": "deepep", - "phase": "prefill", + "phase": "decode", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", + "measurementContract": "runtime-visible-v1", "topologyClass": "h100-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · bf16 · zipf+eplb", + "label": "H100 EP8 · deepep · bf16", + "model": "MiniMax-M3", "shape": { - "hidden": 7168, + "hidden": 6144, "topk": 8, - "experts": 288, - "routing": "zipf", - "routingLabel": "zipf+eplb", + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", - "eplbEnabled": true, + "eplbEnabled": false, "dispatchDtype": "bf16", "activationProfile": "normal", "combineQuantMode": "none" @@ -35226,239 +36889,313 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "2b57a75d27f5b39", - "workloadId": "set:6:830e36e88869e222", + "traceSignature": "ac583971f94b176", + "workloadId": "set:8:2e0df6a62cd0143e", "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": 4.895263671875, - "eplbImbalanceAfter": 1.0000902811686199, + 
"eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, "backendVersion": "1.2.1", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271955196", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271955196", - "createdAt": "2026-06-26T23:59:06Z", + "id": "28271714089", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271714089", + "createdAt": "2026-06-26T23:52:15.657129+00:00", "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" }, "rows": [ { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 1, + "globalTokens": 8, "dispatch": { - "p50": 111.07199639081955, - "p90": 115.93600362539291, - "p95": 118.14399808645248, - "p99": 121.08799815177917 + "p50": 95.51999717950821, + "p90": 104.99200224876404, + "p95": 123.16799908876419, + "p99": 153.05599570274353 }, "combine": { - "p50": 106.08000308275223, - "p90": 111.26399785280228, - "p95": 112.38399893045425, - "p99": 114.14399743080139 + "p50": 74.0479975938797, + "p90": 82.36800134181976, + "p95": 90.65599739551544, + "p99": 115.13599753379822 }, "roundtrip": { - "p50": 195.68000733852386, - "p90": 201.1840045452118, - "p95": 202.39999890327454, - "p99": 204.96000349521637 + "p50": 144.73600685596466, + "p90": 151.7760008573532, + "p95": 153.9199948310852, + "p99": 191.74399971961975 }, "isolatedSum": { - "p50": 217.15199947357178, - "p90": 227.2000014781952, - "p95": 230.52799701690674, - "p99": 235.23199558258057 + "p50": 169.5679947733879, + "p90": 187.3600035905838, + "p95": 213.82399648427963, + "p99": 268.19199323654175 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77385728, - "combineLogicalBytes": 77385728, - "fanoutMean": 5.271484375, - "recvTokensMax": 691, - "stragglerRank": 5, + "dispatchLogicalBytes": 540672, + "combineLogicalBytes": 540672, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 7, "correct": true, 
"samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 256, - "globalTokens": 2048, + "tokensPerRank": 2, + "globalTokens": 16, "dispatch": { - "p50": 144.48000490665436, - "p90": 148.0640023946762, - "p95": 149.6960073709488, - "p99": 153.60000729560852 + "p50": 71.26399874687195, + "p90": 102.4319976568222, + "p95": 104.47999835014343, + "p99": 143.48800480365753 }, "combine": { - "p50": 148.92800152301788, - "p90": 154.33600544929504, - "p95": 155.008003115654, - "p99": 157.8879952430725 + "p50": 67.77600198984146, + "p90": 81.15199953317642, + "p95": 81.727996468544, + "p99": 87.71199733018875 }, "roundtrip": { - "p50": 262.81601190567017, - "p90": 266.975998878479, - "p95": 268.3199942111969, - "p99": 272.44800329208374 + "p50": 124.03199821710587, + "p90": 153.02400290966034, + "p95": 154.94400262832642, + "p99": 158.36800634860992 }, "isolatedSum": { - "p50": 293.40800642967224, - "p90": 302.40000784397125, - "p95": 304.7040104866028, - "p99": 311.48800253868103 + "p50": 139.0400007367134, + "p90": 183.58399718999863, + "p95": 186.20799481868744, + "p99": 231.20000213384628 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 155172864, - "combineLogicalBytes": 155172864, - "fanoutMean": 5.28515625, - "recvTokensMax": 1378, - "stragglerRank": 5, - "correct": true, + "dispatchLogicalBytes": 1056768, + "combineLogicalBytes": 1056768, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 3, + "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 512, - "globalTokens": 4096, + "tokensPerRank": 4, + "globalTokens": 32, "dispatch": { - "p50": 199.68000054359436, - "p90": 203.42400670051575, - "p95": 205.47200739383698, - "p99": 222.52799570560455 + "p50": 91.58399701118469, + "p90": 103.20000350475311, + "p95": 104.032002389431, + "p99": 107.58399963378906 }, "combine": { - "p50": 227.80799865722656, - "p90": 232.9919934272766, - "p95": 234.3679964542389, - "p99": 237.34399676322937 + "p50": 74.20799881219864, + "p90": 
80.64000308513641, + "p95": 81.31200075149536, + "p99": 82.49600231647491 }, "roundtrip": { - "p50": 399.83999729156494, - "p90": 405.023992061615, - "p95": 406.3040018081665, - "p99": 414.43198919296265 + "p50": 145.79200744628906, + "p90": 152.38399803638458, + "p95": 154.55999970436096, + "p99": 172.38399386405945 }, "isolatedSum": { - "p50": 427.4879992008209, - "p90": 436.41600012779236, - "p95": 439.84000384807587, - "p99": 459.8719924688339 + "p50": 165.79199582338333, + "p90": 183.84000658988953, + "p95": 185.34400314092636, + "p99": 190.08000195026398 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 310546432, - "combineLogicalBytes": 310546432, - "fanoutMean": 5.28857421875, - "recvTokensMax": 2745, - "stragglerRank": 5, + "dispatchLogicalBytes": 2125824, + "combineLogicalBytes": 2125824, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 1024, - "globalTokens": 8192, + "tokensPerRank": 8, + "globalTokens": 64, "dispatch": { - "p50": 303.5840094089508, - "p90": 309.471994638443, - "p95": 310.4960024356842, - "p99": 313.82399797439575 + "p50": 92.6079973578453, + "p90": 103.00800204277039, + "p95": 114.46399986743927, + "p99": 149.98400211334229 }, "combine": { - "p50": 362.8480136394501, - "p90": 367.74399876594543, - "p95": 369.6320056915283, - "p99": 523.7119793891907 + "p50": 76.1599987745285, + "p90": 82.49600231647491, + "p95": 86.68799698352814, + "p99": 95.77599912881851 }, "roundtrip": { - "p50": 640.8320069313049, - "p90": 648.576021194458, - "p95": 651.2960195541382, - "p99": 733.4399819374084 + "p50": 146.84799313545227, + "p90": 161.40800714492798, + "p95": 192.09599494934082, + "p99": 203.74399423599243 }, "isolatedSum": { - "p50": 666.4320230484009, - "p90": 677.2159934043884, - "p95": 680.1280081272125, - "p99": 837.5359773635864 + "p50": 168.7679961323738, + "p90": 185.5040043592453, + "p95": 201.1519968509674, + "p99": 245.7600012421608 }, 
"roundtripMeasured": true, - "dispatchLogicalBytes": 620619776, - "combineLogicalBytes": 620619776, - "fanoutMean": 5.2845458984375, - "recvTokensMax": 5526, - "stragglerRank": 5, + "dispatchLogicalBytes": 4263936, + "combineLogicalBytes": 4263936, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2048, - "globalTokens": 16384, + "tokensPerRank": 16, + "globalTokens": 128, "dispatch": { - "p50": 524.3200063705444, - "p90": 533.5680246353149, - "p95": 536.191999912262, - "p99": 542.2080159187317 + "p50": 91.2960022687912, + "p90": 99.16800260543823, + "p95": 101.1200025677681, + "p99": 107.68000036478043 }, "combine": { - "p50": 643.9039707183838, - "p90": 653.1839966773987, - "p95": 655.8719873428345, - "p99": 661.1520051956177 + "p50": 77.37600058317184, + "p90": 81.53600245714188, + "p95": 82.24000036716461, + "p99": 87.13600039482117 }, "roundtrip": { - "p50": 1135.2959871292114, - "p90": 1144.8320150375366, - "p95": 1148.4800577163696, - "p99": 1153.92005443573 + "p50": 150.30400454998016, + "p90": 157.05600380897522, + "p95": 158.9760035276413, + "p99": 162.49600052833557 }, "isolatedSum": { - "p50": 1168.2239770889282, - "p90": 1186.7520213127136, - "p95": 1192.0639872550964, - "p99": 1203.3600211143494 + "p50": 168.67200285196304, + "p90": 180.7040050625801, + "p95": 183.3600029349327, + "p99": 194.8160007596016 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1239175168, - "combineLogicalBytes": 1239175168, - "fanoutMean": 5.2757568359375, - "recvTokensMax": 11165, - "stragglerRank": 5, + "dispatchLogicalBytes": 8503296, + "combineLogicalBytes": 8503296, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 4096, - "globalTokens": 32768, + "tokensPerRank": 32, + "globalTokens": 256, "dispatch": { - "p50": 1020.4800367355347, - "p90": 1048.8959550857544, - "p95": 
1056.2560558319092, - "p99": 1071.4880228042603 + "p50": 96.03200107812881, + "p90": 103.90400141477585, + "p95": 107.68000036478043, + "p99": 194.815993309021 }, "combine": { - "p50": 1164.6720170974731, - "p90": 1173.375964164734, - "p95": 1177.024006843567, - "p99": 1183.135986328125 + "p50": 80.51200211048126, + "p90": 87.00799942016602, + "p95": 90.55999666452408, + "p99": 383.7119936943054 }, "roundtrip": { - "p50": 2140.575885772705, - "p90": 2157.248020172119, - "p95": 2164.031982421875, - "p99": 2171.4560985565186 + "p50": 134.97599959373474, + "p90": 158.27199816703796, + "p95": 171.36000096797943, + "p99": 204.0960043668747 }, "isolatedSum": { - "p50": 2185.152053833008, - "p90": 2222.2719192504883, - "p95": 2233.280062675476, - "p99": 2254.6240091323853 + "p50": 176.54400318861008, + "p90": 190.91200083494186, + "p95": 198.2399970293045, + "p99": 578.5279870033264 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2481604608, - "combineLogicalBytes": 2481604608, - "fanoutMean": 5.282684326171875, - "recvTokensMax": 22165, - "stragglerRank": 5, + "dispatchLogicalBytes": 16908288, + "combineLogicalBytes": 16908288, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 107.61599987745285, + "p90": 114.49600011110306, + "p95": 116.35199934244156, + "p99": 122.84799665212631 + }, + "combine": { + "p50": 92.06400066614151, + "p90": 98.2080027461052, + "p95": 98.68799895048141, + "p99": 102.46399790048599 + }, + "roundtrip": { + "p50": 167.84000396728516, + "p90": 173.567995429039, + "p95": 175.90400576591492, + "p99": 179.4240027666092 + }, + "isolatedSum": { + "p50": 199.68000054359436, + "p90": 212.70400285720825, + "p95": 215.03999829292297, + "p99": 225.3119945526123 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 33423360, + "combineLogicalBytes": 33423360, + "fanoutMean": 5.3125, + 
"recvTokensMax": 367, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 123.26399981975555, + "p90": 130.68799674510956, + "p95": 132.83200562000275, + "p99": 148.0959951877594 + }, + "combine": { + "p50": 106.6880002617836, + "p90": 114.23999816179276, + "p95": 115.23199826478958, + "p99": 137.85600662231445 + }, + "roundtrip": { + "p50": 197.60000705718994, + "p90": 204.8639953136444, + "p95": 207.07200467586517, + "p99": 225.8879989385605 + }, + "isolatedSum": { + "p50": 229.95200008153915, + "p90": 244.9279949069023, + "p95": 248.06400388479233, + "p99": 285.95200181007385 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 66576384, + "combineLogicalBytes": 66576384, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 @@ -35466,28 +37203,29 @@ ] }, { - "id": "cx-19a8d159", - "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|normalized|0.18|64d989e2e2a6b31", - "colorKey": "h100_7b3247bf", - "comparisonKey": "0ac8f8817cb63abb", + "id": "cx-7171c240", + "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|fp8-saturation|none|none|0|tuned||8c8497a77d9085d", + "colorKey": "h100_42947950", + "comparisonKey": "1fe2184d83233e7e", "schemaVersion": 3, - "generatedAt": "2026-06-26T17:30:47.651979+00:00", + "generatedAt": "2026-06-27T00:05:32.898956+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h100-dgxc-slurm_17", + "runner": "h100-dgxc-slurm_03", "sku": "h100", "backend": "deepep", - "phase": "prefill", + "phase": "decode", "mode": "normal", - "resourceMode": "normalized", - "suite": "resource-constrained", + "resourceMode": "tuned", + "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", "topologyClass": 
"h100-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · bf16 (norm)", + "label": "H100 EP8 · deepep · bf16", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, @@ -35498,18 +37236,18 @@ "unevenTokens": "none", "eplbEnabled": false, "dispatchDtype": "bf16", - "activationProfile": "normal", + "activationProfile": "fp8-saturation", "combineQuantMode": "none" }, "resourceProfile": { - "requestedFraction": 0.18, - "achievedFraction": 0.1818, - "configuredUnits": 24, + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, "deviceUnits": 132, - "resourceClass": "resource-constrained", - "conformanceClass": "resource-conforming", + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", "fixedKernel": false, - "paretoEligible": true + "paretoEligible": false }, "placement": { "kind": "packed", @@ -35518,8 +37256,8 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "64d989e2e2a6b31", - "workloadId": "set:6:a426d66e479dc893", + "traceSignature": "8c8497a77d9085d", + "workloadId": "set:4:d8d49658059863f2", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -35527,230 +37265,156 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28254315809", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254315809", - "createdAt": "2026-06-26T17:26:52Z", - "sha": "60dec7d70f554e252fec87709e2be52752947db1" + "id": "28272125238", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272125238", + "createdAt": "2026-06-27T00:05:32.898956+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" }, "rows": [ { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 1, + "globalTokens": 8, "dispatch": { - "p50": 110.46399921178818, - "p90": 116.35199934244156, - 
"p95": 117.8240031003952, - "p99": 166.01599752902985 + "p50": 94.7519987821579, + "p90": 118.49600076675415, + "p95": 129.60000336170197, + "p99": 144.31999623775482 }, "combine": { - "p50": 106.1440035700798, - "p90": 111.51999980211258, - "p95": 112.06399649381638, - "p99": 114.07999694347382 + "p50": 76.64000242948532, + "p90": 87.2960016131401, + "p95": 90.52799642086029, + "p99": 103.10400277376175 }, "roundtrip": { - "p50": 197.40800559520721, - "p90": 200.9280025959015, - "p95": 203.0400037765503, - "p99": 206.01600408554077 + "p50": 147.2640037536621, + "p90": 170.30400037765503, + "p95": 184.89600718021393, + "p99": 195.6160068511963 }, "isolatedSum": { - "p50": 216.60800278186798, - "p90": 227.87199914455414, - "p95": 229.88799959421158, - "p99": 280.09599447250366 + "p50": 171.39200121164322, + "p90": 205.79200237989426, + "p95": 220.12799978256226, + "p99": 247.42399901151657 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 6, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 256, - "globalTokens": 2048, + "tokensPerRank": 8, + "globalTokens": 64, "dispatch": { - "p50": 147.39200472831726, - "p90": 150.68799257278442, - "p95": 151.7760008573532, - "p99": 154.33600544929504 + "p50": 98.68799895048141, + "p90": 122.17599898576736, + "p95": 138.7840062379837, + "p99": 191.9039934873581 }, "combine": { - "p50": 145.1839953660965, - "p90": 149.88799393177032, - "p95": 151.67999267578125, - "p99": 154.7199934720993 + "p50": 81.31200075149536, + "p90": 89.72799777984619, + "p95": 97.08800166845322, + "p99": 106.62399977445602 }, "roundtrip": { - "p50": 262.4000012874603, - "p90": 267.2640085220337, - "p95": 269.27998661994934, - "p99": 357.34400153160095 + "p50": 152.70400047302246, 
+ "p90": 174.9120056629181, + "p95": 184.03199315071106, + "p99": 195.51999866962433 }, "isolatedSum": { - "p50": 292.57600009441376, - "p90": 300.57598650455475, - "p95": 303.45599353313446, - "p99": 309.05599892139435 + "p50": 179.99999970197678, + "p90": 211.90399676561356, + "p95": 235.87200790643692, + "p99": 298.5279932618141 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 155889664, - "combineLogicalBytes": 155889664, - "fanoutMean": 5.3095703125, - "recvTokensMax": 1422, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 512, - "globalTokens": 4096, + "tokensPerRank": 32, + "globalTokens": 256, "dispatch": { - "p50": 204.92799580097198, - "p90": 219.39200162887573, - "p95": 221.76000475883484, - "p99": 226.4000028371811 + "p50": 101.79200023412704, + "p90": 127.96799838542938, + "p95": 147.42399752140045, + "p99": 195.16800343990326 }, "combine": { - "p50": 217.15199947357178, - "p90": 221.3120013475418, - "p95": 224.57599639892578, - "p99": 227.743998169899 + "p50": 89.66399729251862, + "p90": 103.4879982471466, + "p95": 113.02399635314941, + "p99": 128.1599998474121 }, "roundtrip": { - "p50": 392.60798692703247, - "p90": 397.47199416160583, - "p95": 400.09599924087524, - "p99": 421.37598991394043 + "p50": 162.88000345230103, + "p90": 193.53599846363068, + "p95": 214.08000588417053, + "p99": 247.71200120449066 }, "isolatedSum": { - "p50": 422.07999527454376, - "p90": 440.70400297641754, - "p95": 446.3360011577606, - "p99": 454.1440010070801 + "p50": 191.45599752664566, + "p90": 231.455996632576, + "p95": 260.44799387454987, + "p99": 323.32800328731537 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 312266752, - "combineLogicalBytes": 312266752, - "fanoutMean": 5.31787109375, - "recvTokensMax": 2779, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + 
"fanoutMean": 5.375, + "recvTokensMax": 182, "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 1024, - "globalTokens": 8192, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 319.93600726127625, - "p90": 324.8960077762604, - "p95": 327.1679878234863, - "p99": 330.55999875068665 + "p50": 130.49599528312683, + "p90": 143.8719928264618, + "p95": 152.70400047302246, + "p99": 158.9760035276413 }, "combine": { - "p50": 330.01598715782166, - "p90": 335.1680040359497, - "p95": 336.64000034332275, - "p99": 340.2239978313446 + "p50": 114.81600254774094, + "p90": 127.23200023174286, + "p95": 131.071999669075, + "p99": 139.5840048789978 }, "roundtrip": { - "p50": 624.064028263092, - "p90": 629.2480230331421, - "p95": 631.6159963607788, - "p99": 638.2399797439575 + "p50": 212.70400285720825, + "p90": 226.33600234985352, + "p95": 233.69599878787994, + "p99": 247.8400021791458 }, "isolatedSum": { - "p50": 649.9519944190979, - "p90": 660.0640118122101, - "p95": 663.8079881668091, - "p99": 670.7839965820312 + "p50": 245.31199783086777, + "p90": 271.10399305820465, + "p95": 283.7760001420975, + "p99": 298.5600084066391 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 623443968, - "combineLogicalBytes": 623443968, - "fanoutMean": 5.30859375, - "recvTokensMax": 5505, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2048, - "globalTokens": 16384, - "dispatch": { - "p50": 570.9440112113953, - "p90": 584.5119953155518, - "p95": 589.1519784927368, - "p99": 593.9199924468994 - }, - "combine": { - "p50": 564.9920105934143, - "p90": 574.3039846420288, - "p95": 576.7999887466431, - "p99": 583.5199952125549 - }, - "roundtrip": { - "p50": 1105.5680513381958, - "p90": 1120.1599836349487, - "p95": 1124.7680187225342, - "p99": 1134.719967842102 - }, - "isolatedSum": { - "p50": 1135.9360218048096, - "p90": 1158.8159799575806, - "p95": 1165.9519672393799, - "p99": 
1177.4399876594543 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1243805696, - "combineLogicalBytes": 1243805696, - "fanoutMean": 5.29547119140625, - "recvTokensMax": 10952, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4096, - "globalTokens": 32768, - "dispatch": { - "p50": 1075.8719444274902, - "p90": 1088.703989982605, - "p95": 1093.5360193252563, - "p99": 1102.463960647583 - }, - "combine": { - "p50": 1031.872034072876, - "p90": 1041.3119792938232, - "p95": 1044.4799661636353, - "p99": 1055.359959602356 - }, - "roundtrip": { - "p50": 2082.304000854492, - "p90": 2096.640110015869, - "p95": 2100.895881652832, - "p99": 2108.031988143921 - }, - "isolatedSum": { - "p50": 2107.743978500366, - "p90": 2130.015969276428, - "p95": 2138.0159854888916, - "p99": 2157.823920249939 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2487009280, - "combineLogicalBytes": 2487009280, - "fanoutMean": 5.294189453125, - "recvTokensMax": 21781, - "stragglerRank": 7, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -35758,34 +37422,35 @@ ] }, { - "id": "cx-107dd39c", - "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|prefill|normal|none|none|0|normalized|0.18|0a3064a2af0dd39", - "colorKey": "h100_716e65b9", - "comparisonKey": "ea5a5b6f1b74dc9d", + "id": "cx-7a284f4e", + "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h100_42947950", + "comparisonKey": "2b24bee4ac6d8f67", "schemaVersion": 3, - "generatedAt": "2026-06-26T17:31:48.643579+00:00", + "generatedAt": "2026-06-27T10:09:52.345460+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h100-dgxc-slurm_04", + "runner": "h100-dgxc-slurm_17", "sku": 
"h100", "backend": "deepep", - "phase": "prefill", + "phase": "decode", "mode": "normal", - "resourceMode": "normalized", - "suite": "resource-constrained", + "resourceMode": "tuned", + "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", "topologyClass": "h100-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · bf16 (norm) · balanced", + "label": "H100 EP8 · deepep · bf16", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, "experts": 256, - "routing": "balanced", - "routingLabel": "balanced", + "routing": "uniform", + "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, @@ -35794,14 +37459,14 @@ "combineQuantMode": "none" }, "resourceProfile": { - "requestedFraction": 0.18, - "achievedFraction": 0.1818, - "configuredUnits": 24, + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, "deviceUnits": 132, - "resourceClass": "resource-constrained", - "conformanceClass": "resource-conforming", + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", "fixedKernel": false, - "paretoEligible": true + "paretoEligible": false }, "placement": { "kind": "packed", @@ -35810,239 +37475,313 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "0a3064a2af0dd39", - "workloadId": "set:6:2dad1a73ff872905", + "traceSignature": "ac583971f94b176", + "workloadId": "set:8:d8d49658059863f2", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", + "backendVersion": "2.0.0+af9a040", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28254367516", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254367516", - "createdAt": "2026-06-26T17:27:52Z", - "sha": 
"60dec7d70f554e252fec87709e2be52752947db1" + "id": "28286083501", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28286083501", + "createdAt": "2026-06-27T10:09:52.345460+00:00", + "sha": "76a3032d20288ee17220eb6099346f74d56ce005" }, "rows": [ { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 1, + "globalTokens": 8, "dispatch": { - "p50": 126.65599584579468, - "p90": 131.74399733543396, - "p95": 132.83200562000275, - "p99": 139.80799913406372 + "p50": 74.36800003051758, + "p90": 106.81600123643875, + "p95": 108.06400328874588, + "p99": 112.89600282907486 }, "combine": { - "p50": 120.4800009727478, - "p90": 122.40000069141388, - "p95": 124.28800016641617, - "p99": 129.12000715732574 + "p50": 74.87999647855759, + "p90": 83.80799740552902, + "p95": 84.22400057315826, + "p99": 88.99199962615967 }, "roundtrip": { - "p50": 221.40799462795258, - "p90": 226.49599611759186, - "p95": 227.77600586414337, - "p99": 232.16000199317932 + "p50": 134.24000144004822, + "p90": 164.0319973230362, + "p95": 166.81599617004395, + "p99": 169.91999745368958 }, "isolatedSum": { - "p50": 247.13599681854248, - "p90": 254.14399802684784, - "p95": 257.1200057864189, - "p99": 268.92800629138947 + "p50": 149.24799650907516, + "p90": 190.62399864196777, + "p95": 192.28800386190414, + "p99": 201.88800245523453 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 117440512, - "combineLogicalBytes": 117440512, - "fanoutMean": 8, - "recvTokensMax": 1024, - "stragglerRank": 5, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 256, - "globalTokens": 2048, + "tokensPerRank": 2, + "globalTokens": 16, "dispatch": { - "p50": 174.04800653457642, - "p90": 177.5359958410263, - "p95": 179.29600179195404, - "p99": 190.0160014629364 + "p50": 72.60800153017044, + "p90": 107.84000158309937, + "p95": 108.76800119876862, 
+ "p99": 112.44799941778183 }, "combine": { - "p50": 172.67200350761414, - "p90": 174.52800273895264, - "p95": 175.4239946603775, - "p99": 180.28800189495087 + "p50": 74.91199672222137, + "p90": 84.03199911117554, + "p95": 84.48000252246857, + "p99": 89.24800157546997 }, "roundtrip": { - "p50": 317.05600023269653, - "p90": 321.3759958744049, - "p95": 322.4320113658905, - "p99": 326.04798674583435 + "p50": 134.8160058259964, + "p90": 165.69599509239197, + "p95": 167.42399334907532, + "p99": 170.04799842834473 }, "isolatedSum": { - "p50": 346.72001004219055, - "p90": 352.06399857997894, - "p95": 354.71999645233154, - "p99": 370.30400335788727 + "p50": 147.51999825239182, + "p90": 191.8720006942749, + "p95": 193.24800372123718, + "p99": 201.6960009932518 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 234881024, - "combineLogicalBytes": 234881024, - "fanoutMean": 8, - "recvTokensMax": 2048, - "stragglerRank": 5, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 512, - "globalTokens": 4096, + "tokensPerRank": 4, + "globalTokens": 32, "dispatch": { - "p50": 260.70401072502136, - "p90": 264.41600918769836, - "p95": 265.76000452041626, - "p99": 269.6639895439148 + "p50": 72.57600128650665, + "p90": 103.32799702882767, + "p95": 106.1440035700798, + "p99": 112.22399771213531 }, "combine": { - "p50": 255.13601303100586, - "p90": 258.2080066204071, - "p95": 259.5840096473694, - "p99": 263.5520100593567 + "p50": 75.55200159549713, + "p90": 87.90399879217148, + "p95": 88.92799913883209, + "p99": 91.26400202512741 }, "roundtrip": { - "p50": 489.3760085105896, - "p90": 493.696004152298, - "p95": 495.0079917907715, - "p99": 498.9120066165924 + "p50": 134.36800241470337, + "p90": 164.8319959640503, + "p95": 166.75199568271637, + "p99": 172.44799435138702 }, "isolatedSum": { - "p50": 515.8400237560272, - "p90": 
522.6240158081055, - "p95": 525.3440141677856, - "p99": 533.2159996032715 + "p50": 148.12800288200378, + "p90": 191.23199582099915, + "p95": 195.0720027089119, + "p99": 203.48799973726273 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 469762048, - "combineLogicalBytes": 469762048, - "fanoutMean": 8, - "recvTokensMax": 4096, - "stragglerRank": 5, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 1024, - "globalTokens": 8192, + "tokensPerRank": 8, + "globalTokens": 64, "dispatch": { - "p50": 437.6640021800995, - "p90": 443.7119960784912, - "p95": 445.248007774353, - "p99": 449.50398802757263 + "p50": 75.93599706888199, + "p90": 104.54399883747101, + "p95": 107.51999914646149, + "p99": 112.64000087976456 }, "combine": { - "p50": 422.14399576187134, - "p90": 426.07998847961426, - "p95": 427.90400981903076, - "p99": 431.0399889945984 + "p50": 75.48800110816956, + "p90": 84.28800106048584, + "p95": 88.86399865150452, + "p99": 91.32800251245499 }, "roundtrip": { - "p50": 834.0799808502197, - "p90": 840.3199911117554, - "p95": 842.8159952163696, - "p99": 852.512001991272 + "p50": 134.5279961824417, + "p90": 165.47200083732605, + "p95": 167.35999286174774, + "p99": 170.71999609470367 }, "isolatedSum": { - "p50": 859.8079979419708, - "p90": 869.7919845581055, - "p95": 873.1520175933838, - "p99": 880.543977022171 + "p50": 151.42399817705154, + "p90": 188.83199989795685, + "p95": 196.383997797966, + "p99": 203.96800339221954 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 939524096, - "combineLogicalBytes": 939524096, - "fanoutMean": 8, - "recvTokensMax": 8192, - "stragglerRank": 5, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2048, 
- "globalTokens": 16384, + "tokensPerRank": 16, + "globalTokens": 128, "dispatch": { - "p50": 802.623987197876, - "p90": 819.7439908981323, - "p95": 822.3680257797241, - "p99": 830.3359746932983 + "p50": 84.51200276613235, + "p90": 102.68799960613251, + "p95": 104.41599786281586, + "p99": 111.13599687814713 }, "combine": { - "p50": 751.9360184669495, - "p90": 759.6160173416138, - "p95": 762.0480060577393, - "p99": 765.5680179595947 + "p50": 75.96799731254578, + "p90": 90.62399715185165, + "p95": 91.58399701118469, + "p99": 92.6079973578453 }, "roundtrip": { - "p50": 1521.9520330429077, - "p90": 1534.208059310913, - "p95": 1541.4400100708008, - "p99": 1552.5120496749878 + "p50": 133.44000279903412, + "p90": 164.67200219631195, + "p95": 167.29600727558136, + "p99": 349.88799691200256 }, "isolatedSum": { - "p50": 1554.5600056648254, - "p90": 1579.360008239746, - "p95": 1584.4160318374634, - "p99": 1595.903992652893 + "p50": 160.48000007867813, + "p90": 193.31199675798416, + "p95": 195.99999487400055, + "p99": 203.74399423599243 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1879048192, - "combineLogicalBytes": 1879048192, - "fanoutMean": 8, - "recvTokensMax": 16384, - "stragglerRank": 5, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 4096, - "globalTokens": 32768, + "tokensPerRank": 32, + "globalTokens": 256, "dispatch": { - "p50": 1529.0240049362183, - "p90": 1539.5519733428955, - "p95": 1543.4880256652832, - "p99": 1549.504041671753 + "p50": 91.87199920415878, + "p90": 107.58399963378906, + "p95": 108.60799998044968, + "p99": 113.15199732780457 }, "combine": { - "p50": 1399.6479511260986, - "p90": 1406.7840576171875, - "p95": 1409.440040588379, - "p99": 1416.767954826355 + "p50": 83.23200047016144, + "p90": 92.0960009098053, + "p95": 92.70399808883667, + "p99": 97.59999811649323 }, 
"roundtrip": { - "p50": 2903.520107269287, - "p90": 2916.3520336151123, - "p95": 2920.2558994293213, - "p99": 2930.016040802002 + "p50": 143.71199905872345, + "p90": 170.1440066099167, + "p95": 173.66400361061096, + "p99": 177.2480010986328 }, "isolatedSum": { - "p50": 2928.671956062317, - "p90": 2946.336030960083, - "p95": 2952.928066253662, - "p99": 2966.271996498108 + "p50": 175.10399967432022, + "p90": 199.68000054359436, + "p95": 201.31199806928635, + "p99": 210.7519954442978 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 3758096384, - "combineLogicalBytes": 3758096384, - "fanoutMean": 8, - "recvTokensMax": 32768, - "stragglerRank": 5, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 98.24000298976898, + "p90": 121.69600278139114, + "p95": 124.95999783277512, + "p99": 132.06399977207184 + }, + "combine": { + "p50": 92.00000017881393, + "p90": 105.34399747848511, + "p95": 106.52799904346466, + "p99": 107.93600231409073 + }, + "roundtrip": { + "p50": 167.67999529838562, + "p90": 185.2799952030182, + "p95": 188.4479969739914, + "p99": 196.0960030555725 + }, + "isolatedSum": { + "p50": 190.24000316858292, + "p90": 227.04000025987625, + "p95": 231.48799687623978, + "p99": 240.00000208616257 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 118.65600198507309, + "p90": 134.3040019273758, + "p95": 136.86400651931763, + "p99": 142.17600226402283 + }, + "combine": { + "p50": 108.70400071144104, + "p90": 121.76000326871872, + "p95": 122.8799968957901, + "p99": 124.35200065374374 
+ }, + "roundtrip": { + "p50": 202.65600085258484, + "p90": 218.6560034751892, + "p95": 221.3120013475418, + "p99": 225.0880002975464 + }, + "isolatedSum": { + "p50": 227.36000269651413, + "p90": 256.0640051960945, + "p95": 259.7440034151077, + "p99": 266.52800291776657 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -36050,34 +37789,35 @@ ] }, { - "id": "cx-a1762095", - "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|prefill|normal|none|none|0|normalized|0.18|b5217e990b95f86", - "colorKey": "h100_f7ec28aa", - "comparisonKey": "18d3cab3936a264e", + "id": "cx-9a231e73", + "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||b029c1a6fded400", + "colorKey": "h100_42947950", + "comparisonKey": "fb346b1019e55bb0", "schemaVersion": 3, - "generatedAt": "2026-06-26T17:29:07.856119+00:00", + "generatedAt": "2026-06-27T00:13:23.336108+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h100-dgxc-slurm_14", + "runner": "h100-dgxc-slurm_12", "sku": "h100", "backend": "deepep", - "phase": "prefill", + "phase": "decode", "mode": "normal", - "resourceMode": "normalized", - "suite": "resource-constrained", + "resourceMode": "tuned", + "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", "topologyClass": "h100-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · bf16 (norm) · zipf", + "label": "H100 EP8 · deepep · bf16", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, "experts": 256, - "routing": "zipf", - "routingLabel": "zipf", + "routing": "uniform", + "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, @@ -36086,14 +37826,14 
@@ "combineQuantMode": "none" }, "resourceProfile": { - "requestedFraction": 0.18, - "achievedFraction": 0.1818, - "configuredUnits": 24, + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, "deviceUnits": 132, - "resourceClass": "resource-constrained", - "conformanceClass": "resource-conforming", + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", "fixedKernel": false, - "paretoEligible": true + "paretoEligible": false }, "placement": { "kind": "packed", @@ -36102,8 +37842,8 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "b5217e990b95f86", - "workloadId": "set:6:830e36e88869e222", + "traceSignature": "b029c1a6fded400", + "workloadId": "set:3:07d544ac2af401ec", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -36111,230 +37851,119 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28254376151", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254376151", - "createdAt": "2026-06-26T17:28:02Z", - "sha": "60dec7d70f554e252fec87709e2be52752947db1" + "id": "28272369133", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272369133", + "createdAt": "2026-06-27T00:13:23.336108+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" }, "rows": [ { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 8, + "globalTokens": 64, "dispatch": { - "p50": 119.03999745845795, - "p90": 125.44000148773193, - "p95": 126.01600587368011, - "p99": 130.68799674510956 + "p50": 98.88000041246414, + "p90": 104.8320010304451, + "p95": 107.96800255775452, + "p99": 118.97599697113037 }, "combine": { - "p50": 111.32799834012985, - "p90": 113.92000317573547, - "p95": 114.33599889278412, - "p99": 119.77600306272507 + "p50": 79.93599772453308, + "p90": 87.5839963555336, + "p95": 87.99999952316284, + "p99": 
92.28800237178802 }, "roundtrip": { - "p50": 207.42399990558624, - "p90": 212.351992726326, - "p95": 214.56000208854675, - "p99": 233.3119958639145 + "p50": 154.11199629306793, + "p90": 159.2639982700348, + "p95": 161.43999993801117, + "p99": 167.29600727558136 }, "isolatedSum": { - "p50": 230.3679957985878, - "p90": 239.3600046634674, - "p95": 240.35200476646423, - "p99": 250.46399980783463 + "p50": 178.81599813699722, + "p90": 192.4159973859787, + "p95": 195.96800208091736, + "p99": 211.2639993429184 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 49946624, - "combineLogicalBytes": 49946624, - "fanoutMean": 3.40234375, - "recvTokensMax": 1022, - "stragglerRank": 4, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 256, - "globalTokens": 2048, + "tokensPerRank": 32, + "globalTokens": 256, "dispatch": { - "p50": 159.42400693893433, - "p90": 165.8879965543747, - "p95": 166.6879951953888, - "p99": 169.69600319862366 + "p50": 103.04000228643417, + "p90": 108.22399705648422, + "p95": 110.43199896812439, + "p99": 116.64000153541565 }, "combine": { - "p50": 156.19200468063354, - "p90": 162.49600052833557, - "p95": 163.26400637626648, - "p99": 168.83200407028198 + "p50": 87.93599903583527, + "p90": 94.94400024414062, + "p95": 96.03200107812881, + "p99": 98.49599748849869 }, "roundtrip": { - "p50": 290.336012840271, - "p90": 296.4160144329071, - "p95": 298.43199253082275, - "p99": 313.4399950504303 + "p50": 162.4639928340912, + "p90": 170.3999936580658, + "p95": 172.31999337673187, + "p99": 178.9119988679886 }, "isolatedSum": { - "p50": 315.61601161956787, - "p90": 328.38399708271027, - "p95": 329.9520015716553, - "p99": 338.52800726890564 + "p50": 190.97600132226944, + "p90": 203.16799730062485, + "p95": 206.4640000462532, + "p99": 215.13599902391434 }, "roundtripMeasured": true, - 
"dispatchLogicalBytes": 100509696, - "combineLogicalBytes": 100509696, - "fanoutMean": 3.42333984375, - "recvTokensMax": 2046, - "stragglerRank": 4, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 512, - "globalTokens": 4096, - "dispatch": { - "p50": 234.78400707244873, - "p90": 240.22400379180908, - "p95": 242.20800399780273, - "p99": 246.2719976902008 - }, - "combine": { - "p50": 244.47999894618988, - "p90": 252.16001272201538, - "p95": 254.8159956932068, - "p99": 262.4959945678711 - }, - "roundtrip": { - "p50": 450.81600546836853, - "p90": 456.83199167251587, - "p95": 458.624005317688, - "p99": 499.1680085659027 - }, - "isolatedSum": { - "p50": 479.2640060186386, - "p90": 492.38401651382446, - "p95": 497.0239996910095, - "p99": 508.7679922580719 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 201678848, - "combineLogicalBytes": 201678848, - "fanoutMean": 3.4345703125, - "recvTokensMax": 4094, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 1024, - "globalTokens": 8192, - "dispatch": { - "p50": 379.8399865627289, - "p90": 387.58400082588196, - "p95": 389.60000872612, - "p99": 392.9600119590759 - }, - "combine": { - "p50": 402.72000432014465, - "p90": 408.35198760032654, - "p95": 410.5280041694641, - "p99": 414.2400026321411 - }, - "roundtrip": { - "p50": 753.600001335144, - "p90": 759.8080039024353, - "p95": 761.5039944648743, - "p99": 764.959990978241 - }, - "isolatedSum": { - "p50": 782.5599908828735, - "p90": 795.9359884262085, - "p95": 800.1280128955841, - "p99": 807.200014591217 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 405035008, - "combineLogicalBytes": 405035008, - "fanoutMean": 3.4488525390625, - "recvTokensMax": 8189, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - 
{ - "tokensPerRank": 2048, - "globalTokens": 16384, - "dispatch": { - "p50": 663.7120246887207, - "p90": 672.1919775009155, - "p95": 675.9359836578369, - "p99": 683.0080151557922 - }, - "combine": { - "p50": 711.5839719772339, - "p90": 725.5359888076782, - "p95": 729.8880219459534, - "p99": 740.0320172309875 - }, - "roundtrip": { - "p50": 1344.383955001831, - "p90": 1357.5999736785889, - "p95": 1361.0880374908447, - "p99": 1368.6399459838867 - }, - "isolatedSum": { - "p50": 1375.2959966659546, - "p90": 1397.7279663085938, - "p95": 1405.8240056037903, - "p99": 1423.0400323867798 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 808822784, - "combineLogicalBytes": 808822784, - "fanoutMean": 3.44354248046875, - "recvTokensMax": 16380, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4096, - "globalTokens": 32768, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 1251.1359453201294, - "p90": 1264.8320198059082, - "p95": 1269.6640491485596, - "p99": 1279.0080308914185 + "p50": 129.40800189971924, + "p90": 137.7599984407425, + "p95": 139.45600390434265, + "p99": 143.48800480365753 }, "combine": { - "p50": 1326.9120454788208, - "p90": 1337.3440504074097, - "p95": 1343.008041381836, - "p99": 1352.5439500808716 + "p50": 114.88000303506851, + "p90": 119.87199634313583, + "p95": 120.4800009727478, + "p99": 123.48800152540207 }, "roundtrip": { - "p50": 2547.0080375671387, - "p90": 2561.2800121307373, - "p95": 2564.863920211792, - "p99": 2581.696033477783 + "p50": 213.0880057811737, + "p90": 217.3759937286377, + "p95": 219.10400688648224, + "p99": 223.23200106620789 }, "isolatedSum": { - "p50": 2578.04799079895, - "p90": 2602.176070213318, - "p95": 2612.6720905303955, - "p99": 2631.55198097229 + "p50": 244.28800493478775, + "p90": 257.6319947838783, + "p95": 259.93600487709045, + "p99": 266.9760063290596 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1619795968, - 
"combineLogicalBytes": 1619795968, - "fanoutMean": 3.4481201171875, - "recvTokensMax": 32761, - "stragglerRank": 4, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -36342,50 +37971,51 @@ ] }, { - "id": "cx-6339c695", - "identity": "h100|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|prefill|normal|none|none|0|normalized|0.18|2b57a75d27f5b39", - "colorKey": "h100_93503624", - "comparisonKey": "99696dfafd6d026a", + "id": "cx-535aa40c", + "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||c774c8e4abb34da", + "colorKey": "h100_42947950", + "comparisonKey": "f31dd87deba90285", "schemaVersion": 3, - "generatedAt": "2026-06-26T17:46:27.794881+00:00", + "generatedAt": "2026-06-27T00:53:48.998127+00:00", "status": "valid", "publicationStatus": "official", "runner": "h100-dgxc-slurm_03", "sku": "h100", "backend": "deepep", - "phase": "prefill", + "phase": "decode", "mode": "normal", - "resourceMode": "normalized", - "suite": "resource-constrained", + "resourceMode": "tuned", + "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", "topologyClass": "h100-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · bf16 (norm) · zipf+eplb", + "label": "H100 EP8 · deepep · bf16", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, - "experts": 288, - "routing": "zipf", - "routingLabel": "zipf+eplb", + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", - "eplbEnabled": true, + "eplbEnabled": false, "dispatchDtype": "bf16", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { - "requestedFraction": 0.18, - "achievedFraction": 0.1818, - "configuredUnits": 
24, + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, "deviceUnits": 132, - "resourceClass": "resource-constrained", - "conformanceClass": "resource-conforming", + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", "fixedKernel": false, - "paretoEligible": true + "paretoEligible": false }, "placement": { "kind": "packed", @@ -36394,239 +38024,202 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "2b57a75d27f5b39", - "workloadId": "set:6:830e36e88869e222", + "traceSignature": "c774c8e4abb34da", + "workloadId": "set:5:d8d49658059863f2", "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": 4.895263671875, - "eplbImbalanceAfter": 1.0000902811686199, + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, "backendVersion": "1.2.1", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28255296001", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28255296001", - "createdAt": "2026-06-26T17:45:26Z", - "sha": "36d3eb6c3c7386d3220c873d305410219c5c0f17" + "id": "28273506790", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28273506790", + "createdAt": "2026-06-27T00:53:48.998127+00:00", + "sha": "2c15d9415503e9ccb84cd49cf446a122796efc1e" }, "rows": [ { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 110.72000116109848, - "p90": 114.78400230407715, - "p95": 116.57600104808807, - "p99": 121.0239976644516 - }, - "combine": { - "p50": 105.8880016207695, - "p90": 111.35999858379364, - "p95": 112.0000034570694, - "p99": 114.56000059843063 - }, - "roundtrip": { - "p50": 195.99999487400055, - "p90": 200.00000298023224, - "p95": 201.24800503253937, - "p99": 205.59999346733093 - }, - "isolatedSum": { - "p50": 216.60800278186798, - "p90": 226.1440008878708, - "p95": 228.57600450515747, - "p99": 235.58399826288223 - }, - 
"roundtripMeasured": true, - "dispatchLogicalBytes": 77385728, - "combineLogicalBytes": 77385728, - "fanoutMean": 5.271484375, - "recvTokensMax": 691, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 256, - "globalTokens": 2048, + "tokensPerRank": 1, + "globalTokens": 8, "dispatch": { - "p50": 144.31999623775482, - "p90": 148.0640023946762, - "p95": 149.24800395965576, - "p99": 152.0960032939911 + "p50": 94.4959968328476, + "p90": 100.5759984254837, + "p95": 102.81600058078766, + "p99": 107.42399841547012 }, "combine": { - "p50": 146.62399888038635, - "p90": 151.10400319099426, - "p95": 152.51199901103973, - "p99": 155.32800555229187 + "p50": 76.92799717187881, + "p90": 80.89599758386612, + "p95": 81.37600123882294, + "p99": 85.91999858617783 }, "roundtrip": { - "p50": 260.8959972858429, - "p90": 265.3760015964508, - "p95": 266.400009393692, - "p99": 270.7520127296448 + "p50": 150.65599977970123, + "p90": 155.35999834537506, + "p95": 157.02399611473083, + "p99": 163.5199934244156 }, "isolatedSum": { - "p50": 290.9439951181412, - "p90": 299.16800558567047, - "p95": 301.7600029706955, - "p99": 307.42400884628296 + "p50": 171.4239940047264, + "p90": 181.47199600934982, + "p95": 184.1920018196106, + "p99": 193.34399700164795 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 155172864, - "combineLogicalBytes": 155172864, - "fanoutMean": 5.28515625, - "recvTokensMax": 1378, - "stragglerRank": 6, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 512, - "globalTokens": 4096, + "tokensPerRank": 2, + "globalTokens": 16, "dispatch": { - "p50": 205.56800067424774, - "p90": 210.36800742149353, - "p95": 212.09600567817688, - "p99": 214.6880030632019 + "p50": 90.97599983215332, + "p90": 98.52799773216248, + "p95": 101.02400183677673, + "p99": 107.68000036478043 
}, "combine": { - "p50": 214.78399634361267, - "p90": 219.13599967956543, - "p95": 220.70400416851044, - "p99": 225.2800017595291 + "p50": 77.11999863386154, + "p90": 81.216000020504, + "p95": 82.71999657154083, + "p99": 87.55200356245041 }, "roundtrip": { - "p50": 394.8799967765808, - "p90": 400.2879858016968, - "p95": 401.88801288604736, - "p99": 407.9680144786835 + "p50": 149.47199821472168, + "p90": 154.91199493408203, + "p95": 157.151997089386, + "p99": 163.80800306797028 }, "isolatedSum": { - "p50": 420.3519970178604, - "p90": 429.50400710105896, - "p95": 432.8000098466873, - "p99": 439.968004822731 + "p50": 168.09599846601486, + "p90": 179.74399775266647, + "p95": 183.74399840831757, + "p99": 195.23200392723083 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 310546432, - "combineLogicalBytes": 310546432, - "fanoutMean": 5.28857421875, - "recvTokensMax": 2745, - "stragglerRank": 6, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 1024, - "globalTokens": 8192, + "tokensPerRank": 4, + "globalTokens": 32, "dispatch": { - "p50": 326.9760012626648, - "p90": 332.35201239585876, - "p95": 334.46401357650757, - "p99": 337.98399567604065 + "p50": 93.12000125646591, + "p90": 99.64799880981445, + "p95": 102.27199643850327, + "p99": 109.43999886512756 }, "combine": { - "p50": 338.75200152397156, - "p90": 346.0479974746704, - "p95": 347.4240005016327, - "p99": 379.5199990272522 + "p50": 79.3600007891655, + "p90": 83.0719992518425, + "p95": 84.22400057315826, + "p99": 88.54400366544724 }, "roundtrip": { - "p50": 642.8160071372986, - "p90": 650.6879925727844, - "p95": 652.895987033844, - "p99": 658.7520241737366 + "p50": 151.96800231933594, + "p90": 158.9439958333969, + "p95": 160.25599837303162, + "p99": 163.07200491428375 }, "isolatedSum": { - "p50": 665.7280027866364, - "p90": 678.4000098705292, - "p95": 
681.8880140781403, - "p99": 717.5039947032928 + "p50": 172.4800020456314, + "p90": 182.71999806165695, + "p95": 186.49599701166153, + "p99": 197.9840025305748 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 620619776, - "combineLogicalBytes": 620619776, - "fanoutMean": 5.2845458984375, - "recvTokensMax": 5526, - "stragglerRank": 7, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2048, - "globalTokens": 16384, + "tokensPerRank": 8, + "globalTokens": 64, "dispatch": { - "p50": 584.0640068054199, - "p90": 592.6719903945923, - "p95": 595.5520272254944, - "p99": 601.2160181999207 + "p50": 94.27200257778168, + "p90": 100.80000013113022, + "p95": 102.62399911880493, + "p99": 107.80800133943558 }, "combine": { - "p50": 568.8639879226685, - "p90": 576.9280195236206, - "p95": 579.3920159339905, - "p99": 584.5119953155518 + "p50": 78.68800312280655, + "p90": 83.13599973917007, + "p95": 84.25600081682205, + "p99": 86.65599673986435 }, "roundtrip": { - "p50": 1122.3679780960083, - "p90": 1133.8560581207275, - "p95": 1138.6239528656006, - "p99": 1146.783947944641 + "p50": 151.39199793338776, + "p90": 157.79200196266174, + "p95": 160.25599837303162, + "p99": 164.95999693870544 }, "isolatedSum": { - "p50": 1152.9279947280884, - "p90": 1169.600009918213, - "p95": 1174.9440431594849, - "p99": 1185.7280135154724 + "p50": 172.96000570058823, + "p90": 183.9359998703003, + "p95": 186.87999993562698, + "p99": 194.46399807929993 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1239175168, - "combineLogicalBytes": 1239175168, - "fanoutMean": 5.2757568359375, - "recvTokensMax": 11165, - "stragglerRank": 6, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - 
"tokensPerRank": 4096, - "globalTokens": 32768, + "tokensPerRank": 16, + "globalTokens": 128, "dispatch": { - "p50": 1107.200026512146, - "p90": 1119.0400123596191, - "p95": 1124.384045600891, - "p99": 1133.344054222107 + "p50": 96.16000205278397, + "p90": 100.92800110578537, + "p95": 103.71199995279312, + "p99": 108.06400328874588 }, "combine": { - "p50": 1020.6719636917114, - "p90": 1029.1839838027954, - "p95": 1032.1919918060303, - "p99": 1037.8559827804565 + "p50": 81.85599744319916, + "p90": 87.26400136947632, + "p95": 88.8959988951683, + "p99": 90.04800021648407 }, "roundtrip": { - "p50": 2098.4959602355957, - "p90": 2110.1760864257812, - "p95": 2113.856077194214, - "p99": 2120.60809135437 + "p50": 153.6639928817749, + "p90": 160.35200655460358, + "p95": 161.95200383663177, + "p99": 165.3439998626709 }, "isolatedSum": { - "p50": 2127.8719902038574, - "p90": 2148.2239961624146, - "p95": 2156.5760374069214, - "p99": 2171.2000370025635 + "p50": 178.01599949598312, + "p90": 188.1920024752617, + "p95": 192.60799884796143, + "p99": 198.11200350522995 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2481604608, - "combineLogicalBytes": 2481604608, - "fanoutMean": 5.282684326171875, - "recvTokensMax": 22165, - "stragglerRank": 6, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 @@ -36634,28 +38227,29 @@ ] }, { - "id": "cx-96b1ca55", - "identity": "h100|deepep|7168|8|256|bf16|normal|cached-layout-comm-only-v1|uniform|8|prefill|normal|none|none|0|normalized|0.18|64d989e2e2a6b31", - "colorKey": "h100_5df912ff", - "comparisonKey": "9fdbd6763ea7346a", + "id": "cx-5a3d925c", + "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|small-amplitude|none|none|0|tuned||8c8497a77d9085d", + "colorKey": "h100_42947950", + "comparisonKey": "da8c4fcc63f5bf6e", "schemaVersion": 3, - "generatedAt": 
"2026-06-26T17:28:17.076570+00:00", + "generatedAt": "2026-06-27T00:05:07.028525+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h100-dgxc-slurm_08", + "runner": "h100-dgxc-slurm_18", "sku": "h100", "backend": "deepep", - "phase": "prefill", + "phase": "decode", "mode": "normal", - "resourceMode": "normalized", - "suite": "resource-constrained", + "resourceMode": "tuned", + "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "cached-layout-comm-only-v1", + "measurementContract": "layout-and-dispatch-v1", "topologyClass": "h100-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · bf16 (norm) [cl]", + "label": "H100 EP8 · deepep · bf16", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, @@ -36666,18 +38260,18 @@ "unevenTokens": "none", "eplbEnabled": false, "dispatchDtype": "bf16", - "activationProfile": "normal", + "activationProfile": "small-amplitude", "combineQuantMode": "none" }, "resourceProfile": { - "requestedFraction": 0.18, - "achievedFraction": 0.1818, - "configuredUnits": 24, + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, "deviceUnits": 132, - "resourceClass": "resource-constrained", - "conformanceClass": "resource-conforming", + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", "fixedKernel": false, - "paretoEligible": true + "paretoEligible": false }, "placement": { "kind": "packed", @@ -36686,239 +38280,165 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "64d989e2e2a6b31", - "workloadId": "set:6:a426d66e479dc893", - "workloadSource": "canonical-serialized", + "traceSignature": "8c8497a77d9085d", + "workloadId": "set:4:d8d49658059863f2", + "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, "backendVersion": "1.2.1", "imageDigest": 
"sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28254332840", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254332840", - "createdAt": "2026-06-26T17:27:12Z", - "sha": "60dec7d70f554e252fec87709e2be52752947db1" + "id": "28272117855", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272117855", + "createdAt": "2026-06-27T00:05:07.028525+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" }, "rows": [ { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 101.31199657917023, - "p90": 105.69600015878677, - "p95": 107.55199939012527, - "p99": 110.84800213575363 - }, - "combine": { - "p50": 105.82400113344193, - "p90": 107.42399841547012, - "p95": 108.60799998044968, - "p99": 112.64000087976456 - }, - "roundtrip": { - "p50": 183.1360012292862, - "p90": 188.03200125694275, - "p95": 188.960000872612, - "p99": 195.13599574565887 - }, - "isolatedSum": { - "p50": 207.13599771261215, - "p90": 213.1199985742569, - "p95": 216.15999937057495, - "p99": 223.4880030155182 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 256, - "globalTokens": 2048, - "dispatch": { - "p50": 132.89600610733032, - "p90": 137.08800077438354, - "p95": 138.2399946451187, - "p99": 140.70400595664978 - }, - "combine": { - "p50": 144.96000111103058, - "p90": 147.5840061903, - "p95": 148.28799664974213, - "p99": 152.63999998569489 - }, - "roundtrip": { - "p50": 249.56800043582916, - "p90": 253.53598594665527, - "p95": 254.59200143814087, - "p99": 256.73601031303406 - }, - "isolatedSum": { - "p50": 277.8560072183609, - "p90": 284.67200696468353, - "p95": 286.52799129486084, - "p99": 293.34400594234467 - }, - "roundtripMeasured": true, - 
"dispatchLogicalBytes": 155889664, - "combineLogicalBytes": 155889664, - "fanoutMean": 5.3095703125, - "recvTokensMax": 1422, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 512, - "globalTokens": 4096, + "tokensPerRank": 1, + "globalTokens": 8, "dispatch": { - "p50": 193.4400051832199, - "p90": 202.68799364566803, - "p95": 203.87199521064758, - "p99": 209.9519968032837 + "p50": 97.79199957847595, + "p90": 105.02400249242783, + "p95": 107.29599744081497, + "p99": 115.90400338172913 }, "combine": { - "p50": 216.8319970369339, - "p90": 220.92799842357635, - "p95": 223.55200350284576, - "p99": 226.04799270629883 + "p50": 79.77599650621414, + "p90": 82.11199939250946, + "p95": 86.91199868917465, + "p99": 88.79999816417694 }, "roundtrip": { - "p50": 382.4959993362427, - "p90": 387.7759873867035, - "p95": 388.7679874897003, - "p99": 392.767995595932 + "p50": 152.44799852371216, + "p90": 158.59200060367584, + "p95": 160.44799983501434, + "p99": 165.40800034999847 }, "isolatedSum": { - "p50": 410.2720022201538, - "p90": 423.6159920692444, - "p95": 427.42399871349335, - "p99": 435.9999895095825 + "p50": 177.5679960846901, + "p90": 187.1360018849373, + "p95": 194.20799612998962, + "p99": 204.70400154590607 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 312266752, - "combineLogicalBytes": 312266752, - "fanoutMean": 5.31787109375, - "recvTokensMax": 2779, - "stragglerRank": 0, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 1024, - "globalTokens": 8192, + "tokensPerRank": 8, + "globalTokens": 64, "dispatch": { - "p50": 315.0720000267029, - "p90": 320.1279938220978, - "p95": 322.04800844192505, - "p99": 324.5759904384613 + "p50": 98.01600128412247, + "p90": 103.87200117111206, + "p95": 106.01600259542465, + "p99": 113.11999708414078 }, "combine": { - 
"p50": 329.27998900413513, - "p90": 333.3759903907776, - "p95": 335.61599254608154, - "p99": 338.9120101928711 + "p50": 81.02399855852127, + "p90": 87.71199733018875, + "p95": 87.96799927949905, + "p99": 89.50400352478027 }, "roundtrip": { - "p50": 619.0720200538635, - "p90": 625.2480149269104, - "p95": 627.839982509613, - "p99": 630.7839751243591 + "p50": 155.16799688339233, + "p90": 160.38399934768677, + "p95": 162.23999857902527, + "p99": 166.87999665737152 }, "isolatedSum": { - "p50": 644.351989030838, - "p90": 653.5039842128754, - "p95": 657.6640009880066, - "p99": 663.4880006313324 + "p50": 179.03999984264374, + "p90": 191.5839985013008, + "p95": 193.9840018749237, + "p99": 202.62400060892105 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 623443968, - "combineLogicalBytes": 623443968, - "fanoutMean": 5.30859375, - "recvTokensMax": 5505, - "stragglerRank": 3, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2048, - "globalTokens": 16384, + "tokensPerRank": 32, + "globalTokens": 256, "dispatch": { - "p50": 560.8959794044495, - "p90": 569.8879957199097, - "p95": 572.1920132637024, - "p99": 577.2799849510193 + "p50": 101.98400169610977, + "p90": 106.6880002617836, + "p95": 109.95200276374817, + "p99": 120.35199999809265 }, "combine": { - "p50": 563.3599758148193, - "p90": 573.248028755188, - "p95": 576.3840079307556, - "p99": 580.672025680542 + "p50": 88.22400122880936, + "p90": 95.0080007314682, + "p95": 95.93600034713745, + "p99": 96.83199971914291 }, "roundtrip": { - "p50": 1093.727946281433, - "p90": 1102.6240587234497, - "p95": 1105.5999994277954, - "p99": 1112.0959520339966 + "p50": 162.75200247764587, + "p90": 169.63200271129608, + "p95": 171.58399522304535, + "p99": 176.28799378871918 }, "isolatedSum": { - "p50": 1124.2559552192688, - "p90": 1143.1360244750977, - "p95": 
1148.576021194458, - "p99": 1157.9520106315613 + "p50": 190.20800292491913, + "p90": 201.6960009932518, + "p95": 205.88800311088562, + "p99": 217.18399971723557 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1243805696, - "combineLogicalBytes": 1243805696, - "fanoutMean": 5.29547119140625, - "recvTokensMax": 10952, - "stragglerRank": 2, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 4096, - "globalTokens": 32768, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 1059.0720176696777, - "p90": 1071.7439651489258, - "p95": 1074.8480558395386, - "p99": 1091.2959575653076 + "p50": 129.66400384902954, + "p90": 137.08800077438354, + "p95": 139.0399932861328, + "p99": 142.752006649971 }, "combine": { - "p50": 1026.8800258636475, - "p90": 1036.2880229949951, - "p95": 1038.7840270996094, - "p99": 1047.4879741668701 + "p50": 115.00799655914307, + "p90": 120.7680031657219, + "p95": 121.31199985742569, + "p99": 127.83999741077423 }, "roundtrip": { - "p50": 2055.1679134368896, - "p90": 2067.13604927063, - "p95": 2069.823980331421, - "p99": 2075.5200386047363 + "p50": 212.89600431919098, + "p90": 218.72000396251678, + "p95": 219.9680060148239, + "p99": 224.06400740146637 }, "isolatedSum": { - "p50": 2085.952043533325, - "p90": 2108.031988143921, - "p95": 2113.632082939148, - "p99": 2138.7839317321777 + "p50": 244.6720004081726, + "p90": 257.85600394010544, + "p95": 260.3519931435585, + "p99": 270.59200406074524 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2487009280, - "combineLogicalBytes": 2487009280, - "fanoutMean": 5.294189453125, - "recvTokensMax": 21781, - "stragglerRank": 3, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 
@@ -36926,28 +38446,29 @@ ] }, { - "id": "cx-1ed69eb7", - "identity": "h100|deepep|7168|8|256|bf16|normal|cached-layout-comm-only-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", - "colorKey": "h100_17694d2c", - "comparisonKey": "379c3371e525c0fb", + "id": "cx-49497b06", + "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|wide-dynamic-range|none|none|0|tuned||8c8497a77d9085d", + "colorKey": "h100_42947950", + "comparisonKey": "5ec10556693a8c2b", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:48:34.870060+00:00", + "generatedAt": "2026-06-27T00:05:08.113815+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h100-dgxc-slurm_15", + "runner": "h100-dgxc-slurm_05", "sku": "h100", "backend": "deepep", - "phase": "prefill", + "phase": "decode", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "cached-layout-comm-only-v1", + "measurementContract": "layout-and-dispatch-v1", "topologyClass": "h100-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · bf16 [cl]", + "label": "H100 EP8 · deepep · bf16", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, @@ -36958,7 +38479,7 @@ "unevenTokens": "none", "eplbEnabled": false, "dispatchDtype": "bf16", - "activationProfile": "normal", + "activationProfile": "wide-dynamic-range", "combineQuantMode": "none" }, "resourceProfile": { @@ -36978,8 +38499,8 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "64d989e2e2a6b31", - "workloadId": "set:6:a426d66e479dc893", + "traceSignature": "8c8497a77d9085d", + "workloadId": "set:4:d8d49658059863f2", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -36987,230 +38508,156 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": 
"SemiAnalysisAI/InferenceX", "run": { - "id": "28271555838", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271555838", - "createdAt": "2026-06-26T23:46:24Z", + "id": "28272121618", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272121618", + "createdAt": "2026-06-27T00:05:08.113815+00:00", "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" }, "rows": [ { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 105.27999699115753, - "p90": 110.36799848079681, - "p95": 112.12799698114395, - "p99": 115.23199826478958 - }, - "combine": { - "p50": 106.175996363163, - "p90": 108.0000028014183, - "p95": 111.1999973654747, - "p99": 113.72800171375275 - }, - "roundtrip": { - "p50": 183.3599954843521, - "p90": 188.48000466823578, - "p95": 190.17599523067474, - "p99": 193.56800615787506 - }, - "isolatedSum": { - "p50": 211.45599335432053, - "p90": 218.36800128221512, - "p95": 223.32799434661865, - "p99": 228.95999997854233 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 256, - "globalTokens": 2048, + "tokensPerRank": 1, + "globalTokens": 8, "dispatch": { - "p50": 134.5919966697693, - "p90": 140.06400108337402, - "p95": 142.46399700641632, - "p99": 146.88000082969666 + "p50": 98.24000298976898, + "p90": 105.69600015878677, + "p95": 108.12799632549286, + "p99": 113.37599903345108 }, "combine": { - "p50": 152.12799608707428, - "p90": 158.36800634860992, - "p95": 161.0880047082901, - "p99": 162.81600296497345 + "p50": 79.68000322580338, + "p90": 82.07999914884567, + "p95": 82.97599852085114, + "p99": 87.61599659919739 }, "roundtrip": { - "p50": 254.46400046348572, - "p90": 259.93600487709045, - "p95": 262.4639868736267, - "p99": 268.2560086250305 + "p50": 146.464005112648, + "p90": 152.8320014476776, + 
"p95": 154.59200739860535, + "p99": 158.84800255298615 }, "isolatedSum": { - "p50": 286.71999275684357, - "p90": 298.43200743198395, - "p95": 303.5520017147064, - "p99": 309.6960037946701 + "p50": 177.92000621557236, + "p90": 187.77599930763245, + "p95": 191.103994846344, + "p99": 200.99199563264847 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 155889664, - "combineLogicalBytes": 155889664, - "fanoutMean": 5.3095703125, - "recvTokensMax": 1422, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 512, - "globalTokens": 4096, + "tokensPerRank": 8, + "globalTokens": 64, "dispatch": { - "p50": 188.960000872612, - "p90": 194.97600197792053, - "p95": 198.11199605464935, - "p99": 202.5279998779297 + "p50": 100.0640019774437, + "p90": 107.32799768447876, + "p95": 110.27199774980545, + "p99": 160.92799603939056 }, "combine": { - "p50": 228.67199778556824, - "p90": 236.09599471092224, - "p95": 237.05600202083588, - "p99": 241.08800292015076 + "p50": 81.34400099515915, + "p90": 87.16800063848495, + "p95": 87.87199854850769, + "p99": 90.27200192213058 }, "roundtrip": { - "p50": 391.90399646759033, - "p90": 399.80798959732056, - "p95": 402.3999869823456, - "p99": 424.0959882736206 + "p50": 152.92799472808838, + "p90": 160.51200032234192, + "p95": 162.30399906635284, + "p99": 166.24000668525696 }, "isolatedSum": { - "p50": 417.63199865818024, - "p90": 431.0719966888428, - "p95": 435.16799807548523, - "p99": 443.61600279808044 + "p50": 181.40800297260284, + "p90": 194.49599832296371, + "p95": 198.14399629831314, + "p99": 251.19999796152115 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 312266752, - "combineLogicalBytes": 312266752, - "fanoutMean": 5.31787109375, - "recvTokensMax": 2779, - "stragglerRank": 7, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + 
"recvTokensMax": 47, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 1024, - "globalTokens": 8192, + "tokensPerRank": 32, + "globalTokens": 256, "dispatch": { - "p50": 294.0160036087036, - "p90": 311.3279938697815, - "p95": 315.20000100135803, - "p99": 326.07999444007874 + "p50": 103.39199751615524, + "p90": 108.2879975438118, + "p95": 110.6560006737709, + "p99": 119.03999745845795 }, "combine": { - "p50": 366.1760091781616, - "p90": 382.9120099544525, - "p95": 391.32800698280334, - "p99": 407.039999961853 + "p50": 89.75999802350998, + "p90": 95.20000219345093, + "p95": 95.93600034713745, + "p99": 98.68799895048141 }, "roundtrip": { - "p50": 632.9600214958191, - "p90": 674.3680238723755, - "p95": 687.3279809951782, - "p99": 835.3919982910156 + "p50": 161.6320013999939, + "p90": 169.08800601959229, + "p95": 170.68800330162048, + "p99": 175.64800381660461 }, "isolatedSum": { - "p50": 660.1920127868652, - "p90": 694.240003824234, - "p95": 706.5280079841614, - "p99": 733.1199944019318 + "p50": 193.15199553966522, + "p90": 203.48799973726273, + "p95": 206.59200102090836, + "p99": 217.72799640893936 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 623443968, - "combineLogicalBytes": 623443968, - "fanoutMean": 5.30859375, - "recvTokensMax": 5505, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2048, - "globalTokens": 16384, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 509.15199518203735, - "p90": 521.8560099601746, - "p95": 526.1120200157166, - "p99": 533.0560207366943 + "p50": 130.46400249004364, + "p90": 136.9280070066452, + "p95": 139.23199474811554, + "p99": 143.5839980840683 }, "combine": { - "p50": 635.2319717407227, - "p90": 645.5680131912231, - "p95": 649.4719982147217, - "p99": 656.3839912414551 + "p50": 
114.78400230407715, + "p90": 120.83200365304947, + "p95": 122.11199849843979, + "p99": 122.8799968957901 }, "roundtrip": { - "p50": 1114.9760484695435, - "p90": 1128.0319690704346, - "p95": 1131.9680213928223, - "p99": 1147.711992263794 + "p50": 211.71200275421143, + "p90": 219.35999393463135, + "p95": 221.91999852657318, + "p99": 235.00800132751465 }, "isolatedSum": { - "p50": 1144.38396692276, - "p90": 1167.4240231513977, - "p95": 1175.5840182304382, - "p99": 1189.4400119781494 + "p50": 245.2480047941208, + "p90": 257.7600106596947, + "p95": 261.3439932465553, + "p99": 266.4639949798584 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1243805696, - "combineLogicalBytes": 1243805696, - "fanoutMean": 5.29547119140625, - "recvTokensMax": 10952, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4096, - "globalTokens": 32768, - "dispatch": { - "p50": 970.848023891449, - "p90": 995.136022567749, - "p95": 1001.7919540405273, - "p99": 1016.1279439926147 - }, - "combine": { - "p50": 1156.3199758529663, - "p90": 1167.1040058135986, - "p95": 1172.287940979004, - "p99": 1184.928059577942 - }, - "roundtrip": { - "p50": 2089.279890060425, - "p90": 2105.664014816284, - "p95": 2110.431909561157, - "p99": 2118.0479526519775 - }, - "isolatedSum": { - "p50": 2127.1679997444153, - "p90": 2162.2400283813477, - "p95": 2174.0798950195312, - "p99": 2201.0560035705566 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2487009280, - "combineLogicalBytes": 2487009280, - "fanoutMean": 5.294189453125, - "recvTokensMax": 21781, - "stragglerRank": 4, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -37218,39 +38665,40 @@ ] }, { - "id": "cx-8fb1cb65", - "identity": 
"h100|deepep|5120|8|160|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||0c022a63bbcbf42", - "colorKey": "h100_97196257", - "comparisonKey": "d361c128552b2ee8", + "id": "cx-3b04d344", + "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|zeros|none|none|0|tuned||8c8497a77d9085d", + "colorKey": "h100_42947950", + "comparisonKey": "8bd0272e65400ebd", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:51:51.842450+00:00", + "generatedAt": "2026-06-27T00:05:11.747577+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h100-dgxc-slurm_10", + "runner": "h100-dgxc-slurm_00", "sku": "h100", "backend": "deepep", - "phase": "prefill", + "phase": "decode", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "runtime-visible-v1", + "measurementContract": "layout-and-dispatch-v1", "topologyClass": "h100-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · fp8", + "label": "H100 EP8 · deepep · bf16", + "model": "DeepSeek-V3/V4", "shape": { - "hidden": 5120, + "hidden": 7168, "topk": 8, - "experts": 160, + "experts": 256, "routing": "uniform", "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, - "dispatchDtype": "fp8", - "activationProfile": "normal", + "dispatchDtype": "bf16", + "activationProfile": "zeros", "combineQuantMode": "none" }, "resourceProfile": { @@ -37270,8 +38718,8 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "0c022a63bbcbf42", - "workloadId": "set:6:28c0c09b13ff0acf", + "traceSignature": "8c8497a77d9085d", + "workloadId": "set:4:d8d49658059863f2", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -37279,230 +38727,156 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": 
"SemiAnalysisAI/InferenceX", "run": { - "id": "28271695735", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271695735", - "createdAt": "2026-06-26T23:50:45Z", + "id": "28272113941", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272113941", + "createdAt": "2026-06-27T00:05:11.747577+00:00", "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" }, "rows": [ { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 196.28800451755524, - "p90": 202.33599841594696, - "p95": 203.96800339221954, - "p99": 210.07999777793884 - }, - "combine": { - "p50": 85.11999994516373, - "p90": 87.5839963555336, - "p95": 89.72799777984619, - "p99": 93.24800223112106 - }, - "roundtrip": { - "p50": 266.7520046234131, - "p90": 273.824006319046, - "p95": 277.5680124759674, - "p99": 291.83998703956604 - }, - "isolatedSum": { - "p50": 281.40800446271896, - "p90": 289.91999477148056, - "p95": 293.69600117206573, - "p99": 303.3280000090599 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 27837440, - "combineLogicalBytes": 55674880, - "fanoutMean": 5.3095703125, - "recvTokensMax": 699, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 256, - "globalTokens": 2048, - "dispatch": { - "p50": 248.3839988708496, - "p90": 296.7680096626282, - "p95": 299.80799555778503, - "p99": 321.28000259399414 - }, - "combine": { - "p50": 118.81600320339203, - "p90": 125.15200674533844, - "p95": 126.17599964141846, - "p99": 128.06400656700134 - }, - "roundtrip": { - "p50": 353.85599732398987, - "p90": 407.9360067844391, - "p95": 410.3040099143982, - "p99": 414.40001130104065 - }, - "isolatedSum": { - "p50": 367.20000207424164, - "p90": 421.9200164079666, - "p95": 425.9839951992035, - "p99": 449.3440091609955 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 55552000, - "combineLogicalBytes": 111104000, - "fanoutMean": 5.2978515625, - "recvTokensMax": 1387, - "stragglerRank": 5, - 
"correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 512, - "globalTokens": 4096, + "tokensPerRank": 1, + "globalTokens": 8, "dispatch": { - "p50": 341.43999218940735, - "p90": 394.0480053424835, - "p95": 396.5440094470978, - "p99": 400.41598677635193 + "p50": 97.15200215578079, + "p90": 103.67999970912933, + "p95": 105.85600137710571, + "p99": 108.99200290441513 }, "combine": { - "p50": 185.12000143527985, - "p90": 191.3280040025711, - "p95": 193.05600225925446, - "p99": 195.74399292469025 + "p50": 79.64800298213959, + "p90": 82.33600109815598, + "p95": 86.84799820184708, + "p99": 87.96799927949905 }, "roundtrip": { - "p50": 510.0160241127014, - "p90": 567.3919916152954, - "p95": 570.8479881286621, - "p99": 574.176013469696 + "p50": 151.8400013446808, + "p90": 158.01599621772766, + "p95": 160.76800227165222, + "p99": 165.3120070695877 }, "isolatedSum": { - "p50": 526.5599936246872, - "p90": 585.3760093450546, - "p95": 589.6000117063522, - "p99": 596.1599797010422 + "p50": 176.80000513792038, + "p90": 186.0160008072853, + "p95": 192.7039995789528, + "p99": 196.96000218391418 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 111549440, - "combineLogicalBytes": 223098880, - "fanoutMean": 5.319091796875, - "recvTokensMax": 2762, - "stragglerRank": 5, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 1024, - "globalTokens": 8192, + "tokensPerRank": 8, + "globalTokens": 64, "dispatch": { - "p50": 517.9839730262756, - "p90": 568.6720013618469, - "p95": 574.0159749984741, - "p99": 579.8079967498779 + "p50": 97.28000313043594, + "p90": 103.71199995279312, + "p95": 106.4319983124733, + "p99": 121.63200229406357 }, "combine": { - "p50": 291.26399755477905, - "p90": 295.80798745155334, - "p95": 297.08799719810486, - "p99": 299.96800422668457 + "p50": 79.93599772453308, + "p90": 
87.39200234413147, + "p95": 87.93599903583527, + "p99": 90.04800021648407 }, "roundtrip": { - "p50": 794.2079901695251, - "p90": 801.3120293617249, - "p95": 804.095983505249, - "p99": 814.4959807395935 + "p50": 153.72799336910248, + "p90": 159.55199301242828, + "p95": 160.7999950647354, + "p99": 165.6000018119812 }, "isolatedSum": { - "p50": 809.2479705810547, - "p90": 864.4799888134003, - "p95": 871.103972196579, - "p99": 879.7760009765625 + "p50": 177.21600085496902, + "p90": 191.1040022969246, + "p95": 194.36799734830856, + "p99": 211.68000251054764 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 223365120, - "combineLogicalBytes": 446730240, - "fanoutMean": 5.325439453125, - "recvTokensMax": 5518, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2048, - "globalTokens": 16384, + "tokensPerRank": 32, + "globalTokens": 256, "dispatch": { - "p50": 962.5599980354309, - "p90": 971.2640047073364, - "p95": 973.0560183525085, - "p99": 978.3999919891357 + "p50": 104.00000214576721, + "p90": 108.44799876213074, + "p95": 111.68000102043152, + "p99": 126.75200402736664 }, "combine": { - "p50": 513.1199955940247, - "p90": 523.5520005226135, - "p95": 526.0800123214722, - "p99": 531.9039821624756 + "p50": 87.99999952316284, + "p90": 93.44000369310379, + "p95": 95.87199985980988, + "p99": 97.59999811649323 }, "roundtrip": { - "p50": 1460.576057434082, - "p90": 1472.4160432815552, - "p95": 1476.6080379486084, - "p99": 1773.3759880065918 + "p50": 161.8880033493042, + "p90": 168.64000260829926, + "p95": 170.0800061225891, + "p99": 175.99999904632568 }, "isolatedSum": { - "p50": 1475.6799936294556, - "p90": 1494.81600522995, - "p95": 1499.1360306739807, - "p99": 1510.3039741516113 + "p50": 192.00000166893005, + "p90": 201.88800245523453, + "p95": 207.5520008802414, + "p99": 224.35200214385986 }, 
"roundtripMeasured": true, - "dispatchLogicalBytes": 446817280, - "combineLogicalBytes": 893634560, - "fanoutMean": 5.32647705078125, - "recvTokensMax": 11032, - "stragglerRank": 5, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 4096, - "globalTokens": 32768, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 1818.5919523239136, - "p90": 1826.9439935684204, - "p95": 1829.7280073165894, - "p99": 1833.8559865951538 + "p50": 130.0159990787506, + "p90": 137.7280056476593, + "p95": 138.7840062379837, + "p99": 142.2719955444336 }, "combine": { - "p50": 930.3359985351562, - "p90": 939.7119879722595, - "p95": 942.8160190582275, - "p99": 948.0640292167664 + "p50": 115.167997777462, + "p90": 120.54400146007538, + "p95": 120.95999717712402, + "p99": 123.87199699878693 }, "roundtrip": { - "p50": 2736.9279861450195, - "p90": 2750.3039836883545, - "p95": 2755.199909210205, - "p99": 2763.64803314209 + "p50": 212.47999370098114, + "p90": 216.63999557495117, + "p95": 218.1439995765686, + "p99": 221.47199511528015 }, "isolatedSum": { - "p50": 2748.92795085907, - "p90": 2766.65598154068, - "p95": 2772.544026374817, - "p99": 2781.92001581192 + "p50": 245.18399685621262, + "p90": 258.2720071077347, + "p95": 259.7440034151077, + "p99": 266.1439925432205 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 893132800, - "combineLogicalBytes": 1786265600, - "fanoutMean": 5.323486328125, - "recvTokensMax": 21895, - "stragglerRank": 5, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -37510,18 +38884,18 @@ ] }, { - "id": "cx-db3c52ad", - "identity": 
"h100|deepep|6144|8|256|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", - "colorKey": "h100_97196257", - "comparisonKey": "d4fd66af6f4726f6", + "id": "cx-d0428a76", + "identity": "h100|deepep|7168|8|256|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h100_ff7906f8", + "comparisonKey": "e3488cf5058170e6", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:52:17.424978+00:00", + "generatedAt": "2026-06-26T23:47:28.813270+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h100-dgxc-slurm_09", + "runner": "h100-dgxc-slurm_07", "sku": "h100", "backend": "deepep", - "phase": "prefill", + "phase": "decode", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", @@ -37531,9 +38905,10 @@ "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · fp8", + "label": "H100 EP8 · deepep · bf16", + "model": "DeepSeek-V3/V4", "shape": { - "hidden": 6144, + "hidden": 7168, "topk": 8, "experts": 256, "routing": "uniform", @@ -37541,7 +38916,7 @@ "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, - "dispatchDtype": "fp8", + "dispatchDtype": "bf16", "activationProfile": "normal", "combineQuantMode": "none" }, @@ -37562,8 +38937,8 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "64d989e2e2a6b31", - "workloadId": "set:6:9f5e1e005a35e937", + "traceSignature": "ac583971f94b176", + "workloadId": "set:8:d8d49658059863f2", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -37571,229 +38946,303 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271710412", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271710412", - "createdAt": "2026-06-26T23:51:13Z", + "id": "28271559607", + "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271559607", + "createdAt": "2026-06-26T23:47:28.813270+00:00", "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" }, "rows": [ { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 1, + "globalTokens": 8, "dispatch": { - "p50": 208.8640034198761, - "p90": 215.68000316619873, - "p95": 218.75199675559998, - "p99": 469.56801414489746 + "p50": 96.79999947547913, + "p90": 103.39199751615524, + "p95": 104.80000078678131, + "p99": 109.43999886512756 }, "combine": { - "p50": 90.33600240945816, - "p90": 93.21600198745728, - "p95": 95.551997423172, - "p99": 98.1760025024414 + "p50": 79.13599908351898, + "p90": 81.40800148248672, + "p95": 86.68799698352814, + "p99": 87.90399879217148 }, "roundtrip": { - "p50": 286.72000765800476, - "p90": 290.75199365615845, - "p95": 293.0240035057068, - "p99": 295.52000761032104 + "p50": 152.12799608707428, + "p90": 159.96800363063812, + "p95": 162.36799955368042, + "p99": 177.69600450992584 }, "isolatedSum": { - "p50": 299.20000582933426, - "p90": 308.896005153656, - "p95": 314.303994178772, - "p99": 567.7440166473389 + "p50": 175.9359985589981, + "p90": 184.79999899864197, + "p95": 191.48799777030945, + "p99": 197.34399765729904 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 33288192, - "combineLogicalBytes": 66576384, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 6, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 256, - "globalTokens": 2048, + "tokensPerRank": 2, + "globalTokens": 16, "dispatch": { - "p50": 267.2320008277893, - "p90": 273.4079957008362, - "p95": 275.64799785614014, - "p99": 286.8480086326599 + "p50": 71.23199850320816, + "p90": 101.98400169610977, + "p95": 103.84000092744827, + "p99": 108.35199803113937 }, "combine": { - "p50": 127.23200023174286, - "p90": 
130.40000200271606, - "p95": 131.52000308036804, - "p99": 134.0479999780655 + "p50": 72.54400104284286, + "p90": 81.40800148248672, + "p95": 82.62400329113007, + "p99": 87.77599781751633 }, "roundtrip": { - "p50": 387.3920142650604, - "p90": 392.2879993915558, - "p95": 394.9440121650696, - "p99": 403.328001499176 + "p50": 129.08799946308136, + "p90": 158.2079976797104, + "p95": 159.58400070667267, + "p99": 165.02399742603302 }, "isolatedSum": { - "p50": 394.46400105953217, - "p90": 403.80799770355225, - "p95": 407.1680009365082, - "p99": 420.8960086107254 + "p50": 143.77599954605103, + "p90": 183.3920031785965, + "p95": 186.46400421857834, + "p99": 196.1279958486557 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 66809856, - "combineLogicalBytes": 133619712, - "fanoutMean": 5.3095703125, - "recvTokensMax": 1422, - "stragglerRank": 6, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 512, - "globalTokens": 4096, + "tokensPerRank": 4, + "globalTokens": 32, "dispatch": { - "p50": 377.6960074901581, - "p90": 423.93600940704346, - "p95": 426.4320135116577, - "p99": 432.6080083847046 + "p50": 70.52800059318542, + "p90": 99.84000027179718, + "p95": 105.72800040245056, + "p99": 115.07199704647064 }, "combine": { - "p50": 203.42400670051575, - "p90": 213.95200490951538, - "p95": 215.03999829292297, - "p99": 218.6879962682724 + "p50": 72.9919970035553, + "p90": 80.99199831485748, + "p95": 86.94399893283844, + "p99": 103.55199873447418 }, "roundtrip": { - "p50": 564.4479990005493, - "p90": 604.8960089683533, - "p95": 608.1920266151428, - "p99": 615.1360273361206 + "p50": 129.43999469280243, + "p90": 156.19200468063354, + "p95": 159.07199680805206, + "p99": 162.56000101566315 }, "isolatedSum": { - "p50": 581.1200141906738, - "p90": 637.8880143165588, - "p95": 641.4720118045807, - "p99": 651.296004652977 + 
"p50": 143.51999759674072, + "p90": 180.83199858665466, + "p95": 192.671999335289, + "p99": 218.62399578094482 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 133828608, - "combineLogicalBytes": 267657216, - "fanoutMean": 5.31787109375, - "recvTokensMax": 2779, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 1024, - "globalTokens": 8192, + "tokensPerRank": 8, + "globalTokens": 64, "dispatch": { - "p50": 593.504011631012, - "p90": 597.9200005531311, - "p95": 599.295973777771, - "p99": 620.9279894828796 + "p50": 96.54399752616882, + "p90": 101.3759970664978, + "p95": 103.61599922180176, + "p99": 111.26399785280228 }, "combine": { - "p50": 322.59199023246765, - "p90": 326.78401470184326, - "p95": 328.7999927997589, - "p99": 331.36001229286194 + "p50": 79.52000200748444, + "p90": 87.13600039482117, + "p95": 87.64799684286118, + "p99": 88.73599767684937 }, "roundtrip": { - "p50": 899.1680145263672, - "p90": 904.416024684906, - "p95": 906.6240191459656, - "p99": 913.0560159683228 + "p50": 152.16000378131866, + "p90": 159.39199924468994, + "p95": 161.15200519561768, + "p99": 170.52799463272095 }, "isolatedSum": { - "p50": 916.0960018634796, - "p90": 924.7040152549744, - "p95": 928.0959665775299, - "p99": 952.2880017757416 + "p50": 176.06399953365326, + "p90": 188.51199746131897, + "p95": 191.26399606466293, + "p99": 199.99999552965164 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 267190272, - "combineLogicalBytes": 534380544, - "fanoutMean": 5.30859375, - "recvTokensMax": 5505, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 96.3520035147667, + "p90": 
101.75999999046326, + "p95": 104.89600151777267, + "p99": 110.11199653148651 + }, + "combine": { + "p50": 84.48000252246857, + "p90": 88.03199976682663, + "p95": 89.21600133180618, + "p99": 95.23200243711472 + }, + "roundtrip": { + "p50": 153.05599570274353, + "p90": 160.288006067276, + "p95": 162.432000041008, + "p99": 171.2000072002411 + }, + "isolatedSum": { + "p50": 180.83200603723526, + "p90": 189.7919997572899, + "p95": 194.11200284957886, + "p99": 205.34399896860123 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2048, - "globalTokens": 16384, + "tokensPerRank": 32, + "globalTokens": 256, "dispatch": { - "p50": 1106.943964958191, - "p90": 1117.9519891738892, - "p95": 1120.7040548324585, - "p99": 1126.2719631195068 + "p50": 89.9519994854927, + "p90": 104.35199737548828, + "p95": 106.65600001811981, + "p99": 117.85600334405899 }, "combine": { - "p50": 574.4640231132507, - "p90": 583.0720067024231, - "p95": 584.991991519928, - "p99": 590.719997882843 + "p50": 81.216000020504, + "p90": 92.19200164079666, + "p95": 95.39200365543365, + "p99": 96.0640013217926 }, "roundtrip": { - "p50": 1684.0640306472778, - "p90": 1699.5840072631836, - "p95": 1705.1520347595215, - "p99": 1751.9680261611938 + "p50": 141.05600118637085, + "p90": 168.2880073785782, + "p95": 169.5680022239685, + "p99": 174.40000176429749 }, "isolatedSum": { - "p50": 1681.4079880714417, - "p90": 1701.0239958763123, - "p95": 1705.6960463523865, - "p99": 1716.9919610023499 + "p50": 171.1679995059967, + "p90": 196.54399901628494, + "p95": 202.04800367355347, + "p99": 213.9200046658516 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 533059584, - "combineLogicalBytes": 1066119168, - "fanoutMean": 5.29547119140625, - "recvTokensMax": 10952, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 
19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 111.51999980211258, + "p90": 119.00799721479416, + "p95": 121.44000083208084, + "p99": 126.56000256538391 + }, + "combine": { + "p50": 95.0080007314682, + "p90": 103.04000228643417, + "p95": 103.35999727249146, + "p99": 104.92800176143646 + }, + "roundtrip": { + "p50": 164.63999450206757, + "p90": 182.3039948940277, + "p95": 185.12000143527985, + "p99": 188.7039989233017 + }, + "isolatedSum": { + "p50": 206.52800053358078, + "p90": 222.04799950122833, + "p95": 224.7999981045723, + "p99": 231.48800432682037 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 4096, - "globalTokens": 32768, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 2105.151891708374, - "p90": 2114.687919616699, - "p95": 2120.1279163360596, - "p99": 2217.0560359954834 + "p50": 116.99199676513672, + "p90": 133.2480013370514, + "p95": 135.51999628543854, + "p99": 140.6400054693222 }, "combine": { - "p50": 1052.9279708862305, - "p90": 1061.0560178756714, - "p95": 1063.264012336731, - "p99": 1068.0320262908936 + "p50": 106.88000172376633, + "p90": 119.55200135707855, + "p95": 120.2239990234375, + "p99": 127.55200266838074 }, "roundtrip": { - "p50": 3201.6959190368652, - "p90": 3233.1199645996094, - "p95": 3240.8320903778076, - "p99": 3259.615898132324 + "p50": 199.3280053138733, + "p90": 215.45599400997162, + "p95": 217.56799519062042, + "p99": 258.91199707984924 }, "isolatedSum": { - "p50": 3158.0798625946045, - "p90": 3175.7439374923706, - "p95": 3183.3919286727905, - "p99": 3285.088062286377 + "p50": 223.87199848890305, + "p90": 252.80000269412994, + "p95": 
255.74399530887604, + "p99": 268.19200813770294 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1065861120, - "combineLogicalBytes": 2131722240, - "fanoutMean": 5.294189453125, - "recvTokensMax": 21781, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, "stragglerRank": 6, "correct": true, "samplesPooled": 600, @@ -37802,18 +39251,18 @@ ] }, { - "id": "cx-bf310e7a", - "identity": "h100|deepep|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", - "colorKey": "h100_a96c99f3", - "comparisonKey": "0fd91e8522237192", + "id": "cx-79a82113", + "identity": "h100|deepep|7168|8|384|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||d6c49ae98878760", + "colorKey": "h100_42947950", + "comparisonKey": "d4720c9e1313f28d", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:50:27.259424+00:00", + "generatedAt": "2026-06-27T11:13:34.351891+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h100-dgxc-slurm_01", + "runner": "h100-dgxc-slurm_05", "sku": "h100", "backend": "deepep", - "phase": "prefill", + "phase": "decode", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", @@ -37823,17 +39272,18 @@ "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · fp8", + "label": "H100 EP8 · deepep · bf16", + "model": "Kimi-K2", "shape": { "hidden": 7168, "topk": 8, - "experts": 256, + "experts": 384, "routing": "uniform", "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, - "dispatchDtype": "fp8", + "dispatchDtype": "bf16", "activationProfile": "normal", "combineQuantMode": "none" }, @@ -37854,8 +39304,8 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "64d989e2e2a6b31", - "workloadId": "set:6:a426d66e479dc893", + "traceSignature": "d6c49ae98878760", + "workloadId": "set:8:9a27d0df4b17fa09", 
"workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -37863,230 +39313,304 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271570100", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271570100", - "createdAt": "2026-06-26T23:46:51Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28287499275", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28287499275", + "createdAt": "2026-06-27T11:13:34.351891+00:00", + "sha": "df7fddee0a275156d4c72fa006cd2b73bce72613" }, "rows": [ { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 1, + "globalTokens": 8, "dispatch": { - "p50": 87.93599903583527, - "p90": 92.0960009098053, - "p95": 94.55999732017517, - "p99": 100.63999891281128 + "p50": 96.09600156545639, + "p90": 104.25599664449692, + "p95": 107.744000852108, + "p99": 122.81599640846252 }, "combine": { - "p50": 97.02400118112564, - "p90": 99.96800124645233, - "p95": 102.39999741315842, - "p99": 105.98400235176086 + "p50": 78.75200361013412, + "p90": 81.24800026416779, + "p95": 81.85599744319916, + "p99": 87.3280018568039 }, "roundtrip": { - "p50": 216.09599888324738, - "p90": 219.4560021162033, - "p95": 220.7999974489212, - "p99": 224.5119959115982 + "p50": 149.3760049343109, + "p90": 157.72800147533417, + "p95": 160.863995552063, + "p99": 184.7359985113144 }, "isolatedSum": { - "p50": 184.9600002169609, - "p90": 192.06400215625763, - "p95": 196.9599947333336, - "p99": 206.62400126457214 + "p50": 174.84800517559052, + "p90": 185.5039969086647, + "p95": 189.59999829530716, + "p99": 210.14399826526642 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 38836224, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 5, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + 
"fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 256, - "globalTokens": 2048, + "tokensPerRank": 2, + "globalTokens": 16, "dispatch": { - "p50": 107.29599744081497, - "p90": 124.28800016641617, - "p95": 125.63200294971466, - "p99": 128.9920061826706 + "p50": 73.82400333881378, + "p90": 103.20000350475311, + "p95": 108.19199681282043, + "p99": 178.3359944820404 }, "combine": { - "p50": 144.03200149536133, - "p90": 154.36799824237823, - "p95": 155.58399260044098, - "p99": 158.78400206565857 + "p50": 72.83200323581696, + "p90": 80.9599980711937, + "p95": 82.14399963617325, + "p99": 90.4960036277771 }, "roundtrip": { - "p50": 332.60801434516907, - "p90": 353.05601358413696, - "p95": 354.8479974269867, - "p99": 358.36800932884216 + "p50": 129.02399897575378, + "p90": 156.76799416542053, + "p95": 159.39199924468994, + "p99": 176.64000391960144 }, "isolatedSum": { - "p50": 251.3279989361763, - "p90": 278.6559984087944, - "p95": 281.21599555015564, - "p99": 287.77600824832916 + "p50": 146.65600657463074, + "p90": 184.1600015759468, + "p95": 190.33599644899368, + "p99": 268.8319981098175 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77944832, - "combineLogicalBytes": 155889664, - "fanoutMean": 5.3095703125, - "recvTokensMax": 1422, - "stragglerRank": 5, + "dispatchLogicalBytes": 1218560, + "combineLogicalBytes": 1218560, + "fanoutMean": 5.3125, + "recvTokensMax": 14, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 512, - "globalTokens": 4096, + "tokensPerRank": 4, + "globalTokens": 32, "dispatch": { - "p50": 143.93599331378937, - "p90": 162.36799955368042, - "p95": 164.8319959640503, - "p99": 172.95999825000763 + "p50": 72.95999675989151, + "p90": 101.95200145244598, + "p95": 110.1439967751503, + "p99": 251.10399723052979 }, "combine": { - "p50": 225.21600127220154, - "p90": 234.01600122451782, - "p95": 235.3920042514801, - 
"p99": 237.8239929676056 + "p50": 72.67200201749802, + "p90": 81.50400221347809, + "p95": 82.43200182914734, + "p99": 87.42400258779526 }, "roundtrip": { - "p50": 529.375970363617, - "p90": 547.9679703712463, - "p95": 550.2399802207947, - "p99": 553.0239939689636 + "p50": 129.02399897575378, + "p90": 155.32800555229187, + "p95": 159.61599349975586, + "p99": 171.87200486660004 }, "isolatedSum": { - "p50": 369.1519945859909, - "p90": 396.38400077819824, - "p95": 400.2240002155304, - "p99": 410.7839912176132 + "p50": 145.63199877738953, + "p90": 183.45600366592407, + "p95": 192.57599860429764, + "p99": 338.52799981832504 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 156133376, - "combineLogicalBytes": 312266752, - "fanoutMean": 5.31787109375, - "recvTokensMax": 2779, - "stragglerRank": 4, + "dispatchLogicalBytes": 2408448, + "combineLogicalBytes": 2408448, + "fanoutMean": 5.25, + "recvTokensMax": 26, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 1024, - "globalTokens": 8192, + "tokensPerRank": 8, + "globalTokens": 64, "dispatch": { - "p50": 217.56799519062042, - "p90": 234.047994017601, - "p95": 235.87200045585632, - "p99": 239.55200612545013 + "p50": 95.13600170612335, + "p90": 103.2319962978363, + "p95": 106.46399855613708, + "p99": 127.93600559234619 }, "combine": { - "p50": 362.527996301651, - "p90": 370.7520067691803, - "p95": 373.1519877910614, - "p99": 375.90399384498596 + "p50": 78.65600287914276, + "p90": 81.727996468544, + "p95": 86.496002972126, + "p99": 88.16000074148178 }, "roundtrip": { - "p50": 898.6240029335022, - "p90": 918.2080030441284, - "p95": 922.7200150489807, - "p99": 931.5199851989746 + "p50": 150.751993060112, + "p90": 161.50400042533875, + "p95": 208.41600000858307, + "p99": 230.20799458026886 }, "isolatedSum": { - "p50": 580.0959914922714, - "p90": 604.8000007867813, - "p95": 609.0239882469177, - "p99": 615.4559999704361 + "p50": 173.7920045852661, + "p90": 184.9599927663803, + 
"p95": 192.9600015282631, + "p99": 216.09600633382797 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 311721984, - "combineLogicalBytes": 623443968, - "fanoutMean": 5.30859375, - "recvTokensMax": 5505, - "stragglerRank": 0, + "dispatchLogicalBytes": 4831232, + "combineLogicalBytes": 4831232, + "fanoutMean": 5.265625, + "recvTokensMax": 48, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2048, - "globalTokens": 16384, + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 96.19200229644775, + "p90": 101.50399804115295, + "p95": 103.96800190210342, + "p99": 112.57600039243698 + }, + "combine": { + "p50": 81.91999793052673, + "p90": 88.19200098514557, + "p95": 89.1840010881424, + "p99": 90.40000289678574 + }, + "roundtrip": { + "p50": 151.0400027036667, + "p90": 159.4880074262619, + "p95": 161.76000237464905, + "p99": 199.77599382400513 + }, + "isolatedSum": { + "p50": 178.1120002269745, + "p90": 189.69599902629852, + "p95": 193.15200299024582, + "p99": 202.97600328922272 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9848832, + "combineLogicalBytes": 9848832, + "fanoutMean": 5.3671875, + "recvTokensMax": 91, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, "dispatch": { - "p50": 356.25600814819336, - "p90": 370.33599615097046, - "p95": 372.73600697517395, - "p99": 376.8959939479828 + "p50": 90.01599997282028, + "p90": 104.22399640083313, + "p95": 106.39999806880951, + "p99": 117.47200042009354 }, "combine": { - "p50": 631.8399906158447, - "p90": 639.0079855918884, - "p95": 641.6640281677246, - "p99": 645.0560092926025 + "p50": 81.24800026416779, + "p90": 90.36800265312195, + "p95": 94.59199756383896, + "p99": 96.00000083446503 }, "roundtrip": { - "p50": 1633.247971534729, - "p90": 1644.7360515594482, - "p95": 1647.3599672317505, - "p99": 1654.4640064239502 + "p50": 142.81600713729858, + "p90": 
168.60799491405487, + "p95": 176.06399953365326, + "p99": 256.8640112876892 }, "isolatedSum": { - "p50": 988.0959987640381, - "p90": 1009.3439817428589, - "p95": 1014.4000351428986, - "p99": 1021.9520032405853 + "p50": 171.26400023698807, + "p90": 194.59199905395508, + "p95": 200.99199563264847, + "p99": 213.47200125455856 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 621902848, - "combineLogicalBytes": 1243805696, - "fanoutMean": 5.29547119140625, - "recvTokensMax": 10952, - "stragglerRank": 4, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 19496960, + "fanoutMean": 5.3125, + "recvTokensMax": 178, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 4096, - "globalTokens": 32768, + "tokensPerRank": 64, + "globalTokens": 512, "dispatch": { - "p50": 645.5039978027344, - "p90": 653.2800197601318, - "p95": 656.1599969863892, - "p99": 660.7040166854858 + "p50": 109.40799862146378, + "p90": 139.26400244235992, + "p95": 141.7279988527298, + "p99": 146.84799313545227 }, "combine": { - "p50": 1158.911943435669, - "p90": 1167.9999828338623, - "p95": 1170.4319715499878, - "p99": 1178.2399415969849 + "p50": 95.83999961614609, + "p90": 112.57600039243698, + "p95": 115.35999923944473, + "p99": 119.77600306272507 }, "roundtrip": { - "p50": 3097.759962081909, - "p90": 3109.312057495117, - "p95": 3113.3439540863037, - "p99": 3129.1520595550537 + "p50": 173.40800166130066, + "p90": 206.68800175189972, + "p95": 210.4959934949875, + "p99": 213.3760005235672 }, "isolatedSum": { - "p50": 1804.4159412384033, - "p90": 1821.2800025939941, - "p95": 1826.591968536377, - "p99": 1838.9439582824707 + "p50": 205.24799823760986, + "p90": 251.8400028347969, + "p95": 257.08799809217453, + "p99": 266.62399619817734 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1243504640, - "combineLogicalBytes": 2487009280, - "fanoutMean": 5.294189453125, - "recvTokensMax": 21781, - "stragglerRank": 5, + "dispatchLogicalBytes": 38836224, 
+ "combineLogicalBytes": 38836224, + "fanoutMean": 5.291015625, + "recvTokensMax": 372, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 115.07199704647064, + "p90": 131.96800649166107, + "p95": 135.19999384880066, + "p99": 139.13600146770477 + }, + "combine": { + "p50": 106.4319983124733, + "p90": 119.29599940776825, + "p95": 120.06399780511856, + "p99": 122.11199849843979 + }, + "roundtrip": { + "p50": 202.5279998779297, + "p90": 216.0319983959198, + "p95": 217.66400337219238, + "p99": 221.50400280952454 + }, + "isolatedSum": { + "p50": 221.50399535894394, + "p90": 251.26400589942932, + "p95": 255.26399165391922, + "p99": 261.24799996614456 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77514752, + "combineLogicalBytes": 77514752, + "fanoutMean": 5.2802734375, + "recvTokensMax": 707, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 @@ -38094,18 +39618,18 @@ ] }, { - "id": "cx-9440251a", - "identity": "h100|deepep|7168|8|256|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", - "colorKey": "h100_97196257", - "comparisonKey": "2b50b361430bc4f6", + "id": "cx-e96d722b", + "identity": "h100|deepep|7168|8|384|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||d6c49ae98878760", + "colorKey": "h100_ff7906f8", + "comparisonKey": "c69daa1ab05193b6", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:48:40.278594+00:00", + "generatedAt": "2026-06-26T23:51:56.132475+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h100-dgxc-slurm_14", + "runner": "h100-dgxc-slurm_17", "sku": "h100", "backend": "deepep", - "phase": "prefill", + "phase": "decode", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", @@ -38115,17 +39639,18 @@ "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · fp8", + "label": 
"H100 EP8 · deepep · bf16", + "model": "Kimi-K2", "shape": { "hidden": 7168, "topk": 8, - "experts": 256, + "experts": 384, "routing": "uniform", "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, - "dispatchDtype": "fp8", + "dispatchDtype": "bf16", "activationProfile": "normal", "combineQuantMode": "none" }, @@ -38146,8 +39671,8 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "64d989e2e2a6b31", - "workloadId": "set:6:a426d66e479dc893", + "traceSignature": "d6c49ae98878760", + "workloadId": "set:8:9a27d0df4b17fa09", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -38155,230 +39680,304 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271583505", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271583505", - "createdAt": "2026-06-26T23:47:19Z", + "id": "28271667766", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271667766", + "createdAt": "2026-06-26T23:51:56.132475+00:00", "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" }, "rows": [ { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 1, + "globalTokens": 8, "dispatch": { - "p50": 211.2639993429184, - "p90": 217.28000044822693, - "p95": 219.32800114154816, - "p99": 226.78400576114655 + "p50": 96.09600156545639, + "p90": 102.81600058078766, + "p95": 104.54399883747101, + "p99": 110.59200018644333 }, "combine": { - "p50": 97.15200215578079, - "p90": 100.41599720716476, - "p95": 102.27199643850327, - "p99": 105.59999942779541 + "p50": 79.03999835252762, + "p90": 81.50400221347809, + "p95": 82.11199939250946, + "p99": 87.90399879217148 }, "roundtrip": { - "p50": 296.640008687973, - "p90": 303.26399207115173, - "p95": 305.82401156425476, - "p99": 313.9199912548065 + "p50": 145.56799829006195, + "p90": 153.31199765205383, + "p95": 
155.71199357509613, + "p99": 159.39199924468994 }, "isolatedSum": { - "p50": 308.4160014986992, - "p90": 317.6959976553917, - "p95": 321.5999975800514, - "p99": 332.38400518894196 + "p50": 175.135999917984, + "p90": 184.32000279426575, + "p95": 186.65599822998047, + "p99": 198.4959989786148 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 38836224, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 4, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 256, - "globalTokens": 2048, + "tokensPerRank": 2, + "globalTokens": 16, "dispatch": { - "p50": 281.98400139808655, - "p90": 324.6400058269501, - "p95": 327.7760148048401, - "p99": 332.99198746681213 + "p50": 71.03999704122543, + "p90": 101.6319990158081, + "p95": 102.65599936246872, + "p99": 106.62399977445602 }, "combine": { - "p50": 141.53599739074707, - "p90": 147.87200093269348, - "p95": 149.9519944190979, - "p99": 152.70400047302246 + "p50": 72.28799909353256, + "p90": 80.54400235414505, + "p95": 81.40800148248672, + "p99": 87.00799942016602 }, "roundtrip": { - "p50": 409.7920060157776, - "p90": 415.45599699020386, - "p95": 417.7280068397522, - "p99": 423.39199781417847 + "p50": 129.18399274349213, + "p90": 152.70400047302246, + "p95": 156.92800283432007, + "p99": 160.76800227165222 }, "isolatedSum": { - "p50": 423.5199987888336, - "p90": 472.51200675964355, - "p95": 477.728009223938, - "p99": 485.6959879398346 + "p50": 143.327996134758, + "p90": 182.17600136995316, + "p95": 184.06400084495544, + "p99": 193.63199919462204 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77944832, - "combineLogicalBytes": 155889664, - "fanoutMean": 5.3095703125, - "recvTokensMax": 1422, - "stragglerRank": 4, + "dispatchLogicalBytes": 1218560, + "combineLogicalBytes": 1218560, + "fanoutMean": 5.3125, + 
"recvTokensMax": 14, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 512, - "globalTokens": 4096, + "tokensPerRank": 4, + "globalTokens": 32, "dispatch": { - "p50": 402.24000811576843, - "p90": 444.64001059532166, - "p95": 447.3919868469238, - "p99": 454.1440010070801 + "p50": 72.57600128650665, + "p90": 101.02400183677673, + "p95": 103.61599922180176, + "p99": 110.81600189208984 }, "combine": { - "p50": 224.16000068187714, - "p90": 233.0559939146042, - "p95": 235.23199558258057, - "p99": 239.29600417613983 + "p50": 72.25599884986877, + "p90": 79.96799796819687, + "p95": 86.71999722719193, + "p99": 87.64799684286118 }, "roundtrip": { - "p50": 613.9839887619019, - "p90": 657.7600240707397, - "p95": 661.9200110435486, - "p99": 734.7840070724487 + "p50": 129.92000579833984, + "p90": 161.3759994506836, + "p95": 162.30399906635284, + "p99": 166.4319932460785 }, "isolatedSum": { - "p50": 626.4000087976456, - "p90": 677.6960045099258, - "p95": 682.6239824295044, - "p99": 693.4400051832199 + "p50": 144.83200013637543, + "p90": 180.9919998049736, + "p95": 190.33599644899368, + "p99": 198.46399873495102 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 156133376, - "combineLogicalBytes": 312266752, - "fanoutMean": 5.31787109375, - "recvTokensMax": 2779, - "stragglerRank": 4, + "dispatchLogicalBytes": 2408448, + "combineLogicalBytes": 2408448, + "fanoutMean": 5.25, + "recvTokensMax": 26, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 1024, - "globalTokens": 8192, + "tokensPerRank": 8, + "globalTokens": 64, "dispatch": { - "p50": 664.7359728813171, - "p90": 669.5680022239685, - "p95": 671.1360216140747, - "p99": 674.7519969940186 + "p50": 96.3200032711029, + "p90": 101.1200025677681, + "p95": 102.52799838781357, + "p99": 109.11999642848969 }, "combine": { - "p50": 358.0799996852875, - "p90": 363.23198676109314, - "p95": 364.47998881340027, - "p99": 369.9199855327606 + 
"p50": 79.23199981451035, + "p90": 82.11199939250946, + "p95": 87.00799942016602, + "p99": 87.71199733018875 }, "roundtrip": { - "p50": 1005.2160024642944, - "p90": 1010.2720260620117, - "p95": 1012.287974357605, - "p99": 1019.9999809265137 + "p50": 151.5199989080429, + "p90": 159.2320054769516, + "p95": 160.60799360275269, + "p99": 165.21599888801575 }, "isolatedSum": { - "p50": 1022.8159725666046, - "p90": 1032.7999889850616, - "p95": 1035.616010427475, - "p99": 1044.6719825267792 + "p50": 175.55200308561325, + "p90": 183.23200196027756, + "p95": 189.53599780797958, + "p99": 196.83199375867844 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 311721984, - "combineLogicalBytes": 623443968, - "fanoutMean": 5.30859375, - "recvTokensMax": 5505, - "stragglerRank": 4, + "dispatchLogicalBytes": 4831232, + "combineLogicalBytes": 4831232, + "fanoutMean": 5.265625, + "recvTokensMax": 48, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2048, - "globalTokens": 16384, + "tokensPerRank": 16, + "globalTokens": 128, "dispatch": { - "p50": 1237.8560304641724, - "p90": 1245.919942855835, - "p95": 1249.0559816360474, - "p99": 1253.6319494247437 + "p50": 96.22400254011154, + "p90": 102.36799716949463, + "p95": 105.05600273609161, + "p99": 110.30399799346924 }, "combine": { - "p50": 632.0639848709106, - "p90": 639.0720009803772, - "p95": 641.5359973907471, - "p99": 646.9119787216187 + "p50": 81.88799768686295, + "p90": 88.28800171613693, + "p95": 89.31200206279755, + "p99": 94.43199634552002 }, "roundtrip": { - "p50": 1845.0239896774292, - "p90": 1854.3039560317993, - "p95": 1857.983946800232, - "p99": 1862.720012664795 + "p50": 152.48000621795654, + "p90": 160.09600460529327, + "p95": 164.19200599193573, + "p99": 172.83199727535248 }, "isolatedSum": { - "p50": 1869.920015335083, - "p90": 1884.9919438362122, - "p95": 1890.5919790267944, - "p99": 1900.5439281463623 + "p50": 178.1120002269745, + "p90": 190.65599888563156, + "p95": 
194.36800479888916, + "p99": 204.73599433898926 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 621902848, - "combineLogicalBytes": 1243805696, - "fanoutMean": 5.29547119140625, - "recvTokensMax": 10952, - "stragglerRank": 4, + "dispatchLogicalBytes": 9848832, + "combineLogicalBytes": 9848832, + "fanoutMean": 5.3671875, + "recvTokensMax": 91, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 4096, - "globalTokens": 32768, + "tokensPerRank": 32, + "globalTokens": 256, "dispatch": { - "p50": 2360.80002784729, - "p90": 2371.2639808654785, - "p95": 2375.3280639648438, - "p99": 2383.3279609680176 + "p50": 90.36800265312195, + "p90": 102.59199887514114, + "p95": 104.3199971318245, + "p99": 108.03200304508209 }, "combine": { - "p50": 1150.8159637451172, - "p90": 1160.032033920288, - "p95": 1162.9120111465454, - "p99": 1171.6159582138062 + "p50": 80.92799782752991, + "p90": 90.01599997282028, + "p95": 95.13600170612335, + "p99": 96.41599655151367 }, "roundtrip": { - "p50": 3508.7039470672607, - "p90": 3525.631904602051, - "p95": 3531.615972518921, - "p99": 3547.4560260772705 + "p50": 142.46399700641632, + "p90": 169.95200514793396, + "p95": 174.55999553203583, + "p99": 181.7920058965683 }, "isolatedSum": { - "p50": 3511.615991592407, - "p90": 3531.2960147857666, - "p95": 3538.240075111389, - "p99": 3554.9439191818237 + "p50": 171.29600048065186, + "p90": 192.60799884796143, + "p95": 199.45599883794785, + "p99": 204.44799959659576 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1243504640, - "combineLogicalBytes": 2487009280, - "fanoutMean": 5.294189453125, - "recvTokensMax": 21781, - "stragglerRank": 4, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 19496960, + "fanoutMean": 5.3125, + "recvTokensMax": 178, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 95.71199864149094, + "p90": 
116.54400080442429, + "p95": 118.59200149774551, + "p99": 125.63200294971466 + }, + "combine": { + "p50": 89.72799777984619, + "p90": 103.74400019645691, + "p95": 104.22399640083313, + "p99": 106.04800283908844 + }, + "roundtrip": { + "p50": 165.66400229930878, + "p90": 185.34399569034576, + "p95": 186.97600066661835, + "p99": 190.08000195026398 + }, + "isolatedSum": { + "p50": 185.43999642133713, + "p90": 220.2880010008812, + "p95": 222.81599789857864, + "p99": 231.6800057888031 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 38836224, + "fanoutMean": 5.291015625, + "recvTokensMax": 372, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 113.11999708414078, + "p90": 133.82400572299957, + "p95": 137.05599308013916, + "p99": 140.28799533843994 + }, + "combine": { + "p50": 106.46399855613708, + "p90": 120.12799829244614, + "p95": 120.51200121641159, + "p99": 120.99199742078781 + }, + "roundtrip": { + "p50": 196.8960016965866, + "p90": 216.99200570583344, + "p95": 218.9120054244995, + "p99": 220.99199891090393 + }, + "isolatedSum": { + "p50": 219.58399564027786, + "p90": 253.9520040154457, + "p95": 257.56799429655075, + "p99": 261.27999275922775 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77514752, + "combineLogicalBytes": 77514752, + "fanoutMean": 5.2802734375, + "recvTokensMax": 707, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -38386,38 +39985,39 @@ ] }, { - "id": "cx-0ee3ca7d", - "identity": "h100|deepep|7168|8|384|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||cd50548525dafdf", - "colorKey": "h100_97196257", - "comparisonKey": "7f26f72cd9fff78c", + "id": "cx-62470199", + "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|alternating-groups@s1|8|decode|normal|none|none|1|tuned||f8662de0b3559f9", + "colorKey": 
"h100_b681a3a4", + "comparisonKey": "03a9af950bebf5a9", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:50:56.826066+00:00", + "generatedAt": "2026-06-27T00:12:00.195927+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_15", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_17", "sku": "h100", "backend": "deepep", - "phase": "prefill", + "phase": "decode", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "runtime-visible-v1", + "measurementContract": "layout-and-dispatch-v1", "topologyClass": "h100-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · fp8", + "label": "H100 EP8 · deepep · bf16 · alternating-groups@s1", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, - "experts": 384, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, + "experts": 256, + "routing": "alternating-groups", + "routingLabel": "alternating-groups@s1", + "routingStep": 1, "unevenTokens": "none", "eplbEnabled": false, - "dispatchDtype": "fp8", + "dispatchDtype": "bf16", "activationProfile": "normal", "combineQuantMode": "none" }, @@ -38438,239 +40038,128 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "cd50548525dafdf", - "workloadId": "set:6:b23bc0c4b6402c69", - "workloadSource": "canonical-serialized", + "traceSignature": "f8662de0b3559f9", + "workloadId": null, + "workloadSource": "seeded-runtime", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, "backendVersion": "1.2.1", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271663775", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271663775", - "createdAt": "2026-06-26T23:49:50Z", + "id": "28272331593", + "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272331593", + "createdAt": "2026-06-27T00:12:00.195927+00:00", "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" }, "rows": [ { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 211.93599700927734, - "p90": 218.33600103855133, - "p95": 220.5120027065277, - "p99": 225.055992603302 - }, - "combine": { - "p50": 97.59999811649323, - "p90": 100.0640019774437, - "p95": 101.85600072145462, - "p99": 104.5759990811348 - }, - "roundtrip": { - "p50": 297.91998863220215, - "p90": 303.9360046386719, - "p95": 306.5600097179413, - "p99": 328.000009059906 - }, - "isolatedSum": { - "p50": 309.53599512577057, - "p90": 318.400003015995, - "p95": 322.36800342798233, - "p99": 329.6319916844368 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 38757376, - "combineLogicalBytes": 77514752, - "fanoutMean": 5.2802734375, - "recvTokensMax": 707, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 256, - "globalTokens": 2048, + "tokensPerRank": 8, + "globalTokens": 64, "dispatch": { - "p50": 278.3359885215759, - "p90": 284.5759987831116, - "p95": 285.8560085296631, - "p99": 292.03200340270996 + "p50": 95.20000219345093, + "p90": 101.24800354242325, + "p95": 103.42399775981903, + "p99": 115.84000289440155 }, "combine": { - "p50": 141.88799262046814, - "p90": 145.1520025730133, - "p95": 146.88000082969666, - "p99": 151.39199793338776 + "p50": 79.29600030183792, + "p90": 80.92799782752991, + "p95": 81.79199695587158, + "p99": 88.03199976682663 }, "roundtrip": { - "p50": 404.4800102710724, - "p90": 410.7840061187744, - "p95": 413.9519929885864, - "p99": 420.51199078559875 + "p50": 148.03199470043182, + "p90": 153.24799716472626, + "p95": 156.41599893569946, + "p99": 176.06399953365326 }, "isolatedSum": { - "p50": 420.22398114204407, - "p90": 429.7280013561249, - "p95": 432.73600935935974, - "p99": 443.4240013360977 + "p50": 174.49600249528885, + 
"p90": 182.17600136995316, + "p95": 185.2159947156906, + "p99": 203.87200266122818 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77285376, - "combineLogicalBytes": 154570752, - "fanoutMean": 5.2646484375, - "recvTokensMax": 1391, - "stragglerRank": 2, + "dispatchLogicalBytes": 3297280, + "combineLogicalBytes": 3297280, + "fanoutMean": 3.59375, + "recvTokensMax": 61, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 512, - "globalTokens": 4096, + "tokensPerRank": 32, + "globalTokens": 256, "dispatch": { - "p50": 397.2800076007843, - "p90": 402.8480052947998, - "p95": 405.44000267982483, - "p99": 410.71999073028564 + "p50": 99.5199978351593, + "p90": 107.39199817180634, + "p95": 118.72000247240067, + "p99": 229.95199263095856 }, "combine": { - "p50": 221.02400660514832, - "p90": 225.0880002975464, - "p95": 226.01599991321564, - "p99": 229.50400412082672 + "p50": 87.52000331878662, + "p90": 89.34400230646133, + "p95": 92.3520028591156, + "p99": 96.44799679517746 }, "roundtrip": { - "p50": 601.4400124549866, - "p90": 608.1600189208984, - "p95": 610.4000210762024, - "p99": 616.8000102043152 + "p50": 155.5519998073578, + "p90": 160.70400178432465, + "p95": 164.76799547672272, + "p99": 175.07199943065643 }, "isolatedSum": { - "p50": 618.3040142059326, - "p90": 627.9360055923462, - "p95": 631.4560025930405, - "p99": 640.2239948511124 + "p50": 187.04000115394592, + "p90": 196.73600047826767, + "p95": 211.07200533151627, + "p99": 326.399989426136 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 154886144, - "combineLogicalBytes": 309772288, - "fanoutMean": 5.275390625, - "recvTokensMax": 2754, - "stragglerRank": 1, + "dispatchLogicalBytes": 13275136, + "combineLogicalBytes": 13275136, + "fanoutMean": 3.6171875, + "recvTokensMax": 236, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 1024, - "globalTokens": 8192, + "tokensPerRank": 128, + "globalTokens": 1024, 
"dispatch": { - "p50": 659.5199704170227, - "p90": 663.2959842681885, - "p95": 665.0239825248718, - "p99": 667.2319769859314 + "p50": 133.82400572299957, + "p90": 141.08799397945404, + "p95": 142.62400567531586, + "p99": 146.40000462532043 }, "combine": { - "p50": 360.22400856018066, - "p90": 364.9280071258545, - "p95": 366.3040101528168, - "p99": 369.85599994659424 + "p50": 120.28799951076508, + "p90": 122.56000190973282, + "p95": 127.10399925708771, + "p99": 136.00000739097595 }, "roundtrip": { - "p50": 1002.9439926147461, - "p90": 1008.3839893341064, - "p95": 1010.0159645080566, - "p99": 1013.856053352356 + "p50": 221.88800573349, + "p90": 225.79200565814972, + "p95": 227.26400196552277, + "p99": 233.024001121521 }, "isolatedSum": { - "p50": 1019.7439789772034, - "p90": 1028.223991394043, - "p95": 1031.3279926776886, - "p99": 1037.0879769325256 + "p50": 254.11200523376465, + "p90": 263.64799588918686, + "p95": 269.72800493240356, + "p99": 282.4000120162964 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 309750784, - "combineLogicalBytes": 619501568, - "fanoutMean": 5.2750244140625, - "recvTokensMax": 5469, - "stragglerRank": 1, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2048, - "globalTokens": 16384, - "dispatch": { - "p50": 1226.9760370254517, - "p90": 1235.1679801940918, - "p95": 1236.8320226669312, - "p99": 1242.143988609314 - }, - "combine": { - "p50": 624.5120167732239, - "p90": 631.8399906158447, - "p95": 634.1120004653931, - "p99": 675.8400201797485 - }, - "roundtrip": { - "p50": 1831.455945968628, - "p90": 1840.831995010376, - "p95": 1843.775987625122, - "p99": 1848.2880592346191 - }, - "isolatedSum": { - "p50": 1851.4880537986755, - "p90": 1867.0079708099365, - "p95": 1870.9440231323242, - "p99": 1917.9840087890625 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 619687936, - "combineLogicalBytes": 1239375872, - "fanoutMean": 5.276611328125, - "recvTokensMax": 10883, - "stragglerRank": 2, - 
"correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4096, - "globalTokens": 32768, - "dispatch": { - "p50": 2344.1600799560547, - "p90": 2350.719928741455, - "p95": 2352.9601097106934, - "p99": 2358.0799102783203 - }, - "combine": { - "p50": 1141.4719820022583, - "p90": 1150.9439945220947, - "p95": 1153.7920236587524, - "p99": 1162.592053413391 - }, - "roundtrip": { - "p50": 3469.856023788452, - "p90": 3481.6958904266357, - "p95": 3484.3521118164062, - "p99": 3490.528106689453 - }, - "isolatedSum": { - "p50": 3485.632061958313, - "p90": 3501.66392326355, - "p95": 3506.752133369446, - "p99": 3520.6719636917114 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1239834624, - "combineLogicalBytes": 2479669248, - "fanoutMean": 5.278564453125, - "recvTokensMax": 21730, - "stragglerRank": 3, + "dispatchLogicalBytes": 53172224, + "combineLogicalBytes": 53172224, + "fanoutMean": 3.6220703125, + "recvTokensMax": 934, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -38678,50 +40167,51 @@ ] }, { - "id": "cx-560e55e7", - "identity": "h100|deepep|4096|8|128|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|normalized|0.18|dc27c5e0894e569", - "colorKey": "h100_7f10961a", - "comparisonKey": "6a3a9660e48371b3", + "id": "cx-62dda1f3", + "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|alternating-groups@s2|8|decode|normal|none|none|2|tuned||3cd13eac5b27759", + "colorKey": "h100_b981a85d", + "comparisonKey": "03a9af950bebf5a9", "schemaVersion": 3, - "generatedAt": "2026-06-27T00:45:34.307375+00:00", + "generatedAt": "2026-06-27T00:12:08.462042+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_08", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_04", "sku": "h100", "backend": "deepep", - "phase": "prefill", + "phase": "decode", "mode": "normal", - "resourceMode": "normalized", - "suite": 
"resource-constrained", + "resourceMode": "tuned", + "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "runtime-visible-v1", + "measurementContract": "layout-and-dispatch-v1", "topologyClass": "h100-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · fp8 (norm)", + "label": "H100 EP8 · deepep · bf16 · alternating-groups@s2", + "model": "DeepSeek-V3/V4", "shape": { - "hidden": 4096, + "hidden": 7168, "topk": 8, - "experts": 128, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, + "experts": 256, + "routing": "alternating-groups", + "routingLabel": "alternating-groups@s2", + "routingStep": 2, "unevenTokens": "none", "eplbEnabled": false, - "dispatchDtype": "fp8", + "dispatchDtype": "bf16", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { - "requestedFraction": 0.18, - "achievedFraction": 0.1818, - "configuredUnits": 24, + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, "deviceUnits": 132, - "resourceClass": "resource-constrained", - "conformanceClass": "resource-conforming", + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", "fixedKernel": false, - "paretoEligible": true + "paretoEligible": false }, "placement": { "kind": "packed", @@ -38730,239 +40220,310 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "dc27c5e0894e569", - "workloadId": "set:6:76d8142d69406335", - "workloadSource": "canonical-serialized", + "traceSignature": "3cd13eac5b27759", + "workloadId": null, + "workloadSource": "seeded-runtime", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, "backendVersion": "1.2.1", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28273218274", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28273218274", - "createdAt": 
"2026-06-27T00:41:54Z", + "id": "28272335347", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272335347", + "createdAt": "2026-06-27T00:12:08.462042+00:00", "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" }, "rows": [ { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 8, + "globalTokens": 64, "dispatch": { - "p50": 196.79999351501465, - "p90": 203.80799472332, - "p95": 205.79199492931366, - "p99": 214.11199867725372 + "p50": 91.96799993515015, + "p90": 101.85600072145462, + "p95": 102.88000106811523, + "p99": 111.00800335407257 }, "combine": { - "p50": 75.71200281381607, - "p90": 78.5600021481514, - "p95": 80.54400235414505, - "p99": 84.6719965338707 + "p50": 76.60800218582153, + "p90": 81.60000294446945, + "p95": 82.17599987983704, + "p99": 85.21600067615509 }, "roundtrip": { - "p50": 255.64798712730408, - "p90": 264.41600918769836, - "p95": 274.1119861602783, - "p99": 321.9519853591919 + "p50": 146.7839926481247, + "p90": 152.6080071926117, + "p95": 154.27200496196747, + "p99": 160.99199652671814 }, "isolatedSum": { - "p50": 272.5119963288307, - "p90": 282.3679968714714, - "p95": 286.3359972834587, - "p99": 298.7839952111244 + "p50": 168.57600212097168, + "p90": 183.45600366592407, + "p95": 185.05600094795227, + "p99": 196.22400403022766 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 22282240, - "combineLogicalBytes": 44564480, - "fanoutMean": 5.3125, - "recvTokensMax": 699, - "stragglerRank": 7, + "dispatchLogicalBytes": 3297280, + "combineLogicalBytes": 3297280, + "fanoutMean": 3.59375, + "recvTokensMax": 61, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 256, - "globalTokens": 2048, + "tokensPerRank": 32, + "globalTokens": 256, "dispatch": { - "p50": 231.26399517059326, - "p90": 269.6639895439148, - "p95": 272.19200134277344, - "p99": 278.01600098609924 + "p50": 98.88000041246414, + "p90": 104.06400263309479, + "p95": 106.30399733781815, + "p99": 
139.42399621009827 }, "combine": { - "p50": 100.99200159311295, - "p90": 109.82400178909302, - "p95": 110.81600189208984, - "p99": 113.3119985461235 + "p50": 84.60800349712372, + "p90": 86.30400151014328, + "p95": 86.81599795818329, + "p99": 92.51199662685394 }, "roundtrip": { - "p50": 315.8720135688782, - "p90": 327.39201188087463, - "p95": 355.679988861084, - "p99": 369.53601241111755 + "p50": 154.65599298477173, + "p90": 160.64000129699707, + "p95": 162.59199380874634, + "p99": 168.09600591659546 }, "isolatedSum": { - "p50": 332.2559967637062, - "p90": 379.4879913330078, - "p95": 383.0080032348633, - "p99": 391.32799953222275 + "p50": 183.48800390958786, + "p90": 190.36800414323807, + "p95": 193.11999529600143, + "p99": 231.9359928369522 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 44863488, - "combineLogicalBytes": 89726976, - "fanoutMean": 5.34814453125, - "recvTokensMax": 1385, - "stragglerRank": 4, + "dispatchLogicalBytes": 13275136, + "combineLogicalBytes": 13275136, + "fanoutMean": 3.6171875, + "recvTokensMax": 236, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 512, - "globalTokens": 4096, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 309.59999561309814, - "p90": 352.1279990673065, - "p95": 355.00800609588623, - "p99": 361.1519932746887 + "p50": 141.50400459766388, + "p90": 146.68799936771393, + "p95": 147.67999947071075, + "p99": 152.41600573062897 }, "combine": { - "p50": 147.90399372577667, - "p90": 156.99200332164764, - "p95": 158.24000537395477, - "p99": 162.08000481128693 + "p50": 118.17599833011627, + "p90": 122.56000190973282, + "p95": 123.58400225639343, + "p99": 125.82400441169739 }, "roundtrip": { - "p50": 442.4000084400177, - "p90": 483.3280146121979, - "p95": 487.8399968147278, - "p99": 518.4000134468079 + "p50": 227.13600099086761, + "p90": 231.23200237751007, + "p95": 232.92799293994904, + "p99": 237.05600202083588 }, "isolatedSum": { - "p50": 
457.5039893388748, - "p90": 509.12000238895416, - "p95": 513.248011469841, - "p99": 523.2319980859756 + "p50": 259.68000292778015, + "p90": 269.24800127744675, + "p95": 271.2640017271042, + "p99": 278.24001014232635 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 89751552, - "combineLogicalBytes": 179503104, - "fanoutMean": 5.349609375, - "recvTokensMax": 2772, + "dispatchLogicalBytes": 53172224, + "combineLogicalBytes": 53172224, + "fanoutMean": 3.6220703125, + "recvTokensMax": 934, "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 - }, + } + ] + }, + { + "id": "cx-f337d9a1", + "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|alternating-groups@s3|8|decode|normal|none|none|3|tuned||f8662de0b3559f9", + "colorKey": "h100_b881a6ca", + "comparisonKey": "03a9af950bebf5a9", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:12:29.724404+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_15", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 · alternating-groups@s3", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "alternating-groups", + "routingLabel": "alternating-groups@s3", + "routingStep": 3, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + 
"placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "f8662de0b3559f9", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28272338723", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272338723", + "createdAt": "2026-06-27T00:12:29.724404+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ { - "tokensPerRank": 1024, - "globalTokens": 8192, + "tokensPerRank": 8, + "globalTokens": 64, "dispatch": { - "p50": 453.98399233818054, - "p90": 459.3600034713745, - "p95": 461.88798546791077, - "p99": 466.623991727829 + "p50": 94.84799951314926, + "p90": 121.37600034475327, + "p95": 148.8959938287735, + "p99": 189.56799805164337 }, "combine": { - "p50": 235.29599606990814, - "p90": 239.00799453258514, - "p95": 240.51199853420258, - "p99": 242.46400594711304 + "p50": 79.58400249481201, + "p90": 96.6079980134964, + "p95": 113.0559965968132, + "p99": 123.77600371837616 }, "roundtrip": { - "p50": 673.3120083808899, - "p90": 678.8480281829834, - "p95": 680.6079745292664, - "p99": 684.544026851654 + "p50": 148.44800531864166, + "p90": 183.20000171661377, + "p95": 218.78400444984436, + "p99": 249.79199469089508 }, "isolatedSum": { - "p50": 689.2799884080887, - "p90": 698.3679980039597, - "p95": 702.3999840021133, - "p99": 709.087997674942 + "p50": 174.43200200796127, + "p90": 217.98399835824966, + "p95": 261.9519904255867, + "p99": 313.34400177001953 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 179511296, - "combineLogicalBytes": 359022592, - "fanoutMean": 5.349853515625, - "recvTokensMax": 5558, + "dispatchLogicalBytes": 3297280, + "combineLogicalBytes": 3297280, + "fanoutMean": 
3.59375, + "recvTokensMax": 61, "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2048, - "globalTokens": 16384, + "tokensPerRank": 32, + "globalTokens": 256, "dispatch": { - "p50": 784.928023815155, - "p90": 799.8719811439514, - "p95": 803.2000064849854, - "p99": 809.0239763259888 + "p50": 100.41599720716476, + "p90": 127.55200266838074, + "p95": 156.5759927034378, + "p99": 182.81599879264832 }, "combine": { - "p50": 405.4720103740692, - "p90": 416.06399416923523, - "p95": 418.3039963245392, - "p99": 422.4959909915924 + "p50": 87.8399983048439, + "p90": 103.93600165843964, + "p95": 120.38400024175644, + "p99": 128.89599800109863 }, "roundtrip": { - "p50": 1170.1120138168335, - "p90": 1179.58402633667, - "p95": 1183.6479902267456, - "p99": 1192.7679777145386 + "p50": 156.99200332164764, + "p90": 193.7599927186966, + "p95": 223.7119972705841, + "p99": 247.23200500011444 }, "isolatedSum": { - "p50": 1190.4000341892242, - "p90": 1215.9359753131866, - "p95": 1221.5040028095245, - "p99": 1231.5199673175812 + "p50": 188.25599551200867, + "p90": 231.48800432682037, + "p95": 276.95999294519424, + "p99": 311.71199679374695 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 358055936, - "combineLogicalBytes": 716111872, - "fanoutMean": 5.33544921875, - "recvTokensMax": 10982, - "stragglerRank": 4, + "dispatchLogicalBytes": 13275136, + "combineLogicalBytes": 13275136, + "fanoutMean": 3.6171875, + "recvTokensMax": 236, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 4096, - "globalTokens": 32768, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 1483.3279848098755, - "p90": 1490.496039390564, - "p95": 1493.6319589614868, - "p99": 1501.5679597854614 + "p50": 134.2719942331314, + "p90": 147.39200472831726, + "p95": 173.567995429039, + "p99": 188.1919950246811 }, "combine": { - "p50": 732.2880029678345, - "p90": 738.8160228729248, - "p95": 740.8000230789185, - 
"p99": 745.9840178489685 + "p50": 120.44800072908401, + "p90": 138.62399756908417, + "p95": 152.38399803638458, + "p99": 160.96000373363495 }, "roundtrip": { - "p50": 2199.039936065674, - "p90": 2209.439992904663, - "p95": 2212.5439643859863, - "p99": 2217.087984085083 + "p50": 222.6880043745041, + "p90": 247.80799448490143, + "p95": 264.6079957485199, + "p99": 279.35999631881714 }, "isolatedSum": { - "p50": 2215.61598777771, - "p90": 2229.3120622634888, - "p95": 2234.4319820404053, - "p99": 2247.55197763443 + "p50": 254.71999496221542, + "p90": 286.0160022974014, + "p95": 325.9519934654236, + "p99": 349.15199875831604 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 716197888, - "combineLogicalBytes": 1432395776, - "fanoutMean": 5.336090087890625, - "recvTokensMax": 21939, - "stragglerRank": 4, + "dispatchLogicalBytes": 53172224, + "combineLogicalBytes": 53172224, + "fanoutMean": 3.6220703125, + "recvTokensMax": 934, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -38970,50 +40531,51 @@ ] }, { - "id": "cx-de081cfe", - "identity": "h100|deepep|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|normalized|0.18|64d989e2e2a6b31", - "colorKey": "h100_91aa6e56", - "comparisonKey": "e439d265ee12c9f2", + "id": "cx-cf5bc26b", + "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|decode|normal|none|none|0|tuned||2279937619f3971", + "colorKey": "h100_16047c28", + "comparisonKey": "64192d9d479bdd44", "schemaVersion": 3, - "generatedAt": "2026-06-26T17:30:20.983875+00:00", + "generatedAt": "2026-06-26T23:54:33.118563+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h100-dgxc-slurm_03", + "runner": "h100-dgxc-slurm_12", "sku": "h100", "backend": "deepep", - "phase": "prefill", + "phase": "decode", "mode": "normal", - "resourceMode": "normalized", - "suite": "resource-constrained", + "resourceMode": "tuned", + "suite": "backend-default", "comparisonClass": 
"standardized", "measurementContract": "layout-and-dispatch-v1", "topologyClass": "h100-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · fp8 (norm)", + "label": "H100 EP8 · deepep · bf16 · balanced", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", + "routing": "balanced", + "routingLabel": "balanced", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, - "dispatchDtype": "fp8", + "dispatchDtype": "bf16", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { - "requestedFraction": 0.18, - "achievedFraction": 0.1818, - "configuredUnits": 24, + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, "deviceUnits": 132, - "resourceClass": "resource-constrained", - "conformanceClass": "resource-conforming", + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", "fixedKernel": false, - "paretoEligible": true + "paretoEligible": false }, "placement": { "kind": "packed", @@ -39022,8 +40584,8 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "64d989e2e2a6b31", - "workloadId": "set:6:a426d66e479dc893", + "traceSignature": "2279937619f3971", + "workloadId": "set:4:7af12818400d6348", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -39031,230 +40593,156 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28254323956", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254323956", - "createdAt": "2026-06-26T17:27:01Z", - "sha": "60dec7d70f554e252fec87709e2be52752947db1" + "id": "28271788376", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271788376", + "createdAt": "2026-06-26T23:54:33.118563+00:00", + "sha": 
"45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" }, "rows": [ { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 1, + "globalTokens": 8, "dispatch": { - "p50": 89.59999680519104, - "p90": 93.72799843549728, - "p95": 95.36000341176987, - "p99": 100.832000374794 + "p50": 94.68799829483032, + "p90": 101.1200025677681, + "p95": 104.41599786281586, + "p99": 111.10399663448334 }, "combine": { - "p50": 98.14400225877762, - "p90": 100.60799866914749, - "p95": 102.11200267076492, - "p99": 105.0880029797554 - }, - "roundtrip": { - "p50": 215.13600647449493, - "p90": 218.55999529361725, - "p95": 220.12799978256226, - "p99": 228.06400060653687 - }, - "isolatedSum": { - "p50": 187.74399906396866, - "p90": 194.33599710464478, - "p95": 197.4720060825348, - "p99": 205.9200033545494 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 38836224, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 256, - "globalTokens": 2048, - "dispatch": { - "p50": 106.175996363163, - "p90": 121.47200107574463, - "p95": 122.52800166606903, - "p99": 125.91999769210815 - }, - "combine": { - "p50": 139.48799669742584, - "p90": 146.17599546909332, - "p95": 147.61599898338318, - "p99": 149.82399344444275 - }, - "roundtrip": { - "p50": 320.92800736427307, - "p90": 336.41600608825684, - "p95": 337.92001008987427, - "p99": 341.2800133228302 - }, - "isolatedSum": { - "p50": 245.66399306058884, - "p90": 267.64799654483795, - "p95": 270.1440006494522, - "p99": 275.7439911365509 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 77944832, - "combineLogicalBytes": 155889664, - "fanoutMean": 5.3095703125, - "recvTokensMax": 1422, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 512, - "globalTokens": 4096, - "dispatch": { - "p50": 138.46400380134583, - "p90": 182.8799992799759, - "p95": 
190.97599387168884, - "p99": 197.28000462055206 - }, - "combine": { - "p50": 208.3200067281723, - "p90": 223.00800681114197, - "p95": 231.83999955654144, - "p99": 242.01600253582 + "p50": 80.99199831485748, + "p90": 86.84799820184708, + "p95": 87.8399983048439, + "p99": 89.9519994854927 }, "roundtrip": { - "p50": 509.69600677490234, - "p90": 521.5680003166199, - "p95": 523.4879851341248, - "p99": 528.9599895477295 + "p50": 150.30400454998016, + "p90": 156.95999562740326, + "p95": 159.67999398708344, + "p99": 164.15999829769135 }, "isolatedSum": { - "p50": 346.7840105295181, - "p90": 405.88800609111786, - "p95": 422.8159934282303, - "p99": 439.29600715637207 + "p50": 175.6799966096878, + "p90": 187.96800076961517, + "p95": 192.25599616765976, + "p99": 201.05599611997604 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 156133376, - "combineLogicalBytes": 312266752, - "fanoutMean": 5.31787109375, - "recvTokensMax": 2779, - "stragglerRank": 4, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 8, + "recvTokensMax": 8, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 1024, - "globalTokens": 8192, + "tokensPerRank": 8, + "globalTokens": 64, "dispatch": { - "p50": 197.4399983882904, - "p90": 210.87999641895294, - "p95": 213.31200003623962, - "p99": 216.2880003452301 + "p50": 95.0080007314682, + "p90": 100.00000149011612, + "p95": 102.68799960613251, + "p99": 108.57599973678589 }, "combine": { - "p50": 325.82399249076843, - "p90": 330.1120102405548, - "p95": 331.6799998283386, - "p99": 335.80800890922546 + "p50": 81.727996468544, + "p90": 88.51200342178345, + "p95": 89.37600255012512, + "p99": 90.59199690818787 }, "roundtrip": { - "p50": 847.4879860877991, - "p90": 858.0160140991211, - "p95": 861.0879778862, - "p99": 869.2799806594849 + "p50": 150.65599977970123, + "p90": 159.58400070667267, + "p95": 161.50400042533875, + "p99": 167.42399334907532 }, "isolatedSum": { - "p50": 
523.2639908790588, - "p90": 540.9920066595078, - "p95": 544.9919998645782, - "p99": 552.0960092544556 + "p50": 176.7359972000122, + "p90": 188.51200491189957, + "p95": 192.06400215625763, + "p99": 199.16799664497375 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 311721984, - "combineLogicalBytes": 623443968, - "fanoutMean": 5.30859375, - "recvTokensMax": 5505, + "dispatchLogicalBytes": 7340032, + "combineLogicalBytes": 7340032, + "fanoutMean": 8, + "recvTokensMax": 64, "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2048, - "globalTokens": 16384, + "tokensPerRank": 32, + "globalTokens": 256, "dispatch": { - "p50": 318.65599751472473, - "p90": 335.29600501060486, - "p95": 338.0799889564514, - "p99": 347.29599952697754 + "p50": 104.63999956846237, + "p90": 112.28799819946289, + "p95": 114.14399743080139, + "p99": 119.84000355005264 }, "combine": { - "p50": 559.7760081291199, - "p90": 566.815972328186, - "p95": 569.5040225982666, - "p99": 573.311984539032 + "p50": 92.25600212812424, + "p90": 97.69599884748459, + "p95": 98.39999675750732, + "p99": 104.47999835014343 }, "roundtrip": { - "p50": 1524.0000486373901, - "p90": 1544.0640449523926, - "p95": 1550.7839918136597, - "p99": 1576.7359733581543 + "p50": 164.000004529953, + "p90": 171.64799571037292, + "p95": 175.4560023546219, + "p99": 228.4799963235855 }, "isolatedSum": { - "p50": 878.4320056438446, - "p90": 902.1119773387909, - "p95": 907.584011554718, - "p99": 920.6079840660095 + "p50": 196.8960016965866, + "p90": 209.98399704694748, + "p95": 212.54399418830872, + "p99": 224.32000190019608 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 621902848, - "combineLogicalBytes": 1243805696, - "fanoutMean": 5.29547119140625, - "recvTokensMax": 10952, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 8, + "recvTokensMax": 256, "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 
4096, - "globalTokens": 32768, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 574.7519731521606, - "p90": 593.1839942932129, - "p95": 598.1760025024414, - "p99": 604.7999858856201 + "p50": 143.93599331378937, + "p90": 148.00000190734863, + "p95": 149.79200065135956, + "p99": 155.68000078201294 }, "combine": { - "p50": 1025.056004524231, - "p90": 1033.5359573364258, - "p95": 1036.1920595169067, - "p99": 1042.847990989685 + "p50": 132.06399977207184, + "p90": 138.75199854373932, + "p95": 139.29599523544312, + "p99": 145.6959992647171 }, "roundtrip": { - "p50": 2880.863904953003, - "p90": 2894.5279121398926, - "p95": 2899.9040126800537, - "p99": 2908.3518981933594 + "p50": 241.2479966878891, + "p90": 247.6480007171631, + "p95": 249.15200471878052, + "p99": 252.76800990104675 }, "isolatedSum": { - "p50": 1599.8079776763916, - "p90": 1626.7199516296387, - "p95": 1634.3680620193481, - "p99": 1647.6479768753052 + "p50": 275.9999930858612, + "p90": 286.75200045108795, + "p95": 289.0879958868027, + "p99": 301.37600004673004 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1243504640, - "combineLogicalBytes": 2487009280, - "fanoutMean": 5.294189453125, - "recvTokensMax": 21781, - "stragglerRank": 5, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 8, + "recvTokensMax": 1024, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -39262,50 +40750,51 @@ ] }, { - "id": "cx-e8c2a4d2", - "identity": "h100|deepep|7168|8|256|fp8|normal|cached-layout-comm-only-v1|uniform|8|prefill|normal|none|none|0|normalized|0.18|64d989e2e2a6b31", - "colorKey": "h100_eddc3af6", - "comparisonKey": "fd73340f2af530d5", + "id": "cx-4d49fd79", + "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|decode|normal|none|none|0|tuned||ffa946582edb500", + "colorKey": "h100_16047c28", + "comparisonKey": "64192d9d479bdd44", "schemaVersion": 3, - "generatedAt": 
"2026-06-26T17:30:48.926445+00:00", + "generatedAt": "2026-06-26T23:59:13.030328+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h100-dgxc-slurm_19", + "runner": "h100-dgxc-slurm_03", "sku": "h100", "backend": "deepep", - "phase": "prefill", + "phase": "decode", "mode": "normal", - "resourceMode": "normalized", - "suite": "resource-constrained", + "resourceMode": "tuned", + "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "cached-layout-comm-only-v1", + "measurementContract": "layout-and-dispatch-v1", "topologyClass": "h100-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · fp8 (norm) [cl]", + "label": "H100 EP8 · deepep · bf16 · balanced", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", + "routing": "balanced", + "routingLabel": "balanced", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, - "dispatchDtype": "fp8", + "dispatchDtype": "bf16", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { - "requestedFraction": 0.18, - "achievedFraction": 0.1818, - "configuredUnits": 24, + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, "deviceUnits": 132, - "resourceClass": "resource-constrained", - "conformanceClass": "resource-conforming", + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", "fixedKernel": false, - "paretoEligible": true + "paretoEligible": false }, "placement": { "kind": "packed", @@ -39314,8 +40803,8 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "64d989e2e2a6b31", - "workloadId": "set:6:a426d66e479dc893", + "traceSignature": "ffa946582edb500", + "workloadId": "set:8:7af12818400d6348", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -39323,269 +40812,344 @@ "imageDigest": 
"sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28254341346", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254341346", - "createdAt": "2026-06-26T17:27:22Z", - "sha": "60dec7d70f554e252fec87709e2be52752947db1" + "id": "28271931349", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271931349", + "createdAt": "2026-06-26T23:59:13.030328+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" }, "rows": [ { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 1, + "globalTokens": 8, "dispatch": { - "p50": 77.60000228881836, - "p90": 81.4720019698143, - "p95": 83.52000266313553, - "p99": 102.7199998497963 + "p50": 95.93600034713745, + "p90": 101.56799852848053, + "p95": 103.13600301742554, + "p99": 107.744000852108 }, "combine": { - "p50": 98.08000177145004, - "p90": 102.01600193977356, - "p95": 115.35999923944473, - "p99": 344.0319895744324 + "p50": 80.89599758386612, + "p90": 87.07199990749359, + "p95": 87.8399983048439, + "p99": 89.40800279378891 }, "roundtrip": { - "p50": 205.1520049571991, - "p90": 208.19200575351715, - "p95": 209.85600352287292, - "p99": 214.9440050125122 + "p50": 151.42400562763214, + "p90": 160.12799739837646, + "p95": 172.86400496959686, + "p99": 232.12799429893494 }, "isolatedSum": { - "p50": 175.6800040602684, - "p90": 183.48800390958786, - "p95": 198.88000190258026, - "p99": 446.75198942422867 + "p50": 176.83199793100357, + "p90": 188.63999843597412, + "p95": 190.97600132226944, + "p99": 197.1520036458969 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 38836224, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 5, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 8, + "recvTokensMax": 8, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 256, - 
"globalTokens": 2048, + "tokensPerRank": 2, + "globalTokens": 16, "dispatch": { - "p50": 95.74399888515472, - "p90": 110.07999628782272, - "p95": 111.13599687814713, - "p99": 114.81600254774094 + "p50": 96.47999703884125, + "p90": 103.42399775981903, + "p95": 107.71200060844421, + "p99": 161.40800714492798 }, "combine": { - "p50": 141.7279988527298, - "p90": 148.8959938287735, - "p95": 150.4960060119629, - "p99": 153.02400290966034 + "p50": 81.11999928951263, + "p90": 87.61599659919739, + "p95": 89.1840010881424, + "p99": 185.5359971523285 }, "roundtrip": { - "p50": 311.45599484443665, - "p90": 319.5840120315552, - "p95": 321.696013212204, - "p99": 324.67201352119446 + "p50": 153.43999862670898, + "p90": 159.4880074262619, + "p95": 163.71199488639832, + "p99": 313.1200075149536 }, "isolatedSum": { - "p50": 237.47199773788452, - "p90": 258.9759901165962, - "p95": 261.63200289011, - "p99": 267.8400054574013 + "p50": 177.59999632835388, + "p90": 191.03999435901642, + "p95": 196.8960016965866, + "p99": 346.94400429725647 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77944832, - "combineLogicalBytes": 155889664, - "fanoutMean": 5.3095703125, - "recvTokensMax": 1422, - "stragglerRank": 7, + "dispatchLogicalBytes": 1835008, + "combineLogicalBytes": 1835008, + "fanoutMean": 8, + "recvTokensMax": 16, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 512, - "globalTokens": 4096, + "tokensPerRank": 4, + "globalTokens": 32, "dispatch": { - "p50": 129.56799566745758, - "p90": 144.57599818706512, - "p95": 146.14400267601013, - "p99": 148.8959938287735 + "p50": 95.32800316810608, + "p90": 100.3199964761734, + "p95": 102.1760031580925, + "p99": 106.84800148010254 }, "combine": { - "p50": 213.4079933166504, - "p90": 218.36799383163452, - "p95": 219.7760045528412, - "p99": 224.2240011692047 + "p50": 80.32000064849854, + "p90": 84.22400057315826, + "p95": 88.41600269079208, + "p99": 90.14400094747543 }, "roundtrip": { - "p50": 
500.70399045944214, - "p90": 508.1599950790405, - "p95": 510.81597805023193, - "p99": 514.8159861564636 + "p50": 150.94399452209473, + "p90": 158.4639996290207, + "p95": 159.90400314331055, + "p99": 163.32800686359406 }, "isolatedSum": { - "p50": 342.97598898410797, - "p90": 362.94399201869965, - "p95": 365.9200072288513, - "p99": 373.1199949979782 + "p50": 175.64800381660461, + "p90": 184.54399704933167, + "p95": 190.59200584888458, + "p99": 196.99200242757797 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 156133376, - "combineLogicalBytes": 312266752, - "fanoutMean": 5.31787109375, - "recvTokensMax": 2779, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 8, + "recvTokensMax": 32, "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 1024, - "globalTokens": 8192, + "tokensPerRank": 8, + "globalTokens": 64, "dispatch": { - "p50": 186.49600446224213, - "p90": 196.0960030555725, - "p95": 197.50399887561798, - "p99": 202.55999267101288 + "p50": 94.87999975681305, + "p90": 98.43199700117111, + "p95": 100.3199964761734, + "p99": 105.3759977221489 }, "combine": { - "p50": 327.7760148048401, - "p90": 333.18400382995605, - "p95": 334.3679904937744, - "p99": 337.72799372673035 + "p50": 80.54400235414505, + "p90": 87.20000088214874, + "p95": 88.73599767684937, + "p99": 89.82399851083755 }, "roundtrip": { - "p50": 835.2640271186829, - "p90": 841.69602394104, - "p95": 844.0639972686768, - "p99": 848.2879996299744 + "p50": 152.0960032939911, + "p90": 158.65600109100342, + "p95": 160.16000509262085, + "p99": 166.97600483894348 }, "isolatedSum": { - "p50": 514.2720192670822, - "p90": 529.2800068855286, - "p95": 531.8719893693924, - "p99": 540.2879863977432 + "p50": 175.4240021109581, + "p90": 185.63199788331985, + "p95": 189.05599415302277, + "p99": 195.19999623298645 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 311721984, - "combineLogicalBytes": 623443968, - "fanoutMean": 
5.30859375, - "recvTokensMax": 5505, - "stragglerRank": 7, + "dispatchLogicalBytes": 7340032, + "combineLogicalBytes": 7340032, + "fanoutMean": 8, + "recvTokensMax": 64, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2048, - "globalTokens": 16384, + "tokensPerRank": 16, + "globalTokens": 128, "dispatch": { - "p50": 306.62399530410767, - "p90": 320.19200921058655, - "p95": 322.7519989013672, - "p99": 327.1679878234863 + "p50": 95.93600034713745, + "p90": 103.20000350475311, + "p95": 106.20799660682678, + "p99": 168.57600212097168 }, "combine": { - "p50": 559.6479773521423, - "p90": 567.296028137207, - "p95": 570.1119899749756, - "p99": 574.5919942855835 + "p50": 84.3840017914772, + "p90": 89.40800279378891, + "p95": 89.75999802350998, + "p99": 94.84799951314926 }, "roundtrip": { - "p50": 1509.6960067749023, - "p90": 1522.7199792861938, - "p95": 1525.6320238113403, - "p99": 1585.9839916229248 + "p50": 154.84799444675446, + "p90": 161.02400422096252, + "p95": 163.7440025806427, + "p99": 497.50399589538574 }, "isolatedSum": { - "p50": 866.27197265625, - "p90": 887.4880373477936, - "p95": 892.8639888763428, - "p99": 901.7599821090698 + "p50": 180.32000213861465, + "p90": 192.60800629854202, + "p95": 195.96799463033676, + "p99": 263.42400163412094 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 621902848, - "combineLogicalBytes": 1243805696, - "fanoutMean": 5.29547119140625, - "recvTokensMax": 10952, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 8, + "recvTokensMax": 128, "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 4096, - "globalTokens": 32768, + "tokensPerRank": 32, + "globalTokens": 256, "dispatch": { - "p50": 539.8719906806946, - "p90": 550.7839918136597, - "p95": 555.7119846343994, - "p99": 564.7040009498596 + "p50": 103.16800326108932, + "p90": 109.37599837779999, + "p95": 110.75200140476227, + "p99": 
113.43999952077866 }, "combine": { - "p50": 1024.9279737472534, - "p90": 1034.3040227890015, - "p95": 1037.11998462677, - "p99": 1047.0720529556274 + "p50": 88.79999816417694, + "p90": 95.74399888515472, + "p95": 97.120001912117, + "p99": 97.95200079679489 }, "roundtrip": { - "p50": 2850.719928741455, - "p90": 2861.407995223999, - "p95": 2864.9280071258545, - "p99": 2870.176076889038 + "p50": 161.6639941930771, + "p90": 167.1999990940094, + "p95": 168.73599588871002, + "p99": 172.89599776268005 }, "isolatedSum": { - "p50": 1564.799964427948, - "p90": 1585.0880146026611, - "p95": 1592.8319692611694, - "p99": 1611.776053905487 + "p50": 191.96800142526627, + "p90": 205.1199972629547, + "p95": 207.87200331687927, + "p99": 211.39200031757355 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1243504640, - "combineLogicalBytes": 2487009280, - "fanoutMean": 5.294189453125, - "recvTokensMax": 21781, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 8, + "recvTokensMax": 256, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 119.6800023317337, + "p90": 128.00000607967377, + "p95": 129.05600666999817, + "p99": 133.91999900341034 + }, + "combine": { + "p50": 103.16800326108932, + "p90": 106.55999928712845, + "p95": 107.90400207042694, + "p99": 113.63200098276138 + }, + "roundtrip": { + "p50": 186.71999871730804, + "p90": 194.65599954128265, + "p95": 196.31999731063843, + "p99": 199.48799908161163 + }, + "isolatedSum": { + "p50": 222.84800559282303, + "p90": 234.56000536680222, + "p95": 236.9600087404251, + "p99": 247.55199998617172 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 8, + "recvTokensMax": 512, "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 
137.66400516033173, + "p90": 146.7200070619583, + "p95": 147.8080004453659, + "p99": 151.10400319099426 + }, + "combine": { + "p50": 131.1360001564026, + "p90": 137.82399892807007, + "p95": 138.46400380134583, + "p99": 145.28000354766846 + }, + "roundtrip": { + "p50": 241.40800535678864, + "p90": 248.60799312591553, + "p95": 250.59199333190918, + "p99": 258.5600018501282 + }, + "isolatedSum": { + "p50": 268.8000053167343, + "p90": 284.5440059900284, + "p95": 286.27200424671173, + "p99": 296.3840067386627 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 8, + "recvTokensMax": 1024, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 } ] }, { - "id": "cx-f6d2d196", - "identity": "h100|deepep|7168|8|256|fp8|normal|cached-layout-comm-only-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", - "colorKey": "h100_ec72792b", - "comparisonKey": "39b4bc74c45641cb", + "id": "cx-38b8b0c2", + "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|8|decode|normal|none|none|0|tuned||d02a66236b524b8", + "colorKey": "h100_0c515f8b", + "comparisonKey": "47e8e48c891afabb", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:48:09.793091+00:00", + "generatedAt": "2026-06-26T23:54:43.774495+00:00", "status": "valid", "publicationStatus": "official", "runner": "h100-dgxc-slurm_09", "sku": "h100", "backend": "deepep", - "phase": "prefill", + "phase": "decode", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "cached-layout-comm-only-v1", + "measurementContract": "layout-and-dispatch-v1", "topologyClass": "h100-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · fp8 [cl]", + "label": "H100 EP8 · deepep · bf16 · balanced-rank-local", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, "experts": 256, - 
"routing": "uniform", - "routingLabel": "uniform", + "routing": "balanced-rank-local", + "routingLabel": "balanced-rank-local", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, - "dispatchDtype": "fp8", + "dispatchDtype": "bf16", "activationProfile": "normal", "combineQuantMode": "none" }, @@ -39606,8 +41170,8 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "64d989e2e2a6b31", - "workloadId": "set:6:a426d66e479dc893", + "traceSignature": "d02a66236b524b8", + "workloadId": "set:4:2eebbed158fe1320", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -39615,230 +41179,156 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271576503", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271576503", - "createdAt": "2026-06-26T23:47:05Z", + "id": "28271795429", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271795429", + "createdAt": "2026-06-26T23:54:43.774495+00:00", "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" }, "rows": [ { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 76.73600316047668, - "p90": 80.19199967384338, - "p95": 82.17599987983704, - "p99": 85.4720026254654 - }, - "combine": { - "p50": 98.68799895048141, - "p90": 100.8640006184578, - "p95": 102.84800082445145, - "p99": 113.27999830245972 - }, - "roundtrip": { - "p50": 204.25599813461304, - "p90": 206.84799551963806, - "p95": 208.0959975719452, - "p99": 211.32799983024597 - }, - "isolatedSum": { - "p50": 175.4240021109581, - "p90": 181.05600029230118, - "p95": 185.02400070428848, - "p99": 198.7520009279251 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 38836224, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - 
{ - "tokensPerRank": 256, - "globalTokens": 2048, - "dispatch": { - "p50": 95.551997423172, - "p90": 98.65599870681763, - "p95": 100.44799745082855, - "p99": 104.63999956846237 - }, - "combine": { - "p50": 143.51999759674072, - "p90": 146.04799449443817, - "p95": 147.2640037536621, - "p99": 150.07999539375305 - }, - "roundtrip": { - "p50": 317.05600023269653, - "p90": 320.67200541496277, - "p95": 322.07998633384705, - "p99": 325.56799054145813 - }, - "isolatedSum": { - "p50": 239.07199501991272, - "p90": 244.7039932012558, - "p95": 247.71200120449066, - "p99": 254.71999496221542 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 77944832, - "combineLogicalBytes": 155889664, - "fanoutMean": 5.3095703125, - "recvTokensMax": 1422, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 512, - "globalTokens": 4096, + "tokensPerRank": 1, + "globalTokens": 8, "dispatch": { - "p50": 132.4159950017929, - "p90": 136.09600067138672, - "p95": 137.40800321102142, - "p99": 140.19200205802917 + "p50": 96.03200107812881, + "p90": 102.49599814414978, + "p95": 105.66399991512299, + "p99": 117.88800358772278 }, "combine": { - "p50": 224.16000068187714, - "p90": 228.2239943742752, - "p95": 229.312002658844, - "p99": 232.03200101852417 + "p50": 71.45600020885468, + "p90": 73.98399710655212, + "p95": 77.18399912118912, + "p99": 81.56800270080566 }, "roundtrip": { - "p50": 517.5039768218994, - "p90": 522.5920081138611, - "p95": 523.8400101661682, - "p99": 534.1759920120239 + "p50": 142.04800128936768, + "p90": 149.98400211334229, + "p95": 151.45599842071533, + "p99": 159.07199680805206 }, "isolatedSum": { - "p50": 356.57599568367004, - "p90": 364.3199950456619, - "p95": 366.7200058698654, - "p99": 372.22400307655334 + "p50": 167.4880012869835, + "p90": 176.4799952507019, + "p95": 182.8479990363121, + "p99": 199.45600628852844 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 156133376, - "combineLogicalBytes": 312266752, 
- "fanoutMean": 5.31787109375, - "recvTokensMax": 2779, - "stragglerRank": 4, + "dispatchLogicalBytes": 114688, + "combineLogicalBytes": 114688, + "fanoutMean": 1, + "recvTokensMax": 4, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 1024, - "globalTokens": 8192, + "tokensPerRank": 8, + "globalTokens": 64, "dispatch": { - "p50": 203.5199999809265, - "p90": 207.39200711250305, - "p95": 208.95999670028687, - "p99": 213.1199985742569 + "p50": 98.9760011434555, + "p90": 106.62399977445602, + "p95": 110.07999628782272, + "p99": 123.00799787044525 }, "combine": { - "p50": 359.0719997882843, - "p90": 364.25599455833435, - "p95": 365.4080033302307, - "p99": 367.35999584198 + "p50": 71.32799923419952, + "p90": 73.69600236415863, + "p95": 78.52800190448761, + "p99": 80.22399991750717 }, "roundtrip": { - "p50": 883.679986000061, - "p90": 889.6960020065308, - "p95": 891.5839791297913, - "p99": 897.7599740028381 + "p50": 143.26399564743042, + "p90": 150.14399588108063, + "p95": 153.1520038843155, + "p99": 162.88000345230103 }, "isolatedSum": { - "p50": 562.5919997692108, - "p90": 571.6480016708374, - "p95": 574.3680000305176, - "p99": 580.4799944162369 + "p50": 170.30400037765503, + "p90": 180.32000213861465, + "p95": 188.60799819231033, + "p99": 203.23199778795242 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 311721984, - "combineLogicalBytes": 623443968, - "fanoutMean": 5.30859375, - "recvTokensMax": 5505, - "stragglerRank": 4, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 1, + "recvTokensMax": 8, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2048, - "globalTokens": 16384, + "tokensPerRank": 32, + "globalTokens": 256, "dispatch": { - "p50": 341.0240113735199, - "p90": 352.9280126094818, - "p95": 354.7840118408203, - "p99": 361.31200194358826 + "p50": 100.09600222110748, + "p90": 107.61599987745285, + "p95": 112.31999844312668, + 
"p99": 163.16799819469452 }, "combine": { - "p50": 631.2000155448914, - "p90": 639.136016368866, - "p95": 641.5359973907471, - "p99": 644.1599726676941 + "p50": 79.71200346946716, + "p90": 87.16800063848495, + "p95": 87.74399757385254, + "p99": 95.8079993724823 }, "roundtrip": { - "p50": 1616.5440082550049, - "p90": 1624.9920129776, - "p95": 1627.3599863052368, - "p99": 1631.9680213928223 + "p50": 154.01600301265717, + "p90": 161.47199273109436, + "p95": 164.5440012216568, + "p99": 176.83200538158417 }, "isolatedSum": { - "p50": 972.2240269184113, - "p90": 992.0640289783478, - "p95": 996.3200092315674, - "p99": 1005.4719746112823 + "p50": 179.80800569057465, + "p90": 194.7840005159378, + "p95": 200.06399601697922, + "p99": 258.9759975671768 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 621902848, - "combineLogicalBytes": 1243805696, - "fanoutMean": 5.29547119140625, - "recvTokensMax": 10952, - "stragglerRank": 4, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 1, + "recvTokensMax": 32, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 4096, - "globalTokens": 32768, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 611.6160154342651, - "p90": 621.0240125656128, - "p95": 624.0959763526917, - "p99": 790.3040051460266 + "p50": 104.38399761915207, + "p90": 108.44799876213074, + "p95": 111.455999314785, + "p99": 119.74400281906128 }, "combine": { - "p50": 1165.503978729248, - "p90": 1175.487995147705, - "p95": 1177.664041519165, - "p99": 1188.9280080795288 + "p50": 83.26400071382523, + "p90": 88.03199976682663, + "p95": 88.22400122880936, + "p99": 92.83199906349182 }, "roundtrip": { - "p50": 3078.4640312194824, - "p90": 3095.8399772644043, - "p95": 3103.071928024292, - "p99": 3115.9679889678955 + "p50": 154.9759954214096, + "p90": 161.18399798870087, + "p95": 165.0879979133606, + "p99": 170.01600563526154 }, "isolatedSum": { - "p50": 1777.1199941635132, - 
"p90": 1796.5120077133179, - "p95": 1801.7600178718567, - "p99": 1979.2320132255554 + "p50": 187.6479983329773, + "p90": 196.47999852895737, + "p95": 199.68000054359436, + "p99": 212.5760018825531 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1243504640, - "combineLogicalBytes": 2487009280, - "fanoutMean": 5.294189453125, - "recvTokensMax": 21781, - "stragglerRank": 4, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 1, + "recvTokensMax": 128, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 @@ -39846,37 +41336,38 @@ ] }, { - "id": "cx-ab8f0534", - "identity": "h200|deepep|4096|8|128|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||75530960a30b452", - "colorKey": "h200_3a47b6c9", - "comparisonKey": "40ee6d196d286895", + "id": "cx-94696c7b", + "identity": "h100|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|balanced+eplb|8|decode|normal|none|none|0|tuned||f0e66a15078595b", + "colorKey": "h100_c0c0ad86", + "comparisonKey": "00faf19eae8c1230", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:53:38.574880+00:00", + "generatedAt": "2026-06-27T00:00:00.906485+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h200-dgxc-slurm_6", - "sku": "h200", + "runner": "h100-dgxc-slurm_19", + "sku": "h100", "backend": "deepep", "phase": "decode", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "runtime-visible-v1", - "topologyClass": "h200-nvlink-island", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H200 EP8 · deepep · bf16", + "label": "H100 EP8 · deepep · bf16 · balanced+eplb", + "model": "DeepSeek-V3 (EPLB physical)", "shape": { - "hidden": 4096, + "hidden": 7168, "topk": 8, - "experts": 128, - "routing": "uniform", - "routingLabel": "uniform", + "experts": 288, + 
"routing": "balanced", + "routingLabel": "balanced+eplb", "routingStep": 0, "unevenTokens": "none", - "eplbEnabled": false, + "eplbEnabled": true, "dispatchDtype": "bf16", "activationProfile": "normal", "combineQuantMode": "none" @@ -39898,18 +41389,18 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "75530960a30b452", - "workloadId": "set:8:d1b92539bddfb570", + "traceSignature": "f0e66a15078595b", + "workloadId": "set:8:7af12818400d6348", "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, + "eplbImbalanceBefore": 1, + "eplbImbalanceAfter": 1, "backendVersion": "1.2.1", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271743900", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271743900", - "createdAt": "2026-06-26T23:52:15Z", + "id": "28271935069", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271935069", + "createdAt": "2026-06-27T00:00:00.906485+00:00", "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" }, "rows": [ @@ -39917,35 +41408,35 @@ "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 68.64000111818314, - "p90": 89.56799656152725, - "p95": 96.41599655151367, - "p99": 126.36800110340118 + "p50": 97.63199836015701, + "p90": 106.36799782514572, + "p95": 109.63200032711029, + "p99": 118.65600198507309 }, "combine": { - "p50": 58.04799869656563, - "p90": 69.60000097751617, - "p95": 74.52800124883652, - "p99": 91.80799871683121 + "p50": 71.45600020885468, + "p90": 78.94399762153625, + "p95": 79.42400127649307, + "p99": 82.24000036716461 }, "roundtrip": { - "p50": 112.73600161075592, - "p90": 135.93600690364838, - "p95": 145.7280069589615, - "p99": 215.26400744915009 + "p50": 145.4080045223236, + "p90": 154.23999726772308, + "p95": 155.64799308776855, + "p99": 157.98400342464447 }, "isolatedSum": { - "p50": 126.68799981474876, - 
"p90": 159.16799753904343, - "p95": 170.9439978003502, - "p99": 218.1759998202324 + "p50": 169.0879985690117, + "p90": 185.31199544668198, + "p95": 189.05600160360336, + "p99": 200.8960023522377 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 344064, - "combineLogicalBytes": 344064, - "fanoutMean": 5.25, - "recvTokensMax": 6, - "stragglerRank": 0, + "dispatchLogicalBytes": 229376, + "combineLogicalBytes": 229376, + "fanoutMean": 2, + "recvTokensMax": 3, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -39954,35 +41445,35 @@ "tokensPerRank": 2, "globalTokens": 16, "dispatch": { - "p50": 69.34399902820587, - "p90": 88.22400122880936, - "p95": 94.68799829483032, - "p99": 116.15999788045883 + "p50": 77.08799839019775, + "p90": 104.06400263309479, + "p95": 105.66399991512299, + "p99": 111.1999973654747 }, "combine": { - "p50": 58.94400179386139, - "p90": 68.70400160551071, - "p95": 72.03199714422226, - "p99": 83.52000266313553 + "p50": 65.05600363016129, + "p90": 74.5600014925003, + "p95": 79.00799810886383, + "p99": 82.33600109815598 }, "roundtrip": { - "p50": 112.89600282907486, - "p90": 138.3039951324463, - "p95": 150.52799880504608, - "p99": 196.51199877262115 + "p50": 122.8799968957901, + "p90": 151.64799988269806, + "p95": 153.24799716472626, + "p99": 161.50400042533875 }, "isolatedSum": { - "p50": 128.28800082206726, - "p90": 156.92800283432007, - "p95": 166.71999543905258, - "p99": 199.68000054359436 + "p50": 142.14400202035904, + "p90": 178.6240041255951, + "p95": 184.67199802398682, + "p99": 193.53599846363068 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 704512, - "combineLogicalBytes": 704512, - "fanoutMean": 5.375, - "recvTokensMax": 12, - "stragglerRank": 2, + "dispatchLogicalBytes": 458752, + "combineLogicalBytes": 458752, + "fanoutMean": 2, + "recvTokensMax": 6, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -39991,35 +41482,35 @@ "tokensPerRank": 4, "globalTokens": 32, "dispatch": 
{ - "p50": 70.46400010585785, - "p90": 84.63999629020691, - "p95": 92.0960009098053, - "p99": 110.78400164842606 + "p50": 97.56799787282944, + "p90": 107.80800133943558, + "p95": 114.04799669981003, + "p99": 120.44800072908401 }, "combine": { - "p50": 60.28800085186958, - "p90": 70.91200351715088, - "p95": 75.16799867153168, - "p99": 87.5839963555336 + "p50": 65.69600105285645, + "p90": 78.87999713420868, + "p95": 79.32800054550171, + "p99": 87.13600039482117 }, "roundtrip": { - "p50": 114.20799791812897, - "p90": 135.68000495433807, - "p95": 147.64800667762756, - "p99": 195.5520063638687 + "p50": 123.99999797344208, + "p90": 158.720001578331, + "p95": 165.3439998626709, + "p99": 176.28799378871918 }, "isolatedSum": { - "p50": 130.75200095772743, - "p90": 155.5519998073578, - "p95": 167.26399958133698, - "p99": 198.36799800395966 + "p50": 163.26399892568588, + "p90": 186.68799847364426, + "p95": 193.37599724531174, + "p99": 207.58400112390518 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1384448, - "combineLogicalBytes": 1384448, - "fanoutMean": 5.28125, - "recvTokensMax": 26, - "stragglerRank": 4, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 2, + "recvTokensMax": 12, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -40028,35 +41519,35 @@ "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 72.38399982452393, - "p90": 103.71199995279312, - "p95": 118.72000247240067, - "p99": 215.61600267887115 + "p50": 98.11200201511383, + "p90": 105.76000064611435, + "p95": 108.64000022411346, + "p99": 122.30399996042252 }, "combine": { - "p50": 61.055999249219894, - "p90": 76.03199779987335, - "p95": 81.7599967122078, - "p99": 112.57600039243698 + "p50": 72.22399860620499, + "p90": 79.1039988398552, + "p95": 80.38400113582611, + "p99": 87.0399996638298 }, "roundtrip": { - "p50": 115.84000289440155, - "p90": 143.51999759674072, - "p95": 151.67999267578125, - "p99": 190.46400487422943 + "p50": 
145.28000354766846, + "p90": 152.54400670528412, + "p95": 155.39200603961945, + "p99": 160.38399934768677 }, "isolatedSum": { - "p50": 133.43999907374382, - "p90": 179.74399775266647, - "p95": 200.47999918460846, - "p99": 328.19200307130814 + "p50": 170.33600062131882, + "p90": 184.86399948596954, + "p95": 189.02400135993958, + "p99": 209.34399962425232 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2744320, - "combineLogicalBytes": 2744320, - "fanoutMean": 5.234375, - "recvTokensMax": 49, - "stragglerRank": 0, + "dispatchLogicalBytes": 1835008, + "combineLogicalBytes": 1835008, + "fanoutMean": 2, + "recvTokensMax": 24, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -40065,35 +41556,35 @@ "tokensPerRank": 16, "globalTokens": 128, "dispatch": { - "p50": 75.23199915885925, - "p90": 102.04800218343735, - "p95": 107.87200182676315, - "p99": 130.20800054073334 + "p50": 98.52799773216248, + "p90": 109.69600081443787, + "p95": 117.34399944543839, + "p99": 131.45600259304047 }, "combine": { - "p50": 61.792001128196716, - "p90": 71.16799801588058, - "p95": 76.64000242948532, - "p99": 86.84799820184708 + "p50": 78.59200239181519, + "p90": 81.53600245714188, + "p95": 86.91199868917465, + "p99": 88.32000195980072 }, "roundtrip": { - "p50": 116.92799627780914, - "p90": 138.2399946451187, - "p95": 147.96799421310425, - "p99": 179.967999458313 + "p50": 146.97599411010742, + "p90": 156.47999942302704, + "p95": 161.56800091266632, + "p99": 173.18400740623474 }, "isolatedSum": { - "p50": 137.02400028705597, - "p90": 173.21600019931793, - "p95": 184.51200425624847, - "p99": 217.0559987425804 + "p50": 177.12000012397766, + "p90": 191.23200327157974, + "p95": 204.25599813461304, + "p99": 219.7760045528412 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 5464064, - "combineLogicalBytes": 5464064, - "fanoutMean": 5.2109375, - "recvTokensMax": 94, - "stragglerRank": 1, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + 
"fanoutMean": 2, + "recvTokensMax": 48, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -40102,35 +41593,35 @@ "tokensPerRank": 32, "globalTokens": 256, "dispatch": { - "p50": 74.75200295448303, - "p90": 90.52799642086029, - "p95": 98.75199943780899, - "p99": 135.48800349235535 + "p50": 84.54400300979614, + "p90": 107.07200318574905, + "p95": 113.40799927711487, + "p99": 126.08000636100769 }, "combine": { - "p50": 63.74400109052658, - "p90": 71.71200215816498, - "p95": 78.78399640321732, - "p99": 91.07200056314468 + "p50": 71.10399752855301, + "p90": 80.57600259780884, + "p95": 87.13600039482117, + "p99": 95.51999717950821 }, "roundtrip": { - "p50": 119.9679970741272, - "p90": 145.47200500965118, - "p95": 149.50400590896606, - "p99": 165.8879965543747 + "p50": 127.93600559234619, + "p90": 151.7760008573532, + "p95": 154.40000593662262, + "p99": 161.56800091266632 }, "isolatedSum": { - "p50": 138.4960040450096, - "p90": 162.23999857902527, - "p95": 177.5359958410263, - "p99": 226.56000405550003 + "p50": 155.64800053834915, + "p90": 187.6480057835579, + "p95": 200.54399967193604, + "p99": 221.6000035405159 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 11124736, - "combineLogicalBytes": 11124736, - "fanoutMean": 5.3046875, - "recvTokensMax": 186, - "stragglerRank": 0, + "dispatchLogicalBytes": 7340032, + "combineLogicalBytes": 7340032, + "fanoutMean": 2, + "recvTokensMax": 96, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -40139,35 +41630,35 @@ "tokensPerRank": 64, "globalTokens": 512, "dispatch": { - "p50": 84.60800349712372, - "p90": 103.13600301742554, - "p95": 112.22399771213531, - "p99": 138.11199367046356 + "p50": 100.35199671983719, + "p90": 113.37599903345108, + "p95": 126.49600207805634, + "p99": 162.1759980916977 }, "combine": { - "p50": 72.03199714422226, - "p90": 82.78399705886841, - "p95": 89.56799656152725, - "p99": 104.92800176143646 + "p50": 79.58400249481201, + "p90": 
87.16800063848495, + "p95": 87.71199733018875, + "p99": 95.45599669218063 }, "roundtrip": { - "p50": 131.48799538612366, - "p90": 145.50399780273438, - "p95": 155.8080017566681, - "p99": 189.66400623321533 + "p50": 154.62400019168854, + "p90": 165.18400609493256, + "p95": 170.27199268341064, + "p99": 184.7359985113144 }, "isolatedSum": { - "p50": 156.64000064134598, - "p90": 185.92000007629395, - "p95": 201.79199427366257, - "p99": 243.03999543190002 + "p50": 179.9359992146492, + "p90": 200.54399967193604, + "p95": 214.2079994082451, + "p99": 257.6319947838783 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 22192128, - "combineLogicalBytes": 22192128, - "fanoutMean": 5.291015625, - "recvTokensMax": 358, - "stragglerRank": 0, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 2, + "recvTokensMax": 192, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 @@ -40176,35 +41667,35 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 94.2080020904541, - "p90": 120.2239990234375, - "p95": 133.82400572299957, - "p99": 215.68000316619873 + "p50": 98.1760025024414, + "p90": 120.80000340938568, + "p95": 125.56800246238708, + "p99": 134.49600338935852 }, "combine": { - "p50": 82.8159973025322, - "p90": 92.70399808883667, - "p95": 96.12800180912018, - "p99": 107.04000294208527 + "p50": 87.77599781751633, + "p90": 96.0640013217926, + "p95": 97.69599884748459, + "p99": 107.35999792814255 }, "roundtrip": { - "p50": 152.22400426864624, - "p90": 168.32000017166138, - "p95": 176.2239933013916, - "p99": 196.03200256824493 + "p50": 160.70400178432465, + "p90": 178.3680021762848, + "p95": 184.1920018196106, + "p99": 190.62399864196777 }, "isolatedSum": { - "p50": 177.0239993929863, - "p90": 212.92799711227417, - "p95": 229.95200753211975, - "p99": 322.720006108284 + "p50": 185.95200031995773, + "p90": 216.86400473117828, + "p95": 223.26400130987167, + "p99": 241.85600131750107 }, "roundtripMeasured": true, 
- "dispatchLogicalBytes": 44564480, - "combineLogicalBytes": 44564480, - "fanoutMean": 5.3125, - "recvTokensMax": 699, - "stragglerRank": 4, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 2, + "recvTokensMax": 384, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -40212,34 +41703,35 @@ ] }, { - "id": "cx-3d690e39", - "identity": "h200|deepep|5120|8|160|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||be1b44a963bd4ef", - "colorKey": "h200_3a47b6c9", - "comparisonKey": "540c08b08c068f8c", + "id": "cx-b4d89049", + "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-moving|8|decode|normal|none|none|0|tuned||90042e0db6a8297", + "colorKey": "h100_1c83c0b0", + "comparisonKey": "b84a29c0643a5455", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:54:06.885074+00:00", + "generatedAt": "2026-06-27T00:11:39.736162+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h200-dgxc-slurm_4", - "sku": "h200", + "runner": "h100-dgxc-slurm_09", + "sku": "h100", "backend": "deepep", "phase": "decode", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "runtime-visible-v1", - "topologyClass": "h200-nvlink-island", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H200 EP8 · deepep · bf16", + "label": "H100 EP8 · deepep · bf16 · hotspot-moving", + "model": "DeepSeek-V3/V4", "shape": { - "hidden": 5120, + "hidden": 7168, "topk": 8, - "experts": 160, - "routing": "uniform", - "routingLabel": "uniform", + "experts": 256, + "routing": "hotspot-moving", + "routingLabel": "hotspot-moving", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, @@ -40264,8 +41756,8 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "be1b44a963bd4ef", - 
"workloadId": "set:8:34e5874082f8ea8f", + "traceSignature": "90042e0db6a8297", + "workloadId": "set:3:8fd05d9ebee41064", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -40273,304 +41765,119 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271759919", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271759919", - "createdAt": "2026-06-26T23:52:42Z", + "id": "28272315381", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272315381", + "createdAt": "2026-06-27T00:11:39.736162+00:00", "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" }, "rows": [ { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 70.49600034952164, - "p90": 102.1760031580925, - "p95": 111.90400272607803, - "p99": 133.34399461746216 - }, - "combine": { - "p50": 60.5119988322258, - "p90": 72.9919970035553, - "p95": 79.55200225114822, - "p99": 90.55999666452408 - }, - "roundtrip": { - "p50": 113.8560026884079, - "p90": 143.5839980840683, - "p95": 150.94399452209473, - "p99": 190.14400243759155 - }, - "isolatedSum": { - "p50": 131.00799918174744, - "p90": 175.1680001616478, - "p95": 191.45600497722626, - "p99": 223.90399128198624 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 430080, - "combineLogicalBytes": 430080, - "fanoutMean": 5.25, - "recvTokensMax": 8, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, + "tokensPerRank": 8, + "globalTokens": 64, "dispatch": { - "p50": 73.88799637556076, - "p90": 109.43999886512756, - "p95": 123.74400347471237, - "p99": 176.2239933013916 + "p50": 98.1760025024414, + "p90": 105.12000322341919, + "p95": 107.4879989027977, + "p99": 114.43199962377548 }, "combine": { - "p50": 62.463998794555664, - "p90": 76.4480009675026, - "p95": 81.37600123882294, - "p99": 
89.6959975361824 + "p50": 81.216000020504, + "p90": 87.8399983048439, + "p95": 88.19200098514557, + "p99": 89.08800035715103 }, "roundtrip": { - "p50": 118.40000003576279, - "p90": 146.7839926481247, - "p95": 154.88000214099884, - "p99": 198.0160027742386 + "p50": 154.4959992170334, + "p90": 160.99199652671814, + "p95": 162.59199380874634, + "p99": 167.35999286174774 }, "isolatedSum": { - "p50": 136.35199517011642, - "p90": 185.88799983263016, - "p95": 205.1200047135353, - "p99": 265.919990837574 + "p50": 179.3920025229454, + "p90": 192.9600015282631, + "p95": 195.67999988794327, + "p99": 203.5199999809265 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 880640, - "combineLogicalBytes": 880640, - "fanoutMean": 5.375, - "recvTokensMax": 13, - "stragglerRank": 6, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 64, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 4, - "globalTokens": 32, + "tokensPerRank": 32, + "globalTokens": 256, "dispatch": { - "p50": 72.12799787521362, - "p90": 100.8640006184578, - "p95": 107.84000158309937, - "p99": 182.5920045375824 + "p50": 104.3199971318245, + "p90": 109.98400300741196, + "p95": 111.77600175142288, + "p99": 118.81600320339203 }, "combine": { - "p50": 62.24000081419945, - "p90": 77.504001557827, - "p95": 82.36800134181976, - "p99": 100.22400319576263 + "p50": 89.1840010881424, + "p90": 95.58399766683578, + "p95": 96.09600156545639, + "p99": 97.18400239944458 }, "roundtrip": { - "p50": 116.64000153541565, - "p90": 148.3840048313141, - "p95": 158.49600732326508, - "p99": 193.34399700164795 + "p50": 164.2560064792633, + "p90": 169.69600319862366, + "p95": 171.64799571037292, + "p99": 176.64000391960144 }, "isolatedSum": { - "p50": 134.36799868941307, - "p90": 178.3680021762848, - "p95": 190.20800292491913, - "p99": 282.81600773334503 + "p50": 193.5039982199669, + "p90": 205.56800067424774, + "p95": 
207.87200331687927, + "p99": 216.0000056028366 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1740800, - "combineLogicalBytes": 1740800, - "fanoutMean": 5.3125, - "recvTokensMax": 25, - "stragglerRank": 3, + "dispatchLogicalBytes": 19525632, + "combineLogicalBytes": 19525632, + "fanoutMean": 5.3203125, + "recvTokensMax": 256, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 72.60800153017044, - "p90": 101.1200025677681, - "p95": 114.1119971871376, - "p99": 128.06400656700134 + "p50": 137.28000223636627, + "p90": 146.11199498176575, + "p95": 149.6639996767044, + "p99": 152.19199657440186 }, "combine": { - "p50": 63.74400109052658, - "p90": 79.26400005817413, - "p95": 85.50400286912918, - "p99": 120.03199756145477 + "p50": 128.48000228405, + "p90": 130.14400005340576, + "p95": 130.65600395202637, + "p99": 136.57599687576294 }, "roundtrip": { - "p50": 117.53600090742111, - "p90": 147.74399995803833, - "p95": 156.8319946527481, - "p99": 184.54399704933167 + "p50": 231.10400140285492, + "p90": 236.4799976348877, + "p95": 238.11200261116028, + "p99": 242.88000166416168 }, "isolatedSum": { - "p50": 136.35200262069702, - "p90": 180.38400262594223, - "p95": 199.61600005626678, - "p99": 248.09600412845612 + "p50": 265.76000452041626, + "p90": 276.2559950351715, + "p95": 280.3200036287308, + "p99": 288.7679934501648 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 3471360, - "combineLogicalBytes": 3471360, - "fanoutMean": 5.296875, - "recvTokensMax": 50, - "stragglerRank": 3, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 75.9039968252182, - "p90": 101.79200023412704, - "p95": 111.77600175142288, - "p99": 127.9039978981018 - }, - "combine": { - "p50": 64.41599875688553, - "p90": 79.68000322580338, - "p95": 84.06399935483932, - 
"p99": 103.61599922180176 - }, - "roundtrip": { - "p50": 124.09599870443344, - "p90": 154.91199493408203, - "p95": 167.35999286174774, - "p99": 218.6560034751892 - }, - "isolatedSum": { - "p50": 140.31999558210373, - "p90": 181.47200345993042, - "p95": 195.8400011062622, - "p99": 231.51999711990356 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 6912000, - "combineLogicalBytes": 6912000, - "fanoutMean": 5.2734375, - "recvTokensMax": 93, - "stragglerRank": 1, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 77.63200253248215, - "p90": 102.08000242710114, - "p95": 110.1439967751503, - "p99": 138.5280042886734 - }, - "combine": { - "p50": 68.4799998998642, - "p90": 83.45600217580795, - "p95": 89.50400352478027, - "p99": 97.82399982213974 - }, - "roundtrip": { - "p50": 122.81599640846252, - "p90": 153.50399911403656, - "p95": 163.13600540161133, - "p99": 190.5599981546402 - }, - "isolatedSum": { - "p50": 146.11200243234634, - "p90": 185.5360046029091, - "p95": 199.64800029993057, - "p99": 236.35200411081314 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 13977600, - "combineLogicalBytes": 13977600, - "fanoutMean": 5.33203125, - "recvTokensMax": 179, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 64, - "globalTokens": 512, - "dispatch": { - "p50": 91.90399944782257, - "p90": 113.08799684047699, - "p95": 123.52000176906586, - "p99": 162.9759967327118 - }, - "combine": { - "p50": 77.15199887752533, - "p90": 91.13600105047226, - "p95": 97.59999811649323, - "p99": 112.06399649381638 - }, - "roundtrip": { - "p50": 140.47999680042267, - "p90": 166.75199568271637, - "p95": 175.9359985589981, - "p99": 250.20799040794373 - }, - "isolatedSum": { - "p50": 169.0559983253479, - "p90": 204.22399789094925, - "p95": 221.11999988555908, - "p99": 275.03999322652817 - }, - "roundtripMeasured": true, - 
"dispatchLogicalBytes": 27975680, - "combineLogicalBytes": 27975680, - "fanoutMean": 5.3359375, - "recvTokensMax": 355, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 99.07200187444687, - "p90": 122.27199971675873, - "p95": 127.42400169372559, - "p99": 146.7519998550415 - }, - "combine": { - "p50": 90.87999910116196, - "p90": 105.3759977221489, - "p95": 109.37599837779999, - "p99": 125.37600100040436 - }, - "roundtrip": { - "p50": 166.4319932460785, - "p90": 186.5919977426529, - "p95": 193.12000274658203, - "p99": 222.01600670814514 - }, - "isolatedSum": { - "p50": 189.95200097560883, - "p90": 227.64799743890762, - "p95": 236.80000007152557, - "p99": 272.12800085544586 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 55674880, - "combineLogicalBytes": 55674880, - "fanoutMean": 5.3095703125, - "recvTokensMax": 699, - "stragglerRank": 1, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1024, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -40578,35 +41885,36 @@ ] }, { - "id": "cx-e3311b84", - "identity": "h200|deepep|6144|8|256|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", - "colorKey": "h200_3a47b6c9", - "comparisonKey": "fc31c0a33afa32cc", + "id": "cx-595b6f36", + "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-moving@s2|8|decode|normal|none|none|2|tuned||675e15b52e37958", + "colorKey": "h100_55b1ee31", + "comparisonKey": "b84a29c0643a5455", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:54:56.726240+00:00", + "generatedAt": "2026-06-27T00:11:41.163804+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_7", - "sku": "h200", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_02", + "sku": "h100", 
"backend": "deepep", "phase": "decode", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "runtime-visible-v1", - "topologyClass": "h200-nvlink-island", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H200 EP8 · deepep · bf16", + "label": "H100 EP8 · deepep · bf16 · hotspot-moving@s2", + "model": "DeepSeek-V3/V4", "shape": { - "hidden": 6144, + "hidden": 7168, "topk": 8, "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, + "routing": "hotspot-moving", + "routingLabel": "hotspot-moving@s2", + "routingStep": 2, "unevenTokens": "none", "eplbEnabled": false, "dispatchDtype": "bf16", @@ -40630,202 +41938,54 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "ac583971f94b176", - "workloadId": "set:8:2e0df6a62cd0143e", - "workloadSource": "canonical-serialized", + "traceSignature": "675e15b52e37958", + "workloadId": null, + "workloadSource": "seeded-runtime", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, "backendVersion": "1.2.1", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271775418", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271775418", - "createdAt": "2026-06-26T23:53:10Z", + "id": "28272321917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272321917", + "createdAt": "2026-06-27T00:11:41.163804+00:00", "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" }, "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 73.05599749088287, - "p90": 102.7199998497963, - "p95": 111.35999858379364, - "p99": 123.00799787044525 - }, - "combine": { - "p50": 65.92000275850296, - "p90": 79.77599650621414, - "p95": 88.44800293445587, - "p99": 
126.30400061607361 - }, - "roundtrip": { - "p50": 118.78400295972824, - "p90": 148.28799664974213, - "p95": 155.8080017566681, - "p99": 184.64000523090363 - }, - "isolatedSum": { - "p50": 138.97600024938583, - "p90": 182.49599635601044, - "p95": 199.8080015182495, - "p99": 249.31199848651886 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 540672, - "combineLogicalBytes": 540672, - "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 74.5920017361641, - "p90": 107.07200318574905, - "p95": 120.51200121641159, - "p99": 142.87999272346497 - }, - "combine": { - "p50": 67.03999638557434, - "p90": 84.73599702119827, - "p95": 92.12800115346909, - "p99": 114.07999694347382 - }, - "roundtrip": { - "p50": 120.38400024175644, - "p90": 157.18400478363037, - "p95": 169.24799978733063, - "p99": 195.68000733852386 - }, - "isolatedSum": { - "p50": 141.63199812173843, - "p90": 191.80800020694733, - "p95": 212.64000236988068, - "p99": 256.9599896669388 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1056768, - "combineLogicalBytes": 1056768, - "fanoutMean": 5.375, - "recvTokensMax": 13, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 72.95999675989151, - "p90": 101.05600208044052, - "p95": 114.68800157308578, - "p99": 137.472003698349 - }, - "combine": { - "p50": 66.14399701356888, - "p90": 79.23199981451035, - "p95": 84.06399935483932, - "p99": 93.50399672985077 - }, - "roundtrip": { - "p50": 120.99199742078781, - "p90": 154.81600165367126, - "p95": 165.95199704170227, - "p99": 220.41599452495575 - }, - "isolatedSum": { - "p50": 139.1039937734604, - "p90": 180.28800189495087, - "p95": 198.7520009279251, - "p99": 230.97600042819977 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2125824, - 
"combineLogicalBytes": 2125824, - "fanoutMean": 5.40625, - "recvTokensMax": 29, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, { "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 73.08799773454666, - "p90": 102.78400033712387, - "p95": 110.88000237941742, - "p99": 142.17600226402283 - }, - "combine": { - "p50": 67.90400296449661, - "p90": 83.29600095748901, - "p95": 89.31200206279755, - "p99": 102.30399668216705 - }, - "roundtrip": { - "p50": 120.95999717712402, - "p90": 156.73600137233734, - "p95": 165.56799411773682, - "p99": 189.43999707698822 - }, - "isolatedSum": { - "p50": 140.99200069904327, - "p90": 186.08000129461288, - "p95": 200.19200444221497, - "p99": 244.47999894618988 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4263936, - "combineLogicalBytes": 4263936, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 1, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 74.81600344181061, - "p90": 102.65599936246872, - "p95": 110.68800091743469, - "p99": 122.49600142240524 + "p50": 98.24000298976898, + "p90": 103.96800190210342, + "p95": 106.30399733781815, + "p99": 111.07199639081955 }, "combine": { - "p50": 68.2239979505539, - "p90": 86.14400029182434, - "p95": 90.4960036277771, - "p99": 105.95200210809708 + "p50": 79.52000200748444, + "p90": 86.87999844551086, + "p95": 87.52000331878662, + "p99": 88.0960002541542 }, "roundtrip": { - "p50": 121.44000083208084, - "p90": 152.25599706172943, - "p95": 161.40800714492798, - "p99": 200.9280025959015 + "p50": 153.28000485897064, + "p90": 161.3759994506836, + "p95": 163.4880006313324, + "p99": 455.80801367759705 }, "isolatedSum": { - "p50": 143.0400013923645, - "p90": 188.79999965429306, - "p95": 201.1840045452118, - "p99": 228.44800353050232 + "p50": 177.76000499725342, + "p90": 190.8480003476143, + "p95": 193.82400065660477, + "p99": 199.16799664497375 
}, "roundtripMeasured": true, - "dispatchLogicalBytes": 8503296, - "combineLogicalBytes": 8503296, - "fanoutMean": 5.40625, - "recvTokensMax": 92, - "stragglerRank": 5, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 64, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 @@ -40834,109 +41994,72 @@ "tokensPerRank": 32, "globalTokens": 256, "dispatch": { - "p50": 82.49600231647491, - "p90": 114.01599645614624, - "p95": 123.74400347471237, - "p99": 148.3519971370697 + "p50": 103.84000092744827, + "p90": 109.0560033917427, + "p95": 110.1439967751503, + "p99": 113.88800293207169 }, "combine": { - "p50": 74.14399832487106, - "p90": 88.60799670219421, - "p95": 94.11200135946274, - "p99": 106.81600123643875 + "p50": 87.87199854850769, + "p90": 95.32800316810608, + "p95": 95.90400010347366, + "p99": 96.25600278377533 }, "roundtrip": { - "p50": 128.54400277137756, - "p90": 162.33600676059723, - "p95": 178.20799350738525, - "p99": 222.30400145053864 + "p50": 161.98399662971497, + "p90": 168.99199783802032, + "p95": 170.56000232696533, + "p99": 175.80799758434296 }, "isolatedSum": { - "p50": 156.64000064134598, - "p90": 202.62399315834045, - "p95": 217.8560048341751, - "p99": 255.16799837350845 + "p50": 191.71199947595596, + "p90": 204.38400655984879, + "p95": 206.04799687862396, + "p99": 210.14400571584702 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 16908288, - "combineLogicalBytes": 16908288, - "fanoutMean": 5.375, - "recvTokensMax": 182, + "dispatchLogicalBytes": 19525632, + "combineLogicalBytes": 19525632, + "fanoutMean": 5.3203125, + "recvTokensMax": 256, "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 }, - { - "tokensPerRank": 64, - "globalTokens": 512, - "dispatch": { - "p50": 94.36800330877304, - "p90": 133.02400708198547, - "p95": 138.49599659442902, - "p99": 182.20800161361694 - }, - "combine": { - "p50": 81.44000172615051, - "p90": 
95.42399644851685, - "p95": 100.5759984254837, - "p99": 123.74400347471237 - }, - "roundtrip": { - "p50": 151.2320041656494, - "p90": 172.03199863433838, - "p95": 182.17599391937256, - "p99": 404.1599929332733 - }, - "isolatedSum": { - "p50": 175.80800503492355, - "p90": 228.44800353050232, - "p95": 239.07199501991272, - "p99": 305.9520050883293 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 33423360, - "combineLogicalBytes": 33423360, - "fanoutMean": 5.3125, - "recvTokensMax": 367, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, { "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 109.0880036354065, - "p90": 134.68800485134125, - "p95": 142.752006649971, - "p99": 173.3119934797287 + "p50": 136.80000603199005, + "p90": 145.4399973154068, + "p95": 146.68799936771393, + "p99": 149.4079977273941 }, "combine": { - "p50": 97.43999689817429, - "p90": 114.97599631547928, - "p95": 121.08799815177917, - "p99": 138.75199854373932 + "p50": 123.99999797344208, + "p90": 129.05600666999817, + "p95": 130.36799430847168, + "p99": 136.00000739097595 }, "roundtrip": { - "p50": 180.1919937133789, - "p90": 205.56800067424774, - "p95": 210.07999777793884, - "p99": 237.7600073814392 + "p50": 228.7999987602234, + "p90": 236.12800240516663, + "p95": 237.98400163650513, + "p99": 241.5039986371994 }, "isolatedSum": { - "p50": 206.52800053358078, - "p90": 249.66400116682053, - "p95": 263.8400048017502, - "p99": 312.063992023468 + "p50": 260.80000400543213, + "p90": 274.49600398540497, + "p95": 277.0559936761856, + "p99": 285.40800511837006 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 66576384, - "combineLogicalBytes": 66576384, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 5, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1024, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -40944,16 +42067,16 
@@ ] }, { - "id": "cx-a3bb3bd5", - "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|fp8-saturation|none|none|0|tuned||8c8497a77d9085d", - "colorKey": "h200_d982b749", - "comparisonKey": "1e550a8055ce0039", + "id": "cx-f5ba95c3", + "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-moving@s3|8|decode|normal|none|none|3|tuned||82b2963fc322419", + "colorKey": "h100_54b1ec9e", + "comparisonKey": "b84a29c0643a5455", "schemaVersion": 3, - "generatedAt": "2026-06-27T00:06:16.783949+00:00", + "generatedAt": "2026-06-27T00:12:09.752348+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_12", - "sku": "h200", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_14", + "sku": "h100", "backend": "deepep", "phase": "decode", "mode": "normal", @@ -40961,22 +42084,23 @@ "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", + "topologyClass": "h100-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H200 EP8 · deepep · bf16", + "label": "H100 EP8 · deepep · bf16 · hotspot-moving@s3", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, + "routing": "hotspot-moving", + "routingLabel": "hotspot-moving@s3", + "routingStep": 3, "unevenTokens": "none", "eplbEnabled": false, "dispatchDtype": "bf16", - "activationProfile": "fp8-saturation", + "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { @@ -40996,91 +42120,54 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "8c8497a77d9085d", - "workloadId": "set:4:d8d49658059863f2", - "workloadSource": "canonical-serialized", + "traceSignature": "82b2963fc322419", + "workloadId": null, + "workloadSource": "seeded-runtime", 
"eplbImbalanceBefore": null, "eplbImbalanceAfter": null, "backendVersion": "1.2.1", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28272139795", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272139795", - "createdAt": "2026-06-27T00:04:50Z", + "id": "28272325031", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272325031", + "createdAt": "2026-06-27T00:12:09.752348+00:00", "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" }, "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 82.65600353479385, - "p90": 133.59999656677246, - "p95": 142.59199798107147, - "p99": 158.4320068359375 - }, - "combine": { - "p50": 76.38400048017502, - "p90": 99.61599856615067, - "p95": 103.84000092744827, - "p99": 158.1760048866272 - }, - "roundtrip": { - "p50": 128.35200130939484, - "p90": 157.21599757671356, - "p95": 169.63200271129608, - "p99": 325.6959915161133 - }, - "isolatedSum": { - "p50": 159.04000401496887, - "p90": 233.21599513292313, - "p95": 246.43199890851974, - "p99": 316.6080117225647 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 630784, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 1, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, { "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 73.60000163316727, - "p90": 94.81599926948547, - "p95": 101.82400047779083, - "p99": 127.32799351215363 + "p50": 97.98400104045868, + "p90": 104.44799810647964, + "p95": 107.84000158309937, + "p99": 116.06399714946747 }, "combine": { - "p50": 70.23999840021133, - "p90": 99.16800260543823, - "p95": 101.34399682283401, - "p99": 121.34400010108948 + "p50": 81.02399855852127, + "p90": 87.64799684286118, + "p95": 88.06400001049042, + "p99": 96.00000083446503 }, "roundtrip": { - "p50": 130.5599957704544, - "p90": 186.46399676799774, - 
"p95": 191.3280040025711, - "p99": 227.48799622058868 + "p50": 156.41599893569946, + "p90": 162.62400150299072, + "p95": 165.75999557971954, + "p99": 176.7359972000122 }, "isolatedSum": { - "p50": 143.8400000333786, - "p90": 193.9840018749237, - "p95": 203.16799730062485, - "p99": 248.6719936132431 + "p50": 179.00799959897995, + "p90": 192.09599494934082, + "p95": 195.90400159358978, + "p99": 212.0639979839325 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 3, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 64, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 @@ -41089,35 +42176,35 @@ "tokensPerRank": 32, "globalTokens": 256, "dispatch": { - "p50": 84.70399677753448, - "p90": 100.44799745082855, - "p95": 106.55999928712845, - "p99": 121.18399888277054 + "p50": 103.20000350475311, + "p90": 107.39199817180634, + "p95": 111.51999980211258, + "p99": 119.00799721479416 }, "combine": { - "p50": 77.47200131416321, - "p90": 89.47200328111649, - "p95": 95.32800316810608, - "p99": 106.1440035700798 + "p50": 88.16000074148178, + "p90": 95.8079993724823, + "p95": 96.16000205278397, + "p99": 98.11200201511383 }, "roundtrip": { - "p50": 137.37599551677704, - "p90": 158.49600732326508, - "p95": 167.42399334907532, - "p99": 188.54400515556335 + "p50": 162.78399527072906, + "p90": 168.73599588871002, + "p95": 170.9440052509308, + "p99": 176.57600343227386 }, "isolatedSum": { - "p50": 162.1759980916977, - "p90": 189.92000073194504, - "p95": 201.88800245523453, - "p99": 227.32800245285034 + "p50": 191.3600042462349, + "p90": 203.19999754428864, + "p95": 207.68000185489655, + "p99": 217.119999229908 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19726336, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 182, - "stragglerRank": 3, + 
"dispatchLogicalBytes": 19525632, + "combineLogicalBytes": 19525632, + "fanoutMean": 5.3203125, + "recvTokensMax": 256, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 @@ -41126,35 +42213,35 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 117.69600212574005, - "p90": 133.15199315547943, - "p95": 140.25600254535675, - "p99": 154.7199934720993 + "p50": 137.28000223636627, + "p90": 149.1200029850006, + "p95": 151.0079950094223, + "p99": 153.18399667739868 }, "combine": { - "p50": 105.82400113344193, - "p90": 123.55200201272964, - "p95": 129.50399518013, - "p99": 141.85599982738495 + "p50": 128.86400520801544, + "p90": 131.1360001564026, + "p95": 135.71199774742126, + "p99": 138.3039951324463 }, "roundtrip": { - "p50": 196.83200120925903, - "p90": 213.69600296020508, - "p95": 222.04799950122833, - "p99": 265.8880054950714 + "p50": 234.49599742889404, + "p90": 241.4720058441162, + "p95": 242.65600740909576, + "p99": 247.9040026664734 }, "isolatedSum": { - "p50": 223.52000325918198, - "p90": 256.7039951682091, - "p95": 269.75999772548676, - "p99": 296.57599329948425 + "p50": 266.1440074443817, + "p90": 280.2560031414032, + "p95": 286.71999275684357, + "p99": 291.48799180984497 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 3, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1024, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 @@ -41162,16 +42249,16 @@ ] }, { - "id": "cx-0688a10c", - "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||2baace5eca64609", - "colorKey": "h200_d982b749", - "comparisonKey": "4dde4e46080a91eb", + "id": "cx-fb3ea9d7", + "identity": 
"h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|decode|normal|none|none|0|tuned||2ad5ef98d328fa1", + "colorKey": "h100_b654f9b2", + "comparisonKey": "10b5062b8e23fcad", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:47:12.411729+00:00", + "generatedAt": "2026-06-26T23:55:39.087780+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h200-dgxc-slurm_8", - "sku": "h200", + "runner": "h100-dgxc-slurm_00", + "sku": "h100", "backend": "deepep", "phase": "decode", "mode": "normal", @@ -41179,17 +42266,18 @@ "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", + "topologyClass": "h100-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H200 EP8 · deepep · bf16", + "label": "H100 EP8 · deepep · bf16 · hotspot-single", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", + "routing": "hotspot-single", + "routingLabel": "hotspot-single", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, @@ -41214,8 +42302,8 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "2baace5eca64609", - "workloadId": "set:2:07d544ac2af401ec", + "traceSignature": "2ad5ef98d328fa1", + "workloadId": "set:4:286be993cd819ed9", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -41223,82 +42311,156 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271536417", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271536417", - "createdAt": "2026-06-26T23:45:51Z", + "id": "28271817166", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271817166", + "createdAt": "2026-06-26T23:55:39.087780+00:00", "sha": 
"45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" }, "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 97.82399982213974, + "p90": 105.95200210809708, + "p95": 109.95200276374817, + "p99": 121.50400131940842 + }, + "combine": { + "p50": 80.25600016117096, + "p90": 81.88799768686295, + "p95": 83.3280012011528, + "p99": 89.37600255012512 + }, + "roundtrip": { + "p50": 152.12799608707428, + "p90": 158.78400206565857, + "p95": 160.64000129699707, + "p99": 166.81599617004395 + }, + "isolatedSum": { + "p50": 178.0799999833107, + "p90": 187.83999979496002, + "p95": 193.28000396490097, + "p99": 210.88000386953354 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, { "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 74.14399832487106, - "p90": 99.20000284910202, - "p95": 104.86400127410889, - "p99": 119.13599818944931 + "p50": 97.08800166845322, + "p90": 103.39199751615524, + "p95": 107.51999914646149, + "p99": 115.93600362539291 }, "combine": { - "p50": 70.0799971818924, - "p90": 87.13600039482117, - "p95": 91.839998960495, - "p99": 107.87200182676315 + "p50": 80.89599758386612, + "p90": 84.03199911117554, + "p95": 87.42400258779526, + "p99": 89.47200328111649 }, "roundtrip": { - "p50": 126.94400548934937, - "p90": 155.5519998073578, - "p95": 165.95199704170227, - "p99": 190.40000438690186 + "p50": 153.60000729560852, + "p90": 161.15200519561768, + "p95": 163.83999586105347, + "p99": 171.55200242996216 }, "isolatedSum": { - "p50": 144.22399550676346, - "p90": 186.3360032439232, - "p95": 196.70400023460388, - "p99": 227.00800001621246 + "p50": 177.98399925231934, + "p90": 187.42399662733078, + "p95": 194.94400173425674, + "p99": 205.4080069065094 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, - "combineLogicalBytes": 4974592, - "fanoutMean": 
5.421875, - "recvTokensMax": 47, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 64, "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 64, - "globalTokens": 512, + "tokensPerRank": 32, + "globalTokens": 256, "dispatch": { - "p50": 104.51199859380722, - "p90": 139.48799669742584, - "p95": 145.56799829006195, - "p99": 155.10399639606476 + "p50": 103.39199751615524, + "p90": 108.47999900579453, + "p95": 110.55999994277954, + "p99": 117.18399822711945 }, "combine": { - "p50": 87.45600283145905, - "p90": 101.40799731016159, - "p95": 109.24799740314484, - "p99": 129.2479932308197 + "p50": 89.34400230646133, + "p90": 95.551997423172, + "p95": 97.34400361776352, + "p99": 99.93600100278854 }, "roundtrip": { - "p50": 163.35999965667725, - "p90": 187.8719925880432, - "p95": 195.10400295257568, - "p99": 370.5599904060364 + "p50": 162.75200247764587, + "p90": 170.43200135231018, + "p95": 172.83199727535248, + "p99": 179.61600422859192 }, "isolatedSum": { - "p50": 191.96800142526627, - "p90": 240.89599400758743, - "p95": 254.8159956932068, - "p99": 284.35198962688446 + "p50": 192.73599982261658, + "p90": 204.03199642896652, + "p95": 207.90400356054306, + "p99": 217.119999229908 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 38993920, - "combineLogicalBytes": 38993920, - "fanoutMean": 5.3125, - "recvTokensMax": 367, - "stragglerRank": 5, + "dispatchLogicalBytes": 19525632, + "combineLogicalBytes": 19525632, + "fanoutMean": 5.3203125, + "recvTokensMax": 256, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 137.85600662231445, + "p90": 144.41600441932678, + "p95": 147.0080018043518, + "p99": 151.16800367832184 + }, + "combine": { + "p50": 128.83199751377106, + "p90": 131.23199343681335, + "p95": 131.99999928474426, + "p99": 137.95199990272522 + }, + 
"roundtrip": { + "p50": 233.75999927520752, + "p90": 239.3919974565506, + "p95": 240.92799425125122, + "p99": 245.1840043067932 + }, + "isolatedSum": { + "p50": 266.6880041360855, + "p90": 275.64799785614014, + "p95": 279.00800108909607, + "p99": 289.12000358104706 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1024, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 @@ -41306,16 +42468,16 @@ ] }, { - "id": "cx-d576fec7", - "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||8c8497a77d9085d", - "colorKey": "h200_d982b749", - "comparisonKey": "4dde4e46080a91eb", + "id": "cx-e0ce741a", + "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|decode|normal|none|none|0|tuned||b6caf944f6bb621", + "colorKey": "h100_b654f9b2", + "comparisonKey": "10b5062b8e23fcad", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:56:04.752374+00:00", + "generatedAt": "2026-06-27T00:01:31.374180+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h200-dgxc-slurm_6", - "sku": "h200", + "runner": "h100-dgxc-slurm_19", + "sku": "h100", "backend": "deepep", "phase": "decode", "mode": "normal", @@ -41323,17 +42485,18 @@ "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", + "topologyClass": "h100-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H200 EP8 · deepep · bf16", + "label": "H100 EP8 · deepep · bf16 · hotspot-single", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", + "routing": "hotspot-single", + "routingLabel": "hotspot-single", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, @@ -41358,8 +42521,8 @@ 
"scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "8c8497a77d9085d", - "workloadId": "set:4:d8d49658059863f2", + "traceSignature": "b6caf944f6bb621", + "workloadId": "set:8:286be993cd819ed9", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -41367,9 +42530,9 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271823274", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271823274", - "createdAt": "2026-06-26T23:54:45Z", + "id": "28272004392", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272004392", + "createdAt": "2026-06-27T00:01:31.374180+00:00", "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" }, "rows": [ @@ -41377,163 +42540,311 @@ "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 73.7600028514862, - "p90": 85.53600311279297, - "p95": 96.28800302743912, - "p99": 124.67200309038162 + "p50": 96.67199850082397, + "p90": 104.60799932479858, + "p95": 106.11200332641602, + "p99": 113.56800049543381 }, "combine": { - "p50": 68.38399916887283, - "p90": 78.59200239181519, - "p95": 84.927998483181, - "p99": 101.3759970664978 + "p50": 79.00799810886383, + "p90": 82.0159986615181, + "p95": 82.36800134181976, + "p99": 87.67999708652496 }, "roundtrip": { - "p50": 124.4800016283989, - "p90": 147.32800424098969, - "p95": 163.58399391174316, - "p99": 201.88799500465393 + "p50": 147.2640037536621, + "p90": 154.59200739860535, + "p95": 157.3439985513687, + "p99": 161.5999937057495 }, "isolatedSum": { - "p50": 142.14400202035904, - "p90": 164.12800550460815, - "p95": 181.21600151062012, - "p99": 226.04800015687943 + "p50": 175.6799966096878, + "p90": 186.62399798631668, + "p95": 188.48000466823578, + "p99": 201.24799758195877 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 630784, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - 
"recvTokensMax": 7, - "stragglerRank": 6, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 2, + "globalTokens": 16, "dispatch": { - "p50": 74.49600100517273, - "p90": 89.15200084447861, - "p95": 96.92800045013428, - "p99": 135.6479972600937 + "p50": 99.67999905347824, + "p90": 105.0880029797554, + "p95": 107.16799646615982, + "p99": 112.99200356006622 }, "combine": { - "p50": 70.11199742555618, - "p90": 78.14399898052216, - "p95": 83.74399691820145, - "p99": 94.94400024414062 + "p50": 81.11999928951263, + "p90": 82.49600231647491, + "p95": 83.03999900817871, + "p99": 87.2960016131401 }, "roundtrip": { - "p50": 124.38400089740753, - "p90": 143.77599954605103, - "p95": 155.2319973707199, - "p99": 184.38400328159332 + "p50": 147.0080018043518, + "p90": 153.6639928817749, + "p95": 155.71199357509613, + "p99": 159.10400450229645 }, "isolatedSum": { - "p50": 144.6079984307289, - "p90": 167.29599982500076, - "p95": 180.67199736833572, - "p99": 230.5919975042343 + "p50": 180.79999834299088, + "p90": 187.58400529623032, + "p95": 190.20799547433853, + "p99": 200.28800517320633 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 3, + "dispatchLogicalBytes": 1247232, + "combineLogicalBytes": 1247232, + "fanoutMean": 5.4375, + "recvTokensMax": 16, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 32, - "globalTokens": 256, + "tokensPerRank": 4, + "globalTokens": 32, "dispatch": { - "p50": 85.60000360012054, - "p90": 97.21600264310837, - "p95": 104.2879968881607, - "p99": 122.81599640846252 + "p50": 97.18400239944458, + "p90": 103.93600165843964, + "p95": 106.30399733781815, + "p99": 122.04799801111221 }, "combine": { - 
"p50": 78.40000092983246, - "p90": 89.79199826717377, - "p95": 95.2640026807785, - "p99": 107.19999670982361 + "p50": 78.94399762153625, + "p90": 82.43200182914734, + "p95": 86.40000224113464, + "p99": 103.45599800348282 }, "roundtrip": { - "p50": 136.19199395179749, - "p90": 152.6080071926117, - "p95": 160.67199409008026, - "p99": 188.57599794864655 + "p50": 148.15999567508698, + "p90": 158.55999290943146, + "p95": 160.3199988603592, + "p99": 164.09599781036377 }, "isolatedSum": { - "p50": 164.000004529953, - "p90": 187.00800091028214, - "p95": 199.5519995689392, - "p99": 230.01599311828613 + "p50": 176.12800002098083, + "p90": 186.36800348758698, + "p95": 192.7039995789528, + "p99": 225.50399601459503 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19726336, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 182, - "stragglerRank": 6, + "dispatchLogicalBytes": 2451456, + "combineLogicalBytes": 2451456, + "fanoutMean": 5.34375, + "recvTokensMax": 32, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 8, + "globalTokens": 64, "dispatch": { - "p50": 116.73600226640701, - "p90": 128.48000228405, - "p95": 133.12000036239624, - "p99": 152.92799472808838 + "p50": 98.91200065612793, + "p90": 104.35199737548828, + "p95": 106.65600001811981, + "p99": 112.47999966144562 }, "combine": { - "p50": 106.04800283908844, - "p90": 113.76000195741653, - "p95": 119.45600062608719, - "p99": 131.8719983100891 + "p50": 81.24800026416779, + "p90": 83.3280012011528, + "p95": 87.0399996638298, + "p99": 87.93599903583527 }, "roundtrip": { - "p50": 197.91999459266663, - "p90": 210.62399446964264, - "p95": 217.6000028848648, - "p99": 242.01600253582 + "p50": 153.4080058336258, + "p90": 159.61599349975586, + "p95": 161.47199273109436, + "p99": 165.21599888801575 }, "isolatedSum": { - "p50": 222.78400510549545, - "p90": 242.24000424146652, - "p95": 252.57600098848343, 
- "p99": 284.7999930381775 + "p50": 180.16000092029572, + "p90": 187.67999857664108, + "p95": 193.69599968194962, + "p99": 200.41599869728088 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 6, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 64, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 - } - ] + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 96.92800045013428, + "p90": 104.3199971318245, + "p95": 110.55999994277954, + "p99": 161.9199961423874 + }, + "combine": { + "p50": 81.4720019698143, + "p90": 87.2960016131401, + "p95": 87.8399983048439, + "p99": 90.27200192213058 + }, + "roundtrip": { + "p50": 153.43999862670898, + "p90": 160.19199788570404, + "p95": 162.78399527072906, + "p99": 169.98399794101715 + }, + "isolatedSum": { + "p50": 178.40000241994858, + "p90": 191.6159987449646, + "p95": 198.39999824762344, + "p99": 252.19199806451797 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9748480, + "combineLogicalBytes": 9748480, + "fanoutMean": 5.3125, + "recvTokensMax": 128, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 103.64799946546555, + "p90": 108.31999778747559, + "p95": 110.62400043010712, + "p99": 114.84800279140472 + }, + "combine": { + "p50": 87.5839963555336, + "p90": 91.839998960495, + "p95": 95.39200365543365, + "p99": 96.38399630784988 + }, + "roundtrip": { + "p50": 155.96799552440643, + "p90": 165.50399363040924, + "p95": 168.41599345207214, + "p99": 175.64800381660461 + }, + "isolatedSum": { + "p50": 191.23199582099915, + "p90": 200.15999674797058, + "p95": 206.01600408554077, + "p99": 211.2319990992546 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19525632, + 
"combineLogicalBytes": 19525632, + "fanoutMean": 5.3203125, + "recvTokensMax": 256, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 114.88000303506851, + "p90": 126.11199915409088, + "p95": 127.6479959487915, + "p99": 133.56800377368927 + }, + "combine": { + "p50": 98.43199700117111, + "p90": 103.96800190210342, + "p95": 105.8880016207695, + "p99": 119.71200257539749 + }, + "roundtrip": { + "p50": 180.38399517536163, + "p90": 191.39200448989868, + "p95": 194.39999759197235, + "p99": 201.9840031862259 + }, + "isolatedSum": { + "p50": 213.31200003623962, + "p90": 230.0800010561943, + "p95": 233.535997569561, + "p99": 253.28000634908676 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38621184, + "combineLogicalBytes": 38621184, + "fanoutMean": 5.26171875, + "recvTokensMax": 512, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 141.79199934005737, + "p90": 147.2959965467453, + "p95": 149.82399344444275, + "p99": 153.3759981393814 + }, + "combine": { + "p50": 122.36800044775009, + "p90": 128.4160017967224, + "p95": 129.02399897575378, + "p99": 136.1600011587143 + }, + "roundtrip": { + "p50": 231.77599906921387, + "p90": 241.85599386692047, + "p95": 244.9280023574829, + "p99": 248.76800179481506 + }, + "isolatedSum": { + "p50": 264.15999978780746, + "p90": 275.7119983434677, + "p95": 278.84799242019653, + "p99": 289.5359992980957 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1024, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] }, { - "id": "cx-9ca51f4f", - "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", - 
"colorKey": "h200_d982b749", - "comparisonKey": "4dde4e46080a91eb", + "id": "cx-73951147", + "identity": "h100|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|hotspot-single+eplb|8|decode|normal|none|none|0|tuned||e41f5099a9733ac", + "colorKey": "h100_456a963c", + "comparisonKey": "12dbc31e8daf0a44", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:49:18.590174+00:00", + "generatedAt": "2026-06-27T00:01:37.187210+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h200-dgxc-slurm_4", - "sku": "h200", + "runner": "h100-dgxc-slurm_01", + "sku": "h100", "backend": "deepep", "phase": "decode", "mode": "normal", @@ -41541,20 +42852,21 @@ "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", + "topologyClass": "h100-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H200 EP8 · deepep · bf16", + "label": "H100 EP8 · deepep · bf16 · hotspot-single+eplb", + "model": "DeepSeek-V3 (EPLB physical)", "shape": { "hidden": 7168, "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", + "experts": 288, + "routing": "hotspot-single", + "routingLabel": "hotspot-single+eplb", "routingStep": 0, "unevenTokens": "none", - "eplbEnabled": false, + "eplbEnabled": true, "dispatchDtype": "bf16", "activationProfile": "normal", "combineQuantMode": "none" @@ -41576,18 +42888,18 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "ac583971f94b176", - "workloadId": "set:8:d8d49658059863f2", + "traceSignature": "e41f5099a9733ac", + "workloadId": "set:8:286be993cd819ed9", "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, + "eplbImbalanceBefore": 1.830078125, + "eplbImbalanceAfter": 1.0007595486111112, "backendVersion": "1.2.1", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": 
"SemiAnalysisAI/InferenceX", "run": { - "id": "28271601584", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271601584", - "createdAt": "2026-06-26T23:47:53Z", + "id": "28272008867", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272008867", + "createdAt": "2026-06-27T00:01:37.187210+00:00", "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" }, "rows": [ @@ -41595,35 +42907,35 @@ "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 105.0880029797554, - "p90": 132.7040046453476, - "p95": 145.21600306034088, - "p99": 190.11199474334717 + "p50": 68.41599941253662, + "p90": 76.1599987745285, + "p95": 77.69600301980972, + "p99": 84.83199775218964 }, "combine": { - "p50": 71.3919997215271, - "p90": 93.37600320577621, - "p95": 98.01600128412247, - "p99": 108.51199924945831 + "p50": 71.07199728488922, + "p90": 73.11999797821045, + "p95": 73.7600028514862, + "p99": 79.74400371313095 }, "roundtrip": { - "p50": 123.45600128173828, - "p90": 180.60800433158875, - "p95": 190.7840073108673, - "p99": 233.2800030708313 + "p50": 126.46399438381195, + "p90": 130.62399625778198, + "p95": 131.55199587345123, + "p99": 136.4479959011078 }, "isolatedSum": { - "p50": 176.4800027012825, - "p90": 226.0800078511238, - "p95": 243.23200434446335, - "p99": 298.6239939928055 + "p50": 139.48799669742584, + "p90": 149.27999675273895, + "p95": 151.45600587129593, + "p99": 164.5760014653206 }, "roundtripMeasured": true, "dispatchLogicalBytes": 630784, "combineLogicalBytes": 630784, "fanoutMean": 5.5, "recvTokensMax": 7, - "stragglerRank": 3, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -41632,35 +42944,35 @@ "tokensPerRank": 2, "globalTokens": 16, "dispatch": { - "p50": 87.10400015115738, - "p90": 130.0799995660782, - "p95": 139.96799290180206, - "p99": 167.1999990940094 + "p50": 68.70400160551071, + "p90": 76.9599974155426, + "p95": 81.727996468544, + "p99": 107.10400342941284 }, "combine": { - "p50": 
75.58400183916092, - "p90": 97.50399738550186, - "p95": 105.31199723482132, - "p99": 143.61600577831268 + "p50": 71.48800045251846, + "p90": 73.15199822187424, + "p95": 73.56800138950348, + "p99": 79.55200225114822 }, "roundtrip": { - "p50": 144.83200013637543, - "p90": 179.1040003299713, - "p95": 191.96799397468567, - "p99": 229.5680046081543 + "p50": 127.77599692344666, + "p90": 131.23199343681335, + "p95": 132.60799646377563, + "p99": 138.7840062379837 }, "isolatedSum": { - "p50": 162.6880019903183, - "p90": 227.58399695158005, - "p95": 245.27999013662338, - "p99": 310.8160048723221 + "p50": 140.19200205802917, + "p90": 150.11199563741684, + "p95": 155.29599785804749, + "p99": 186.65600568056107 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1232896, - "combineLogicalBytes": 1232896, - "fanoutMean": 5.375, - "recvTokensMax": 13, - "stragglerRank": 5, + "dispatchLogicalBytes": 1175552, + "combineLogicalBytes": 1175552, + "fanoutMean": 5.125, + "recvTokensMax": 12, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -41669,35 +42981,35 @@ "tokensPerRank": 4, "globalTokens": 32, "dispatch": { - "p50": 72.06399738788605, - "p90": 86.14400029182434, - "p95": 95.51999717950821, - "p99": 111.87200248241425 + "p50": 70.8480030298233, + "p90": 77.79199630022049, + "p95": 80.09599894285202, + "p99": 87.0399996638298 }, "combine": { - "p50": 68.67200136184692, - "p90": 80.06399869918823, - "p95": 85.66399663686752, - "p99": 102.52799838781357 + "p50": 72.4480003118515, + "p90": 73.56800138950348, + "p95": 74.27199929952621, + "p99": 79.80799674987793 }, "roundtrip": { - "p50": 121.95199728012085, - "p90": 146.43199741840363, - "p95": 154.7199934720993, - "p99": 173.47200214862823 + "p50": 126.94400548934937, + "p90": 131.77600502967834, + "p95": 133.4719955921173, + "p99": 137.2479945421219 }, "isolatedSum": { - "p50": 140.73599874973297, - "p90": 166.20799899101257, - "p95": 181.18399381637573, - "p99": 214.4000008702278 + "p50": 
143.2960033416748, + "p90": 151.35999768972397, + "p95": 154.36799824237823, + "p99": 166.84799641370773 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2480128, - "combineLogicalBytes": 2480128, - "fanoutMean": 5.40625, - "recvTokensMax": 29, - "stragglerRank": 7, + "dispatchLogicalBytes": 2451456, + "combineLogicalBytes": 2451456, + "fanoutMean": 5.34375, + "recvTokensMax": 23, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -41706,34 +43018,34 @@ "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 72.9919970035553, - "p90": 93.12000125646591, - "p95": 103.2319962978363, - "p99": 120.7360029220581 + "p50": 70.11199742555618, + "p90": 76.9599974155426, + "p95": 79.3600007891655, + "p99": 86.14400029182434 }, "combine": { - "p50": 69.24799829721451, - "p90": 82.07999914884567, - "p95": 88.41600269079208, - "p99": 100.67199915647507 + "p50": 72.64000177383423, + "p90": 73.82400333881378, + "p95": 74.94399696588516, + "p99": 81.08799904584885 }, "roundtrip": { - "p50": 124.1919994354248, - "p90": 152.8639942407608, - "p95": 164.09599781036377, - "p99": 197.85599410533905 + "p50": 125.47199428081512, + "p90": 131.6480040550232, + "p95": 133.66399705410004, + "p99": 139.29599523544312 }, "isolatedSum": { - "p50": 142.2399953007698, - "p90": 175.20000040531158, - "p95": 191.6479989886284, - "p99": 221.40800207853317 + "p50": 142.7519991993904, + "p90": 150.78400075435638, + "p95": 154.30399775505066, + "p99": 167.2319993376732 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, + "dispatchLogicalBytes": 4730880, + "combineLogicalBytes": 4730880, + "fanoutMean": 5.15625, + "recvTokensMax": 44, "stragglerRank": 4, "correct": true, "samplesPooled": 600, @@ -41743,34 +43055,34 @@ "tokensPerRank": 16, "globalTokens": 128, "dispatch": { - "p50": 69.72800195217133, - "p90": 91.36000275611877, - "p95": 105.66399991512299, - "p99": 
141.56800508499146 + "p50": 81.40800148248672, + "p90": 83.99999886751175, + "p95": 86.33600175380707, + "p99": 91.36000275611877 }, "combine": { - "p50": 70.592001080513, - "p90": 82.04799890518188, - "p95": 87.3280018568039, - "p99": 99.45599734783173 + "p50": 73.37599992752075, + "p90": 78.75200361013412, + "p95": 79.6160027384758, + "p99": 81.34400099515915 }, "roundtrip": { - "p50": 123.96799772977829, - "p90": 151.32799744606018, - "p95": 162.23999857902527, - "p99": 186.46399676799774 + "p50": 125.95200538635254, + "p90": 133.15199315547943, + "p95": 134.5919966697693, + "p99": 140.32000303268433 }, "isolatedSum": { - "p50": 140.32000303268433, - "p90": 173.40800166130066, - "p95": 192.99200177192688, - "p99": 241.02400243282318 + "p50": 154.78400141000748, + "p90": 162.75200247764587, + "p95": 165.95200449228287, + "p99": 172.70400375127792 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 9920512, - "combineLogicalBytes": 9920512, - "fanoutMean": 5.40625, - "recvTokensMax": 92, + "dispatchLogicalBytes": 9691136, + "combineLogicalBytes": 9691136, + "fanoutMean": 5.28125, + "recvTokensMax": 88, "stragglerRank": 4, "correct": true, "samplesPooled": 600, @@ -41780,34 +43092,34 @@ "tokensPerRank": 32, "globalTokens": 256, "dispatch": { - "p50": 81.85599744319916, - "p90": 108.89600217342377, - "p95": 117.95199662446976, - "p99": 130.5599957704544 + "p50": 90.08000046014786, + "p90": 92.54399687051773, + "p95": 94.4959968328476, + "p99": 98.52799773216248 }, "combine": { - "p50": 77.56800204515457, - "p90": 96.25600278377533, - "p95": 99.7759997844696, - "p99": 110.43199896812439 + "p50": 80.09599894285202, + "p90": 81.56800270080566, + "p95": 82.07999914884567, + "p99": 87.2960016131401 }, "roundtrip": { - "p50": 136.19199395179749, - "p90": 168.19199919700623, - "p95": 180.25599420070648, - "p99": 210.01599729061127 + "p50": 141.08799397945404, + "p90": 144.96000111103058, + "p95": 146.30399644374847, + "p99": 150.33599734306335 }, "isolatedSum": { - 
"p50": 159.42399948835373, - "p90": 205.1520049571991, - "p95": 217.72799640893936, - "p99": 240.9919947385788 + "p50": 170.17599940299988, + "p90": 174.1119995713234, + "p95": 176.57599598169327, + "p99": 185.82399934530258 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19726336, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 182, + "dispatchLogicalBytes": 19568640, + "combineLogicalBytes": 19568640, + "fanoutMean": 5.33203125, + "recvTokensMax": 179, "stragglerRank": 4, "correct": true, "samplesPooled": 600, @@ -41817,35 +43129,35 @@ "tokensPerRank": 64, "globalTokens": 512, "dispatch": { - "p50": 94.81599926948547, - "p90": 107.16799646615982, - "p95": 116.99199676513672, - "p99": 140.6719982624054 + "p50": 95.23200243711472, + "p90": 113.24799805879593, + "p95": 114.59200084209442, + "p99": 119.10399794578552 }, "combine": { - "p50": 85.75999736785889, - "p90": 97.79199957847595, - "p95": 106.04800283908844, - "p99": 131.04000687599182 + "p50": 89.85599875450134, + "p90": 98.2080027461052, + "p95": 114.3679991364479, + "p99": 130.49599528312683 }, "roundtrip": { - "p50": 156.5759927034378, - "p90": 172.19200730323792, - "p95": 179.00800704956055, - "p99": 190.49599766731262 + "p50": 159.39199924468994, + "p90": 165.53600132465363, + "p95": 167.87199676036835, + "p99": 179.51999604701996 }, "isolatedSum": { - "p50": 180.57599663734436, - "p90": 204.95999604463577, - "p95": 223.03999960422516, - "p99": 271.7120051383972 + "p50": 185.08800119161606, + "p90": 211.45600080490112, + "p95": 228.95999997854233, + "p99": 249.59999322891235 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 38993920, - "combineLogicalBytes": 38993920, - "fanoutMean": 5.3125, - "recvTokensMax": 367, - "stragglerRank": 7, + "dispatchLogicalBytes": 38750208, + "combineLogicalBytes": 38750208, + "fanoutMean": 5.279296875, + "recvTokensMax": 348, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -41854,35 +43166,35 @@ 
"tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 115.90400338172913, - "p90": 139.23199474811554, - "p95": 145.47200500965118, - "p99": 182.65600502490997 + "p50": 111.29599809646606, + "p90": 117.21599847078323, + "p95": 118.43200027942657, + "p99": 122.72000312805176 }, "combine": { - "p50": 103.84000092744827, - "p90": 120.25599926710129, - "p95": 126.56000256538391, - "p99": 146.68799936771393 + "p50": 106.39999806880951, + "p90": 112.28799819946289, + "p95": 113.11999708414078, + "p99": 114.33599889278412 }, "roundtrip": { - "p50": 196.19199633598328, - "p90": 217.15199947357178, - "p95": 223.68000447750092, - "p99": 249.2160052061081 + "p50": 197.63199985027313, + "p90": 202.11200416088104, + "p95": 203.39199900627136, + "p99": 206.9759964942932 }, "isolatedSum": { - "p50": 219.7440043091774, - "p90": 259.4879940152168, - "p95": 272.0320075750351, - "p99": 329.3440043926239 + "p50": 217.69599616527557, + "p90": 229.50399667024612, + "p95": 231.55199736356735, + "p99": 237.05600202083588 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 5, + "dispatchLogicalBytes": 77342720, + "combineLogicalBytes": 77342720, + "fanoutMean": 5.2685546875, + "recvTokensMax": 687, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -41890,16 +43202,16 @@ ] }, { - "id": "cx-b7604172", - "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||b029c1a6fded400", - "colorKey": "h200_d982b749", - "comparisonKey": "4dde4e46080a91eb", + "id": "cx-fc133662", + "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform·empty-rank|8|decode|normal|none|empty-rank|0|tuned||5621f0d4899ad7a", + "colorKey": "h100_d54acd03", + "comparisonKey": "fb346b1019e55bb0", "schemaVersion": 3, - "generatedAt": "2026-06-27T00:14:07.082435+00:00", + "generatedAt": 
"2026-06-27T00:13:31.132134+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_2", - "sku": "h200", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_01", + "sku": "h100", "backend": "deepep", "phase": "decode", "mode": "normal", @@ -41907,19 +43219,20 @@ "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", + "topologyClass": "h100-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H200 EP8 · deepep · bf16", + "label": "H100 EP8 · deepep · bf16 · uniform·empty-rank", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, "experts": 256, "routing": "uniform", - "routingLabel": "uniform", + "routingLabel": "uniform·empty-rank", "routingStep": 0, - "unevenTokens": "none", + "unevenTokens": "empty-rank", "eplbEnabled": false, "dispatchDtype": "bf16", "activationProfile": "normal", @@ -41942,127 +43255,127 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "b029c1a6fded400", - "workloadId": "set:3:07d544ac2af401ec", - "workloadSource": "canonical-serialized", + "traceSignature": "5621f0d4899ad7a", + "workloadId": null, + "workloadSource": "seeded-runtime", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, "backendVersion": "1.2.1", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28272379468", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272379468", - "createdAt": "2026-06-27T00:12:44Z", + "id": "28272375977", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272375977", + "createdAt": "2026-06-27T00:13:31.132134+00:00", "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" }, "rows": [ { "tokensPerRank": 8, - "globalTokens": 64, + "globalTokens": 63, "dispatch": { - "p50": 72.86400347948074, - 
"p90": 82.24000036716461, - "p95": 88.73599767684937, - "p99": 117.66400188207626 + "p50": 98.01600128412247, + "p90": 108.03200304508209, + "p95": 124.22399967908859, + "p99": 164.000004529953 }, "combine": { - "p50": 70.01599669456482, - "p90": 75.39200037717819, - "p95": 80.6720033288002, - "p99": 96.0640013217926 + "p50": 80.73599636554718, + "p90": 89.63199704885483, + "p95": 104.63999956846237, + "p99": 112.5440001487732 }, "roundtrip": { - "p50": 123.90399724245071, - "p90": 139.74399864673615, - "p95": 148.47999811172485, - "p99": 178.75200510025024 + "p50": 154.1759967803955, + "p90": 160.35200655460358, + "p95": 162.08000481128693, + "p99": 175.3920018672943 }, "isolatedSum": { - "p50": 142.88000017404556, - "p90": 157.6320007443428, - "p95": 169.40800100564957, - "p99": 213.72800320386887 + "p50": 178.75199764966965, + "p90": 197.66400009393692, + "p95": 228.86399924755096, + "p99": 276.5440046787262 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, + "dispatchLogicalBytes": 4888576, + "combineLogicalBytes": 4888576, + "fanoutMean": 5.412698268890381, + "recvTokensMax": 46, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, "trials": 3 }, { "tokensPerRank": 32, - "globalTokens": 256, + "globalTokens": 252, "dispatch": { - "p50": 81.66400343179703, - "p90": 93.53599697351456, - "p95": 100.70399940013885, - "p99": 128.09599936008453 + "p50": 104.76800054311752, + "p90": 134.0479999780655, + "p95": 136.1279934644699, + "p99": 144.41600441932678 }, "combine": { - "p50": 78.11199873685837, - "p90": 84.51200276613235, - "p95": 89.02399986982346, - "p99": 123.6800029873848 + "p50": 89.02399986982346, + "p90": 104.12800312042236, + "p95": 104.41599786281586, + "p99": 107.90400207042694 }, "roundtrip": { - "p50": 135.13599336147308, - "p90": 146.7200070619583, - "p95": 153.9199948310852, - 
"p99": 176.89600586891174 + "p50": 166.59200191497803, + "p90": 189.95200097560883, + "p95": 191.96799397468567, + "p99": 199.5840072631836 }, "isolatedSum": { - "p50": 159.7760021686554, - "p90": 178.0479997396469, - "p95": 189.7279992699623, - "p99": 251.77600234746933 + "p50": 193.79200041294098, + "p90": 238.17600309848785, + "p95": 240.54399132728577, + "p99": 252.32000648975372 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19726336, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 182, - "stragglerRank": 6, + "dispatchLogicalBytes": 19396608, + "combineLogicalBytes": 19396608, + "fanoutMean": 5.36904764175415, + "recvTokensMax": 180, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { "tokensPerRank": 128, - "globalTokens": 1024, + "globalTokens": 1022, "dispatch": { - "p50": 117.63200163841248, - "p90": 127.3919939994812, - "p95": 134.0479999780655, - "p99": 154.94400262832642 + "p50": 130.52800297737122, + "p90": 139.90400731563568, + "p95": 151.61600708961487, + "p99": 458.5599899291992 }, "combine": { - "p50": 104.67199981212616, - "p90": 115.42399972677231, - "p95": 121.98399752378464, - "p99": 159.93599593639374 + "p50": 120.7680031657219, + "p90": 127.93600559234619, + "p95": 128.54400277137756, + "p99": 129.50399518013 }, "roundtrip": { - "p50": 196.25599682331085, - "p90": 206.08000457286835, - "p95": 214.08000588417053, - "p99": 245.27999758720398 + "p50": 216.35200083255768, + "p90": 221.98399901390076, + "p95": 224.7679978609085, + "p99": 229.5359969139099 }, "isolatedSum": { - "p50": 222.30400145053864, - "p90": 242.8159937262535, - "p95": 256.0319975018501, - "p99": 314.87999856472015 + "p50": 251.2960061430931, + "p90": 267.8400129079819, + "p95": 280.16000986099243, + "p99": 588.0639851093292 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, + "dispatchLogicalBytes": 77529088, + 
"combineLogicalBytes": 77529088, + "fanoutMean": 5.2915849685668945, + "recvTokensMax": 722, "stragglerRank": 4, "correct": true, "samplesPooled": 600, @@ -42071,16 +43384,16 @@ ] }, { - "id": "cx-875c4f49", - "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||c774c8e4abb34da", - "colorKey": "h200_d982b749", - "comparisonKey": "c8b8b28ca3d145bb", + "id": "cx-e7e5caec", + "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform·linear|8|decode|normal|none|linear|0|tuned||b029c1a6fded400", + "colorKey": "h100_f70758a0", + "comparisonKey": "fb346b1019e55bb0", "schemaVersion": 3, - "generatedAt": "2026-06-27T00:54:14.463003+00:00", + "generatedAt": "2026-06-27T00:13:24.801629+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_0", - "sku": "h200", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_00", + "sku": "h100", "backend": "deepep", "phase": "decode", "mode": "normal", @@ -42088,19 +43401,20 @@ "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", + "topologyClass": "h100-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H200 EP8 · deepep · bf16", + "label": "H100 EP8 · deepep · bf16 · uniform·linear", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, "experts": 256, "routing": "uniform", - "routingLabel": "uniform", + "routingLabel": "uniform·linear", "routingStep": 0, - "unevenTokens": "none", + "unevenTokens": "linear", "eplbEnabled": false, "dispatchDtype": "bf16", "activationProfile": "normal", @@ -42123,202 +43437,128 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "c774c8e4abb34da", - "workloadId": "set:5:d8d49658059863f2", - "workloadSource": "canonical-serialized", + "traceSignature": "b029c1a6fded400", + "workloadId": null, + 
"workloadSource": "seeded-runtime", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, "backendVersion": "1.2.1", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28273509838", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28273509838", - "createdAt": "2026-06-27T00:52:52Z", - "sha": "2c15d9415503e9ccb84cd49cf446a122796efc1e" + "id": "28272372388", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272372388", + "createdAt": "2026-06-27T00:13:24.801629+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" }, "rows": [ { - "tokensPerRank": 1, - "globalTokens": 8, + "tokensPerRank": 8, + "globalTokens": 64, "dispatch": { - "p50": 71.3919997215271, - "p90": 87.5839963555336, - "p95": 96.3520035147667, - "p99": 139.55199718475342 + "p50": 98.24000298976898, + "p90": 103.64799946546555, + "p95": 106.4319983124733, + "p99": 112.5119999051094 }, "combine": { - "p50": 68.09599697589874, - "p90": 79.55200225114822, - "p95": 84.95999872684479, - "p99": 111.32799834012985 + "p50": 80.73599636554718, + "p90": 87.55200356245041, + "p95": 88.03199976682663, + "p99": 90.08000046014786 }, "roundtrip": { - "p50": 119.55200135707855, - "p90": 147.20000326633453, - "p95": 157.18400478363037, - "p99": 204.6079933643341 + "p50": 154.33600544929504, + "p90": 159.45599973201752, + "p95": 161.6639941930771, + "p99": 166.75199568271637 }, "isolatedSum": { - "p50": 139.48799669742584, - "p90": 167.13599860668182, - "p95": 181.31200224161148, - "p99": 250.87999552488327 + "p50": 178.97599935531616, + "p90": 191.20000302791595, + "p95": 194.46399807929993, + "p99": 202.59200036525726 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 630784, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 7, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, 
"stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2, - "globalTokens": 16, + "tokensPerRank": 32, + "globalTokens": 256, "dispatch": { - "p50": 72.9919970035553, - "p90": 94.36800330877304, - "p95": 103.13600301742554, - "p99": 130.68799674510956 + "p50": 100.09600222110748, + "p90": 105.27999699115753, + "p95": 106.91200196743011, + "p99": 113.37599903345108 }, "combine": { - "p50": 68.12799721956253, - "p90": 80.9599980711937, - "p95": 88.19200098514557, - "p99": 105.15200346708298 + "p50": 89.53599631786346, + "p90": 96.16000205278397, + "p95": 96.73599898815155, + "p99": 98.43199700117111 }, "roundtrip": { - "p50": 121.5360015630722, - "p90": 147.16799557209015, - "p95": 157.98400342464447, - "p99": 185.92000007629395 + "p50": 163.39200735092163, + "p90": 168.99199783802032, + "p95": 170.43200135231018, + "p99": 174.81599748134613 }, "isolatedSum": { - "p50": 141.11999422311783, - "p90": 175.32800137996674, - "p95": 191.3280040025711, - "p99": 235.84000021219254 + "p50": 189.63199853897095, + "p90": 201.4399990439415, + "p95": 203.64800095558167, + "p99": 211.8079960346222 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1232896, - "combineLogicalBytes": 1232896, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, "fanoutMean": 5.375, - "recvTokensMax": 13, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 73.02399724721909, - "p90": 95.48799693584442, - "p95": 104.86400127410889, - "p99": 133.08799266815186 - }, - "combine": { - "p50": 68.76800209283829, - "p90": 80.57600259780884, - "p95": 86.30400151014328, - "p99": 105.92000186443329 - }, - "roundtrip": { - "p50": 120.12799829244614, - "p90": 145.56799829006195, - "p95": 155.64799308776855, - "p99": 182.68799781799316 - }, - "isolatedSum": { - "p50": 141.79199934005737, - "p90": 176.06399953365326, - "p95": 191.16800278425217, - 
"p99": 239.00799453258514 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2480128, - "combineLogicalBytes": 2480128, - "fanoutMean": 5.40625, - "recvTokensMax": 29, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 73.66400212049484, - "p90": 93.75999867916107, - "p95": 98.65599870681763, - "p99": 113.18399757146835 - }, - "combine": { - "p50": 68.76800209283829, - "p90": 80.54400235414505, - "p95": 82.49600231647491, - "p99": 91.77599847316742 - }, - "roundtrip": { - "p50": 121.08799815177917, - "p90": 145.9839940071106, - "p95": 156.99200332164764, - "p99": 216.35200083255768 - }, - "isolatedSum": { - "p50": 142.43200421333313, - "p90": 174.30400103330612, - "p95": 181.15200102329254, - "p99": 204.95999604463577 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 0, + "recvTokensMax": 182, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 16, - "globalTokens": 128, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 73.91999661922455, - "p90": 96.67199850082397, - "p95": 103.2319962978363, - "p99": 125.34399330615997 + "p50": 130.75199723243713, + "p90": 136.99199259281158, + "p95": 138.7840062379837, + "p99": 143.42400431632996 }, "combine": { - "p50": 70.75200229883194, - "p90": 84.03199911117554, - "p95": 89.59999680519104, - "p99": 103.87200117111206 + "p50": 128.1599998474121, + "p90": 130.40000200271606, + "p95": 135.8720064163208, + "p99": 278.6880135536194 }, "roundtrip": { - "p50": 123.9359974861145, - "p90": 155.8080017566681, - "p95": 170.49600183963776, - "p99": 205.6960016489029 + "p50": 225.75999796390533, + "p90": 231.74400627613068, + "p95": 232.80000686645508, + "p99": 235.6480062007904 }, "isolatedSum": { - "p50": 144.6719989180565, - "p90": 
180.7039976119995, - "p95": 192.83199310302734, - "p99": 229.21599447727203 + "p50": 258.91199707984924, + "p90": 267.39199459552765, + "p95": 274.6560126543045, + "p99": 422.11201786994934 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 9920512, - "combineLogicalBytes": 9920512, - "fanoutMean": 5.40625, - "recvTokensMax": 92, - "stragglerRank": 5, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -42326,16 +43566,16 @@ ] }, { - "id": "cx-19b41153", - "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|small-amplitude|none|none|0|tuned||8c8497a77d9085d", - "colorKey": "h200_d982b749", - "comparisonKey": "fb9666d12f9a34f8", + "id": "cx-5fad8218", + "identity": "h100|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|uniform+eplb|8|decode|normal|none|none|0|tuned||73351bbcd4d02de", + "colorKey": "h100_fb5b86de", + "comparisonKey": "bba2bec66db838b4", "schemaVersion": 3, - "generatedAt": "2026-06-27T00:05:55.021886+00:00", + "generatedAt": "2026-06-26T23:59:15.450287+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h200-dgxc-slurm_0", - "sku": "h200", + "runner": "h100-dgxc-slurm_17", + "sku": "h100", "backend": "deepep", "phase": "decode", "mode": "normal", @@ -42343,22 +43583,23 @@ "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", + "topologyClass": "h100-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H200 EP8 · deepep · bf16", + "label": "H100 EP8 · deepep · bf16 · uniform+eplb", + "model": "DeepSeek-V3 (EPLB physical)", "shape": { "hidden": 7168, "topk": 8, - "experts": 256, + "experts": 288, "routing": "uniform", - "routingLabel": "uniform", + "routingLabel": "uniform+eplb", "routingStep": 0, "unevenTokens": 
"none", - "eplbEnabled": false, + "eplbEnabled": true, "dispatchDtype": "bf16", - "activationProfile": "small-amplitude", + "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { @@ -42378,18 +43619,18 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "8c8497a77d9085d", - "workloadId": "set:4:d8d49658059863f2", + "traceSignature": "73351bbcd4d02de", + "workloadId": "set:8:d8d49658059863f2", "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, + "eplbImbalanceBefore": 1.078125, + "eplbImbalanceAfter": 1.00048828125, "backendVersion": "1.2.1", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28272132556", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272132556", - "createdAt": "2026-06-27T00:04:36Z", + "id": "28271923814", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271923814", + "createdAt": "2026-06-26T23:59:15.450287+00:00", "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" }, "rows": [ @@ -42397,326 +43638,256 @@ "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 72.31999933719635, - "p90": 95.0080007314682, - "p95": 99.93600100278854, - "p99": 117.69600212574005 + "p50": 96.99200093746185, + "p90": 104.89600151777267, + "p95": 107.04000294208527, + "p99": 111.68000102043152 }, "combine": { - "p50": 68.00000369548798, - "p90": 79.55200225114822, - "p95": 85.79199761152267, - "p99": 114.04799669981003 + "p50": 75.29599964618683, + "p90": 81.28000050783157, + "p95": 81.69600367546082, + "p99": 83.20000022649765 }, "roundtrip": { - "p50": 120.70400267839432, - "p90": 148.60799908638, - "p95": 156.54399991035461, - "p99": 199.0399956703186 + "p50": 146.27200365066528, + "p90": 154.11199629306793, + "p95": 156.031996011734, + "p99": 158.6879938840866 }, "isolatedSum": { - "p50": 140.32000303268433, - "p90": 
174.56000298261642, - "p95": 185.72799861431122, - "p99": 231.74399882555008 + "p50": 172.28800058364868, + "p90": 186.17600202560425, + "p95": 188.73600661754608, + "p99": 194.88000124692917 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 630784, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 6, + "dispatchLogicalBytes": 516096, + "combineLogicalBytes": 516096, + "fanoutMean": 4.5, + "recvTokensMax": 6, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 2, + "globalTokens": 16, "dispatch": { - "p50": 73.02399724721909, - "p90": 95.87199985980988, - "p95": 102.91200131177902, - "p99": 124.35200065374374 + "p50": 72.09599763154984, + "p90": 103.87200117111206, + "p95": 106.4319983124733, + "p99": 113.76000195741653 }, "combine": { - "p50": 68.67200136184692, - "p90": 82.75199681520462, - "p95": 89.53599631786346, - "p99": 112.96000331640244 + "p50": 72.67200201749802, + "p90": 81.18399977684021, + "p95": 81.82399719953537, + "p99": 84.28800106048584 }, "roundtrip": { - "p50": 123.10399860143661, - "p90": 151.39199793338776, - "p95": 160.19199788570404, - "p99": 189.69599902629852 + "p50": 127.48800218105316, + "p90": 153.76000106334686, + "p95": 156.3200056552887, + "p99": 158.720001578331 }, "isolatedSum": { - "p50": 141.695998609066, - "p90": 178.6239966750145, - "p95": 192.4479976296425, - "p99": 237.31200397014618 + "p50": 144.76799964904785, + "p90": 185.05600094795227, + "p95": 188.25599551200867, + "p99": 198.04800301790237 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, + "dispatchLogicalBytes": 1089536, + "combineLogicalBytes": 1089536, + "fanoutMean": 4.75, + "recvTokensMax": 11, "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 32, - "globalTokens": 256, + 
"tokensPerRank": 4, + "globalTokens": 32, "dispatch": { - "p50": 87.52000331878662, - "p90": 135.23200154304504, - "p95": 142.04800128936768, - "p99": 161.21600568294525 + "p50": 71.26399874687195, + "p90": 100.89600086212158, + "p95": 104.19200360774994, + "p99": 112.96000331640244 }, "combine": { - "p50": 77.504001557827, - "p90": 92.38400310277939, - "p95": 97.120001912117, - "p99": 111.77600175142288 + "p50": 72.7040022611618, + "p90": 80.4160013794899, + "p95": 80.6720033288002, + "p99": 87.80799806118011 }, "roundtrip": { - "p50": 135.77599823474884, - "p90": 158.81599485874176, - "p95": 168.92799735069275, - "p99": 212.67199516296387 + "p50": 130.0159990787506, + "p90": 154.78399395942688, + "p95": 158.81599485874176, + "p99": 165.53600132465363 }, "isolatedSum": { - "p50": 165.02400487661362, - "p90": 227.61600464582443, - "p95": 239.16800320148468, - "p99": 272.99200743436813 + "p50": 143.96800100803375, + "p90": 181.31200224161148, + "p95": 184.86400693655014, + "p99": 200.76800137758255 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19726336, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 182, - "stragglerRank": 5, + "dispatchLogicalBytes": 2207744, + "combineLogicalBytes": 2207744, + "fanoutMean": 4.8125, + "recvTokensMax": 23, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 8, + "globalTokens": 64, "dispatch": { - "p50": 116.67200177907944, - "p90": 136.83199882507324, - "p95": 145.79200744628906, - "p99": 161.6320013999939 + "p50": 96.79999947547913, + "p90": 103.16800326108932, + "p95": 105.79200088977814, + "p99": 110.46399921178818 }, "combine": { - "p50": 105.76000064611435, - "p90": 121.63200229406357, - "p95": 128.06400656700134, - "p99": 140.60799777507782 + "p50": 80.73599636554718, + "p90": 81.98399841785431, + "p95": 82.36800134181976, + "p99": 89.75999802350998 }, "roundtrip": { - "p50": 195.93599438667297, 
- "p90": 217.3759937286377, - "p95": 223.4240025281906, - "p99": 252.9279887676239 + "p50": 150.2400040626526, + "p90": 156.47999942302704, + "p95": 158.91200304031372, + "p99": 168.2240068912506 }, "isolatedSum": { - "p50": 222.4320024251938, - "p90": 258.4640011191368, - "p95": 273.8560140132904, - "p99": 302.2399991750717 + "p50": 177.5359958410263, + "p90": 185.15200167894363, + "p95": 188.1600022315979, + "p99": 200.22399723529816 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 4, + "dispatchLogicalBytes": 4558848, + "combineLogicalBytes": 4558848, + "fanoutMean": 4.96875, + "recvTokensMax": 46, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 - } - ] - }, - { - "id": "cx-6b3584db", - "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|wide-dynamic-range|none|none|0|tuned||8c8497a77d9085d", - "colorKey": "h200_d982b749", - "comparisonKey": "0dade16dc8be5c94", - "schemaVersion": 3, - "generatedAt": "2026-06-27T00:06:19.346761+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_11", - "sku": "h200", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · bf16", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "wide-dynamic-range", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - 
"deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "8c8497a77d9085d", - "workloadId": "set:4:d8d49658059863f2", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28272136313", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272136313", - "createdAt": "2026-06-27T00:04:43Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ + }, { - "tokensPerRank": 1, - "globalTokens": 8, + "tokensPerRank": 16, + "globalTokens": 128, "dispatch": { - "p50": 75.13599842786789, - "p90": 99.84000027179718, - "p95": 112.38399893045425, - "p99": 175.48799514770508 + "p50": 96.83199971914291, + "p90": 102.24000364542007, + "p95": 104.44799810647964, + "p99": 107.77600109577179 }, "combine": { - "p50": 69.95200365781784, - "p90": 81.95199817419052, - "p95": 87.3280018568039, - "p99": 117.95199662446976 + "p50": 81.05599880218506, + "p90": 87.80799806118011, + "p95": 88.70399743318558, + "p99": 89.75999802350998 }, "roundtrip": { - "p50": 127.51999497413635, - "p90": 157.9200029373169, - "p95": 171.7119961977005, - "p99": 223.26399385929108 + "p50": 152.73599326610565, + "p90": 160.73599457740784, + "p95": 162.75200247764587, + "p99": 167.55199432373047 }, "isolatedSum": { - "p50": 145.08800208568573, - "p90": 181.7919984459877, - "p95": 199.71200078725815, - "p99": 293.43999177217484 + "p50": 177.88799852132797, + "p90": 190.0480017066002, + "p95": 193.15199553966522, + "p99": 197.53599911928177 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 630784, - 
"combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 7, + "dispatchLogicalBytes": 9347072, + "combineLogicalBytes": 9347072, + "fanoutMean": 5.09375, + "recvTokensMax": 86, "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 32, + "globalTokens": 256, "dispatch": { - "p50": 76.03199779987335, - "p90": 112.73600161075592, - "p95": 125.37600100040436, - "p99": 209.4080001115799 + "p50": 91.32800251245499, + "p90": 101.9200012087822, + "p95": 104.19200360774994, + "p99": 108.57599973678589 }, "combine": { - "p50": 70.91200351715088, - "p90": 86.30400151014328, - "p95": 95.13600170612335, - "p99": 123.16799908876419 + "p50": 81.216000020504, + "p90": 90.01599997282028, + "p95": 90.40000289678574, + "p99": 97.88800030946732 }, "roundtrip": { - "p50": 125.11999905109406, - "p90": 156.99200332164764, - "p95": 177.47199535369873, - "p99": 251.64800882339478 + "p50": 142.2400027513504, + "p90": 161.8880033493042, + "p95": 163.96799683570862, + "p99": 168.67199540138245 }, "isolatedSum": { - "p50": 146.94400131702423, - "p90": 199.0400031208992, - "p95": 220.5120027065277, - "p99": 332.5759992003441 + "p50": 172.54400253295898, + "p90": 191.93600118160248, + "p95": 194.59200650453568, + "p99": 206.4640000462532 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 5, + "dispatchLogicalBytes": 18995200, + "combineLogicalBytes": 18995200, + "fanoutMean": 5.17578125, + "recvTokensMax": 178, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 32, - "globalTokens": 256, + "tokensPerRank": 64, + "globalTokens": 512, "dispatch": { - "p50": 84.28800106048584, - "p90": 108.15999656915665, - "p95": 115.29599875211716, - "p99": 152.70400047302246 + "p50": 108.25599730014801, + "p90": 114.9120032787323, + "p95": 
117.08799749612808, + "p99": 121.72800302505493 }, "combine": { - "p50": 78.36800068616867, - "p90": 91.87199920415878, - "p95": 98.55999797582626, - "p99": 110.17599701881409 + "p50": 96.0640013217926, + "p90": 97.85600006580353, + "p95": 102.11200267076492, + "p99": 108.96000266075134 }, "roundtrip": { - "p50": 138.46400380134583, - "p90": 167.23200678825378, - "p95": 179.45599555969238, - "p99": 238.91200125217438 + "p50": 166.46400094032288, + "p90": 181.63199722766876, + "p95": 186.0159933567047, + "p99": 189.91999328136444 }, "isolatedSum": { - "p50": 162.6560017466545, - "p90": 200.03199577331543, - "p95": 213.85599672794342, - "p99": 262.87999749183655 + "p50": 204.3199986219406, + "p90": 212.76800334453583, + "p95": 219.200000166893, + "p99": 230.68800568580627 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19726336, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 182, + "dispatchLogicalBytes": 38291456, + "combineLogicalBytes": 38291456, + "fanoutMean": 5.216796875, + "recvTokensMax": 348, "stragglerRank": 5, "correct": true, "samplesPooled": 600, @@ -42726,35 +43897,35 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 116.09599739313126, - "p90": 137.56799697875977, - "p95": 140.76800644397736, - "p99": 157.47199952602386 + "p50": 112.12799698114395, + "p90": 131.26400113105774, + "p95": 135.6479972600937, + "p99": 141.05600118637085 }, "combine": { - "p50": 104.35199737548828, - "p90": 122.97599762678146, - "p95": 125.50400197505951, - "p99": 148.5760062932968 + "p50": 106.36799782514572, + "p90": 117.37599968910217, + "p95": 120.80000340938568, + "p99": 121.8239963054657 }, "roundtrip": { - "p50": 198.7520009279251, - "p90": 219.2319929599762, - "p95": 227.58400440216064, - "p99": 269.3440020084381 + "p50": 195.68000733852386, + "p90": 214.59199488162994, + "p95": 216.60800278186798, + "p99": 221.91999852657318 }, "isolatedSum": { - "p50": 220.44799476861954, - "p90": 260.54399460554123, - 
"p95": 266.27200841903687, - "p99": 306.0480058193207 + "p50": 218.49599480628967, + "p90": 248.6400008201599, + "p95": 256.44800066947937, + "p99": 262.87999749183655 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 1, + "dispatchLogicalBytes": 77113344, + "combineLogicalBytes": 77113344, + "fanoutMean": 5.2529296875, + "recvTokensMax": 685, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -42762,16 +43933,16 @@ ] }, { - "id": "cx-f4f3e72f", - "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|zeros|none|none|0|tuned||8c8497a77d9085d", - "colorKey": "h200_d982b749", - "comparisonKey": "c5d592397744e4a1", + "id": "cx-7f743bfe", + "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|decode|normal|none|none|0|tuned||14ded8461f2636c", + "colorKey": "h100_aa268d13", + "comparisonKey": "791af0af2f802328", "schemaVersion": 3, - "generatedAt": "2026-06-27T00:05:52.426268+00:00", + "generatedAt": "2026-06-26T23:59:41.322977+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h200-dgxc-slurm_2", - "sku": "h200", + "runner": "h100-dgxc-slurm_18", + "sku": "h100", "backend": "deepep", "phase": "decode", "mode": "normal", @@ -42779,22 +43950,23 @@ "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", + "topologyClass": "h100-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H200 EP8 · deepep · bf16", + "label": "H100 EP8 · deepep · bf16 · zipf", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", + "routing": "zipf", + "routingLabel": "zipf", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, "dispatchDtype": "bf16", - 
"activationProfile": "zeros", + "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { @@ -42814,8 +43986,8 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "8c8497a77d9085d", - "workloadId": "set:4:d8d49658059863f2", + "traceSignature": "14ded8461f2636c", + "workloadId": "set:8:f5576e2b712d38c3", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -42823,9 +43995,9 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28272129001", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272129001", - "createdAt": "2026-06-27T00:04:29Z", + "id": "28271945409", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271945409", + "createdAt": "2026-06-26T23:59:41.322977+00:00", "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" }, "rows": [ @@ -42833,364 +44005,146 @@ "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 73.7600028514862, - "p90": 98.59199821949005, - "p95": 107.77600109577179, - "p99": 133.31200182437897 + "p50": 93.37600320577621, + "p90": 101.59999877214432, + "p95": 103.16800326108932, + "p99": 108.15999656915665 }, "combine": { - "p50": 70.592001080513, - "p90": 84.54400300979614, - "p95": 90.43200314044952, - "p99": 139.26400244235992 + "p50": 73.69600236415863, + "p90": 78.17599922418594, + "p95": 79.99999821186066, + "p99": 82.59200304746628 }, "roundtrip": { - "p50": 125.59999525547028, - "p90": 159.87199544906616, - "p95": 172.57599532604218, - "p99": 367.2960102558136 + "p50": 142.59199798107147, + "p90": 150.62400698661804, + "p95": 152.54400670528412, + "p99": 159.5200002193451 }, "isolatedSum": { - "p50": 144.3520039319992, - "p90": 183.1360012292862, - "p95": 198.2080042362213, - "p99": 272.5760042667389 + "p50": 167.07200556993484, + "p90": 179.77599799633026, + "p95": 183.16800147294998, + "p99": 
190.75199961662292 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 630784, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 7, + "dispatchLogicalBytes": 444416, + "combineLogicalBytes": 444416, + "fanoutMean": 3.875, + "recvTokensMax": 8, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 2, + "globalTokens": 16, "dispatch": { - "p50": 74.87999647855759, - "p90": 107.04000294208527, - "p95": 128.80000472068787, - "p99": 359.391987323761 + "p50": 96.0640013217926, + "p90": 100.89600086212158, + "p95": 101.82400047779083, + "p99": 107.07200318574905 }, "combine": { - "p50": 70.49600034952164, - "p90": 84.06399935483932, - "p95": 89.88799899816513, - "p99": 102.9760017991066 + "p50": 74.43200051784515, + "p90": 80.48000186681747, + "p95": 81.216000020504, + "p99": 82.11199939250946 }, "roundtrip": { - "p50": 124.70400333404541, - "p90": 155.10399639606476, - "p95": 165.72800278663635, - "p99": 202.7519941329956 + "p50": 143.39199662208557, + "p90": 147.87200093269348, + "p95": 153.31199765205383, + "p99": 168.60799491405487 }, "isolatedSum": { - "p50": 145.37599682807922, - "p90": 191.1040022969246, - "p95": 218.688003718853, - "p99": 462.3679891228676 + "p50": 170.49600183963776, + "p90": 181.37600272893906, + "p95": 183.04000049829483, + "p99": 189.18400257825851 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 2, + "dispatchLogicalBytes": 845824, + "combineLogicalBytes": 845824, + "fanoutMean": 3.6875, + "recvTokensMax": 16, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 32, - "globalTokens": 256, + "tokensPerRank": 4, + "globalTokens": 32, "dispatch": { - "p50": 83.90399813652039, - "p90": 107.07200318574905, - "p95": 115.9679964184761, - "p99": 
136.51199638843536 + "p50": 95.87199985980988, + "p90": 100.73599964380264, + "p95": 102.81600058078766, + "p99": 109.95200276374817 }, "combine": { - "p50": 78.33600044250488, - "p90": 91.93599969148636, - "p95": 97.69599884748459, - "p99": 108.83200168609619 + "p50": 74.30399954319, + "p90": 80.89599758386612, + "p95": 81.4720019698143, + "p99": 84.19200032949448 }, "roundtrip": { - "p50": 137.2160017490387, - "p90": 170.23999989032745, - "p95": 181.37599527835846, - "p99": 215.36000072956085 + "p50": 142.752006649971, + "p90": 153.02400290966034, + "p95": 154.9759954214096, + "p99": 160.0639969110489 }, "isolatedSum": { - "p50": 162.23999857902527, - "p90": 199.0080028772354, - "p95": 213.6639952659607, - "p99": 245.34399807453156 + "p50": 170.17599940299988, + "p90": 181.63199722766876, + "p95": 184.28800255060196, + "p99": 194.14400309324265 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19726336, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 182, - "stragglerRank": 5, + "dispatchLogicalBytes": 1691648, + "combineLogicalBytes": 1691648, + "fanoutMean": 3.6875, + "recvTokensMax": 32, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 8, + "globalTokens": 64, "dispatch": { - "p50": 115.99999666213989, - "p90": 135.71199774742126, - "p95": 143.8400000333786, - "p99": 168.67199540138245 + "p50": 97.88800030946732, + "p90": 101.82400047779083, + "p95": 103.96800190210342, + "p99": 111.42399907112122 }, "combine": { - "p50": 104.73600029945374, - "p90": 121.47200107574463, - "p95": 125.47199428081512, - "p99": 163.00800442695618 + "p50": 75.6160020828247, + "p90": 81.4720019698143, + "p95": 82.04799890518188, + "p99": 84.03199911117554 }, "roundtrip": { - "p50": 196.6720074415207, - "p90": 216.19200706481934, - "p95": 220.5120027065277, - "p99": 240.1919960975647 + "p50": 146.7519998550415, + "p90": 153.47200632095337, + "p95": 
154.9759954214096, + "p99": 167.9680049419403 }, "isolatedSum": { - "p50": 220.73599696159363, - "p90": 257.1839988231659, - "p95": 269.3119943141937, - "p99": 331.6799998283386 + "p50": 173.50400239229202, + "p90": 183.29600244760513, + "p95": 186.0160008072853, + "p99": 195.45599818229675 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-eb6d6f9b", - "identity": "h200|deepep|7168|8|256|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", - "colorKey": "h200_3a47b6c9", - "comparisonKey": "4a72e21e2f542236", - "schemaVersion": 3, - "generatedAt": "2026-06-26T23:49:45.031759+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_7", - "sku": "h200", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "runtime-visible-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · bf16", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "ac583971f94b176", - 
"workloadId": "set:8:d8d49658059863f2", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28271615137", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271615137", - "createdAt": "2026-06-26T23:48:21Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 70.97599655389786, - "p90": 85.50400286912918, - "p95": 95.36000341176987, - "p99": 316.79999828338623 - }, - "combine": { - "p50": 68.70400160551071, - "p90": 74.72000271081924, - "p95": 78.72000336647034, - "p99": 94.2080020904541 - }, - "roundtrip": { - "p50": 122.56000190973282, - "p90": 143.26399564743042, - "p95": 153.1199961900711, - "p99": 172.2240000963211 - }, - "isolatedSum": { - "p50": 139.67999815940857, - "p90": 160.22400557994843, - "p95": 174.0800067782402, - "p99": 411.00800037384033 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 630784, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 72.12799787521362, - "p90": 85.24800091981888, - "p95": 91.93599969148636, - "p99": 119.48800086975098 - }, - "combine": { - "p50": 68.57600063085556, - "p90": 72.83200323581696, - "p95": 77.15199887752533, - "p99": 83.45600217580795 - }, - "roundtrip": { - "p50": 120.83200365304947, - "p90": 129.2160004377365, - "p95": 133.215993642807, - "p99": 145.75999975204468 - }, - "isolatedSum": { - "p50": 140.70399850606918, - "p90": 158.08000415563583, - "p95": 169.0879985690117, - "p99": 202.94400304555893 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1232896, - 
"combineLogicalBytes": 1232896, - "fanoutMean": 5.375, - "recvTokensMax": 13, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 73.63200187683105, - "p90": 90.84799885749817, - "p95": 103.64799946546555, - "p99": 133.02400708198547 - }, - "combine": { - "p50": 70.52800059318542, - "p90": 80.86399734020233, - "p95": 87.74399757385254, - "p99": 105.6319996714592 - }, - "roundtrip": { - "p50": 123.64800274372101, - "p90": 149.59999918937683, - "p95": 158.33599865436554, - "p99": 186.0480010509491 - }, - "isolatedSum": { - "p50": 144.16000247001648, - "p90": 171.7119961977005, - "p95": 191.39199703931808, - "p99": 238.65600675344467 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2480128, - "combineLogicalBytes": 2480128, - "fanoutMean": 5.40625, - "recvTokensMax": 29, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 73.18399846553802, - "p90": 83.03999900817871, - "p95": 94.91200000047684, - "p99": 104.09600287675858 - }, - "combine": { - "p50": 69.2799985408783, - "p90": 77.82399654388428, - "p95": 83.10399949550629, - "p99": 110.04800349473953 - }, - "roundtrip": { - "p50": 123.52000176906586, - "p90": 143.19999516010284, - "p95": 152.0960032939911, - "p99": 205.08800446987152 - }, - "isolatedSum": { - "p50": 142.46399700641632, - "p90": 160.863995552063, - "p95": 178.01599949598312, - "p99": 214.1440063714981 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 7, + "dispatchLogicalBytes": 3354624, + "combineLogicalBytes": 3354624, + "fanoutMean": 3.65625, + "recvTokensMax": 64, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 @@ -43199,35 +44153,35 @@ "tokensPerRank": 16, "globalTokens": 128, 
"dispatch": { - "p50": 71.16799801588058, - "p90": 83.36000144481659, - "p95": 94.11200135946274, - "p99": 106.46399855613708 + "p50": 97.08800166845322, + "p90": 100.67199915647507, + "p95": 104.25599664449692, + "p99": 110.6560006737709 }, "combine": { - "p50": 70.04799693822861, - "p90": 78.07999849319458, - "p95": 83.20000022649765, - "p99": 95.71199864149094 + "p50": 78.94399762153625, + "p90": 82.04799890518188, + "p95": 82.78399705886841, + "p99": 89.40800279378891 }, "roundtrip": { - "p50": 124.54400211572647, - "p90": 144.0960019826889, - "p95": 155.008003115654, - "p99": 204.3839991092682 + "p50": 150.7200002670288, + "p90": 159.10400450229645, + "p95": 161.69600188732147, + "p99": 167.07199811935425 }, "isolatedSum": { - "p50": 141.2159949541092, - "p90": 161.43999993801117, - "p95": 177.3120015859604, - "p99": 202.17599719762802 + "p50": 176.03199928998947, + "p90": 182.71999806165695, + "p95": 187.03999370336533, + "p99": 200.06400346755981 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 9920512, - "combineLogicalBytes": 9920512, - "fanoutMean": 5.40625, - "recvTokensMax": 92, - "stragglerRank": 5, + "dispatchLogicalBytes": 6537216, + "combineLogicalBytes": 6537216, + "fanoutMean": 3.5625, + "recvTokensMax": 127, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 @@ -43236,35 +44190,35 @@ "tokensPerRank": 32, "globalTokens": 256, "dispatch": { - "p50": 80.60800284147263, - "p90": 89.59999680519104, - "p95": 94.81599926948547, - "p99": 117.53600090742111 + "p50": 96.47999703884125, + "p90": 101.31199657917023, + "p95": 104.5759990811348, + "p99": 110.62400043010712 }, "combine": { - "p50": 77.08799839019775, - "p90": 81.95199817419052, - "p95": 87.3280018568039, - "p99": 95.0080007314682 + "p50": 86.46400272846222, + "p90": 90.11200070381165, + "p95": 90.62399715185165, + "p99": 93.18400174379349 }, "roundtrip": { - "p50": 135.19999384880066, - "p90": 148.47999811172485, - "p95": 156.63999319076538, - "p99": 
188.57599794864655 + "p50": 158.75199437141418, + "p90": 163.55200111865997, + "p95": 164.89599645137787, + "p99": 169.21600699424744 }, "isolatedSum": { - "p50": 157.69600123167038, - "p90": 171.55199497938156, - "p95": 182.14400112628937, - "p99": 212.5440016388893 + "p50": 182.94399976730347, + "p90": 191.42399728298187, + "p95": 195.19999623298645, + "p99": 203.8080021739006 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19726336, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 182, - "stragglerRank": 5, + "dispatchLogicalBytes": 12859392, + "combineLogicalBytes": 12859392, + "fanoutMean": 3.50390625, + "recvTokensMax": 255, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 @@ -43273,35 +44227,35 @@ "tokensPerRank": 64, "globalTokens": 512, "dispatch": { - "p50": 94.68799829483032, - "p90": 114.97599631547928, - "p95": 122.36800044775009, - "p99": 148.03199470043182 + "p50": 106.9440022110939, + "p90": 138.36799561977386, + "p95": 143.0400013923645, + "p99": 250.2720057964325 }, "combine": { - "p50": 87.39200234413147, - "p90": 97.59999811649323, - "p95": 102.9760017991066, - "p99": 113.95200341939926 + "p50": 95.0080007314682, + "p90": 98.39999675750732, + "p95": 98.91200065612793, + "p99": 105.59999942779541 }, "roundtrip": { - "p50": 158.87999534606934, - "p90": 176.15999281406403, - "p95": 185.2159947156906, - "p99": 225.600004196167 + "p50": 176.67199671268463, + "p90": 184.03199315071106, + "p95": 187.3600035905838, + "p99": 190.5599981546402 }, "isolatedSum": { - "p50": 182.0800006389618, - "p90": 212.5759944319725, - "p95": 225.3440022468567, - "p99": 261.9839981198311 + "p50": 201.9520029425621, + "p90": 236.7679923772812, + "p95": 241.95200204849243, + "p99": 355.8720052242279 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 38993920, - "combineLogicalBytes": 38993920, - "fanoutMean": 5.3125, - "recvTokensMax": 367, - "stragglerRank": 7, + "dispatchLogicalBytes": 25145344, + 
"combineLogicalBytes": 25145344, + "fanoutMean": 3.42578125, + "recvTokensMax": 510, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 @@ -43310,35 +44264,35 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 116.22399836778641, - "p90": 125.18399953842163, - "p95": 132.32000172138214, - "p99": 140.83200693130493 + "p50": 128.4160017967224, + "p90": 145.9520012140274, + "p95": 148.83199334144592, + "p99": 151.99999511241913 }, "combine": { - "p50": 105.34399747848511, - "p90": 111.32799834012985, - "p95": 116.28799885511398, - "p99": 123.83999675512314 + "p50": 119.74400281906128, + "p90": 122.56000190973282, + "p95": 123.80799651145935, + "p99": 129.7920048236847 }, "roundtrip": { - "p50": 197.60000705718994, - "p90": 207.2640061378479, - "p95": 214.81600403785706, - "p99": 241.05599522590637 + "p50": 228.2560020685196, + "p90": 233.88800024986267, + "p95": 236.12800240516663, + "p99": 240.28800427913666 }, "isolatedSum": { - "p50": 221.56799584627151, - "p90": 236.51199787855148, - "p95": 248.60800057649612, - "p99": 264.67200368642807 + "p50": 248.1600046157837, + "p90": 268.5120031237602, + "p95": 272.6399898529053, + "p99": 281.7919999361038 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 7, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -43346,34 +44300,35 @@ ] }, { - "id": "cx-dea4952a", - "identity": "h200|deepep|7168|8|384|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||d6c49ae98878760", - "colorKey": "h200_3a47b6c9", - "comparisonKey": "f2cda8ef40003c42", + "id": "cx-456ed1f6", + "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|decode|normal|none|none|0|tuned||1fa7fe74d0e30a3", + 
"colorKey": "h100_aa268d13", + "comparisonKey": "791af0af2f802328", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:53:13.205485+00:00", + "generatedAt": "2026-06-26T23:55:00.953910+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h200-dgxc-slurm_7", - "sku": "h200", + "runner": "h100-dgxc-slurm_16", + "sku": "h100", "backend": "deepep", "phase": "decode", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "runtime-visible-v1", - "topologyClass": "h200-nvlink-island", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H200 EP8 · deepep · bf16", + "label": "H100 EP8 · deepep · bf16 · zipf", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, - "experts": 384, - "routing": "uniform", - "routingLabel": "uniform", + "experts": 256, + "routing": "zipf", + "routingLabel": "zipf", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, @@ -43398,8 +44353,8 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "d6c49ae98878760", - "workloadId": "set:8:9a27d0df4b17fa09", + "traceSignature": "1fa7fe74d0e30a3", + "workloadId": "set:4:f5576e2b712d38c3", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -43407,9 +44362,9 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271728983", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271728983", - "createdAt": "2026-06-26T23:51:48Z", + "id": "28271802749", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271802749", + "createdAt": "2026-06-26T23:55:00.953910+00:00", "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" }, "rows": [ @@ -43417,108 +44372,327 @@ "tokensPerRank": 1, 
"globalTokens": 8, "dispatch": { - "p50": 73.53600114583969, - "p90": 98.14400225877762, - "p95": 107.45599865913391, - "p99": 121.63200229406357 + "p50": 97.34400361776352, + "p90": 106.33599758148193, + "p95": 108.99200290441513, + "p99": 118.14399808645248 }, "combine": { - "p50": 68.2239979505539, - "p90": 82.24000036716461, - "p95": 87.26400136947632, - "p99": 110.07999628782272 + "p50": 78.72000336647034, + "p90": 81.11999928951263, + "p95": 82.14399963617325, + "p99": 87.42400258779526 }, "roundtrip": { - "p50": 125.59999525547028, - "p90": 155.39200603961945, - "p95": 163.68000209331512, - "p99": 201.6959935426712 + "p50": 148.76799285411835, + "p90": 160.5439931154251, + "p95": 164.73600268363953, + "p99": 172.44799435138702 }, "isolatedSum": { - "p50": 141.75999909639359, - "p90": 180.38400262594223, - "p95": 194.72000002861023, - "p99": 231.7119985818863 + "p50": 176.06400698423386, + "p90": 187.45599687099457, + "p95": 191.13600254058838, + "p99": 205.56800067424774 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 602112, - "combineLogicalBytes": 602112, - "fanoutMean": 5.25, + "dispatchLogicalBytes": 444416, + "combineLogicalBytes": 444416, + "fanoutMean": 3.875, "recvTokensMax": 8, - "stragglerRank": 4, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2, - "globalTokens": 16, + "tokensPerRank": 8, + "globalTokens": 64, "dispatch": { - "p50": 75.87199658155441, - "p90": 103.2319962978363, - "p95": 112.15999722480774, - "p99": 193.05600225925446 + "p50": 97.50399738550186, + "p90": 104.38399761915207, + "p95": 108.99200290441513, + "p99": 137.2479945421219 }, "combine": { - "p50": 68.60800087451935, - "p90": 83.5840031504631, - "p95": 90.30400216579437, - "p99": 129.60000336170197 + "p50": 79.39200103282928, + "p90": 86.68799698352814, + "p95": 87.52000331878662, + "p99": 103.90400141477585 }, "roundtrip": { - "p50": 123.23199957609177, - "p90": 153.31199765205383, - "p95": 164.38399255275726, - 
"p99": 185.37600338459015 + "p50": 152.99199521541595, + "p90": 162.9759967327118, + "p95": 165.69599509239197, + "p99": 171.55200242996216 }, "isolatedSum": { - "p50": 144.47999745607376, - "p90": 186.8159994482994, - "p95": 202.4639993906021, - "p99": 322.6560056209564 + "p50": 176.89599841833115, + "p90": 191.0719946026802, + "p95": 196.51200622320175, + "p99": 241.15199595689774 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1218560, - "combineLogicalBytes": 1218560, - "fanoutMean": 5.3125, - "recvTokensMax": 14, - "stragglerRank": 4, + "dispatchLogicalBytes": 3354624, + "combineLogicalBytes": 3354624, + "fanoutMean": 3.65625, + "recvTokensMax": 64, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 4, - "globalTokens": 32, + "tokensPerRank": 32, + "globalTokens": 256, "dispatch": { - "p50": 74.87999647855759, - "p90": 96.92800045013428, - "p95": 105.85600137710571, - "p99": 121.15199863910675 + "p50": 102.33599692583084, + "p90": 111.68000102043152, + "p95": 115.68000167608261, + "p99": 123.74400347471237 }, "combine": { - "p50": 69.2799985408783, - "p90": 82.56000280380249, - "p95": 90.30400216579437, - "p99": 102.04800218343735 - }, - "roundtrip": { - "p50": 125.2799928188324, - "p90": 152.28800475597382, - "p95": 160.8320027589798, - "p99": 174.55999553203583 + "p50": 87.45600283145905, + "p90": 94.81599926948547, + "p95": 95.32800316810608, + "p99": 96.3200032711029 + }, + "roundtrip": { + "p50": 160.7999950647354, + "p90": 168.67199540138245, + "p95": 171.29600048065186, + "p99": 178.52799594402313 }, "isolatedSum": { - "p50": 144.15999501943588, - "p90": 179.48800325393677, - "p95": 196.16000354290009, - "p99": 223.2000008225441 + "p50": 189.7919997572899, + "p90": 206.496000289917, + "p95": 211.0080048441887, + "p99": 220.06400674581528 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2408448, - "combineLogicalBytes": 2408448, - "fanoutMean": 5.25, - "recvTokensMax": 26, + 
"dispatchLogicalBytes": 12859392, + "combineLogicalBytes": 12859392, + "fanoutMean": 3.50390625, + "recvTokensMax": 255, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 139.39200341701508, + "p90": 145.34400403499603, + "p95": 147.5200057029724, + "p99": 163.71199488639832 + }, + "combine": { + "p50": 120.15999853610992, + "p90": 128.1599998474121, + "p95": 128.86400520801544, + "p99": 129.88799810409546 + }, + "roundtrip": { + "p50": 227.87199914455414, + "p90": 232.7360063791275, + "p95": 235.32800376415253, + "p99": 255.13601303100586 + }, + "isolatedSum": { + "p50": 259.552001953125, + "p90": 273.50400388240814, + "p95": 276.38401091098785, + "p99": 293.5999929904938 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-db353ddd", + "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|decode|normal|none|none|0|tuned||22da8b58646609c", + "colorKey": "h100_002beb29", + "comparisonKey": "d83561aeea03cdbc", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:01:11.693533+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h100-dgxc-slurm_12", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 · zipf-heavy", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy", + "routingStep": 0, + 
"unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "22da8b58646609c", + "workloadId": "set:8:6b84350720aa8233", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271987393", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271987393", + "createdAt": "2026-06-27T00:01:11.693533+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 94.14400160312653, + "p90": 104.41599786281586, + "p95": 109.8560020327568, + "p99": 133.69600474834442 + }, + "combine": { + "p50": 71.32799923419952, + "p90": 75.03999769687653, + "p95": 80.86399734020233, + "p99": 237.34399676322937 + }, + "roundtrip": { + "p50": 141.2159949541092, + "p90": 150.39999783039093, + "p95": 151.8079936504364, + "p99": 244.73600089550018 + }, + "isolatedSum": { + "p50": 165.47200083732605, + "p90": 179.45599555969238, + "p95": 190.71999937295914, + "p99": 371.0400015115738 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 172032, + "combineLogicalBytes": 172032, + "fanoutMean": 1.5, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 
94.43199634552002, + "p90": 101.50399804115295, + "p95": 103.04000228643417, + "p99": 105.85600137710571 + }, + "combine": { + "p50": 72.03199714422226, + "p90": 73.95199686288834, + "p95": 74.5600014925003, + "p99": 79.80799674987793 + }, + "roundtrip": { + "p50": 141.02399349212646, + "p90": 147.77599275112152, + "p95": 150.176003575325, + "p99": 175.6799966096878 + }, + "isolatedSum": { + "p50": 166.46399348974228, + "p90": 175.4559949040413, + "p95": 177.60000377893448, + "p99": 185.66399812698364 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 315392, + "fanoutMean": 1.375, + "recvTokensMax": 16, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 95.20000219345093, + "p90": 101.47199779748917, + "p95": 103.13600301742554, + "p99": 108.12799632549286 + }, + "combine": { + "p50": 70.8480030298233, + "p90": 78.65600287914276, + "p95": 79.0719985961914, + "p99": 81.53600245714188 + }, + "roundtrip": { + "p50": 143.93599331378937, + "p90": 152.41600573062897, + "p95": 155.61600029468536, + "p99": 564.3519759178162 + }, + "isolatedSum": { + "p50": 166.04800522327423, + "p90": 180.12800067663193, + "p95": 182.20800161361694, + "p99": 189.66399878263474 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 1.34375, + "recvTokensMax": 32, "stragglerRank": 4, "correct": true, "samplesPooled": 600, @@ -43528,34 +44702,34 @@ "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 76.54400169849396, - "p90": 104.2879968881607, - "p95": 111.42399907112122, - "p99": 138.5599970817566 + "p50": 96.25600278377533, + "p90": 103.58399897813797, + "p95": 107.58399963378906, + "p99": 168.09600591659546 }, "combine": { - "p50": 70.52800059318542, - "p90": 85.66399663686752, - "p95": 91.67999774217606, - "p99": 102.59199887514114 + "p50": 75.71200281381607, + "p90": 
80.1599994301796, + "p95": 80.83199709653854, + "p99": 82.30400085449219 }, "roundtrip": { - "p50": 126.39999389648438, - "p90": 154.55999970436096, - "p95": 166.97600483894348, - "p99": 208.67200195789337 + "p50": 144.73600685596466, + "p90": 150.81599354743958, + "p95": 152.79999375343323, + "p99": 157.95199573040009 }, "isolatedSum": { - "p50": 147.07200229167938, - "p90": 189.95199352502823, - "p95": 203.10399681329727, - "p99": 241.15199595689774 + "p50": 171.9680055975914, + "p90": 183.74399840831757, + "p95": 188.4159967303276, + "p99": 250.40000677108765 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4831232, - "combineLogicalBytes": 4831232, - "fanoutMean": 5.265625, - "recvTokensMax": 48, + "dispatchLogicalBytes": 1376256, + "combineLogicalBytes": 1376256, + "fanoutMean": 1.5, + "recvTokensMax": 64, "stragglerRank": 4, "correct": true, "samplesPooled": 600, @@ -43565,34 +44739,34 @@ "tokensPerRank": 16, "globalTokens": 128, "dispatch": { - "p50": 78.40000092983246, - "p90": 99.07200187444687, - "p95": 105.98400235176086, - "p99": 126.3359934091568 + "p50": 96.3200032711029, + "p90": 102.39999741315842, + "p95": 104.51199859380722, + "p99": 110.27199774980545 }, "combine": { - "p50": 71.61600142717361, - "p90": 85.40800213813782, - "p95": 90.27200192213058, - "p99": 109.40799862146378 + "p50": 78.65600287914276, + "p90": 81.37600123882294, + "p95": 81.82399719953537, + "p99": 87.0399996638298 }, "roundtrip": { - "p50": 129.02399897575378, - "p90": 156.2879979610443, - "p95": 166.143998503685, - "p99": 196.51199877262115 + "p50": 146.33600413799286, + "p90": 152.38399803638458, + "p95": 153.76000106334686, + "p99": 157.82399475574493 }, "isolatedSum": { - "p50": 150.01600235700607, - "p90": 184.4800040125847, - "p95": 196.25600427389145, - "p99": 235.74399203062057 + "p50": 174.97600615024567, + "p90": 183.77599865198135, + "p95": 186.3359957933426, + "p99": 197.31199741363525 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 9848832, - 
"combineLogicalBytes": 9848832, - "fanoutMean": 5.3671875, - "recvTokensMax": 91, + "dispatchLogicalBytes": 2781184, + "combineLogicalBytes": 2781184, + "fanoutMean": 1.515625, + "recvTokensMax": 128, "stragglerRank": 4, "correct": true, "samplesPooled": 600, @@ -43602,34 +44776,34 @@ "tokensPerRank": 32, "globalTokens": 256, "dispatch": { - "p50": 85.40800213813782, - "p90": 105.12000322341919, - "p95": 114.04799669981003, - "p99": 131.71200454235077 + "p50": 96.67199850082397, + "p90": 101.95200145244598, + "p95": 103.87200117111206, + "p99": 109.56799983978271 }, "combine": { - "p50": 77.91999727487564, - "p90": 90.59199690818787, - "p95": 96.63999825716019, - "p99": 105.18400371074677 + "p50": 83.20000022649765, + "p90": 88.639996945858, + "p95": 89.28000181913376, + "p99": 90.27200192213058 }, "roundtrip": { - "p50": 137.2160017490387, - "p90": 163.07200491428375, - "p95": 172.35200107097626, - "p99": 208.064004778862 + "p50": 154.27200496196747, + "p90": 159.90400314331055, + "p95": 161.8880033493042, + "p99": 171.64799571037292 }, "isolatedSum": { - "p50": 163.32799941301346, - "p90": 195.71200013160706, - "p95": 210.68799495697021, - "p99": 236.89600825309753 + "p50": 179.87199872732162, + "p90": 190.59199839830399, + "p95": 193.15200299024582, + "p99": 199.8400017619133 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19496960, - "combineLogicalBytes": 19496960, - "fanoutMean": 5.3125, - "recvTokensMax": 178, + "dispatchLogicalBytes": 5533696, + "combineLogicalBytes": 5533696, + "fanoutMean": 1.5078125, + "recvTokensMax": 256, "stragglerRank": 4, "correct": true, "samplesPooled": 600, @@ -43639,35 +44813,35 @@ "tokensPerRank": 64, "globalTokens": 512, "dispatch": { - "p50": 99.55199807882309, - "p90": 126.65599584579468, - "p95": 135.00800728797913, - "p99": 167.10400581359863 + "p50": 104.80000078678131, + "p90": 112.5440001487732, + "p95": 115.35999923944473, + "p99": 119.64800208806992 }, "combine": { - "p50": 89.24800157546997, - "p90": 
106.1440035700798, - "p95": 111.23199760913849, - "p99": 126.65599584579468 + "p50": 95.32800316810608, + "p90": 97.6639986038208, + "p95": 98.14400225877762, + "p99": 103.45599800348282 }, "roundtrip": { - "p50": 162.9759967327118, - "p90": 185.88800728321075, - "p95": 193.6960071325302, - "p99": 255.87201118469238 + "p50": 173.21600019931793, + "p90": 177.47199535369873, + "p95": 178.97599935531616, + "p99": 184.09599363803864 }, "isolatedSum": { - "p50": 188.79999965429306, - "p90": 232.79999941587448, - "p95": 246.24000489711761, - "p99": 293.7600016593933 + "p50": 200.1280039548874, + "p90": 210.207998752594, + "p95": 213.50400149822235, + "p99": 223.10400009155273 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 38836224, - "combineLogicalBytes": 38836224, - "fanoutMean": 5.291015625, - "recvTokensMax": 372, - "stragglerRank": 7, + "dispatchLogicalBytes": 11210752, + "combineLogicalBytes": 11210752, + "fanoutMean": 1.52734375, + "recvTokensMax": 512, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 @@ -43676,34 +44850,34 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 118.367999792099, - "p90": 135.42400300502777, - "p95": 143.5520052909851, - "p99": 181.88799917697906 + "p50": 120.64000219106674, + "p90": 141.9840008020401, + "p95": 143.23200285434723, + "p99": 148.54399859905243 }, "combine": { - "p50": 105.34399747848511, - "p90": 119.99999731779099, - "p95": 126.78399682044983, - "p99": 139.0399932861328 + "p50": 119.48800086975098, + "p90": 122.04799801111221, + "p95": 122.56000190973282, + "p99": 123.58400225639343 }, "roundtrip": { - "p50": 197.53600656986237, - "p90": 215.83999693393707, - "p95": 224.48000311851501, - "p99": 253.1839907169342 + "p50": 219.84000504016876, + "p90": 226.17599368095398, + "p95": 227.29599475860596, + "p99": 232.16000199317932 }, "isolatedSum": { - "p50": 223.7119972705841, - "p90": 255.42400032281876, - "p95": 270.33600211143494, - "p99": 320.9279924631119 + "p50": 
240.12800306081772, + "p90": 264.0319988131523, + "p95": 265.79200476408005, + "p99": 272.12800085544586 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77514752, - "combineLogicalBytes": 77514752, - "fanoutMean": 5.2802734375, - "recvTokensMax": 707, + "dispatchLogicalBytes": 22650880, + "combineLogicalBytes": 22650880, + "fanoutMean": 1.54296875, + "recvTokensMax": 1024, "stragglerRank": 4, "correct": true, "samplesPooled": 600, @@ -43712,16 +44886,16 @@ ] }, { - "id": "cx-99defb8b", - "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|alternating-groups|8|decode|normal|none|none|0|tuned||3cd13eac5b27759", - "colorKey": "h200_26ff284b", - "comparisonKey": "7784b2ab75c0721c", + "id": "cx-acf36978", + "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|decode|normal|none|none|0|tuned||47fddabb3277bec", + "colorKey": "h100_002beb29", + "comparisonKey": "d83561aeea03cdbc", "schemaVersion": 3, - "generatedAt": "2026-06-27T00:13:19.040813+00:00", + "generatedAt": "2026-06-26T23:55:11.297271+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h200-dgxc-slurm_0", - "sku": "h200", + "runner": "h100-dgxc-slurm_18", + "sku": "h100", "backend": "deepep", "phase": "decode", "mode": "normal", @@ -43729,17 +44903,18 @@ "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", + "topologyClass": "h100-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H200 EP8 · deepep · bf16 · alternating-groups", + "label": "H100 EP8 · deepep · bf16 · zipf-heavy", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, "experts": 256, - "routing": "alternating-groups", - "routingLabel": "alternating-groups", + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, @@ -43764,8 +44939,8 @@ "scaleUpDomain": 8 }, 
"routingConsistent": true, - "traceSignature": "3cd13eac5b27759", - "workloadId": "set:3:24add4cb1eb472b4", + "traceSignature": "47fddabb3277bec", + "workloadId": "set:4:6b84350720aa8233", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -43773,82 +44948,119 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28272355894", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272355894", - "createdAt": "2026-06-27T00:11:56Z", + "id": "28271810135", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271810135", + "createdAt": "2026-06-26T23:55:11.297271+00:00", "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" }, "rows": [ { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 1, + "globalTokens": 8, "dispatch": { - "p50": 72.51200079917908, - "p90": 88.95999938249588, - "p95": 100.96000134944916, - "p99": 118.23999881744385 + "p50": 95.83999961614609, + "p90": 101.27999633550644, + "p95": 104.86400127410889, + "p99": 111.51999980211258 }, "combine": { - "p50": 66.880002617836, - "p90": 74.81600344181061, - "p95": 79.83999699354172, - "p99": 99.55199807882309 + "p50": 71.74400240182877, + "p90": 73.95199686288834, + "p95": 79.03999835252762, + "p99": 81.08799904584885 }, "roundtrip": { - "p50": 121.24799937009811, - "p90": 136.89599931240082, - "p95": 147.74399995803833, - "p99": 232.92799293994904 + "p50": 142.5279974937439, + "p90": 149.79200065135956, + "p95": 151.71200037002563, + "p99": 156.73600137233734 }, "isolatedSum": { - "p50": 139.39200341701508, - "p90": 163.7760028243065, - "p95": 180.79999834299088, - "p99": 217.79199689626694 + "p50": 167.58400201797485, + "p90": 175.23199319839478, + "p95": 183.9039996266365, + "p99": 192.60799884796143 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 3297280, - "combineLogicalBytes": 3297280, - "fanoutMean": 
3.59375, - "recvTokensMax": 61, - "stragglerRank": 1, + "dispatchLogicalBytes": 172032, + "combineLogicalBytes": 172032, + "fanoutMean": 1.5, + "recvTokensMax": 8, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 83.90399813652039, - "p90": 99.84000027179718, - "p95": 112.44799941778183, - "p99": 135.93600690364838 + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 98.1760025024414, + "p90": 104.96000200510025, + "p95": 106.91200196743011, + "p99": 112.44799941778183 }, "combine": { - "p50": 75.19999891519547, - "p90": 84.927998483181, - "p95": 90.81599861383438, - "p99": 107.45599865913391 + "p50": 73.34399968385696, + "p90": 79.99999821186066, + "p95": 80.48000186681747, + "p99": 85.08799970149994 }, "roundtrip": { - "p50": 137.40800321102142, - "p90": 155.35999834537506, - "p95": 164.92800414562225, - "p99": 303.6159873008728 + "p50": 146.14400267601013, + "p90": 152.6080071926117, + "p95": 154.7520011663437, + "p99": 160.73599457740784 }, "isolatedSum": { - "p50": 159.10399705171585, - "p90": 184.76799875497818, - "p95": 203.2639980316162, - "p99": 243.3920055627823 + "p50": 171.52000218629837, + "p90": 184.9600002169609, + "p95": 187.3920038342476, + "p99": 197.53599911928177 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 13275136, - "combineLogicalBytes": 13275136, - "fanoutMean": 3.6171875, - "recvTokensMax": 236, - "stragglerRank": 3, + "dispatchLogicalBytes": 1376256, + "combineLogicalBytes": 1376256, + "fanoutMean": 1.5, + "recvTokensMax": 64, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 98.91200065612793, + "p90": 105.92000186443329, + "p95": 108.47999900579453, + "p99": 115.93600362539291 + }, + "combine": { + "p50": 82.87999778985977, + "p90": 88.54400366544724, + "p95": 88.92799913883209, + "p99": 90.27200192213058 
+ }, + "roundtrip": { + "p50": 156.19200468063354, + "p90": 162.84799575805664, + "p95": 165.56799411773682, + "p99": 169.72799599170685 + }, + "isolatedSum": { + "p50": 181.7919984459877, + "p90": 194.46400552988052, + "p95": 197.40799814462662, + "p99": 206.2080055475235 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 5533696, + "combineLogicalBytes": 5533696, + "fanoutMean": 1.5078125, + "recvTokensMax": 256, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 @@ -43857,35 +45069,35 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 126.01600587368011, - "p90": 135.45599579811096, - "p95": 139.8400068283081, - "p99": 168.57600212097168 + "p50": 121.88799679279327, + "p90": 129.88799810409546, + "p95": 131.16799294948578, + "p99": 136.1279934644699 }, "combine": { - "p50": 111.58400028944016, - "p90": 120.64000219106674, - "p95": 128.31999361515045, - "p99": 143.0719941854477 + "p50": 114.68800157308578, + "p90": 121.18399888277054, + "p95": 122.079998254776, + "p99": 129.2160004377365 }, "roundtrip": { - "p50": 210.81599593162537, - "p90": 224.0000069141388, - "p95": 234.49599742889404, - "p99": 253.76001000404358 + "p50": 219.90400552749634, + "p90": 224.73600506782532, + "p95": 226.623997092247, + "p99": 230.30400276184082 }, "isolatedSum": { - "p50": 237.60000616312027, - "p90": 256.0959979891777, - "p95": 268.16000044345856, - "p99": 311.6479963064194 + "p50": 236.57599836587906, + "p90": 251.071996986866, + "p95": 253.24799120426178, + "p99": 265.3439939022064 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 53172224, - "combineLogicalBytes": 53172224, - "fanoutMean": 3.6220703125, - "recvTokensMax": 934, - "stragglerRank": 2, + "dispatchLogicalBytes": 22650880, + "combineLogicalBytes": 22650880, + "fanoutMean": 1.54296875, + "recvTokensMax": 1024, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 @@ -43893,16 +45105,16 @@ ] }, { - "id": "cx-14a4cdc0", - "identity": 
"h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|alternating-groups@s1|8|decode|normal|none|none|1|tuned||f8662de0b3559f9", - "colorKey": "h200_b02e4015", - "comparisonKey": "7784b2ab75c0721c", + "id": "cx-18fdfbeb", + "identity": "h100|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-heavy+eplb|8|decode|normal|none|none|0|tuned||5a3054422534366", + "colorKey": "h100_c44978e5", + "comparisonKey": "26b5ab23f62d3389", "schemaVersion": 3, - "generatedAt": "2026-06-27T00:13:31.348412+00:00", + "generatedAt": "2026-06-27T00:01:10.918377+00:00", "status": "valid", - "publicationStatus": "comparable-experimental", - "runner": "h200-dgxc-slurm_7", - "sku": "h200", + "publicationStatus": "official", + "runner": "h100-dgxc-slurm_11", + "sku": "h100", "backend": "deepep", "phase": "decode", "mode": "normal", @@ -43910,20 +45122,21 @@ "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", + "topologyClass": "h100-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H200 EP8 · deepep · bf16 · alternating-groups@s1", + "label": "H100 EP8 · deepep · bf16 · zipf-heavy+eplb", + "model": "DeepSeek-V3 (EPLB physical)", "shape": { "hidden": 7168, "topk": 8, - "experts": 256, - "routing": "alternating-groups", - "routingLabel": "alternating-groups@s1", - "routingStep": 1, + "experts": 288, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy+eplb", + "routingStep": 0, "unevenTokens": "none", - "eplbEnabled": false, + "eplbEnabled": true, "dispatchDtype": "bf16", "activationProfile": "normal", "combineQuantMode": "none" @@ -43945,54 +45158,202 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "f8662de0b3559f9", - "workloadId": null, - "workloadSource": "seeded-runtime", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, + "traceSignature": "5a3054422534366", + "workloadId": "set:8:6b84350720aa8233", + 
"workloadSource": "canonical-serialized", + "eplbImbalanceBefore": 7.40625, + "eplbImbalanceAfter": 1.0004417782738093, "backendVersion": "1.2.1", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28272358996", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272358996", - "createdAt": "2026-06-27T00:12:03Z", + "id": "28271992225", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271992225", + "createdAt": "2026-06-27T00:01:10.918377+00:00", "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" }, "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 94.01600062847137, + "p90": 101.59999877214432, + "p95": 102.68799960613251, + "p99": 107.96800255775452 + }, + "combine": { + "p50": 71.87200337648392, + "p90": 78.87999713420868, + "p95": 79.48800176382065, + "p99": 80.99199831485748 + }, + "roundtrip": { + "p50": 138.72000575065613, + "p90": 147.2640037536621, + "p95": 148.76799285411835, + "p99": 153.08800339698792 + }, + "isolatedSum": { + "p50": 165.8880040049553, + "p90": 180.479995906353, + "p95": 182.17600136995316, + "p99": 188.960000872612 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 559104, + "combineLogicalBytes": 559104, + "fanoutMean": 4.875, + "recvTokensMax": 6, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 69.92000341415405, + "p90": 99.64799880981445, + "p95": 101.43999755382538, + "p99": 106.84800148010254 + }, + "combine": { + "p50": 71.6480016708374, + "p90": 79.71200346946716, + "p95": 80.64000308513641, + "p99": 81.91999793052673 + }, + "roundtrip": { + "p50": 129.34400141239166, + "p90": 143.71199905872345, + "p95": 146.08000218868256, + "p99": 150.39999783039093 + }, + "isolatedSum": { + "p50": 141.56800508499146, + "p90": 179.36000227928162, + "p95": 
182.0800006389618, + "p99": 188.76799941062927 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1175552, + "combineLogicalBytes": 1175552, + "fanoutMean": 5.125, + "recvTokensMax": 12, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 71.74400240182877, + "p90": 99.80800002813339, + "p95": 101.79200023412704, + "p99": 107.96800255775452 + }, + "combine": { + "p50": 72.67200201749802, + "p90": 81.56800270080566, + "p95": 86.43200248479843, + "p99": 88.73599767684937 + }, + "roundtrip": { + "p50": 129.50399518013, + "p90": 156.47999942302704, + "p95": 159.13599729537964, + "p99": 162.6880019903183 + }, + "isolatedSum": { + "p50": 144.41600441932678, + "p90": 181.37600272893906, + "p95": 188.22400271892548, + "p99": 196.70400023460388 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2465792, + "combineLogicalBytes": 2465792, + "fanoutMean": 5.375, + "recvTokensMax": 25, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, { "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 70.75200229883194, - "p90": 98.39999675750732, - "p95": 108.57599973678589, - "p99": 136.03200018405914 + "p50": 72.12799787521362, + "p90": 96.16000205278397, + "p95": 98.30400347709656, + "p99": 103.64799946546555 }, "combine": { - "p50": 67.52000004053116, - "p90": 79.83999699354172, - "p95": 84.09599959850311, - "p99": 104.09600287675858 + "p50": 72.9919970035553, + "p90": 81.08799904584885, + "p95": 81.60000294446945, + "p99": 87.13600039482117 }, "roundtrip": { - "p50": 122.8799968957901, - "p90": 146.62399888038635, - "p95": 155.32800555229187, - "p99": 178.3359944820404 + "p50": 127.9039978981018, + "p90": 152.16000378131866, + "p95": 155.90399503707886, + "p99": 157.24800527095795 }, "isolatedSum": { - "p50": 138.2720023393631, - "p90": 178.23999375104904, - "p95": 192.671999335289, - "p99": 240.12800306081772 + "p50": 
145.11999487876892, + "p90": 177.2480010986328, + "p95": 179.904006421566, + "p99": 190.7839998602867 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 3297280, - "combineLogicalBytes": 3297280, - "fanoutMean": 3.59375, - "recvTokensMax": 61, - "stragglerRank": 7, + "dispatchLogicalBytes": 4988928, + "combineLogicalBytes": 4988928, + "fanoutMean": 5.4375, + "recvTokensMax": 47, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 82.0159986615181, + "p90": 98.55999797582626, + "p95": 101.50399804115295, + "p99": 106.33599758148193 + }, + "combine": { + "p50": 73.56800138950348, + "p90": 87.87199854850769, + "p95": 88.8959988951683, + "p99": 89.88799899816513 + }, + "roundtrip": { + "p50": 127.71199643611908, + "p90": 159.32799875736237, + "p95": 160.99199652671814, + "p99": 163.90399634838104 + }, + "isolatedSum": { + "p50": 155.58400005102158, + "p90": 186.43199652433395, + "p95": 190.39999693632126, + "p99": 196.22399657964706 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9791488, + "combineLogicalBytes": 9791488, + "fanoutMean": 5.3359375, + "recvTokensMax": 94, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 @@ -44001,35 +45362,72 @@ "tokensPerRank": 32, "globalTokens": 256, "dispatch": { - "p50": 84.44800227880478, - "p90": 110.30399799346924, - "p95": 138.20800185203552, - "p99": 196.22400403022766 + "p50": 89.82399851083755, + "p90": 101.27999633550644, + "p95": 102.65599936246872, + "p99": 107.29599744081497 }, "combine": { - "p50": 75.16799867153168, - "p90": 85.34400165081024, - "p95": 91.00800007581711, - "p99": 101.02400183677673 + "p50": 80.73599636554718, + "p90": 89.4400030374527, + "p95": 89.85599875450134, + "p99": 95.42399644851685 }, "roundtrip": { - "p50": 135.3919953107834, - "p90": 156.3200056552887, - "p95": 166.4000004529953, - "p99": 198.36799800395966 + "p50": 141.59999787807465, + "p90": 
158.9439958333969, + "p95": 161.18399798870087, + "p99": 167.32800006866455 }, "isolatedSum": { - "p50": 159.61600095033646, - "p90": 195.64799964427948, - "p95": 229.21600192785263, - "p99": 297.2480058670044 + "p50": 170.55999487638474, + "p90": 190.71999937295914, + "p95": 192.51199811697006, + "p99": 202.71999388933182 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 13275136, - "combineLogicalBytes": 13275136, - "fanoutMean": 3.6171875, - "recvTokensMax": 236, - "stragglerRank": 2, + "dispatchLogicalBytes": 19410944, + "combineLogicalBytes": 19410944, + "fanoutMean": 5.2890625, + "recvTokensMax": 178, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 96.09600156545639, + "p90": 118.1119978427887, + "p95": 120.57600170373917, + "p99": 127.83999741077423 + }, + "combine": { + "p50": 89.82399851083755, + "p90": 103.20000350475311, + "p95": 103.80800068378448, + "p99": 104.70400005578995 + }, + "roundtrip": { + "p50": 160.288006067276, + "p90": 180.95999956130981, + "p95": 185.18400192260742, + "p99": 188.60800564289093 + }, + "isolatedSum": { + "p50": 185.92000007629395, + "p90": 221.3120013475418, + "p95": 224.38400238752365, + "p99": 232.54399746656418 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38678528, + "combineLogicalBytes": 38678528, + "fanoutMean": 5.26953125, + "recvTokensMax": 360, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -44038,35 +45436,35 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 128.48000228405, - "p90": 142.91200041770935, - "p95": 151.36000514030457, - "p99": 290.0159955024719 + "p50": 114.52800035476685, + "p90": 135.0719928741455, + "p95": 136.6720050573349, + "p99": 140.00000059604645 }, "combine": { - "p50": 111.455999314785, - "p90": 123.6800029873848, - "p95": 127.93600559234619, - "p99": 143.71199905872345 + "p50": 106.01600259542465, + "p90": 
119.71200257539749, + "p95": 120.35199999809265, + "p99": 122.14399874210358 }, "roundtrip": { - "p50": 210.81599593162537, - "p90": 223.26399385929108, - "p95": 229.34399545192719, - "p99": 257.79199600219727 + "p50": 195.96800208091736, + "p90": 214.33599293231964, + "p95": 216.86400473117828, + "p99": 220.44800221920013 }, "isolatedSum": { - "p50": 239.936001598835, - "p90": 266.59200340509415, - "p95": 279.29601073265076, - "p99": 433.7279945611954 + "p50": 220.5440029501915, + "p90": 254.783995449543, + "p95": 257.02400505542755, + "p99": 262.14399933815 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 53172224, - "combineLogicalBytes": 53172224, - "fanoutMean": 3.6220703125, - "recvTokensMax": 934, - "stragglerRank": 2, + "dispatchLogicalBytes": 77285376, + "combineLogicalBytes": 77285376, + "fanoutMean": 5.2646484375, + "recvTokensMax": 704, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -44074,16 +45472,16 @@ ] }, { - "id": "cx-4bdc0b92", - "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|alternating-groups@s2|8|decode|normal|none|none|2|tuned||3cd13eac5b27759", - "colorKey": "h200_ad2e3b5c", - "comparisonKey": "7784b2ab75c0721c", + "id": "cx-efff3174", + "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-mild|8|decode|normal|none|none|0|tuned||f3df51be7d5c32b", + "colorKey": "h100_9aa30544", + "comparisonKey": "c4aa2e0da9446ced", "schemaVersion": 3, - "generatedAt": "2026-06-27T00:13:31.907403+00:00", + "generatedAt": "2026-06-27T00:00:21.116102+00:00", "status": "valid", - "publicationStatus": "comparable-experimental", - "runner": "h200-dgxc-slurm_4", - "sku": "h200", + "publicationStatus": "official", + "runner": "h100-dgxc-slurm_13", + "sku": "h100", "backend": "deepep", "phase": "decode", "mode": "normal", @@ -44091,18 +45489,19 @@ "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": 
"h200-nvlink-island", + "topologyClass": "h100-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H200 EP8 · deepep · bf16 · alternating-groups@s2", + "label": "H100 EP8 · deepep · bf16 · zipf-mild", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, "experts": 256, - "routing": "alternating-groups", - "routingLabel": "alternating-groups@s2", - "routingStep": 2, + "routing": "zipf-mild", + "routingLabel": "zipf-mild", + "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, "dispatchDtype": "bf16", @@ -44126,234 +45525,201 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "3cd13eac5b27759", - "workloadId": null, - "workloadSource": "seeded-runtime", + "traceSignature": "f3df51be7d5c32b", + "workloadId": "set:8:289b7f9c14292e96", + "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, "backendVersion": "1.2.1", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28272362308", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272362308", - "createdAt": "2026-06-27T00:12:10Z", + "id": "28271958693", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271958693", + "createdAt": "2026-06-27T00:00:21.116102+00:00", "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" }, "rows": [ { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 1, + "globalTokens": 8, "dispatch": { - "p50": 71.32799923419952, - "p90": 93.82399916648865, - "p95": 109.72800105810165, - "p99": 145.1520025730133 - }, - "combine": { - "p50": 66.880002617836, - "p90": 72.25599884986877, - "p95": 80.32000064849854, - "p99": 91.39200299978256 + "p50": 97.28000313043594, + "p90": 104.70400005578995, + "p95": 106.11200332641602, + "p99": 112.73600161075592 + }, + "combine": { + "p50": 79.71200346946716, + "p90": 82.65600353479385, + "p95": 
99.13600236177444, + "p99": 275.4560112953186 }, "roundtrip": { - "p50": 123.48800152540207, - "p90": 140.51200449466705, - "p95": 156.8319946527481, - "p99": 195.64799964427948 + "p50": 147.61599898338318, + "p90": 155.32800555229187, + "p95": 156.73600137233734, + "p99": 162.91199624538422 }, "isolatedSum": { - "p50": 138.20800185203552, - "p90": 166.07999801635742, - "p95": 190.0480017066002, - "p99": 236.54400557279587 + "p50": 176.9920065999031, + "p90": 187.3600035905838, + "p95": 205.24800568819046, + "p99": 388.1920129060745 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 3297280, - "combineLogicalBytes": 3297280, - "fanoutMean": 3.59375, - "recvTokensMax": 61, - "stragglerRank": 1, + "dispatchLogicalBytes": 587776, + "combineLogicalBytes": 587776, + "fanoutMean": 5.125, + "recvTokensMax": 8, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 32, - "globalTokens": 256, + "tokensPerRank": 2, + "globalTokens": 16, "dispatch": { - "p50": 81.4720019698143, - "p90": 136.48000359535217, - "p95": 151.13599598407745, - "p99": 198.04799556732178 + "p50": 72.80000299215317, + "p90": 102.7199998497963, + "p95": 104.89600151777267, + "p99": 109.66400057077408 }, "combine": { - "p50": 75.80800354480743, - "p90": 89.47200328111649, - "p95": 102.91200131177902, - "p99": 122.36800044775009 + "p50": 73.15199822187424, + "p90": 81.44000172615051, + "p95": 81.88799768686295, + "p99": 82.91199803352356 }, "roundtrip": { - "p50": 134.5279961824417, - "p90": 149.31200444698334, - "p95": 162.9440039396286, - "p99": 204.73599433898926 + "p50": 129.4720023870468, + "p90": 153.3759981393814, + "p95": 156.15999698638916, + "p99": 164.92800414562225 }, "isolatedSum": { - "p50": 157.28000551462173, - "p90": 225.95200687646866, - "p95": 254.04799729585648, - "p99": 320.41599601507187 + "p50": 145.9520012140274, + "p90": 184.1600015759468, + "p95": 186.78399920463562, + "p99": 192.57599860429764 }, "roundtripMeasured": true, - 
"dispatchLogicalBytes": 13275136, - "combineLogicalBytes": 13275136, - "fanoutMean": 3.6171875, - "recvTokensMax": 236, - "stragglerRank": 7, + "dispatchLogicalBytes": 1103872, + "combineLogicalBytes": 1103872, + "fanoutMean": 4.8125, + "recvTokensMax": 16, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 4, + "globalTokens": 32, "dispatch": { - "p50": 124.60800260305405, - "p90": 131.80799782276154, - "p95": 137.2160017490387, - "p99": 164.35199975967407 + "p50": 76.25599950551987, + "p90": 102.62399911880493, + "p95": 105.24799674749374, + "p99": 109.47199910879135 }, "combine": { - "p50": 111.00800335407257, - "p90": 119.39200013875961, - "p95": 125.5359947681427, - "p99": 155.03999590873718 + "p50": 73.31199944019318, + "p90": 81.4720019698143, + "p95": 86.20800077915192, + "p99": 89.34400230646133 }, "roundtrip": { - "p50": 208.41600000858307, - "p90": 218.6560034751892, - "p95": 229.72799837589264, - "p99": 263.3279860019684 + "p50": 129.56799566745758, + "p90": 157.9200029373169, + "p95": 160.35200655460358, + "p99": 166.04800522327423 }, "isolatedSum": { - "p50": 235.61600595712662, - "p90": 251.19999796152115, - "p95": 262.7519965171814, - "p99": 319.39199566841125 + "p50": 149.56799894571304, + "p90": 184.09600108861923, + "p95": 191.45599752664566, + "p99": 198.81600141525269 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 53172224, - "combineLogicalBytes": 53172224, - "fanoutMean": 3.6220703125, - "recvTokensMax": 934, - "stragglerRank": 7, + "dispatchLogicalBytes": 2250752, + "combineLogicalBytes": 2250752, + "fanoutMean": 4.90625, + "recvTokensMax": 31, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 - } - ] - }, - { - "id": "cx-fcadbf18", - "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|alternating-groups@s3|8|decode|normal|none|none|3|tuned||f8662de0b3559f9", - "colorKey": "h200_ae2e3cef", - 
"comparisonKey": "7784b2ab75c0721c", - "schemaVersion": 3, - "generatedAt": "2026-06-27T00:13:36.495887+00:00", - "status": "valid", - "publicationStatus": "comparable-experimental", - "runner": "h200-dgxc-slurm_1", - "sku": "h200", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · bf16 · alternating-groups@s3", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "alternating-groups", - "routingLabel": "alternating-groups@s3", - "routingStep": 3, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "f8662de0b3559f9", - "workloadId": null, - "workloadSource": "seeded-runtime", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28272365812", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272365812", - "createdAt": "2026-06-27T00:12:17Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ + }, { "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 71.48800045251846, - "p90": 94.46399658918381, - "p95": 99.29600358009338, - "p99": 
110.23999750614166 + "p50": 76.83199644088745, + "p90": 101.79200023412704, + "p95": 105.02400249242783, + "p99": 109.31199789047241 }, "combine": { - "p50": 67.10399687290192, - "p90": 80.09599894285202, - "p95": 84.54400300979614, - "p99": 108.31999778747559 + "p50": 73.5040009021759, + "p90": 82.04799890518188, + "p95": 86.40000224113464, + "p99": 88.54400366544724 }, "roundtrip": { - "p50": 119.61600184440613, - "p90": 148.83199334144592, - "p95": 158.01599621772766, - "p99": 279.9359858036041 + "p50": 130.23999333381653, + "p90": 159.39199924468994, + "p95": 161.82400286197662, + "p99": 165.98400473594666 }, "isolatedSum": { - "p50": 138.59199732542038, - "p90": 174.55999553203583, - "p95": 183.84000658988953, - "p99": 218.55999529361725 + "p50": 150.33599734306335, + "p90": 183.83999913930893, + "p95": 191.42400473356247, + "p99": 197.85600155591965 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 3297280, - "combineLogicalBytes": 3297280, - "fanoutMean": 3.59375, - "recvTokensMax": 61, + "dispatchLogicalBytes": 4472832, + "combineLogicalBytes": 4472832, + "fanoutMean": 4.875, + "recvTokensMax": 62, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 96.00000083446503, + "p90": 104.73600029945374, + "p95": 108.51199924945831, + "p99": 115.74400216341019 + }, + "combine": { + "p50": 80.03199845552444, + "p90": 87.23200112581253, + "p95": 88.51200342178345, + "p99": 90.01599997282028 + }, + "roundtrip": { + "p50": 135.1040005683899, + "p90": 161.40800714492798, + "p95": 164.5440012216568, + "p99": 169.50400173664093 + }, + "isolatedSum": { + "p50": 176.03199928998947, + "p90": 191.96800142526627, + "p95": 197.02400267124176, + "p99": 205.76000213623047 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 8888320, + "combineLogicalBytes": 8888320, + "fanoutMean": 4.84375, + "recvTokensMax": 124, "stragglerRank": 5, "correct": true, 
"samplesPooled": 600, @@ -44363,34 +45729,71 @@ "tokensPerRank": 32, "globalTokens": 256, "dispatch": { - "p50": 80.83199709653854, - "p90": 99.45599734783173, - "p95": 105.05600273609161, - "p99": 118.07999759912491 + "p50": 90.65599739551544, + "p90": 102.75200009346008, + "p95": 105.69600015878677, + "p99": 109.37599837779999 }, "combine": { - "p50": 75.23199915885925, - "p90": 87.52000331878662, - "p95": 92.0960009098053, - "p99": 108.51199924945831 + "p50": 81.60000294446945, + "p90": 90.59199690818787, + "p95": 95.32800316810608, + "p99": 97.47199714183807 }, "roundtrip": { - "p50": 133.91999900341034, - "p90": 154.78399395942688, - "p95": 162.04799711704254, - "p99": 176.1920005083084 + "p50": 145.1839953660965, + "p90": 165.56799411773682, + "p95": 168.5439944267273, + "p99": 174.68799650669098 }, "isolatedSum": { - "p50": 156.0639962553978, - "p90": 186.97600066661835, - "p95": 197.1520036458969, - "p99": 226.59199684858322 + "p50": 172.2560003399849, + "p90": 193.34399700164795, + "p95": 201.02400332689285, + "p99": 206.84799551963806 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 13275136, - "combineLogicalBytes": 13275136, - "fanoutMean": 3.6171875, - "recvTokensMax": 236, + "dispatchLogicalBytes": 17733632, + "combineLogicalBytes": 17733632, + "fanoutMean": 4.83203125, + "recvTokensMax": 248, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 101.43999755382538, + "p90": 116.89600348472595, + "p95": 119.77600306272507, + "p99": 138.7840062379837 + }, + "combine": { + "p50": 90.59199690818787, + "p90": 103.35999727249146, + "p95": 104.3199971318245, + "p99": 105.92000186443329 + }, + "roundtrip": { + "p50": 168.7680035829544, + "p90": 185.88800728321075, + "p95": 188.6720061302185, + "p99": 193.37600469589233 + }, + "isolatedSum": { + "p50": 192.03199446201324, + "p90": 220.2560007572174, + "p95": 224.09600019454956, + "p99": 
244.704008102417 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 35424256, + "combineLogicalBytes": 35424256, + "fanoutMean": 4.826171875, + "recvTokensMax": 492, "stragglerRank": 5, "correct": true, "samplesPooled": 600, @@ -44400,35 +45803,35 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 125.15200674533844, - "p90": 135.96799969673157, - "p95": 141.79199934005737, - "p99": 205.34400641918182 + "p50": 122.5920021533966, + "p90": 134.91199910640717, + "p95": 136.9280070066452, + "p99": 143.64799857139587 }, "combine": { - "p50": 109.72800105810165, - "p90": 120.15999853610992, - "p95": 123.36000055074692, - "p99": 136.7039978504181 + "p50": 115.07199704647064, + "p90": 128.63999605178833, + "p95": 130.40000200271606, + "p99": 139.71200585365295 }, "roundtrip": { - "p50": 207.96799659729004, - "p90": 225.50399601459503, - "p95": 231.77599906921387, - "p99": 246.20799720287323 + "p50": 215.5199944972992, + "p90": 233.66400599479675, + "p95": 235.35999655723572, + "p99": 240.12799561023712 }, "isolatedSum": { - "p50": 234.8800078034401, - "p90": 256.1279982328415, - "p95": 265.1519998908043, - "p99": 342.0480042695999 + "p50": 237.66399919986725, + "p90": 263.5519951581955, + "p95": 267.32800900936127, + "p99": 283.3600044250488 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 53172224, - "combineLogicalBytes": 53172224, - "fanoutMean": 3.6220703125, - "recvTokensMax": 934, - "stragglerRank": 5, + "dispatchLogicalBytes": 70160384, + "combineLogicalBytes": 70160384, + "fanoutMean": 4.779296875, + "recvTokensMax": 987, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -44436,16 +45839,16 @@ ] }, { - "id": "cx-f361a9a4", - "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|decode|normal|none|none|0|tuned||2279937619f3971", - "colorKey": "h200_b5c683eb", - "comparisonKey": "d82096ba4baa0cd5", + "id": "cx-6d1780ec", + "identity": 
"h100|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-mild+eplb|8|decode|normal|none|none|0|tuned||16babcaf4204243", + "colorKey": "h100_e8b903ea", + "comparisonKey": "0d93a7b7a0fcf6d0", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:56:27.284944+00:00", + "generatedAt": "2026-06-27T00:00:17.527263+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h200-dgxc-slurm_8", - "sku": "h200", + "runner": "h100-dgxc-slurm_01", + "sku": "h100", "backend": "deepep", "phase": "decode", "mode": "normal", @@ -44453,20 +45856,21 @@ "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", + "topologyClass": "h100-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H200 EP8 · deepep · bf16 · balanced", + "label": "H100 EP8 · deepep · bf16 · zipf-mild+eplb", + "model": "DeepSeek-V3 (EPLB physical)", "shape": { "hidden": 7168, "topk": 8, - "experts": 256, - "routing": "balanced", - "routingLabel": "balanced", + "experts": 288, + "routing": "zipf-mild", + "routingLabel": "zipf-mild+eplb", "routingStep": 0, "unevenTokens": "none", - "eplbEnabled": false, + "eplbEnabled": true, "dispatchDtype": "bf16", "activationProfile": "normal", "combineQuantMode": "none" @@ -44488,18 +45892,18 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "2279937619f3971", - "workloadId": "set:4:7af12818400d6348", + "traceSignature": "16babcaf4204243", + "workloadId": "set:8:289b7f9c14292e96", "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, + "eplbImbalanceBefore": 2.61328125, + "eplbImbalanceAfter": 1.0009114583333334, "backendVersion": "1.2.1", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271830346", - "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271830346", - "createdAt": "2026-06-26T23:54:59Z", + "id": "28271962037", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271962037", + "createdAt": "2026-06-27T00:00:17.527263+00:00", "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" }, "rows": [ @@ -44507,72 +45911,183 @@ "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 76.73600316047668, - "p90": 126.24000012874603, - "p95": 134.46399569511414, - "p99": 156.63999319076538 + "p50": 98.55999797582626, + "p90": 106.33599758148193, + "p95": 108.51199924945831, + "p99": 113.21599781513214 }, "combine": { - "p50": 72.41600006818771, - "p90": 83.71199667453766, - "p95": 87.07199990749359, - "p99": 99.13600236177444 + "p50": 79.39200103282928, + "p90": 81.85599744319916, + "p95": 82.56000280380249, + "p99": 87.10400015115738 }, "roundtrip": { - "p50": 128.38399410247803, - "p90": 148.03199470043182, - "p95": 154.62400019168854, - "p99": 179.6479970216751 + "p50": 145.50399780273438, + "p90": 154.7199934720993, + "p95": 156.8640023469925, + "p99": 160.7999950647354 }, "isolatedSum": { - "p50": 149.1520032286644, - "p90": 209.9519968032837, - "p95": 221.53599560260773, - "p99": 255.77599555253983 + "p50": 177.95199900865555, + "p90": 188.1919950246811, + "p95": 191.0720020532608, + "p99": 200.31999796628952 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 917504, - "combineLogicalBytes": 917504, - "fanoutMean": 8, - "recvTokensMax": 8, - "stragglerRank": 0, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 7, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 2, + "globalTokens": 16, "dispatch": { - "p50": 75.83999633789062, - "p90": 87.99999952316284, - "p95": 98.11200201511383, - "p99": 113.02399635314941 + "p50": 73.60000163316727, + "p90": 108.31999778747559, + "p95": 
109.66400057077408, + "p99": 115.13599753379822 }, "combine": { - "p50": 71.84000313282013, - "p90": 79.6160027384758, - "p95": 85.56800335645676, - "p99": 95.87199985980988 + "p50": 72.51200079917908, + "p90": 81.60000294446945, + "p95": 82.36800134181976, + "p99": 87.20000088214874 }, "roundtrip": { - "p50": 126.81600451469421, - "p90": 139.67999815940857, - "p95": 149.63200688362122, - "p99": 170.20800709724426 + "p50": 129.05600666999817, + "p90": 156.47999942302704, + "p95": 160.0639969110489, + "p99": 162.1759980916977 }, "isolatedSum": { - "p50": 147.67999947071075, - "p90": 167.61600226163864, - "p95": 183.6800053715706, - "p99": 208.8959962129593 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 7340032, - "combineLogicalBytes": 7340032, - "fanoutMean": 8, - "recvTokensMax": 64, - "stragglerRank": 0, + "p50": 146.11200243234634, + "p90": 189.92000073194504, + "p95": 192.03200191259384, + "p99": 202.33599841594696 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1189888, + "combineLogicalBytes": 1189888, + "fanoutMean": 5.1875, + "recvTokensMax": 12, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 97.120001912117, + "p90": 103.87200117111206, + "p95": 105.66399991512299, + "p99": 110.68800091743469 + }, + "combine": { + "p50": 79.55200225114822, + "p90": 82.20800012350082, + "p95": 86.30400151014328, + "p99": 88.3840024471283 + }, + "roundtrip": { + "p50": 151.32799744606018, + "p90": 159.61599349975586, + "p95": 161.15200519561768, + "p99": 167.71200299263 + }, + "isolatedSum": { + "p50": 176.67200416326523, + "p90": 186.08000129461288, + "p95": 191.96800142526627, + "p99": 199.072003364563 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2408448, + "combineLogicalBytes": 2408448, + "fanoutMean": 5.25, + "recvTokensMax": 23, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 96.83199971914291, + "p90": 103.07200253009796, + "p95": 104.47999835014343, + "p99": 111.48799955844879 + }, + "combine": { + "p50": 79.48800176382065, + "p90": 82.49600231647491, + "p95": 87.0399996638298, + "p99": 88.76799792051315 + }, + "roundtrip": { + "p50": 152.38399803638458, + "p90": 159.96800363063812, + "p95": 162.20800578594208, + "p99": 166.59200191497803 + }, + "isolatedSum": { + "p50": 176.32000148296356, + "p90": 185.56800484657288, + "p95": 191.51999801397324, + "p99": 200.25599747896194 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 47, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 96.92800045013428, + "p90": 102.01600193977356, + "p95": 104.76800054311752, + "p99": 113.02399635314941 + }, + "combine": { + "p50": 80.86399734020233, + "p90": 88.3840024471283, + "p95": 89.63199704885483, + "p99": 94.65599805116653 + }, + "roundtrip": { + "p50": 153.21600437164307, + "p90": 159.39199924468994, + "p95": 160.8320027589798, + "p99": 165.3759926557541 + }, + "isolatedSum": { + "p50": 177.7919977903366, + "p90": 190.40000438690186, + "p95": 194.39999759197235, + "p99": 207.67999440431595 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9605120, + "combineLogicalBytes": 9605120, + "fanoutMean": 5.234375, + "recvTokensMax": 93, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -44581,34 +46096,71 @@ "tokensPerRank": 32, "globalTokens": 256, "dispatch": { - "p50": 92.73599833250046, - "p90": 105.50399869680405, - "p95": 112.41599917411804, - "p99": 132.60799646377563 + "p50": 91.5519967675209, + "p90": 105.27999699115753, + "p95": 106.52799904346466, + "p99": 110.55999994277954 }, "combine": { - "p50": 81.98399841785431, - "p90": 
93.56799721717834, - "p95": 99.58399832248688, - "p99": 112.57600039243698 + "p50": 81.216000020504, + "p90": 90.17600119113922, + "p95": 94.33600306510925, + "p99": 96.79999947547913 }, "roundtrip": { - "p50": 148.70400726795197, - "p90": 168.7999963760376, - "p95": 180.7679980993271, - "p99": 196.6720074415207 + "p50": 144.1279947757721, + "p90": 167.52000153064728, + "p95": 168.99199783802032, + "p99": 173.567995429039 }, "isolatedSum": { - "p50": 174.71999675035477, - "p90": 199.0719959139824, - "p95": 211.99999749660492, - "p99": 245.18399685621262 + "p50": 172.7679967880249, + "p90": 195.45599818229675, + "p95": 200.8640021085739, + "p99": 207.35999941825867 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 29360128, - "combineLogicalBytes": 29360128, - "fanoutMean": 8, - "recvTokensMax": 256, + "dispatchLogicalBytes": 19367936, + "combineLogicalBytes": 19367936, + "fanoutMean": 5.27734375, + "recvTokensMax": 182, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 104.80000078678131, + "p90": 116.35199934244156, + "p95": 118.81600320339203, + "p99": 122.97599762678146 + }, + "combine": { + "p50": 96.38399630784988, + "p90": 104.00000214576721, + "p95": 104.5759990811348, + "p99": 106.4319983124733 + }, + "roundtrip": { + "p50": 177.76000499725342, + "p90": 185.44000387191772, + "p95": 187.16800212860107, + "p99": 190.3039962053299 + }, + "isolatedSum": { + "p50": 201.1839970946312, + "p90": 220.35200148820877, + "p95": 223.39200228452682, + "p99": 229.40799593925476 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38535168, + "combineLogicalBytes": 38535168, + "fanoutMean": 5.25, + "recvTokensMax": 358, "stragglerRank": 7, "correct": true, "samplesPooled": 600, @@ -44618,35 +46170,35 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 139.00800049304962, - "p90": 175.99999904632568, - "p95": 181.34400248527527, - 
"p99": 197.91999459266663 + "p50": 113.56800049543381, + "p90": 131.58400356769562, + "p95": 133.66399705410004, + "p99": 139.96799290180206 }, "combine": { - "p50": 127.20000743865967, - "p90": 150.68799257278442, - "p95": 153.6639928817749, - "p99": 160.5439931154251 + "p50": 106.55999928712845, + "p90": 119.55200135707855, + "p95": 120.09599804878235, + "p99": 121.05599790811539 }, "roundtrip": { - "p50": 232.92799293994904, - "p90": 266.04801416397095, - "p95": 271.5199887752533, - "p99": 294.20799016952515 + "p50": 198.46400618553162, + "p90": 217.6000028848648, + "p95": 218.75199675559998, + "p99": 224.2880016565323 }, "isolatedSum": { - "p50": 266.2080079317093, - "p90": 326.6879916191101, - "p95": 335.00799536705017, - "p99": 358.46398770809174 + "p50": 220.12799978256226, + "p90": 251.13600492477417, + "p95": 253.75999510288239, + "p99": 261.02399080991745 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 117440512, - "combineLogicalBytes": 117440512, - "fanoutMean": 8, - "recvTokensMax": 1024, - "stragglerRank": 3, + "dispatchLogicalBytes": 76869632, + "combineLogicalBytes": 76869632, + "fanoutMean": 5.236328125, + "recvTokensMax": 688, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 @@ -44654,16 +46206,16 @@ ] }, { - "id": "cx-d65f5a76", - "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|decode|normal|none|none|0|tuned||ffa946582edb500", - "colorKey": "h200_b5c683eb", - "comparisonKey": "d82096ba4baa0cd5", + "id": "cx-9d829c00", + "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-moderate|8|decode|normal|none|none|0|tuned||14ded8461f2636c", + "colorKey": "h100_552a4b73", + "comparisonKey": "95c165fc74bc43c0", "schemaVersion": 3, - "generatedAt": "2026-06-27T00:02:47.642624+00:00", + "generatedAt": "2026-06-27T00:00:35.674306+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h200-dgxc-slurm_6", - "sku": "h200", + "runner": "h100-dgxc-slurm_17", 
+ "sku": "h100", "backend": "deepep", "phase": "decode", "mode": "normal", @@ -44671,17 +46223,18 @@ "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", + "topologyClass": "h100-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H200 EP8 · deepep · bf16 · balanced", + "label": "H100 EP8 · deepep · bf16 · zipf-moderate", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, "experts": 256, - "routing": "balanced", - "routingLabel": "balanced", + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, @@ -44706,8 +46259,8 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "ffa946582edb500", - "workloadId": "set:8:7af12818400d6348", + "traceSignature": "14ded8461f2636c", + "workloadId": "set:8:120a8dc1dba92ca9", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -44715,9 +46268,9 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28272028751", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272028751", - "createdAt": "2026-06-27T00:01:16Z", + "id": "28271971983", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271971983", + "createdAt": "2026-06-27T00:00:35.674306+00:00", "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" }, "rows": [ @@ -44725,35 +46278,35 @@ "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 75.99999755620956, - "p90": 106.175996363163, - "p95": 117.60000139474869, - "p99": 352.512001991272 + "p50": 95.8079993724823, + "p90": 103.10400277376175, + "p95": 104.16000336408615, + "p99": 110.01600325107574 }, "combine": { - "p50": 70.68800181150436, - "p90": 85.9839990735054, - "p95": 90.52799642086029, - "p99": 
104.12800312042236 + "p50": 74.33599978685379, + "p90": 81.56800270080566, + "p95": 81.98399841785431, + "p99": 83.29600095748901 }, "roundtrip": { - "p50": 124.60800260305405, - "p90": 158.62399339675903, - "p95": 166.46400094032288, - "p99": 186.27199530601501 + "p50": 142.2719955444336, + "p90": 148.67199957370758, + "p95": 150.4639983177185, + "p99": 154.11199629306793 }, "isolatedSum": { - "p50": 146.68799936771393, - "p90": 192.1599954366684, - "p95": 208.12799781560898, - "p99": 456.64000511169434 + "p50": 170.1439991593361, + "p90": 184.6720054745674, + "p95": 186.14400178194046, + "p99": 193.31200420856476 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 917504, - "combineLogicalBytes": 917504, - "fanoutMean": 8, + "dispatchLogicalBytes": 444416, + "combineLogicalBytes": 444416, + "fanoutMean": 3.875, "recvTokensMax": 8, - "stragglerRank": 6, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -44762,35 +46315,35 @@ "tokensPerRank": 2, "globalTokens": 16, "dispatch": { - "p50": 74.65600222349167, - "p90": 98.91200065612793, - "p95": 104.09600287675858, - "p99": 114.84800279140472 + "p50": 70.88000327348709, + "p90": 101.98400169610977, + "p95": 102.94400155544281, + "p99": 106.01600259542465 }, "combine": { - "p50": 70.65600156784058, - "p90": 87.20000088214874, - "p95": 91.32800251245499, - "p99": 106.46399855613708 + "p50": 72.4480003118515, + "p90": 81.40800148248672, + "p95": 81.95199817419052, + "p99": 85.7279971241951 }, "roundtrip": { - "p50": 125.59999525547028, - "p90": 161.02400422096252, - "p95": 170.78399658203125, - "p99": 197.05599546432495 + "p50": 128.7039965391159, + "p90": 147.71200716495514, + "p95": 149.59999918937683, + "p99": 152.79999375343323 }, "isolatedSum": { - "p50": 145.31200379133224, - "p90": 186.11200153827667, - "p95": 195.42400538921356, - "p99": 221.3120013475418 + "p50": 143.3280035853386, + "p90": 183.3920031785965, + "p95": 184.89599972963333, + "p99": 191.74399971961975 }, 
"roundtripMeasured": true, - "dispatchLogicalBytes": 1835008, - "combineLogicalBytes": 1835008, - "fanoutMean": 8, + "dispatchLogicalBytes": 845824, + "combineLogicalBytes": 845824, + "fanoutMean": 3.6875, "recvTokensMax": 16, - "stragglerRank": 3, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -44799,35 +46352,35 @@ "tokensPerRank": 4, "globalTokens": 32, "dispatch": { - "p50": 74.20799881219864, - "p90": 102.24000364542007, - "p95": 111.35999858379364, - "p99": 129.63199615478516 + "p50": 73.18399846553802, + "p90": 93.82399916648865, + "p95": 96.41599655151367, + "p99": 104.99200224876404 }, "combine": { - "p50": 71.87200337648392, - "p90": 88.22400122880936, - "p95": 94.52799707651138, - "p99": 115.26399850845337 + "p50": 70.8480030298233, + "p90": 77.82399654388428, + "p95": 78.59200239181519, + "p99": 83.45600217580795 }, "roundtrip": { - "p50": 125.08800625801086, - "p90": 153.53600680828094, - "p95": 163.87200355529785, - "p99": 176.86399817466736 + "p50": 125.44000148773193, + "p90": 151.74399316310883, + "p95": 154.1759967803955, + "p99": 160.09600460529327 }, "isolatedSum": { - "p50": 146.08000218868256, - "p90": 190.46400487422943, - "p95": 205.88799566030502, - "p99": 244.89599466323853 + "p50": 144.03200149536133, + "p90": 171.64799571037292, + "p95": 175.00799894332886, + "p99": 188.448004424572 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 3670016, - "combineLogicalBytes": 3670016, - "fanoutMean": 8, + "dispatchLogicalBytes": 1691648, + "combineLogicalBytes": 1691648, + "fanoutMean": 3.6875, "recvTokensMax": 32, - "stragglerRank": 4, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -44836,35 +46389,35 @@ "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 74.30399954319, - "p90": 98.49599748849869, - "p95": 106.59199953079224, - "p99": 120.19199877977371 + "p50": 75.83999633789062, + "p90": 100.22400319576263, + "p95": 102.39999741315842, + "p99": 107.4879989027977 }, 
"combine": { - "p50": 71.16799801588058, - "p90": 86.36800199747086, - "p95": 90.52799642086029, - "p99": 109.40799862146378 + "p50": 73.18399846553802, + "p90": 81.44000172615051, + "p95": 82.24000036716461, + "p99": 87.23200112581253 }, "roundtrip": { - "p50": 124.64000284671783, - "p90": 156.73600137233734, - "p95": 164.48000073432922, - "p99": 189.15200233459473 + "p50": 126.27199292182922, + "p90": 154.88000214099884, + "p95": 157.47199952602386, + "p99": 159.4880074262619 }, "isolatedSum": { - "p50": 145.4719975590706, - "p90": 184.86399948596954, - "p95": 197.11999595165253, - "p99": 229.5999974012375 + "p50": 149.02399480342865, + "p90": 181.66400492191315, + "p95": 184.63999778032303, + "p99": 194.72000002861023 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 7340032, - "combineLogicalBytes": 7340032, - "fanoutMean": 8, + "dispatchLogicalBytes": 3354624, + "combineLogicalBytes": 3354624, + "fanoutMean": 3.65625, "recvTokensMax": 64, - "stragglerRank": 3, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -44873,35 +46426,35 @@ "tokensPerRank": 16, "globalTokens": 128, "dispatch": { - "p50": 75.26399940252304, - "p90": 102.59199887514114, - "p95": 109.76000130176544, - "p99": 125.59999525547028 + "p50": 79.3600007891655, + "p90": 100.0640019774437, + "p95": 123.80799651145935, + "p99": 229.76000607013702 }, "combine": { - "p50": 76.92799717187881, - "p90": 91.23200178146362, - "p95": 94.94400024414062, - "p99": 105.82400113344193 + "p50": 73.88799637556076, + "p90": 82.2720006108284, + "p95": 83.36000144481659, + "p99": 89.28000181913376 }, "roundtrip": { - "p50": 128.7039965391159, - "p90": 160.51200032234192, - "p95": 171.07200622558594, - "p99": 223.13599288463593 + "p50": 130.17599284648895, + "p90": 154.62400019168854, + "p95": 157.3760062456131, + "p99": 162.7199947834015 }, "isolatedSum": { - "p50": 152.19199657440186, - "p90": 193.82400065660477, - "p95": 204.70400154590607, - "p99": 231.4239963889122 + "p50": 
153.24799716472626, + "p90": 182.3360025882721, + "p95": 207.16799795627594, + "p99": 319.0400078892708 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 14680064, - "combineLogicalBytes": 14680064, - "fanoutMean": 8, - "recvTokensMax": 128, - "stragglerRank": 4, + "dispatchLogicalBytes": 6537216, + "combineLogicalBytes": 6537216, + "fanoutMean": 3.5625, + "recvTokensMax": 127, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -44910,35 +46463,35 @@ "tokensPerRank": 32, "globalTokens": 256, "dispatch": { - "p50": 90.59199690818787, - "p90": 110.52799969911575, - "p95": 119.00799721479416, - "p99": 143.39199662208557 + "p50": 88.44800293445587, + "p90": 103.71199995279312, + "p95": 105.76000064611435, + "p99": 110.1439967751503 }, "combine": { - "p50": 81.53600245714188, - "p90": 98.11200201511383, - "p95": 105.79200088977814, - "p99": 123.4240010380745 + "p50": 81.60000294446945, + "p90": 89.6959975361824, + "p95": 90.27200192213058, + "p99": 91.80799871683121 }, "roundtrip": { - "p50": 145.4080045223236, - "p90": 173.0239987373352, - "p95": 180.4479956626892, - "p99": 203.45599949359894 + "p50": 141.34399592876434, + "p90": 161.98399662971497, + "p95": 163.455992937088, + "p99": 169.24799978733063 }, "isolatedSum": { - "p50": 172.12799936532974, - "p90": 208.64000171422958, - "p95": 224.7999981045723, - "p99": 266.81599766016006 + "p50": 170.04800587892532, + "p90": 193.40799748897552, + "p95": 196.03200256824493, + "p99": 201.9519954919815 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 29360128, - "combineLogicalBytes": 29360128, - "fanoutMean": 8, - "recvTokensMax": 256, - "stragglerRank": 3, + "dispatchLogicalBytes": 12859392, + "combineLogicalBytes": 12859392, + "fanoutMean": 3.50390625, + "recvTokensMax": 255, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 @@ -44947,35 +46500,35 @@ "tokensPerRank": 64, "globalTokens": 512, "dispatch": { - "p50": 110.84800213575363, - "p90": 130.17599284648895, 
- "p95": 137.56799697875977, - "p99": 212.12799847126007 + "p50": 102.36799716949463, + "p90": 119.6800023317337, + "p95": 121.31199985742569, + "p99": 123.77600371837616 }, "combine": { - "p50": 95.13600170612335, - "p90": 114.20799791812897, - "p95": 124.57600235939026, - "p99": 243.42399835586548 + "p50": 89.9839997291565, + "p90": 96.03200107812881, + "p95": 99.48799759149551, + "p99": 102.04800218343735 }, "roundtrip": { - "p50": 178.14399302005768, - "p90": 205.24799823760986, - "p95": 233.40800404548645, - "p99": 432.2560131549835 + "p50": 165.69599509239197, + "p90": 182.43199586868286, + "p95": 184.1599941253662, + "p99": 187.51999735832214 }, "isolatedSum": { - "p50": 205.98400384187698, - "p90": 244.38399076461792, - "p95": 262.14399933815, - "p99": 455.55199682712555 + "p50": 192.35199689865112, + "p90": 215.71200340986252, + "p95": 220.7999974489212, + "p99": 225.8240059018135 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 58720256, - "combineLogicalBytes": 58720256, - "fanoutMean": 8, - "recvTokensMax": 512, - "stragglerRank": 0, + "dispatchLogicalBytes": 25145344, + "combineLogicalBytes": 25145344, + "fanoutMean": 3.42578125, + "recvTokensMax": 510, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -44984,35 +46537,35 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 133.44000279903412, - "p90": 152.48000621795654, - "p95": 157.95199573040009, - "p99": 172.2240000963211 + "p50": 125.91999769210815, + "p90": 144.70399916172028, + "p95": 145.9520012140274, + "p99": 148.00000190734863 }, "combine": { - "p50": 125.72799623012543, - "p90": 140.60799777507782, - "p95": 145.31199634075165, - "p99": 176.7359972000122 + "p50": 114.56000059843063, + "p90": 119.99999731779099, + "p95": 122.30399996042252, + "p99": 126.91199779510498 }, "roundtrip": { - "p50": 237.2480034828186, - "p90": 255.51998615264893, - "p95": 262.65600323677063, - "p99": 295.9040105342865 + "p50": 218.9760059118271, + "p90": 
233.63199830055237, + "p95": 235.1360023021698, + "p99": 238.304004073143 }, "isolatedSum": { - "p50": 259.16799902915955, - "p90": 293.08800399303436, - "p95": 303.26399207115173, - "p99": 348.9599972963333 + "p50": 240.4799982905388, + "p90": 264.70399647951126, + "p95": 268.2560011744499, + "p99": 274.9119997024536 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 117440512, - "combineLogicalBytes": 117440512, - "fanoutMean": 8, - "recvTokensMax": 1024, - "stragglerRank": 5, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 @@ -45020,16 +46573,16 @@ ] }, { - "id": "cx-26bc6c27", - "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|8|decode|normal|none|none|0|tuned||d02a66236b524b8", - "colorKey": "h200_d0dfa19a", - "comparisonKey": "5d5c9be2dc9b5f1f", + "id": "cx-c61b6088", + "identity": "h100|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-moderate+eplb|8|decode|normal|none|none|0|tuned||a8f501af7004836", + "colorKey": "h100_106a51ab", + "comparisonKey": "6643ae5a97d68820", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:56:33.428125+00:00", + "generatedAt": "2026-06-27T00:00:43.354862+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h200-dgxc-slurm_10", - "sku": "h200", + "runner": "h100-dgxc-slurm_07", + "sku": "h100", "backend": "deepep", "phase": "decode", "mode": "normal", @@ -45037,20 +46590,21 @@ "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", + "topologyClass": "h100-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H200 EP8 · deepep · bf16 · balanced-rank-local", + "label": "H100 EP8 · deepep · bf16 · zipf-moderate+eplb", + "model": "DeepSeek-V3 (EPLB physical)", "shape": { "hidden": 7168, 
"topk": 8, - "experts": 256, - "routing": "balanced-rank-local", - "routingLabel": "balanced-rank-local", + "experts": 288, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate+eplb", "routingStep": 0, "unevenTokens": "none", - "eplbEnabled": false, + "eplbEnabled": true, "dispatchDtype": "bf16", "activationProfile": "normal", "combineQuantMode": "none" @@ -45072,18 +46626,18 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "d02a66236b524b8", - "workloadId": "set:4:2eebbed158fe1320", + "traceSignature": "a8f501af7004836", + "workloadId": "set:8:120a8dc1dba92ca9", "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, + "eplbImbalanceBefore": 4.927734375, + "eplbImbalanceAfter": 1.0006103515625, "backendVersion": "1.2.1", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271837870", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271837870", - "createdAt": "2026-06-26T23:55:13Z", + "id": "28271975554", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271975554", + "createdAt": "2026-06-27T00:00:43.354862+00:00", "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" }, "rows": [ @@ -45091,34 +46645,108 @@ "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 65.95200300216675, - "p90": 102.7199998497963, - "p95": 115.55200070142746, - "p99": 166.6560024023056 + "p50": 69.72800195217133, + "p90": 76.7040029168129, + "p95": 82.24000036716461, + "p99": 100.09600222110748 }, "combine": { - "p50": 58.6559996008873, - "p90": 72.4480003118515, - "p95": 78.59200239181519, - "p99": 95.64799815416336 + "p50": 70.78400254249573, + "p90": 73.11999797821045, + "p95": 73.53600114583969, + "p99": 78.3040001988411 }, "roundtrip": { - "p50": 112.44799941778183, - "p90": 152.70400047302246, - "p95": 159.2320054769516, - "p99": 181.2479943037033 + "p50": 
124.35200065374374, + "p90": 129.88799810409546, + "p95": 131.20000064373016, + "p99": 137.40800321102142 }, "isolatedSum": { - "p50": 124.60800260305405, - "p90": 175.1680001616478, - "p95": 194.14400309324265, - "p99": 262.30400055646896 + "p50": 140.51200449466705, + "p90": 149.82400089502335, + "p95": 155.7760015130043, + "p99": 178.40000241994858 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 114688, - "combineLogicalBytes": 114688, - "fanoutMean": 1, - "recvTokensMax": 4, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 5.375, + "recvTokensMax": 7, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 69.92000341415405, + "p90": 77.79199630022049, + "p95": 80.19199967384338, + "p99": 96.19200229644775 + }, + "combine": { + "p50": 71.16799801588058, + "p90": 73.27999919652939, + "p95": 73.85600358247757, + "p99": 78.94399762153625 + }, + "roundtrip": { + "p50": 126.94400548934937, + "p90": 130.91200590133667, + "p95": 132.1280002593994, + "p99": 138.33600282669067 + }, + "isolatedSum": { + "p50": 141.08800143003464, + "p90": 151.07199549674988, + "p95": 154.04800325632095, + "p99": 175.135999917984 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1204224, + "combineLogicalBytes": 1204224, + "fanoutMean": 5.25, + "recvTokensMax": 14, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 74.07999783754349, + "p90": 101.34399682283401, + "p95": 103.13600301742554, + "p99": 111.39199882745743 + }, + "combine": { + "p50": 72.9919970035553, + "p90": 82.0159986615181, + "p95": 87.00799942016602, + "p99": 89.31200206279755 + }, + "roundtrip": { + "p50": 131.32800161838531, + "p90": 158.59200060367584, + "p95": 163.13600540161133, + "p99": 169.69600319862366 + }, + "isolatedSum": { + "p50": 147.07199484109879, + 
"p90": 183.3599954843521, + "p95": 190.14400243759155, + "p99": 200.70400089025497 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2394112, + "combineLogicalBytes": 2394112, + "fanoutMean": 5.21875, + "recvTokensMax": 24, "stragglerRank": 4, "correct": true, "samplesPooled": 600, @@ -45128,35 +46756,72 @@ "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 67.9360032081604, - "p90": 85.66399663686752, - "p95": 95.0080007314682, - "p99": 111.00800335407257 + "p50": 75.6480023264885, + "p90": 100.76799988746643, + "p95": 102.01600193977356, + "p99": 105.95200210809708 }, "combine": { - "p50": 59.93599817156792, - "p90": 70.88000327348709, - "p95": 77.18399912118912, - "p99": 92.03200042247772 + "p50": 72.9919970035553, + "p90": 79.68000322580338, + "p95": 80.6720033288002, + "p99": 85.88799834251404 }, "roundtrip": { - "p50": 112.2559979557991, - "p90": 138.11199367046356, - "p95": 150.2400040626526, - "p99": 209.6319943666458 + "p50": 129.63199615478516, + "p90": 154.91199493408203, + "p95": 156.47999942302704, + "p99": 159.96800363063812 }, "isolatedSum": { - "p50": 127.87200137972832, - "p90": 156.54399991035461, - "p95": 172.19199985265732, - "p99": 203.0400037765503 + "p50": 148.6399993300438, + "p90": 180.4480031132698, + "p95": 182.68800526857376, + "p99": 191.84000045061111 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 917504, - "combineLogicalBytes": 917504, - "fanoutMean": 1, - "recvTokensMax": 8, - "stragglerRank": 7, + "dispatchLogicalBytes": 4630528, + "combineLogicalBytes": 4630528, + "fanoutMean": 5.046875, + "recvTokensMax": 45, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 82.49600231647491, + "p90": 100.73599964380264, + "p95": 103.04000228643417, + "p99": 106.81600123643875 + }, + "combine": { + "p50": 74.36800003051758, + "p90": 87.0399996638298, + "p95": 87.90399879217148, + "p99": 89.63199704885483 + 
}, + "roundtrip": { + "p50": 132.38400220870972, + "p90": 161.02400422096252, + "p95": 162.81600296497345, + "p99": 166.72000288963318 + }, + "isolatedSum": { + "p50": 156.8640023469925, + "p90": 187.77599930763245, + "p95": 190.94400107860565, + "p99": 196.44799828529358 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9447424, + "combineLogicalBytes": 9447424, + "fanoutMean": 5.1484375, + "recvTokensMax": 91, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 @@ -45165,35 +46830,72 @@ "tokensPerRank": 32, "globalTokens": 256, "dispatch": { - "p50": 82.97599852085114, - "p90": 103.71199995279312, - "p95": 112.15999722480774, - "p99": 135.23200154304504 + "p50": 90.30400216579437, + "p90": 103.32799702882767, + "p95": 104.35199737548828, + "p99": 109.6000000834465 }, "combine": { - "p50": 69.76000219583511, - "p90": 85.05599945783615, - "p95": 93.88799965381622, - "p99": 128.60800325870514 + "p50": 81.31200075149536, + "p90": 89.75999802350998, + "p95": 90.43200314044952, + "p99": 91.61599725484848 }, "roundtrip": { - "p50": 125.56800246238708, - "p90": 148.70400726795197, - "p95": 165.92000424861908, - "p99": 200.3519982099533 + "p50": 142.20799505710602, + "p90": 158.65600109100342, + "p95": 161.50400042533875, + "p99": 167.39200055599213 }, "isolatedSum": { - "p50": 152.73600071668625, - "p90": 188.76799941062927, - "p95": 206.04799687862396, - "p99": 263.8400048017502 + "p50": 171.61600291728973, + "p90": 193.08799505233765, + "p95": 194.7840005159378, + "p99": 201.21599733829498 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 3670016, - "combineLogicalBytes": 3670016, - "fanoutMean": 1, - "recvTokensMax": 32, - "stragglerRank": 7, + "dispatchLogicalBytes": 19023872, + "combineLogicalBytes": 19023872, + "fanoutMean": 5.18359375, + "recvTokensMax": 178, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 
96.89600020647049, + "p90": 116.60800129175186, + "p95": 118.43200027942657, + "p99": 124.32000041007996 + }, + "combine": { + "p50": 90.30400216579437, + "p90": 103.32799702882767, + "p95": 103.74400019645691, + "p99": 104.25599664449692 + }, + "roundtrip": { + "p50": 162.08000481128693, + "p90": 178.8800060749054, + "p95": 181.85600638389587, + "p99": 186.49600446224213 + }, + "isolatedSum": { + "p50": 187.20000237226486, + "p90": 219.93599832057953, + "p95": 222.17600047588348, + "p99": 228.57599705457687 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38148096, + "combineLogicalBytes": 38148096, + "fanoutMean": 5.197265625, + "recvTokensMax": 350, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -45202,35 +46904,35 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 83.39200168848038, - "p90": 109.27999764680862, - "p95": 120.28799951076508, - "p99": 153.18399667739868 + "p50": 118.07999759912491, + "p90": 135.3279948234558, + "p95": 138.2399946451187, + "p99": 140.57600498199463 }, "combine": { - "p50": 69.50400024652481, - "p90": 82.87999778985977, - "p95": 90.27200192213058, - "p99": 100.89600086212158 + "p50": 106.84800148010254, + "p90": 119.45600062608719, + "p95": 119.74400281906128, + "p99": 120.54400146007538 }, "roundtrip": { - "p50": 128.67200374603271, - "p90": 153.53600680828094, - "p95": 162.62400150299072, - "p99": 190.65600633621216 + "p50": 198.84799420833588, + "p90": 216.2880003452301, + "p95": 219.67999637126923, + "p99": 221.47199511528015 }, "isolatedSum": { - "p50": 152.8960019350052, - "p90": 192.1599954366684, - "p95": 210.56000143289566, - "p99": 254.07999753952026 + "p50": 224.92799907922745, + "p90": 254.783995449543, + "p95": 257.98399746418, + "p99": 261.12000644207 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 14680064, - "combineLogicalBytes": 14680064, - "fanoutMean": 1, - "recvTokensMax": 128, - "stragglerRank": 7, + "dispatchLogicalBytes": 76955648, + 
"combineLogicalBytes": 76955648, + "fanoutMean": 5.2421875, + "recvTokensMax": 687, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -45238,16 +46940,16 @@ ] }, { - "id": "cx-b2e52442", - "identity": "h200|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|balanced+eplb|8|decode|normal|none|none|0|tuned||f0e66a15078595b", - "colorKey": "h200_06544e53", - "comparisonKey": "57040e121807e028", + "id": "cx-a38d13e8", + "identity": "h100|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|decode|normal|none|none|0|tuned||a8f501af7004836", + "colorKey": "h100_769b9c4b", + "comparisonKey": "115d84ad1ee38d09", "schemaVersion": 3, - "generatedAt": "2026-06-27T00:02:47.649756+00:00", + "generatedAt": "2026-06-27T00:00:11.807854+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h200-dgxc-slurm_10", - "sku": "h200", + "runner": "h100-dgxc-slurm_02", + "sku": "h100", "backend": "deepep", "phase": "decode", "mode": "normal", @@ -45255,17 +46957,18 @@ "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", + "topologyClass": "h100-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H200 EP8 · deepep · bf16 · balanced+eplb", + "label": "H100 EP8 · deepep · bf16 · zipf+eplb", + "model": "DeepSeek-V3 (EPLB physical)", "shape": { "hidden": 7168, "topk": 8, "experts": 288, - "routing": "balanced", - "routingLabel": "balanced+eplb", + "routing": "zipf", + "routingLabel": "zipf+eplb", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": true, @@ -45290,18 +46993,18 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "f0e66a15078595b", - "workloadId": "set:8:7af12818400d6348", + "traceSignature": "a8f501af7004836", + "workloadId": "set:8:f5576e2b712d38c3", "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": 1, - "eplbImbalanceAfter": 1, + "eplbImbalanceBefore": 
4.927734375, + "eplbImbalanceAfter": 1.0006103515625, "backendVersion": "1.2.1", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28272031884", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272031884", - "createdAt": "2026-06-27T00:01:23Z", + "id": "28271948775", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271948775", + "createdAt": "2026-06-27T00:00:11.807854+00:00", "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" }, "rows": [ @@ -45309,35 +47012,35 @@ "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 68.15999746322632, - "p90": 100.60799866914749, - "p95": 110.72000116109848, - "p99": 138.75199854373932 + "p50": 70.39999961853027, + "p90": 100.832000374794, + "p95": 105.56799918413162, + "p99": 192.73599982261658 }, "combine": { - "p50": 60.70400029420853, - "p90": 72.86400347948074, - "p95": 79.3600007891655, - "p99": 86.11200004816055 + "p50": 73.18399846553802, + "p90": 88.44800293445587, + "p95": 188.38399648666382, + "p99": 344.2560136318207 }, "roundtrip": { - "p50": 116.92799627780914, - "p90": 150.2079963684082, - "p95": 158.6879938840866, - "p99": 184.83200669288635 + "p50": 123.77600371837616, + "p90": 133.08799266815186, + "p95": 149.4400054216385, + "p99": 156.12800419330597 }, "isolatedSum": { - "p50": 128.86399775743484, - "p90": 173.47200214862823, - "p95": 190.08000195026398, - "p99": 224.86399859189987 + "p50": 143.5839980840683, + "p90": 189.28000330924988, + "p95": 293.95199567079544, + "p99": 536.9920134544373 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 229376, - "combineLogicalBytes": 229376, - "fanoutMean": 2, - "recvTokensMax": 3, - "stragglerRank": 5, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 5.375, + "recvTokensMax": 7, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -45346,35 +47049,35 @@ 
"tokensPerRank": 2, "globalTokens": 16, "dispatch": { - "p50": 71.29599899053574, - "p90": 104.54399883747101, - "p95": 113.8560026884079, - "p99": 152.99199521541595 + "p50": 68.38399916887283, + "p90": 75.71200281381607, + "p95": 77.11999863386154, + "p99": 95.61599791049957 }, "combine": { - "p50": 61.983998864889145, - "p90": 78.97599786520004, - "p95": 83.5840031504631, - "p99": 98.49599748849869 + "p50": 71.29599899053574, + "p90": 73.44000041484833, + "p95": 74.36800003051758, + "p99": 82.2720006108284 }, "roundtrip": { - "p50": 116.5120005607605, - "p90": 149.88799393177032, - "p95": 163.71199488639832, - "p99": 195.45599818229675 + "p50": 126.68800354003906, + "p90": 130.87999820709229, + "p95": 133.56800377368927, + "p99": 142.59199798107147 }, "isolatedSum": { - "p50": 133.27999785542488, - "p90": 183.51999670267105, - "p95": 197.440005838871, - "p99": 251.48799270391464 + "p50": 139.67999815940857, + "p90": 149.1520032286644, + "p95": 151.48799866437912, + "p99": 177.88799852132797 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 458752, - "combineLogicalBytes": 458752, - "fanoutMean": 2, - "recvTokensMax": 6, - "stragglerRank": 5, + "dispatchLogicalBytes": 1204224, + "combineLogicalBytes": 1204224, + "fanoutMean": 5.25, + "recvTokensMax": 14, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -45383,35 +47086,35 @@ "tokensPerRank": 4, "globalTokens": 32, "dispatch": { - "p50": 72.4480003118515, - "p90": 95.04000097513199, - "p95": 104.63999956846237, - "p99": 125.40799379348755 + "p50": 72.54400104284286, + "p90": 99.2640033364296, + "p95": 102.08000242710114, + "p99": 107.39199817180634 }, "combine": { - "p50": 61.664000153541565, - "p90": 73.02399724721909, - "p95": 81.82399719953537, - "p99": 99.58399832248688 + "p50": 72.9919970035553, + "p90": 79.71200346946716, + "p95": 84.22400057315826, + "p99": 87.39200234413147 }, "roundtrip": { - "p50": 117.08799749612808, - "p90": 144.41600441932678, - "p95": 
157.72800147533417, - "p99": 314.88001346588135 + "p50": 130.23999333381653, + "p90": 156.41599893569946, + "p95": 160.22400557994843, + "p99": 165.53600132465363 }, "isolatedSum": { - "p50": 134.11200046539307, - "p90": 168.06399822235107, - "p95": 186.46399676799774, - "p99": 224.99199211597443 + "p50": 145.53599804639816, + "p90": 178.97600680589676, + "p95": 186.3040030002594, + "p99": 194.7840005159378 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 917504, - "combineLogicalBytes": 917504, - "fanoutMean": 2, - "recvTokensMax": 12, - "stragglerRank": 7, + "dispatchLogicalBytes": 2394112, + "combineLogicalBytes": 2394112, + "fanoutMean": 5.21875, + "recvTokensMax": 24, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -45420,34 +47123,34 @@ "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 73.95199686288834, - "p90": 99.2640033364296, - "p95": 105.8880016207695, - "p99": 122.27199971675873 + "p50": 96.19200229644775, + "p90": 109.56799983978271, + "p95": 112.73600161075592, + "p99": 155.87200224399567 }, "combine": { - "p50": 62.6240000128746, - "p90": 84.25600081682205, - "p95": 90.11200070381165, - "p99": 102.78400033712387 + "p50": 75.45600086450577, + "p90": 88.06400001049042, + "p95": 89.4400030374527, + "p99": 97.37599641084671 }, "roundtrip": { - "p50": 116.15999788045883, - "p90": 150.36800503730774, - "p95": 161.69600188732147, - "p99": 189.08800184726715 + "p50": 130.94399869441986, + "p90": 154.4319987297058, + "p95": 156.44800662994385, + "p99": 176.67199671268463 }, "isolatedSum": { - "p50": 136.57599687576294, - "p90": 183.52000415325165, - "p95": 196.00000232458115, - "p99": 225.0560000538826 + "p50": 171.64800316095352, + "p90": 197.63199985027313, + "p95": 202.17600464820862, + "p99": 253.24799865484238 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1835008, - "combineLogicalBytes": 1835008, - "fanoutMean": 2, - "recvTokensMax": 24, + "dispatchLogicalBytes": 4630528, + 
"combineLogicalBytes": 4630528, + "fanoutMean": 5.046875, + "recvTokensMax": 45, "stragglerRank": 4, "correct": true, "samplesPooled": 600, @@ -45457,35 +47160,35 @@ "tokensPerRank": 16, "globalTokens": 128, "dispatch": { - "p50": 75.87199658155441, - "p90": 96.70399874448776, - "p95": 106.01600259542465, - "p99": 122.94399738311768 + "p50": 82.97599852085114, + "p90": 100.16000270843506, + "p95": 103.55199873447418, + "p99": 106.72000050544739 }, "combine": { - "p50": 62.94400244951248, - "p90": 78.65600287914276, - "p95": 84.73599702119827, - "p99": 96.6079980134964 + "p50": 74.14399832487106, + "p90": 87.3280018568039, + "p95": 88.95999938249588, + "p99": 89.82399851083755 }, "roundtrip": { - "p50": 117.15199798345566, - "p90": 145.11999487876892, - "p95": 153.47200632095337, - "p99": 190.75199961662292 + "p50": 131.6480040550232, + "p90": 158.9760035276413, + "p95": 161.31199896335602, + "p99": 166.78400337696075 }, "isolatedSum": { - "p50": 138.8159990310669, - "p90": 175.36000162363052, - "p95": 190.75199961662292, - "p99": 219.55199539661407 + "p50": 157.1199968457222, + "p90": 187.48800456523895, + "p95": 192.51199811697006, + "p99": 196.54399901628494 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 3670016, - "combineLogicalBytes": 3670016, - "fanoutMean": 2, - "recvTokensMax": 48, - "stragglerRank": 4, + "dispatchLogicalBytes": 9447424, + "combineLogicalBytes": 9447424, + "fanoutMean": 5.1484375, + "recvTokensMax": 91, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 @@ -45494,35 +47197,35 @@ "tokensPerRank": 32, "globalTokens": 256, "dispatch": { - "p50": 77.82399654388428, - "p90": 99.32799637317657, - "p95": 108.22399705648422, - "p99": 131.52000308036804 + "p50": 90.30400216579437, + "p90": 105.6319996714592, + "p95": 106.6880002617836, + "p99": 111.04000359773636 }, "combine": { - "p50": 66.3359984755516, - "p90": 80.35200089216232, - "p95": 87.74399757385254, - "p99": 170.23999989032745 + "p50": 80.99199831485748, + 
"p90": 89.15200084447861, + "p95": 89.88799899816513, + "p99": 90.91199934482574 }, "roundtrip": { - "p50": 119.90399658679962, - "p90": 146.7519998550415, - "p95": 154.4959992170334, - "p99": 167.4879938364029 + "p50": 142.17600226402283, + "p90": 157.6640009880066, + "p95": 160.44799983501434, + "p99": 164.8319959640503 }, "isolatedSum": { - "p50": 144.15999501943588, - "p90": 179.6799972653389, - "p95": 195.96799463033676, - "p99": 301.7600029706955 + "p50": 171.29600048065186, + "p90": 194.7840005159378, + "p95": 196.57599925994873, + "p99": 201.9520029425621 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 7340032, - "combineLogicalBytes": 7340032, - "fanoutMean": 2, - "recvTokensMax": 96, - "stragglerRank": 5, + "dispatchLogicalBytes": 19023872, + "combineLogicalBytes": 19023872, + "fanoutMean": 5.18359375, + "recvTokensMax": 178, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 @@ -45531,35 +47234,35 @@ "tokensPerRank": 64, "globalTokens": 512, "dispatch": { - "p50": 83.39200168848038, - "p90": 107.93600231409073, - "p95": 117.47200042009354, - "p99": 157.82399475574493 + "p50": 95.74399888515472, + "p90": 116.2559986114502, + "p95": 121.98399752378464, + "p99": 398.6560106277466 }, "combine": { - "p50": 70.17599791288376, - "p90": 82.36800134181976, - "p95": 89.59999680519104, - "p99": 102.7199998497963 + "p50": 90.20800143480301, + "p90": 101.1200025677681, + "p95": 104.25599664449692, + "p99": 111.55200004577637 }, "roundtrip": { - "p50": 127.51999497413635, - "p90": 154.7199934720993, - "p95": 170.04799842834473, - "p99": 201.27999782562256 + "p50": 160.76800227165222, + "p90": 181.536003947258, + "p95": 185.37600338459015, + "p99": 188.35200369358063 }, "isolatedSum": { - "p50": 153.56799960136414, - "p90": 190.3040036559105, - "p95": 207.07199722528458, - "p99": 260.54399460554123 + "p50": 185.95200031995773, + "p90": 217.3760011792183, + "p95": 226.23999416828156, + "p99": 510.20801067352295 }, "roundtripMeasured": true, 
- "dispatchLogicalBytes": 14680064, - "combineLogicalBytes": 14680064, - "fanoutMean": 2, - "recvTokensMax": 192, - "stragglerRank": 5, + "dispatchLogicalBytes": 38148096, + "combineLogicalBytes": 38148096, + "fanoutMean": 5.197265625, + "recvTokensMax": 350, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -45568,35 +47271,35 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 96.19200229644775, - "p90": 114.04799669981003, - "p95": 123.83999675512314, - "p99": 167.4560010433197 + "p50": 114.3679991364479, + "p90": 133.44000279903412, + "p95": 137.31199502944946, + "p99": 142.7839994430542 }, "combine": { - "p50": 84.48000252246857, - "p90": 95.87199985980988, - "p95": 99.93600100278854, - "p99": 113.92000317573547 + "p50": 108.15999656915665, + "p90": 120.2239990234375, + "p95": 121.24799937009811, + "p99": 123.99999797344208 }, "roundtrip": { - "p50": 156.3200056552887, - "p90": 175.64800381660461, - "p95": 185.56800484657288, - "p99": 221.15199267864227 + "p50": 199.35999810695648, + "p90": 217.31199324131012, + "p95": 220.15999257564545, + "p99": 380.8319866657257 }, "isolatedSum": { - "p50": 180.67200481891632, - "p90": 209.9199965596199, - "p95": 223.77599775791168, - "p99": 281.3760042190552 + "p50": 222.52799570560455, + "p90": 253.66400182247162, + "p95": 258.5599943995476, + "p99": 266.7839974164963 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 29360128, - "combineLogicalBytes": 29360128, - "fanoutMean": 2, - "recvTokensMax": 384, - "stragglerRank": 5, + "dispatchLogicalBytes": 76955648, + "combineLogicalBytes": 76955648, + "fanoutMean": 5.2421875, + "recvTokensMax": 687, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -45604,48 +47307,49 @@ ] }, { - "id": "cx-f4d9691e", - "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-moving|8|decode|normal|none|none|0|tuned||90042e0db6a8297", - "colorKey": "h200_ca3ee133", - "comparisonKey": 
"65013819dd1ccf9e", + "id": "cx-4ad32f1a", + "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|fp8-saturation|none|none|0|normalized|0.18|8c8497a77d9085d", + "colorKey": "h100_7b3247bf", + "comparisonKey": "2a087c80bac58077", "schemaVersion": 3, - "generatedAt": "2026-06-27T00:12:51.846779+00:00", + "generatedAt": "2026-06-26T15:27:59.966964+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h200-dgxc-slurm_8", - "sku": "h200", + "runner": "h100-dgxc-slurm_12", + "sku": "h100", "backend": "deepep", "phase": "decode", "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", + "resourceMode": "normalized", + "suite": "resource-constrained", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", + "topologyClass": "h100-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H200 EP8 · deepep · bf16 · hotspot-moving", + "label": "H100 EP8 · deepep · bf16 (norm)", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, "experts": 256, - "routing": "hotspot-moving", - "routingLabel": "hotspot-moving", + "routing": "uniform", + "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, "dispatchDtype": "bf16", - "activationProfile": "normal", + "activationProfile": "fp8-saturation", "combineQuantMode": "none" }, "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, + "requestedFraction": 0.18, + "achievedFraction": 0.1818, + "configuredUnits": 24, "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", + "resourceClass": "unknown", + "conformanceClass": "resource-conforming", "fixedKernel": false, "paretoEligible": false }, @@ -45656,8 +47360,8 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "90042e0db6a8297", - "workloadId": 
"set:3:8fd05d9ebee41064", + "traceSignature": "8c8497a77d9085d", + "workloadId": "set:4:d8d49658059863f2", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -45665,45 +47369,82 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28272342148", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272342148", - "createdAt": "2026-06-27T00:11:29Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28247603308", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28247603308", + "createdAt": "2026-06-26T15:27:59.966964+00:00", + "sha": "fd23d02b65dba6f1ed963342b188022fc27263d1" }, "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 96.73599898815155, + "p90": 102.49599814414978, + "p95": 104.12800312042236, + "p99": 112.19199746847153 + }, + "combine": { + "p50": 79.42400127649307, + "p90": 81.4720019698143, + "p95": 82.14399963617325, + "p99": 87.93599903583527 + }, + "roundtrip": { + "p50": 146.84799313545227, + "p90": 156.15999698638916, + "p95": 159.13599729537964, + "p99": 164.000004529953 + }, + "isolatedSum": { + "p50": 176.16000026464462, + "p90": 183.96800011396408, + "p95": 186.2720027565956, + "p99": 200.1279965043068 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, { "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 74.0479975938797, - "p90": 96.41599655151367, - "p95": 105.8880016207695, - "p99": 125.5359947681427 + "p50": 98.33600372076035, + "p90": 103.93600165843964, + "p95": 106.52799904346466, + "p99": 111.58400028944016 }, "combine": { - "p50": 69.69600170850754, - "p90": 81.91999793052673, - "p95": 88.95999938249588, - "p99": 
114.88000303506851 + "p50": 80.03199845552444, + "p90": 86.84799820184708, + "p95": 87.61599659919739, + "p99": 88.06400001049042 }, "roundtrip": { - "p50": 124.83199685811996, - "p90": 153.82400155067444, - "p95": 160.67199409008026, - "p99": 180.95999956130981 + "p50": 151.64799988269806, + "p90": 159.16800498962402, + "p95": 160.35200655460358, + "p99": 165.50399363040924 }, "isolatedSum": { - "p50": 143.74399930238724, - "p90": 178.3359944820404, - "p95": 194.84800100326538, - "p99": 240.4159978032112 + "p50": 178.3680021762848, + "p90": 190.7839998602867, + "p95": 194.14399564266205, + "p99": 199.64800029993057 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4859904, - "combineLogicalBytes": 4859904, - "fanoutMean": 5.296875, - "recvTokensMax": 64, - "stragglerRank": 1, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -45712,35 +47453,35 @@ "tokensPerRank": 32, "globalTokens": 256, "dispatch": { - "p50": 83.93599838018417, - "p90": 104.54399883747101, - "p95": 111.35999858379364, - "p99": 132.9919993877411 + "p50": 99.90400075912476, + "p90": 105.76000064611435, + "p95": 108.15999656915665, + "p99": 116.60800129175186 }, "combine": { - "p50": 76.99199765920639, - "p90": 89.37600255012512, - "p95": 96.41599655151367, - "p99": 108.64000022411346 + "p50": 87.90399879217148, + "p90": 90.55999666452408, + "p95": 95.23200243711472, + "p99": 96.57599776983261 }, "roundtrip": { - "p50": 138.20800185203552, - "p90": 157.69599378108978, - "p95": 164.38399255275726, - "p99": 188.54400515556335 + "p50": 157.82399475574493, + "p90": 163.7759953737259, + "p95": 166.78400337696075, + "p99": 169.95200514793396 }, "isolatedSum": { - "p50": 160.92799603939056, - "p90": 193.92000138759613, - "p95": 207.7759951353073, - "p99": 241.63199961185455 + "p50": 187.80799955129623, + "p90": 196.31999731063843, + "p95": 
203.39199900627136, + "p99": 213.18399906158447 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19525632, - "combineLogicalBytes": 19525632, - "fanoutMean": 5.3203125, - "recvTokensMax": 256, - "stragglerRank": 6, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -45749,34 +47490,34 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 123.29600006341934, - "p90": 140.00000059604645, - "p95": 145.75999975204468, - "p99": 161.6639941930771 + "p50": 128.60800325870514, + "p90": 133.53599607944489, + "p95": 135.51999628543854, + "p99": 138.49599659442902 }, "combine": { - "p50": 117.85600334405899, - "p90": 129.2800009250641, - "p95": 136.28800213336945, - "p99": 148.80000054836273 + "p50": 112.57600039243698, + "p90": 120.4800009727478, + "p95": 120.7680031657219, + "p99": 122.40000069141388 }, "roundtrip": { - "p50": 222.30400145053864, - "p90": 243.45600605010986, - "p95": 248.99199604988098, - "p99": 268.7999904155731 + "p50": 208.3519995212555, + "p90": 215.71199595928192, + "p95": 217.56799519062042, + "p99": 220.5439954996109 }, "isolatedSum": { - "p50": 241.15200340747833, - "p90": 269.28000152111053, - "p95": 282.0480018854141, - "p99": 310.4639947414398 + "p50": 241.18400365114212, + "p90": 254.0159970521927, + "p95": 256.28799945116043, + "p99": 260.8959972858429 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 78102528, - "combineLogicalBytes": 78102528, - "fanoutMean": 5.3203125, - "recvTokensMax": 1024, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, "stragglerRank": 7, "correct": true, "samplesPooled": 600, @@ -45785,35 +47526,36 @@ ] }, { - "id": "cx-9febd1e2", - "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-moving@s1|8|decode|normal|none|none|1|tuned||6288a1aa76c20e7", - 
"colorKey": "h200_9779cb2d", - "comparisonKey": "65013819dd1ccf9e", + "id": "cx-b5d97134", + "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|normalized|0.1|8c8497a77d9085d", + "colorKey": "h100_7b3247bf", + "comparisonKey": "b51e047646ec8fac", "schemaVersion": 3, - "generatedAt": "2026-06-27T00:12:58.540972+00:00", + "generatedAt": "2026-06-26T17:27:16.815311+00:00", "status": "valid", - "publicationStatus": "comparable-experimental", - "runner": "h200-dgxc-slurm_5", - "sku": "h200", + "publicationStatus": "official", + "runner": "h100-dgxc-slurm_07", + "sku": "h100", "backend": "deepep", "phase": "decode", "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", + "resourceMode": "normalized", + "suite": "resource-constrained", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", + "topologyClass": "h100-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H200 EP8 · deepep · bf16 · hotspot-moving@s1", + "label": "H100 EP8 · deepep · bf16 (norm)", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, "experts": 256, - "routing": "hotspot-moving", - "routingLabel": "hotspot-moving@s1", - "routingStep": 1, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, "dispatchDtype": "bf16", @@ -45821,14 +47563,14 @@ "combineQuantMode": "none" }, "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, + "requestedFraction": 0.1, + "achievedFraction": 0.0985, + "configuredUnits": 13, "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", + "resourceClass": "resource-constrained", + "conformanceClass": "resource-conforming", "fixedKernel": false, - "paretoEligible": false + "paretoEligible": true }, "placement": { "kind": "packed", @@ 
-45837,54 +47579,91 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "6288a1aa76c20e7", - "workloadId": null, - "workloadSource": "seeded-runtime", + "traceSignature": "8c8497a77d9085d", + "workloadId": "set:4:d8d49658059863f2", + "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, "backendVersion": "1.2.1", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28272345418", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272345418", - "createdAt": "2026-06-27T00:11:36Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28254271442", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254271442", + "createdAt": "2026-06-26T17:27:16.815311+00:00", + "sha": "60dec7d70f554e252fec87709e2be52752947db1" }, "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 97.24800288677216, + "p90": 103.39199751615524, + "p95": 105.8880016207695, + "p99": 111.13599687814713 + }, + "combine": { + "p50": 78.84799689054489, + "p90": 81.727996468544, + "p95": 85.11999994516373, + "p99": 89.02399986982346 + }, + "roundtrip": { + "p50": 151.36000514030457, + "p90": 157.53600001335144, + "p95": 159.67999398708344, + "p99": 164.63999450206757 + }, + "isolatedSum": { + "p50": 176.09599977731705, + "p90": 185.11999398469925, + "p95": 191.00800156593323, + "p99": 200.15999674797058 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, { "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 74.97599720954895, - "p90": 90.91199934482574, - "p95": 99.32799637317657, - "p99": 128.83199751377106 + "p50": 99.29600358009338, + "p90": 104.70400005578995, + "p95": 
106.72000050544739, + "p99": 113.53600025177002 }, "combine": { - "p50": 70.27199864387512, - "p90": 80.1599994301796, - "p95": 89.21600133180618, - "p99": 107.07200318574905 + "p50": 79.58400249481201, + "p90": 86.97599917650223, + "p95": 87.39200234413147, + "p99": 91.5519967675209 }, "roundtrip": { - "p50": 125.47199428081512, - "p90": 145.6959992647171, - "p95": 153.31199765205383, - "p99": 184.54399704933167 + "p50": 153.85599434375763, + "p90": 161.28000617027283, + "p95": 162.432000041008, + "p99": 166.07999801635742 }, "isolatedSum": { - "p50": 145.24799585342407, - "p90": 171.07199877500534, - "p95": 188.54399770498276, - "p99": 235.9040006995201 + "p50": 178.8800060749054, + "p90": 191.67999923229218, + "p95": 194.11200284957886, + "p99": 205.08799701929092 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4859904, - "combineLogicalBytes": 4859904, - "fanoutMean": 5.296875, - "recvTokensMax": 64, - "stragglerRank": 4, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -45893,35 +47672,35 @@ "tokensPerRank": 32, "globalTokens": 256, "dispatch": { - "p50": 83.26400071382523, - "p90": 95.93600034713745, - "p95": 101.05600208044052, - "p99": 118.65600198507309 + "p50": 103.29599678516388, + "p90": 107.64800012111664, + "p95": 109.98400300741196, + "p99": 121.40800058841705 }, "combine": { - "p50": 78.8159966468811, - "p90": 86.75199747085571, - "p95": 92.03200042247772, - "p99": 111.84000223875046 + "p50": 87.74399757385254, + "p90": 95.20000219345093, + "p95": 95.48799693584442, + "p99": 97.18400239944458 }, "roundtrip": { - "p50": 139.13600146770477, - "p90": 150.68799257278442, - "p95": 155.20000457763672, - "p99": 181.05599284172058 + "p50": 161.6639941930771, + "p90": 169.50400173664093, + "p95": 170.9440052509308, + "p99": 175.52000284194946 }, "isolatedSum": { - "p50": 162.07999736070633, - "p90": 
182.68799781799316, - "p95": 193.08800250291824, - "p99": 230.49600422382355 + "p50": 191.03999435901642, + "p90": 202.84800231456757, + "p95": 205.47199994325638, + "p99": 218.59200298786163 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19525632, - "combineLogicalBytes": 19525632, - "fanoutMean": 5.3203125, - "recvTokensMax": 256, - "stragglerRank": 4, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -45930,35 +47709,35 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 124.95999783277512, - "p90": 135.51999628543854, - "p95": 140.54399728775024, - "p99": 153.3759981393814 + "p50": 129.66400384902954, + "p90": 137.79200613498688, + "p95": 139.55199718475342, + "p99": 143.93599331378937 }, "combine": { - "p50": 118.30399930477142, - "p90": 126.0479986667633, - "p95": 131.00799918174744, - "p99": 152.5759994983673 + "p50": 113.72800171375275, + "p90": 120.15999853610992, + "p95": 120.83200365304947, + "p99": 123.55200201272964 }, "roundtrip": { - "p50": 222.27199375629425, - "p90": 233.5679978132248, - "p95": 239.3600046634674, - "p99": 254.55999374389648 + "p50": 211.776003241539, + "p90": 217.21599996089935, + "p95": 218.9439982175827, + "p99": 222.75200486183167 }, "isolatedSum": { - "p50": 243.26399713754654, - "p90": 261.56799495220184, - "p95": 271.5519964694977, - "p99": 305.9519976377487 + "p50": 243.3920055627823, + "p90": 257.9520046710968, + "p95": 260.3840008378029, + "p99": 267.487995326519 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 78102528, - "combineLogicalBytes": 78102528, - "fanoutMean": 5.3203125, - "recvTokensMax": 1024, - "stragglerRank": 7, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -45966,35 +47745,36 @@ ] 
}, { - "id": "cx-f5a9f57f", - "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-moving@s2|8|decode|normal|none|none|2|tuned||675e15b52e37958", - "colorKey": "h200_9479c674", - "comparisonKey": "65013819dd1ccf9e", + "id": "cx-2f9f6948", + "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|ac583971f94b176", + "colorKey": "h100_7b3247bf", + "comparisonKey": "b51e047646ec8fac", "schemaVersion": 3, - "generatedAt": "2026-06-27T00:13:12.398873+00:00", + "generatedAt": "2026-06-26T17:29:02.253264+00:00", "status": "valid", - "publicationStatus": "comparable-experimental", - "runner": "h200-dgxc-slurm_13", - "sku": "h200", + "publicationStatus": "official", + "runner": "h100-dgxc-slurm_00", + "sku": "h100", "backend": "deepep", "phase": "decode", "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", + "resourceMode": "normalized", + "suite": "resource-constrained", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", + "topologyClass": "h100-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H200 EP8 · deepep · bf16 · hotspot-moving@s2", + "label": "H100 EP8 · deepep · bf16 (norm)", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, "experts": 256, - "routing": "hotspot-moving", - "routingLabel": "hotspot-moving@s2", - "routingStep": 2, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, "dispatchDtype": "bf16", @@ -46002,14 +47782,14 @@ "combineQuantMode": "none" }, "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, + "requestedFraction": 0.18, + "achievedFraction": 0.1818, + "configuredUnits": 24, "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", + "resourceClass": 
"resource-constrained", + "conformanceClass": "resource-conforming", "fixedKernel": false, - "paretoEligible": false + "paretoEligible": true }, "placement": { "kind": "packed", @@ -46018,272 +47798,276 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "675e15b52e37958", - "workloadId": null, - "workloadSource": "seeded-runtime", + "traceSignature": "ac583971f94b176", + "workloadId": "set:8:d8d49658059863f2", + "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, "backendVersion": "1.2.1", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28272348704", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272348704", - "createdAt": "2026-06-27T00:11:43Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28254315809", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254315809", + "createdAt": "2026-06-26T17:29:02.253264+00:00", + "sha": "60dec7d70f554e252fec87709e2be52752947db1" }, "rows": [ { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 1, + "globalTokens": 8, "dispatch": { - "p50": 75.87199658155441, - "p90": 93.59999746084213, - "p95": 100.19200295209885, - "p99": 114.56000059843063 + "p50": 95.74399888515472, + "p90": 102.78400033712387, + "p95": 104.99200224876404, + "p99": 109.37599837779999 }, "combine": { - "p50": 71.35999947786331, - "p90": 79.64800298213959, - "p95": 85.63199639320374, - "p99": 97.79199957847595 + "p50": 79.32800054550171, + "p90": 82.07999914884567, + "p95": 82.87999778985977, + "p99": 88.03199976682663 }, "roundtrip": { - "p50": 129.2160004377365, - "p90": 148.5760062932968, - "p95": 158.84800255298615, - "p99": 188.22400271892548 + "p50": 147.74399995803833, + "p90": 154.6880006790161, + "p95": 157.44000673294067, + "p99": 171.9360053539276 }, "isolatedSum": { - "p50": 147.23199605941772, - "p90": 
173.24800044298172, - "p95": 185.82399934530258, - "p99": 212.35200017690659 + "p50": 175.07199943065643, + "p90": 184.86399948596954, + "p95": 187.8720000386238, + "p99": 197.40799814462662 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4859904, - "combineLogicalBytes": 4859904, - "fanoutMean": 5.296875, - "recvTokensMax": 64, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 32, - "globalTokens": 256, + "tokensPerRank": 2, + "globalTokens": 16, "dispatch": { - "p50": 88.0960002541542, - "p90": 110.78400164842606, - "p95": 121.72800302505493, - "p99": 175.61599612236023 + "p50": 71.23199850320816, + "p90": 101.27999633550644, + "p95": 102.52799838781357, + "p99": 107.87200182676315 }, "combine": { - "p50": 80.70400357246399, - "p90": 92.3520028591156, - "p95": 98.88000041246414, - "p99": 121.34400010108948 + "p50": 72.22399860620499, + "p90": 80.92799782752991, + "p95": 81.44000172615051, + "p99": 84.76799726486206 }, "roundtrip": { - "p50": 141.37600362300873, - "p90": 164.19200599193573, - "p95": 172.95999825000763, - "p99": 193.7599927186966 + "p50": 127.45599448680878, + "p90": 153.02400290966034, + "p95": 155.64799308776855, + "p99": 159.4880074262619 }, "isolatedSum": { - "p50": 168.8000038266182, - "p90": 203.13600450754166, - "p95": 220.60800343751907, - "p99": 296.9599962234497 + "p50": 143.45599710941315, + "p90": 182.20799416303635, + "p95": 183.96800011396408, + "p99": 192.6399990916252 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19525632, - "combineLogicalBytes": 19525632, - "fanoutMean": 5.3203125, - "recvTokensMax": 256, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 4, 
+ "globalTokens": 32, "dispatch": { - "p50": 128.92800569534302, - "p90": 140.3840035200119, - "p95": 146.65600657463074, - "p99": 171.10399901866913 + "p50": 95.23200243711472, + "p90": 102.36799716949463, + "p95": 107.84000158309937, + "p99": 439.64800238609314 }, "combine": { - "p50": 120.28799951076508, - "p90": 132.38400220870972, - "p95": 136.76799833774567, - "p99": 159.36000645160675 + "p50": 72.95999675989151, + "p90": 81.66400343179703, + "p95": 86.81599795818329, + "p99": 88.92799913883209 }, "roundtrip": { - "p50": 224.2880016565323, - "p90": 240.1919960975647, - "p95": 248.1279969215393, - "p99": 276.8320143222809 + "p50": 128.7360042333603, + "p90": 159.19999778270721, + "p95": 161.31199896335602, + "p99": 167.1680063009262 }, "isolatedSum": { - "p50": 249.2160052061081, - "p90": 272.7680057287216, - "p95": 283.4240049123764, - "p99": 330.4640054702759 + "p50": 168.19199919700623, + "p90": 184.03200060129166, + "p95": 194.65599954128265, + "p99": 528.5760015249252 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 78102528, - "combineLogicalBytes": 78102528, - "fanoutMean": 5.3203125, - "recvTokensMax": 1024, - "stragglerRank": 4, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 - } - ] - }, - { - "id": "cx-13ab64c2", - "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-moving@s3|8|decode|normal|none|none|3|tuned||82b2963fc322419", - "colorKey": "h200_9579c807", - "comparisonKey": "65013819dd1ccf9e", - "schemaVersion": 3, - "generatedAt": "2026-06-27T00:13:19.903361+00:00", - "status": "valid", - "publicationStatus": "comparable-experimental", - "runner": "h200-dgxc-slurm_6", - "sku": "h200", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": 
"layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · bf16 · hotspot-moving@s3", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "hotspot-moving", - "routingLabel": "hotspot-moving@s3", - "routingStep": 3, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "82b2963fc322419", - "workloadId": null, - "workloadSource": "seeded-runtime", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28272352256", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272352256", - "createdAt": "2026-06-27T00:11:49Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ + }, { "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 74.40000027418137, - "p90": 94.7519987821579, - "p95": 101.9200012087822, - "p99": 123.36000055074692 + "p50": 95.42399644851685, + "p90": 102.52799838781357, + "p95": 104.89600151777267, + "p99": 113.53600025177002 }, "combine": { - "p50": 70.20799815654755, - "p90": 82.17599987983704, - "p95": 89.37600255012512, - "p99": 105.56799918413162 + "p50": 79.58400249481201, + "p90": 82.91199803352356, + "p95": 87.07199990749359, + "p99": 87.96799927949905 }, "roundtrip": { - "p50": 125.34399330615997, - "p90": 
150.04800260066986, - "p95": 162.6559942960739, - "p99": 177.88800597190857 + "p50": 151.48800611495972, + "p90": 159.90400314331055, + "p95": 162.20800578594208, + "p99": 169.47199404239655 }, "isolatedSum": { - "p50": 144.6079984307289, - "p90": 176.92799866199493, - "p95": 191.29600375890732, - "p99": 228.92799973487854 + "p50": 175.00799894332886, + "p90": 185.43999642133713, + "p95": 191.96800142526627, + "p99": 201.50399953126907 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4859904, - "combineLogicalBytes": 4859904, - "fanoutMean": 5.296875, - "recvTokensMax": 64, - "stragglerRank": 0, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 32, - "globalTokens": 256, + "tokensPerRank": 16, + "globalTokens": 128, "dispatch": { - "p50": 86.46400272846222, - "p90": 103.00800204277039, - "p95": 111.7440015077591, - "p99": 129.95199859142303 + "p50": 95.71199864149094, + "p90": 100.8640006184578, + "p95": 102.68799960613251, + "p99": 106.49599879980087 }, "combine": { - "p50": 79.26400005817413, - "p90": 90.97599983215332, - "p95": 96.47999703884125, - "p99": 115.9679964184761 + "p50": 80.64000308513641, + "p90": 87.90399879217148, + "p95": 89.24800157546997, + "p99": 95.23200243711472 }, "roundtrip": { - "p50": 139.8400068283081, - "p90": 156.6080003976822, - "p95": 163.96799683570862, - "p99": 176.35199427604675 + "p50": 152.319997549057, + "p90": 160.19199788570404, + "p95": 162.23999857902527, + "p99": 168.92799735069275 }, "isolatedSum": { - "p50": 165.72800278663635, - "p90": 193.9840018749237, - "p95": 208.22399854660034, - "p99": 245.91999500989914 + "p50": 176.35200172662735, + "p90": 188.76799941062927, + "p95": 191.93600118160248, + "p99": 201.7280012369156 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19525632, - "combineLogicalBytes": 19525632, - "fanoutMean": 5.3203125, - 
"recvTokensMax": 256, - "stragglerRank": 0, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 80.86399734020233, + "p90": 103.26399654150009, + "p95": 105.47199845314026, + "p99": 113.18399757146835 + }, + "combine": { + "p50": 80.35200089216232, + "p90": 89.31200206279755, + "p95": 90.04800021648407, + "p99": 95.74399888515472 + }, + "roundtrip": { + "p50": 136.48000359535217, + "p90": 164.60800170898438, + "p95": 167.10400581359863, + "p99": 175.10400712490082 + }, + "isolatedSum": { + "p50": 161.21599823236465, + "p90": 192.57599860429764, + "p95": 195.51999866962433, + "p99": 208.92799645662308 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 103.4879982471466, + "p90": 112.8000020980835, + "p95": 114.3679991364479, + "p99": 125.72799623012543 + }, + "combine": { + "p50": 96.83199971914291, + "p90": 104.12800312042236, + "p95": 104.99200224876404, + "p99": 106.33599758148193 + }, + "roundtrip": { + "p50": 170.71999609470367, + "p90": 181.21600151062012, + "p95": 182.91200697422028, + "p99": 186.81600689888 + }, + "isolatedSum": { + "p50": 200.31999796628952, + "p90": 216.92800521850586, + "p95": 219.36000138521194, + "p99": 232.06399381160736 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -46292,35 +48076,35 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 124.83199685811996, - 
"p90": 138.59200477600098, - "p95": 144.44799721240997, - "p99": 233.88800024986267 + "p50": 111.29599809646606, + "p90": 130.87999820709229, + "p95": 133.5040032863617, + "p99": 139.93600010871887 }, "combine": { - "p50": 119.07199770212173, - "p90": 130.8159977197647, - "p95": 139.71200585365295, - "p99": 152.5759994983673 + "p50": 106.27199709415436, + "p90": 119.58400160074234, + "p95": 119.99999731779099, + "p99": 122.3360002040863 }, "roundtrip": { - "p50": 222.24000096321106, - "p90": 239.84000086784363, - "p95": 250.65600872039795, - "p99": 283.4239900112152 + "p50": 197.56799936294556, + "p90": 215.80800414085388, + "p95": 217.92000532150269, + "p99": 219.80799734592438 }, "isolatedSum": { - "p50": 243.9039945602417, - "p90": 269.4080024957657, - "p95": 284.1600030660629, - "p99": 386.46399974823 + "p50": 217.56799519062042, + "p90": 250.46399980783463, + "p95": 253.50400060415268, + "p99": 262.2720003128052 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 78102528, - "combineLogicalBytes": 78102528, - "fanoutMean": 5.3203125, - "recvTokensMax": 1024, - "stragglerRank": 0, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -46328,34 +48112,35 @@ ] }, { - "id": "cx-7c6f809c", - "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|decode|normal|none|none|0|tuned||2ad5ef98d328fa1", - "colorKey": "h200_189562cd", - "comparisonKey": "6b812f29e2dcdef6", + "id": "cx-3752524d", + "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|normalized|0.6|8c8497a77d9085d", + "colorKey": "h100_7b3247bf", + "comparisonKey": "b51e047646ec8fac", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:57:16.217396+00:00", + "generatedAt": "2026-06-26T17:30:39.045176+00:00", "status": "valid", "publicationStatus": "official", - "runner": 
"h200-dgxc-slurm_1", - "sku": "h200", + "runner": "h100-dgxc-slurm_13", + "sku": "h100", "backend": "deepep", "phase": "decode", "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", + "resourceMode": "normalized", + "suite": "resource-constrained", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", + "topologyClass": "h100-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H200 EP8 · deepep · bf16 · hotspot-single", + "label": "H100 EP8 · deepep · bf16 (norm)", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, "experts": 256, - "routing": "hotspot-single", - "routingLabel": "hotspot-single", + "routing": "uniform", + "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, @@ -46364,14 +48149,14 @@ "combineQuantMode": "none" }, "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, + "requestedFraction": 0.6, + "achievedFraction": 0.5985, + "configuredUnits": 79, "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", + "resourceClass": "resource-constrained", + "conformanceClass": "resource-conforming", "fixedKernel": false, - "paretoEligible": false + "paretoEligible": true }, "placement": { "kind": "packed", @@ -46380,8 +48165,8 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "2ad5ef98d328fa1", - "workloadId": "set:4:286be993cd819ed9", + "traceSignature": "8c8497a77d9085d", + "workloadId": "set:4:d8d49658059863f2", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -46389,45 +48174,45 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271859196", - "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271859196", - "createdAt": "2026-06-26T23:55:54Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28254286950", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254286950", + "createdAt": "2026-06-26T17:30:39.045176+00:00", + "sha": "60dec7d70f554e252fec87709e2be52752947db1" }, "rows": [ { "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 71.71200215816498, - "p90": 98.30400347709656, - "p95": 109.69600081443787, - "p99": 295.48799991607666 + "p50": 96.28800302743912, + "p90": 103.55199873447418, + "p95": 105.66399991512299, + "p99": 108.51199924945831 }, "combine": { - "p50": 67.6800012588501, - "p90": 82.07999914884567, - "p95": 88.16000074148178, - "p99": 110.04800349473953 + "p50": 79.1039988398552, + "p90": 81.37600123882294, + "p95": 84.89599823951721, + "p99": 89.91999924182892 }, "roundtrip": { - "p50": 121.95199728012085, - "p90": 153.24799716472626, - "p95": 161.53599321842194, - "p99": 211.16800606250763 + "p50": 146.27200365066528, + "p90": 156.38400614261627, + "p95": 161.82400286197662, + "p99": 219.2319929599762 }, "isolatedSum": { - "p50": 139.39200341701508, - "p90": 180.38400262594223, - "p95": 197.85600155591965, - "p99": 405.5360034108162 + "p50": 175.3920018672943, + "p90": 184.92799997329712, + "p95": 190.5599981546402, + "p99": 198.43199849128723 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 602112, - "combineLogicalBytes": 602112, - "fanoutMean": 5.25, - "recvTokensMax": 8, - "stragglerRank": 6, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -46436,35 +48221,35 @@ "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 74.17599856853485, - "p90": 98.08000177145004, - "p95": 108.0000028014183, - "p99": 146.14400267601013 + "p50": 96.70399874448776, + "p90": 102.30399668216705, + 
"p95": 104.51199859380722, + "p99": 112.22399771213531 }, "combine": { - "p50": 69.63200122117996, - "p90": 83.13599973917007, - "p95": 89.02399986982346, - "p99": 103.20000350475311 + "p50": 79.58400249481201, + "p90": 87.3280018568039, + "p95": 87.80799806118011, + "p99": 89.9519994854927 }, "roundtrip": { - "p50": 125.40799379348755, - "p90": 153.50399911403656, - "p95": 165.12000560760498, - "p99": 192.83199310302734 + "p50": 153.3759981393814, + "p90": 161.21600568294525, + "p95": 162.56000101566315, + "p99": 166.72000288963318 }, "isolatedSum": { - "p50": 143.8079997897148, - "p90": 181.21600151062012, - "p95": 197.02400267124176, - "p99": 249.34400618076324 + "p50": 176.28800123929977, + "p90": 189.63199853897095, + "p95": 192.31999665498734, + "p99": 202.17599719762802 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4859904, - "combineLogicalBytes": 4859904, - "fanoutMean": 5.296875, - "recvTokensMax": 64, - "stragglerRank": 7, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -46473,35 +48258,35 @@ "tokensPerRank": 32, "globalTokens": 256, "dispatch": { - "p50": 81.02399855852127, - "p90": 105.76000064611435, - "p95": 114.46399986743927, - "p99": 129.72800433635712 + "p50": 102.88000106811523, + "p90": 106.81600123643875, + "p95": 109.0560033917427, + "p99": 114.3679991364479 }, "combine": { - "p50": 77.2159993648529, - "p90": 89.34400230646133, - "p95": 95.8079993724823, - "p99": 114.97599631547928 + "p50": 87.99999952316284, + "p90": 95.48799693584442, + "p95": 96.22400254011154, + "p99": 119.1679984331131 }, "roundtrip": { - "p50": 137.472003698349, - "p90": 158.91200304031372, - "p95": 166.20799899101257, - "p99": 185.08799374103546 + "p50": 161.95200383663177, + "p90": 170.0800061225891, + "p95": 172.5119948387146, + "p99": 460.7999920845032 }, "isolatedSum": { - "p50": 158.23999792337418, - "p90": 
195.10400295257568, - "p95": 210.27199923992157, - "p99": 244.7040006518364 + "p50": 190.88000059127808, + "p90": 202.30399817228317, + "p95": 205.28000593185425, + "p99": 233.535997569561 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19525632, - "combineLogicalBytes": 19525632, - "fanoutMean": 5.3203125, - "recvTokensMax": 256, - "stragglerRank": 7, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -46510,34 +48295,34 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 124.25599992275238, - "p90": 137.02400028705597, - "p95": 144.51199769973755, - "p99": 166.6879951953888 + "p50": 129.08799946308136, + "p90": 135.80800592899323, + "p95": 137.56799697875977, + "p99": 142.14399456977844 }, "combine": { - "p50": 118.30399930477142, - "p90": 130.14400005340576, - "p95": 135.71199774742126, - "p99": 157.6319932937622 + "p50": 113.27999830245972, + "p90": 120.44800072908401, + "p95": 120.67200243473053, + "p99": 123.74400347471237 }, "roundtrip": { - "p50": 220.06399929523468, - "p90": 239.42400515079498, - "p95": 246.17600440979004, - "p99": 313.6639893054962 + "p50": 211.5200012922287, + "p90": 218.176007270813, + "p95": 219.64800357818604, + "p99": 223.68000447750092 }, "isolatedSum": { - "p50": 242.5599992275238, - "p90": 267.16800034046173, - "p95": 280.2239954471588, - "p99": 324.319988489151 + "p50": 242.36799776554108, + "p90": 256.25600665807724, + "p95": 258.2399994134903, + "p99": 265.8879980444908 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 78102528, - "combineLogicalBytes": 78102528, - "fanoutMean": 5.3203125, - "recvTokensMax": 1024, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, "stragglerRank": 7, "correct": true, "samplesPooled": 600, @@ -46546,34 +48331,35 @@ ] }, { - "id": "cx-13c27f2d", - 
"identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|decode|normal|none|none|0|tuned||b6caf944f6bb621", - "colorKey": "h200_189562cd", - "comparisonKey": "6b812f29e2dcdef6", + "id": "cx-7db267e7", + "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|decode|normal|none|none|0|normalized|0.18|ffa946582edb500", + "colorKey": "h100_716e65b9", + "comparisonKey": "259b0e9f1092ac0e", "schemaVersion": 3, - "generatedAt": "2026-06-27T00:05:10.730241+00:00", + "generatedAt": "2026-06-26T17:32:00.320566+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h200-dgxc-slurm_9", - "sku": "h200", + "runner": "h100-dgxc-slurm_15", + "sku": "h100", "backend": "deepep", "phase": "decode", "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", + "resourceMode": "normalized", + "suite": "resource-constrained", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", + "topologyClass": "h100-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H200 EP8 · deepep · bf16 · hotspot-single", + "label": "H100 EP8 · deepep · bf16 (norm) · balanced", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, "experts": 256, - "routing": "hotspot-single", - "routingLabel": "hotspot-single", + "routing": "balanced", + "routingLabel": "balanced", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, @@ -46582,14 +48368,14 @@ "combineQuantMode": "none" }, "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, + "requestedFraction": 0.18, + "achievedFraction": 0.1818, + "configuredUnits": 24, "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", + "resourceClass": "resource-constrained", + "conformanceClass": "resource-conforming", "fixedKernel": false, - "paretoEligible": false + 
"paretoEligible": true }, "placement": { "kind": "packed", @@ -46598,8 +48384,8 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "b6caf944f6bb621", - "workloadId": "set:8:286be993cd819ed9", + "traceSignature": "ffa946582edb500", + "workloadId": "set:8:7af12818400d6348", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -46607,43 +48393,43 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28272100552", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272100552", - "createdAt": "2026-06-27T00:03:34Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28254367516", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254367516", + "createdAt": "2026-06-26T17:32:00.320566+00:00", + "sha": "60dec7d70f554e252fec87709e2be52752947db1" }, "rows": [ { "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 74.14399832487106, - "p90": 98.84800016880035, - "p95": 106.36799782514572, - "p99": 130.46400249004364 + "p50": 95.93600034713745, + "p90": 103.00800204277039, + "p95": 104.38399761915207, + "p99": 107.64800012111664 }, "combine": { - "p50": 68.15999746322632, - "p90": 80.19199967384338, - "p95": 86.30400151014328, - "p99": 99.16800260543823 + "p50": 81.08799904584885, + "p90": 87.93599903583527, + "p95": 88.60799670219421, + "p99": 90.36800265312195 }, "roundtrip": { - "p50": 122.17599898576736, - "p90": 154.4319987297058, - "p95": 165.98400473594666, - "p99": 216.44799411296844 + "p50": 151.2639969587326, + "p90": 158.9760035276413, + "p95": 160.73599457740784, + "p99": 164.06400501728058 }, "isolatedSum": { - "p50": 142.30399578809738, - "p90": 179.03999984264374, - "p95": 192.671999335289, - "p99": 229.63200509548187 + "p50": 177.0239993929863, + "p90": 190.94400107860565, + "p95": 192.99199432134628, + "p99": 198.0160027742386 }, 
"roundtripMeasured": true, - "dispatchLogicalBytes": 602112, - "combineLogicalBytes": 602112, - "fanoutMean": 5.25, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 8, "recvTokensMax": 8, "stragglerRank": 4, "correct": true, @@ -46654,35 +48440,35 @@ "tokensPerRank": 2, "globalTokens": 16, "dispatch": { - "p50": 76.19199901819229, - "p90": 103.5199984908104, - "p95": 114.3679991364479, - "p99": 145.9520012140274 + "p50": 74.23999905586243, + "p90": 96.79999947547913, + "p95": 100.00000149011612, + "p99": 103.7760004401207 }, "combine": { - "p50": 69.2799985408783, - "p90": 83.96799862384796, - "p95": 90.11200070381165, - "p99": 99.7759997844696 + "p50": 73.98399710655212, + "p90": 87.64799684286118, + "p95": 88.54400366544724, + "p99": 89.66399729251862 }, "roundtrip": { - "p50": 125.02400577068329, - "p90": 152.3520052433014, - "p95": 163.58399391174316, - "p99": 191.16799533367157 + "p50": 127.32799351215363, + "p90": 158.1439971923828, + "p95": 159.32799875736237, + "p99": 162.52799332141876 }, "isolatedSum": { - "p50": 145.4719975590706, - "p90": 187.48799711465836, - "p95": 204.47999984025955, - "p99": 245.728000998497 + "p50": 148.22399616241455, + "p90": 184.4479963183403, + "p95": 188.54400515556335, + "p99": 193.4399977326393 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1247232, - "combineLogicalBytes": 1247232, - "fanoutMean": 5.4375, + "dispatchLogicalBytes": 1835008, + "combineLogicalBytes": 1835008, + "fanoutMean": 8, "recvTokensMax": 16, - "stragglerRank": 2, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -46691,35 +48477,35 @@ "tokensPerRank": 4, "globalTokens": 32, "dispatch": { - "p50": 79.16799932718277, - "p90": 122.56000190973282, - "p95": 143.8719928264618, - "p99": 228.03199291229248 + "p50": 74.87999647855759, + "p90": 99.5199978351593, + "p95": 103.20000350475311, + "p99": 106.62399977445602 }, "combine": { - "p50": 70.04799693822861, - "p90": 85.1840004324913, - 
"p95": 89.9519994854927, - "p99": 98.4639972448349 + "p50": 73.95199686288834, + "p90": 87.74399757385254, + "p95": 88.06400001049042, + "p99": 88.76799792051315 }, "roundtrip": { - "p50": 130.0159990787506, - "p90": 166.17600619792938, - "p95": 180.80000579357147, - "p99": 225.63199698925018 + "p50": 127.80800461769104, + "p90": 156.3519984483719, + "p95": 158.81599485874176, + "p99": 162.33600676059723 }, "isolatedSum": { - "p50": 149.21599626541138, - "p90": 207.74400234222412, - "p95": 233.8239923119545, - "p99": 326.4959901571274 + "p50": 148.83199334144592, + "p90": 187.26399540901184, + "p95": 191.26400351524353, + "p99": 195.39199769496918 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2451456, - "combineLogicalBytes": 2451456, - "fanoutMean": 5.34375, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 8, "recvTokensMax": 32, - "stragglerRank": 5, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -46728,35 +48514,35 @@ "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 75.52000135183334, - "p90": 99.71199929714203, - "p95": 106.62399977445602, - "p99": 121.24799937009811 + "p50": 94.36800330877304, + "p90": 100.09600222110748, + "p95": 101.95200145244598, + "p99": 107.4879989027977 }, "combine": { - "p50": 70.592001080513, - "p90": 88.19200098514557, - "p95": 93.31200271844864, - "p99": 122.49600142240524 + "p50": 80.92799782752991, + "p90": 88.03199976682663, + "p95": 88.86399865150452, + "p99": 89.79199826717377 }, "roundtrip": { - "p50": 127.29600071907043, - "p90": 156.44800662994385, - "p95": 164.2879992723465, - "p99": 200.76799392700195 + "p50": 149.85600113868713, + "p90": 156.95999562740326, + "p95": 158.1760048866272, + "p99": 161.98399662971497 }, "isolatedSum": { - "p50": 146.11200243234634, - "p90": 187.9040002822876, - "p95": 199.93600249290466, - "p99": 243.74400079250336 + "p50": 175.29600113630295, + "p90": 188.1280019879341, + "p95": 190.8160001039505, + 
"p99": 197.27999716997147 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4859904, - "combineLogicalBytes": 4859904, - "fanoutMean": 5.296875, + "dispatchLogicalBytes": 7340032, + "combineLogicalBytes": 7340032, + "fanoutMean": 8, "recvTokensMax": 64, - "stragglerRank": 2, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -46765,35 +48551,35 @@ "tokensPerRank": 16, "globalTokens": 128, "dispatch": { - "p50": 75.42400062084198, - "p90": 99.32799637317657, - "p95": 107.16799646615982, - "p99": 116.44800007343292 + "p50": 94.36800330877304, + "p90": 104.80000078678131, + "p95": 106.78400099277496, + "p99": 115.00799655914307 }, "combine": { - "p50": 72.7040022611618, - "p90": 89.59999680519104, - "p95": 95.551997423172, - "p99": 149.1200029850006 + "p50": 86.59200370311737, + "p90": 88.76799792051315, + "p95": 89.56799656152725, + "p99": 96.83199971914291 }, "roundtrip": { - "p50": 129.5360028743744, - "p90": 163.42400014400482, - "p95": 173.18400740623474, - "p99": 210.36800742149353 + "p50": 150.11200308799744, + "p90": 161.50400042533875, + "p95": 166.24000668525696, + "p99": 490.62401056289673 }, "isolatedSum": { - "p50": 148.12800288200378, - "p90": 188.92799317836761, - "p95": 202.71999388933182, - "p99": 265.56800305843353 + "p50": 180.9600070118904, + "p90": 193.56799870729446, + "p95": 196.35199755430222, + "p99": 211.83999627828598 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 9748480, - "combineLogicalBytes": 9748480, - "fanoutMean": 5.3125, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 8, "recvTokensMax": 128, - "stragglerRank": 4, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -46802,33 +48588,33 @@ "tokensPerRank": 32, "globalTokens": 256, "dispatch": { - "p50": 84.89599823951721, - "p90": 109.31199789047241, - "p95": 117.15199798345566, - "p99": 152.92799472808838 + "p50": 87.0399996638298, + "p90": 106.04800283908844, + "p95": 
110.1439967751503, + "p99": 123.83999675512314 }, "combine": { - "p50": 78.75200361013412, - "p90": 95.36000341176987, - "p95": 99.10400211811066, - "p99": 120.06399780511856 + "p50": 82.5280025601387, + "p90": 96.3200032711029, + "p95": 96.73599898815155, + "p99": 97.56799787282944 }, "roundtrip": { - "p50": 140.73599874973297, - "p90": 167.29600727558136, - "p95": 174.01599884033203, - "p99": 211.07199788093567 + "p50": 143.5839980840683, + "p90": 166.55999422073364, + "p95": 168.7680035829544, + "p99": 175.55199563503265 }, "isolatedSum": { - "p50": 163.64800184965134, - "p90": 204.67200130224228, - "p95": 216.25600010156631, - "p99": 272.99199253320694 + "p50": 169.5680022239685, + "p90": 202.36800611019135, + "p95": 206.87999576330185, + "p99": 221.40799462795258 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19525632, - "combineLogicalBytes": 19525632, - "fanoutMean": 5.3203125, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 8, "recvTokensMax": 256, "stragglerRank": 4, "correct": true, @@ -46839,35 +48625,35 @@ "tokensPerRank": 64, "globalTokens": 512, "dispatch": { - "p50": 98.91200065612793, - "p90": 116.19199812412262, - "p95": 121.31199985742569, - "p99": 146.84799313545227 + "p50": 116.92799627780914, + "p90": 126.3359934091568, + "p95": 128.63999605178833, + "p99": 132.6719969511032 }, "combine": { - "p50": 91.36000275611877, - "p90": 105.50399869680405, - "p95": 109.92000252008438, - "p99": 130.65600395202637 + "p50": 104.19200360774994, + "p90": 112.06399649381638, + "p95": 112.99200356006622, + "p99": 113.76000195741653 }, "roundtrip": { - "p50": 168.7999963760376, - "p90": 190.8479928970337, - "p95": 195.23200392723083, - "p99": 233.69599878787994 + "p50": 190.49599766731262, + "p90": 199.74400103092194, + "p95": 202.36800611019135, + "p99": 204.76800203323364 }, "isolatedSum": { - "p50": 190.2720034122467, - "p90": 221.69599682092667, - "p95": 231.23200237751007, - "p99": 277.50399708747864 + "p50": 
221.11999988555908, + "p90": 238.39998990297318, + "p95": 241.63199961185455, + "p99": 246.43199890851974 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 38621184, - "combineLogicalBytes": 38621184, - "fanoutMean": 5.26171875, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 8, "recvTokensMax": 512, - "stragglerRank": 3, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -46876,35 +48662,35 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 125.72799623012543, - "p90": 143.16800236701965, - "p95": 147.90399372577667, - "p99": 170.71999609470367 + "p50": 129.85600531101227, + "p90": 152.96000242233276, + "p95": 154.78399395942688, + "p99": 158.87999534606934 }, "combine": { - "p50": 120.06399780511856, - "p90": 136.48000359535217, - "p95": 141.9840008020401, - "p99": 148.44800531864166 + "p50": 121.2799996137619, + "p90": 129.43999469280243, + "p95": 130.3360015153885, + "p99": 145.34400403499603 }, "roundtrip": { - "p50": 224.09600019454956, - "p90": 247.8400021791458, - "p95": 254.68799471855164, - "p99": 276.38399600982666 + "p50": 226.8799990415573, + "p90": 240.31999707221985, + "p95": 242.01600253582, + "p99": 245.02399563789368 }, "isolatedSum": { - "p50": 245.791994035244, - "p90": 279.6480059623718, - "p95": 289.8879945278168, - "p99": 319.16800141334534 + "p50": 251.13600492477417, + "p90": 282.3999971151352, + "p95": 285.11999547481537, + "p99": 304.22399938106537 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 78102528, - "combineLogicalBytes": 78102528, - "fanoutMean": 5.3203125, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 8, "recvTokensMax": 1024, - "stragglerRank": 3, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -46912,50 +48698,51 @@ ] }, { - "id": "cx-c4fd916e", - "identity": 
"h200|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|hotspot-single+eplb|8|decode|normal|none|none|0|tuned||e41f5099a9733ac", - "colorKey": "h200_80a72891", - "comparisonKey": "abe9d0af26c5a0c0", + "id": "cx-c5b168ae", + "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|decode|normal|none|none|0|normalized|0.18|14ded8461f2636c", + "colorKey": "h100_f7ec28aa", + "comparisonKey": "9896b8e4d81bc6a5", "schemaVersion": 3, - "generatedAt": "2026-06-27T00:05:13.797855+00:00", + "generatedAt": "2026-06-26T17:32:03.917674+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h200-dgxc-slurm_1", - "sku": "h200", + "runner": "h100-dgxc-slurm_11", + "sku": "h100", "backend": "deepep", "phase": "decode", "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", + "resourceMode": "normalized", + "suite": "resource-constrained", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", + "topologyClass": "h100-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H200 EP8 · deepep · bf16 · hotspot-single+eplb", + "label": "H100 EP8 · deepep · bf16 (norm) · zipf", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, - "experts": 288, - "routing": "hotspot-single", - "routingLabel": "hotspot-single+eplb", + "experts": 256, + "routing": "zipf", + "routingLabel": "zipf", "routingStep": 0, "unevenTokens": "none", - "eplbEnabled": true, + "eplbEnabled": false, "dispatchDtype": "bf16", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, + "requestedFraction": 0.18, + "achievedFraction": 0.1818, + "configuredUnits": 24, "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", + "resourceClass": "resource-constrained", + "conformanceClass": "resource-conforming", 
"fixedKernel": false, - "paretoEligible": false + "paretoEligible": true }, "placement": { "kind": "packed", @@ -46964,54 +48751,54 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "e41f5099a9733ac", - "workloadId": "set:8:286be993cd819ed9", + "traceSignature": "14ded8461f2636c", + "workloadId": "set:8:f5576e2b712d38c3", "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": 1.830078125, - "eplbImbalanceAfter": 1.0007595486111112, + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, "backendVersion": "1.2.1", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28272103776", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272103776", - "createdAt": "2026-06-27T00:03:41Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28254376151", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254376151", + "createdAt": "2026-06-26T17:32:03.917674+00:00", + "sha": "60dec7d70f554e252fec87709e2be52752947db1" }, "rows": [ { "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 72.86400347948074, - "p90": 99.10400211811066, - "p95": 107.35999792814255, - "p99": 136.48000359535217 + "p50": 96.89600020647049, + "p90": 104.032002389431, + "p95": 106.04800283908844, + "p99": 111.04000359773636 }, "combine": { - "p50": 67.87200272083282, - "p90": 82.30400085449219, - "p95": 87.55200356245041, - "p99": 92.12800115346909 + "p50": 74.36800003051758, + "p90": 80.03199845552444, + "p95": 81.31200075149536, + "p99": 82.68799632787704 }, "roundtrip": { - "p50": 121.31199985742569, - "p90": 150.62400698661804, - "p95": 160.76800227165222, - "p99": 204.8639953136444 + "p50": 145.82400023937225, + "p90": 153.76000106334686, + "p95": 160.0639969110489, + "p99": 226.30399465560913 }, "isolatedSum": { - "p50": 140.73600620031357, - "p90": 181.40800297260284, - "p95": 194.91200149059296, - "p99": 
228.60800474882126 + "p50": 171.26400023698807, + "p90": 184.06400084495544, + "p95": 187.3600035905838, + "p99": 193.7279999256134 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 630784, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 4, + "dispatchLogicalBytes": 444416, + "combineLogicalBytes": 444416, + "fanoutMean": 3.875, + "recvTokensMax": 8, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -47020,35 +48807,35 @@ "tokensPerRank": 2, "globalTokens": 16, "dispatch": { - "p50": 74.43200051784515, - "p90": 101.34399682283401, - "p95": 109.66400057077408, - "p99": 138.43199610710144 + "p50": 70.72000205516815, + "p90": 103.93600165843964, + "p95": 105.18400371074677, + "p99": 113.63200098276138 }, "combine": { - "p50": 67.90400296449661, - "p90": 80.76799660921097, - "p95": 85.37600189447403, - "p99": 95.13600170612335 + "p50": 71.35999947786331, + "p90": 80.32000064849854, + "p95": 81.18399977684021, + "p99": 88.16000074148178 }, "roundtrip": { - "p50": 121.56800180673599, - "p90": 151.67999267578125, - "p95": 162.23999857902527, - "p99": 191.64800643920898 + "p50": 126.68800354003906, + "p90": 152.5759994983673, + "p95": 155.32800555229187, + "p99": 159.29600596427917 }, "isolatedSum": { - "p50": 142.33600348234177, - "p90": 182.11199343204498, - "p95": 195.0400024652481, - "p99": 233.5679978132248 + "p50": 142.08000153303146, + "p90": 184.25600230693817, + "p95": 186.36800348758698, + "p99": 201.79200172424316 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1175552, - "combineLogicalBytes": 1175552, - "fanoutMean": 5.125, - "recvTokensMax": 12, - "stragglerRank": 7, + "dispatchLogicalBytes": 845824, + "combineLogicalBytes": 845824, + "fanoutMean": 3.6875, + "recvTokensMax": 16, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 @@ -47057,35 +48844,35 @@ "tokensPerRank": 4, "globalTokens": 32, "dispatch": { - "p50": 74.36800003051758, - "p90": 
106.30399733781815, - "p95": 112.8000020980835, - "p99": 133.34399461746216 + "p50": 70.14399766921997, + "p90": 100.28800368309021, + "p95": 102.55999863147736, + "p99": 131.71200454235077 }, "combine": { - "p50": 69.31199878454208, - "p90": 85.75999736785889, - "p95": 93.05600076913834, - "p99": 108.41599851846695 + "p50": 71.61600142717361, + "p90": 79.55200225114822, + "p95": 79.74400371313095, + "p99": 84.22400057315826 }, "roundtrip": { - "p50": 123.16799908876419, - "p90": 152.16000378131866, - "p95": 162.33600676059723, - "p99": 187.80800700187683 + "p50": 127.77599692344666, + "p90": 153.50399911403656, + "p95": 155.2640050649643, + "p99": 160.73599457740784 }, "isolatedSum": { - "p50": 143.67999881505966, - "p90": 192.06399470567703, - "p95": 205.85600286722183, - "p99": 241.7599931359291 + "p50": 141.75999909639359, + "p90": 179.84000593423843, + "p95": 182.3040023446083, + "p99": 215.93600511550903 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2451456, - "combineLogicalBytes": 2451456, - "fanoutMean": 5.34375, - "recvTokensMax": 23, - "stragglerRank": 6, + "dispatchLogicalBytes": 1691648, + "combineLogicalBytes": 1691648, + "fanoutMean": 3.6875, + "recvTokensMax": 32, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 @@ -47094,35 +48881,35 @@ "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 73.72800260782242, - "p90": 94.94400024414062, + "p50": 94.97600048780441, + "p90": 100.832000374794, "p95": 102.30399668216705, - "p99": 121.2799996137619 + "p99": 114.3999993801117 }, "combine": { - "p50": 68.44799965620041, - "p90": 81.91999793052673, - "p95": 88.03199976682663, - "p99": 102.52799838781357 + "p50": 71.52000069618225, + "p90": 81.18399977684021, + "p95": 81.7599967122078, + "p99": 86.94399893283844 }, "roundtrip": { - "p50": 124.22399967908859, - "p90": 154.14400398731232, - "p95": 164.60800170898438, - "p99": 177.44000256061554 + "p50": 125.31200051307678, + "p90": 153.05599570274353, + "p95": 
156.0640037059784, + "p99": 159.42400693893433 }, "isolatedSum": { - "p50": 142.17600226402283, - "p90": 176.86399817466736, - "p95": 190.33599644899368, - "p99": 223.80799800157547 + "p50": 166.49600118398666, + "p90": 182.01600015163422, + "p95": 184.06399339437485, + "p99": 201.34399831295013 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4730880, - "combineLogicalBytes": 4730880, - "fanoutMean": 5.15625, - "recvTokensMax": 44, - "stragglerRank": 4, + "dispatchLogicalBytes": 3354624, + "combineLogicalBytes": 3354624, + "fanoutMean": 3.65625, + "recvTokensMax": 64, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -47131,35 +48918,35 @@ "tokensPerRank": 16, "globalTokens": 128, "dispatch": { - "p50": 72.12799787521362, - "p90": 98.55999797582626, - "p95": 106.01600259542465, - "p99": 130.62399625778198 + "p50": 95.551997423172, + "p90": 100.89600086212158, + "p95": 103.26399654150009, + "p99": 112.31999844312668 }, "combine": { - "p50": 69.92000341415405, - "p90": 83.29600095748901, - "p95": 89.28000181913376, - "p99": 106.75200074911118 + "p50": 79.48800176382065, + "p90": 86.87999844551086, + "p95": 87.71199733018875, + "p99": 88.22400122880936 }, "roundtrip": { - "p50": 123.77600371837616, - "p90": 149.63200688362122, - "p95": 158.4639996290207, - "p99": 176.54399573802948 + "p50": 149.79200065135956, + "p90": 158.24000537395477, + "p95": 160.0320041179657, + "p99": 165.69599509239197 }, "isolatedSum": { - "p50": 142.04800128936768, - "p90": 181.85599893331528, - "p95": 195.2960044145584, - "p99": 237.37599700689316 + "p50": 175.03999918699265, + "p90": 187.77599930763245, + "p95": 190.97599387168884, + "p99": 200.54399967193604 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 9691136, - "combineLogicalBytes": 9691136, - "fanoutMean": 5.28125, - "recvTokensMax": 88, - "stragglerRank": 0, + "dispatchLogicalBytes": 6537216, + "combineLogicalBytes": 6537216, + "fanoutMean": 3.5625, + "recvTokensMax": 127, + 
"stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -47168,35 +48955,35 @@ "tokensPerRank": 32, "globalTokens": 256, "dispatch": { - "p50": 84.48000252246857, - "p90": 110.75200140476227, - "p95": 119.61600184440613, - "p99": 152.41600573062897 + "p50": 83.16799998283386, + "p90": 99.96800124645233, + "p95": 104.96000200510025, + "p99": 109.11999642848969 }, "combine": { - "p50": 77.2479996085167, - "p90": 91.07200056314468, - "p95": 98.36799651384354, - "p99": 130.17599284648895 + "p50": 79.8719972372055, + "p90": 87.93599903583527, + "p95": 89.28000181913376, + "p99": 95.39200365543365 }, "roundtrip": { - "p50": 134.783998131752, - "p90": 159.04000401496887, - "p95": 166.97600483894348, - "p99": 194.36800479888916 + "p50": 135.26399433612823, + "p90": 159.19999778270721, + "p95": 161.72799468040466, + "p99": 166.6560024023056 }, "isolatedSum": { - "p50": 161.72800213098526, - "p90": 201.82400196790695, - "p95": 217.98399835824966, - "p99": 282.5919985771179 + "p50": 163.03999722003937, + "p90": 187.9040002822876, + "p95": 194.240003824234, + "p99": 204.51200008392334 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19568640, - "combineLogicalBytes": 19568640, - "fanoutMean": 5.33203125, - "recvTokensMax": 179, - "stragglerRank": 7, + "dispatchLogicalBytes": 12859392, + "combineLogicalBytes": 12859392, + "fanoutMean": 3.50390625, + "recvTokensMax": 255, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 @@ -47205,35 +48992,35 @@ "tokensPerRank": 64, "globalTokens": 512, "dispatch": { - "p50": 97.02400118112564, - "p90": 121.2799996137619, - "p95": 137.95199990272522, - "p99": 238.87999355793 + "p50": 100.832000374794, + "p90": 114.68800157308578, + "p95": 116.67200177907944, + "p99": 134.91199910640717 }, "combine": { - "p50": 90.94399958848953, - "p90": 106.97600245475769, - "p95": 113.98400366306305, - "p99": 139.3280029296875 + "p50": 90.27200192213058, + "p90": 103.32799702882767, + "p95": 104.16000336408615, 
+ "p99": 152.12799608707428 }, "roundtrip": { - "p50": 161.05599701404572, - "p90": 182.17599391937256, - "p95": 191.23199582099915, - "p99": 230.27199506759644 + "p50": 164.70399498939514, + "p90": 182.8480064868927, + "p95": 186.49600446224213, + "p99": 189.40800428390503 }, "isolatedSum": { - "p50": 187.96800076961517, - "p90": 228.2560020685196, - "p95": 251.93600356578827, - "p99": 378.2079964876175 + "p50": 191.1040022969246, + "p90": 218.01599860191345, + "p95": 220.8320051431656, + "p99": 287.03999519348145 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 38750208, - "combineLogicalBytes": 38750208, - "fanoutMean": 5.279296875, - "recvTokensMax": 348, - "stragglerRank": 6, + "dispatchLogicalBytes": 25145344, + "combineLogicalBytes": 25145344, + "fanoutMean": 3.42578125, + "recvTokensMax": 510, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 @@ -47242,34 +49029,34 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 116.03199690580368, - "p90": 134.39999520778656, - "p95": 142.5279974937439, - "p99": 206.11199736595154 + "p50": 121.31199985742569, + "p90": 139.67999815940857, + "p95": 144.57599818706512, + "p99": 150.87999403476715 }, "combine": { - "p50": 103.04000228643417, - "p90": 118.23999881744385, - "p95": 122.079998254776, - "p99": 137.69599795341492 + "p50": 112.99200356006622, + "p90": 120.64000219106674, + "p95": 120.80000340938568, + "p99": 128.51199507713318 }, "roundtrip": { - "p50": 195.99999487400055, - "p90": 214.33599293231964, - "p95": 224.5440036058426, - "p99": 265.02400636672974 + "p50": 212.67199516296387, + "p90": 228.4799963235855, + "p95": 230.0799936056137, + "p99": 235.74399948120117 }, "isolatedSum": { - "p50": 219.07199919223785, - "p90": 252.6399940252304, - "p95": 264.6079957485199, - "p99": 343.80799531936646 + "p50": 234.3040034174919, + "p90": 260.3200003504753, + "p95": 265.3760015964508, + "p99": 279.39198911190033 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 
77342720, - "combineLogicalBytes": 77342720, - "fanoutMean": 5.2685546875, - "recvTokensMax": 687, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, "stragglerRank": 4, "correct": true, "samplesPooled": 600, @@ -47278,50 +49065,51 @@ ] }, { - "id": "cx-34b2b051", - "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform·empty-rank|8|decode|normal|none|empty-rank|0|tuned||5621f0d4899ad7a", - "colorKey": "h200_2a7f12a0", - "comparisonKey": "4dde4e46080a91eb", + "id": "cx-cf899bce", + "identity": "h100|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|decode|normal|none|none|0|normalized|0.18|a8f501af7004836", + "colorKey": "h100_93503624", + "comparisonKey": "74d307ed048ea3b5", "schemaVersion": 3, - "generatedAt": "2026-06-27T00:14:22.620116+00:00", + "generatedAt": "2026-06-26T17:46:24.194442+00:00", "status": "valid", - "publicationStatus": "comparable-experimental", - "runner": "h200-dgxc-slurm_3", - "sku": "h200", + "publicationStatus": "official", + "runner": "h100-dgxc-slurm_02", + "sku": "h100", "backend": "deepep", "phase": "decode", "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", + "resourceMode": "normalized", + "suite": "resource-constrained", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", + "topologyClass": "h100-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H200 EP8 · deepep · bf16 · uniform·empty-rank", + "label": "H100 EP8 · deepep · bf16 (norm) · zipf+eplb", + "model": "DeepSeek-V3 (EPLB physical)", "shape": { "hidden": 7168, "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform·empty-rank", + "experts": 288, + "routing": "zipf", + "routingLabel": "zipf+eplb", "routingStep": 0, - "unevenTokens": "empty-rank", - "eplbEnabled": false, + "unevenTokens": "none", + "eplbEnabled": 
true, "dispatchDtype": "bf16", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, + "requestedFraction": 0.18, + "achievedFraction": 0.1818, + "configuredUnits": 24, "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", + "resourceClass": "resource-constrained", + "conformanceClass": "resource-conforming", "fixedKernel": false, - "paretoEligible": false + "paretoEligible": true }, "placement": { "kind": "packed", @@ -47330,53 +49118,201 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "5621f0d4899ad7a", - "workloadId": null, - "workloadSource": "seeded-runtime", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, + "traceSignature": "a8f501af7004836", + "workloadId": "set:8:f5576e2b712d38c3", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": 4.927734375, + "eplbImbalanceAfter": 1.0006103515625, "backendVersion": "1.2.1", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28272386143", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272386143", - "createdAt": "2026-06-27T00:12:58Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28255296001", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28255296001", + "createdAt": "2026-06-26T17:46:24.194442+00:00", + "sha": "36d3eb6c3c7386d3220c873d305410219c5c0f17" }, "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 69.72800195217133, + "p90": 75.83999633789062, + "p95": 77.85599678754807, + "p99": 83.39200168848038 + }, + "combine": { + "p50": 71.26399874687195, + "p90": 73.40800017118454, + "p95": 74.0479975938797, + "p99": 78.87999713420868 + }, + "roundtrip": { + "p50": 121.85599654912949, + "p90": 128.12800705432892, + "p95": 
130.3039938211441, + "p99": 134.71999764442444 + }, + "isolatedSum": { + "p50": 140.99200069904327, + "p90": 149.24799650907516, + "p95": 151.90399438142776, + "p99": 162.27199882268906 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 5.375, + "recvTokensMax": 7, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 70.3359991312027, + "p90": 76.25599950551987, + "p95": 78.59200239181519, + "p99": 84.6719965338707 + }, + "combine": { + "p50": 71.16799801588058, + "p90": 73.53600114583969, + "p95": 74.27199929952621, + "p99": 79.80799674987793 + }, + "roundtrip": { + "p50": 127.20000743865967, + "p90": 131.00799918174744, + "p95": 133.27999413013458, + "p99": 138.08000087738037 + }, + "isolatedSum": { + "p50": 141.50399714708328, + "p90": 149.79200065135956, + "p95": 152.8640016913414, + "p99": 164.47999328374863 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1204224, + "combineLogicalBytes": 1204224, + "fanoutMean": 5.25, + "recvTokensMax": 14, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 73.18399846553802, + "p90": 102.14400291442871, + "p95": 105.50399869680405, + "p99": 108.44799876213074 + }, + "combine": { + "p50": 73.40800017118454, + "p90": 81.82399719953537, + "p95": 87.10400015115738, + "p99": 88.95999938249588 + }, + "roundtrip": { + "p50": 131.8719983100891, + "p90": 160.3199988603592, + "p95": 162.88000345230103, + "p99": 167.1680063009262 + }, + "isolatedSum": { + "p50": 146.59199863672256, + "p90": 183.96800011396408, + "p95": 192.60799884796143, + "p99": 197.40799814462662 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2394112, + "combineLogicalBytes": 2394112, + "fanoutMean": 5.21875, + "recvTokensMax": 24, + "stragglerRank": 6, + "correct": 
true, + "samplesPooled": 600, + "trials": 3 + }, { "tokensPerRank": 8, - "globalTokens": 63, + "globalTokens": 64, "dispatch": { - "p50": 73.15199822187424, - "p90": 92.76799857616425, - "p95": 100.28800368309021, - "p99": 131.58400356769562 + "p50": 70.30399888753891, + "p90": 78.20799946784973, + "p95": 81.02399855852127, + "p99": 89.4400030374527 }, "combine": { - "p50": 68.96000355482101, - "p90": 83.64800363779068, - "p95": 88.92799913883209, - "p99": 102.11200267076492 + "p50": 72.7040022611618, + "p90": 73.91999661922455, + "p95": 74.27199929952621, + "p99": 79.58400249481201 }, "roundtrip": { - "p50": 121.66400253772736, - "p90": 145.37599682807922, - "p95": 157.18400478363037, - "p99": 189.56799805164337 + "p50": 128.67200374603271, + "p90": 132.83200562000275, + "p95": 135.0719928741455, + "p99": 140.22399485111237 }, "isolatedSum": { - "p50": 142.11200177669525, - "p90": 176.41600221395493, - "p95": 189.2160028219223, - "p99": 233.69600623846054 + "p50": 143.0080011487007, + "p90": 152.12799608707428, + "p95": 155.29599785804749, + "p99": 169.0240055322647 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4888576, - "combineLogicalBytes": 4888576, - "fanoutMean": 5.412698268890381, - "recvTokensMax": 46, + "dispatchLogicalBytes": 4630528, + "combineLogicalBytes": 4630528, + "fanoutMean": 5.046875, + "recvTokensMax": 45, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 82.97599852085114, + "p90": 102.14400291442871, + "p95": 104.70400005578995, + "p99": 109.56799983978271 + }, + "combine": { + "p50": 74.30399954319, + "p90": 87.87199854850769, + "p95": 89.12000060081482, + "p99": 89.9519994854927 + }, + "roundtrip": { + "p50": 132.4480026960373, + "p90": 161.47199273109436, + "p95": 163.26400637626648, + "p99": 166.9120043516159 + }, + "isolatedSum": { + "p50": 157.27999806404114, + "p90": 190.0160014629364, + "p95": 193.82400065660477, + 
"p99": 199.51999932527542 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9447424, + "combineLogicalBytes": 9447424, + "fanoutMean": 5.1484375, + "recvTokensMax": 91, "stragglerRank": 5, "correct": true, "samplesPooled": 600, @@ -47384,125 +49320,163 @@ }, { "tokensPerRank": 32, - "globalTokens": 252, + "globalTokens": 256, "dispatch": { - "p50": 82.43200182914734, - "p90": 96.28800302743912, - "p95": 103.84000092744827, - "p99": 123.07199835777283 + "p50": 81.40800148248672, + "p90": 103.80800068378448, + "p95": 105.3759977221489, + "p99": 108.0000028014183 }, "combine": { - "p50": 76.60800218582153, - "p90": 86.65599673986435, - "p95": 92.28800237178802, - "p99": 107.84000158309937 + "p50": 79.77599650621414, + "p90": 90.08000046014786, + "p95": 90.71999788284302, + "p99": 247.67999351024628 }, "roundtrip": { - "p50": 134.49600338935852, - "p90": 156.031996011734, - "p95": 167.4879938364029, - "p99": 228.12800109386444 + "p50": 138.17599415779114, + "p90": 156.3519984483719, + "p95": 159.7760021686554, + "p99": 163.83999586105347 }, "isolatedSum": { - "p50": 159.04000401496887, - "p90": 182.94399976730347, - "p95": 196.1280032992363, - "p99": 230.9119999408722 + "p50": 161.18399798870087, + "p90": 193.88800114393234, + "p95": 196.0959956049919, + "p99": 355.6799963116646 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19396608, - "combineLogicalBytes": 19396608, - "fanoutMean": 5.36904764175415, - "recvTokensMax": 180, - "stragglerRank": 6, + "dispatchLogicalBytes": 19023872, + "combineLogicalBytes": 19023872, + "fanoutMean": 5.18359375, + "recvTokensMax": 178, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 128, - "globalTokens": 1022, + "tokensPerRank": 64, + "globalTokens": 512, "dispatch": { - "p50": 115.90400338172913, - "p90": 130.49599528312683, - "p95": 136.86400651931763, - "p99": 152.319997549057 + "p50": 99.74399954080582, + "p90": 115.35999923944473, + "p95": 117.37599968910217, + 
"p99": 125.2799928188324 }, "combine": { - "p50": 108.92800241708755, - "p90": 121.31199985742569, - "p95": 126.8479973077774, - "p99": 144.06399428844452 + "p50": 90.55999666452408, + "p90": 103.61599922180176, + "p95": 104.19200360774994, + "p99": 104.8320010304451 }, "roundtrip": { - "p50": 201.08799636363983, - "p90": 216.5759950876236, - "p95": 222.33599424362183, - "p99": 238.5919988155365 + "p50": 163.87200355529785, + "p90": 178.0479997396469, + "p95": 180.2240014076233, + "p99": 185.47199666500092 }, "isolatedSum": { - "p50": 224.83200579881668, - "p90": 251.80799514055252, - "p95": 263.71200382709503, - "p99": 296.3839918375015 + "p50": 190.3039962053299, + "p90": 218.9759984612465, + "p95": 221.5680032968521, + "p99": 230.1119938492775 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77529088, - "combineLogicalBytes": 77529088, - "fanoutMean": 5.2915849685668945, - "recvTokensMax": 722, + "dispatchLogicalBytes": 38148096, + "combineLogicalBytes": 38148096, + "fanoutMean": 5.197265625, + "recvTokensMax": 350, "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 113.66400122642517, + "p90": 132.22399353981018, + "p95": 133.88800621032715, + "p99": 139.64800536632538 + }, + "combine": { + "p50": 106.59199953079224, + "p90": 114.75200206041336, + "p95": 119.99999731779099, + "p99": 121.91999703645706 + }, + "roundtrip": { + "p50": 198.91199469566345, + "p90": 213.69600296020508, + "p95": 216.0319983959198, + "p99": 220.60799598693848 + }, + "isolatedSum": { + "p50": 220.2560007572174, + "p90": 246.97599560022354, + "p95": 253.88800352811813, + "p99": 261.56800240278244 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 76955648, + "combineLogicalBytes": 76955648, + "fanoutMean": 5.2421875, + "recvTokensMax": 687, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 } ] }, { - "id": "cx-2de6a2af", - "identity": 
"h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform·linear|8|decode|normal|none|linear|0|tuned||b029c1a6fded400", - "colorKey": "h200_58b5650b", - "comparisonKey": "4dde4e46080a91eb", + "id": "cx-4eb12954", + "identity": "h100|deepep|7168|8|256|bf16|normal|cached-layout-comm-only-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|ac583971f94b176", + "colorKey": "h100_5df912ff", + "comparisonKey": "5074d4febd922e2d", "schemaVersion": 3, - "generatedAt": "2026-06-27T00:14:22.294115+00:00", + "generatedAt": "2026-06-26T17:28:11.272284+00:00", "status": "valid", - "publicationStatus": "comparable-experimental", - "runner": "h200-dgxc-slurm_9", - "sku": "h200", + "publicationStatus": "official", + "runner": "h100-dgxc-slurm_10", + "sku": "h100", "backend": "deepep", "phase": "decode", "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", + "resourceMode": "normalized", + "suite": "resource-constrained", "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", + "measurementContract": "cached-layout-comm-only-v1", + "topologyClass": "h100-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H200 EP8 · deepep · bf16 · uniform·linear", + "label": "H100 EP8 · deepep · bf16 (norm) [cl]", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, "experts": 256, "routing": "uniform", - "routingLabel": "uniform·linear", + "routingLabel": "uniform", "routingStep": 0, - "unevenTokens": "linear", + "unevenTokens": "none", "eplbEnabled": false, "dispatchDtype": "bf16", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, + "requestedFraction": 0.18, + "achievedFraction": 0.1818, + "configuredUnits": 24, "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", + "resourceClass": 
"resource-constrained", + "conformanceClass": "resource-conforming", "fixedKernel": false, - "paretoEligible": false + "paretoEligible": true }, "placement": { "kind": "packed", @@ -47511,53 +49485,201 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "b029c1a6fded400", - "workloadId": null, - "workloadSource": "seeded-runtime", + "traceSignature": "ac583971f94b176", + "workloadId": "set:8:d8d49658059863f2", + "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, "backendVersion": "1.2.1", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28272382939", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272382939", - "createdAt": "2026-06-27T00:12:51Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28254332840", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254332840", + "createdAt": "2026-06-26T17:28:11.272284+00:00", + "sha": "60dec7d70f554e252fec87709e2be52752947db1" }, "rows": [ { - "tokensPerRank": 8, + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 78.91199737787247, + "p90": 85.21600067615509, + "p95": 87.20000088214874, + "p99": 93.34400296211243 + }, + "combine": { + "p50": 79.68000322580338, + "p90": 81.60000294446945, + "p95": 86.91199868917465, + "p99": 88.54400366544724 + }, + "roundtrip": { + "p50": 133.69600474834442, + "p90": 141.184002161026, + "p95": 143.2960033416748, + "p99": 151.48800611495972 + }, + "isolatedSum": { + "p50": 158.59200060367584, + "p90": 166.81600362062454, + "p95": 174.1119995713234, + "p99": 181.88800662755966 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + 
"dispatch": { + "p50": 62.39999830722809, + "p90": 84.35200154781342, + "p95": 87.00799942016602, + "p99": 96.57599776983261 + }, + "combine": { + "p50": 71.99999690055847, + "p90": 81.02399855852127, + "p95": 81.44000172615051, + "p99": 87.80799806118011 + }, + "roundtrip": { + "p50": 116.7680025100708, + "p90": 140.00000059604645, + "p95": 141.6960060596466, + "p99": 143.96800100803375 + }, + "isolatedSum": { + "p50": 134.39999520778656, + "p90": 165.3760001063347, + "p95": 168.44800114631653, + "p99": 184.38399583101273 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 62.01599910855293, + "p90": 82.56000280380249, + "p95": 84.76799726486206, + "p99": 91.90399944782257 + }, + "combine": { + "p50": 72.89600372314453, + "p90": 86.94399893283844, + "p95": 87.61599659919739, + "p99": 88.22400122880936 + }, + "roundtrip": { + "p50": 116.57600104808807, + "p90": 143.13599467277527, + "p95": 144.96000111103058, + "p99": 189.40800428390503 + }, + "isolatedSum": { + "p50": 134.91200283169746, + "p90": 169.50400173664093, + "p95": 172.38399386405945, + "p99": 180.12800067663193 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 75.19999891519547, - "p90": 97.18400239944458, - "p95": 107.84000158309937, - "p99": 136.1279934644699 + "p50": 66.78400188684464, + "p90": 82.46400207281113, + "p95": 85.1840004324913, + "p99": 90.65599739551544 }, "combine": { - "p50": 68.9919963479042, - "p90": 80.48000186681747, - "p95": 86.62399649620056, - "p99": 96.47999703884125 + "p50": 
73.02399724721909, + "p90": 86.87999844551086, + "p95": 87.55200356245041, + "p99": 88.57599645853043 }, "roundtrip": { - "p50": 122.27199971675873, - "p90": 154.6880006790161, - "p95": 166.97600483894348, - "p99": 202.78400182724 + "p50": 116.67200177907944, + "p90": 142.4960047006607, + "p95": 143.64799857139587, + "p99": 149.1200029850006 }, "isolatedSum": { - "p50": 144.19199526309967, - "p90": 177.66400426626205, - "p95": 194.46399807929993, - "p99": 232.60799050331116 + "p50": 139.80799913406372, + "p90": 169.344000518322, + "p95": 172.7360039949417, + "p99": 179.23199385404587 }, "roundtripMeasured": true, "dispatchLogicalBytes": 4974592, "combineLogicalBytes": 4974592, "fanoutMean": 5.421875, "recvTokensMax": 47, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 78.97599786520004, + "p90": 84.83199775218964, + "p95": 86.94399893283844, + "p99": 90.87999910116196 + }, + "combine": { + "p50": 80.4160013794899, + "p90": 87.99999952316284, + "p95": 88.25600147247314, + "p99": 89.75999802350998 + }, + "roundtrip": { + "p50": 116.73600226640701, + "p90": 140.00000059604645, + "p95": 143.23200285434723, + "p99": 146.94400131702423 + }, + "isolatedSum": { + "p50": 159.39199924468994, + "p90": 172.83199727535248, + "p95": 175.20000040531158, + "p99": 180.63999712467194 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, "stragglerRank": 5, "correct": true, "samplesPooled": 600, @@ -47567,35 +49689,72 @@ "tokensPerRank": 32, "globalTokens": 256, "dispatch": { - "p50": 85.69599688053131, - "p90": 105.8880016207695, - "p95": 113.63200098276138, - "p99": 147.2959965467453 + "p50": 75.58400183916092, + "p90": 84.6719965338707, + "p95": 86.20800077915192, + "p99": 90.97599983215332 }, "combine": { - "p50": 78.40000092983246, - "p90": 89.85599875450134, - "p95": 
95.93600034713745, - "p99": 106.84800148010254 + "p50": 80.19199967384338, + "p90": 88.51200342178345, + "p95": 95.10400146245956, + "p99": 111.77600175142288 }, "roundtrip": { - "p50": 134.62400436401367, - "p90": 154.81600165367126, - "p95": 166.1120057106018, - "p99": 190.0160014629364 + "p50": 143.16800236701965, + "p90": 153.28000485897064, + "p95": 154.7520011663437, + "p99": 170.6240028142929 }, "isolatedSum": { - "p50": 164.09599781036377, - "p90": 195.74400037527084, - "p95": 209.56800132989883, - "p99": 254.14399802684784 + "p50": 155.7760015130043, + "p90": 173.18399995565414, + "p95": 181.31200224161148, + "p99": 202.7520015835762 }, "roundtripMeasured": true, "dispatchLogicalBytes": 19726336, "combineLogicalBytes": 19726336, "fanoutMean": 5.375, "recvTokensMax": 182, - "stragglerRank": 3, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 85.15200018882751, + "p90": 96.67199850082397, + "p95": 98.30400347709656, + "p99": 158.65600109100342 + }, + "combine": { + "p50": 91.20000153779984, + "p90": 105.02400249242783, + "p95": 106.04800283908844, + "p99": 127.87200510501862 + }, + "roundtrip": { + "p50": 151.8079936504364, + "p90": 167.67999529838562, + "p95": 172.06400632858276, + "p99": 198.2399970293045 + }, + "isolatedSum": { + "p50": 176.35200172662735, + "p90": 201.6960009932518, + "p95": 204.352006316185, + "p99": 286.52800619602203 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -47604,35 +49763,35 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 117.76000261306763, - "p90": 139.13600146770477, - "p95": 149.9200016260147, - "p99": 190.94400107860565 + "p50": 104.73600029945374, + "p90": 119.64800208806992, + "p95": 128.03199887275696, + "p99": 
401.43999457359314 }, "combine": { - "p50": 114.88000303506851, - "p90": 121.88799679279327, - "p95": 128.1599998474121, - "p99": 155.61600029468536 + "p50": 106.49599879980087, + "p90": 120.83200365304947, + "p95": 121.47200107574463, + "p99": 128.00000607967377 }, "roundtrip": { - "p50": 208.25600624084473, - "p90": 228.57600450515747, - "p95": 237.37600445747375, - "p99": 271.64798974990845 + "p50": 187.45599687099457, + "p90": 201.34399831295013, + "p95": 202.55999267101288, + "p99": 206.68800175189972 }, "isolatedSum": { - "p50": 232.64000564813614, - "p90": 261.02399826049805, - "p95": 278.0800014734268, - "p99": 346.560001373291 + "p50": 211.2319990992546, + "p90": 240.48000574111938, + "p95": 249.5039999485016, + "p99": 529.4400006532669 }, "roundtripMeasured": true, "dispatchLogicalBytes": 77672448, "combineLogicalBytes": 77672448, "fanoutMean": 5.291015625, "recvTokensMax": 723, - "stragglerRank": 5, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -47640,37 +49799,38 @@ ] }, { - "id": "cx-6ff3844b", - "identity": "h200|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|uniform+eplb|8|decode|normal|none|none|0|tuned||73351bbcd4d02de", - "colorKey": "h200_580d7b05", - "comparisonKey": "46ecc7ff5ccb7c5d", + "id": "cx-76b84ec2", + "identity": "h100|deepep|7168|8|256|bf16|normal|cached-layout-comm-only-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h100_17694d2c", + "comparisonKey": "d31efe4aa43e0223", "schemaVersion": 3, - "generatedAt": "2026-06-27T00:02:26.011362+00:00", + "generatedAt": "2026-06-26T23:47:16.080205+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h200-dgxc-slurm_11", - "sku": "h200", + "runner": "h100-dgxc-slurm_11", + "sku": "h100", "backend": "deepep", "phase": "decode", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": 
"h200-nvlink-island", + "measurementContract": "cached-layout-comm-only-v1", + "topologyClass": "h100-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H200 EP8 · deepep · bf16 · uniform+eplb", + "label": "H100 EP8 · deepep · bf16 [cl]", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, - "experts": 288, + "experts": 256, "routing": "uniform", - "routingLabel": "uniform+eplb", + "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", - "eplbEnabled": true, + "eplbEnabled": false, "dispatchDtype": "bf16", "activationProfile": "normal", "combineQuantMode": "none" @@ -47692,18 +49852,18 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "73351bbcd4d02de", + "traceSignature": "ac583971f94b176", "workloadId": "set:8:d8d49658059863f2", "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": 1.078125, - "eplbImbalanceAfter": 1.00048828125, + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, "backendVersion": "1.2.1", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28272020269", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272020269", - "createdAt": "2026-06-27T00:01:03Z", + "id": "28271551406", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271551406", + "createdAt": "2026-06-26T23:47:16.080205+00:00", "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" }, "rows": [ @@ -47711,35 +49871,35 @@ "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 76.76800340414047, - "p90": 99.23200309276581, - "p95": 110.59200018644333, - "p99": 139.71200585365295 + "p50": 77.7600035071373, + "p90": 84.25600081682205, + "p95": 86.496002972126, + "p99": 92.57599711418152 }, "combine": { - "p50": 68.1919977068901, - "p90": 80.09599894285202, - "p95": 84.06399935483932, - "p99": 98.65599870681763 + "p50": 75.9039968252182, + "p90": 
81.95199817419052, + "p95": 82.40000158548355, + "p99": 87.2960016131401 }, "roundtrip": { - "p50": 123.16799908876419, - "p90": 143.90400052070618, - "p95": 155.8080017566681, - "p99": 181.5679967403412 + "p50": 131.45600259304047, + "p90": 136.25599443912506, + "p95": 138.59200477600098, + "p99": 142.68800616264343 }, "isolatedSum": { - "p50": 144.96000111103058, - "p90": 179.32800203561783, - "p95": 194.65599954128265, - "p99": 238.36800456047058 + "p50": 153.6640003323555, + "p90": 166.20799899101257, + "p95": 168.89600455760956, + "p99": 179.87199872732162 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 516096, - "combineLogicalBytes": 516096, - "fanoutMean": 4.5, - "recvTokensMax": 6, - "stragglerRank": 2, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -47748,35 +49908,35 @@ "tokensPerRank": 2, "globalTokens": 16, "dispatch": { - "p50": 76.09599828720093, - "p90": 102.55999863147736, - "p95": 112.70400136709213, - "p99": 138.5599970817566 + "p50": 66.23999774456024, + "p90": 80.99199831485748, + "p95": 83.13599973917007, + "p99": 87.52000331878662 }, "combine": { - "p50": 69.95200365781784, - "p90": 79.83999699354172, - "p95": 83.39200168848038, - "p99": 91.93599969148636 + "p50": 72.06399738788605, + "p90": 81.85599744319916, + "p95": 82.11199939250946, + "p99": 85.91999858617783 }, "roundtrip": { - "p50": 125.791996717453, - "p90": 143.96800100803375, - "p95": 156.67200088500977, - "p99": 176.5120029449463 + "p50": 115.55200070142746, + "p90": 136.06399297714233, + "p95": 137.9839926958084, + "p99": 142.4960047006607 }, "isolatedSum": { - "p50": 146.04800194501877, - "p90": 182.39999562501907, - "p95": 196.0960030555725, - "p99": 230.49599677324295 + "p50": 138.3039951324463, + "p90": 162.84799575805664, + "p95": 165.24799913167953, + "p99": 173.44000190496445 }, "roundtripMeasured": true, - 
"dispatchLogicalBytes": 1089536, - "combineLogicalBytes": 1089536, - "fanoutMean": 4.75, - "recvTokensMax": 11, - "stragglerRank": 1, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 @@ -47785,35 +49945,35 @@ "tokensPerRank": 4, "globalTokens": 32, "dispatch": { - "p50": 77.05599814653397, - "p90": 99.0080013871193, - "p95": 106.6880002617836, - "p99": 139.77600634098053 + "p50": 77.60000228881836, + "p90": 81.69600367546082, + "p95": 83.93599838018417, + "p99": 89.02399986982346 }, "combine": { - "p50": 70.04799693822861, - "p90": 82.49600231647491, - "p95": 85.56800335645676, - "p99": 100.09600222110748 + "p50": 79.52000200748444, + "p90": 82.20800012350082, + "p95": 83.16799998283386, + "p99": 87.2960016131401 }, "roundtrip": { - "p50": 130.17599284648895, - "p90": 161.6320013999939, - "p95": 169.24799978733063, - "p99": 194.43200528621674 + "p50": 133.82400572299957, + "p90": 140.86399972438812, + "p95": 143.10400187969208, + "p99": 149.72800016403198 }, "isolatedSum": { - "p50": 147.10399508476257, - "p90": 181.5040037035942, - "p95": 192.25600361824036, - "p99": 239.872008562088 + "p50": 157.1200042963028, + "p90": 163.90400379896164, + "p95": 167.10399836301804, + "p99": 176.32000148296356 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2207744, - "combineLogicalBytes": 2207744, - "fanoutMean": 4.8125, - "recvTokensMax": 23, - "stragglerRank": 1, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 @@ -47822,35 +49982,35 @@ "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 77.34400033950806, - "p90": 90.94399958848953, - "p95": 97.9200005531311, - "p99": 113.18399757146835 + "p50": 77.66400277614594, + "p90": 83.13599973917007, + "p95": 87.8399983048439, + "p99": 
131.67999684810638 }, "combine": { - "p50": 71.19999825954437, - "p90": 79.9039974808693, - "p95": 84.06399935483932, - "p99": 113.02399635314941 + "p50": 81.216000020504, + "p90": 82.71999657154083, + "p95": 84.03199911117554, + "p99": 90.20800143480301 }, "roundtrip": { - "p50": 130.0159990787506, - "p90": 153.08800339698792, - "p95": 165.24800658226013, - "p99": 195.3279972076416 + "p50": 134.68800485134125, + "p90": 139.55199718475342, + "p95": 142.752006649971, + "p99": 145.56799829006195 }, "isolatedSum": { - "p50": 148.54399859905243, - "p90": 170.84799706935883, - "p95": 181.98399990797043, - "p99": 226.20799392461777 + "p50": 158.88000279664993, + "p90": 165.8559963107109, + "p95": 171.87199741601944, + "p99": 221.8879982829094 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4558848, - "combineLogicalBytes": 4558848, - "fanoutMean": 4.96875, - "recvTokensMax": 46, - "stragglerRank": 2, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -47859,35 +50019,35 @@ "tokensPerRank": 16, "globalTokens": 128, "dispatch": { - "p50": 83.48800241947174, - "p90": 106.20799660682678, - "p95": 114.78400230407715, - "p99": 256.0960054397583 + "p50": 77.79199630022049, + "p90": 81.66400343179703, + "p95": 84.73599702119827, + "p99": 87.23200112581253 }, "combine": { - "p50": 72.9919970035553, - "p90": 86.17600053548813, - "p95": 91.51999652385712, - "p99": 108.83200168609619 + "p50": 81.69600367546082, + "p90": 84.79999750852585, + "p95": 88.95999938249588, + "p99": 90.27200192213058 }, "roundtrip": { - "p50": 132.9919993877411, - "p90": 166.24000668525696, - "p95": 176.35199427604675, - "p99": 203.5519927740097 - }, - "isolatedSum": { - "p50": 156.47999942302704, - "p90": 192.3839971423149, - "p95": 206.30399882793427, - "p99": 364.9280071258545 + "p50": 135.29600203037262, + "p90": 143.5839980840683, + "p95": 
144.96000111103058, + "p99": 150.30400454998016 + }, + "isolatedSum": { + "p50": 159.4879999756813, + "p90": 166.46400094032288, + "p95": 173.69599640369415, + "p99": 177.50400304794312 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 9347072, - "combineLogicalBytes": 9347072, - "fanoutMean": 5.09375, - "recvTokensMax": 86, - "stragglerRank": 1, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 @@ -47896,35 +50056,35 @@ "tokensPerRank": 32, "globalTokens": 256, "dispatch": { - "p50": 89.1840010881424, - "p90": 104.51199859380722, - "p95": 112.44799941778183, - "p99": 135.5839967727661 + "p50": 83.42400193214417, + "p90": 88.3840024471283, + "p95": 89.28000181913376, + "p99": 95.20000219345093 }, "combine": { - "p50": 79.3600007891655, - "p90": 87.26400136947632, - "p95": 92.73599833250046, - "p99": 111.32799834012985 + "p50": 81.44000172615051, + "p90": 89.9839997291565, + "p95": 90.27200192213058, + "p99": 92.47999638319016 }, "roundtrip": { - "p50": 139.90400731563568, - "p90": 159.2639982700348, - "p95": 169.3439930677414, - "p99": 189.02400135993958 + "p50": 129.18399274349213, + "p90": 144.51199769973755, + "p95": 147.0080018043518, + "p99": 152.73599326610565 }, "isolatedSum": { - "p50": 168.5440018773079, - "p90": 191.77599996328354, - "p95": 205.1839977502823, - "p99": 246.91199511289597 + "p50": 164.86400365829468, + "p90": 178.3680021762848, + "p95": 179.55200374126434, + "p99": 187.67999857664108 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 18995200, - "combineLogicalBytes": 18995200, - "fanoutMean": 5.17578125, - "recvTokensMax": 178, - "stragglerRank": 1, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 @@ -47933,35 +50093,35 @@ "tokensPerRank": 64, 
"globalTokens": 512, "dispatch": { - "p50": 98.2080027461052, - "p90": 113.40799927711487, - "p95": 119.99999731779099, - "p99": 140.19200205802917 + "p50": 93.56799721717834, + "p90": 101.40799731016159, + "p95": 102.36799716949463, + "p99": 109.47199910879135 }, "combine": { - "p50": 89.12000060081482, - "p90": 98.7199991941452, - "p95": 102.7199998497963, - "p99": 111.455999314785 + "p50": 94.81599926948547, + "p90": 99.61599856615067, + "p95": 102.33599692583084, + "p99": 105.82400113344193 }, "roundtrip": { - "p50": 162.7199947834015, - "p90": 182.0800006389618, - "p95": 189.60000574588776, - "p99": 210.4640007019043 + "p50": 158.78400206565857, + "p90": 165.72800278663635, + "p95": 167.04000532627106, + "p99": 170.01600563526154 }, "isolatedSum": { - "p50": 187.32800334692, - "p90": 212.12799847126007, - "p95": 222.71999716758728, - "p99": 251.64800137281418 + "p50": 188.38399648666382, + "p90": 201.02399587631226, + "p95": 204.70399409532547, + "p99": 215.29600024223328 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 38291456, - "combineLogicalBytes": 38291456, - "fanoutMean": 5.216796875, - "recvTokensMax": 348, - "stragglerRank": 1, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -47970,34 +50130,34 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 122.46400117874146, - "p90": 136.51199638843536, - "p95": 143.64799857139587, - "p99": 156.41599893569946 + "p50": 112.41599917411804, + "p90": 120.4800009727478, + "p95": 123.48800152540207, + "p99": 303.6800026893616 }, "combine": { - "p50": 106.33599758148193, - "p90": 117.91999638080597, - "p95": 122.079998254776, - "p99": 132.09599256515503 + "p50": 111.90400272607803, + "p90": 117.34399944543839, + "p95": 120.03199756145477, + "p99": 125.08800625801086 }, "roundtrip": { - "p50": 200.15999674797058, - "p90": 217.72800385951996, - 
"p95": 223.29600155353546, - "p99": 246.87999486923218 + "p50": 192.80000030994415, + "p90": 199.74400103092194, + "p95": 201.9519954919815, + "p99": 206.9759964942932 }, "isolatedSum": { - "p50": 228.7999987602234, - "p90": 254.43199276924133, - "p95": 265.7279968261719, - "p99": 288.5119915008545 + "p50": 224.32000190019608, + "p90": 237.8240004181862, + "p95": 243.51999908685684, + "p99": 428.76800894737244 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77113344, - "combineLogicalBytes": 77113344, - "fanoutMean": 5.2529296875, - "recvTokensMax": 685, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, "stragglerRank": 0, "correct": true, "samplesPooled": 600, @@ -48006,34 +50166,35 @@ ] }, { - "id": "cx-f68ea439", - "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|decode|normal|none|none|0|tuned||14ded8461f2636c", - "colorKey": "h200_b6aa6110", - "comparisonKey": "5971fba5c9d29fa7", + "id": "cx-6f4d88a5", + "identity": "h100|deepep|7168|8|256|bf16|ll|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h100_8abde1a9", + "comparisonKey": "a63125ec759ccc03", "schemaVersion": 3, - "generatedAt": "2026-06-27T00:03:10.278228+00:00", + "generatedAt": "2026-06-26T23:48:24.132792+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h200-dgxc-slurm_5", - "sku": "h200", + "runner": "h100-dgxc-slurm_00", + "sku": "h100", "backend": "deepep", "phase": "decode", - "mode": "normal", + "mode": "ll", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", + "topologyClass": "h100-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H200 EP8 · deepep · bf16 · zipf", + "label": "H100 EP8 · deepep · bf16 LL", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 
7168, "topk": 8, "experts": 256, - "routing": "zipf", - "routingLabel": "zipf", + "routing": "uniform", + "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, @@ -48043,12 +50204,12 @@ }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, + "achievedFraction": null, + "configuredUnits": null, "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, "paretoEligible": false }, "placement": { @@ -48058,8 +50219,8 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "14ded8461f2636c", - "workloadId": "set:8:f5576e2b712d38c3", + "traceSignature": "ac583971f94b176", + "workloadId": "set:8:d8d49658059863f2", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -48067,9 +50228,9 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28272042133", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272042133", - "createdAt": "2026-06-27T00:01:43Z", + "id": "28271587010", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271587010", + "createdAt": "2026-06-26T23:48:24.132792+00:00", "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" }, "rows": [ @@ -48077,35 +50238,35 @@ "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 71.68000191450119, - "p90": 93.44000369310379, - "p95": 102.68799960613251, - "p99": 140.1599943637848 + "p50": 47.29599878191948, + "p90": 48.70399832725525, + "p95": 49.02400076389313, + "p99": 54.75199967622757 }, "combine": { - "p50": 67.4239993095398, - "p90": 79.45600152015686, - "p95": 86.496002972126, - "p99": 106.01600259542465 + "p50": 36.57599911093712, + "p90": 37.408001720905304, + 
"p95": 38.59199956059456, + "p99": 44.60800066590309 }, "roundtrip": { - "p50": 119.4240003824234, - "p90": 146.59200608730316, - "p95": 155.07200360298157, - "p99": 181.34400248527527 + "p50": 58.97599831223488, + "p90": 66.6240006685257, + "p95": 67.1359971165657, + "p99": 67.6800012588501 }, "isolatedSum": { - "p50": 139.10400122404099, - "p90": 172.89600521326065, - "p95": 189.18400257825851, - "p99": 246.17599695920944 + "p50": 83.8719978928566, + "p90": 86.11200004816055, + "p95": 87.61600032448769, + "p99": 99.36000034213066 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 444416, - "combineLogicalBytes": 444416, - "fanoutMean": 3.875, - "recvTokensMax": 8, - "stragglerRank": 5, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 14, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 @@ -48114,35 +50275,35 @@ "tokensPerRank": 2, "globalTokens": 16, "dispatch": { - "p50": 73.69600236415863, - "p90": 100.92800110578537, - "p95": 109.66400057077408, - "p99": 146.04799449443817 + "p50": 40.32000154256821, + "p90": 48.51200059056282, + "p95": 48.73599857091904, + "p99": 53.82400006055832 }, "combine": { - "p50": 68.28799843788147, - "p90": 80.76799660921097, - "p95": 85.69599688053131, - "p99": 152.8320014476776 + "p50": 35.77600046992302, + "p90": 37.02399879693985, + "p95": 38.94399851560593, + "p99": 44.47999969124794 }, "roundtrip": { - "p50": 121.15199863910675, - "p90": 147.77599275112152, - "p95": 155.71199357509613, - "p99": 193.7599927186966 + "p50": 56.57599866390228, + "p90": 65.05600363016129, + "p95": 66.27199798822403, + "p99": 67.07199662923813 }, "isolatedSum": { - "p50": 141.9840008020401, - "p90": 181.69599771499634, - "p95": 195.3599974513054, - "p99": 298.8799959421158 + "p50": 76.09600201249123, + "p90": 85.53599938750267, + "p95": 87.67999708652496, + "p99": 98.30399975180626 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 845824, - 
"combineLogicalBytes": 845824, - "fanoutMean": 3.6875, - "recvTokensMax": 16, - "stragglerRank": 6, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 21, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -48151,35 +50312,35 @@ "tokensPerRank": 4, "globalTokens": 32, "dispatch": { - "p50": 72.73600250482559, - "p90": 96.12800180912018, - "p95": 103.90400141477585, - "p99": 168.06399822235107 + "p50": 42.27200150489807, + "p90": 48.70399832725525, + "p95": 49.056001007556915, + "p99": 55.39200082421303 }, "combine": { - "p50": 66.91200286149979, - "p90": 78.65600287914276, - "p95": 82.2720006108284, - "p99": 94.71999853849411 + "p50": 36.70400008559227, + "p90": 37.50399872660637, + "p95": 43.07200014591217, + "p99": 45.05600035190582 }, "roundtrip": { - "p50": 118.9119964838028, - "p90": 143.8080072402954, - "p95": 155.71199357509613, - "p99": 209.6959948539734 + "p50": 59.167999774217606, + "p90": 66.880002617836, + "p95": 67.45599955320358, + "p99": 68.57600063085556 }, "isolatedSum": { - "p50": 139.64800536632538, - "p90": 174.78400468826294, - "p95": 186.17600202560425, - "p99": 262.7839967608452 + "p50": 78.97600159049034, + "p90": 86.20799705386162, + "p95": 92.12800115346909, + "p99": 100.44800117611885 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1691648, - "combineLogicalBytes": 1691648, - "fanoutMean": 3.6875, - "recvTokensMax": 32, - "stragglerRank": 6, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 39, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 @@ -48188,35 +50349,35 @@ "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 73.56800138950348, - "p90": 93.82399916648865, - "p95": 101.47199779748917, - "p99": 132.7359974384308 + "p50": 47.359999269247055, + "p90": 48.70399832725525, + "p95": 48.895999789237976, + "p99": 55.26399984955788 }, "combine": { - 
"p50": 67.6800012588501, - "p90": 79.6160027384758, - "p95": 83.23200047016144, - "p99": 101.21600329875946 + "p50": 36.57599911093712, + "p90": 43.2640016078949, + "p95": 43.776001781225204, + "p99": 45.024000108242035 }, "roundtrip": { - "p50": 119.26399916410446, - "p90": 145.24799585342407, - "p95": 154.4959992170334, - "p99": 191.71200692653656 + "p50": 64.67200070619583, + "p90": 67.10399687290192, + "p95": 67.29599833488464, + "p99": 69.47200000286102 }, "isolatedSum": { - "p50": 141.24800264835358, - "p90": 173.44000190496445, - "p95": 184.7039982676506, - "p99": 233.95200073719025 + "p50": 83.93599838018417, + "p90": 91.96799993515015, + "p95": 92.67200157046318, + "p99": 100.28799995779991 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 3354624, - "combineLogicalBytes": 3354624, - "fanoutMean": 3.65625, - "recvTokensMax": 64, - "stragglerRank": 5, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 74, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 @@ -48225,35 +50386,35 @@ "tokensPerRank": 16, "globalTokens": 128, "dispatch": { - "p50": 77.66400277614594, - "p90": 104.12800312042236, - "p95": 114.30399864912033, - "p99": 140.6400054693222 + "p50": 48.448000103235245, + "p90": 55.64799904823303, + "p95": 56.2559999525547, + "p99": 56.89600110054016 }, "combine": { - "p50": 70.8480030298233, - "p90": 84.32000130414963, - "p95": 90.7519981265068, - "p99": 122.27199971675873 + "p50": 43.776001781225204, + "p90": 44.73600164055824, + "p95": 44.89599913358688, + "p99": 48.22399839758873 }, "roundtrip": { - "p50": 125.95200538635254, - "p90": 157.151997089386, - "p95": 166.81599617004395, - "p99": 207.23199844360352 + "p50": 66.880002617836, + "p90": 73.82400333881378, + "p95": 74.68800246715546, + "p99": 75.29599964618683 }, "isolatedSum": { - "p50": 148.51200580596924, - "p90": 188.448004424572, - "p95": 205.05599677562714, - "p99": 262.91200518608093 + "p50": 
92.22400188446045, + "p90": 100.38400068879128, + "p95": 101.15199908614159, + "p99": 105.11999949812889 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 6537216, - "combineLogicalBytes": 6537216, - "fanoutMean": 3.5625, - "recvTokensMax": 127, - "stragglerRank": 5, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 145, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 @@ -48262,35 +50423,35 @@ "tokensPerRank": 32, "globalTokens": 256, "dispatch": { - "p50": 82.94399827718735, - "p90": 101.56799852848053, - "p95": 110.88000237941742, - "p99": 162.11199760437012 + "p50": 49.60000142455101, + "p90": 56.8000003695488, + "p95": 57.08799883723259, + "p99": 59.167999774217606 }, "combine": { - "p50": 76.31999999284744, - "p90": 87.67999708652496, - "p95": 90.68799763917923, - "p99": 98.33600372076035 + "p50": 51.00800096988678, + "p90": 52.86400020122528, + "p95": 53.0879981815815, + "p99": 53.98400127887726 }, "roundtrip": { - "p50": 135.71199774742126, - "p90": 155.20000457763672, - "p95": 165.6000018119812, - "p99": 222.27199375629425 + "p50": 75.39200037717819, + "p90": 83.26400071382523, + "p95": 83.74399691820145, + "p99": 84.63999629020691 }, "isolatedSum": { - "p50": 159.2639982700348, - "p90": 189.2479956150055, - "p95": 201.56800001859665, - "p99": 260.44800132513046 + "p50": 100.60800239443779, + "p90": 109.66400057077408, + "p95": 110.17599701881409, + "p99": 113.15200105309486 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 12859392, - "combineLogicalBytes": 12859392, - "fanoutMean": 3.50390625, - "recvTokensMax": 255, - "stragglerRank": 5, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 287, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 @@ -48299,35 +50460,35 @@ "tokensPerRank": 64, "globalTokens": 512, "dispatch": { - "p50": 96.76799923181534, - "p90": 
109.11999642848969, - "p95": 116.5120005607605, - "p99": 174.01599884033203 + "p50": 64.92800265550613, + "p90": 67.45599955320358, + "p95": 72.41600006818771, + "p99": 74.0479975938797 }, "combine": { - "p50": 86.17600053548813, - "p90": 97.31200337409973, - "p95": 103.07200253009796, - "p99": 120.64000219106674 + "p50": 61.055999249219894, + "p90": 63.1679967045784, + "p95": 68.54400038719177, + "p99": 77.18399912118912 }, "roundtrip": { - "p50": 160.67199409008026, - "p90": 175.61599612236023, - "p95": 181.40800297260284, - "p99": 218.9439982175827 + "p50": 105.76000064611435, + "p90": 108.67200046777725, + "p95": 109.18399691581726, + "p99": 113.69600147008896 }, "isolatedSum": { - "p50": 182.94399976730347, - "p90": 206.43199980258942, - "p95": 219.58400309085846, - "p99": 294.6560010313988 + "p50": 125.98400190472603, + "p90": 130.62399625778198, + "p95": 140.9600004553795, + "p99": 151.23199671506882 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 25145344, - "combineLogicalBytes": 25145344, - "fanoutMean": 3.42578125, - "recvTokensMax": 510, - "stragglerRank": 6, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 564, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 @@ -48336,35 +50497,35 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 122.3360002040863, - "p90": 132.47999548912048, - "p95": 135.51999628543854, - "p99": 155.90399503707886 + "p50": 86.04799956083298, + "p90": 91.71199798583984, + "p95": 92.83199906349182, + "p99": 94.62399780750275 }, "combine": { - "p50": 112.86400258541107, - "p90": 121.8239963054657, - "p95": 126.62400305271149, - "p99": 136.76799833774567 + "p50": 94.36800330877304, + "p90": 96.79999947547913, + "p95": 97.82399982213974, + "p99": 218.78400444984436 }, "roundtrip": { - "p50": 214.52799439430237, - "p90": 232.92799293994904, - "p95": 243.42399835586548, - "p99": 306.97599053382874 + "p50": 152.8960019350052, 
+ "p90": 158.91200304031372, + "p95": 159.67999398708344, + "p99": 163.2000058889389 }, "isolatedSum": { - "p50": 235.20000278949738, - "p90": 254.30399179458618, - "p95": 262.14399933815, - "p99": 292.6719933748245 + "p50": 180.41600286960602, + "p90": 188.51199746131897, + "p95": 190.65599888563156, + "p99": 313.4080022573471 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 49946624, - "combineLogicalBytes": 49946624, - "fanoutMean": 3.40234375, - "recvTokensMax": 1022, - "stragglerRank": 5, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 1104, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 @@ -48372,34 +50533,35 @@ ] }, { - "id": "cx-9e42f709", - "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|decode|normal|none|none|0|tuned||1fa7fe74d0e30a3", - "colorKey": "h200_b6aa6110", - "comparisonKey": "5971fba5c9d29fa7", + "id": "cx-fecf5035", + "identity": "h100|deepep|7168|8|256|bf16|ll|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h100_45e1ef29", + "comparisonKey": "b17b52153b29fbde", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:56:48.444120+00:00", + "generatedAt": "2026-06-26T23:48:28.951078+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h200-dgxc-slurm_5", - "sku": "h200", + "runner": "h100-dgxc-slurm_11", + "sku": "h100", "backend": "deepep", "phase": "decode", - "mode": "normal", + "mode": "ll", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h100-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H200 EP8 · deepep · bf16 · zipf", + "label": "H100 EP8 · deepep · bf16 LL", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, 
"topk": 8, "experts": 256, - "routing": "zipf", - "routingLabel": "zipf", + "routing": "uniform", + "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, @@ -48409,12 +50571,12 @@ }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, + "achievedFraction": null, + "configuredUnits": null, "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, "paretoEligible": false }, "placement": { @@ -48424,8 +50586,8 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "1fa7fe74d0e30a3", - "workloadId": "set:4:f5576e2b712d38c3", + "traceSignature": "ac583971f94b176", + "workloadId": "set:8:d8d49658059863f2", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -48433,9 +50595,9 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271844665", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271844665", - "createdAt": "2026-06-26T23:55:26Z", + "id": "28271590306", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271590306", + "createdAt": "2026-06-26T23:48:28.951078+00:00", "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" }, "rows": [ @@ -48443,35 +50605,109 @@ "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 71.3919997215271, - "p90": 101.1200025677681, - "p95": 115.1999980211258, - "p99": 144.44799721240997 + "p50": 42.17600077390671, + "p90": 48.928000032901764, + "p95": 49.8879998922348, + "p99": 51.77599936723709 }, "combine": { - "p50": 64.4799992442131, - "p90": 82.78399705886841, - "p95": 91.48799628019333, - "p99": 104.67199981212616 + "p50": 36.99199855327606, + "p90": 38.176000118255615, + "p95": 
38.40000182390213, + "p99": 44.03200000524521 }, "roundtrip": { - "p50": 117.98399686813354, - "p90": 156.22399747371674, - "p95": 165.3120070695877, - "p99": 193.12000274658203 + "p50": 59.42400172352791, + "p90": 61.216000467538834, + "p95": 61.63199990987778, + "p99": 69.31199878454208 }, "isolatedSum": { - "p50": 135.8719989657402, - "p90": 183.9039996266365, - "p95": 206.68799430131912, - "p99": 249.11999702453613 + "p50": 79.16799932718277, + "p90": 87.10400015115738, + "p95": 88.28800171613693, + "p99": 95.8079993724823 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 444416, - "combineLogicalBytes": 444416, - "fanoutMean": 3.875, - "recvTokensMax": 8, - "stragglerRank": 6, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 14, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 42.30400174856186, + "p90": 49.375999718904495, + "p95": 49.95200037956238, + "p99": 51.80799961090088 + }, + "combine": { + "p50": 38.11199963092804, + "p90": 39.0079990029335, + "p95": 39.84000161290169, + "p99": 45.9199994802475 + }, + "roundtrip": { + "p50": 60.47999858856201, + "p90": 61.69600039720535, + "p95": 63.90400230884552, + "p99": 69.21599805355072 + }, + "isolatedSum": { + "p50": 80.4160013794899, + "p90": 88.383998721838, + "p95": 89.79200199246407, + "p99": 97.72799909114838 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 21, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 42.367998510599136, + "p90": 49.855999648571014, + "p95": 50.20799860358238, + "p99": 57.95200169086456 + }, + "combine": { + "p50": 37.47199848294258, + "p90": 38.7520007789135, + "p95": 39.03999924659729, + "p99": 
46.30399867892265 + }, + "roundtrip": { + "p50": 59.26400050520897, + "p90": 61.983998864889145, + "p95": 63.19999694824219, + "p99": 69.50400024652481 + }, + "isolatedSum": { + "p50": 79.83999699354172, + "p90": 88.60800042748451, + "p95": 89.24799785017967, + "p99": 104.25600036978722 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 39, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -48480,34 +50716,71 @@ "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 73.08799773454666, - "p90": 95.77599912881851, - "p95": 105.66399991512299, - "p99": 147.32800424098969 + "p50": 42.81599819660187, + "p90": 49.247998744249344, + "p95": 49.855999648571014, + "p99": 51.42400041222572 }, "combine": { - "p50": 67.6800012588501, - "p90": 82.59200304746628, - "p95": 89.02399986982346, - "p99": 108.64000022411346 + "p50": 37.9519984126091, + "p90": 38.784001022577286, + "p95": 40.352001786231995, + "p99": 46.39999940991402 }, "roundtrip": { - "p50": 121.2799996137619, - "p90": 152.63999998569489, - "p95": 167.4560010433197, - "p99": 201.7280012369156 + "p50": 60.63999980688095, + "p90": 68.35199892520905, + "p95": 68.80000233650208, + "p99": 69.88800317049026 }, "isolatedSum": { - "p50": 140.76799899339676, - "p90": 178.3680021762848, - "p95": 194.68799978494644, - "p99": 255.96800446510315 + "p50": 80.76799660921097, + "p90": 88.03199976682663, + "p95": 90.20800143480301, + "p99": 97.82399982213974 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 3354624, - "combineLogicalBytes": 3354624, - "fanoutMean": 3.65625, - "recvTokensMax": 64, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 74, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 43.74400153756142, + "p90": 
50.23999884724617, + "p95": 50.84799975156784, + "p99": 57.18399956822395 + }, + "combine": { + "p50": 38.2080003619194, + "p90": 45.791998505592346, + "p95": 46.08000069856644, + "p99": 49.056001007556915 + }, + "roundtrip": { + "p50": 66.91200286149979, + "p90": 69.15199756622314, + "p95": 69.98399645090103, + "p99": 76.7040029168129 + }, + "isolatedSum": { + "p50": 81.95200189948082, + "p90": 96.03199735283852, + "p95": 96.92800045013428, + "p99": 106.24000057578087 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 145, "stragglerRank": 4, "correct": true, "samplesPooled": 600, @@ -48517,34 +50790,71 @@ "tokensPerRank": 32, "globalTokens": 256, "dispatch": { - "p50": 81.66400343179703, - "p90": 98.30400347709656, - "p95": 109.98400300741196, - "p99": 134.14399325847626 + "p50": 50.464000552892685, + "p90": 52.352000027894974, + "p95": 57.023998349905014, + "p99": 59.90400165319443 }, "combine": { - "p50": 76.31999999284744, - "p90": 89.21600133180618, - "p95": 95.90400010347366, - "p99": 118.6240017414093 + "p50": 46.68800160288811, + "p90": 48.128001391887665, + "p95": 49.056001007556915, + "p99": 54.84800040721893 }, "roundtrip": { - "p50": 136.00000739097595, - "p90": 157.53600001335144, - "p95": 172.7360039949417, - "p99": 212.25599944591522 + "p50": 76.76800340414047, + "p90": 84.44800227880478, + "p95": 85.21600067615509, + "p99": 86.30400151014328 }, "isolatedSum": { - "p50": 157.98400342464447, - "p90": 187.52000480890274, - "p95": 205.88800311088562, - "p99": 252.76799499988556 + "p50": 97.15200215578079, + "p90": 100.48000141978264, + "p95": 106.07999935746193, + "p99": 114.75200206041336 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 12859392, - "combineLogicalBytes": 12859392, - "fanoutMean": 3.50390625, - "recvTokensMax": 255, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 287, + 
"stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 61.69600039720535, + "p90": 66.6240006685257, + "p95": 67.55200028419495, + "p99": 73.7600028514862 + }, + "combine": { + "p50": 62.17600032687187, + "p90": 63.551999628543854, + "p95": 64.06400352716446, + "p99": 70.49600034952164 + }, + "roundtrip": { + "p50": 102.11200267076492, + "p90": 109.8560020327568, + "p95": 110.27199774980545, + "p99": 111.39199882745743 + }, + "isolatedSum": { + "p50": 123.87200072407722, + "p90": 130.17600029706955, + "p95": 131.6160038113594, + "p99": 144.25600320100784 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 564, "stragglerRank": 4, "correct": true, "samplesPooled": 600, @@ -48554,34 +50864,34 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 122.52800166606903, - "p90": 136.6720050573349, - "p95": 148.15999567508698, - "p99": 160.89600324630737 + "p50": 85.56800335645676, + "p90": 89.50400352478027, + "p95": 90.14400094747543, + "p99": 95.45599669218063 }, "combine": { - "p50": 112.03200370073318, - "p90": 125.21600723266602, - "p95": 132.4480026960373, - "p99": 149.02399480342865 + "p50": 91.45600348711014, + "p90": 99.16800260543823, + "p95": 99.80800002813339, + "p99": 101.05600208044052 }, "roundtrip": { - "p50": 211.58400177955627, - "p90": 233.2800030708313, - "p95": 244.159996509552, - "p99": 292.03200340270996 + "p50": 158.52800011634827, + "p90": 164.60800170898438, + "p95": 166.52800142765045, + "p99": 168.38400065898895 }, "isolatedSum": { - "p50": 234.56000536680222, - "p90": 261.8880122900009, - "p95": 280.60799837112427, - "p99": 309.919998049736 + "p50": 177.0240068435669, + "p90": 188.6720061302185, + "p95": 189.95200097560883, + "p99": 196.51199877262115 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 49946624, - 
"combineLogicalBytes": 49946624, - "fanoutMean": 3.40234375, - "recvTokensMax": 1022, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 1104, "stragglerRank": 4, "correct": true, "samplesPooled": 600, @@ -48590,34 +50900,35 @@ ] }, { - "id": "cx-b1823392", - "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|decode|normal|none|none|0|tuned||22da8b58646609c", - "colorKey": "h200_c5b3365a", - "comparisonKey": "73e84f1c938d90c0", + "id": "cx-f1655975", + "identity": "h100|deepep|7168|8|256|bf16|ll|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|ac583971f94b176", + "colorKey": "h100_81ce2214", + "comparisonKey": "16f06985ac4d7bde", "schemaVersion": 3, - "generatedAt": "2026-06-27T00:04:44.997855+00:00", + "generatedAt": "2026-06-26T17:31:24.570568+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h200-dgxc-slurm_13", - "sku": "h200", + "runner": "h100-dgxc-slurm_02", + "sku": "h100", "backend": "deepep", "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", + "mode": "ll", + "resourceMode": "normalized", + "suite": "resource-constrained", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", + "topologyClass": "h100-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H200 EP8 · deepep · bf16 · zipf-heavy", + "label": "H100 EP8 · deepep · bf16 LL (norm)", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, "experts": 256, - "routing": "zipf-heavy", - "routingLabel": "zipf-heavy", + "routing": "uniform", + "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, @@ -48626,13 +50937,13 @@ "combineQuantMode": "none" }, "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, + 
"requestedFraction": 0.18, + "achievedFraction": null, + "configuredUnits": null, "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, "paretoEligible": false }, "placement": { @@ -48642,8 +50953,8 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "22da8b58646609c", - "workloadId": "set:8:6b84350720aa8233", + "traceSignature": "ac583971f94b176", + "workloadId": "set:8:d8d49658059863f2", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -48651,44 +50962,44 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28272086516", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272086516", - "createdAt": "2026-06-27T00:03:05Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28254350430", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254350430", + "createdAt": "2026-06-26T17:31:24.570568+00:00", + "sha": "60dec7d70f554e252fec87709e2be52752947db1" }, "rows": [ { "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 64.92800265550613, - "p90": 98.62399846315384, - "p95": 109.27999764680862, - "p99": 182.23999440670013 + "p50": 42.49599948525429, + "p90": 50.27199909090996, + "p95": 50.87999999523163, + "p99": 57.920001447200775 }, "combine": { - "p50": 60.92799827456474, - "p90": 75.42400062084198, - "p95": 80.6720033288002, - "p99": 96.54399752616882 + "p50": 37.98399865627289, + "p90": 39.135999977588654, + "p95": 45.3759990632534, + "p99": 46.911999583244324 }, "roundtrip": { - "p50": 116.57600104808807, - "p90": 152.44799852371216, - "p95": 162.81600296497345, - "p99": 179.51999604701996 + "p50": 60.83200126886368, + "p90": 62.272001057863235, + "p95": 67.90400296449661, + 
"p99": 69.88800317049026 }, "isolatedSum": { - "p50": 125.85600093007088, - "p90": 174.04799908399582, - "p95": 189.95200097560883, - "p99": 278.78399193286896 + "p50": 80.47999814152718, + "p90": 89.40799906849861, + "p95": 96.25599905848503, + "p99": 104.8320010304451 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 172032, - "combineLogicalBytes": 172032, - "fanoutMean": 1.5, - "recvTokensMax": 8, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 14, "stragglerRank": 5, "correct": true, "samplesPooled": 600, @@ -48698,35 +51009,35 @@ "tokensPerRank": 2, "globalTokens": 16, "dispatch": { - "p50": 65.50399959087372, - "p90": 96.47999703884125, - "p95": 104.86400127410889, - "p99": 137.56799697875977 + "p50": 49.02400076389313, + "p90": 50.40000006556511, + "p95": 50.87999999523163, + "p99": 57.11999908089638 }, "combine": { - "p50": 59.808000922203064, - "p90": 72.83200323581696, - "p95": 78.84799689054489, - "p99": 92.19200164079666 + "p50": 38.2080003619194, + "p90": 38.84800150990486, + "p95": 39.64800015091896, + "p99": 45.85599899291992 }, "roundtrip": { - "p50": 110.97600311040878, - "p90": 140.00000059604645, - "p95": 150.87999403476715, - "p99": 177.72799730300903 + "p50": 61.216000467538834, + "p90": 67.84000247716904, + "p95": 68.9919963479042, + "p99": 69.88800317049026 }, "isolatedSum": { - "p50": 125.31200051307678, - "p90": 169.3120002746582, - "p95": 183.71199816465378, - "p99": 229.75999861955643 + "p50": 87.23200112581253, + "p90": 89.24800157546997, + "p95": 90.52800014615059, + "p99": 102.9759980738163 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 315392, - "combineLogicalBytes": 315392, - "fanoutMean": 1.375, - "recvTokensMax": 16, - "stragglerRank": 5, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 21, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 @@ -48735,34 +51046,34 @@ 
"tokensPerRank": 4, "globalTokens": 32, "dispatch": { - "p50": 73.18399846553802, - "p90": 93.63199770450592, - "p95": 108.67200046777725, - "p99": 126.01600587368011 + "p50": 42.75200143456459, + "p90": 50.04800111055374, + "p95": 50.52800104022026, + "p99": 57.88800120353699 }, "combine": { - "p50": 62.20800057053566, - "p90": 70.52800059318542, - "p95": 78.07999849319458, - "p99": 100.51199793815613 + "p50": 37.9519984126091, + "p90": 38.84800150990486, + "p95": 40.44799879193306, + "p99": 46.52800038456917 }, "roundtrip": { - "p50": 116.67200177907944, - "p90": 144.1279947757721, - "p95": 158.91200304031372, - "p99": 186.17600202560425 + "p50": 60.736000537872314, + "p90": 62.431998550891876, + "p95": 67.9360032081604, + "p99": 70.0799971818924 }, "isolatedSum": { - "p50": 135.39199903607368, - "p90": 164.15999829769135, - "p95": 186.75199896097183, - "p99": 226.52800381183624 + "p50": 80.70399984717369, + "p90": 88.8960026204586, + "p95": 90.97599983215332, + "p99": 104.41600158810616 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 616448, - "combineLogicalBytes": 616448, - "fanoutMean": 1.34375, - "recvTokensMax": 32, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 39, "stragglerRank": 5, "correct": true, "samplesPooled": 600, @@ -48772,34 +51083,34 @@ "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 72.64000177383423, - "p90": 98.39999675750732, - "p95": 103.93600165843964, - "p99": 132.28799402713776 + "p50": 49.12000149488449, + "p90": 50.36799982190132, + "p95": 50.783999264240265, + "p99": 56.44800141453743 }, "combine": { - "p50": 60.99199876189232, - "p90": 72.06399738788605, - "p95": 79.52000200748444, - "p99": 91.5519967675209 + "p50": 38.2080003619194, + "p90": 39.8080013692379, + "p95": 44.89599913358688, + "p99": 46.23999819159508 }, "roundtrip": { - "p50": 118.94399672746658, - "p90": 150.30400454998016, - "p95": 160.3199988603592, - "p99": 178.78399789333344 + 
"p50": 61.08799949288368, + "p90": 68.54400038719177, + "p95": 69.023996591568, + "p99": 70.01599669456482 }, "isolatedSum": { - "p50": 133.63200053572655, - "p90": 170.46399414539337, - "p95": 183.45600366592407, - "p99": 223.83999079465866 + "p50": 87.3280018568039, + "p90": 90.17600119113922, + "p95": 95.67999839782715, + "p99": 102.68799960613251 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1376256, - "combineLogicalBytes": 1376256, - "fanoutMean": 1.5, - "recvTokensMax": 64, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 74, "stragglerRank": 5, "correct": true, "samplesPooled": 600, @@ -48809,35 +51120,35 @@ "tokensPerRank": 16, "globalTokens": 128, "dispatch": { - "p50": 72.4480003118515, - "p90": 85.9839990735054, - "p95": 96.99200093746185, - "p99": 122.17599898576736 + "p50": 49.536000937223434, + "p90": 50.783999264240265, + "p95": 52.73599922657013, + "p99": 58.079998940229416 }, "combine": { - "p50": 67.10399687290192, - "p90": 77.11999863386154, - "p95": 83.74399691820145, - "p99": 104.16000336408615 + "p50": 45.24800181388855, + "p90": 46.431999653577805, + "p95": 46.68800160288811, + "p99": 48.48000034689903 }, "roundtrip": { - "p50": 118.40000003576279, - "p90": 138.11199367046356, - "p95": 145.11999487876892, - "p99": 157.18400478363037 + "p50": 68.67200136184692, + "p90": 70.30399888753891, + "p95": 75.42400062084198, + "p99": 77.504001557827 }, "isolatedSum": { - "p50": 139.55199718475342, - "p90": 163.10399770736694, - "p95": 180.7359978556633, - "p99": 226.33600234985352 + "p50": 94.78400275111198, + "p90": 97.21599891781807, + "p95": 99.42400082945824, + "p99": 106.55999928712845 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2781184, - "combineLogicalBytes": 2781184, - "fanoutMean": 1.515625, - "recvTokensMax": 128, - "stragglerRank": 4, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 145, + 
"stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -48846,34 +51157,34 @@ "tokensPerRank": 32, "globalTokens": 256, "dispatch": { - "p50": 76.86399668455124, - "p90": 97.31200337409973, - "p95": 106.36799782514572, - "p99": 120.25599926710129 + "p50": 50.52800104022026, + "p90": 57.5999990105629, + "p95": 58.079998940229416, + "p99": 58.97599831223488 }, "combine": { - "p50": 69.47200000286102, - "p90": 82.78399705886841, - "p95": 87.80799806118011, - "p99": 102.9760017991066 + "p50": 46.592000871896744, + "p90": 53.568001836538315, + "p95": 54.207999259233475, + "p99": 55.10399863123894 }, "roundtrip": { - "p50": 128.25599312782288, - "p90": 152.63999998569489, - "p95": 163.10399770736694, - "p99": 197.37599790096283 + "p50": 77.56800204515457, + "p90": 85.34400165081024, + "p95": 85.79199761152267, + "p99": 86.496002972126 }, "isolatedSum": { - "p50": 146.33599668741226, - "p90": 180.09600043296814, - "p95": 194.17599588632584, - "p99": 223.23200106620789 + "p50": 97.120001912117, + "p90": 111.16800084710121, + "p95": 112.28799819946289, + "p99": 114.07999694347382 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 5533696, - "combineLogicalBytes": 5533696, - "fanoutMean": 1.5078125, - "recvTokensMax": 256, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 287, "stragglerRank": 5, "correct": true, "samplesPooled": 600, @@ -48883,35 +51194,35 @@ "tokensPerRank": 64, "globalTokens": 512, "dispatch": { - "p50": 95.8079993724823, - "p90": 111.68000102043152, - "p95": 120.99199742078781, - "p99": 207.61600136756897 + "p50": 66.01600348949432, + "p90": 107.35999792814255, + "p95": 108.06400328874588, + "p99": 109.40799862146378 }, "combine": { - "p50": 81.53600245714188, - "p90": 93.75999867916107, - "p95": 102.24000364542007, - "p99": 131.1360001564026 + "p50": 62.52799928188324, + "p90": 63.93600255250931, + "p95": 65.85600227117538, + "p99": 79.29600030183792 }, "roundtrip": { 
- "p50": 155.96799552440643, - "p90": 171.23199999332428, - "p95": 179.9360066652298, - "p99": 195.93599438667297 + "p50": 102.39999741315842, + "p90": 110.1439967751503, + "p95": 110.68800091743469, + "p99": 112.89600282907486 }, "isolatedSum": { - "p50": 177.34400182962418, - "p90": 205.4399996995926, - "p95": 223.23200106620789, - "p99": 338.75200152397156 + "p50": 128.54400277137756, + "p90": 171.29600048065186, + "p95": 173.92000555992126, + "p99": 188.7039989233017 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 11210752, - "combineLogicalBytes": 11210752, - "fanoutMean": 1.52734375, - "recvTokensMax": 512, - "stragglerRank": 5, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 564, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 @@ -48920,34 +51231,34 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 118.49600076675415, - "p90": 133.40799510478973, - "p95": 137.472003698349, - "p99": 168.09600591659546 + "p50": 87.2960016131401, + "p90": 90.91199934482574, + "p95": 94.08000111579895, + "p99": 95.51999717950821 }, "combine": { - "p50": 108.51199924945831, - "p90": 121.37600034475327, - "p95": 125.18399953842163, - "p99": 135.74400544166565 + "p50": 88.86399865150452, + "p90": 95.64799815416336, + "p95": 96.3520035147667, + "p99": 97.43999689817429 }, "roundtrip": { - "p50": 205.76000213623047, - "p90": 222.78399765491486, - "p95": 227.84000635147095, - "p99": 288.2879972457886 + "p50": 153.21600437164307, + "p90": 159.90400314331055, + "p95": 160.67199409008026, + "p99": 161.95200383663177 }, "isolatedSum": { - "p50": 227.00800001621246, - "p90": 254.783995449543, - "p95": 262.65600323677063, - "p99": 303.8400113582611 + "p50": 176.16000026464462, + "p90": 186.5599974989891, + "p95": 190.43200463056564, + "p99": 192.9599940776825 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 22650880, - "combineLogicalBytes": 22650880, - "fanoutMean": 
1.54296875, - "recvTokensMax": 1024, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 1104, "stragglerRank": 5, "correct": true, "samplesPooled": 600, @@ -48956,16 +51267,16 @@ ] }, { - "id": "cx-1cebdc77", - "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|decode|normal|none|none|0|tuned||47fddabb3277bec", - "colorKey": "h200_c5b3365a", - "comparisonKey": "73e84f1c938d90c0", + "id": "cx-075b23a8", + "identity": "h100|deepep|4096|8|128|fp8|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||75530960a30b452", + "colorKey": "h100_a96c99f3", + "comparisonKey": "b300aeac7d2a6068", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:57:04.169845+00:00", + "generatedAt": "2026-06-27T11:15:32.751842+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h200-dgxc-slurm_4", - "sku": "h200", + "runner": "h100-dgxc-slurm_03", + "sku": "h100", "backend": "deepep", "phase": "decode", "mode": "normal", @@ -48973,21 +51284,22 @@ "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", + "topologyClass": "h100-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H200 EP8 · deepep · bf16 · zipf-heavy", + "label": "H100 EP8 · deepep · fp8", + "model": "Qwen3.5", "shape": { - "hidden": 7168, + "hidden": 4096, "topk": 8, - "experts": 256, - "routing": "zipf-heavy", - "routingLabel": "zipf-heavy", + "experts": 128, + "routing": "uniform", + "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, - "dispatchDtype": "bf16", + "dispatchDtype": "fp8", "activationProfile": "normal", "combineQuantMode": "none" }, @@ -49008,8 +51320,8 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "47fddabb3277bec", - "workloadId": "set:4:6b84350720aa8233", + "traceSignature": 
"75530960a30b452", + "workloadId": "set:8:d1b92539bddfb570", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -49017,411 +51329,193 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271852422", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271852422", - "createdAt": "2026-06-26T23:55:40Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28287505969", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28287505969", + "createdAt": "2026-06-27T11:15:32.751842+00:00", + "sha": "df7fddee0a275156d4c72fa006cd2b73bce72613" }, "rows": [ { "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 70.27199864387512, - "p90": 103.64799946546555, - "p95": 125.791996717453, - "p99": 208.15999805927277 + "p50": 93.34400296211243, + "p90": 97.15200215578079, + "p95": 99.2640033364296, + "p99": 102.20800340175629 }, "combine": { - "p50": 61.95199862122536, - "p90": 75.45600086450577, - "p95": 80.6720033288002, - "p99": 99.07200187444687 + "p50": 60.15999987721443, + "p90": 61.63199990987778, + "p95": 63.07200342416763, + "p99": 68.25599819421768 }, "roundtrip": { - "p50": 117.37599968910217, - "p90": 144.83200013637543, - "p95": 152.73599326610565, - "p99": 179.58399653434753 + "p50": 174.97600615024567, + "p90": 179.55200374126434, + "p95": 182.40000307559967, + "p99": 185.5359971523285 }, "isolatedSum": { - "p50": 132.22399726510048, - "p90": 179.1040003299713, - "p95": 206.4640000462532, - "p99": 307.23199993371964 + "p50": 153.50400283932686, + "p90": 158.78400206565857, + "p95": 162.33600676059723, + "p99": 170.46400159597397 }, "roundtripMeasured": true, "dispatchLogicalBytes": 172032, - "combineLogicalBytes": 172032, - "fanoutMean": 1.5, - "recvTokensMax": 8, - "stragglerRank": 4, + "combineLogicalBytes": 344064, + "fanoutMean": 5.25, + "recvTokensMax": 6, + 
"stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 2, + "globalTokens": 16, "dispatch": { - "p50": 75.26399940252304, - "p90": 98.11200201511383, - "p95": 106.175996363163, - "p99": 138.3039951324463 + "p50": 70.3359991312027, + "p90": 95.0080007314682, + "p95": 98.11200201511383, + "p99": 103.4879982471466 }, "combine": { - "p50": 63.90400230884552, - "p90": 78.43200117349625, - "p95": 83.99999886751175, - "p99": 94.11200135946274 + "p50": 53.18399891257286, + "p90": 61.11999973654747, + "p95": 61.69600039720535, + "p99": 64.19199705123901 }, "roundtrip": { - "p50": 119.48800086975098, - "p90": 151.16800367832184, - "p95": 161.53599321842194, - "p99": 214.4320011138916 + "p50": 145.4080045223236, + "p90": 176.70400440692902, + "p95": 179.26399409770966, + "p99": 185.44000387191772 }, "isolatedSum": { - "p50": 139.16800171136856, - "p90": 176.54400318861008, - "p95": 190.17599523067474, - "p99": 232.41599649190903 + "p50": 123.51999804377556, + "p90": 156.12800046801567, + "p95": 159.80800241231918, + "p99": 167.67999529838562 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1376256, - "combineLogicalBytes": 1376256, - "fanoutMean": 1.5, - "recvTokensMax": 64, - "stragglerRank": 6, + "dispatchLogicalBytes": 352256, + "combineLogicalBytes": 704512, + "fanoutMean": 5.375, + "recvTokensMax": 12, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 32, - "globalTokens": 256, + "tokensPerRank": 4, + "globalTokens": 32, "dispatch": { - "p50": 82.17599987983704, - "p90": 105.98400235176086, - "p95": 113.11999708414078, - "p99": 133.18400084972382 + "p50": 74.68800246715546, + "p90": 98.04800152778625, + "p95": 100.0640019774437, + "p99": 110.97600311040878 }, "combine": { - "p50": 72.15999811887741, - "p90": 88.76799792051315, - "p95": 93.28000247478485, - "p99": 116.57600104808807 + "p50": 52.191998809576035, + "p90": 
62.431998550891876, + "p95": 63.1679967045784, + "p99": 67.52000004053116 }, "roundtrip": { - "p50": 134.49600338935852, - "p90": 162.432000041008, - "p95": 173.47200214862823, - "p99": 268.8640058040619 + "p50": 145.9520012140274, + "p90": 179.77599799633026, + "p95": 183.07200074195862, + "p99": 188.06399405002594 }, "isolatedSum": { - "p50": 154.33599799871445, - "p90": 194.75200027227402, - "p95": 206.39999955892563, - "p99": 249.7600018978119 + "p50": 126.88000127673149, + "p90": 160.48000007867813, + "p95": 163.2319986820221, + "p99": 178.49600315093994 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 5533696, - "combineLogicalBytes": 5533696, - "fanoutMean": 1.5078125, - "recvTokensMax": 256, - "stragglerRank": 1, + "dispatchLogicalBytes": 692224, + "combineLogicalBytes": 1384448, + "fanoutMean": 5.28125, + "recvTokensMax": 26, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 8, + "globalTokens": 64, "dispatch": { - "p50": 119.58400160074234, - "p90": 131.96800649166107, - "p95": 136.3839954137802, - "p99": 154.59200739860535 + "p50": 70.23999840021133, + "p90": 96.12800180912018, + "p95": 98.43199700117111, + "p99": 103.42399775981903 }, "combine": { - "p50": 109.31199789047241, - "p90": 120.67200243473053, - "p95": 125.69600343704224, - "p99": 135.3919953107834 + "p50": 53.75999957323074, + "p90": 62.752000987529755, + "p95": 63.87200206518173, + "p99": 71.87200337648392 }, "roundtrip": { - "p50": 207.58399367332458, - "p90": 222.91199862957, - "p95": 232.86400735378265, - "p99": 284.89598631858826 + "p50": 146.2399959564209, + "p90": 179.83999848365784, + "p95": 182.81599879264832, + "p99": 186.71999871730804 }, "isolatedSum": { - "p50": 228.89599949121475, - "p90": 252.6400089263916, - "p95": 262.07999885082245, - "p99": 289.98400270938873 + "p50": 123.99999797344208, + "p90": 158.88000279664993, + "p95": 162.30399906635284, + "p99": 175.29600113630295 
}, "roundtripMeasured": true, - "dispatchLogicalBytes": 22650880, - "combineLogicalBytes": 22650880, - "fanoutMean": 1.54296875, - "recvTokensMax": 1024, - "stragglerRank": 6, + "dispatchLogicalBytes": 1372160, + "combineLogicalBytes": 2744320, + "fanoutMean": 5.234375, + "recvTokensMax": 49, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 - } - ] - }, - { - "id": "cx-78ae7872", - "identity": "h200|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-heavy+eplb|8|decode|normal|none|none|0|tuned||5a3054422534366", - "colorKey": "h200_06aa1194", - "comparisonKey": "85dbd46cb77d1362", - "schemaVersion": 3, - "generatedAt": "2026-06-27T00:04:54.232728+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_5", - "sku": "h200", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · bf16 · zipf-heavy+eplb", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 288, - "routing": "zipf-heavy", - "routingLabel": "zipf-heavy+eplb", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": true, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "5a3054422534366", - "workloadId": "set:8:6b84350720aa8233", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": 7.40625, 
- "eplbImbalanceAfter": 1.0004417782738093, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28272090308", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272090308", - "createdAt": "2026-06-27T00:03:13Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ + }, { - "tokensPerRank": 1, - "globalTokens": 8, + "tokensPerRank": 16, + "globalTokens": 128, "dispatch": { - "p50": 69.21599805355072, - "p90": 83.55200290679932, - "p95": 92.83199906349182, - "p99": 110.75200140476227 + "p50": 93.85599941015244, + "p90": 98.14400225877762, + "p95": 100.832000374794, + "p99": 104.60799932479858 }, "combine": { - "p50": 67.45599955320358, - "p90": 76.12799853086472, - "p95": 81.53600245714188, - "p99": 88.54400366544724 + "p50": 62.144000083208084, + "p90": 63.80800157785416, + "p95": 65.08799642324448, + "p99": 69.24799829721451 }, "roundtrip": { - "p50": 122.079998254776, - "p90": 140.4159963130951, - "p95": 148.25600385665894, - "p99": 178.3680021762848 + "p50": 147.2959965467453, + "p90": 180.7679980993271, + "p95": 184.86399948596954, + "p99": 189.82400000095367 }, "isolatedSum": { - "p50": 136.6719976067543, - "p90": 159.68000143766403, - "p95": 174.3680015206337, - "p99": 199.2960050702095 + "p50": 155.99999949336052, + "p90": 161.95200383663177, + "p95": 165.91999679803848, + "p99": 173.8559976220131 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 559104, - "combineLogicalBytes": 559104, - "fanoutMean": 4.875, - "recvTokensMax": 6, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 72.03199714422226, - "p90": 80.57600259780884, - "p95": 86.40000224113464, - "p99": 97.34400361776352 - }, - "combine": { - "p50": 67.61600077152252, - "p90": 75.13599842786789, - "p95": 79.0719985961914, - 
"p99": 86.40000224113464 - }, - "roundtrip": { - "p50": 120.7360029220581, - "p90": 138.49599659442902, - "p95": 162.01600432395935, - "p99": 265.21599292755127 - }, - "isolatedSum": { - "p50": 139.64799791574478, - "p90": 155.71200102567673, - "p95": 165.47200083732605, - "p99": 183.74400585889816 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1175552, - "combineLogicalBytes": 1175552, - "fanoutMean": 5.125, - "recvTokensMax": 12, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 74.30399954319, - "p90": 86.91199868917465, - "p95": 100.12800246477127, - "p99": 123.48800152540207 - }, - "combine": { - "p50": 68.51200014352798, - "p90": 77.85599678754807, - "p95": 84.70399677753448, - "p99": 112.15999722480774 - }, - "roundtrip": { - "p50": 121.31199985742569, - "p90": 140.25600254535675, - "p95": 151.64799988269806, - "p99": 177.66399681568146 - }, - "isolatedSum": { - "p50": 142.815999686718, - "p90": 164.76799547672272, - "p95": 184.83199924230576, - "p99": 235.6479987502098 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2465792, - "combineLogicalBytes": 2465792, - "fanoutMean": 5.375, - "recvTokensMax": 25, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 74.65600222349167, - "p90": 88.28800171613693, - "p95": 97.98400104045868, - "p99": 121.2799996137619 - }, - "combine": { - "p50": 69.56800073385239, - "p90": 78.87999713420868, - "p95": 83.16799998283386, - "p99": 94.84799951314926 - }, - "roundtrip": { - "p50": 126.36800110340118, - "p90": 164.57599401474, - "p95": 172.44799435138702, - "p99": 196.22400403022766 - }, - "isolatedSum": { - "p50": 144.22400295734406, - "p90": 167.1679988503456, - "p95": 181.15200102329254, - "p99": 216.12799912691116 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4988928, - 
"combineLogicalBytes": 4988928, - "fanoutMean": 5.4375, - "recvTokensMax": 47, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 73.53600114583969, - "p90": 88.0960002541542, - "p95": 96.03200107812881, - "p99": 121.2799996137619 - }, - "combine": { - "p50": 70.39999961853027, - "p90": 78.91199737787247, - "p95": 86.36800199747086, - "p99": 98.9760011434555 - }, - "roundtrip": { - "p50": 125.47199428081512, - "p90": 143.96800100803375, - "p95": 153.6960005760193, - "p99": 172.8000044822693 - }, - "isolatedSum": { - "p50": 143.93600076436996, - "p90": 167.00799763202667, - "p95": 182.40000307559967, - "p99": 220.2560007572174 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 9791488, - "combineLogicalBytes": 9791488, - "fanoutMean": 5.3359375, - "recvTokensMax": 94, - "stragglerRank": 5, + "dispatchLogicalBytes": 2732032, + "combineLogicalBytes": 5464064, + "fanoutMean": 5.2109375, + "recvTokensMax": 94, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -49430,35 +51524,35 @@ "tokensPerRank": 32, "globalTokens": 256, "dispatch": { - "p50": 81.02399855852127, - "p90": 94.71999853849411, - "p95": 106.11200332641602, - "p99": 144.6399986743927 + "p50": 71.10399752855301, + "p90": 95.10400146245956, + "p95": 97.75999933481216, + "p99": 105.92000186443329 }, "combine": { - "p50": 76.7040029168129, - "p90": 88.54400366544724, - "p95": 96.76799923181534, - "p99": 107.00800269842148 + "p50": 57.95200169086456, + "p90": 66.84800237417221, + "p95": 67.4239993095398, + "p99": 71.74400240182877 }, "roundtrip": { - "p50": 135.29600203037262, - "p90": 158.78400206565857, - "p95": 170.84799706935883, - "p99": 241.43999814987183 + "p50": 150.9760022163391, + "p90": 184.25600230693817, + "p95": 188.7039989233017, + "p99": 192.80000030994415 }, "isolatedSum": { - "p50": 157.72800147533417, - "p90": 183.26400220394135, - "p95": 
202.88000255823135, - "p99": 251.64800137281418 + "p50": 129.05599921941757, + "p90": 161.95200383663177, + "p95": 165.18399864435196, + "p99": 177.66400426626205 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19410944, - "combineLogicalBytes": 19410944, - "fanoutMean": 5.2890625, - "recvTokensMax": 178, - "stragglerRank": 4, + "dispatchLogicalBytes": 5562368, + "combineLogicalBytes": 11124736, + "fanoutMean": 5.3046875, + "recvTokensMax": 186, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 @@ -49467,35 +51561,35 @@ "tokensPerRank": 64, "globalTokens": 512, "dispatch": { - "p50": 92.96000003814697, - "p90": 105.47199845314026, - "p95": 115.80800265073776, - "p99": 153.56799960136414 + "p50": 74.43200051784515, + "p90": 96.54399752616882, + "p95": 100.5759984254837, + "p99": 110.75200140476227 }, "combine": { - "p50": 86.87999844551086, - "p90": 96.03200107812881, - "p95": 102.33599692583084, - "p99": 112.67200112342834 + "p50": 66.17599725723267, + "p90": 75.39200037717819, + "p95": 76.22399926185608, + "p99": 80.79999685287476 }, "roundtrip": { - "p50": 158.4320068359375, - "p90": 171.26399278640747, - "p95": 179.967999458313, - "p99": 206.43199980258942 + "p50": 158.75199437141418, + "p90": 192.51200556755066, + "p95": 196.19199633598328, + "p99": 201.6959935426712 }, "isolatedSum": { - "p50": 179.83999848365784, - "p90": 201.50399953126907, - "p95": 218.1439995765686, - "p99": 266.2400007247925 + "p50": 140.60799777507782, + "p90": 171.93599790334702, + "p95": 176.79999768733978, + "p99": 191.55199825763702 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 38678528, - "combineLogicalBytes": 38678528, - "fanoutMean": 5.26953125, - "recvTokensMax": 360, - "stragglerRank": 4, + "dispatchLogicalBytes": 11096064, + "combineLogicalBytes": 22192128, + "fanoutMean": 5.291015625, + "recvTokensMax": 358, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 @@ -49504,35 +51598,35 @@ "tokensPerRank": 128, 
"globalTokens": 1024, "dispatch": { - "p50": 117.18399822711945, - "p90": 127.68000364303589, - "p95": 131.3599944114685, - "p99": 140.44800400733948 + "p50": 80.44800162315369, + "p90": 96.67199850082397, + "p95": 99.23200309276581, + "p99": 107.04000294208527 }, "combine": { - "p50": 104.3199971318245, - "p90": 113.76000195741653, - "p95": 121.98399752378464, - "p99": 137.28000223636627 + "p50": 78.3040001988411, + "p90": 88.79999816417694, + "p95": 89.63199704885483, + "p99": 92.83199906349182 }, "roundtrip": { - "p50": 196.28800451755524, - "p90": 208.95999670028687, - "p95": 216.5759950876236, - "p99": 241.56799912452698 + "p50": 173.21600019931793, + "p90": 207.519993185997, + "p95": 211.13599836826324, + "p99": 220.64000368118286 }, "isolatedSum": { - "p50": 221.50399535894394, - "p90": 241.44000560045242, - "p95": 253.34399193525314, - "p99": 277.72800624370575 + "p50": 158.75200182199478, + "p90": 185.47199666500092, + "p95": 188.86400014162064, + "p99": 199.8720020055771 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77285376, - "combineLogicalBytes": 77285376, - "fanoutMean": 5.2646484375, - "recvTokensMax": 704, - "stragglerRank": 5, + "dispatchLogicalBytes": 22282240, + "combineLogicalBytes": 44564480, + "fanoutMean": 5.3125, + "recvTokensMax": 699, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 @@ -49540,38 +51634,39 @@ ] }, { - "id": "cx-4fa5aaad", - "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-mild|8|decode|normal|none|none|0|tuned||f3df51be7d5c32b", - "colorKey": "h200_6a794fcd", - "comparisonKey": "50f5858697d33730", + "id": "cx-1bb82fc0", + "identity": "h100|deepep|4096|8|128|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||75530960a30b452", + "colorKey": "h100_97196257", + "comparisonKey": "efcc4c7d487df84c", "schemaVersion": 3, - "generatedAt": "2026-06-27T00:03:36.902996+00:00", + "generatedAt": "2026-06-26T23:51:08.338542+00:00", "status": "valid", 
"publicationStatus": "official", - "runner": "h200-dgxc-slurm_1", - "sku": "h200", + "runner": "h100-dgxc-slurm_02", + "sku": "h100", "backend": "deepep", "phase": "decode", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h100-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H200 EP8 · deepep · bf16 · zipf-mild", + "label": "H100 EP8 · deepep · fp8", + "model": "Qwen3.5", "shape": { - "hidden": 7168, + "hidden": 4096, "topk": 8, - "experts": 256, - "routing": "zipf-mild", - "routingLabel": "zipf-mild", + "experts": 128, + "routing": "uniform", + "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, - "dispatchDtype": "bf16", + "dispatchDtype": "fp8", "activationProfile": "normal", "combineQuantMode": "none" }, @@ -49592,8 +51687,8 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "f3df51be7d5c32b", - "workloadId": "set:8:289b7f9c14292e96", + "traceSignature": "75530960a30b452", + "workloadId": "set:8:d1b92539bddfb570", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -49601,9 +51696,9 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28272056705", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272056705", - "createdAt": "2026-06-27T00:02:10Z", + "id": "28271676478", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271676478", + "createdAt": "2026-06-26T23:51:08.338542+00:00", "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" }, "rows": [ @@ -49611,35 +51706,35 @@ "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 73.98399710655212, - "p90": 
98.84800016880035, - "p95": 105.98400235176086, - "p99": 125.21600723266602 + "p50": 184.7359985113144, + "p90": 193.08799505233765, + "p95": 196.86399400234222, + "p99": 204.25599813461304 }, "combine": { - "p50": 68.96000355482101, - "p90": 81.66400343179703, - "p95": 86.496002972126, - "p99": 102.88000106811523 + "p50": 49.79199916124344, + "p90": 51.96800082921982, + "p95": 53.79199981689453, + "p99": 56.86400085687637 }, "roundtrip": { - "p50": 119.93599683046341, - "p90": 147.93600142002106, - "p95": 157.53600001335144, - "p99": 168.09600591659546 + "p50": 218.9760059118271, + "p90": 226.52800381183624, + "p95": 230.0799936056137, + "p99": 235.6480062007904 }, "isolatedSum": { - "p50": 142.94400066137314, - "p90": 180.51200360059738, - "p95": 192.48000532388687, - "p99": 228.09600830078125 + "p50": 234.52799767255783, + "p90": 245.05599588155746, + "p95": 250.65599381923676, + "p99": 261.1199989914894 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 587776, - "combineLogicalBytes": 587776, - "fanoutMean": 5.125, - "recvTokensMax": 8, - "stragglerRank": 3, + "dispatchLogicalBytes": 172032, + "combineLogicalBytes": 344064, + "fanoutMean": 5.25, + "recvTokensMax": 6, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -49648,35 +51743,35 @@ "tokensPerRank": 2, "globalTokens": 16, "dispatch": { - "p50": 73.08799773454666, - "p90": 88.73599767684937, - "p95": 100.09600222110748, - "p99": 118.20799857378006 + "p50": 183.87199938297272, + "p90": 192.19200313091278, + "p95": 195.16800343990326, + "p99": 201.56799256801605 }, "combine": { - "p50": 68.35199892520905, - "p90": 77.08799839019775, - "p95": 82.84799754619598, - "p99": 91.61599725484848 + "p50": 50.87999999523163, + "p90": 54.17599901556969, + "p95": 55.67999929189682, + "p99": 59.328000992536545 }, "roundtrip": { - "p50": 123.3920007944107, - "p90": 151.296004652977, - "p95": 158.84800255298615, - "p99": 186.27199530601501 + "p50": 220.12799978256226, + "p90": 
227.87199914455414, + "p95": 230.43200373649597, + "p99": 237.31200397014618 }, "isolatedSum": { - "p50": 141.4399966597557, - "p90": 165.82399606704712, - "p95": 182.94399976730347, - "p99": 209.82399582862854 + "p50": 234.75199937820435, + "p90": 246.36800214648247, + "p95": 250.84800273180008, + "p99": 260.8959935605526 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1103872, - "combineLogicalBytes": 1103872, - "fanoutMean": 4.8125, - "recvTokensMax": 16, - "stragglerRank": 0, + "dispatchLogicalBytes": 352256, + "combineLogicalBytes": 704512, + "fanoutMean": 5.375, + "recvTokensMax": 12, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -49685,35 +51780,35 @@ "tokensPerRank": 4, "globalTokens": 32, "dispatch": { - "p50": 76.51200145483017, - "p90": 102.88000106811523, - "p95": 109.11999642848969, - "p99": 128.31999361515045 + "p50": 187.77599930763245, + "p90": 268.0320143699646, + "p95": 271.36000990867615, + "p99": 282.49600529670715 }, "combine": { - "p50": 69.82400268316269, - "p90": 81.44000172615051, - "p95": 86.75199747085571, - "p99": 98.04800152778625 + "p50": 52.44800075888634, + "p90": 63.90400230884552, + "p95": 64.86400216817856, + "p99": 69.76000219583511 }, "roundtrip": { - "p50": 126.14400684833527, - "p90": 157.6640009880066, - "p95": 167.84000396728516, - "p99": 190.88000059127808 + "p50": 225.3440022468567, + "p90": 308.9280128479004, + "p95": 312.48000264167786, + "p99": 320.5440044403076 }, "isolatedSum": { - "p50": 146.33600413799286, - "p90": 184.32000279426575, - "p95": 195.8719938993454, - "p99": 226.3679951429367 + "p50": 240.22400006651878, + "p90": 331.9360166788101, + "p95": 336.2240120768547, + "p99": 352.25600749254227 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2250752, - "combineLogicalBytes": 2250752, - "fanoutMean": 4.90625, - "recvTokensMax": 31, - "stragglerRank": 0, + "dispatchLogicalBytes": 692224, + "combineLogicalBytes": 1384448, + "fanoutMean": 5.28125, + "recvTokensMax": 26, 
+ "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -49722,35 +51817,35 @@ "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 75.9039968252182, - "p90": 97.4079966545105, - "p95": 101.88800096511841, - "p99": 117.60000139474869 + "p50": 184.03199315071106, + "p90": 193.31200420856476, + "p95": 197.79199361801147, + "p99": 205.9839963912964 }, "combine": { - "p50": 70.62400132417679, - "p90": 84.73599702119827, - "p95": 90.11200070381165, - "p99": 107.42399841547012 + "p50": 51.7439991235733, + "p90": 55.296000093221664, + "p95": 57.312000542879105, + "p99": 63.19999694824219 }, "roundtrip": { - "p50": 125.69600343704224, - "p90": 150.751993060112, - "p95": 158.30400586128235, - "p99": 175.4239946603775 + "p50": 220.8320051431656, + "p90": 228.7680059671402, + "p95": 231.455996632576, + "p99": 239.55200612545013 }, "isolatedSum": { - "p50": 146.527998149395, - "p90": 182.14399367570877, - "p95": 192.00000166893005, - "p99": 225.0239998102188 + "p50": 235.77599227428436, + "p90": 248.60800430178642, + "p95": 255.10399416089058, + "p99": 269.1839933395386 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4472832, - "combineLogicalBytes": 4472832, - "fanoutMean": 4.875, - "recvTokensMax": 62, - "stragglerRank": 3, + "dispatchLogicalBytes": 1372160, + "combineLogicalBytes": 2744320, + "fanoutMean": 5.234375, + "recvTokensMax": 49, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -49759,35 +51854,35 @@ "tokensPerRank": 16, "globalTokens": 128, "dispatch": { - "p50": 76.60800218582153, - "p90": 93.47199648618698, - "p95": 101.9200012087822, - "p99": 109.82400178909302 + "p50": 187.96800076961517, + "p90": 273.24798703193665, + "p95": 286.6879999637604, + "p99": 400.06399154663086 }, "combine": { - "p50": 71.26399874687195, - "p90": 84.09599959850311, - "p95": 88.32000195980072, - "p99": 100.89600086212158 + "p50": 53.75999957323074, + "p90": 65.15199691057205, + "p95": 67.45599955320358, + "p99": 
75.23199915885925 }, "roundtrip": { - "p50": 128.25599312782288, - "p90": 152.96000242233276, - "p95": 160.76800227165222, - "p99": 201.92000269889832 + "p50": 225.600004196167, + "p90": 310.8479976654053, + "p95": 322.6880133152008, + "p99": 449.7919976711273 }, "isolatedSum": { - "p50": 147.87200093269348, - "p90": 177.5679960846901, - "p95": 190.24000316858292, - "p99": 210.7200026512146 + "p50": 241.72800034284592, + "p90": 338.3999839425087, + "p95": 354.14399951696396, + "p99": 475.2959907054901 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 8888320, - "combineLogicalBytes": 8888320, - "fanoutMean": 4.84375, - "recvTokensMax": 124, - "stragglerRank": 6, + "dispatchLogicalBytes": 2732032, + "combineLogicalBytes": 5464064, + "fanoutMean": 5.2109375, + "recvTokensMax": 94, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -49796,35 +51891,35 @@ "tokensPerRank": 32, "globalTokens": 256, "dispatch": { - "p50": 83.8719978928566, - "p90": 102.55999863147736, - "p95": 108.92800241708755, - "p99": 121.76000326871872 + "p50": 189.11999464035034, + "p90": 271.36000990867615, + "p95": 286.9440019130707, + "p99": 324.0959942340851 }, "combine": { - "p50": 78.43200117349625, - "p90": 91.839998960495, - "p95": 96.57599776983261, - "p99": 108.12799632549286 + "p50": 56.44800141453743, + "p90": 68.57600063085556, + "p95": 69.11999732255936, + "p99": 73.56800138950348 }, "roundtrip": { - "p50": 138.46400380134583, - "p90": 160.19199788570404, - "p95": 168.09600591659546, - "p99": 186.14399433135986 + "p50": 226.27200186252594, + "p90": 234.14400219917297, + "p95": 238.68800699710846, + "p99": 254.27201390266418 }, "isolatedSum": { - "p50": 162.30399906635284, - "p90": 194.39999759197235, - "p95": 205.50400018692017, - "p99": 229.88799959421158 + "p50": 245.56799605488777, + "p90": 339.9360105395317, + "p95": 356.06399923563004, + "p99": 397.66399562358856 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 17733632, - 
"combineLogicalBytes": 17733632, - "fanoutMean": 4.83203125, - "recvTokensMax": 248, - "stragglerRank": 6, + "dispatchLogicalBytes": 5562368, + "combineLogicalBytes": 11124736, + "fanoutMean": 5.3046875, + "recvTokensMax": 186, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -49833,35 +51928,35 @@ "tokensPerRank": 64, "globalTokens": 512, "dispatch": { - "p50": 99.29600358009338, - "p90": 117.79200285673141, - "p95": 125.44000148773193, - "p99": 154.01600301265717 + "p50": 189.34400379657745, + "p90": 270.08000016212463, + "p95": 275.2639949321747, + "p99": 289.98398780822754 }, "combine": { - "p50": 90.14400094747543, - "p90": 102.91200131177902, - "p95": 110.17599701881409, - "p99": 119.35999989509583 + "p50": 64.60800021886826, + "p90": 76.89599692821503, + "p95": 78.23999971151352, + "p99": 82.2720006108284 }, "roundtrip": { - "p50": 166.75199568271637, - "p90": 185.7600063085556, - "p95": 193.02399456501007, - "p99": 220.60799598693848 + "p50": 238.3359968662262, + "p90": 318.015992641449, + "p95": 321.4719891548157, + "p99": 329.72800731658936 }, "isolatedSum": { - "p50": 189.44000452756882, - "p90": 220.70400416851044, - "p95": 235.61599850654602, - "p99": 273.376002907753 + "p50": 253.9520040154457, + "p90": 346.97599709033966, + "p95": 353.5039946436882, + "p99": 372.25598841905594 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 35424256, - "combineLogicalBytes": 35424256, - "fanoutMean": 4.826171875, - "recvTokensMax": 492, - "stragglerRank": 2, + "dispatchLogicalBytes": 11096064, + "combineLogicalBytes": 22192128, + "fanoutMean": 5.291015625, + "recvTokensMax": 358, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -49870,35 +51965,35 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 124.4800016283989, - "p90": 135.93600690364838, - "p95": 141.184002161026, - "p99": 167.23200678825378 + "p50": 192.19200313091278, + "p90": 272.15999364852905, + "p95": 275.7120132446289, + 
"p99": 291.29600524902344 }, "combine": { - "p50": 115.68000167608261, - "p90": 127.29600071907043, - "p95": 131.99999928474426, - "p99": 150.78400075435638 + "p50": 78.17599922418594, + "p90": 87.93599903583527, + "p95": 89.15200084447861, + "p99": 95.20000219345093 }, "roundtrip": { - "p50": 216.95999801158905, - "p90": 232.80000686645508, - "p95": 238.27199637889862, - "p99": 261.02399826049805 + "p50": 255.3279995918274, + "p90": 335.6480002403259, + "p95": 343.9359962940216, + "p99": 380.0320029258728 }, "isolatedSum": { - "p50": 240.1600033044815, - "p90": 263.2320076227188, - "p95": 273.18400144577026, - "p99": 318.01600754261017 + "p50": 270.3680023550987, + "p90": 360.0959926843643, + "p95": 364.8640140891075, + "p99": 386.49600744247437 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 70160384, - "combineLogicalBytes": 70160384, - "fanoutMean": 4.779296875, - "recvTokensMax": 987, - "stragglerRank": 3, + "dispatchLogicalBytes": 22282240, + "combineLogicalBytes": 44564480, + "fanoutMean": 5.3125, + "recvTokensMax": 699, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -49906,38 +52001,39 @@ ] }, { - "id": "cx-ffad9f17", - "identity": "h200|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-mild+eplb|8|decode|normal|none|none|0|tuned||16babcaf4204243", - "colorKey": "h200_b2ffaf91", - "comparisonKey": "b3b8e5cc27948267", + "id": "cx-c961a187", + "identity": "h100|deepep|5120|8|160|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||be1b44a963bd4ef", + "colorKey": "h100_97196257", + "comparisonKey": "994b6e44326c8d14", "schemaVersion": 3, - "generatedAt": "2026-06-27T00:03:43.326778+00:00", + "generatedAt": "2026-06-26T23:51:36.382828+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h200-dgxc-slurm_7", - "sku": "h200", + "runner": "h100-dgxc-slurm_11", + "sku": "h100", "backend": "deepep", "phase": "decode", "mode": "normal", "resourceMode": "tuned", "suite": 
"backend-default", "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h100-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H200 EP8 · deepep · bf16 · zipf-mild+eplb", + "label": "H100 EP8 · deepep · fp8", + "model": "shape 5120/8/160", "shape": { - "hidden": 7168, + "hidden": 5120, "topk": 8, - "experts": 288, - "routing": "zipf-mild", - "routingLabel": "zipf-mild+eplb", + "experts": 160, + "routing": "uniform", + "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", - "eplbEnabled": true, - "dispatchDtype": "bf16", + "eplbEnabled": false, + "dispatchDtype": "fp8", "activationProfile": "normal", "combineQuantMode": "none" }, @@ -49958,18 +52054,18 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "16babcaf4204243", - "workloadId": "set:8:289b7f9c14292e96", + "traceSignature": "be1b44a963bd4ef", + "workloadId": "set:8:34e5874082f8ea8f", "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": 2.61328125, - "eplbImbalanceAfter": 1.0009114583333334, + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, "backendVersion": "1.2.1", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28272060649", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272060649", - "createdAt": "2026-06-27T00:02:17Z", + "id": "28271691858", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271691858", + "createdAt": "2026-06-26T23:51:36.382828+00:00", "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" }, "rows": [ @@ -49977,35 +52073,35 @@ "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 72.9919970035553, - "p90": 96.73599898815155, - "p95": 102.7199998497963, - "p99": 128.83199751377106 + "p50": 196.03200256824493, + 
"p90": 203.48800718784332, + "p95": 207.32800662517548, + "p99": 214.9759978055954 }, "combine": { - "p50": 68.15999746322632, - "p90": 81.05599880218506, - "p95": 86.40000224113464, - "p99": 94.91200000047684 + "p50": 53.727999329566956, + "p90": 55.48800155520439, + "p95": 57.760000228881836, + "p99": 60.80000102519989 }, "roundtrip": { - "p50": 122.30399996042252, - "p90": 153.85599434375763, - "p95": 167.23200678825378, - "p99": 196.03200256824493 + "p50": 231.26399517059326, + "p90": 238.91200125217438, + "p95": 242.36799776554108, + "p99": 250.0160038471222 }, "isolatedSum": { - "p50": 141.15199446678162, - "p90": 177.7919977903366, - "p95": 189.12000209093094, - "p99": 223.7439975142479 + "p50": 249.7600018978119, + "p90": 258.9760087430477, + "p95": 265.0880068540573, + "p99": 275.7759988307953 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 602112, - "combineLogicalBytes": 602112, + "dispatchLogicalBytes": 215040, + "combineLogicalBytes": 430080, "fanoutMean": 5.25, - "recvTokensMax": 7, - "stragglerRank": 7, + "recvTokensMax": 8, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 @@ -50014,35 +52110,35 @@ "tokensPerRank": 2, "globalTokens": 16, "dispatch": { - "p50": 73.02399724721909, - "p90": 95.77599912881851, - "p95": 103.74400019645691, - "p99": 121.72800302505493 + "p50": 195.80799341201782, + "p90": 202.78400182724, + "p95": 205.1199972629547, + "p99": 212.12799847126007 }, "combine": { - "p50": 67.80800223350525, - "p90": 80.73599636554718, - "p95": 87.39200234413147, - "p99": 99.45599734783173 + "p50": 55.93600124120712, + "p90": 57.53599852323532, + "p95": 59.93599817156792, + "p99": 62.880001962184906 }, "roundtrip": { - "p50": 121.34400010108948, - "p90": 149.1840034723282, - "p95": 156.76799416542053, - "p99": 182.36799538135529 + "p50": 233.60000550746918, + "p90": 240.9600019454956, + "p95": 243.13600361347198, + "p99": 255.10400533676147 }, "isolatedSum": { - "p50": 140.83199948072433, - "p90": 
176.5119954943657, - "p95": 191.13600254058838, - "p99": 221.18400037288666 + "p50": 251.74399465322495, + "p90": 260.3200003504753, + "p95": 265.05599543452263, + "p99": 275.008000433445 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1189888, - "combineLogicalBytes": 1189888, - "fanoutMean": 5.1875, - "recvTokensMax": 12, - "stragglerRank": 5, + "dispatchLogicalBytes": 440320, + "combineLogicalBytes": 880640, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 @@ -50051,35 +52147,35 @@ "tokensPerRank": 4, "globalTokens": 32, "dispatch": { - "p50": 73.40800017118454, - "p90": 92.54399687051773, - "p95": 101.15200281143188, - "p99": 184.28799510002136 + "p50": 200.15999674797058, + "p90": 287.48801350593567, + "p95": 290.2719974517822, + "p99": 298.17599058151245 }, "combine": { - "p50": 68.28799843788147, - "p90": 82.40000158548355, - "p95": 88.03199976682663, - "p99": 100.44799745082855 + "p50": 57.11999908089638, + "p90": 68.67200136184692, + "p95": 69.56800073385239, + "p99": 75.3600001335144 }, "roundtrip": { - "p50": 124.38400089740753, - "p90": 158.59200060367584, - "p95": 172.2240000963211, - "p99": 259.42400097846985 + "p50": 238.01599442958832, + "p90": 328.5120129585266, + "p95": 332.73598551750183, + "p99": 340.1600122451782 }, "isolatedSum": { - "p50": 141.695998609066, - "p90": 174.94399845600128, - "p95": 189.18400257825851, - "p99": 284.7359925508499 + "p50": 257.27999582886696, + "p90": 356.1600148677826, + "p95": 359.8399981856346, + "p99": 373.53599071502686 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2408448, - "combineLogicalBytes": 2408448, - "fanoutMean": 5.25, - "recvTokensMax": 23, - "stragglerRank": 5, + "dispatchLogicalBytes": 870400, + "combineLogicalBytes": 1740800, + "fanoutMean": 5.3125, + "recvTokensMax": 25, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 @@ -50088,35 +52184,35 @@ "tokensPerRank": 8, "globalTokens": 64, 
"dispatch": { - "p50": 74.5600014925003, - "p90": 100.12800246477127, - "p95": 106.78400099277496, - "p99": 138.11199367046356 + "p50": 199.072003364563, + "p90": 282.1120023727417, + "p95": 285.8240008354187, + "p99": 292.7359938621521 }, "combine": { - "p50": 69.08799707889557, - "p90": 81.28000050783157, - "p95": 86.81599795818329, - "p99": 96.67199850082397 + "p50": 57.5999990105629, + "p90": 66.14399701356888, + "p95": 66.72000139951706, + "p99": 71.48800045251846 }, "roundtrip": { - "p50": 123.23199957609177, - "p90": 151.58399939537048, - "p95": 159.87199544906616, - "p99": 174.6560037136078 + "p50": 236.32000386714935, + "p90": 315.3280019760132, + "p95": 318.91199946403503, + "p99": 326.2079954147339 }, "isolatedSum": { - "p50": 143.64799857139587, - "p90": 181.40800297260284, - "p95": 193.59999895095825, - "p99": 234.78399217128754 + "p50": 256.6720023751259, + "p90": 348.2559993863106, + "p95": 352.54400223493576, + "p99": 364.22399431467056 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4859904, - "combineLogicalBytes": 4859904, + "dispatchLogicalBytes": 1735680, + "combineLogicalBytes": 3471360, "fanoutMean": 5.296875, - "recvTokensMax": 47, - "stragglerRank": 6, + "recvTokensMax": 50, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 @@ -50125,35 +52221,35 @@ "tokensPerRank": 16, "globalTokens": 128, "dispatch": { - "p50": 74.65600222349167, - "p90": 99.32799637317657, - "p95": 105.56799918413162, - "p99": 127.20000743865967 + "p50": 199.71199333667755, + "p90": 288.86398673057556, + "p95": 291.23198986053467, + "p99": 296.4160144329071 }, "combine": { - "p50": 69.88800317049026, - "p90": 83.10399949550629, - "p95": 88.639996945858, - "p99": 99.35999661684036 + "p50": 58.62399935722351, + "p90": 70.14399766921997, + "p95": 71.03999704122543, + "p99": 74.11199808120728 }, "roundtrip": { - "p50": 124.9919980764389, - "p90": 151.48800611495972, - "p95": 159.5200002193451, - "p99": 197.88800179958344 + "p50": 
239.19999599456787, + "p90": 329.75998520851135, + "p95": 332.5439989566803, + "p99": 338.3359909057617 }, "isolatedSum": { - "p50": 144.54400539398193, - "p90": 182.43199586868286, - "p95": 194.20799612998962, - "p99": 226.56000405550003 + "p50": 258.33599269390106, + "p90": 359.00798439979553, + "p95": 362.2719869017601, + "p99": 370.5280125141144 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 9605120, - "combineLogicalBytes": 9605120, - "fanoutMean": 5.234375, + "dispatchLogicalBytes": 3456000, + "combineLogicalBytes": 6912000, + "fanoutMean": 5.2734375, "recvTokensMax": 93, - "stragglerRank": 3, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 @@ -50162,35 +52258,35 @@ "tokensPerRank": 32, "globalTokens": 256, "dispatch": { - "p50": 80.35200089216232, - "p90": 101.02400183677673, - "p95": 105.6319996714592, - "p99": 116.7680025100708 + "p50": 200.3519982099533, + "p90": 288.2559895515442, + "p95": 290.49599170684814, + "p99": 295.1360046863556 }, "combine": { - "p50": 76.80000364780426, - "p90": 88.86399865150452, - "p95": 94.17600184679031, - "p99": 101.56799852848053 + "p50": 63.040003180503845, + "p90": 73.44000041484833, + "p95": 73.95199686288834, + "p99": 79.45600152015686 }, "roundtrip": { - "p50": 135.04000008106232, - "p90": 155.29599785804749, - "p95": 165.50399363040924, - "p99": 190.43199717998505 + "p50": 244.25600469112396, + "p90": 330.7200074195862, + "p95": 333.24798941612244, + "p99": 339.35999870300293 }, "isolatedSum": { - "p50": 157.15200453996658, - "p90": 189.88800048828125, - "p95": 199.8080015182495, - "p99": 218.33600103855133 + "p50": 263.39200139045715, + "p90": 361.6959899663925, + "p95": 364.4479885697365, + "p99": 374.59200620651245 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19367936, - "combineLogicalBytes": 19367936, - "fanoutMean": 5.27734375, - "recvTokensMax": 182, - "stragglerRank": 7, + "dispatchLogicalBytes": 6988800, + "combineLogicalBytes": 13977600, + "fanoutMean": 
5.33203125, + "recvTokensMax": 179, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 @@ -50199,35 +52295,35 @@ "tokensPerRank": 64, "globalTokens": 512, "dispatch": { - "p50": 97.120001912117, - "p90": 111.00800335407257, - "p95": 117.11999773979187, - "p99": 134.39999520778656 + "p50": 199.5519995689392, + "p90": 287.55199909210205, + "p95": 291.6480004787445, + "p99": 305.5360019207001 }, "combine": { - "p50": 87.39200234413147, - "p90": 99.32799637317657, - "p95": 105.6319996714592, - "p99": 121.18399888277054 + "p50": 73.34399968385696, + "p90": 85.02399921417236, + "p95": 86.5280032157898, + "p99": 89.72799777984619 }, "roundtrip": { - "p50": 159.2320054769516, - "p90": 177.2480010986328, - "p95": 184.28799510002136, - "p99": 207.71199464797974 + "p50": 254.72000241279602, + "p90": 339.83999490737915, + "p95": 342.97600388526917, + "p99": 349.5680093765259 }, "isolatedSum": { - "p50": 184.51200425624847, - "p90": 210.33599972724915, - "p95": 222.75199741125107, - "p99": 255.5839940905571 + "p50": 272.8959992527962, + "p90": 372.5759983062744, + "p95": 378.1760036945343, + "p99": 395.26399970054626 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 38535168, - "combineLogicalBytes": 38535168, - "fanoutMean": 5.25, - "recvTokensMax": 358, - "stragglerRank": 6, + "dispatchLogicalBytes": 13987840, + "combineLogicalBytes": 27975680, + "fanoutMean": 5.3359375, + "recvTokensMax": 355, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 @@ -50236,35 +52332,35 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 115.23199826478958, - "p90": 132.4159950017929, - "p95": 140.47999680042267, - "p99": 171.64799571037292 + "p50": 206.33600652217865, + "p90": 288.32000494003296, + "p95": 292.4480140209198, + "p99": 296.671986579895 }, "combine": { - "p50": 102.84800082445145, - "p90": 114.07999694347382, - "p95": 119.1679984331131, - "p99": 129.60000336170197 + "p50": 86.87999844551086, + "p90": 
100.19200295209885, + "p95": 104.63999956846237, + "p99": 326.24000310897827 }, "roundtrip": { - "p50": 195.90400159358978, - "p90": 210.11200547218323, - "p95": 217.15199947357178, - "p99": 243.74400079250336 + "p50": 274.944007396698, + "p90": 355.0719916820526, + "p95": 358.8480055332184, + "p99": 364.8959994316101 }, "isolatedSum": { - "p50": 218.07999908924103, - "p90": 246.49599194526672, - "p95": 259.64799523353577, - "p99": 301.2479990720749 + "p50": 293.2160049676895, + "p90": 388.5120078921318, + "p95": 397.0880135893822, + "p99": 622.9119896888733 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 76869632, - "combineLogicalBytes": 76869632, - "fanoutMean": 5.236328125, - "recvTokensMax": 688, - "stragglerRank": 4, + "dispatchLogicalBytes": 27837440, + "combineLogicalBytes": 55674880, + "fanoutMean": 5.3095703125, + "recvTokensMax": 699, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 @@ -50272,16 +52368,16 @@ ] }, { - "id": "cx-49529f9d", - "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-moderate|8|decode|normal|none|none|0|tuned||14ded8461f2636c", - "colorKey": "h200_f2b19f62", - "comparisonKey": "cc27e02aea0a210a", + "id": "cx-0c56b994", + "identity": "h100|deepep|6144|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h100_a96c99f3", + "comparisonKey": "b1bf09d425749f09", "schemaVersion": 3, - "generatedAt": "2026-06-27T00:04:04.313162+00:00", + "generatedAt": "2026-06-27T11:13:21.071476+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h200-dgxc-slurm_11", - "sku": "h200", + "runner": "h100-dgxc-slurm_18", + "sku": "h100", "backend": "deepep", "phase": "decode", "mode": "normal", @@ -50289,21 +52385,22 @@ "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", + "topologyClass": "h100-nvlink-island", 
"transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H200 EP8 · deepep · bf16 · zipf-moderate", + "label": "H100 EP8 · deepep · fp8", + "model": "MiniMax-M3", "shape": { - "hidden": 7168, + "hidden": 6144, "topk": 8, "experts": 256, - "routing": "zipf-moderate", - "routingLabel": "zipf-moderate", + "routing": "uniform", + "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, - "dispatchDtype": "bf16", + "dispatchDtype": "fp8", "activationProfile": "normal", "combineQuantMode": "none" }, @@ -50324,8 +52421,8 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "14ded8461f2636c", - "workloadId": "set:8:120a8dc1dba92ca9", + "traceSignature": "ac583971f94b176", + "workloadId": "set:8:2e0df6a62cd0143e", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -50333,45 +52430,45 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28272072315", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272072315", - "createdAt": "2026-06-27T00:02:38Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28287494014", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28287494014", + "createdAt": "2026-06-27T11:13:21.071476+00:00", + "sha": "df7fddee0a275156d4c72fa006cd2b73bce72613" }, "rows": [ { "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 72.95999675989151, - "p90": 100.73599964380264, - "p95": 110.88000237941742, - "p99": 152.99199521541595 + "p50": 98.68799895048141, + "p90": 103.26399654150009, + "p95": 105.27999699115753, + "p99": 110.11199653148651 }, "combine": { - "p50": 65.2799978852272, - "p90": 80.9599980711937, - "p95": 85.28000116348267, - "p99": 102.1760031580925 + "p50": 69.24799829721451, + "p90": 71.16799801588058, + "p95": 72.51200079917908, + "p99": 74.97599720954895 }, "roundtrip": { - 
"p50": 121.08799815177917, - "p90": 155.20000457763672, - "p95": 166.27199947834015, - "p99": 225.11999309062958 + "p50": 197.40800559520721, + "p90": 202.4639993906021, + "p95": 204.96000349521637, + "p99": 210.87999641895294 }, "isolatedSum": { - "p50": 138.2399946451187, - "p90": 181.69599771499634, - "p95": 196.16000354290009, - "p99": 255.16799837350845 + "p50": 167.93599724769592, + "p90": 174.43199455738068, + "p95": 177.7919977903366, + "p99": 185.08799374103546 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 444416, - "combineLogicalBytes": 444416, - "fanoutMean": 3.875, - "recvTokensMax": 8, - "stragglerRank": 2, + "dispatchLogicalBytes": 270336, + "combineLogicalBytes": 540672, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 @@ -50380,35 +52477,35 @@ "tokensPerRank": 2, "globalTokens": 16, "dispatch": { - "p50": 73.2479989528656, - "p90": 105.47199845314026, - "p95": 114.84800279140472, - "p99": 135.74400544166565 + "p50": 71.23199850320816, + "p90": 100.73599964380264, + "p95": 103.26399654150009, + "p99": 108.83200168609619 }, "combine": { - "p50": 67.61600077152252, - "p90": 79.83999699354172, - "p95": 83.5840031504631, - "p99": 92.99200028181076 + "p50": 58.27200040221214, + "p90": 69.95200365781784, + "p95": 71.68000191450119, + "p99": 75.45600086450577 }, "roundtrip": { - "p50": 119.64800208806992, - "p90": 145.56799829006195, - "p95": 150.91200172901154, - "p99": 165.18400609493256 + "p50": 151.96800231933594, + "p90": 197.24799692630768, + "p95": 199.71199333667755, + "p99": 207.93600380420685 }, "isolatedSum": { - "p50": 140.86399972438812, - "p90": 185.31199544668198, - "p95": 198.43200594186783, - "p99": 228.7360057234764 + "p50": 129.5039989054203, + "p90": 170.68800330162048, + "p95": 174.94399845600128, + "p99": 184.28800255060196 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 845824, - "combineLogicalBytes": 845824, - "fanoutMean": 3.6875, - 
"recvTokensMax": 16, - "stragglerRank": 7, + "dispatchLogicalBytes": 528384, + "combineLogicalBytes": 1056768, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 @@ -50417,35 +52514,35 @@ "tokensPerRank": 4, "globalTokens": 32, "dispatch": { - "p50": 73.79200309515, - "p90": 102.88000106811523, - "p95": 112.0000034570694, - "p99": 131.8719983100891 + "p50": 71.84000313282013, + "p90": 94.87999975681305, + "p95": 98.49599748849869, + "p99": 103.93600165843964 }, "combine": { - "p50": 67.80800223350525, - "p90": 78.8159966468811, - "p95": 83.29600095748901, - "p99": 102.08000242710114 + "p50": 60.447998344898224, + "p90": 67.1359971165657, + "p95": 68.64000111818314, + "p99": 72.95999675989151 }, "roundtrip": { - "p50": 120.60800194740295, - "p90": 144.44799721240997, - "p95": 152.67199277877808, - "p99": 166.59200191497803 + "p50": 154.40000593662262, + "p90": 196.31999731063843, + "p95": 197.79199361801147, + "p99": 202.2400051355362 }, "isolatedSum": { - "p50": 141.60000532865524, - "p90": 181.69599771499634, - "p95": 195.2960044145584, - "p99": 233.95200073719025 + "p50": 132.28800147771835, + "p90": 162.01599687337875, + "p95": 167.13599860668182, + "p99": 176.89599841833115 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1691648, - "combineLogicalBytes": 1691648, - "fanoutMean": 3.6875, - "recvTokensMax": 32, - "stragglerRank": 6, + "dispatchLogicalBytes": 1062912, + "combineLogicalBytes": 2125824, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 @@ -50454,35 +52551,35 @@ "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 73.79200309515, - "p90": 97.75999933481216, - "p95": 105.92000186443329, - "p99": 117.69600212574005 + "p50": 93.88799965381622, + "p90": 101.02400183677673, + "p95": 103.42399775981903, + "p99": 116.12799763679504 }, "combine": { - "p50": 68.06399673223495, - "p90": 81.56800270080566, 
- "p95": 87.39200234413147, - "p99": 104.44799810647964 + "p50": 66.3359984755516, + "p90": 71.48800045251846, + "p95": 73.02399724721909, + "p99": 77.31200009584427 }, "roundtrip": { - "p50": 121.31199985742569, - "p90": 153.98399531841278, - "p95": 162.78399527072906, - "p99": 199.5519995689392 + "p50": 193.6960071325302, + "p90": 200.00000298023224, + "p95": 202.5279998779297, + "p99": 206.56000077724457 }, "isolatedSum": { - "p50": 141.85599982738495, - "p90": 179.32800203561783, - "p95": 193.31200420856476, - "p99": 222.1440002322197 + "p50": 160.22399812936783, + "p90": 172.5120022892952, + "p95": 176.44799500703812, + "p99": 193.4399977326393 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 3354624, - "combineLogicalBytes": 3354624, - "fanoutMean": 3.65625, - "recvTokensMax": 64, - "stragglerRank": 4, + "dispatchLogicalBytes": 2131968, + "combineLogicalBytes": 4263936, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 @@ -50491,35 +52588,35 @@ "tokensPerRank": 16, "globalTokens": 128, "dispatch": { - "p50": 73.44000041484833, - "p90": 97.69599884748459, - "p95": 103.74400019645691, - "p99": 117.15199798345566 + "p50": 92.96000003814697, + "p90": 100.99200159311295, + "p95": 102.78400033712387, + "p99": 106.78400099277496 }, "combine": { - "p50": 69.98399645090103, - "p90": 83.16799998283386, - "p95": 88.51200342178345, - "p99": 98.59199821949005 + "p50": 67.52000004053116, + "p90": 72.9919970035553, + "p95": 74.30399954319, + "p99": 78.59200239181519 }, "roundtrip": { - "p50": 125.91999769210815, - "p90": 152.0320028066635, - "p95": 167.7439957857132, - "p99": 200.54399967193604 + "p50": 196.76800072193146, + "p90": 203.0400037765503, + "p95": 205.1199972629547, + "p99": 208.8640034198761 }, "isolatedSum": { - "p50": 143.42399686574936, - "p90": 180.86399883031845, - "p95": 192.25600361824036, - "p99": 215.7439962029457 + "p50": 160.48000007867813, + "p90": 173.98399859666824, + 
"p95": 177.08799988031387, + "p99": 185.37600338459015 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 6537216, - "combineLogicalBytes": 6537216, - "fanoutMean": 3.5625, - "recvTokensMax": 127, - "stragglerRank": 6, + "dispatchLogicalBytes": 4251648, + "combineLogicalBytes": 8503296, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 @@ -50528,35 +52625,35 @@ "tokensPerRank": 32, "globalTokens": 256, "dispatch": { - "p50": 85.4400023818016, - "p90": 105.05600273609161, - "p95": 111.93600296974182, - "p99": 135.48800349235535 + "p50": 72.92799651622772, + "p90": 95.48799693584442, + "p95": 99.20000284910202, + "p99": 104.8320010304451 }, "combine": { - "p50": 76.12799853086472, - "p90": 88.60799670219421, - "p95": 92.41600334644318, - "p99": 124.06399846076965 + "p50": 66.78400188684464, + "p90": 73.37599992752075, + "p95": 74.75200295448303, + "p99": 78.17599922418594 }, "roundtrip": { - "p50": 136.4479959011078, - "p90": 159.04000401496887, - "p95": 166.81599617004395, - "p99": 204.12799715995789 + "p50": 160.51200032234192, + "p90": 202.07999646663666, + "p95": 204.79999482631683, + "p99": 209.60000157356262 }, "isolatedSum": { - "p50": 161.56800091266632, - "p90": 193.66399943828583, - "p95": 204.352006316185, - "p99": 259.552001953125 + "p50": 139.71199840307236, + "p90": 168.86399686336517, + "p95": 173.95200580358505, + "p99": 183.00800025463104 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 12859392, - "combineLogicalBytes": 12859392, - "fanoutMean": 3.50390625, - "recvTokensMax": 255, - "stragglerRank": 4, + "dispatchLogicalBytes": 8454144, + "combineLogicalBytes": 16908288, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 @@ -50565,35 +52662,35 @@ "tokensPerRank": 64, "globalTokens": 512, "dispatch": { - "p50": 99.55199807882309, - "p90": 115.84000289440155, - "p95": 124.79999661445618, - "p99": 
159.5200002193451 + "p50": 96.09600156545639, + "p90": 101.72799974679947, + "p95": 107.4879989027977, + "p99": 478.08000445365906 }, "combine": { - "p50": 86.65599673986435, - "p90": 98.68799895048141, - "p95": 104.032002389431, - "p99": 120.28799951076508 + "p50": 82.07999914884567, + "p90": 87.10400015115738, + "p95": 87.8399983048439, + "p99": 89.82399851083755 }, "roundtrip": { - "p50": 162.23999857902527, - "p90": 177.7919977903366, - "p95": 186.62400543689728, - "p99": 207.58399367332458 + "p50": 175.58400332927704, + "p90": 211.96800470352173, + "p95": 215.03999829292297, + "p99": 219.9999988079071 }, "isolatedSum": { - "p50": 186.20799481868744, - "p90": 214.52800184488297, - "p95": 228.83199900388718, - "p99": 279.80799973011017 + "p50": 178.17600071430206, + "p90": 188.83199989795685, + "p95": 195.3279972076416, + "p99": 567.9040029644966 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 25145344, - "combineLogicalBytes": 25145344, - "fanoutMean": 3.42578125, - "recvTokensMax": 510, - "stragglerRank": 6, + "dispatchLogicalBytes": 16711680, + "combineLogicalBytes": 33423360, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 @@ -50602,34 +52699,34 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 123.16799908876419, - "p90": 138.08000087738037, - "p95": 149.59999918937683, - "p99": 160.35200655460358 + "p50": 85.82399785518646, + "p90": 103.10400277376175, + "p95": 106.11200332641602, + "p99": 116.60800129175186 }, "combine": { - "p50": 112.47999966144562, - "p90": 122.36800044775009, - "p95": 127.45599448680878, - "p99": 136.9280070066452 + "p50": 91.45600348711014, + "p90": 99.35999661684036, + "p95": 102.62399911880493, + "p99": 148.3200043439865 }, "roundtrip": { - "p50": 213.4079933166504, - "p90": 239.16800320148468, - "p95": 253.6959946155548, - "p99": 450.3040015697479 + "p50": 200.6720006465912, + "p90": 229.18400168418884, + "p95": 231.64799809455872, 
+ "p99": 236.86400055885315 }, "isolatedSum": { - "p50": 235.6479987502098, - "p90": 260.44800132513046, - "p95": 277.0559936761856, - "p99": 297.2800135612488 + "p50": 177.2800013422966, + "p90": 202.4639993906021, + "p95": 208.73600244522095, + "p99": 264.9280056357384 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 49946624, - "combineLogicalBytes": 49946624, - "fanoutMean": 3.40234375, - "recvTokensMax": 1022, + "dispatchLogicalBytes": 33288192, + "combineLogicalBytes": 66576384, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, "stragglerRank": 1, "correct": true, "samplesPooled": 600, @@ -50638,38 +52735,39 @@ ] }, { - "id": "cx-904f847b", - "identity": "h200|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-moderate+eplb|8|decode|normal|none|none|0|tuned||a8f501af7004836", - "colorKey": "h200_bac4102c", - "comparisonKey": "6234055b9069f2f2", + "id": "cx-55a4c230", + "identity": "h100|deepep|6144|8|256|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h100_97196257", + "comparisonKey": "8ab5124e24ec36ab", "schemaVersion": 3, - "generatedAt": "2026-06-27T00:04:21.213602+00:00", + "generatedAt": "2026-06-26T23:52:02.860609+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h200-dgxc-slurm_0", - "sku": "h200", + "runner": "h100-dgxc-slurm_19", + "sku": "h100", "backend": "deepep", "phase": "decode", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h100-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H200 EP8 · deepep · bf16 · zipf-moderate+eplb", + "label": "H100 EP8 · deepep · fp8", + "model": "MiniMax-M3", "shape": { - "hidden": 7168, + "hidden": 6144, "topk": 8, - "experts": 288, - "routing": "zipf-moderate", - 
"routingLabel": "zipf-moderate+eplb", + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", - "eplbEnabled": true, - "dispatchDtype": "bf16", + "eplbEnabled": false, + "dispatchDtype": "fp8", "activationProfile": "normal", "combineQuantMode": "none" }, @@ -50690,18 +52788,18 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "a8f501af7004836", - "workloadId": "set:8:120a8dc1dba92ca9", + "traceSignature": "ac583971f94b176", + "workloadId": "set:8:2e0df6a62cd0143e", "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": 4.927734375, - "eplbImbalanceAfter": 1.0006103515625, + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, "backendVersion": "1.2.1", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28272075655", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272075655", - "createdAt": "2026-06-27T00:02:45Z", + "id": "28271706435", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271706435", + "createdAt": "2026-06-26T23:52:02.860609+00:00", "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" }, "rows": [ @@ -50709,35 +52807,35 @@ "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 71.99999690055847, - "p90": 99.35999661684036, - "p95": 108.47999900579453, - "p99": 130.8480054140091 + "p50": 196.8960016965866, + "p90": 227.77600586414337, + "p95": 297.40801453590393, + "p99": 503.32802534103394 }, "combine": { - "p50": 67.1359971165657, - "p90": 80.64000308513641, - "p95": 84.44800227880478, - "p99": 108.12799632549286 + "p50": 57.920001447200775, + "p90": 62.144000083208084, + "p95": 67.10399687290192, + "p99": 282.0799946784973 }, "roundtrip": { - "p50": 121.08799815177917, - "p90": 149.4079977273941, - "p95": 161.24799847602844, - "p99": 199.8080015182495 + "p50": 237.40799725055695, + "p90": 243.77599358558655, + 
"p95": 245.31200528144836, + "p99": 250.0160038471222 }, "isolatedSum": { - "p50": 139.13599401712418, - "p90": 179.99999970197678, - "p95": 192.9280012845993, - "p99": 238.97600173950195 + "p50": 254.81600314378738, + "p90": 289.92000594735146, + "p95": 364.51201140880585, + "p99": 785.4080200195312 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 616448, - "combineLogicalBytes": 616448, - "fanoutMean": 5.375, + "dispatchLogicalBytes": 270336, + "combineLogicalBytes": 540672, + "fanoutMean": 5.5, "recvTokensMax": 7, - "stragglerRank": 6, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 @@ -50746,35 +52844,35 @@ "tokensPerRank": 2, "globalTokens": 16, "dispatch": { - "p50": 73.98399710655212, - "p90": 101.79200023412704, - "p95": 111.7120012640953, - "p99": 146.33600413799286 + "p50": 197.1839964389801, + "p90": 204.92799580097198, + "p95": 207.45599269866943, + "p99": 214.6880030632019 }, "combine": { - "p50": 68.7360018491745, - "p90": 82.04799890518188, - "p95": 88.73599767684937, - "p99": 105.21599650382996 + "p50": 58.49599838256836, + "p90": 60.92799827456474, + "p95": 63.26399743556976, + "p99": 70.65600156784058 }, "roundtrip": { - "p50": 124.41600114107132, - "p90": 160.0320041179657, - "p95": 172.86400496959686, - "p99": 196.44799828529358 + "p50": 237.56800591945648, + "p90": 243.96799504756927, + "p95": 247.29600548744202, + "p99": 255.61600923538208 }, "isolatedSum": { - "p50": 142.71999895572662, - "p90": 183.83999913930893, - "p95": 200.44799894094467, - "p99": 251.55200064182281 + "p50": 255.67999482154846, + "p90": 265.8559940755367, + "p95": 270.7199901342392, + "p99": 285.3440046310425 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1204224, - "combineLogicalBytes": 1204224, - "fanoutMean": 5.25, - "recvTokensMax": 14, - "stragglerRank": 5, + "dispatchLogicalBytes": 528384, + "combineLogicalBytes": 1056768, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 0, "correct": true, "samplesPooled": 
600, "trials": 3 @@ -50783,35 +52881,35 @@ "tokensPerRank": 4, "globalTokens": 32, "dispatch": { - "p50": 76.22399926185608, - "p90": 108.76800119876862, - "p95": 123.1359988451004, - "p99": 148.8640010356903 + "p50": 200.47999918460846, + "p90": 282.71999955177307, + "p95": 291.20001196861267, + "p99": 401.2480080127716 }, "combine": { - "p50": 68.7360018491745, - "p90": 82.14399963617325, - "p95": 88.54400366544724, - "p99": 105.02400249242783 + "p50": 59.90400165319443, + "p90": 66.84800237417221, + "p95": 69.5360004901886, + "p99": 75.68000257015228 }, "roundtrip": { - "p50": 124.25599992275238, - "p90": 160.0320041179657, - "p95": 170.01600563526154, - "p99": 244.89599466323853 + "p50": 243.20000410079956, + "p90": 321.9839930534363, + "p95": 326.7199993133545, + "p99": 334.75199341773987 }, "isolatedSum": { - "p50": 144.96000111103058, - "p90": 190.91200083494186, - "p95": 211.68000251054764, - "p99": 253.88800352811813 + "p50": 260.3840008378029, + "p90": 349.5680019259453, + "p95": 360.73601245880127, + "p99": 476.9280105829239 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2394112, - "combineLogicalBytes": 2394112, - "fanoutMean": 5.21875, - "recvTokensMax": 24, - "stragglerRank": 5, + "dispatchLogicalBytes": 1062912, + "combineLogicalBytes": 2125824, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -50820,35 +52918,35 @@ "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 74.62400197982788, - "p90": 101.21600329875946, - "p95": 111.10399663448334, - "p99": 145.47200500965118 + "p50": 200.6399929523468, + "p90": 261.9200050830841, + "p95": 265.6959891319275, + "p99": 275.1680016517639 }, "combine": { - "p50": 69.34399902820587, - "p90": 84.70399677753448, - "p95": 89.50400352478027, - "p99": 104.44799810647964 + "p50": 60.99199876189232, + "p90": 69.2799985408783, + "p95": 69.88800317049026, + "p99": 75.32799988985062 }, "roundtrip": { - "p50": 
125.37600100040436, - "p90": 159.4880074262619, - "p95": 170.1119989156723, - "p99": 203.23200523853302 + "p50": 239.9040013551712, + "p90": 296.9599962234497, + "p95": 299.8400032520294, + "p99": 307.5200021266937 }, "isolatedSum": { - "p50": 143.96800100803375, - "p90": 185.92000007629395, - "p95": 200.6080001592636, - "p99": 249.92000311613083 + "p50": 261.6319917142391, + "p90": 331.2000036239624, + "p95": 335.58399230241776, + "p99": 350.49600154161453 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4630528, - "combineLogicalBytes": 4630528, - "fanoutMean": 5.046875, - "recvTokensMax": 45, - "stragglerRank": 0, + "dispatchLogicalBytes": 2131968, + "combineLogicalBytes": 4263936, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 @@ -50857,35 +52955,35 @@ "tokensPerRank": 16, "globalTokens": 128, "dispatch": { - "p50": 72.73600250482559, - "p90": 97.75999933481216, - "p95": 108.03200304508209, - "p99": 141.9840008020401 + "p50": 201.75999402999878, + "p90": 280.3199887275696, + "p95": 284.89598631858826, + "p99": 351.48799419403076 }, "combine": { - "p50": 70.36799937486649, - "p90": 88.28800171613693, - "p95": 94.68799829483032, - "p99": 104.54399883747101 + "p50": 61.76000088453293, + "p90": 69.72800195217133, + "p95": 72.92799651622772, + "p99": 133.82400572299957 }, "roundtrip": { - "p50": 127.00800597667694, - "p90": 156.12800419330597, - "p95": 166.9439971446991, - "p99": 198.33600521087646 + "p50": 245.82399427890778, + "p90": 325.53601264953613, + "p95": 328.8959860801697, + "p99": 600.3199815750122 }, "isolatedSum": { - "p50": 143.10400187969208, - "p90": 186.0480010509491, - "p95": 202.72000133991241, - "p99": 246.5279996395111 + "p50": 263.5199949145317, + "p90": 350.0479906797409, + "p95": 357.823982834816, + "p99": 485.31199991703033 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 9447424, - "combineLogicalBytes": 9447424, - "fanoutMean": 5.1484375, - 
"recvTokensMax": 91, - "stragglerRank": 5, + "dispatchLogicalBytes": 4251648, + "combineLogicalBytes": 8503296, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 @@ -50894,35 +52992,35 @@ "tokensPerRank": 32, "globalTokens": 256, "dispatch": { - "p50": 81.98399841785431, - "p90": 106.65600001811981, - "p95": 116.22399836778641, - "p99": 165.69599509239197 + "p50": 200.73600113391876, + "p90": 285.0559949874878, + "p95": 287.9680097103119, + "p99": 303.42400074005127 }, "combine": { - "p50": 76.9599974155426, - "p90": 90.87999910116196, - "p95": 97.120001912117, - "p99": 118.23999881744385 + "p50": 66.78400188684464, + "p90": 78.20799946784973, + "p95": 79.93599772453308, + "p99": 83.8719978928566 }, "roundtrip": { - "p50": 135.74400544166565, - "p90": 164.48000073432922, - "p95": 176.70400440692902, - "p99": 220.22399306297302 + "p50": 249.9839961528778, + "p90": 319.487988948822, + "p95": 328.8959860801697, + "p99": 336.35199069976807 }, "isolatedSum": { - "p50": 158.9439958333969, - "p90": 197.53599911928177, - "p95": 213.3440002799034, - "p99": 283.9359939098358 + "p50": 267.5200030207634, + "p90": 363.2639944553375, + "p95": 367.90400743484497, + "p99": 387.29599863290787 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19023872, - "combineLogicalBytes": 19023872, - "fanoutMean": 5.18359375, - "recvTokensMax": 178, - "stragglerRank": 5, + "dispatchLogicalBytes": 8454144, + "combineLogicalBytes": 16908288, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 @@ -50931,35 +53029,35 @@ "tokensPerRank": 64, "globalTokens": 512, "dispatch": { - "p50": 97.21600264310837, - "p90": 125.50400197505951, - "p95": 140.99200069904327, - "p99": 185.85599958896637 + "p50": 200.73600113391876, + "p90": 281.2480032444, + "p95": 289.11998867988586, + "p99": 304.9919903278351 }, "combine": { - "p50": 87.77599781751633, - "p90": 
105.53599894046783, - "p95": 113.0559965968132, - "p99": 125.63200294971466 + "p50": 77.11999863386154, + "p90": 84.1279998421669, + "p95": 86.40000224113464, + "p99": 95.77599912881851 }, "roundtrip": { - "p50": 159.7760021686554, - "p90": 186.65599822998047, - "p95": 201.53599977493286, - "p99": 221.69600427150726 + "p50": 259.5840096473694, + "p90": 337.8559947013855, + "p95": 341.3439989089966, + "p99": 350.5280017852783 }, "isolatedSum": { - "p50": 184.9920004606247, - "p90": 231.04000091552734, - "p95": 254.04799729585648, - "p99": 311.48800253868103 + "p50": 277.8559997677803, + "p90": 365.3760030865669, + "p95": 375.5199909210205, + "p99": 400.7679894566536 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 38148096, - "combineLogicalBytes": 38148096, - "fanoutMean": 5.197265625, - "recvTokensMax": 350, - "stragglerRank": 6, + "dispatchLogicalBytes": 16711680, + "combineLogicalBytes": 33423360, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -50968,35 +53066,35 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 118.40000003576279, - "p90": 141.7279988527298, - "p95": 151.296004652977, - "p99": 174.84800517559052 + "p50": 212.5760018825531, + "p90": 282.1759879589081, + "p95": 286.5920066833496, + "p99": 307.96799063682556 }, "combine": { - "p50": 103.74400019645691, - "p90": 121.21599912643433, - "p95": 128.60800325870514, - "p99": 147.13600277900696 + "p50": 92.06400066614151, + "p90": 98.11200201511383, + "p95": 99.48799759149551, + "p99": 103.74400019645691 }, "roundtrip": { - "p50": 198.08000326156616, - "p90": 219.7760045528412, - "p95": 227.55199670791626, - "p99": 265.3760015964508 + "p50": 289.44000601768494, + "p90": 355.3279936313629, + "p95": 359.71200466156006, + "p99": 366.91200733184814 }, "isolatedSum": { - "p50": 222.1440002322197, - "p90": 262.9439979791641, - "p95": 279.90400791168213, - "p99": 321.9840079545975 + "p50": 304.6400025486946, + 
"p90": 380.2879899740219, + "p95": 386.0800042748451, + "p99": 411.71199083328247 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 76955648, - "combineLogicalBytes": 76955648, - "fanoutMean": 5.2421875, - "recvTokensMax": 687, - "stragglerRank": 7, + "dispatchLogicalBytes": 33288192, + "combineLogicalBytes": 66576384, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -51004,16 +53102,16 @@ ] }, { - "id": "cx-06bd64b9", - "identity": "h200|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|decode|normal|none|none|0|tuned||a8f501af7004836", - "colorKey": "h200_1eda221e", - "comparisonKey": "00e2c45e1159b581", + "id": "cx-416fcf7d", + "identity": "h100|deepep|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h100_a96c99f3", + "comparisonKey": "59d5014bb7031dbe", "schemaVersion": 3, - "generatedAt": "2026-06-27T00:03:16.896756+00:00", + "generatedAt": "2026-06-27T10:13:04.882575+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h200-dgxc-slurm_3", - "sku": "h200", + "runner": "h100-dgxc-slurm_19", + "sku": "h100", "backend": "deepep", "phase": "decode", "mode": "normal", @@ -51021,21 +53119,22 @@ "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", + "topologyClass": "h100-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H200 EP8 · deepep · bf16 · zipf+eplb", + "label": "H100 EP8 · deepep · fp8", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, - "experts": 288, - "routing": "zipf", - "routingLabel": "zipf+eplb", + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", - "eplbEnabled": true, - "dispatchDtype": "bf16", + "eplbEnabled": false, + "dispatchDtype": "fp8", 
"activationProfile": "normal", "combineQuantMode": "none" }, @@ -51056,52 +53155,52 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "a8f501af7004836", - "workloadId": "set:8:f5576e2b712d38c3", + "traceSignature": "ac583971f94b176", + "workloadId": "set:8:d8d49658059863f2", "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": 4.927734375, - "eplbImbalanceAfter": 1.0006103515625, - "backendVersion": "1.2.1", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28272045914", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272045914", - "createdAt": "2026-06-27T00:01:50Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28286086353", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28286086353", + "createdAt": "2026-06-27T10:13:04.882575+00:00", + "sha": "76a3032d20288ee17220eb6099346f74d56ce005" }, "rows": [ { "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 72.15999811887741, - "p90": 99.0080013871193, - "p95": 105.56799918413162, - "p99": 131.80799782276154 + "p50": 100.03200173377991, + "p90": 104.44799810647964, + "p95": 106.30399733781815, + "p99": 110.59200018644333 }, "combine": { - "p50": 68.70400160551071, - "p90": 83.23200047016144, - "p95": 88.8959988951683, - "p99": 117.40799993276596 + "p50": 74.65600222349167, + "p90": 76.38400048017502, + "p95": 77.69600301980972, + "p99": 81.7599967122078 }, "roundtrip": { - "p50": 121.60000205039978, - "p90": 151.8079936504364, - "p95": 162.88000345230103, - "p99": 197.63199985027313 + "p50": 195.64799964427948, + "p90": 208.3200067281723, + "p95": 210.65600216388702, + "p99": 216.15999937057495 }, "isolatedSum": { - "p50": 140.86399972438812, - "p90": 182.24000185728073, - "p95": 194.46399807929993, - "p99": 
249.2159977555275 + "p50": 174.68800395727158, + "p90": 180.83199858665466, + "p95": 184.00000035762787, + "p99": 192.35199689865112 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 616448, - "combineLogicalBytes": 616448, - "fanoutMean": 5.375, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, "recvTokensMax": 7, "stragglerRank": 4, "correct": true, @@ -51112,35 +53211,35 @@ "tokensPerRank": 2, "globalTokens": 16, "dispatch": { - "p50": 73.7600028514862, - "p90": 99.96800124645233, - "p95": 106.97600245475769, - "p99": 125.63200294971466 + "p50": 71.74400240182877, + "p90": 101.08800232410431, + "p95": 102.62399911880493, + "p99": 109.15199667215347 }, "combine": { - "p50": 67.58400052785873, - "p90": 79.52000200748444, - "p95": 84.35200154781342, - "p99": 95.61599791049957 + "p50": 64.19199705123901, + "p90": 74.43200051784515, + "p95": 75.00799745321274, + "p99": 78.62400263547897 }, "roundtrip": { - "p50": 121.95199728012085, - "p90": 150.52799880504608, - "p95": 158.9760035276413, - "p99": 188.51199746131897 + "p50": 158.59200060367584, + "p90": 206.81600272655487, + "p95": 209.9519968032837, + "p99": 367.71199107170105 }, "isolatedSum": { - "p50": 141.34400337934494, - "p90": 179.48800325393677, - "p95": 191.3280040025711, - "p99": 221.24800086021423 + "p50": 135.93599945306778, + "p90": 175.52000284194946, + "p95": 177.63199657201767, + "p99": 187.77599930763245 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1204224, - "combineLogicalBytes": 1204224, - "fanoutMean": 5.25, - "recvTokensMax": 14, - "stragglerRank": 6, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -51149,35 +53248,35 @@ "tokensPerRank": 4, "globalTokens": 32, "dispatch": { - "p50": 74.23999905586243, - "p90": 98.36799651384354, - "p95": 105.8880016207695, - "p99": 117.60000139474869 + "p50": 
71.77600264549255, + "p90": 102.78400033712387, + "p95": 104.76800054311752, + "p99": 109.63200032711029 }, "combine": { - "p50": 68.57600063085556, - "p90": 81.82399719953537, - "p95": 86.496002972126, - "p99": 94.62399780750275 + "p50": 65.8240020275116, + "p90": 77.85599678754807, + "p95": 78.5600021481514, + "p99": 81.82399719953537 }, "roundtrip": { - "p50": 123.19999933242798, - "p90": 152.92799472808838, - "p95": 164.12800550460815, - "p99": 221.98399901390076 + "p50": 159.71200168132782, + "p90": 209.98400449752808, + "p95": 212.09600567817688, + "p99": 216.92800521850586 }, "isolatedSum": { - "p50": 142.815999686718, - "p90": 180.1919937133789, - "p95": 192.3840045928955, - "p99": 212.22399920225143 + "p50": 137.60000467300415, + "p90": 180.63999712467194, + "p95": 183.32800269126892, + "p99": 191.45599752664566 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2394112, - "combineLogicalBytes": 2394112, - "fanoutMean": 5.21875, - "recvTokensMax": 24, - "stragglerRank": 1, + "dispatchLogicalBytes": 1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 @@ -51186,34 +53285,34 @@ "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 76.06399804353714, - "p90": 117.8240031003952, - "p95": 132.03200697898865, - "p99": 183.45600366592407 + "p50": 97.79199957847595, + "p90": 103.61599922180176, + "p95": 106.175996363163, + "p99": 111.90400272607803 }, "combine": { - "p50": 69.37599927186966, - "p90": 85.02399921417236, - "p95": 89.66399729251862, - "p99": 100.3199964761734 + "p50": 75.71200281381607, + "p90": 77.98399776220322, + "p95": 79.77599650621414, + "p99": 83.64800363779068 }, "roundtrip": { - "p50": 123.16799908876419, - "p90": 152.8639942407608, - "p95": 160.96000373363495, - "p99": 184.1920018196106 + "p50": 195.71200013160706, + "p90": 209.6640020608902, + "p95": 211.96800470352173, + "p99": 217.8879976272583 }, "isolatedSum": { 
- "p50": 145.4399973154068, - "p90": 202.84800231456757, - "p95": 221.69600427150726, - "p99": 283.7760001420975 + "p50": 173.50400239229202, + "p90": 181.59999698400497, + "p95": 185.95199286937714, + "p99": 195.5520063638687 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4630528, - "combineLogicalBytes": 4630528, - "fanoutMean": 5.046875, - "recvTokensMax": 45, + "dispatchLogicalBytes": 2487296, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, "stragglerRank": 4, "correct": true, "samplesPooled": 600, @@ -51223,35 +53322,35 @@ "tokensPerRank": 16, "globalTokens": 128, "dispatch": { - "p50": 76.67200267314911, - "p90": 107.10400342941284, - "p95": 114.20799791812897, - "p99": 128.9599984884262 + "p50": 97.9200005531311, + "p90": 102.91200131177902, + "p95": 105.34399747848511, + "p99": 110.04800349473953 }, "combine": { - "p50": 72.25599884986877, - "p90": 88.76799792051315, - "p95": 96.00000083446503, - "p99": 114.75200206041336 + "p50": 77.31200009584427, + "p90": 80.79999685287476, + "p95": 81.98399841785431, + "p99": 87.00799942016602 }, "roundtrip": { - "p50": 128.31999361515045, - "p90": 158.6879938840866, - "p95": 168.89600455760956, - "p99": 192.89599359035492 + "p50": 197.02400267124176, + "p90": 212.3199999332428, + "p95": 214.36800062656403, + "p99": 219.200000166893 }, "isolatedSum": { - "p50": 148.92800152301788, - "p90": 195.872001349926, - "p95": 210.207998752594, - "p99": 243.71200054883957 + "p50": 175.23200064897537, + "p90": 183.71199816465378, + "p95": 187.32799589633942, + "p99": 197.05600291490555 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 9447424, - "combineLogicalBytes": 9447424, - "fanoutMean": 5.1484375, - "recvTokensMax": 91, - "stragglerRank": 7, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -51260,35 +53359,35 @@ "tokensPerRank": 32, 
"globalTokens": 256, "dispatch": { - "p50": 83.39200168848038, - "p90": 103.80800068378448, - "p95": 109.43999886512756, - "p99": 126.71999633312225 + "p50": 73.98399710655212, + "p90": 102.55999863147736, + "p95": 105.02400249242783, + "p99": 107.87200182676315 }, "combine": { - "p50": 77.18399912118912, - "p90": 89.79199826717377, - "p95": 95.10400146245956, - "p99": 105.98400235176086 + "p50": 73.21599870920181, + "p90": 85.56800335645676, + "p95": 86.46400272846222, + "p99": 90.33600240945816 }, "roundtrip": { - "p50": 134.783998131752, - "p90": 157.79200196266174, - "p95": 167.13599860668182, - "p99": 210.94399690628052 + "p50": 168.03200542926788, + "p90": 216.73600375652313, + "p95": 218.36799383163452, + "p99": 223.1999933719635 }, "isolatedSum": { - "p50": 160.5760008096695, - "p90": 193.59999895095825, - "p95": 204.54400032758713, - "p99": 232.70399868488312 + "p50": 147.19999581575394, + "p90": 188.1280019879341, + "p95": 191.48800522089005, + "p99": 198.2080042362213 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19023872, - "combineLogicalBytes": 19023872, - "fanoutMean": 5.18359375, - "recvTokensMax": 178, - "stragglerRank": 7, + "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -51297,35 +53396,35 @@ "tokensPerRank": 64, "globalTokens": 512, "dispatch": { - "p50": 93.56799721717834, - "p90": 113.63200098276138, - "p95": 120.2239990234375, - "p99": 133.4719955921173 + "p50": 97.98400104045868, + "p90": 142.752006649971, + "p95": 145.82400023937225, + "p99": 154.27200496196747 }, "combine": { - "p50": 86.40000224113464, - "p90": 101.72799974679947, - "p95": 105.6319996714592, - "p99": 116.48000031709671 + "p50": 92.19200164079666, + "p90": 112.96000331640244, + "p95": 113.82400244474411, + "p99": 118.07999759912491 }, "roundtrip": { - "p50": 157.9200029373169, - "p90": 181.34400248527527, - "p95": 
187.42400407791138, - "p99": 211.87199652194977 + "p50": 179.77599799633026, + "p90": 277.3439884185791, + "p95": 285.535991191864, + "p99": 456.64000511169434 }, "isolatedSum": { - "p50": 179.967999458313, - "p90": 215.36000072956085, - "p95": 225.8559986948967, - "p99": 249.95199590921402 + "p50": 190.17600268125534, + "p90": 255.71200996637344, + "p95": 259.64800268411636, + "p99": 272.3520025610924 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 38148096, - "combineLogicalBytes": 38148096, - "fanoutMean": 5.197265625, - "recvTokensMax": 350, - "stragglerRank": 7, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -51334,35 +53433,35 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 117.15199798345566, - "p90": 137.1839940547943, - "p95": 142.33599603176117, - "p99": 165.79200327396393 + "p50": 90.43200314044952, + "p90": 111.42399907112122, + "p95": 113.24799805879593, + "p99": 117.40799993276596 }, "combine": { - "p50": 106.84800148010254, - "p90": 119.32799965143204, - "p95": 122.81599640846252, - "p99": 133.53599607944489 + "p50": 100.5759984254837, + "p90": 112.47999966144562, + "p95": 114.01599645614624, + "p99": 117.53600090742111 }, "roundtrip": { - "p50": 197.56799936294556, - "p90": 213.85599672794342, - "p95": 221.3120013475418, - "p99": 245.37600576877594 + "p50": 219.7120040655136, + "p90": 246.87999486923218, + "p95": 249.2160052061081, + "p99": 254.07999753952026 }, "isolatedSum": { - "p50": 223.9999994635582, - "p90": 256.51199370622635, - "p95": 265.1519924402237, - "p99": 299.3279993534088 + "p50": 191.00800156593323, + "p90": 223.90399873256683, + "p95": 227.26399451494217, + "p99": 234.94400084018707 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 76955648, - "combineLogicalBytes": 76955648, - "fanoutMean": 5.2421875, - "recvTokensMax": 687, - "stragglerRank": 7, + 
"dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -51370,28 +53469,29 @@ ] }, { - "id": "cx-0d6ef23b", - "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|ac583971f94b176", - "colorKey": "h200_c851a534", - "comparisonKey": "6b4f4d7f65293019", + "id": "cx-d4dbb29d", + "identity": "h100|deepep|7168|8|256|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h100_97196257", + "comparisonKey": "9687217877b9ce9c", "schemaVersion": 3, - "generatedAt": "2026-06-26T17:29:45.312905+00:00", + "generatedAt": "2026-06-26T23:48:10.138934+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h200-dgxc-slurm_2", - "sku": "h200", + "runner": "h100-dgxc-slurm_03", + "sku": "h100", "backend": "deepep", "phase": "decode", "mode": "normal", - "resourceMode": "normalized", - "suite": "resource-constrained", + "resourceMode": "tuned", + "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h100-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H200 EP8 · deepep · bf16 (norm)", + "label": "H100 EP8 · deepep · fp8", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, @@ -51401,19 +53501,19 @@ "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, - "dispatchDtype": "bf16", + "dispatchDtype": "fp8", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { - "requestedFraction": 0.18, - "achievedFraction": 0.1818, - "configuredUnits": 24, + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, "deviceUnits": 132, - "resourceClass": 
"resource-constrained", - "conformanceClass": "resource-conforming", + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", "fixedKernel": false, - "paretoEligible": true + "paretoEligible": false }, "placement": { "kind": "packed", @@ -51431,41 +53531,41 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28254392935", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254392935", - "createdAt": "2026-06-26T17:28:22Z", - "sha": "60dec7d70f554e252fec87709e2be52752947db1" + "id": "28271579958", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271579958", + "createdAt": "2026-06-26T23:48:10.138934+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" }, "rows": [ { "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 74.11199808120728, - "p90": 94.11200135946274, - "p95": 104.35199737548828, - "p99": 138.0160003900528 + "p50": 193.05600225925446, + "p90": 204.3839991092682, + "p95": 210.52800118923187, + "p99": 277.9200077056885 }, "combine": { - "p50": 68.41599941253662, - "p90": 78.72000336647034, - "p95": 83.48800241947174, - "p99": 105.72800040245056 + "p50": 60.95999851822853, + "p90": 63.29599767923355, + "p95": 65.31199812889099, + "p99": 68.76800209283829 }, "roundtrip": { - "p50": 124.4800016283989, - "p90": 144.31999623775482, - "p95": 156.3200056552887, - "p99": 193.53599846363068 + "p50": 237.63200640678406, + "p90": 244.25600469112396, + "p95": 246.14399671554565, + "p99": 269.4079875946045 }, "isolatedSum": { - "p50": 142.5279974937439, - "p90": 172.83200472593307, - "p95": 187.83999979496002, - "p99": 243.74400079250336 + "p50": 254.016000777483, + "p90": 267.67999678850174, + "p95": 275.83999931812286, + "p99": 346.68800979852676 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 630784, + "dispatchLogicalBytes": 315392, "combineLogicalBytes": 630784, "fanoutMean": 5.5, 
"recvTokensMax": 7, @@ -51478,31 +53578,31 @@ "tokensPerRank": 2, "globalTokens": 16, "dispatch": { - "p50": 74.33599978685379, - "p90": 99.42399710416794, - "p95": 109.66400057077408, - "p99": 131.71200454235077 + "p50": 192.9280012845993, + "p90": 200.6720006465912, + "p95": 204.79999482631683, + "p99": 264.5759880542755 }, "combine": { - "p50": 69.85600292682648, - "p90": 83.00799876451492, - "p95": 90.40000289678574, - "p99": 114.33599889278412 + "p50": 62.272001057863235, + "p90": 64.7680014371872, + "p95": 67.391999065876, + "p99": 73.08799773454666 }, "roundtrip": { - "p50": 122.43200093507767, - "p90": 144.6080058813095, - "p95": 154.62400019168854, - "p99": 173.69599640369415 + "p50": 235.6480062007904, + "p90": 243.0720031261444, + "p95": 245.60000002384186, + "p99": 259.71201062202454 }, "isolatedSum": { - "p50": 144.19200271368027, - "p90": 182.43199586868286, - "p95": 200.06400346755981, - "p99": 246.0480034351349 + "p50": 255.20000234246254, + "p90": 265.4400020837784, + "p95": 272.19199389219284, + "p99": 337.6639857888222 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1232896, + "dispatchLogicalBytes": 616448, "combineLogicalBytes": 1232896, "fanoutMean": 5.375, "recvTokensMax": 13, @@ -51515,35 +53615,35 @@ "tokensPerRank": 4, "globalTokens": 32, "dispatch": { - "p50": 74.97599720954895, - "p90": 95.29600292444229, - "p95": 104.12800312042236, - "p99": 139.74399864673615 + "p50": 197.24799692630768, + "p90": 286.080002784729, + "p95": 290.71998596191406, + "p99": 302.2400140762329 }, "combine": { - "p50": 69.40799951553345, - "p90": 81.63200318813324, - "p95": 88.22400122880936, - "p99": 119.4240003824234 + "p50": 63.32799792289734, + "p90": 71.32799923419952, + "p95": 75.45600086450577, + "p99": 82.62400329113007 }, "roundtrip": { - "p50": 123.74400347471237, - "p90": 150.36800503730774, - "p95": 160.3199988603592, - "p99": 204.8960030078888 + "p50": 242.94400215148926, + "p90": 349.40800070762634, + "p95": 354.4960021972656, + "p99": 
367.13600158691406 }, "isolatedSum": { - "p50": 144.3839967250824, - "p90": 176.92800611257553, - "p95": 192.35200434923172, - "p99": 259.16799902915955 + "p50": 260.575994849205, + "p90": 357.4080020189285, + "p95": 366.17598682641983, + "p99": 384.864017367363 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2480128, + "dispatchLogicalBytes": 1240064, "combineLogicalBytes": 2480128, "fanoutMean": 5.40625, "recvTokensMax": 29, - "stragglerRank": 4, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 @@ -51552,31 +53652,31 @@ "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 74.78400319814682, - "p90": 92.25600212812424, - "p95": 102.91200131177902, - "p99": 123.16799908876419 + "p50": 196.383997797966, + "p90": 251.583993434906, + "p95": 254.8159956932068, + "p99": 268.15998554229736 }, "combine": { - "p50": 70.52800059318542, - "p90": 81.95199817419052, - "p95": 87.48800307512283, - "p99": 100.51199793815613 + "p50": 63.87200206518173, + "p90": 72.73600250482559, + "p95": 73.5040009021759, + "p99": 77.95199751853943 }, "roundtrip": { - "p50": 124.03199821710587, - "p90": 147.20000326633453, - "p95": 153.9199948310852, - "p99": 180.00000715255737 + "p50": 242.11199581623077, + "p90": 299.3920147418976, + "p95": 304.1599988937378, + "p99": 410.8160138130188 }, "isolatedSum": { - "p50": 145.31200379133224, - "p90": 174.20800030231476, - "p95": 190.40000438690186, - "p99": 223.67999702692032 + "p50": 260.25599986314774, + "p90": 324.3199959397316, + "p95": 328.3199965953827, + "p99": 346.1119830608368 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, + "dispatchLogicalBytes": 2487296, "combineLogicalBytes": 4974592, "fanoutMean": 5.421875, "recvTokensMax": 47, @@ -51589,31 +53689,31 @@ "tokensPerRank": 16, "globalTokens": 128, "dispatch": { - "p50": 73.18399846553802, - "p90": 92.83199906349182, - "p95": 103.61599922180176, - "p99": 195.93599438667297 + "p50": 197.63199985027313, + "p90": 288.35201263427734, + 
"p95": 294.048011302948, + "p99": 322.04800844192505 }, "combine": { - "p50": 71.32799923419952, - "p90": 86.33600175380707, - "p95": 92.03200042247772, - "p99": 120.80000340938568 + "p50": 66.46399945020676, + "p90": 79.9039974808693, + "p95": 106.33599758148193, + "p99": 204.25599813461304 }, "roundtrip": { - "p50": 129.72800433635712, - "p90": 161.31199896335602, - "p95": 172.86400496959686, - "p99": 215.10399878025055 + "p50": 246.62399291992188, + "p90": 330.24001121520996, + "p95": 333.5359990596771, + "p99": 341.18399024009705 }, "isolatedSum": { - "p50": 144.51199769973755, - "p90": 179.1680008172989, - "p95": 195.64799964427948, - "p99": 316.73599779605865 + "p50": 264.0959993004799, + "p90": 368.25601011514664, + "p95": 400.38400888442993, + "p99": 526.3040065765381 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 9920512, + "dispatchLogicalBytes": 4960256, "combineLogicalBytes": 9920512, "fanoutMean": 5.40625, "recvTokensMax": 92, @@ -51626,31 +53726,31 @@ "tokensPerRank": 32, "globalTokens": 256, "dispatch": { - "p50": 82.2720006108284, - "p90": 100.80000013113022, - "p95": 108.92800241708755, - "p99": 134.88000631332397 + "p50": 198.40000569820404, + "p90": 284.35200452804565, + "p95": 288.06400299072266, + "p99": 295.9040105342865 }, "combine": { - "p50": 76.03199779987335, - "p90": 89.40800279378891, - "p95": 94.97600048780441, - "p99": 117.95199662446976 + "p50": 70.97599655389786, + "p90": 79.96799796819687, + "p95": 80.70400357246399, + "p99": 83.52000266313553 }, "roundtrip": { - "p50": 130.8480054140091, - "p90": 154.33600544929504, - "p95": 164.73600268363953, - "p99": 204.0639966726303 + "p50": 250.36799907684326, + "p90": 306.5919876098633, + "p95": 310.2079927921295, + "p99": 368.8639998435974 }, "isolatedSum": { - "p50": 158.30399841070175, - "p90": 190.20800292491913, - "p95": 203.90400290489197, - "p99": 252.83200293779373 + "p50": 269.3760022521019, + "p90": 364.3200024962425, + "p95": 368.76800656318665, + "p99": 379.424013197422 
}, "roundtripMeasured": true, - "dispatchLogicalBytes": 19726336, + "dispatchLogicalBytes": 9863168, "combineLogicalBytes": 19726336, "fanoutMean": 5.375, "recvTokensMax": 182, @@ -51663,31 +53763,31 @@ "tokensPerRank": 64, "globalTokens": 512, "dispatch": { - "p50": 91.32800251245499, - "p90": 110.04800349473953, - "p95": 116.86400324106216, - "p99": 146.84799313545227 + "p50": 198.65599274635315, + "p90": 284.8320007324219, + "p95": 289.69600796699524, + "p99": 304.4480085372925 }, "combine": { - "p50": 87.2960016131401, - "p90": 98.36799651384354, - "p95": 104.70400005578995, - "p99": 124.92799758911133 + "p50": 80.48000186681747, + "p90": 88.83199840784073, + "p95": 90.52799642086029, + "p99": 101.31199657917023 }, "roundtrip": { - "p50": 156.031996011734, - "p90": 173.24799299240112, - "p95": 180.38399517536163, - "p99": 215.39199352264404 + "p50": 260.96001267433167, + "p90": 351.80801153182983, + "p95": 355.55198788642883, + "p99": 367.0400083065033 }, "isolatedSum": { - "p50": 178.6240041255951, - "p90": 208.41600000858307, - "p95": 221.5680032968521, - "p99": 271.7759907245636 + "p50": 279.1359946131706, + "p90": 373.6639991402626, + "p95": 380.22400438785553, + "p99": 405.7600051164627 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 38993920, + "dispatchLogicalBytes": 19496960, "combineLogicalBytes": 38993920, "fanoutMean": 5.3125, "recvTokensMax": 367, @@ -51700,35 +53800,35 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 116.03199690580368, - "p90": 129.7599971294403, - "p95": 136.57599687576294, - "p99": 149.24800395965576 + "p50": 216.8319970369339, + "p90": 312.8640055656433, + "p95": 320.73599100112915, + "p99": 336.41600608825684 }, "combine": { - "p50": 103.42399775981903, - "p90": 116.54400080442429, - "p95": 123.3920007944107, - "p99": 141.95199310779572 + "p50": 98.94400089979172, + "p90": 112.83200234174728, + "p95": 113.79200220108032, + "p99": 119.13599818944931 }, "roundtrip": { - "p50": 192.54399836063385, - 
"p90": 208.8959962129593, - "p95": 215.64799547195435, - "p99": 228.7359982728958 + "p50": 303.2959997653961, + "p90": 388.0000114440918, + "p95": 392.2879993915558, + "p99": 401.2480080127716 }, "isolatedSum": { - "p50": 219.4559946656227, - "p90": 246.3039979338646, - "p95": 259.96799767017365, - "p99": 291.1999970674515 + "p50": 315.7759979367256, + "p90": 425.6960079073906, + "p95": 434.5279932022095, + "p99": 455.55200427770615 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, + "dispatchLogicalBytes": 38836224, "combineLogicalBytes": 77672448, "fanoutMean": 5.291015625, "recvTokensMax": 723, - "stragglerRank": 3, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 @@ -51736,50 +53836,51 @@ ] }, { - "id": "cx-0f126172", - "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|decode|normal|none|none|0|normalized|0.18|ffa946582edb500", - "colorKey": "h200_a1e795ec", - "comparisonKey": "467cf4a4daff1cff", + "id": "cx-52396484", + "identity": "h100|deepep|7168|8|384|fp8|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||d6c49ae98878760", + "colorKey": "h100_a96c99f3", + "comparisonKey": "7d245d1c48b9f399", "schemaVersion": 3, - "generatedAt": "2026-06-26T17:30:47.472039+00:00", + "generatedAt": "2026-06-27T11:15:21.281924+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h200-dgxc-slurm_12", - "sku": "h200", + "runner": "h100-dgxc-slurm_19", + "sku": "h100", "backend": "deepep", "phase": "decode", "mode": "normal", - "resourceMode": "normalized", - "suite": "resource-constrained", + "resourceMode": "tuned", + "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", + "topologyClass": "h100-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H200 EP8 · deepep · bf16 (norm) · balanced", + "label": "H100 EP8 · deepep · fp8", + "model": 
"Kimi-K2", "shape": { "hidden": 7168, "topk": 8, - "experts": 256, - "routing": "balanced", - "routingLabel": "balanced", + "experts": 384, + "routing": "uniform", + "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, - "dispatchDtype": "bf16", + "dispatchDtype": "fp8", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { - "requestedFraction": 0.18, - "achievedFraction": 0.1818, - "configuredUnits": 24, + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, "deviceUnits": 132, - "resourceClass": "resource-constrained", - "conformanceClass": "resource-conforming", + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", "fixedKernel": false, - "paretoEligible": true + "paretoEligible": false }, "placement": { "kind": "packed", @@ -51788,8 +53889,8 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "ffa946582edb500", - "workloadId": "set:8:7af12818400d6348", + "traceSignature": "d6c49ae98878760", + "workloadId": "set:8:9a27d0df4b17fa09", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -51797,45 +53898,45 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28254443915", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254443915", - "createdAt": "2026-06-26T17:29:22Z", - "sha": "60dec7d70f554e252fec87709e2be52752947db1" + "id": "28287500362", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28287500362", + "createdAt": "2026-06-27T11:15:21.281924+00:00", + "sha": "df7fddee0a275156d4c72fa006cd2b73bce72613" }, "rows": [ { "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 73.95199686288834, - "p90": 88.0960002541542, - "p95": 97.24800288677216, - "p99": 108.25599730014801 + "p50": 98.78399968147278, + "p90": 104.54399883747101, + 
"p95": 108.22399705648422, + "p99": 114.88000303506851 }, "combine": { - "p50": 70.91200351715088, - "p90": 81.60000294446945, - "p95": 87.26400136947632, - "p99": 97.28000313043594 + "p50": 71.45600020885468, + "p90": 73.34399968385696, + "p95": 74.49600100517273, + "p99": 145.88800072669983 }, "roundtrip": { - "p50": 125.2480000257492, - "p90": 149.63200688362122, - "p95": 157.85600244998932, - "p99": 175.04000663757324 + "p50": 201.12000405788422, + "p90": 207.2640061378479, + "p95": 210.11200547218323, + "p99": 237.59999871253967 }, "isolatedSum": { - "p50": 144.86400038003922, - "p90": 169.69600319862366, - "p95": 184.51200425624847, - "p99": 205.53600043058395 + "p50": 170.23999989032745, + "p90": 177.88799852132797, + "p95": 182.71999806165695, + "p99": 260.76800376176834 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 917504, - "combineLogicalBytes": 917504, - "fanoutMean": 8, + "dispatchLogicalBytes": 301056, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, "recvTokensMax": 8, - "stragglerRank": 5, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -51844,35 +53945,35 @@ "tokensPerRank": 2, "globalTokens": 16, "dispatch": { - "p50": 74.23999905586243, - "p90": 91.00800007581711, - "p95": 98.88000041246414, - "p99": 130.23999333381653 + "p50": 99.48799759149551, + "p90": 103.64799946546555, + "p95": 105.66399991512299, + "p99": 111.55200004577637 }, "combine": { - "p50": 70.52800059318542, - "p90": 79.71200346946716, - "p95": 85.50400286912918, - "p99": 106.46399855613708 + "p50": 72.95999675989151, + "p90": 74.5600014925003, + "p95": 75.99999755620956, + "p99": 78.97599786520004 }, "roundtrip": { - "p50": 123.6800029873848, - "p90": 142.07999408245087, - "p95": 152.99199521541595, - "p99": 184.35199558734894 + "p50": 203.19999754428864, + "p90": 207.13600516319275, + "p95": 210.1760059595108, + "p99": 213.82400393486023 }, "isolatedSum": { - "p50": 144.76799964904785, - "p90": 170.72000354528427, - "p95": 
184.38400328159332, - "p99": 236.7039918899536 + "p50": 172.44799435138702, + "p90": 178.20800095796585, + "p95": 181.66399747133255, + "p99": 190.5279979109764 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1835008, - "combineLogicalBytes": 1835008, - "fanoutMean": 8, - "recvTokensMax": 16, - "stragglerRank": 2, + "dispatchLogicalBytes": 609280, + "combineLogicalBytes": 1218560, + "fanoutMean": 5.3125, + "recvTokensMax": 14, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -51881,35 +53982,35 @@ "tokensPerRank": 4, "globalTokens": 32, "dispatch": { - "p50": 75.03999769687653, - "p90": 97.9200005531311, - "p95": 108.47999900579453, - "p99": 140.09599387645721 + "p50": 99.29600358009338, + "p90": 104.09600287675858, + "p95": 106.175996363163, + "p99": 110.49599945545197 }, "combine": { - "p50": 70.11199742555618, - "p90": 81.34400099515915, - "p95": 86.496002972126, - "p99": 99.29600358009338 + "p50": 72.06399738788605, + "p90": 74.17599856853485, + "p95": 75.52000135183334, + "p99": 79.74400371313095 }, "roundtrip": { - "p50": 125.69600343704224, - "p90": 151.36000514030457, - "p95": 159.55199301242828, - "p99": 178.3359944820404 + "p50": 202.72000133991241, + "p90": 207.90399610996246, + "p95": 211.0079973936081, + "p99": 221.24800086021423 }, "isolatedSum": { - "p50": 145.1519951224327, - "p90": 179.26400154829025, - "p95": 194.97600197792053, - "p99": 239.3919974565506 + "p50": 171.36000096797943, + "p90": 178.27200144529343, + "p95": 181.69599771499634, + "p99": 190.24000316858292 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 3670016, - "combineLogicalBytes": 3670016, - "fanoutMean": 8, - "recvTokensMax": 32, - "stragglerRank": 5, + "dispatchLogicalBytes": 1204224, + "combineLogicalBytes": 2408448, + "fanoutMean": 5.25, + "recvTokensMax": 26, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -51918,35 +54019,35 @@ "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 
73.56800138950348, - "p90": 94.17600184679031, - "p95": 102.62399911880493, - "p99": 126.14400684833527 + "p50": 100.16000270843506, + "p90": 104.35199737548828, + "p95": 106.27199709415436, + "p99": 111.93600296974182 }, "combine": { - "p50": 70.72000205516815, - "p90": 82.04799890518188, - "p95": 86.43200248479843, - "p99": 96.47999703884125 + "p50": 73.11999797821045, + "p90": 75.16799867153168, + "p95": 76.80000364780426, + "p99": 83.20000022649765 }, "roundtrip": { - "p50": 125.69600343704224, - "p90": 148.0640023946762, - "p95": 156.76799416542053, - "p99": 182.72000551223755 + "p50": 203.42400670051575, + "p90": 208.12800526618958, + "p95": 210.78400313854218, + "p99": 215.29600024223328 }, "isolatedSum": { - "p50": 144.28800344467163, - "p90": 176.2240007519722, - "p95": 189.05600160360336, - "p99": 222.6240038871765 + "p50": 173.2800006866455, + "p90": 179.51999604701996, + "p95": 183.07200074195862, + "p99": 195.13600319623947 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 7340032, - "combineLogicalBytes": 7340032, - "fanoutMean": 8, - "recvTokensMax": 64, - "stragglerRank": 5, + "dispatchLogicalBytes": 2415616, + "combineLogicalBytes": 4831232, + "fanoutMean": 5.265625, + "recvTokensMax": 48, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -51955,34 +54056,34 @@ "tokensPerRank": 16, "globalTokens": 128, "dispatch": { - "p50": 74.46400076150894, - "p90": 90.71999788284302, - "p95": 96.44799679517746, - "p99": 112.19199746847153 + "p50": 98.88000041246414, + "p90": 103.58399897813797, + "p95": 106.27199709415436, + "p99": 112.22399771213531 }, "combine": { - "p50": 76.03199779987335, - "p90": 84.70399677753448, - "p95": 91.16800129413605, - "p99": 104.54399883747101 + "p50": 75.93599706888199, + "p90": 78.3040001988411, + "p95": 80.60800284147263, + "p99": 82.91199803352356 }, "roundtrip": { - "p50": 129.60000336170197, - "p90": 153.6960005760193, - "p95": 161.3440066576004, - "p99": 196.28800451755524 + "p50": 
205.72799444198608, + "p90": 210.01599729061127, + "p95": 212.6079946756363, + "p99": 216.89599752426147 }, "isolatedSum": { - "p50": 150.4959985613823, - "p90": 175.4239946603775, - "p95": 187.6159980893135, - "p99": 216.73599630594254 + "p50": 174.81599748134613, + "p90": 181.88799917697906, + "p95": 186.87999993562698, + "p99": 195.13599574565887 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 14680064, - "combineLogicalBytes": 14680064, - "fanoutMean": 8, - "recvTokensMax": 128, + "dispatchLogicalBytes": 4924416, + "combineLogicalBytes": 9848832, + "fanoutMean": 5.3671875, + "recvTokensMax": 91, "stragglerRank": 7, "correct": true, "samplesPooled": 600, @@ -51992,35 +54093,35 @@ "tokensPerRank": 32, "globalTokens": 256, "dispatch": { - "p50": 83.20000022649765, - "p90": 100.12800246477127, - "p95": 107.45599865913391, - "p99": 122.3360002040863 + "p50": 99.93600100278854, + "p90": 142.71999895572662, + "p95": 161.5999937057495, + "p99": 181.11999332904816 }, "combine": { - "p50": 80.79999685287476, - "p90": 89.88799899816513, - "p95": 95.36000341176987, - "p99": 100.54399818181992 + "p50": 82.07999914884567, + "p90": 102.01600193977356, + "p95": 109.40799862146378, + "p99": 114.52800035476685 }, "roundtrip": { - "p50": 142.17600226402283, - "p90": 155.45600652694702, - "p95": 165.3439998626709, - "p99": 182.0800006389618 + "p50": 211.64800226688385, + "p90": 216.35200083255768, + "p95": 218.23999285697937, + "p99": 223.32799434661865 }, "isolatedSum": { - "p50": 163.9999970793724, - "p90": 190.0160014629364, - "p95": 202.81600207090378, - "p99": 222.87999838590622 + "p50": 182.01600015163422, + "p90": 244.73600089550018, + "p95": 271.0079923272133, + "p99": 295.647993683815 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 29360128, - "combineLogicalBytes": 29360128, - "fanoutMean": 8, - "recvTokensMax": 256, - "stragglerRank": 5, + "dispatchLogicalBytes": 9748480, + "combineLogicalBytes": 19496960, + "fanoutMean": 5.3125, + "recvTokensMax": 178, 
+ "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -52029,35 +54130,35 @@ "tokensPerRank": 64, "globalTokens": 512, "dispatch": { - "p50": 107.61599987745285, - "p90": 121.0239976644516, - "p95": 127.07200646400452, - "p99": 148.73600006103516 + "p50": 101.40799731016159, + "p90": 206.81600272655487, + "p95": 216.86400473117828, + "p99": 370.88000774383545 }, "combine": { - "p50": 95.87199985980988, - "p90": 105.3759977221489, - "p95": 112.60800063610077, - "p99": 123.29600006341934 + "p50": 91.16800129413605, + "p90": 95.29600292444229, + "p95": 99.5199978351593, + "p99": 122.40000069141388 }, "roundtrip": { - "p50": 176.67199671268463, - "p90": 191.80800020694733, - "p95": 203.5840004682541, - "p99": 225.98400712013245 + "p50": 221.37600183486938, + "p90": 226.43199563026428, + "p95": 228.7680059671402, + "p99": 233.34400355815887 }, "isolatedSum": { - "p50": 203.48799973726273, - "p90": 226.3999953866005, - "p95": 239.68000710010529, - "p99": 272.0320001244545 + "p50": 192.57599860429764, + "p90": 302.11200565099716, + "p95": 316.3840025663376, + "p99": 493.28000843524933 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 58720256, - "combineLogicalBytes": 58720256, - "fanoutMean": 8, - "recvTokensMax": 512, - "stragglerRank": 5, + "dispatchLogicalBytes": 19418112, + "combineLogicalBytes": 38836224, + "fanoutMean": 5.291015625, + "recvTokensMax": 372, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -52066,35 +54167,35 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 133.66399705410004, - "p90": 146.04799449443817, - "p95": 152.41600573062897, - "p99": 162.56000101566315 + "p50": 106.23999685049057, + "p90": 109.8880022764206, + "p95": 112.5440001487732, + "p99": 117.5680011510849 }, "combine": { - "p50": 118.52800101041794, - "p90": 127.68000364303589, - "p95": 130.91200590133667, - "p99": 144.67200636863708 + "p50": 107.77600109577179, + "p90": 110.20799726247787, + "p95": 
111.48799955844879, + "p99": 114.56000059843063 }, "roundtrip": { - "p50": 225.92000663280487, - "p90": 240.48000574111938, - "p95": 251.3279914855957, - "p99": 700.223982334137 + "p50": 240.35200476646423, + "p90": 247.1040040254593, + "p95": 249.82400238513947, + "p99": 295.80798745155334 }, "isolatedSum": { - "p50": 252.19199806451797, - "p90": 273.72799813747406, - "p95": 283.32801163196564, - "p99": 307.23200738430023 + "p50": 214.01599794626236, + "p90": 220.09599953889847, + "p95": 224.03199970722198, + "p99": 232.12800174951553 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 117440512, - "combineLogicalBytes": 117440512, - "fanoutMean": 8, - "recvTokensMax": 1024, - "stragglerRank": 5, + "dispatchLogicalBytes": 38757376, + "combineLogicalBytes": 77514752, + "fanoutMean": 5.2802734375, + "recvTokensMax": 707, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -52102,50 +54203,51 @@ ] }, { - "id": "cx-8e3ecfeb", - "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|decode|normal|none|none|0|normalized|0.18|14ded8461f2636c", - "colorKey": "h200_0a93a01f", - "comparisonKey": "c7e35a057338b2fa", + "id": "cx-8e5c4d34", + "identity": "h100|deepep|7168|8|384|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||d6c49ae98878760", + "colorKey": "h100_97196257", + "comparisonKey": "969c3964291e1270", "schemaVersion": 3, - "generatedAt": "2026-06-26T17:31:04.173894+00:00", + "generatedAt": "2026-06-26T23:50:43.012530+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h200-dgxc-slurm_6", - "sku": "h200", + "runner": "h100-dgxc-slurm_19", + "sku": "h100", "backend": "deepep", "phase": "decode", "mode": "normal", - "resourceMode": "normalized", - "suite": "resource-constrained", + "resourceMode": "tuned", + "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", + 
"measurementContract": "runtime-visible-v1", + "topologyClass": "h100-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H200 EP8 · deepep · bf16 (norm) · zipf", + "label": "H100 EP8 · deepep · fp8", + "model": "Kimi-K2", "shape": { "hidden": 7168, "topk": 8, - "experts": 256, - "routing": "zipf", - "routingLabel": "zipf", + "experts": 384, + "routing": "uniform", + "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, - "dispatchDtype": "bf16", + "dispatchDtype": "fp8", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { - "requestedFraction": 0.18, - "achievedFraction": 0.1818, - "configuredUnits": 24, + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, "deviceUnits": 132, - "resourceClass": "resource-constrained", - "conformanceClass": "resource-conforming", + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", "fixedKernel": false, - "paretoEligible": true + "paretoEligible": false }, "placement": { "kind": "packed", @@ -52154,8 +54256,8 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "14ded8461f2636c", - "workloadId": "set:8:f5576e2b712d38c3", + "traceSignature": "d6c49ae98878760", + "workloadId": "set:8:9a27d0df4b17fa09", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -52163,45 +54265,45 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28254452252", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254452252", - "createdAt": "2026-06-26T17:29:31Z", - "sha": "60dec7d70f554e252fec87709e2be52752947db1" + "id": "28271660154", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271660154", + "createdAt": "2026-06-26T23:50:43.012530+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" }, 
"rows": [ { "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 74.27199929952621, - "p90": 108.83200168609619, - "p95": 118.49600076675415, - "p99": 155.5200070142746 + "p50": 198.7520009279251, + "p90": 206.2399983406067, + "p95": 209.56799387931824, + "p99": 221.69600427150726 }, "combine": { - "p50": 68.38399916887283, - "p90": 84.03199911117554, - "p95": 90.20800143480301, - "p99": 114.88000303506851 + "p50": 60.83200126886368, + "p90": 64.31999802589417, + "p95": 65.98400324583054, + "p99": 69.05599683523178 }, "roundtrip": { - "p50": 123.07199835777283, - "p90": 153.08800339698792, - "p95": 165.8560037612915, - "p99": 205.9199959039688 + "p50": 242.71999299526215, + "p90": 250.07998943328857, + "p95": 254.5279860496521, + "p99": 290.0159955024719 }, "isolatedSum": { - "p50": 142.65599846839905, - "p90": 192.86400079727173, - "p95": 208.70400220155716, - "p99": 270.4000100493431 + "p50": 259.5840021967888, + "p90": 270.55999636650085, + "p95": 275.5519971251488, + "p99": 290.75200110673904 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 444416, - "combineLogicalBytes": 444416, - "fanoutMean": 3.875, + "dispatchLogicalBytes": 301056, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, "recvTokensMax": 8, - "stragglerRank": 7, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 @@ -52210,35 +54312,35 @@ "tokensPerRank": 2, "globalTokens": 16, "dispatch": { - "p50": 73.95199686288834, - "p90": 97.82399982213974, - "p95": 106.6880002617836, - "p99": 132.9919993877411 + "p50": 205.53599298000336, + "p90": 313.6320114135742, + "p95": 323.8399922847748, + "p99": 375.5840063095093 }, "combine": { - "p50": 68.64000111818314, - "p90": 80.51200211048126, - "p95": 85.37600189447403, - "p99": 98.49599748849869 + "p50": 62.81600147485733, + "p90": 76.1599987745285, + "p95": 79.19999957084656, + "p99": 83.0719992518425 }, "roundtrip": { - "p50": 123.36000055074692, - "p90": 150.176003575325, - "p95": 158.4639996290207, - "p99": 
181.63199722766876 + "p50": 242.49599874019623, + "p90": 250.43201446533203, + "p95": 253.08799743652344, + "p99": 294.1119968891144 }, "isolatedSum": { - "p50": 142.59199798107147, - "p90": 178.336001932621, - "p95": 192.06400215625763, - "p99": 231.48799687623978 + "p50": 268.3519944548607, + "p90": 389.7920101881027, + "p95": 403.03999185562134, + "p99": 458.6560055613518 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 845824, - "combineLogicalBytes": 845824, - "fanoutMean": 3.6875, - "recvTokensMax": 16, - "stragglerRank": 5, + "dispatchLogicalBytes": 609280, + "combineLogicalBytes": 1218560, + "fanoutMean": 5.3125, + "recvTokensMax": 14, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 @@ -52247,35 +54349,35 @@ "tokensPerRank": 4, "globalTokens": 32, "dispatch": { - "p50": 73.5040009021759, - "p90": 95.551997423172, - "p95": 104.86400127410889, - "p99": 123.4240010380745 + "p50": 203.5519927740097, + "p90": 291.55200719833374, + "p95": 296.09599709510803, + "p99": 303.6159873008728 }, "combine": { - "p50": 67.80800223350525, - "p90": 78.46400141716003, - "p95": 84.95999872684479, - "p99": 125.2799928188324 + "p50": 63.26399743556976, + "p90": 73.98399710655212, + "p95": 75.83999633789062, + "p99": 80.09599894285202 }, "roundtrip": { - "p50": 122.78400361537933, - "p90": 150.65599977970123, - "p95": 159.07199680805206, - "p99": 200.51200687885284 + "p50": 247.42400646209717, + "p90": 336.67200803756714, + "p95": 339.4559919834137, + "p99": 346.20800614356995 }, "isolatedSum": { - "p50": 141.31200313568115, - "p90": 174.01599884033203, - "p95": 189.82400000095367, - "p99": 248.7039938569069 + "p50": 266.81599020957947, + "p90": 365.53600430488586, + "p95": 371.93599343299866, + "p99": 383.7119862437248 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1691648, - "combineLogicalBytes": 1691648, - "fanoutMean": 3.6875, - "recvTokensMax": 32, - "stragglerRank": 7, + "dispatchLogicalBytes": 1204224, + "combineLogicalBytes": 
2408448, + "fanoutMean": 5.25, + "recvTokensMax": 26, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -52284,35 +54386,35 @@ "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 75.23199915885925, - "p90": 103.32799702882767, - "p95": 111.87200248241425, - "p99": 143.26399564743042 + "p50": 199.45600628852844, + "p90": 207.8080028295517, + "p95": 213.02400529384613, + "p99": 235.29599606990814 }, "combine": { - "p50": 69.60000097751617, - "p90": 85.79199761152267, - "p95": 91.71199798583984, - "p99": 124.12799894809723 + "p50": 62.72000074386597, + "p90": 67.16799736022949, + "p95": 68.64000111818314, + "p99": 73.60000163316727 }, "roundtrip": { - "p50": 126.36800110340118, - "p90": 160.12799739837646, - "p95": 167.64800250530243, - "p99": 193.2159960269928 + "p50": 245.85600197315216, + "p90": 253.1839907169342, + "p95": 256.9279968738556, + "p99": 269.3119943141937 }, "isolatedSum": { - "p50": 144.83200013637543, - "p90": 189.11999464035034, - "p95": 203.5840004682541, - "p99": 267.39199459552765 + "p50": 262.1760070323944, + "p90": 274.9760001897812, + "p95": 281.66400641202927, + "p99": 308.8959977030754 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 3354624, - "combineLogicalBytes": 3354624, - "fanoutMean": 3.65625, - "recvTokensMax": 64, - "stragglerRank": 7, + "dispatchLogicalBytes": 2415616, + "combineLogicalBytes": 4831232, + "fanoutMean": 5.265625, + "recvTokensMax": 48, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 @@ -52321,35 +54423,35 @@ "tokensPerRank": 16, "globalTokens": 128, "dispatch": { - "p50": 77.40800082683563, - "p90": 104.63999956846237, - "p95": 113.43999952077866, - "p99": 144.0960019826889 + "p50": 204.22400534152985, + "p90": 292.60799288749695, + "p95": 296.3840067386627, + "p99": 434.30399894714355 }, "combine": { - "p50": 70.52800059318542, - "p90": 87.23200112581253, - "p95": 90.94399958848953, - "p99": 101.1200025677681 + "p50": 66.14399701356888, + "p90": 
75.55200159549713, + "p95": 76.1599987745285, + "p99": 79.8719972372055 }, "roundtrip": { - "p50": 127.6479959487915, - "p90": 161.85599565505981, - "p95": 175.7120043039322, - "p99": 230.27199506759644 + "p50": 250.59199333190918, + "p90": 335.32801270484924, + "p95": 340.2239978313446, + "p99": 366.5919899940491 }, "isolatedSum": { - "p50": 147.93600142002106, - "p90": 191.8720006942749, - "p95": 204.3839991092682, - "p99": 245.216004550457 + "p50": 270.3680023550987, + "p90": 368.1599944829941, + "p95": 372.5440055131912, + "p99": 514.1759961843491 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 6537216, - "combineLogicalBytes": 6537216, - "fanoutMean": 3.5625, - "recvTokensMax": 127, - "stragglerRank": 7, + "dispatchLogicalBytes": 4924416, + "combineLogicalBytes": 9848832, + "fanoutMean": 5.3671875, + "recvTokensMax": 91, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 @@ -52358,35 +54460,35 @@ "tokensPerRank": 32, "globalTokens": 256, "dispatch": { - "p50": 83.0719992518425, - "p90": 109.50399935245514, - "p95": 115.61600118875504, - "p99": 128.1599998474121 + "p50": 200.99200308322906, + "p90": 286.3039970397949, + "p95": 293.3120131492615, + "p99": 305.11999130249023 }, "combine": { - "p50": 77.34400033950806, - "p90": 91.64799749851227, - "p95": 95.61599791049957, - "p99": 112.73600161075592 + "p50": 70.88000327348709, + "p90": 75.83999633789062, + "p95": 78.11199873685837, + "p99": 86.84799820184708 }, "roundtrip": { - "p50": 132.60799646377563, - "p90": 157.0879966020584, - "p95": 165.0560051202774, - "p99": 194.20799612998962 + "p50": 253.31199169158936, + "p90": 259.71201062202454, + "p95": 262.4959945678711, + "p99": 270.9439992904663 }, "isolatedSum": { - "p50": 160.41599959135056, - "p90": 201.1519968509674, - "p95": 211.2319990992546, - "p99": 240.89600145816803 + "p50": 271.87200635671616, + "p90": 362.14399337768555, + "p95": 371.42401188611984, + "p99": 391.9679895043373 }, "roundtripMeasured": true, - 
"dispatchLogicalBytes": 12859392, - "combineLogicalBytes": 12859392, - "fanoutMean": 3.50390625, - "recvTokensMax": 255, - "stragglerRank": 1, + "dispatchLogicalBytes": 9748480, + "combineLogicalBytes": 19496960, + "fanoutMean": 5.3125, + "recvTokensMax": 178, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 @@ -52395,34 +54497,34 @@ "tokensPerRank": 64, "globalTokens": 512, "dispatch": { - "p50": 97.15200215578079, - "p90": 111.35999858379364, - "p95": 121.31199985742569, - "p99": 134.8479986190796 + "p50": 204.22400534152985, + "p90": 293.8239872455597, + "p95": 299.74400997161865, + "p99": 323.4559893608093 }, "combine": { - "p50": 87.5839963555336, - "p90": 99.80800002813339, - "p95": 104.06400263309479, - "p99": 116.95999652147293 + "p50": 81.82399719953537, + "p90": 93.40800344944, + "p95": 96.63999825716019, + "p99": 99.64799880981445 }, "roundtrip": { - "p50": 161.9199961423874, - "p90": 177.72799730300903, - "p95": 184.67199802398682, - "p99": 235.61599850654602 + "p50": 268.73600482940674, + "p90": 351.6159951686859, + "p95": 354.4960021972656, + "p99": 361.6639971733093 }, "isolatedSum": { - "p50": 184.7359985113144, - "p90": 211.16799861192703, - "p95": 225.37600249052048, - "p99": 251.80799514055252 + "p50": 286.0480025410652, + "p90": 387.2319906949997, + "p95": 396.38400822877884, + "p99": 423.1039881706238 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 25145344, - "combineLogicalBytes": 25145344, - "fanoutMean": 3.42578125, - "recvTokensMax": 510, + "dispatchLogicalBytes": 19418112, + "combineLogicalBytes": 38836224, + "fanoutMean": 5.291015625, + "recvTokensMax": 372, "stragglerRank": 5, "correct": true, "samplesPooled": 600, @@ -52432,34 +54534,34 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 122.97599762678146, - "p90": 147.10399508476257, - "p95": 156.25600516796112, - "p99": 183.07200074195862 + "p50": 224.2240011692047, + "p90": 294.5919930934906, + "p95": 298.4960079193115, + "p99": 
310.8159899711609 }, "combine": { - "p50": 110.49599945545197, - "p90": 123.87199699878693, - "p95": 129.40800189971924, - "p99": 150.751993060112 + "p50": 99.90400075912476, + "p90": 110.33599823713303, + "p95": 111.35999858379364, + "p99": 114.68800157308578 }, "roundtrip": { - "p50": 208.73600244522095, - "p90": 225.43999552726746, - "p95": 233.024001121521, - "p99": 256.415992975235 + "p50": 310.88000535964966, + "p90": 375.2320110797882, + "p95": 378.04800271987915, + "p99": 386.46399974823 }, "isolatedSum": { - "p50": 233.47199708223343, - "p90": 270.9759920835495, - "p95": 285.66400706768036, - "p99": 333.8239938020706 + "p50": 324.12800192832947, + "p90": 404.9279913306236, + "p95": 409.85600650310516, + "p99": 425.5039915442467 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 49946624, - "combineLogicalBytes": 49946624, - "fanoutMean": 3.40234375, - "recvTokensMax": 1022, + "dispatchLogicalBytes": 38757376, + "combineLogicalBytes": 77514752, + "fanoutMean": 5.2802734375, + "recvTokensMax": 707, "stragglerRank": 5, "correct": true, "samplesPooled": 600, @@ -52468,16 +54570,16 @@ ] }, { - "id": "cx-9efea369", - "identity": "h200|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|decode|normal|none|none|0|normalized|0.18|a8f501af7004836", - "colorKey": "h200_993777bf", - "comparisonKey": "cdec001c60a84b85", + "id": "cx-4e4a7f2d", + "identity": "h100|deepep|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|ac583971f94b176", + "colorKey": "h100_91aa6e56", + "comparisonKey": "511cf861d6b2e142", "schemaVersion": 3, - "generatedAt": "2026-06-26T17:46:59.245966+00:00", + "generatedAt": "2026-06-26T17:28:00.849157+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h200-dgxc-slurm_6", - "sku": "h200", + "runner": "h100-dgxc-slurm_18", + "sku": "h100", "backend": "deepep", "phase": "decode", "mode": "normal", @@ -52485,21 +54587,22 @@ "suite": "resource-constrained", 
"comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", + "topologyClass": "h100-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H200 EP8 · deepep · bf16 (norm) · zipf+eplb", + "label": "H100 EP8 · deepep · fp8 (norm)", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, - "experts": 288, - "routing": "zipf", - "routingLabel": "zipf+eplb", + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", - "eplbEnabled": true, - "dispatchDtype": "bf16", + "eplbEnabled": false, + "dispatchDtype": "fp8", "activationProfile": "normal", "combineQuantMode": "none" }, @@ -52520,54 +54623,54 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "a8f501af7004836", - "workloadId": "set:8:f5576e2b712d38c3", + "traceSignature": "ac583971f94b176", + "workloadId": "set:8:d8d49658059863f2", "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": 4.927734375, - "eplbImbalanceAfter": 1.0006103515625, + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, "backendVersion": "1.2.1", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28255303840", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28255303840", - "createdAt": "2026-06-26T17:45:35Z", - "sha": "36d3eb6c3c7386d3220c873d305410219c5c0f17" + "id": "28254323956", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254323956", + "createdAt": "2026-06-26T17:28:00.849157+00:00", + "sha": "60dec7d70f554e252fec87709e2be52752947db1" }, "rows": [ { "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 72.89600372314453, - "p90": 99.45599734783173, - "p95": 108.73600095510483, - "p99": 128.86400520801544 + "p50": 97.98400104045868, + "p90": 102.88000106811523, + "p95": 104.38399761915207, + "p99": 
110.20799726247787 }, "combine": { - "p50": 67.19999760389328, - "p90": 78.3040001988411, - "p95": 82.46400207281113, - "p99": 102.65599936246872 + "p50": 72.28799909353256, + "p90": 74.14399832487106, + "p95": 75.29599964618683, + "p99": 78.65600287914276 }, "roundtrip": { - "p50": 119.32799965143204, - "p90": 147.77599275112152, - "p95": 155.07200360298157, - "p99": 171.03999853134155 + "p50": 190.65600633621216, + "p90": 195.90400159358978, + "p95": 198.30399751663208, + "p99": 202.72000133991241 }, "isolatedSum": { - "p50": 140.0960013270378, - "p90": 177.75999754667282, - "p95": 191.20000302791595, - "p99": 231.52000457048416 + "p50": 170.27200013399124, + "p90": 177.0239993929863, + "p95": 179.6799972653389, + "p99": 188.86400014162064 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 616448, - "combineLogicalBytes": 616448, - "fanoutMean": 5.375, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, "recvTokensMax": 7, - "stragglerRank": 6, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -52576,34 +54679,34 @@ "tokensPerRank": 2, "globalTokens": 16, "dispatch": { - "p50": 73.18399846553802, - "p90": 94.27200257778168, - "p95": 104.5759990811348, - "p99": 122.68800288438797 + "p50": 72.15999811887741, + "p90": 99.90400075912476, + "p95": 102.52799838781357, + "p99": 105.0880029797554 }, "combine": { - "p50": 68.09599697589874, - "p90": 81.15199953317642, - "p95": 86.17600053548813, - "p99": 113.3119985461235 + "p50": 63.35999816656113, + "p90": 73.18399846553802, + "p95": 73.98399710655212, + "p99": 78.46400141716003 }, "roundtrip": { - "p50": 120.31999975442886, - "p90": 147.45600521564484, - "p95": 157.82399475574493, - "p99": 190.08000195026398 + "p50": 153.82400155067444, + "p90": 194.43200528621674, + "p95": 196.28800451755524, + "p99": 201.05600357055664 }, "isolatedSum": { - "p50": 141.27999544143677, - "p90": 175.4240021109581, - "p95": 190.75199961662292, - "p99": 236.00000143051147 
+ "p50": 135.51999628543854, + "p90": 173.08799922466278, + "p95": 176.5119954943657, + "p99": 183.55200439691544 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1204224, - "combineLogicalBytes": 1204224, - "fanoutMean": 5.25, - "recvTokensMax": 14, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, "stragglerRank": 6, "correct": true, "samplesPooled": 600, @@ -52613,35 +54716,35 @@ "tokensPerRank": 4, "globalTokens": 32, "dispatch": { - "p50": 78.62400263547897, - "p90": 130.5920034646988, - "p95": 144.54400539398193, - "p99": 178.847998380661 + "p50": 72.31999933719635, + "p90": 103.4879982471466, + "p95": 107.26399719715118, + "p99": 115.48800021409988 }, "combine": { - "p50": 69.08799707889557, - "p90": 80.51200211048126, - "p95": 87.87199854850769, - "p99": 104.19200360774994 + "p50": 64.03200328350067, + "p90": 76.28799974918365, + "p95": 77.82399654388428, + "p99": 81.98399841785431 }, "roundtrip": { - "p50": 124.70400333404541, - "p90": 154.14400398731232, - "p95": 165.15199840068817, - "p99": 194.68800723552704 + "p50": 156.09599649906158, + "p90": 202.36800611019135, + "p95": 205.63200116157532, + "p99": 212.51200139522552 }, "isolatedSum": { - "p50": 147.71199971437454, - "p90": 211.10400557518005, - "p95": 232.41600394248962, - "p99": 283.04000198841095 + "p50": 136.35200262069702, + "p90": 179.77599799633026, + "p95": 185.08799374103546, + "p99": 197.4719986319542 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2394112, - "combineLogicalBytes": 2394112, - "fanoutMean": 5.21875, - "recvTokensMax": 24, - "stragglerRank": 2, + "dispatchLogicalBytes": 1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 @@ -52650,35 +54753,35 @@ "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 74.46400076150894, - "p90": 99.39199686050415, - "p95": 109.76000130176544, - 
"p99": 140.6400054693222 + "p50": 97.50399738550186, + "p90": 102.30399668216705, + "p95": 105.85600137710571, + "p99": 113.40799927711487 }, "combine": { - "p50": 68.76800209283829, - "p90": 83.64800363779068, - "p95": 90.14400094747543, - "p99": 115.35999923944473 + "p50": 63.80800157785416, + "p90": 74.94399696588516, + "p95": 76.28799974918365, + "p99": 80.89599758386612 }, "roundtrip": { - "p50": 124.54400211572647, - "p90": 155.7759940624237, - "p95": 170.56000232696533, - "p99": 186.91200017929077 + "p50": 154.6880006790161, + "p90": 194.7840005159378, + "p95": 199.0399956703186, + "p99": 203.87199521064758 }, "isolatedSum": { - "p50": 143.23200285434723, - "p90": 183.04000049829483, - "p95": 199.90400224924088, - "p99": 256.00000470876694 + "p50": 161.31199896335602, + "p90": 177.24799364805222, + "p95": 182.14400112628937, + "p99": 194.303996860981 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4630528, - "combineLogicalBytes": 4630528, - "fanoutMean": 5.046875, - "recvTokensMax": 45, - "stragglerRank": 5, + "dispatchLogicalBytes": 2487296, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 @@ -52687,34 +54790,34 @@ "tokensPerRank": 16, "globalTokens": 128, "dispatch": { - "p50": 76.25599950551987, - "p90": 106.9440022110939, - "p95": 120.7360029220581, - "p99": 149.24800395965576 + "p50": 97.08800166845322, + "p90": 104.3199971318245, + "p95": 107.39199817180634, + "p99": 113.43999952077866 }, "combine": { - "p50": 70.52800059318542, - "p90": 85.24800091981888, - "p95": 90.04800021648407, - "p99": 104.5759990811348 + "p50": 75.74400305747986, + "p90": 78.49600166082382, + "p95": 80.06399869918823, + "p99": 83.36000144481659 }, "roundtrip": { - "p50": 129.98400628566742, - "p90": 161.05599701404572, - "p95": 173.8560050725937, - "p99": 205.21600544452667 + "p50": 195.2960044145584, + "p90": 205.85599541664124, + "p95": 209.85600352287292, + 
"p99": 223.83999824523926 }, "isolatedSum": { - "p50": 146.7840000987053, - "p90": 192.19200313091278, - "p95": 210.78400313854218, - "p99": 253.82400304079056 + "p50": 172.83200472593307, + "p90": 182.81599879264832, + "p95": 187.45599687099457, + "p99": 196.80000096559525 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 9447424, - "combineLogicalBytes": 9447424, - "fanoutMean": 5.1484375, - "recvTokensMax": 91, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, "stragglerRank": 6, "correct": true, "samplesPooled": 600, @@ -52724,35 +54827,35 @@ "tokensPerRank": 32, "globalTokens": 256, "dispatch": { - "p50": 81.91999793052673, - "p90": 99.07200187444687, - "p95": 107.04000294208527, - "p99": 128.57599556446075 + "p50": 73.11999797821045, + "p90": 104.16000336408615, + "p95": 106.84800148010254, + "p99": 112.09599673748016 }, "combine": { - "p50": 76.03199779987335, - "p90": 89.63199704885483, - "p95": 96.54399752616882, - "p99": 106.08000308275223 + "p50": 69.2799985408783, + "p90": 81.88799768686295, + "p95": 82.87999778985977, + "p99": 88.28800171613693 }, "roundtrip": { - "p50": 129.08799946308136, - "p90": 156.76799416542053, - "p95": 167.29600727558136, - "p99": 217.3440009355545 + "p50": 161.21600568294525, + "p90": 206.65599405765533, + "p95": 210.84800362586975, + "p99": 216.22399985790253 }, "isolatedSum": { - "p50": 157.95199573040009, - "p90": 188.7039989233017, - "p95": 203.5840004682541, - "p99": 234.65599864721298 + "p50": 142.39999651908875, + "p90": 186.0480010509491, + "p95": 189.7279992699623, + "p99": 200.3839984536171 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19023872, - "combineLogicalBytes": 19023872, - "fanoutMean": 5.18359375, - "recvTokensMax": 178, - "stragglerRank": 7, + "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, 
"trials": 3 @@ -52761,35 +54864,35 @@ "tokensPerRank": 64, "globalTokens": 512, "dispatch": { - "p50": 101.79200023412704, - "p90": 136.6720050573349, - "p95": 146.36799693107605, - "p99": 175.10400712490082 + "p50": 78.65600287914276, + "p90": 106.9440022110939, + "p95": 110.55999994277954, + "p99": 125.44000148773193 }, "combine": { - "p50": 93.44000369310379, - "p90": 112.76800185441971, - "p95": 117.15199798345566, - "p99": 131.71200454235077 + "p50": 83.64800363779068, + "p90": 96.38399630784988, + "p95": 97.69599884748459, + "p99": 100.00000149011612 }, "roundtrip": { - "p50": 165.43999314308167, - "p90": 204.44799959659576, - "p95": 212.38400042057037, - "p99": 240.03200232982635 + "p50": 175.7120043039322, + "p90": 222.6880043745041, + "p95": 225.24799406528473, + "p99": 231.74400627613068 }, "isolatedSum": { - "p50": 195.23200392723083, - "p90": 249.4400069117546, - "p95": 263.5199949145317, - "p99": 306.8160116672516 + "p50": 162.30400651693344, + "p90": 203.3279985189438, + "p95": 208.25599879026413, + "p99": 225.44000297784805 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 38148096, - "combineLogicalBytes": 38148096, - "fanoutMean": 5.197265625, - "recvTokensMax": 350, - "stragglerRank": 7, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 @@ -52798,34 +54901,34 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 115.68000167608261, - "p90": 135.29600203037262, - "p95": 142.17600226402283, - "p99": 160.64000129699707 + "p50": 90.33600240945816, + "p90": 110.84800213575363, + "p95": 113.82400244474411, + "p99": 117.11999773979187 }, "combine": { - "p50": 104.96000200510025, - "p90": 118.04799735546112, - "p95": 122.68800288438797, - "p99": 147.64800667762756 + "p50": 98.78399968147278, + "p90": 111.00800335407257, + "p95": 112.0000034570694, + "p99": 117.21599847078323 }, "roundtrip": { - 
"p50": 194.97600197792053, - "p90": 212.64000236988068, - "p95": 220.19200026988983, - "p99": 234.78400707244873 + "p50": 216.12800657749176, + "p90": 240.60800671577454, + "p95": 244.25600469112396, + "p99": 250.2720057964325 }, "isolatedSum": { - "p50": 220.64000368118286, - "p90": 253.34399938583374, - "p95": 264.8640051484108, - "p99": 308.28800797462463 + "p50": 189.12000209093094, + "p90": 221.8560054898262, + "p95": 225.8240059018135, + "p99": 234.3359962105751 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 76955648, - "combineLogicalBytes": 76955648, - "fanoutMean": 5.2421875, - "recvTokensMax": 687, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, "stragglerRank": 6, "correct": true, "samplesPooled": 600, @@ -52834,28 +54937,29 @@ ] }, { - "id": "cx-cee2e19b", - "identity": "h200|deepep|7168|8|256|bf16|normal|cached-layout-comm-only-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|ac583971f94b176", - "colorKey": "h200_edd92e38", - "comparisonKey": "4a9eb2a61bfd9462", + "id": "cx-750e874d", + "identity": "h100|deepep|7168|8|256|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|ac583971f94b176", + "colorKey": "h100_7f10961a", + "comparisonKey": "f145cb161a39591f", "schemaVersion": 3, - "generatedAt": "2026-06-26T17:30:08.901856+00:00", + "generatedAt": "2026-06-26T15:23:35.919985+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h200-dgxc-slurm_7", - "sku": "h200", + "runner": "h100-dgxc-slurm_05", + "sku": "h100", "backend": "deepep", "phase": "decode", "mode": "normal", "resourceMode": "normalized", "suite": "resource-constrained", "comparisonClass": "standardized", - "measurementContract": "cached-layout-comm-only-v1", - "topologyClass": "h200-nvlink-island", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h100-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H200 
EP8 · deepep · bf16 (norm) [cl]", + "label": "H100 EP8 · deepep · fp8 (norm)", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, @@ -52865,7 +54969,7 @@ "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, - "dispatchDtype": "bf16", + "dispatchDtype": "fp8", "activationProfile": "normal", "combineQuantMode": "none" }, @@ -52874,10 +54978,10 @@ "achievedFraction": 0.1818, "configuredUnits": 24, "deviceUnits": 132, - "resourceClass": "resource-constrained", + "resourceClass": "unknown", "conformanceClass": "resource-conforming", "fixedKernel": false, - "paretoEligible": true + "paretoEligible": false }, "placement": { "kind": "packed", @@ -52895,45 +54999,45 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28254409438", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254409438", - "createdAt": "2026-06-26T17:28:41Z", - "sha": "60dec7d70f554e252fec87709e2be52752947db1" + "id": "28247584217", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28247584217", + "createdAt": "2026-06-26T15:23:35.919985+00:00", + "sha": "fd23d02b65dba6f1ed963342b188022fc27263d1" }, "rows": [ { "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 62.97600269317627, - "p90": 86.40000224113464, - "p95": 94.14400160312653, - "p99": 136.9599997997284 + "p50": 251.93598866462708, + "p90": 260.3839933872223, + "p95": 263.10399174690247, + "p99": 268.5759961605072 }, "combine": { - "p50": 69.21599805355072, - "p90": 82.04799890518188, - "p95": 87.20000088214874, - "p99": 98.49599748849869 + "p50": 68.41599941253662, + "p90": 69.88800317049026, + "p95": 70.8480030298233, + "p99": 76.03199779987335 }, "roundtrip": { - "p50": 109.98400300741196, - "p90": 133.08799266815186, - "p95": 140.8960074186325, - "p99": 178.27199399471283 + "p50": 296.51200771331787, + "p90": 304.1279911994934, + "p95": 306.40000104904175, + "p99": 
349.15199875831604 }, "isolatedSum": { - "p50": 132.192000746727, - "p90": 168.44800114631653, - "p95": 181.34400248527527, - "p99": 235.45599728822708 + "p50": 320.3519880771637, + "p90": 330.27199655771255, + "p95": 333.95199477672577, + "p99": 344.60799396038055 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 630784, + "dispatchLogicalBytes": 315392, "combineLogicalBytes": 630784, "fanoutMean": 5.5, "recvTokensMax": 7, - "stragglerRank": 0, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -52942,35 +55046,35 @@ "tokensPerRank": 2, "globalTokens": 16, "dispatch": { - "p50": 64.38399851322174, - "p90": 88.73599767684937, - "p95": 94.87999975681305, - "p99": 119.48800086975098 + "p50": 200.51200687885284, + "p90": 256.8320035934448, + "p95": 259.99999046325684, + "p99": 268.0000066757202 }, "combine": { - "p50": 69.2799985408783, - "p90": 83.52000266313553, - "p95": 88.95999938249588, - "p99": 107.10400342941284 + "p50": 63.00800293684006, + "p90": 71.00799679756165, + "p95": 71.84000313282013, + "p99": 74.68800246715546 }, "roundtrip": { - "p50": 110.20799726247787, - "p90": 138.2720023393631, - "p95": 145.37599682807922, - "p99": 175.55199563503265 + "p50": 243.1039959192276, + "p90": 300.1919984817505, + "p95": 303.5840094089508, + "p99": 308.9919984340668 }, "isolatedSum": { - "p50": 133.66399705410004, - "p90": 172.2560003399849, - "p95": 183.83999913930893, - "p99": 226.59200429916382 + "p50": 263.5200098156929, + "p90": 327.84000039100647, + "p95": 331.83999359607697, + "p99": 342.68800914287567 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1232896, + "dispatchLogicalBytes": 616448, "combineLogicalBytes": 1232896, "fanoutMean": 5.375, "recvTokensMax": 13, - "stragglerRank": 6, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -52979,35 +55083,35 @@ "tokensPerRank": 4, "globalTokens": 32, "dispatch": { - "p50": 64.41599875688553, - "p90": 90.52799642086029, - "p95": 101.88800096511841, - 
"p99": 132.28799402713776 + "p50": 199.13600385189056, + "p90": 287.9680097103119, + "p95": 291.1359965801239, + "p99": 298.2720136642456 }, "combine": { - "p50": 70.62400132417679, - "p90": 85.34400165081024, - "p95": 90.71999788284302, - "p99": 102.27199643850327 + "p50": 63.519999384880066, + "p90": 75.1039981842041, + "p95": 76.73600316047668, + "p99": 81.40800148248672 }, "roundtrip": { - "p50": 113.43999952077866, - "p90": 141.79199934005737, - "p95": 148.22399616241455, - "p99": 183.58400464057922 + "p50": 246.17600440979004, + "p90": 330.84800839424133, + "p95": 333.9200019836426, + "p99": 343.6479866504669 }, "isolatedSum": { - "p50": 135.04000008106232, - "p90": 175.87199807167053, - "p95": 192.60799884796143, - "p99": 234.55999046564102 + "p50": 262.65600323677063, + "p90": 363.072007894516, + "p95": 367.8719997406006, + "p99": 379.68001514673233 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2480128, + "dispatchLogicalBytes": 1240064, "combineLogicalBytes": 2480128, "fanoutMean": 5.40625, "recvTokensMax": 29, - "stragglerRank": 0, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -53016,35 +55120,35 @@ "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 63.1679967045784, - "p90": 82.75199681520462, - "p95": 87.96799927949905, - "p99": 107.744000852108 + "p50": 199.16799664497375, + "p90": 258.14399123191833, + "p95": 261.4080011844635, + "p99": 267.16798543930054 }, "combine": { - "p50": 69.85600292682648, - "p90": 85.1840004324913, - "p95": 90.46400338411331, - "p99": 100.99200159311295 + "p50": 63.4239986538887, + "p90": 72.57600128650665, + "p95": 73.18399846553802, + "p99": 76.28799974918365 }, "roundtrip": { - "p50": 112.44799941778183, - "p90": 139.20000195503235, - "p95": 152.38399803638458, - "p99": 206.7520022392273 + "p50": 244.83199417591095, + "p90": 302.3039996623993, + "p95": 305.759996175766, + "p99": 310.94399094581604 }, "isolatedSum": { - "p50": 133.02399963140488, - "p90": 
167.93599724769592, - "p95": 178.43200266361237, - "p99": 208.73600244522095 + "p50": 262.59199529886246, + "p90": 330.719992518425, + "p95": 334.5919996500015, + "p99": 343.4559851884842 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, + "dispatchLogicalBytes": 2487296, "combineLogicalBytes": 4974592, "fanoutMean": 5.421875, "recvTokensMax": 47, - "stragglerRank": 0, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -53053,35 +55157,35 @@ "tokensPerRank": 16, "globalTokens": 128, "dispatch": { - "p50": 60.35200133919716, - "p90": 85.02399921417236, - "p95": 91.67999774217606, - "p99": 111.13599687814713 + "p50": 200.28799772262573, + "p90": 286.5599989891052, + "p95": 290.0800108909607, + "p99": 296.57599329948425 }, "combine": { - "p50": 70.3359991312027, - "p90": 86.87999844551086, - "p95": 89.82399851083755, - "p99": 99.35999661684036 + "p50": 65.5359998345375, + "p90": 76.86399668455124, + "p95": 77.66400277614594, + "p99": 80.76799660921097 }, "roundtrip": { - "p50": 116.03199690580368, - "p90": 141.34399592876434, - "p95": 148.3519971370697, - "p99": 184.9920004606247 + "p50": 248.57600033283234, + "p90": 330.4640054702759, + "p95": 333.6319923400879, + "p99": 344.7360098361969 }, "isolatedSum": { - "p50": 130.68800047039986, - "p90": 171.90399765968323, - "p95": 181.5039962530136, - "p99": 210.4959934949875 + "p50": 265.82399755716324, + "p90": 363.42399567365646, + "p95": 367.7440136671066, + "p99": 377.3439899086952 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 9920512, + "dispatchLogicalBytes": 4960256, "combineLogicalBytes": 9920512, "fanoutMean": 5.40625, "recvTokensMax": 92, - "stragglerRank": 5, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -53090,35 +55194,35 @@ "tokensPerRank": 32, "globalTokens": 256, "dispatch": { - "p50": 71.74400240182877, - "p90": 90.71999788284302, - "p95": 96.73599898815155, - "p99": 118.23999881744385 + "p50": 198.88000190258026, + "p90": 
284.4800055027008, + "p95": 288.12798857688904, + "p99": 293.0240035057068 }, "combine": { - "p50": 77.66400277614594, - "p90": 93.05600076913834, - "p95": 97.69599884748459, - "p99": 108.92800241708755 + "p50": 69.18399780988693, + "p90": 80.54400235414505, + "p95": 81.4720019698143, + "p99": 84.63999629020691 }, "roundtrip": { - "p50": 122.36800044775009, - "p90": 149.05600249767303, - "p95": 159.61599349975586, - "p99": 184.12800133228302 + "p50": 253.12000513076782, + "p90": 334.01599526405334, + "p95": 336.89600229263306, + "p99": 340.31999111175537 }, "isolatedSum": { - "p50": 149.4080051779747, - "p90": 183.77599865198135, - "p95": 194.43199783563614, - "p99": 227.1680012345314 + "p50": 268.0639997124672, + "p90": 365.02400785684586, + "p95": 369.59999054670334, + "p99": 377.6639997959137 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19726336, + "dispatchLogicalBytes": 9863168, "combineLogicalBytes": 19726336, "fanoutMean": 5.375, "recvTokensMax": 182, - "stragglerRank": 0, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -53127,35 +55231,35 @@ "tokensPerRank": 64, "globalTokens": 512, "dispatch": { - "p50": 82.65600353479385, - "p90": 100.3199964761734, - "p95": 109.15199667215347, - "p99": 139.39200341701508 + "p50": 202.07999646663666, + "p90": 355.00800609588623, + "p95": 361.7280125617981, + "p99": 423.007994890213 }, "combine": { - "p50": 91.45600348711014, - "p90": 106.52799904346466, - "p95": 114.30399864912033, - "p99": 132.22399353981018 + "p50": 82.65600353479385, + "p90": 94.11200135946274, + "p95": 95.8079993724823, + "p99": 99.45599734783173 }, "roundtrip": { - "p50": 147.42399752140045, - "p90": 165.3439998626709, - "p95": 174.20800030231476, - "p99": 198.65599274635315 + "p50": 266.88000559806824, + "p90": 352.03200578689575, + "p95": 355.3600013256073, + "p99": 361.4720106124878 }, "isolatedSum": { - "p50": 174.112007021904, - "p90": 206.84799551963806, - "p95": 223.4559953212738, - "p99": 
271.61599695682526 + "p50": 284.7360000014305, + "p90": 449.12000745534897, + "p95": 457.5360119342804, + "p99": 522.4639922380447 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 38993920, + "dispatchLogicalBytes": 19496960, "combineLogicalBytes": 38993920, "fanoutMean": 5.3125, "recvTokensMax": 367, - "stragglerRank": 0, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -53164,35 +55268,35 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 105.12000322341919, - "p90": 118.40000003576279, - "p95": 122.81599640846252, - "p99": 147.32800424098969 + "p50": 221.79199755191803, + "p90": 289.72798585891724, + "p95": 293.08798909187317, + "p99": 300.9600043296814 }, "combine": { - "p50": 104.73600029945374, - "p90": 122.11199849843979, - "p95": 126.75200402736664, - "p99": 138.84800672531128 + "p50": 98.27200323343277, + "p90": 108.8000014424324, + "p95": 110.1439967751503, + "p99": 113.88800293207169 }, "roundtrip": { - "p50": 184.38400328159332, - "p90": 200.41599869728088, - "p95": 207.96799659729004, - "p99": 272.44800329208374 + "p50": 303.74398827552795, + "p90": 364.8639917373657, + "p95": 367.45598912239075, + "p99": 371.5519905090332 }, "isolatedSum": { - "p50": 209.85600352287292, - "p90": 240.51199853420258, - "p95": 249.56800043582916, - "p99": 286.17601096630096 + "p50": 320.0640007853508, + "p90": 398.52798730134964, + "p95": 403.23198586702347, + "p99": 414.8480072617531 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, + "dispatchLogicalBytes": 38836224, "combineLogicalBytes": 77672448, "fanoutMean": 5.291015625, "recvTokensMax": 723, - "stragglerRank": 5, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -53200,28 +55304,29 @@ ] }, { - "id": "cx-8a74732f", - "identity": "h200|deepep|7168|8|256|bf16|normal|cached-layout-comm-only-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", - "colorKey": "h200_76bb7d5d", - "comparisonKey": "b4a52819ec3c25b8", + 
"id": "cx-b83230a1", + "identity": "h100|deepep|7168|8|256|fp8|normal|cached-layout-comm-only-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|ac583971f94b176", + "colorKey": "h100_eddc3af6", + "comparisonKey": "f291497d6f9ce0d1", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:49:31.596673+00:00", + "generatedAt": "2026-06-26T17:31:42.999710+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h200-dgxc-slurm_11", - "sku": "h200", + "runner": "h100-dgxc-slurm_12", + "sku": "h100", "backend": "deepep", "phase": "decode", "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", + "resourceMode": "normalized", + "suite": "resource-constrained", "comparisonClass": "standardized", "measurementContract": "cached-layout-comm-only-v1", - "topologyClass": "h200-nvlink-island", + "topologyClass": "h100-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H200 EP8 · deepep · bf16 [cl]", + "label": "H100 EP8 · deepep · fp8 (norm) [cl]", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, @@ -53231,19 +55336,19 @@ "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, - "dispatchDtype": "bf16", + "dispatchDtype": "fp8", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, + "requestedFraction": 0.18, + "achievedFraction": 0.1818, + "configuredUnits": 24, "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", + "resourceClass": "resource-constrained", + "conformanceClass": "resource-conforming", "fixedKernel": false, - "paretoEligible": false + "paretoEligible": true }, "placement": { "kind": "packed", @@ -53261,45 +55366,45 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271608834", - "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271608834", - "createdAt": "2026-06-26T23:48:07Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28254341346", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254341346", + "createdAt": "2026-06-26T17:31:42.999710+00:00", + "sha": "60dec7d70f554e252fec87709e2be52752947db1" }, "rows": [ { "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 62.144000083208084, - "p90": 86.62399649620056, - "p95": 98.49599748849869, - "p99": 125.5359947681427 + "p50": 81.34400099515915, + "p90": 84.927998483181, + "p95": 86.496002972126, + "p99": 90.14400094747543 }, "combine": { - "p50": 68.54400038719177, - "p90": 84.41600203514099, - "p95": 92.83199906349182, - "p99": 123.07199835777283 + "p50": 71.3919997215271, + "p90": 73.91999661922455, + "p95": 74.87999647855759, + "p99": 77.98399776220322 }, "roundtrip": { - "p50": 109.31199789047241, - "p90": 135.29600203037262, - "p95": 143.77599954605103, - "p99": 159.84000265598297 + "p50": 173.15199971199036, + "p90": 178.6240041255951, + "p95": 180.92800676822662, + "p99": 186.5600049495697 }, "isolatedSum": { - "p50": 130.68800047039986, - "p90": 171.03999853134155, - "p95": 191.3279965519905, - "p99": 248.60799312591553 + "p50": 152.73600071668625, + "p90": 158.84799510240555, + "p95": 161.3759994506836, + "p99": 168.12799870967865 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 630784, + "dispatchLogicalBytes": 315392, "combineLogicalBytes": 630784, "fanoutMean": 5.5, "recvTokensMax": 7, - "stragglerRank": 4, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 @@ -53308,35 +55413,35 @@ "tokensPerRank": 2, "globalTokens": 16, "dispatch": { - "p50": 63.74400109052658, - "p90": 91.26400202512741, - "p95": 99.87200051546097, - "p99": 171.9679981470108 + "p50": 58.49599838256836, + "p90": 82.78399705886841, + "p95": 84.3840017914772, + "p99": 90.01599997282028 }, "combine": { - "p50": 70.81600278615952, 
- "p90": 194.75199282169342, - "p95": 206.94400370121002, - "p99": 256.9279968738556 + "p50": 63.07200342416763, + "p90": 74.0479975938797, + "p95": 74.8480036854744, + "p99": 77.44000107049942 }, "roundtrip": { - "p50": 110.04800349473953, - "p90": 140.1599943637848, - "p95": 147.13600277900696, - "p99": 161.50400042533875 + "p50": 141.12000167369843, + "p90": 176.54399573802948, + "p95": 178.81600558757782, + "p99": 181.92000687122345 }, "isolatedSum": { - "p50": 134.5600038766861, - "p90": 286.01599484682083, - "p95": 306.816004216671, - "p99": 428.8959950208664 + "p50": 121.56800180673599, + "p90": 156.8319946527481, + "p95": 159.2320054769516, + "p99": 167.4560010433197 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1232896, + "dispatchLogicalBytes": 616448, "combineLogicalBytes": 1232896, "fanoutMean": 5.375, "recvTokensMax": 13, - "stragglerRank": 1, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 @@ -53345,35 +55450,35 @@ "tokensPerRank": 4, "globalTokens": 32, "dispatch": { - "p50": 62.94400244951248, - "p90": 80.51200211048126, - "p95": 89.02399986982346, - "p99": 111.39199882745743 + "p50": 59.13599953055382, + "p90": 82.68799632787704, + "p95": 85.37600189447403, + "p99": 91.61599725484848 }, "combine": { - "p50": 68.38399916887283, - "p90": 79.8719972372055, - "p95": 88.54400366544724, - "p99": 100.54399818181992 + "p50": 63.64800035953522, + "p90": 74.14399832487106, + "p95": 75.19999891519547, + "p99": 79.32800054550171 }, "roundtrip": { - "p50": 111.16799712181091, - "p90": 139.80799913406372, - "p95": 148.41599762439728, - "p99": 167.07199811935425 + "p50": 140.83200693130493, + "p90": 178.49600315093994, + "p95": 180.92800676822662, + "p99": 187.45599687099457 }, "isolatedSum": { - "p50": 131.32800161838531, - "p90": 160.38399934768677, - "p95": 177.5680035352707, - "p99": 211.93599700927734 + "p50": 122.78399989008904, + "p90": 156.8319946527481, + "p95": 160.5760008096695, + "p99": 170.9439978003502 }, 
"roundtripMeasured": true, - "dispatchLogicalBytes": 2480128, + "dispatchLogicalBytes": 1240064, "combineLogicalBytes": 2480128, "fanoutMean": 5.40625, "recvTokensMax": 29, - "stragglerRank": 5, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 @@ -53382,35 +55487,35 @@ "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 61.88800185918808, - "p90": 83.16799998283386, - "p95": 92.51199662685394, - "p99": 104.06400263309479 + "p50": 61.792001128196716, + "p90": 83.20000022649765, + "p95": 86.07999980449677, + "p99": 96.00000083446503 }, "combine": { - "p50": 68.67200136184692, - "p90": 82.84799754619598, - "p95": 88.639996945858, - "p99": 105.05600273609161 + "p50": 65.43999910354614, + "p90": 75.93599706888199, + "p95": 78.14399898052216, + "p99": 83.74399691820145 }, "roundtrip": { - "p50": 110.84800213575363, - "p90": 140.79999923706055, - "p95": 148.0640023946762, - "p99": 159.2639982700348 + "p50": 144.44799721240997, + "p90": 181.15200102329254, + "p95": 184.25600230693817, + "p99": 199.8080015182495 }, "isolatedSum": { - "p50": 130.560003221035, - "p90": 166.01599752902985, - "p95": 181.15199357271194, - "p99": 209.1200053691864 + "p50": 127.23200023174286, + "p90": 159.13599729537964, + "p95": 164.22399878501892, + "p99": 179.74399775266647 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, + "dispatchLogicalBytes": 2487296, "combineLogicalBytes": 4974592, "fanoutMean": 5.421875, "recvTokensMax": 47, - "stragglerRank": 5, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 @@ -53419,35 +55524,35 @@ "tokensPerRank": 16, "globalTokens": 128, "dispatch": { - "p50": 59.84000116586685, - "p90": 82.17599987983704, - "p95": 92.32000261545181, - "p99": 105.92000186443329 + "p50": 80.38400113582611, + "p90": 83.52000266313553, + "p95": 85.08799970149994, + "p99": 92.38400310277939 }, "combine": { - "p50": 69.72800195217133, - "p90": 84.19200032949448, - "p95": 90.68799763917923, - "p99": 
106.91200196743011 + "p50": 75.80800354480743, + "p90": 77.85599678754807, + "p95": 79.03999835252762, + "p99": 80.83199709653854 }, "roundtrip": { - "p50": 112.12799698114395, - "p90": 134.62400436401367, - "p95": 145.9839940071106, - "p99": 164.09599781036377 + "p50": 150.59199929237366, + "p90": 182.49599635601044, + "p95": 184.60799753665924, + "p99": 194.815993309021 }, "isolatedSum": { - "p50": 129.56800311803818, - "p90": 166.3680002093315, - "p95": 183.00800025463104, - "p99": 212.8320038318634 + "p50": 156.19200468063354, + "p90": 161.3759994506836, + "p95": 164.12799805402756, + "p99": 173.21600019931793 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 9920512, + "dispatchLogicalBytes": 4960256, "combineLogicalBytes": 9920512, "fanoutMean": 5.40625, "recvTokensMax": 92, - "stragglerRank": 4, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -53456,35 +55561,35 @@ "tokensPerRank": 32, "globalTokens": 256, "dispatch": { - "p50": 70.20799815654755, - "p90": 94.08000111579895, - "p95": 101.15200281143188, - "p99": 118.17599833011627 + "p50": 61.792001128196716, + "p90": 81.727996468544, + "p95": 84.28800106048584, + "p99": 89.88799899816513 }, "combine": { - "p50": 76.64000242948532, - "p90": 91.2960022687912, - "p95": 97.43999689817429, - "p99": 105.27999699115753 + "p50": 69.34399902820587, + "p90": 79.96799796819687, + "p95": 81.24800026416779, + "p99": 83.99999886751175 }, "roundtrip": { - "p50": 123.77600371837616, - "p90": 148.3519971370697, - "p95": 155.29599785804749, - "p99": 175.135999917984 + "p50": 146.11199498176575, + "p90": 184.32000279426575, + "p95": 186.52799725532532, + "p99": 192.44800508022308 }, "isolatedSum": { - "p50": 146.84800058603287, - "p90": 185.37600338459015, - "p95": 198.59199970960617, - "p99": 223.4559953212738 + "p50": 131.1360001564026, + "p90": 161.69599443674088, + "p95": 165.53600132465363, + "p99": 173.88799786567688 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19726336, + 
"dispatchLogicalBytes": 9863168, "combineLogicalBytes": 19726336, "fanoutMean": 5.375, "recvTokensMax": 182, - "stragglerRank": 5, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 @@ -53493,35 +55598,35 @@ "tokensPerRank": 64, "globalTokens": 512, "dispatch": { - "p50": 86.43200248479843, - "p90": 99.48799759149551, - "p95": 106.84800148010254, - "p99": 127.42400169372559 + "p50": 72.80000299215317, + "p90": 86.43200248479843, + "p95": 92.54399687051773, + "p99": 99.7759997844696 }, "combine": { - "p50": 85.82399785518646, - "p90": 96.63999825716019, - "p95": 104.76800054311752, - "p99": 113.21599781513214 + "p50": 85.08799970149994, + "p90": 95.0080007314682, + "p95": 96.41599655151367, + "p99": 101.21600329875946 }, "roundtrip": { - "p50": 147.8399932384491, - "p90": 164.5440012216568, - "p95": 169.95200514793396, - "p99": 197.53600656986237 + "p50": 182.8799992799759, + "p90": 202.94399559497833, + "p95": 208.3200067281723, + "p99": 218.176007270813 }, "isolatedSum": { - "p50": 172.2560003399849, - "p90": 196.1279958486557, - "p95": 211.61600202322006, - "p99": 240.63999950885773 + "p50": 157.8880026936531, + "p90": 181.44000321626663, + "p95": 188.9599934220314, + "p99": 200.99200308322906 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 38993920, + "dispatchLogicalBytes": 19496960, "combineLogicalBytes": 38993920, "fanoutMean": 5.3125, "recvTokensMax": 367, - "stragglerRank": 7, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 @@ -53530,35 +55635,35 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 103.7760004401207, - "p90": 118.9119964838028, - "p95": 127.16799974441528, - "p99": 134.97599959373474 + "p50": 77.79199630022049, + "p90": 92.12800115346909, + "p95": 93.72799843549728, + "p99": 98.24000298976898 }, "combine": { - "p50": 105.15200346708298, - "p90": 119.00799721479416, - "p95": 124.35200065374374, - "p99": 139.55199718475342 + "p50": 99.55199807882309, + "p90": 
109.72800105810165, + "p95": 110.91200262308121, + "p99": 114.46399986743927 }, "roundtrip": { - "p50": 185.2799952030182, - "p90": 201.7280012369156, - "p95": 207.39200711250305, - "p99": 224.95999932289124 + "p50": 205.1520049571991, + "p90": 219.200000166893, + "p95": 220.89600563049316, + "p99": 223.4880030155182 }, "isolatedSum": { - "p50": 208.92800390720367, - "p90": 237.91999369859695, - "p95": 251.52000039815903, - "p99": 274.52799677848816 + "p50": 177.34399437904358, + "p90": 201.85600221157074, + "p95": 204.6400010585785, + "p99": 212.70400285720825 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, + "dispatchLogicalBytes": 38836224, "combineLogicalBytes": 77672448, "fanoutMean": 5.291015625, "recvTokensMax": 723, - "stragglerRank": 7, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 @@ -53566,28 +55671,29 @@ ] }, { - "id": "cx-274a06b0", - "identity": "h200|deepep|7168|8|256|bf16|ll|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", - "colorKey": "h200_c9aeae24", - "comparisonKey": "0abd2163f516521c", + "id": "cx-d8e58489", + "identity": "h100|deepep|7168|8|256|fp8|normal|cached-layout-comm-only-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h100_ec72792b", + "comparisonKey": "2bfd4913feb2a935", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:50:44.931546+00:00", + "generatedAt": "2026-06-26T23:47:54.320638+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h200-dgxc-slurm_8", - "sku": "h200", + "runner": "h100-dgxc-slurm_02", + "sku": "h100", "backend": "deepep", "phase": "decode", - "mode": "ll", + "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", + "measurementContract": "cached-layout-comm-only-v1", + "topologyClass": "h100-nvlink-island", "transport": "nvlink", "worldSize": 
8, "epSize": 8, - "label": "H200 EP8 · deepep · bf16 LL", + "label": "H100 EP8 · deepep · fp8 [cl]", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, @@ -53597,18 +55703,18 @@ "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, - "dispatchDtype": "bf16", + "dispatchDtype": "fp8", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": null, - "configuredUnits": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, "deviceUnits": 132, - "resourceClass": "fixed-kernel", - "conformanceClass": "not-applicable", - "fixedKernel": true, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, "paretoEligible": false }, "placement": { @@ -53627,9 +55733,9 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271645585", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271645585", - "createdAt": "2026-06-26T23:49:15Z", + "id": "28271573150", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271573150", + "createdAt": "2026-06-26T23:47:54.320638+00:00", "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" }, "rows": [ @@ -53637,35 +55743,35 @@ "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 37.567999213933945, - "p90": 48.0320006608963, - "p95": 52.41600051522255, - "p99": 62.33600154519081 + "p50": 78.3040001988411, + "p90": 82.07999914884567, + "p95": 84.44800227880478, + "p99": 88.03199976682663 }, "combine": { - "p50": 33.663999289274216, - "p90": 44.38399896025658, - "p95": 46.879999339580536, - "p99": 61.85600161552429 + "p50": 71.1359977722168, + "p90": 72.86400347948074, + "p95": 73.82400333881378, + "p99": 77.88799703121185 }, "roundtrip": { - "p50": 51.231998950242996, - "p90": 70.14399766921997, - "p95": 77.31200009584427, - "p99": 100.0640019774437 + "p50": 
136.63999736309052, + "p90": 174.75199699401855, + "p95": 177.15199291706085, + "p99": 181.08800053596497 }, "isolatedSum": { - "p50": 71.23199850320816, - "p90": 92.41599962115288, - "p95": 99.29599985480309, - "p99": 124.1920031607151 + "p50": 149.4399979710579, + "p90": 154.94400262832642, + "p95": 158.27200561761856, + "p99": 165.91999679803848 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 630784, + "dispatchLogicalBytes": 315392, "combineLogicalBytes": 630784, "fanoutMean": 5.5, - "recvTokensMax": 14, - "stragglerRank": 2, + "recvTokensMax": 7, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -53674,34 +55780,34 @@ "tokensPerRank": 2, "globalTokens": 16, "dispatch": { - "p50": 37.88800165057182, - "p90": 49.15200173854828, - "p95": 55.87200075387955, - "p99": 76.89599692821503 + "p50": 56.832000613212585, + "p90": 79.74400371313095, + "p95": 81.11999928951263, + "p99": 85.69599688053131 }, "combine": { - "p50": 32.896000891923904, - "p90": 43.83999854326248, - "p95": 47.07200080156326, - "p99": 67.74400174617767 + "p50": 62.3680017888546, + "p90": 71.58400118350983, + "p95": 72.25599884986877, + "p99": 75.9039968252182 }, "roundtrip": { - "p50": 51.00800096988678, - "p90": 67.9360032081604, - "p95": 74.20799881219864, - "p99": 96.83199971914291 + "p50": 138.0160003900528, + "p90": 172.95999825000763, + "p95": 174.30399358272552, + "p99": 179.61600422859192 }, "isolatedSum": { - "p50": 70.78400254249573, - "p90": 92.99200028181076, - "p95": 102.94400155544281, - "p99": 144.6399986743927 + "p50": 119.20000240206718, + "p90": 151.32800489664078, + "p95": 153.3759981393814, + "p99": 161.5999937057495 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1232896, + "dispatchLogicalBytes": 616448, "combineLogicalBytes": 1232896, "fanoutMean": 5.375, - "recvTokensMax": 21, + "recvTokensMax": 13, "stragglerRank": 4, "correct": true, "samplesPooled": 600, @@ -53711,35 +55817,35 @@ "tokensPerRank": 4, "globalTokens": 32, 
"dispatch": { - "p50": 37.53599897027016, - "p90": 44.95999962091446, - "p95": 51.61599814891815, - "p99": 66.30399823188782 + "p50": 56.92800134420395, + "p90": 82.0159986615181, + "p95": 85.02399921417236, + "p99": 87.77599781751633 }, "combine": { - "p50": 29.791999608278275, - "p90": 39.16800022125244, - "p95": 44.064000248909, - "p99": 53.63199859857559 - }, - "roundtrip": { - "p50": 51.13599821925163, - "p90": 63.519999384880066, - "p95": 71.77600264549255, - "p99": 81.34400099515915 + "p50": 63.07200342416763, + "p90": 74.94399696588516, + "p95": 76.28799974918365, + "p99": 79.99999821186066 + }, + "roundtrip": { + "p50": 138.7840062379837, + "p90": 179.51999604701996, + "p95": 182.01600015163422, + "p99": 187.42400407791138 }, "isolatedSum": { - "p50": 67.32799857854843, - "p90": 84.1279998421669, - "p95": 95.67999839782715, - "p99": 119.93599683046341 + "p50": 120.00000476837158, + "p90": 156.95999562740326, + "p95": 161.31199896335602, + "p99": 167.77599602937698 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2480128, + "dispatchLogicalBytes": 1240064, "combineLogicalBytes": 2480128, "fanoutMean": 5.40625, - "recvTokensMax": 39, - "stragglerRank": 1, + "recvTokensMax": 29, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 @@ -53748,34 +55854,34 @@ "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 38.27200084924698, - "p90": 51.32799968123436, - "p95": 57.08799883723259, - "p99": 66.97600334882736 + "p50": 56.832000613212585, + "p90": 80.99199831485748, + "p95": 82.94399827718735, + "p99": 87.99999952316284 }, "combine": { - "p50": 34.623999148607254, - "p90": 44.03200000524521, - "p95": 46.62400111556053, - "p99": 54.55999821424484 + "p50": 63.71200084686279, + "p90": 74.43200051784515, + "p95": 75.19999891519547, + "p99": 79.52000200748444 }, "roundtrip": { - "p50": 55.39200082421303, - "p90": 67.58400052785873, - "p95": 75.42400062084198, - "p99": 95.0080007314682 + "p50": 139.93600010871887, + "p90": 
178.5919964313507, + "p95": 181.98400735855103, + "p99": 185.47199666500092 }, "isolatedSum": { - "p50": 72.89599999785423, - "p90": 95.35999968647957, - "p95": 103.71199995279312, - "p99": 121.5360015630722 + "p50": 120.54400146007538, + "p90": 155.42399883270264, + "p95": 158.1439971923828, + "p99": 167.52000153064728 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, + "dispatchLogicalBytes": 2487296, "combineLogicalBytes": 4974592, "fanoutMean": 5.421875, - "recvTokensMax": 74, + "recvTokensMax": 47, "stragglerRank": 4, "correct": true, "samplesPooled": 600, @@ -53785,35 +55891,35 @@ "tokensPerRank": 16, "globalTokens": 128, "dispatch": { - "p50": 38.816001266241074, - "p90": 54.17599901556969, - "p95": 57.72799998521805, - "p99": 75.00799745321274 + "p50": 57.472001761198044, + "p90": 81.79199695587158, + "p95": 84.28800106048584, + "p99": 87.87199854850769 }, "combine": { - "p50": 36.288000643253326, - "p90": 46.01600021123886, - "p95": 48.00000041723251, - "p99": 69.47200000286102 + "p50": 65.5359998345375, + "p90": 77.37600058317184, + "p95": 79.3600007891655, + "p99": 82.46400207281113 }, "roundtrip": { - "p50": 59.967998415231705, - "p90": 73.05599749088287, - "p95": 77.2159993648529, - "p99": 92.12800115346909 + "p50": 141.184002161026, + "p90": 181.7920058965683, + "p95": 184.9599927663803, + "p99": 191.93600118160248 }, "isolatedSum": { - "p50": 75.1040019094944, - "p90": 100.19199922680855, - "p95": 105.72800040245056, - "p99": 144.47999745607376 + "p50": 123.00800159573555, + "p90": 159.16799753904343, + "p95": 163.64800184965134, + "p99": 170.33600062131882 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 9920512, + "dispatchLogicalBytes": 4960256, "combineLogicalBytes": 9920512, "fanoutMean": 5.40625, - "recvTokensMax": 145, - "stragglerRank": 2, + "recvTokensMax": 92, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 @@ -53822,35 +55928,35 @@ "tokensPerRank": 32, "globalTokens": 256, "dispatch": { - 
"p50": 45.40799930691719, - "p90": 55.32800033688545, - "p95": 60.15999987721443, - "p99": 70.88000327348709 + "p50": 60.32000109553337, + "p90": 82.0159986615181, + "p95": 84.63999629020691, + "p99": 91.0400003194809 }, "combine": { - "p50": 43.87199878692627, - "p90": 53.53600159287453, - "p95": 55.32800033688545, - "p99": 67.9360032081604 + "p50": 70.97599655389786, + "p90": 82.14399963617325, + "p95": 83.20000022649765, + "p99": 88.60799670219421 }, "roundtrip": { - "p50": 72.35199958086014, - "p90": 82.8159973025322, - "p95": 86.01599931716919, - "p99": 98.88000041246414 + "p50": 147.0080018043518, + "p90": 185.7919991016388, + "p95": 188.06399405002594, + "p99": 192.25600361824036 }, "isolatedSum": { - "p50": 89.27999809384346, - "p90": 108.86400192975998, - "p95": 115.48800021409988, - "p99": 138.8160064816475 + "p50": 131.29599764943123, + "p90": 164.15999829769135, + "p95": 167.83999651670456, + "p99": 179.6479970216751 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19726336, + "dispatchLogicalBytes": 9863168, "combineLogicalBytes": 19726336, "fanoutMean": 5.375, - "recvTokensMax": 287, - "stragglerRank": 4, + "recvTokensMax": 182, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -53859,34 +55965,34 @@ "tokensPerRank": 64, "globalTokens": 512, "dispatch": { - "p50": 55.296000093221664, - "p90": 66.6240006685257, - "p95": 70.36799937486649, - "p99": 88.16000074148178 + "p50": 69.60000097751617, + "p90": 85.69599688053131, + "p95": 87.99999952316284, + "p99": 100.8640006184578 }, "combine": { - "p50": 59.07199904322624, - "p90": 67.71200150251389, - "p95": 70.43199986219406, - "p99": 79.3600007891655 + "p50": 80.6720033288002, + "p90": 92.70399808883667, + "p95": 93.66399794816971, + "p99": 97.4079966545105 }, "roundtrip": { - "p50": 97.34400361776352, - "p90": 109.3439981341362, - "p95": 115.32799899578094, - "p99": 128.12800705432892 + "p50": 160.70400178432465, + "p90": 200.83199441432953, + "p95": 203.19999754428864, + 
"p99": 211.5200012922287 }, "isolatedSum": { - "p50": 114.3679991364479, - "p90": 134.33600217103958, - "p95": 140.79999923706055, - "p99": 167.52000153064728 + "p50": 150.27200430631638, + "p90": 178.39999496936798, + "p95": 181.66399747133255, + "p99": 198.2719972729683 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 38993920, + "dispatchLogicalBytes": 19496960, "combineLogicalBytes": 38993920, "fanoutMean": 5.3125, - "recvTokensMax": 564, + "recvTokensMax": 367, "stragglerRank": 7, "correct": true, "samplesPooled": 600, @@ -53896,35 +56002,35 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 81.05599880218506, - "p90": 91.26400202512741, - "p95": 95.77599912881851, - "p99": 104.38399761915207 + "p50": 77.05599814653397, + "p90": 91.96799993515015, + "p95": 94.43199634552002, + "p99": 99.32799637317657 }, "combine": { - "p50": 86.40000224113464, - "p90": 98.36799651384354, - "p95": 102.84800082445145, - "p99": 111.96800321340561 + "p50": 97.53599762916565, + "p90": 109.37599837779999, + "p95": 110.68800091743469, + "p99": 115.7120019197464 }, "roundtrip": { - "p50": 148.44800531864166, - "p90": 162.88000345230103, - "p95": 168.16000640392303, - "p99": 178.24000120162964 + "p50": 203.80799472332, + "p90": 219.9999988079071, + "p95": 222.59199619293213, + "p99": 236.4799976348877 }, "isolatedSum": { - "p50": 167.4560010433197, - "p90": 189.63199853897095, - "p95": 198.62399995326996, - "p99": 216.35200083255768 + "p50": 174.59199577569962, + "p90": 201.34399831295013, + "p95": 205.1199972629547, + "p99": 215.03999829292297 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, + "dispatchLogicalBytes": 38836224, "combineLogicalBytes": 77672448, "fanoutMean": 5.291015625, - "recvTokensMax": 1104, - "stragglerRank": 4, + "recvTokensMax": 723, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 @@ -53932,28 +56038,29 @@ ] }, { - "id": "cx-81e223f4", - "identity": 
"h200|deepep|7168|8|256|bf16|ll|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", - "colorKey": "h200_7cfa04c4", - "comparisonKey": "72cd529af4968fe8", + "id": "cx-f1a3625a", + "identity": "h100|deepep|7168|8|256|fp8|ll|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h100_7720baf2", + "comparisonKey": "800e526f613bc59d", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:50:48.529187+00:00", + "generatedAt": "2026-06-26T23:49:09.827299+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_6", - "sku": "h200", + "publicationStatus": "diagnostic", + "runner": "h100-dgxc-slurm_04", + "sku": "h100", "backend": "deepep", "phase": "decode", "mode": "ll", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "runtime-visible-v1", - "topologyClass": "h200-nvlink-island", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H200 EP8 · deepep · bf16 LL", + "label": "H100 EP8 · deepep · fp8 LL", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, @@ -53963,7 +56070,7 @@ "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, - "dispatchDtype": "bf16", + "dispatchDtype": "fp8", "activationProfile": "normal", "combineQuantMode": "none" }, @@ -53993,9 +56100,9 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271650161", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271650161", - "createdAt": "2026-06-26T23:49:22Z", + "id": "28271594334", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271594334", + "createdAt": "2026-06-26T23:49:09.827299+00:00", "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" }, "rows": [ @@ -54003,35 +56110,35 
@@ "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 38.55999931693077, - "p90": 52.25599929690361, - "p95": 57.69599974155426, - "p99": 68.70400160551071 + "p50": 30.81599995493889, + "p90": 33.824000507593155, + "p95": 36.67199984192848, + "p99": 41.760001331567764 }, "combine": { - "p50": 33.440001308918, - "p90": 46.23999819159508, - "p95": 50.36799982190132, - "p99": 62.912002205848694 + "p50": 33.535998314619064, + "p90": 36.06399893760681, + "p95": 38.656000047922134, + "p99": 94.62399780750275 }, "roundtrip": { - "p50": 52.70399898290634, - "p90": 70.43199986219406, - "p95": 77.85599678754807, - "p99": 90.27200192213058 + "p50": 2063.647985458374, + "p90": 2066.3039684295654, + "p95": 2067.5199031829834, + "p99": 2072.1280574798584 }, "isolatedSum": { - "p50": 72.00000062584877, - "p90": 98.49599748849869, - "p95": 108.06399956345558, - "p99": 131.6160038113594 + "p50": 64.35199826955795, + "p90": 69.88799944519997, + "p95": 75.32799988985062, + "p99": 136.3839991390705 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 630784, + "dispatchLogicalBytes": 315392, "combineLogicalBytes": 630784, "fanoutMean": 5.5, "recvTokensMax": 14, - "stragglerRank": 4, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 @@ -54040,35 +56147,35 @@ "tokensPerRank": 2, "globalTokens": 16, "dispatch": { - "p50": 37.76000067591667, - "p90": 48.25599864125252, - "p95": 55.93600124120712, - "p99": 79.68000322580338 + "p50": 30.688000842928886, + "p90": 33.440001308918, + "p95": 35.32800078392029, + "p99": 41.85599833726883 }, "combine": { - "p50": 32.80000016093254, - "p90": 41.120000183582306, - "p95": 44.863998889923096, - "p99": 49.8879998922348 + "p50": 35.10399907827377, + "p90": 39.135999977588654, + "p95": 60.99199876189232, + "p99": 184.2239946126938 }, "roundtrip": { - "p50": 52.83199995756149, - "p90": 65.88800251483917, - "p95": 71.80800288915634, - "p99": 80.60800284147263 + "p50": 2065.023899078369, + "p90": 2067.647933959961, + 
"p95": 2069.279909133911, + "p99": 2082.5600624084473 }, "isolatedSum": { - "p50": 70.56000083684921, - "p90": 89.37599882483482, - "p95": 100.80000013113022, - "p99": 129.56800311803818 + "p50": 65.79199992120266, + "p90": 72.57600128650665, + "p95": 96.3199995458126, + "p99": 226.07999294996262 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1232896, + "dispatchLogicalBytes": 616448, "combineLogicalBytes": 1232896, "fanoutMean": 5.375, "recvTokensMax": 21, - "stragglerRank": 4, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 @@ -54077,35 +56184,35 @@ "tokensPerRank": 4, "globalTokens": 32, "dispatch": { - "p50": 38.495998829603195, - "p90": 52.44800075888634, - "p95": 56.543998420238495, - "p99": 76.4480009675026 + "p50": 30.527999624609947, + "p90": 32.70399942994118, + "p95": 34.33600068092346, + "p99": 38.72000053524971 }, "combine": { - "p50": 33.055998384952545, - "p90": 44.16000097990036, - "p95": 45.951999723911285, - "p99": 53.568001836538315 + "p50": 34.71999987959862, + "p90": 36.896001547575, + "p95": 37.82400116324425, + "p99": 40.672000497579575 }, "roundtrip": { - "p50": 52.70399898290634, - "p90": 64.2239972949028, - "p95": 71.96799665689468, - "p99": 81.53600245714188 + "p50": 2065.7920837402344, + "p90": 2069.4079399108887, + "p95": 2074.079990386963, + "p99": 2120.703935623169 }, "isolatedSum": { - "p50": 71.55199721455574, - "p90": 96.6080017387867, - "p95": 102.49599814414978, - "p99": 130.0160028040409 + "p50": 65.24799950420856, + "p90": 69.60000097751617, + "p95": 72.16000184416771, + "p99": 79.39200103282928 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2480128, + "dispatchLogicalBytes": 1240064, "combineLogicalBytes": 2480128, "fanoutMean": 5.40625, "recvTokensMax": 39, - "stragglerRank": 6, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 @@ -54114,35 +56221,35 @@ "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 39.07199949026108, - "p90": 
52.960000932216644, - "p95": 56.992001831531525, - "p99": 65.43999910354614 + "p50": 31.007999554276466, + "p90": 33.24799984693527, + "p95": 35.45600175857544, + "p99": 42.11200028657913 }, "combine": { - "p50": 34.04799848794937, - "p90": 44.19200122356415, - "p95": 46.1760014295578, - "p99": 57.472001761198044 + "p50": 35.74400022625923, + "p90": 38.62399980425835, + "p95": 39.903998374938965, + "p99": 44.12800073623657 }, "roundtrip": { - "p50": 54.11199852824211, - "p90": 68.60800087451935, - "p95": 74.78400319814682, - "p99": 85.28000116348267 + "p50": 2066.240072250366, + "p90": 2069.6959495544434, + "p95": 2070.784091949463, + "p99": 2073.9200115203857 }, "isolatedSum": { - "p50": 73.11999797821045, - "p90": 97.15200215578079, - "p95": 103.16800326108932, - "p99": 122.91200086474419 + "p50": 66.7519997805357, + "p90": 71.87199965119362, + "p95": 75.3600001335144, + "p99": 86.2400010228157 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, + "dispatchLogicalBytes": 2487296, "combineLogicalBytes": 4974592, "fanoutMean": 5.421875, "recvTokensMax": 74, - "stragglerRank": 6, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 @@ -54151,35 +56258,35 @@ "tokensPerRank": 16, "globalTokens": 128, "dispatch": { - "p50": 40.12800008058548, - "p90": 55.00800162553787, - "p95": 59.29600074887276, - "p99": 66.81600213050842 + "p50": 32.32000023126602, + "p90": 39.103999733924866, + "p95": 52.799999713897705, + "p99": 55.36000058054924 }, "combine": { - "p50": 38.047999143600464, - "p90": 49.82399940490723, - "p95": 52.799999713897705, - "p99": 63.19999694824219 + "p50": 38.656000047922134, + "p90": 41.79200157523155, + "p95": 42.97599941492081, + "p99": 47.520000487565994 }, "roundtrip": { - "p50": 61.5679994225502, - "p90": 75.48800110816956, - "p95": 82.36800134181976, - "p99": 96.89600020647049 + "p50": 2071.9680786132812, + "p90": 2074.592113494873, + "p95": 2075.615882873535, + "p99": 2079.7760486602783 }, "isolatedSum": { - "p50": 
78.17599922418594, - "p90": 104.8320010304451, - "p95": 112.09600046277046, - "p99": 130.0159990787506 + "p50": 70.97600027918816, + "p90": 80.89600130915642, + "p95": 95.77599912881851, + "p99": 102.88000106811523 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 9920512, + "dispatchLogicalBytes": 4960256, "combineLogicalBytes": 9920512, "fanoutMean": 5.40625, "recvTokensMax": 145, - "stragglerRank": 0, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 @@ -54188,35 +56295,35 @@ "tokensPerRank": 32, "globalTokens": 256, "dispatch": { - "p50": 46.23999819159508, - "p90": 56.57599866390228, - "p95": 62.30400130152702, - "p99": 70.8480030298233 + "p50": 36.3520011305809, + "p90": 38.11199963092804, + "p95": 40.22400081157684, + "p99": 45.951999723911285 }, "combine": { - "p50": 43.96799951791763, - "p90": 53.75999957323074, - "p95": 58.33600088953972, - "p99": 61.216000467538834 + "p50": 47.968000173568726, + "p90": 50.87999999523163, + "p95": 51.83999985456467, + "p99": 58.04799869656563 }, "roundtrip": { - "p50": 71.19999825954437, - "p90": 80.86399734020233, - "p95": 85.28000116348267, - "p99": 93.21600198745728 + "p50": 2082.7200412750244, + "p90": 2085.2479934692383, + "p95": 2086.2081050872803, + "p99": 2089.1199111938477 }, "isolatedSum": { - "p50": 90.20799770951271, - "p90": 110.33599823713303, - "p95": 120.64000219106674, - "p99": 132.06400349736214 + "p50": 84.32000130414963, + "p90": 88.99199962615967, + "p95": 92.06400066614151, + "p99": 103.99999842047691 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19726336, + "dispatchLogicalBytes": 9863168, "combineLogicalBytes": 19726336, "fanoutMean": 5.375, "recvTokensMax": 287, - "stragglerRank": 4, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 @@ -54225,35 +56332,35 @@ "tokensPerRank": 64, "globalTokens": 512, "dispatch": { - "p50": 56.60799890756607, - "p90": 77.53600180149078, - "p95": 85.31200140714645, - "p99": 192.03199446201324 + "p50": 
41.600000113248825, + "p90": 51.00800096988678, + "p95": 52.12799832224846, + "p99": 55.1999993622303 }, "combine": { - "p50": 58.240000158548355, - "p90": 67.29599833488464, - "p95": 69.56800073385239, - "p99": 77.82399654388428 + "p50": 60.67200005054474, + "p90": 68.67200136184692, + "p95": 71.68000191450119, + "p99": 97.08800166845322 }, "roundtrip": { - "p50": 96.28800302743912, - "p90": 107.39199817180634, - "p95": 111.58400028944016, - "p99": 126.52799487113953 + "p50": 2101.8240451812744, + "p90": 2108.736038208008, + "p95": 2111.936092376709, + "p99": 2120.1279163360596 }, "isolatedSum": { - "p50": 114.84799906611443, - "p90": 144.83200013637543, - "p95": 154.88000214099884, - "p99": 269.8559910058975 + "p50": 102.27200016379356, + "p90": 119.6800023317337, + "p95": 123.80800023674965, + "p99": 152.28800103068352 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 38993920, + "dispatchLogicalBytes": 19496960, "combineLogicalBytes": 38993920, "fanoutMean": 5.3125, "recvTokensMax": 564, - "stragglerRank": 2, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 @@ -54262,35 +56369,35 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 81.7599967122078, - "p90": 92.67199784517288, - "p95": 95.13600170612335, - "p99": 128.38399410247803 + "p50": 54.016001522541046, + "p90": 56.223999708890915, + "p95": 57.312000542879105, + "p99": 60.575999319553375 }, "combine": { - "p50": 86.27200126647949, - "p90": 94.91200000047684, - "p95": 97.120001912117, - "p99": 105.27999699115753 + "p50": 88.54400366544724, + "p90": 91.93599969148636, + "p95": 92.70399808883667, + "p99": 114.81600254774094 }, "roundtrip": { - "p50": 147.2959965467453, - "p90": 157.56799280643463, - "p95": 162.36799955368042, - "p99": 174.9120056629181 + "p50": 2143.0718898773193, + "p90": 2146.7199325561523, + "p95": 2147.455930709839, + "p99": 2153.791904449463 }, "isolatedSum": { - "p50": 168.0319979786873, - "p90": 187.58399784564972, - "p95": 
192.25600361824036, - "p99": 233.66399109363556 + "p50": 142.56000518798828, + "p90": 148.15999940037727, + "p95": 150.01599863171577, + "p99": 175.3920018672943 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, + "dispatchLogicalBytes": 38836224, "combineLogicalBytes": 77672448, "fanoutMean": 5.291015625, "recvTokensMax": 1104, - "stragglerRank": 5, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 @@ -54298,28 +56405,29 @@ ] }, { - "id": "cx-43b4144e", - "identity": "h200|deepep|7168|8|256|bf16|ll|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|ac583971f94b176", - "colorKey": "h200_0a1a73b3", - "comparisonKey": "14196b9d68f90910", + "id": "cx-73d1725a", + "identity": "h100|deepep|7168|8|256|fp8|ll|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h100_3a77ee8e", + "comparisonKey": "93509525aa3f27c6", "schemaVersion": 3, - "generatedAt": "2026-06-26T17:30:32.638567+00:00", + "generatedAt": "2026-06-26T23:49:16.484836+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_5", - "sku": "h200", + "publicationStatus": "diagnostic", + "runner": "h100-dgxc-slurm_13", + "sku": "h100", "backend": "deepep", "phase": "decode", "mode": "ll", - "resourceMode": "normalized", - "suite": "resource-constrained", + "resourceMode": "tuned", + "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h100-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H200 EP8 · deepep · bf16 LL (norm)", + "label": "H100 EP8 · deepep · fp8 LL", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, @@ -54329,12 +56437,12 @@ "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, - "dispatchDtype": "bf16", + "dispatchDtype": "fp8", 
"activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { - "requestedFraction": 0.18, + "requestedFraction": null, "achievedFraction": null, "configuredUnits": null, "deviceUnits": 132, @@ -54359,45 +56467,45 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28254426529", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254426529", - "createdAt": "2026-06-26T17:29:02Z", - "sha": "60dec7d70f554e252fec87709e2be52752947db1" + "id": "28271598000", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271598000", + "createdAt": "2026-06-26T23:49:16.484836+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" }, "rows": [ { "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 36.86400130391121, - "p90": 47.13600128889084, - "p95": 51.52000114321709, - "p99": 63.32799792289734 + "p50": 31.10400028526783, + "p90": 33.376000821590424, + "p95": 34.88000109791756, + "p99": 39.264000952243805 }, "combine": { - "p50": 33.440001308918, - "p90": 42.527999728918076, - "p95": 46.81599885225296, - "p99": 52.22399905323982 + "p50": 32.575998455286026, + "p90": 35.32800078392029, + "p95": 36.928001791238785, + "p99": 40.41599854826927 }, "roundtrip": { - "p50": 50.52800104022026, - "p90": 65.15199691057205, - "p95": 71.03999704122543, - "p99": 78.68800312280655 + "p50": 2062.4639987945557, + "p90": 2065.1841163635254, + "p95": 2067.9678916931152, + "p99": 2091.871976852417 }, "isolatedSum": { - "p50": 70.30400261282921, - "p90": 89.66400101780891, - "p95": 98.33599999547005, - "p99": 115.55199697613716 + "p50": 63.679998740553856, + "p90": 68.70400160551071, + "p95": 71.80800288915634, + "p99": 79.67999950051308 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 630784, + "dispatchLogicalBytes": 315392, "combineLogicalBytes": 630784, "fanoutMean": 5.5, "recvTokensMax": 14, - "stragglerRank": 4, + 
"stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -54406,35 +56514,35 @@ "tokensPerRank": 2, "globalTokens": 16, "dispatch": { - "p50": 37.408001720905304, - "p90": 48.608001321554184, - "p95": 54.687999188899994, - "p99": 65.2799978852272 + "p50": 30.719999223947525, + "p90": 32.99200162291527, + "p95": 35.551998764276505, + "p99": 40.64000025391579 }, "combine": { "p50": 32.735999673604965, - "p90": 42.59200021624565, - "p95": 45.05600035190582, - "p99": 51.35999992489815 + "p90": 35.00799834728241, + "p95": 36.3520011305809, + "p99": 43.807998299598694 }, "roundtrip": { - "p50": 51.4880008995533, - "p90": 66.72000139951706, - "p95": 72.54400104284286, - "p99": 85.08799970149994 + "p50": 2063.136100769043, + "p90": 2065.376043319702, + "p95": 2067.296028137207, + "p99": 2071.039915084839 }, "isolatedSum": { - "p50": 70.14400139451027, - "p90": 91.20000153779984, - "p95": 99.74399954080582, - "p99": 116.63999781012535 + "p50": 63.45599889755249, + "p90": 67.99999997019768, + "p95": 71.9039998948574, + "p99": 84.44799855351448 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1232896, + "dispatchLogicalBytes": 616448, "combineLogicalBytes": 1232896, "fanoutMean": 5.375, "recvTokensMax": 21, - "stragglerRank": 4, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -54443,35 +56551,35 @@ "tokensPerRank": 4, "globalTokens": 32, "dispatch": { - "p50": 37.79200091958046, - "p90": 49.27999898791313, - "p95": 54.91200089454651, - "p99": 61.08799949288368 + "p50": 32.35200047492981, + "p90": 46.65600135922432, + "p95": 47.42399975657463, + "p99": 53.279999643564224 }, "combine": { - "p50": 31.231999397277832, - "p90": 43.487999588251114, - "p95": 47.26399853825569, - "p99": 65.31199812889099 + "p50": 33.824000507593155, + "p90": 36.768000572919846, + "p95": 39.07199949026108, + "p99": 50.783999264240265 }, "roundtrip": { - "p50": 51.58400163054466, - "p90": 68.89600306749344, - "p95": 73.95199686288834, - "p99": 
91.61599725484848 + "p50": 2064.095973968506, + "p90": 2066.9119358062744, + "p95": 2069.567918777466, + "p99": 2080.512046813965 }, "isolatedSum": { - "p50": 69.02400031685829, - "p90": 92.76799857616425, - "p95": 102.1759994328022, - "p99": 126.39999762177467 + "p50": 66.17600098252296, + "p90": 83.42400193214417, + "p95": 86.49599924683571, + "p99": 104.06399890780449 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2480128, + "dispatchLogicalBytes": 1240064, "combineLogicalBytes": 2480128, "fanoutMean": 5.40625, "recvTokensMax": 39, - "stragglerRank": 0, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -54480,35 +56588,35 @@ "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 37.53599897027016, - "p90": 48.128001391887665, - "p95": 54.75199967622757, - "p99": 62.111999839544296 + "p50": 31.90400078892708, + "p90": 34.04799848794937, + "p95": 35.74400022625923, + "p99": 39.77600112557411 }, "combine": { - "p50": 34.46400165557861, - "p90": 44.544000178575516, - "p95": 47.231998294591904, - "p99": 57.37600103020668 + "p50": 34.17599946260452, + "p90": 36.22400015592575, + "p95": 37.53599897027016, + "p99": 42.208001017570496 }, "roundtrip": { - "p50": 54.687999188899994, - "p90": 67.4239993095398, - "p95": 73.44000041484833, - "p99": 91.96799993515015 + "p50": 2065.279960632324, + "p90": 2068.416118621826, + "p95": 2070.6560611724854, + "p99": 2080.8000564575195 }, "isolatedSum": { - "p50": 72.00000062584877, - "p90": 92.67200157046318, - "p95": 101.98399797081947, - "p99": 119.48800086975098 + "p50": 66.0800002515316, + "p90": 70.27199864387512, + "p95": 73.27999919652939, + "p99": 81.98400214314461 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, + "dispatchLogicalBytes": 2487296, "combineLogicalBytes": 4974592, "fanoutMean": 5.421875, "recvTokensMax": 74, - "stragglerRank": 4, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -54517,35 +56625,35 @@ "tokensPerRank": 16, 
"globalTokens": 128, "dispatch": { - "p50": 39.29600119590759, - "p90": 51.35999992489815, - "p95": 55.64799904823303, - "p99": 64.96000289916992 + "p50": 32.86400064826012, + "p90": 34.432001411914825, + "p95": 36.25600039958954, + "p99": 40.73600098490715 }, "combine": { - "p50": 36.67199984192848, - "p90": 46.62400111556053, - "p95": 50.56000128388405, - "p99": 60.38400158286095 + "p50": 37.88800165057182, + "p90": 44.67200115323067, + "p95": 46.30399867892265, + "p99": 69.24799829721451 }, "roundtrip": { - "p50": 60.47999858856201, - "p90": 74.5920017361641, - "p95": 79.3600007891655, - "p99": 87.87199854850769 + "p50": 2071.1679458618164, + "p90": 2079.5199871063232, + "p95": 2080.4800987243652, + "p99": 2085.439920425415 }, "isolatedSum": { - "p50": 75.96800103783607, - "p90": 97.98400104045868, - "p95": 106.20800033211708, - "p99": 125.34400448203087 + "p50": 70.75200229883194, + "p90": 79.10400256514549, + "p95": 82.55999907851219, + "p99": 109.98399928212166 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 9920512, + "dispatchLogicalBytes": 4960256, "combineLogicalBytes": 9920512, "fanoutMean": 5.40625, "recvTokensMax": 145, - "stragglerRank": 2, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -54554,35 +56662,35 @@ "tokensPerRank": 32, "globalTokens": 256, "dispatch": { - "p50": 45.05600035190582, - "p90": 55.00800162553787, - "p95": 57.95200169086456, - "p99": 66.01600348949432 + "p50": 35.00799834728241, + "p90": 36.928001791238785, + "p95": 39.07199949026108, + "p99": 41.98399931192398 }, "combine": { - "p50": 44.28799822926521, - "p90": 53.05600166320801, - "p95": 55.904000997543335, - "p99": 61.3120011985302 + "p50": 43.68000105023384, + "p90": 45.72800174355507, + "p95": 46.879999339580536, + "p99": 52.480001002550125 }, "roundtrip": { - "p50": 72.64000177383423, - "p90": 84.16000008583069, - "p95": 88.03199976682663, - "p99": 106.30399733781815 + "p50": 2079.263925552368, + "p90": 2081.279993057251, + "p95": 
2082.5281143188477, + "p99": 2086.1759185791016 }, "isolatedSum": { - "p50": 89.34399858117104, - "p90": 108.06400328874588, - "p95": 113.8560026884079, - "p99": 127.32800468802452 + "p50": 78.68799939751625, + "p90": 82.65600353479385, + "p95": 85.95199882984161, + "p99": 94.4640003144741 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19726336, + "dispatchLogicalBytes": 9863168, "combineLogicalBytes": 19726336, "fanoutMean": 5.375, "recvTokensMax": 287, - "stragglerRank": 4, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -54591,35 +56699,35 @@ "tokensPerRank": 64, "globalTokens": 512, "dispatch": { - "p50": 55.23199960589409, - "p90": 65.63200056552887, - "p95": 71.48800045251846, - "p99": 79.55200225114822 + "p50": 42.11200028657913, + "p90": 65.15199691057205, + "p95": 74.36800003051758, + "p99": 88.99199962615967 }, "combine": { - "p50": 58.43200162053108, - "p90": 69.37599927186966, - "p95": 71.07199728488922, - "p99": 79.42400127649307 + "p50": 58.9120015501976, + "p90": 63.87200206518173, + "p95": 64.80000168085098, + "p99": 71.45600020885468 }, "roundtrip": { - "p50": 96.8639999628067, - "p90": 108.44799876213074, - "p95": 113.72800171375275, - "p99": 121.72800302505493 + "p50": 2100.9280681610107, + "p90": 2110.1760864257812, + "p95": 2111.2639904022217, + "p99": 2114.367961883545 }, "isolatedSum": { - "p50": 113.66400122642517, - "p90": 135.00799983739853, - "p95": 142.55999773740768, - "p99": 158.9760035276413 + "p50": 101.02400183677673, + "p90": 129.02399897575378, + "p95": 139.16800171136856, + "p99": 160.44799983501434 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 38993920, + "dispatchLogicalBytes": 19496960, "combineLogicalBytes": 38993920, "fanoutMean": 5.3125, "recvTokensMax": 564, - "stragglerRank": 3, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -54628,35 +56736,35 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 79.26400005817413, - "p90": 
88.44800293445587, - "p95": 92.6399976015091, - "p99": 101.69599950313568 + "p50": 53.18399891257286, + "p90": 54.78399991989136, + "p95": 56.60799890756607, + "p99": 61.535999178886414 }, "combine": { - "p50": 86.01599931716919, - "p90": 95.0080007314682, - "p95": 97.02400118112564, - "p99": 103.32799702882767 + "p50": 85.75999736785889, + "p90": 88.03199976682663, + "p95": 89.12000060081482, + "p99": 95.29600292444229 }, "roundtrip": { - "p50": 147.32800424098969, - "p90": 157.53600001335144, - "p95": 161.47199273109436, - "p99": 169.0240055322647 + "p50": 2140.671968460083, + "p90": 2143.5201168060303, + "p95": 2145.632028579712, + "p99": 2288.991928100586 }, "isolatedSum": { - "p50": 165.27999937534332, - "p90": 183.45600366592407, - "p95": 189.66399878263474, - "p99": 205.02399653196335 + "p50": 138.94399628043175, + "p90": 142.815999686718, + "p95": 145.7279995083809, + "p99": 156.8320021033287 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, + "dispatchLogicalBytes": 38836224, "combineLogicalBytes": 77672448, "fanoutMean": 5.291015625, "recvTokensMax": 1104, - "stragglerRank": 6, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 @@ -54664,32 +56772,33 @@ ] }, { - "id": "cx-b5299c0b", - "identity": "h200|deepep|4096|8|128|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||75530960a30b452", - "colorKey": "h200_87683f6c", - "comparisonKey": "0d3b5b81799f76d5", + "id": "cx-1d30dd2c", + "identity": "h100|deepep|7168|8|256|fp8|ll|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|ac583971f94b176", + "colorKey": "h100_ac25b0a1", + "comparisonKey": "405d06288635d74f", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:53:33.916655+00:00", + "generatedAt": "2026-06-26T17:32:59.549027+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_2", - "sku": "h200", + "publicationStatus": "diagnostic", + "runner": "h100-dgxc-slurm_00", + "sku": "h100", 
"backend": "deepep", "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", + "mode": "ll", + "resourceMode": "normalized", + "suite": "resource-constrained", "comparisonClass": "standardized", - "measurementContract": "runtime-visible-v1", - "topologyClass": "h200-nvlink-island", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H200 EP8 · deepep · fp8", + "label": "H100 EP8 · deepep · fp8 LL (norm)", + "model": "DeepSeek-V3/V4", "shape": { - "hidden": 4096, + "hidden": 7168, "topk": 8, - "experts": 128, + "experts": 256, "routing": "uniform", "routingLabel": "uniform", "routingStep": 0, @@ -54700,13 +56809,13 @@ "combineQuantMode": "none" }, "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, + "requestedFraction": 0.18, + "achievedFraction": null, + "configuredUnits": null, "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, "paretoEligible": false }, "placement": { @@ -54716,8 +56825,8 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "75530960a30b452", - "workloadId": "set:8:d1b92539bddfb570", + "traceSignature": "ac583971f94b176", + "workloadId": "set:8:d8d49658059863f2", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -54725,45 +56834,45 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271736220", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271736220", - "createdAt": "2026-06-26T23:52:01Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28254359089", + "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254359089", + "createdAt": "2026-06-26T17:32:59.549027+00:00", + "sha": "60dec7d70f554e252fec87709e2be52752947db1" }, "rows": [ { "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 221.15199267864227, - "p90": 287.26398944854736, - "p95": 315.39198756217957, - "p99": 401.98400616645813 + "p50": 30.432000756263733, + "p90": 32.32000023126602, + "p95": 34.143999218940735, + "p99": 38.015998899936676 }, "combine": { - "p50": 47.87199944257736, - "p90": 66.27199798822403, - "p95": 73.91999661922455, - "p99": 92.51199662685394 + "p50": 32.287999987602234, + "p90": 34.78400036692619, + "p95": 35.87200120091438, + "p99": 40.383998304605484 }, "roundtrip": { - "p50": 246.75199389457703, - "p90": 302.2400140762329, - "p95": 335.61599254608154, - "p99": 400.160014629364 + "p50": 2063.9359951019287, + "p90": 2065.632104873657, + "p95": 2066.9760704040527, + "p99": 2069.6001052856445 }, "isolatedSum": { - "p50": 269.02399212121964, - "p90": 353.5359874367714, - "p95": 389.3119841814041, - "p99": 494.4960027933121 + "p50": 62.72000074386597, + "p90": 67.10400059819221, + "p95": 70.01600041985512, + "p99": 78.39999720454216 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 172032, - "combineLogicalBytes": 344064, - "fanoutMean": 5.25, - "recvTokensMax": 6, - "stragglerRank": 0, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 14, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -54772,35 +56881,35 @@ "tokensPerRank": 2, "globalTokens": 16, "dispatch": { - "p50": 264.6079957485199, - "p90": 342.3680067062378, - "p95": 371.0399866104126, - "p99": 447.00801372528076 + "p50": 30.368000268936157, + "p90": 32.09599852561951, + "p95": 34.01599824428558, + "p99": 37.248000502586365 }, "combine": { - "p50": 54.46400120854378, - "p90": 68.03199648857117, - "p95": 74.8480036854744, - "p99": 88.83199840784073 + "p50": 
32.22399950027466, + "p90": 34.46400165557861, + "p95": 35.711999982595444, + "p99": 45.88799923658371 }, "roundtrip": { - "p50": 257.2160065174103, - "p90": 336.4480137825012, - "p95": 375.10401010513306, - "p99": 443.93599033355713 + "p50": 2064.768075942993, + "p90": 2067.13604927063, + "p95": 2069.024085998535, + "p99": 2083.7440490722656 }, "isolatedSum": { - "p50": 319.0719969570637, - "p90": 410.40000319480896, - "p95": 445.887990295887, - "p99": 535.8400121331215 + "p50": 62.591999769210815, + "p90": 66.56000018119812, + "p95": 69.72799822688103, + "p99": 83.13599973917007 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 352256, - "combineLogicalBytes": 704512, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 1232896, "fanoutMean": 5.375, - "recvTokensMax": 12, - "stragglerRank": 6, + "recvTokensMax": 21, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -54809,35 +56918,35 @@ "tokensPerRank": 4, "globalTokens": 32, "dispatch": { - "p50": 210.14399826526642, - "p90": 260.0319981575012, - "p95": 276.99199318885803, - "p99": 401.856005191803 + "p50": 30.527999624609947, + "p90": 32.54399821162224, + "p95": 35.26400029659271, + "p99": 40.063999593257904 }, "combine": { - "p50": 49.02400076389313, - "p90": 61.983998864889145, - "p95": 68.57600063085556, - "p99": 82.43200182914734 + "p50": 34.2399999499321, + "p90": 37.53599897027016, + "p95": 38.24000060558319, + "p99": 40.031999349594116 }, "roundtrip": { - "p50": 252.73600220680237, - "p90": 308.51200222969055, - "p95": 325.76000690460205, - "p99": 404.2240083217621 + "p50": 2065.376043319702, + "p90": 2067.3279762268066, + "p95": 2068.3200359344482, + "p99": 2070.5599784851074 }, "isolatedSum": { - "p50": 259.16799902915955, - "p90": 322.01599702239037, - "p95": 345.5679938197136, - "p99": 484.2880070209503 + "p50": 64.76799957454205, + "p90": 70.0799971818924, + "p95": 73.5040009021759, + "p99": 80.09599894285202 }, "roundtripMeasured": true, - 
"dispatchLogicalBytes": 692224, - "combineLogicalBytes": 1384448, - "fanoutMean": 5.28125, - "recvTokensMax": 26, - "stragglerRank": 4, + "dispatchLogicalBytes": 1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 39, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -54846,35 +56955,35 @@ "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 229.40799593925476, - "p90": 285.91999411582947, - "p95": 302.97601222991943, - "p99": 384.799987077713 + "p50": 31.231999397277832, + "p90": 33.055998384952545, + "p95": 35.61599925160408, + "p99": 38.94399851560593 }, "combine": { - "p50": 50.6879985332489, - "p90": 65.95200300216675, - "p95": 71.48800045251846, - "p99": 85.56800335645676 + "p50": 33.76000002026558, + "p90": 35.999998450279236, + "p95": 37.76000067591667, + "p99": 53.888000547885895 }, "roundtrip": { - "p50": 262.7840042114258, - "p90": 331.9680094718933, - "p95": 359.6160113811493, - "p99": 441.0560131072998 + "p50": 2066.528081893921, + "p90": 2068.511962890625, + "p95": 2069.6959495544434, + "p99": 2078.07993888855 }, "isolatedSum": { - "p50": 280.09599447250366, - "p90": 351.8719971179962, - "p95": 374.4640126824379, - "p99": 470.36799043416977 + "p50": 64.99199941754341, + "p90": 69.05599683523178, + "p95": 73.37599992752075, + "p99": 92.83199906349182 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1372160, - "combineLogicalBytes": 2744320, - "fanoutMean": 5.234375, - "recvTokensMax": 49, - "stragglerRank": 6, + "dispatchLogicalBytes": 2487296, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 74, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -54883,34 +56992,34 @@ "tokensPerRank": 16, "globalTokens": 128, "dispatch": { - "p50": 211.67999505996704, - "p90": 262.0159983634949, - "p95": 281.5360128879547, - "p99": 434.4319999217987 + "p50": 32.51200169324875, + "p90": 34.20799970626831, + "p95": 36.86400130391121, + 
"p99": 40.09599983692169 }, "combine": { - "p50": 50.87999999523163, - "p90": 67.74400174617767, - "p95": 72.76800274848938, - "p99": 100.47999769449234 + "p50": 37.21600025892258, + "p90": 39.45599868893623, + "p95": 40.41599854826927, + "p99": 42.399998754262924 }, "roundtrip": { - "p50": 261.1199915409088, - "p90": 332.5119912624359, - "p95": 354.8800051212311, - "p99": 414.2720103263855 + "p50": 2071.392059326172, + "p90": 2074.687957763672, + "p95": 2078.7200927734375, + "p99": 2156.5120220184326 }, "isolatedSum": { - "p50": 262.55999505519867, - "p90": 329.76000010967255, - "p95": 354.3040156364441, - "p99": 534.911997616291 + "p50": 69.72800195217133, + "p90": 73.66399839520454, + "p95": 77.27999985218048, + "p99": 82.49599859118462 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2732032, - "combineLogicalBytes": 5464064, - "fanoutMean": 5.2109375, - "recvTokensMax": 94, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 145, "stragglerRank": 7, "correct": true, "samplesPooled": 600, @@ -54920,34 +57029,34 @@ "tokensPerRank": 32, "globalTokens": 256, "dispatch": { - "p50": 210.68799495697021, - "p90": 258.91199707984924, - "p95": 279.87200021743774, - "p99": 326.1440098285675 + "p50": 35.10399907827377, + "p90": 36.38400137424469, + "p95": 38.43199834227562, + "p99": 42.208001017570496 }, "combine": { - "p50": 53.85600030422211, - "p90": 68.67200136184692, - "p95": 72.51200079917908, - "p99": 91.90399944782257 + "p50": 42.7200011909008, + "p90": 44.89599913358688, + "p95": 45.66400125622749, + "p99": 48.70399832725525 }, "roundtrip": { - "p50": 265.6959891319275, - "p90": 326.2079954147339, - "p95": 351.52000188827515, - "p99": 446.3360011577606 + "p50": 2080.22403717041, + "p90": 2081.9520950317383, + "p95": 2083.359956741333, + "p99": 2118.4639930725098 }, "isolatedSum": { - "p50": 264.5439952611923, - "p90": 327.58399844169617, - "p95": 352.3840010166168, - "p99": 418.0480092763901 + 
"p50": 77.82400026917458, + "p90": 81.28000050783157, + "p95": 84.09599959850311, + "p99": 90.91199934482574 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 5562368, - "combineLogicalBytes": 11124736, - "fanoutMean": 5.3046875, - "recvTokensMax": 186, + "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 287, "stragglerRank": 7, "correct": true, "samplesPooled": 600, @@ -54957,35 +57066,35 @@ "tokensPerRank": 64, "globalTokens": 512, "dispatch": { - "p50": 209.6640020608902, - "p90": 265.21599292755127, - "p95": 291.0720109939575, - "p99": 366.14400148391724 + "p50": 42.047999799251556, + "p90": 47.90399968624115, + "p95": 48.8319993019104, + "p99": 53.119998425245285 }, "combine": { - "p50": 61.43999844789505, - "p90": 73.91999661922455, - "p95": 79.42400127649307, - "p99": 92.06400066614151 + "p50": 57.40800127387047, + "p90": 62.68800050020218, + "p95": 64.51199948787689, + "p99": 67.03999638557434 }, "roundtrip": { - "p50": 262.2399926185608, - "p90": 317.7280128002167, - "p95": 350.7840037345886, - "p99": 447.9680061340332 + "p50": 2100.5120277404785, + "p90": 2108.383893966675, + "p95": 2109.503984451294, + "p99": 2111.9039058685303 }, "isolatedSum": { - "p50": 271.10400050878525, - "p90": 339.1359895467758, - "p95": 370.4960122704506, - "p99": 458.20800215005875 + "p50": 99.45600107312202, + "p90": 110.59200018644333, + "p95": 113.34399878978729, + "p99": 120.15999481081963 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 11096064, - "combineLogicalBytes": 22192128, - "fanoutMean": 5.291015625, - "recvTokensMax": 358, - "stragglerRank": 4, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 564, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -54994,34 +57103,34 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 217.8879976272583, - "p90": 276.99199318885803, - "p95": 
317.05600023269653, - "p99": 742.6559925079346 + "p50": 53.599998354911804, + "p90": 55.39200082421303, + "p95": 56.41600117087364, + "p99": 61.08799949288368 }, "combine": { - "p50": 72.67200201749802, - "p90": 88.54400366544724, - "p95": 92.47999638319016, - "p99": 113.02399635314941 + "p50": 83.5840031504631, + "p90": 86.11200004816055, + "p95": 87.2960016131401, + "p99": 91.51999652385712 }, "roundtrip": { - "p50": 273.44000339508057, - "p90": 323.5520124435425, - "p95": 345.0239896774292, - "p99": 420.3520119190216 + "p50": 2139.967918395996, + "p90": 2142.303943634033, + "p95": 2142.911911010742, + "p99": 2144.831895828247 }, "isolatedSum": { - "p50": 290.5599996447563, - "p90": 365.53599685430527, - "p95": 409.5359966158867, - "p99": 855.679988861084 + "p50": 137.1840015053749, + "p90": 141.50400087237358, + "p95": 143.71200278401375, + "p99": 152.6079960167408 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 22282240, - "combineLogicalBytes": 44564480, - "fanoutMean": 5.3125, - "recvTokensMax": 699, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 1104, "stragglerRank": 7, "correct": true, "samplesPooled": 600, @@ -55030,38 +57139,39 @@ ] }, { - "id": "cx-a3751d3c", - "identity": "h200|deepep|5120|8|160|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||be1b44a963bd4ef", - "colorKey": "h200_87683f6c", - "comparisonKey": "972ab14012f6276a", + "id": "cx-d35502c2", + "identity": "h100|deepep|7168|8|256|fp8-directcast|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h100_805b6904", + "comparisonKey": "a3be04b3aa017ede", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:53:56.538326+00:00", + "generatedAt": "2026-06-27T15:55:34.014711+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h200-dgxc-slurm_11", - "sku": "h200", + "runner": "h100-dgxc-slurm_06", + "sku": "h100", "backend": 
"deepep", "phase": "decode", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "runtime-visible-v1", - "topologyClass": "h200-nvlink-island", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H200 EP8 · deepep · fp8", + "label": "H100 EP8 · deepep · fp8-directcast", + "model": "DeepSeek-V3/V4", "shape": { - "hidden": 5120, + "hidden": 7168, "topk": 8, - "experts": 160, + "experts": 256, "routing": "uniform", "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, - "dispatchDtype": "fp8", + "dispatchDtype": "fp8-directcast", "activationProfile": "normal", "combineQuantMode": "none" }, @@ -55082,8 +57192,8 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "be1b44a963bd4ef", - "workloadId": "set:8:34e5874082f8ea8f", + "traceSignature": "ac583971f94b176", + "workloadId": "set:8:d8d49658059863f2", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -55091,45 +57201,45 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271751941", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271751941", - "createdAt": "2026-06-26T23:52:29Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28294158591", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28294158591", + "createdAt": "2026-06-27T15:55:34.014711+00:00", + "sha": "42eddb48c3eed35214c5ad50da1aa6527363ff70" }, "rows": [ { "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 217.3759937286377, - "p90": 269.1839933395386, - "p95": 295.1360046863556, - "p99": 345.69600224494934 + "p50": 97.50399738550186, + "p90": 102.27199643850327, + "p95": 104.70400005578995, + "p99": 
110.75200140476227 }, "combine": { - "p50": 50.592001527547836, - "p90": 66.46399945020676, - "p95": 71.74400240182877, - "p99": 89.34400230646133 + "p50": 73.60000163316727, + "p90": 75.42400062084198, + "p95": 76.92799717187881, + "p99": 80.48000186681747 }, "roundtrip": { - "p50": 245.60000002384186, - "p90": 292.64000058174133, - "p95": 306.0480058193207, - "p99": 346.8160033226013 + "p50": 193.79200041294098, + "p90": 199.26400482654572, + "p95": 201.47199928760529, + "p99": 205.79199492931366 }, "isolatedSum": { - "p50": 267.96799525618553, - "p90": 335.64799278974533, - "p95": 366.88000708818436, - "p99": 435.0400045514107 + "p50": 171.10399901866913, + "p90": 177.69599705934525, + "p95": 181.63199722766876, + "p99": 191.23200327157974 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 215040, - "combineLogicalBytes": 430080, - "fanoutMean": 5.25, - "recvTokensMax": 8, - "stragglerRank": 6, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -55138,32 +57248,32 @@ "tokensPerRank": 2, "globalTokens": 16, "dispatch": { - "p50": 206.2399983406067, - "p90": 254.94399666786194, - "p95": 270.4960107803345, - "p99": 337.21598982810974 + "p50": 71.10399752855301, + "p90": 98.88000041246414, + "p95": 101.59999877214432, + "p99": 105.50399869680405 }, "combine": { - "p50": 51.263999193906784, - "p90": 65.72800129652023, - "p95": 70.52800059318542, - "p99": 75.58400183916092 + "p50": 62.55999952554703, + "p90": 71.07199728488922, + "p95": 71.74400240182877, + "p99": 74.81600344181061 }, "roundtrip": { - "p50": 245.15199661254883, - "p90": 296.31999135017395, - "p95": 316.1279857158661, - "p99": 367.3279881477356 + "p50": 154.01600301265717, + "p90": 193.12000274658203, + "p95": 195.3279972076416, + "p99": 198.91199469566345 }, "isolatedSum": { - "p50": 257.5039975345135, - "p90": 320.6719979643822, - "p95": 341.0240113735199, - 
"p99": 412.79999166727066 + "p50": 133.66399705410004, + "p90": 169.95199769735336, + "p95": 173.34400117397308, + "p99": 180.32000213861465 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 440320, - "combineLogicalBytes": 880640, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, "fanoutMean": 5.375, "recvTokensMax": 13, "stragglerRank": 5, @@ -55175,35 +57285,35 @@ "tokensPerRank": 4, "globalTokens": 32, "dispatch": { - "p50": 220.38400173187256, - "p90": 289.15199637413025, - "p95": 331.5519988536835, - "p99": 1036.1599922180176 + "p50": 71.16799801588058, + "p90": 103.2319962978363, + "p95": 105.18400371074677, + "p99": 110.52799969911575 }, "combine": { - "p50": 52.191998809576035, - "p90": 65.21599739789963, - "p95": 68.96000355482101, - "p99": 77.88799703121185 + "p50": 63.968002796173096, + "p90": 75.99999755620956, + "p95": 77.98399776220322, + "p99": 81.66400343179703 }, "roundtrip": { - "p50": 248.79999458789825, - "p90": 299.71200227737427, - "p95": 314.5279884338379, - "p99": 352.09599137306213 + "p50": 154.62400019168854, + "p90": 201.02399587631226, + "p95": 203.99999618530273, + "p99": 212.0320051908493 }, "isolatedSum": { - "p50": 272.5760005414486, - "p90": 354.3679937720299, - "p95": 400.5120024085045, - "p99": 1114.0479892492294 + "p50": 135.13600081205368, + "p90": 179.23199385404587, + "p95": 183.16800147294998, + "p99": 192.19200313091278 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 870400, - "combineLogicalBytes": 1740800, - "fanoutMean": 5.3125, - "recvTokensMax": 25, - "stragglerRank": 3, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -55212,34 +57322,34 @@ "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 221.91999852657318, - "p90": 292.4480140209198, - "p95": 316.3520097732544, - "p99": 412.76800632476807 + "p50": 71.45600020885468, + "p90": 
100.73599964380264, + "p95": 102.78400033712387, + "p99": 107.55199939012527 }, "combine": { - "p50": 54.84800040721893, - "p90": 71.61600142717361, - "p95": 80.64000308513641, - "p99": 102.1760031580925 + "p50": 63.840001821517944, + "p90": 74.97599720954895, + "p95": 76.19199901819229, + "p99": 83.29600095748901 }, "roundtrip": { - "p50": 249.24799799919128, - "p90": 305.5360019207001, - "p95": 325.1520097255707, - "p99": 406.9119989871979 + "p50": 155.42399883270264, + "p90": 199.68000054359436, + "p95": 201.9840031862259, + "p99": 291.6480004787445 }, "isolatedSum": { - "p50": 276.7679989337921, - "p90": 364.0640154480934, - "p95": 396.9920128583908, - "p99": 514.9440094828606 + "p50": 135.29600203037262, + "p90": 175.7119968533516, + "p95": 178.97599935531616, + "p99": 190.8480003476143 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1735680, - "combineLogicalBytes": 3471360, - "fanoutMean": 5.296875, - "recvTokensMax": 50, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, "stragglerRank": 5, "correct": true, "samplesPooled": 600, @@ -55249,35 +57359,35 @@ "tokensPerRank": 16, "globalTokens": 128, "dispatch": { - "p50": 209.75999534130096, - "p90": 260.73598861694336, - "p95": 279.7119915485382, - "p99": 349.98399019241333 + "p50": 71.42399996519089, + "p90": 100.89600086212158, + "p95": 103.00800204277039, + "p99": 108.22399705648422 }, "combine": { - "p50": 54.88000065088272, - "p90": 69.34399902820587, - "p95": 73.91999661922455, - "p99": 101.08800232410431 + "p50": 65.5680000782013, + "p90": 77.08799839019775, + "p95": 78.5600021481514, + "p99": 82.91199803352356 }, "roundtrip": { - "p50": 254.36800718307495, - "p90": 305.2160143852234, - "p95": 330.55999875068665, - "p99": 445.72800397872925 + "p50": 157.18400478363037, + "p90": 202.04800367355347, + "p95": 204.76800203323364, + "p99": 209.4080001115799 }, "isolatedSum": { - "p50": 264.6399959921837, - "p90": 330.07998764514923, - 
"p95": 353.63198816776276, - "p99": 451.07199251651764 + "p50": 136.99200004339218, + "p90": 177.98399925231934, + "p95": 181.56800419092178, + "p99": 191.13599509000778 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 3456000, - "combineLogicalBytes": 6912000, - "fanoutMean": 5.2734375, - "recvTokensMax": 93, - "stragglerRank": 2, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -55286,35 +57396,35 @@ "tokensPerRank": 32, "globalTokens": 256, "dispatch": { - "p50": 212.16000616550446, - "p90": 261.34398579597473, - "p95": 274.4959890842438, - "p99": 355.9679985046387 + "p50": 73.18399846553802, + "p90": 101.08800232410431, + "p95": 102.88000106811523, + "p99": 106.81600123643875 }, "combine": { - "p50": 59.487998485565186, - "p90": 75.9039968252182, - "p95": 79.29600030183792, - "p99": 111.13599687814713 + "p50": 71.35999947786331, + "p90": 82.84799754619598, + "p95": 83.67999643087387, + "p99": 86.94399893283844 }, "roundtrip": { - "p50": 262.4320089817047, - "p90": 318.33600997924805, - "p95": 339.4559919834137, - "p99": 384.0320110321045 + "p50": 162.04799711704254, + "p90": 207.23199844360352, + "p95": 209.34399962425232, + "p99": 212.41599321365356 }, "isolatedSum": { - "p50": 271.64800465106964, - "p90": 337.24798262119293, - "p95": 353.7919893860817, - "p99": 467.1039953827858 + "p50": 144.54399794340134, + "p90": 183.9359998703003, + "p95": 186.5599974989891, + "p99": 193.7600001692772 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 6988800, - "combineLogicalBytes": 13977600, - "fanoutMean": 5.33203125, - "recvTokensMax": 179, - "stragglerRank": 6, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -55323,34 +57433,34 @@ "tokensPerRank": 64, "globalTokens": 
512, "dispatch": { - "p50": 213.44000101089478, - "p90": 259.99999046325684, - "p95": 280.2880108356476, - "p99": 418.08000206947327 + "p50": 81.34400099515915, + "p90": 105.43999820947647, + "p95": 109.6000000834465, + "p99": 460.54399013519287 }, "combine": { - "p50": 67.26399809122086, - "p90": 79.1039988398552, - "p95": 86.94399893283844, - "p99": 97.59999811649323 + "p50": 80.64000308513641, + "p90": 92.99200028181076, + "p95": 94.24000233411789, + "p99": 98.55999797582626 }, "roundtrip": { - "p50": 273.98398518562317, - "p90": 361.2799942493439, - "p95": 384.0959966182709, - "p99": 485.24799942970276 + "p50": 174.01599884033203, + "p90": 220.5439954996109, + "p95": 222.91199862957, + "p99": 228.2239943742752 }, "isolatedSum": { - "p50": 280.70399910211563, - "p90": 339.10398930311203, - "p95": 367.232009768486, - "p99": 515.6800001859665 + "p50": 161.98400408029556, + "p90": 198.43199849128723, + "p95": 203.8400024175644, + "p99": 559.1039881110191 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 13987840, - "combineLogicalBytes": 27975680, - "fanoutMean": 5.3359375, - "recvTokensMax": 355, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, "stragglerRank": 5, "correct": true, "samplesPooled": 600, @@ -55360,34 +57470,34 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 217.53600239753723, - "p90": 271.9680070877075, - "p95": 288.8000011444092, - "p99": 367.71199107170105 + "p50": 88.8959988951683, + "p90": 109.63200032711029, + "p95": 111.32799834012985, + "p99": 116.70400202274323 }, "combine": { - "p50": 80.73599636554718, - "p90": 95.90400010347366, - "p95": 99.16800260543823, - "p99": 122.56000190973282 + "p50": 98.88000041246414, + "p90": 111.10399663448334, + "p95": 112.64000087976456, + "p99": 115.07199704647064 }, "roundtrip": { - "p50": 289.6000146865845, - "p90": 337.69598603248596, - "p95": 350.847989320755, - "p99": 431.4559996128082 + "p50": 
215.61600267887115, + "p90": 238.43200504779816, + "p95": 240.76800048351288, + "p99": 245.15199661254883 }, "isolatedSum": { - "p50": 298.2719987630844, - "p90": 367.8720071911812, - "p95": 387.9680037498474, - "p99": 490.27199298143387 + "p50": 187.77599930763245, + "p90": 220.73599696159363, + "p95": 223.9679992198944, + "p99": 231.77599906921387 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 27837440, - "combineLogicalBytes": 55674880, - "fanoutMean": 5.3095703125, - "recvTokensMax": 699, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, "stragglerRank": 5, "correct": true, "samplesPooled": 600, @@ -55396,30 +57506,31 @@ ] }, { - "id": "cx-1bedbd87", - "identity": "h200|deepep|6144|8|256|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", - "colorKey": "h200_87683f6c", - "comparisonKey": "73242cc56a07dc73", + "id": "cx-779ba710", + "identity": "h100|deepep|7168|8|256|fp8-pertoken|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h100_b68ae8a2", + "comparisonKey": "6d2a2c2b7775de32", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:54:22.337969+00:00", + "generatedAt": "2026-06-27T15:55:42.044043+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h200-dgxc-slurm_8", - "sku": "h200", + "runner": "h100-dgxc-slurm_05", + "sku": "h100", "backend": "deepep", "phase": "decode", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "runtime-visible-v1", - "topologyClass": "h200-nvlink-island", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H200 EP8 · deepep · fp8", + "label": "H100 EP8 · deepep · fp8-pertoken", + "model": "DeepSeek-V3/V4", "shape": { - "hidden": 6144, + "hidden": 7168, 
"topk": 8, "experts": 256, "routing": "uniform", @@ -55427,7 +57538,7 @@ "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, - "dispatchDtype": "fp8", + "dispatchDtype": "fp8-pertoken", "activationProfile": "normal", "combineQuantMode": "none" }, @@ -55449,7 +57560,7 @@ }, "routingConsistent": true, "traceSignature": "ac583971f94b176", - "workloadId": "set:8:2e0df6a62cd0143e", + "workloadId": "set:8:d8d49658059863f2", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -55457,45 +57568,45 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271767522", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271767522", - "createdAt": "2026-06-26T23:52:56Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28294162181", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28294162181", + "createdAt": "2026-06-27T15:55:42.044043+00:00", + "sha": "42eddb48c3eed35214c5ad50da1aa6527363ff70" }, "rows": [ { "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 217.43999421596527, - "p90": 302.7519881725311, - "p95": 334.4320058822632, - "p99": 396.06401324272156 + "p50": 94.59199756383896, + "p90": 99.35999661684036, + "p95": 101.56799852848053, + "p99": 106.1440035700798 }, "combine": { - "p50": 55.1999993622303, - "p90": 72.03199714422226, - "p95": 78.23999971151352, - "p99": 108.09600353240967 + "p50": 68.4799998998642, + "p90": 71.23199850320816, + "p95": 72.22399860620499, + "p99": 76.06399804353714 }, "roundtrip": { - "p50": 251.71199440956116, - "p90": 317.27999448776245, - "p95": 335.10398864746094, - "p99": 397.92001247406006 + "p50": 184.79999899864197, + "p90": 190.72000682353973, + "p95": 192.9280012845993, + "p99": 197.9839950799942 }, "isolatedSum": { - "p50": 272.6399935781956, - "p90": 374.7839853167534, - "p95": 412.6720055937767, - "p99": 
504.1600167751312 + "p50": 163.07199746370316, + "p90": 170.59199512004852, + "p95": 173.79199713468552, + "p99": 182.20800161361694 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 270336, - "combineLogicalBytes": 540672, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, "fanoutMean": 5.5, "recvTokensMax": 7, - "stragglerRank": 4, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 @@ -55504,35 +57615,35 @@ "tokensPerRank": 2, "globalTokens": 16, "dispatch": { - "p50": 206.81600272655487, - "p90": 269.6639895439148, - "p95": 289.6000146865845, - "p99": 343.23200583457947 + "p50": 70.78400254249573, + "p90": 95.07200121879578, + "p95": 97.18400239944458, + "p99": 103.13600301742554 }, "combine": { - "p50": 55.135998874902725, - "p90": 71.77600264549255, - "p95": 77.47200131416321, - "p99": 96.09600156545639 + "p50": 62.463998794555664, + "p90": 70.97599655389786, + "p95": 71.52000069618225, + "p99": 75.96799731254578 }, "roundtrip": { - "p50": 247.93599545955658, - "p90": 305.63199520111084, - "p95": 323.168009519577, - "p99": 380.12799620628357 + "p50": 151.8400013446808, + "p90": 189.28000330924988, + "p95": 190.75199961662292, + "p99": 195.26399672031403 }, "isolatedSum": { - "p50": 261.9520016014576, - "p90": 341.43999218940735, - "p95": 367.0720160007477, - "p99": 439.32800740003586 + "p50": 133.2480013370514, + "p90": 166.04799777269363, + "p95": 168.70400309562683, + "p99": 179.1040003299713 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 528384, - "combineLogicalBytes": 1056768, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, "fanoutMean": 5.375, "recvTokensMax": 13, - "stragglerRank": 3, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -55541,35 +57652,35 @@ "tokensPerRank": 4, "globalTokens": 32, "dispatch": { - "p50": 211.04000508785248, - "p90": 283.32799673080444, - "p95": 302.65599489212036, - "p99": 377.6639997959137 + "p50": 94.11200135946274, + 
"p90": 101.34399682283401, + "p95": 105.12000322341919, + "p99": 111.42399907112122 }, "combine": { - "p50": 56.89600110054016, - "p90": 70.68800181150436, - "p95": 78.3040001988411, - "p99": 85.4400023818016 + "p50": 71.48800045251846, + "p90": 75.68000257015228, + "p95": 77.08799839019775, + "p99": 80.32000064849854 }, "roundtrip": { - "p50": 251.52000784873962, - "p90": 306.4959943294525, - "p95": 319.64799761772156, - "p99": 344.1599905490875 + "p50": 152.92799472808838, + "p90": 198.0160027742386, + "p95": 201.1840045452118, + "p99": 207.64799416065216 }, "isolatedSum": { - "p50": 267.93600618839264, - "p90": 354.0159985423088, - "p95": 380.95999509096146, - "p99": 463.1040021777153 + "p50": 165.6000018119812, + "p90": 177.0239993929863, + "p95": 182.20800161361694, + "p99": 191.74399971961975 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1062912, - "combineLogicalBytes": 2125824, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, "fanoutMean": 5.40625, "recvTokensMax": 29, - "stragglerRank": 3, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 @@ -55578,35 +57689,35 @@ "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 204.92799580097198, - "p90": 272.09600806236267, - "p95": 291.29600524902344, - "p99": 364.3519878387451 + "p50": 94.65599805116653, + "p90": 100.92800110578537, + "p95": 103.10400277376175, + "p99": 107.16799646615982 }, "combine": { - "p50": 56.96000158786774, - "p90": 71.96799665689468, - "p95": 77.79199630022049, - "p99": 86.91199868917465 + "p50": 73.18399846553802, + "p90": 74.81600344181061, + "p95": 76.19199901819229, + "p99": 79.29600030183792 }, "roundtrip": { - "p50": 245.69599330425262, - "p90": 303.16799879074097, - "p95": 321.9519853591919, - "p99": 421.1199879646301 + "p50": 187.83999979496002, + "p90": 195.45599818229675, + "p95": 197.28000462055206, + "p99": 202.84800231456757 }, "isolatedSum": { - "p50": 261.8879973888397, - "p90": 344.06400471925735, - "p95": 
369.0880015492439, - "p99": 451.26398652791977 + "p50": 167.83999651670456, + "p90": 175.74400454759598, + "p95": 179.29600179195404, + "p99": 186.46399676799774 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2131968, - "combineLogicalBytes": 4263936, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, "fanoutMean": 5.421875, "recvTokensMax": 47, - "stragglerRank": 3, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 @@ -55615,35 +57726,35 @@ "tokensPerRank": 16, "globalTokens": 128, "dispatch": { - "p50": 211.61599457263947, - "p90": 274.3679881095886, - "p95": 311.2959861755371, - "p99": 390.8799886703491 + "p50": 94.40000355243683, + "p90": 100.70399940013885, + "p95": 102.55999863147736, + "p99": 108.51199924945831 }, "combine": { - "p50": 58.720000088214874, - "p90": 74.68800246715546, - "p95": 80.09599894285202, - "p99": 87.5839963555336 + "p50": 74.75200295448303, + "p90": 77.44000107049942, + "p95": 79.83999699354172, + "p99": 83.42400193214417 }, "roundtrip": { - "p50": 250.65600872039795, - "p90": 313.24800848960876, - "p95": 336.1920118331909, - "p99": 386.59200072288513 + "p50": 192.1280026435852, + "p90": 201.05600357055664, + "p95": 204.28800582885742, + "p99": 209.4080001115799 }, "isolatedSum": { - "p50": 270.33599466085434, - "p90": 349.0559905767441, - "p95": 391.39198511838913, - "p99": 478.4639850258827 + "p50": 169.15200650691986, + "p90": 178.14400047063828, + "p95": 182.39999562501907, + "p99": 191.93600118160248 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4251648, - "combineLogicalBytes": 8503296, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, "fanoutMean": 5.40625, "recvTokensMax": 92, - "stragglerRank": 5, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 @@ -55652,35 +57763,35 @@ "tokensPerRank": 32, "globalTokens": 256, "dispatch": { - "p50": 204.92799580097198, - "p90": 262.62399554252625, - "p95": 280.5440127849579, - "p99": 
327.4880051612854 + "p50": 73.21599870920181, + "p90": 100.51199793815613, + "p95": 102.75200009346008, + "p99": 106.36799782514572 }, "combine": { - "p50": 64.54399973154068, - "p90": 81.85599744319916, - "p95": 87.8399983048439, - "p99": 104.41599786281586 + "p50": 71.16799801588058, + "p90": 82.0159986615181, + "p95": 83.00799876451492, + "p99": 86.11200004816055 }, "roundtrip": { - "p50": 262.59198784828186, - "p90": 327.7440071105957, - "p95": 351.6159951686859, - "p99": 406.0800075531006 + "p50": 160.76800227165222, + "p90": 204.3199986219406, + "p95": 207.10399746894836, + "p99": 212.0639979839325 }, "isolatedSum": { - "p50": 269.47199553251266, - "p90": 344.4799929857254, - "p95": 368.3840110898018, - "p99": 431.90400302410126 + "p50": 144.3839967250824, + "p90": 182.52799659967422, + "p95": 185.759998857975, + "p99": 192.47999787330627 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 8454144, - "combineLogicalBytes": 16908288, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, "fanoutMean": 5.375, "recvTokensMax": 182, - "stragglerRank": 1, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -55689,35 +57800,35 @@ "tokensPerRank": 64, "globalTokens": 512, "dispatch": { - "p50": 247.5840002298355, - "p90": 392.5119936466217, - "p95": 406.14399313926697, - "p99": 443.5200095176697 + "p50": 95.551997423172, + "p90": 104.25599664449692, + "p95": 106.88000172376633, + "p99": 122.65600264072418 }, "combine": { - "p50": 71.84000313282013, - "p90": 89.85599875450134, - "p95": 94.68799829483032, - "p99": 119.32799965143204 + "p50": 89.88799899816513, + "p90": 92.54399687051773, + "p95": 94.04800087213516, + "p99": 97.24800288677216 }, "roundtrip": { - "p50": 261.85598969459534, - "p90": 329.24801111221313, - "p95": 345.15199065208435, - "p99": 426.1760115623474 + "p50": 207.8080028295517, + "p90": 219.16800737380981, + "p95": 221.66399657726288, + "p99": 228.44800353050232 }, "isolatedSum": { - "p50": 
319.42400336265564, - "p90": 482.36799240112305, - "p95": 500.8319914340973, - "p99": 562.8480091691017 + "p50": 185.43999642133713, + "p90": 196.79999351501465, + "p95": 200.9280025959015, + "p99": 219.90400552749634 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 16711680, - "combineLogicalBytes": 33423360, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, "fanoutMean": 5.3125, "recvTokensMax": 367, - "stragglerRank": 7, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 @@ -55726,35 +57837,35 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 218.33600103855133, - "p90": 282.6240062713623, - "p95": 299.1040050983429, - "p99": 340.831995010376 + "p50": 88.86399865150452, + "p90": 107.77600109577179, + "p95": 110.23999750614166, + "p99": 115.61600118875504 }, "combine": { - "p50": 87.16800063848495, - "p90": 104.67199981212616, - "p95": 109.18399691581726, - "p99": 127.32799351215363 + "p50": 98.78399968147278, + "p90": 110.49599945545197, + "p95": 111.77600175142288, + "p99": 115.13599753379822 }, "roundtrip": { - "p50": 291.83998703956604, - "p90": 343.6479866504669, - "p95": 355.48800230026245, - "p99": 407.1680009365082 + "p50": 216.8000042438507, + "p90": 236.38400435447693, + "p95": 240.57599902153015, + "p99": 246.14399671554565 }, "isolatedSum": { - "p50": 305.5040016770363, - "p90": 387.29600608348846, - "p95": 408.28800201416016, - "p99": 468.1599885225296 + "p50": 187.6479983329773, + "p90": 218.27200055122375, + "p95": 222.01599925756454, + "p99": 230.75199872255325 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 33288192, - "combineLogicalBytes": 66576384, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, "fanoutMean": 5.291015625, "recvTokensMax": 723, - "stragglerRank": 2, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 @@ -55762,38 +57873,39 @@ ] }, { - "id": "cx-1d12a6ce", - "identity": 
"h200|deepep|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", - "colorKey": "h200_9979edfc", - "comparisonKey": "df5e7066c74d5d30", + "id": "cx-108bdec2", + "identity": "h100|deepep|4096|8|128|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||dc27c5e0894e569", + "colorKey": "h100_42947950", + "comparisonKey": "5aeeda2cd42e92cb", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:49:59.289355+00:00", + "generatedAt": "2026-06-27T11:13:50.229059+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h200-dgxc-slurm_9", - "sku": "h200", + "runner": "h100-dgxc-slurm_10", + "sku": "h100", "backend": "deepep", - "phase": "decode", + "phase": "prefill", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", + "topologyClass": "h100-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H200 EP8 · deepep · fp8", + "label": "H100 EP8 · deepep · bf16", + "model": "Qwen3.5", "shape": { - "hidden": 7168, + "hidden": 4096, "topk": 8, - "experts": 256, + "experts": 128, "routing": "uniform", "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, - "dispatchDtype": "fp8", + "dispatchDtype": "bf16", "activationProfile": "normal", "combineQuantMode": "none" }, @@ -55814,8 +57926,8 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "ac583971f94b176", - "workloadId": "set:8:d8d49658059863f2", + "traceSignature": "dc27c5e0894e569", + "workloadId": "set:6:76d8142d69406335", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -55823,303 +57935,229 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271622347", - 
"url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271622347", - "createdAt": "2026-06-26T23:48:34Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28287504962", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28287504962", + "createdAt": "2026-06-27T11:13:50.229059+00:00", + "sha": "df7fddee0a275156d4c72fa006cd2b73bce72613" }, "rows": [ { - "tokensPerRank": 1, - "globalTokens": 8, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 62.81600147485733, - "p90": 80.86399734020233, - "p95": 92.6399976015091, - "p99": 111.90400272607803 + "p50": 94.17600184679031, + "p90": 98.43199700117111, + "p95": 100.25600343942642, + "p99": 104.47999835014343 }, "combine": { - "p50": 57.792000472545624, - "p90": 63.58399987220764, - "p95": 70.0799971818924, - "p99": 86.91199868917465 + "p50": 87.20000088214874, + "p90": 89.66399729251862, + "p95": 90.40000289678574, + "p99": 95.42399644851685 }, "roundtrip": { - "p50": 147.77599275112152, - "p90": 179.6479970216751, - "p95": 193.53599846363068, - "p99": 309.6640110015869 + "p50": 156.51200711727142, + "p90": 162.20800578594208, + "p95": 163.455992937088, + "p99": 169.53599452972412 }, "isolatedSum": { - "p50": 120.60800194740295, - "p90": 144.44799721240997, - "p95": 162.7199947834015, - "p99": 198.81600141525269 + "p50": 181.37600272893906, + "p90": 188.09599429368973, + "p95": 190.65600633621216, + "p99": 199.90399479866028 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 315392, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 7, + "dispatchLogicalBytes": 44564480, + "combineLogicalBytes": 44564480, + "fanoutMean": 5.3125, + "recvTokensMax": 699, "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 67.61600077152252, - "p90": 106.27199709415436, - "p95": 119.84000355005264, - "p99": 203.93599569797516 - }, - "combine": { - "p50": 
59.23200026154518, - "p90": 77.37600058317184, - "p95": 84.95999872684479, - "p99": 107.29599744081497 - }, - "roundtrip": { - "p50": 156.44800662994385, - "p90": 205.50400018692017, - "p95": 228.38400304317474, - "p99": 356.79998993873596 - }, - "isolatedSum": { - "p50": 126.8480010330677, - "p90": 183.6479976773262, - "p95": 204.80000227689743, - "p99": 311.23199313879013 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 616448, - "combineLogicalBytes": 1232896, - "fanoutMean": 5.375, - "recvTokensMax": 13, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, + "tokensPerRank": 256, + "globalTokens": 2048, "dispatch": { - "p50": 66.27199798822403, - "p90": 84.86399799585342, - "p95": 93.56799721717834, - "p99": 116.09599739313126 + "p50": 114.56000059843063, + "p90": 127.26399302482605, + "p95": 130.78400492668152, + "p99": 137.11999356746674 }, "combine": { - "p50": 59.29600074887276, - "p90": 72.95999675989151, - "p95": 80.25600016117096, - "p99": 107.51999914646149 + "p50": 112.15999722480774, + "p90": 115.35999923944473, + "p95": 118.75200271606445, + "p99": 122.5920021533966 }, "roundtrip": { - "p50": 156.8319946527481, - "p90": 185.40799617767334, - "p95": 197.4399983882904, - "p99": 227.1679937839508 + "p50": 197.02400267124176, + "p90": 202.33599841594696, + "p95": 204.57600057125092, + "p99": 207.68000185489655 }, "isolatedSum": { - "p50": 125.56799873709679, - "p90": 157.82399475574493, - "p95": 173.8239973783493, - "p99": 223.61599653959274 + "p50": 226.71999782323837, + "p90": 242.62399226427078, + "p95": 249.53600764274597, + "p99": 259.71199572086334 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1240064, - "combineLogicalBytes": 2480128, - "fanoutMean": 5.40625, - "recvTokensMax": 29, - "stragglerRank": 7, + "dispatchLogicalBytes": 89726976, + "combineLogicalBytes": 89726976, + "fanoutMean": 5.34814453125, + "recvTokensMax": 1385, + "stragglerRank": 2, 
"correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 512, + "globalTokens": 4096, "dispatch": { - "p50": 67.77600198984146, - "p90": 101.05600208044052, - "p95": 115.61600118875504, - "p99": 160.38399934768677 + "p50": 153.6639928817749, + "p90": 169.855996966362, + "p95": 171.7119961977005, + "p99": 176.32000148296356 }, "combine": { - "p50": 60.99199876189232, - "p90": 78.20799946784973, - "p95": 82.94399827718735, - "p99": 110.78400164842606 + "p50": 167.71200299263, + "p90": 180.38399517536163, + "p95": 182.43199586868286, + "p99": 184.1599941253662 }, "roundtrip": { - "p50": 156.89599514007568, - "p90": 197.4720060825348, - "p95": 210.36800742149353, - "p99": 302.4959862232208 + "p50": 289.6000146865845, + "p90": 307.45598673820496, + "p95": 310.07999181747437, + "p99": 317.1519935131073 }, "isolatedSum": { - "p50": 128.76800075173378, - "p90": 179.26400154829025, - "p95": 198.55999946594238, - "p99": 271.1680009961128 + "p50": 321.3759958744049, + "p90": 350.23999214172363, + "p95": 354.14399206638336, + "p99": 360.4799956083298 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2487296, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 7, + "dispatchLogicalBytes": 179503104, + "combineLogicalBytes": 179503104, + "fanoutMean": 5.349609375, + "recvTokensMax": 2772, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 16, - "globalTokens": 128, + "tokensPerRank": 1024, + "globalTokens": 8192, "dispatch": { - "p50": 68.38399916887283, - "p90": 93.24800223112106, - "p95": 108.57599973678589, - "p99": 143.74400675296783 + "p50": 216.8319970369339, + "p90": 221.02400660514832, + "p95": 222.46399521827698, + "p99": 227.2000014781952 }, "combine": { - "p50": 62.463998794555664, - "p90": 78.78399640321732, - "p95": 87.23200112581253, - "p99": 103.90400141477585 + "p50": 277.0240008831024, + "p90": 
282.78398513793945, + "p95": 284.2879891395569, + "p99": 288.4480059146881 }, "roundtrip": { - "p50": 162.1440052986145, - "p90": 208.00000429153442, - "p95": 221.18400037288666, - "p99": 262.30400800704956 + "p50": 469.4080054759979, + "p90": 475.23200511932373, + "p95": 476.83200240135193, + "p99": 480.3520143032074 }, "isolatedSum": { - "p50": 130.8479979634285, - "p90": 172.03199863433838, - "p95": 195.80800086259842, - "p99": 247.64800816774368 + "p50": 493.8559979200363, + "p90": 503.80799174308777, + "p95": 506.75198435783386, + "p99": 515.6480073928833 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4960256, - "combineLogicalBytes": 9920512, - "fanoutMean": 5.40625, - "recvTokensMax": 92, + "dispatchLogicalBytes": 359022592, + "combineLogicalBytes": 359022592, + "fanoutMean": 5.349853515625, + "recvTokensMax": 5558, "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 67.71200150251389, - "p90": 98.65599870681763, - "p95": 107.68000036478043, - "p99": 143.19999516010284 - }, - "combine": { - "p50": 68.00000369548798, - "p90": 82.78399705886841, - "p95": 88.0960002541542, - "p99": 113.79200220108032 - }, - "roundtrip": { - "p50": 163.2319986820221, - "p90": 208.73600244522095, - "p95": 238.65599930286407, - "p99": 287.55199909210205 - }, - "isolatedSum": { - "p50": 135.71200519800186, - "p90": 181.43999576568604, - "p95": 195.77600061893463, - "p99": 256.99199736118317 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 9863168, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 182, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 64, - "globalTokens": 512, + "tokensPerRank": 2048, + "globalTokens": 16384, "dispatch": { - "p50": 79.19999957084656, - "p90": 95.551997423172, - "p95": 104.032002389431, - "p99": 137.85600662231445 + "p50": 361.2799942493439, + "p90": 
374.208003282547, + "p95": 379.2960047721863, + "p99": 538.752019405365 }, "combine": { - "p50": 77.7600035071373, - "p90": 87.45600283145905, - "p95": 96.22400254011154, - "p99": 126.39999389648438 + "p50": 470.5600142478943, + "p90": 482.87999629974365, + "p95": 485.0879907608032, + "p99": 490.81599712371826 }, "roundtrip": { - "p50": 175.135999917984, - "p90": 217.92000532150269, - "p95": 236.735999584198, - "p99": 292.86399483680725 + "p50": 804.4800162315369, + "p90": 820.2239871025085, + "p95": 825.3120183944702, + "p99": 835.3919982910156 }, "isolatedSum": { - "p50": 156.96000307798386, - "p90": 183.00800025463104, - "p95": 200.25600492954254, - "p99": 264.2560005187988 + "p50": 831.8400084972382, + "p90": 857.0879995822906, + "p95": 864.3839955329895, + "p99": 1029.5680165290833 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19496960, - "combineLogicalBytes": 38993920, - "fanoutMean": 5.3125, - "recvTokensMax": 367, + "dispatchLogicalBytes": 716111872, + "combineLogicalBytes": 716111872, + "fanoutMean": 5.33544921875, + "recvTokensMax": 10982, "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 4096, + "globalTokens": 32768, "dispatch": { - "p50": 83.52000266313553, - "p90": 106.65600001811981, - "p95": 115.77600240707397, - "p99": 151.8400013446808 + "p50": 640.5439972877502, + "p90": 648.4159827232361, + "p95": 651.7760157585144, + "p99": 662.015974521637 }, "combine": { - "p50": 95.29600292444229, - "p90": 108.96000266075134, - "p95": 119.45600062608719, - "p99": 141.02399349212646 + "p50": 846.176028251648, + "p90": 854.9759984016418, + "p95": 857.5360178947449, + "p99": 862.8479838371277 }, "roundtrip": { - "p50": 208.70399475097656, - "p90": 241.43999814987183, - "p95": 261.75999641418457, - "p99": 290.97598791122437 + "p50": 1459.9039554595947, + "p90": 1470.5599546432495, + "p95": 1474.4000434875488, + "p99": 1484.1920137405396 }, "isolatedSum": { - 
"p50": 178.81600558757782, - "p90": 215.61600267887115, - "p95": 235.23200303316116, - "p99": 292.86399483680725 + "p50": 1486.7200255393982, + "p90": 1503.391981124878, + "p95": 1509.3120336532593, + "p99": 1524.8639583587646 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 38836224, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, + "dispatchLogicalBytes": 1432395776, + "combineLogicalBytes": 1432395776, + "fanoutMean": 5.336090087890625, + "recvTokensMax": 21939, "stragglerRank": 7, "correct": true, "samplesPooled": 600, @@ -56128,38 +58166,39 @@ ] }, { - "id": "cx-9a6e69f6", - "identity": "h200|deepep|7168|8|256|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", - "colorKey": "h200_87683f6c", - "comparisonKey": "c387c5e642249761", + "id": "cx-8265fe0e", + "identity": "h100|deepep|4096|8|128|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||dc27c5e0894e569", + "colorKey": "h100_ff7906f8", + "comparisonKey": "d0edce95a580d060", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:50:29.289162+00:00", + "generatedAt": "2026-06-26T23:52:06.777183+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h200-dgxc-slurm_5", - "sku": "h200", + "runner": "h100-dgxc-slurm_13", + "sku": "h100", "backend": "deepep", - "phase": "decode", + "phase": "prefill", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "runtime-visible-v1", - "topologyClass": "h200-nvlink-island", + "topologyClass": "h100-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H200 EP8 · deepep · fp8", + "label": "H100 EP8 · deepep · bf16", + "model": "Qwen3.5", "shape": { - "hidden": 7168, + "hidden": 4096, "topk": 8, - "experts": 256, + "experts": 128, "routing": "uniform", "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, - 
"dispatchDtype": "fp8", + "dispatchDtype": "bf16", "activationProfile": "normal", "combineQuantMode": "none" }, @@ -56180,8 +58219,8 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "ac583971f94b176", - "workloadId": "set:8:d8d49658059863f2", + "traceSignature": "dc27c5e0894e569", + "workloadId": "set:6:76d8142d69406335", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -56189,343 +58228,270 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271636896", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271636896", - "createdAt": "2026-06-26T23:49:02Z", + "id": "28271688175", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271688175", + "createdAt": "2026-06-26T23:52:06.777183+00:00", "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" }, "rows": [ { - "tokensPerRank": 1, - "globalTokens": 8, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 228.70400547981262, - "p90": 269.6959972381592, - "p95": 279.5200049877167, - "p99": 338.1119966506958 + "p50": 92.32000261545181, + "p90": 96.41599655151367, + "p95": 98.39999675750732, + "p99": 104.22399640083313 }, "combine": { - "p50": 61.08799949288368, - "p90": 73.5040009021759, - "p95": 82.20800012350082, - "p99": 98.33600372076035 + "p50": 86.97599917650223, + "p90": 88.41600269079208, + "p95": 89.50400352478027, + "p99": 93.31200271844864 }, "roundtrip": { - "p50": 271.232008934021, - "p90": 306.94401264190674, - "p95": 324.2560029029846, - "p99": 374.65599179267883 - }, - "isolatedSum": { - "p50": 289.7920049726963, - "p90": 343.1999981403351, - "p95": 361.7280051112175, - "p99": 436.44800037145615 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 315392, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 7, - "correct": true, - 
"samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 216.06400609016418, - "p90": 246.33599817752838, - "p95": 261.3759934902191, - "p99": 341.40801429748535 - }, - "combine": { - "p50": 59.7120001912117, - "p90": 68.09599697589874, - "p95": 74.46400076150894, - "p99": 89.53599631786346 - }, - "roundtrip": { - "p50": 268.99200677871704, - "p90": 305.08801341056824, - "p95": 324.41601157188416, - "p99": 433.0880045890808 + "p50": 156.73600137233734, + "p90": 160.70400178432465, + "p95": 161.6639941930771, + "p99": 166.04800522327423 }, "isolatedSum": { - "p50": 275.7760062813759, - "p90": 314.4319951534271, - "p95": 335.83999425172806, - "p99": 430.9440106153488 + "p50": 179.29600179195404, + "p90": 184.83199924230576, + "p95": 187.9040002822876, + "p99": 197.53599911928177 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 616448, - "combineLogicalBytes": 1232896, - "fanoutMean": 5.375, - "recvTokensMax": 13, - "stragglerRank": 0, + "dispatchLogicalBytes": 44564480, + "combineLogicalBytes": 44564480, + "fanoutMean": 5.3125, + "recvTokensMax": 699, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 4, - "globalTokens": 32, + "tokensPerRank": 256, + "globalTokens": 2048, "dispatch": { - "p50": 229.98400032520294, - "p90": 283.07199478149414, - "p95": 300.00001192092896, - "p99": 371.2959885597229 + "p50": 111.10399663448334, + "p90": 133.98399949073792, + "p95": 135.96799969673157, + "p99": 139.96799290180206 }, "combine": { - "p50": 61.055999249219894, - "p90": 78.68800312280655, - "p95": 83.55200290679932, - "p99": 112.47999966144562 + "p50": 112.99200356006622, + "p90": 121.47200107574463, + "p95": 122.01599776744843, + "p99": 128.35200130939484 }, "roundtrip": { - "p50": 274.1119861602783, - "p90": 337.0879888534546, - "p95": 358.7520122528076, - "p99": 398.75200390815735 + "p50": 202.72000133991241, + "p90": 217.6000028848648, + "p95": 
219.39200162887573, + "p99": 223.7440049648285 }, "isolatedSum": { - "p50": 291.03999957442284, - "p90": 361.7599979043007, - "p95": 383.55201482772827, - "p99": 483.7759882211685 + "p50": 224.09600019454956, + "p90": 255.45600056648254, + "p95": 257.98399746418, + "p99": 268.3199942111969 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1240064, - "combineLogicalBytes": 2480128, - "fanoutMean": 5.40625, - "recvTokensMax": 29, - "stragglerRank": 3, + "dispatchLogicalBytes": 89726976, + "combineLogicalBytes": 89726976, + "fanoutMean": 5.34814453125, + "recvTokensMax": 1385, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 512, + "globalTokens": 4096, "dispatch": { - "p50": 218.87999773025513, - "p90": 251.55198574066162, - "p95": 265.855997800827, - "p99": 311.39200925827026 + "p50": 153.08800339698792, + "p90": 166.9439971446991, + "p95": 168.67199540138245, + "p99": 175.55199563503265 }, "combine": { - "p50": 62.111999839544296, - "p90": 71.6480016708374, - "p95": 77.11999863386154, - "p99": 90.40000289678574 + "p50": 168.92799735069275, + "p90": 181.15200102329254, + "p95": 183.07200074195862, + "p99": 186.0480010509491 }, "roundtrip": { - "p50": 266.9120132923126, - "p90": 300.57600140571594, - "p95": 317.8560137748718, - "p99": 357.02401399612427 + "p50": 291.29600524902344, + "p90": 307.45598673820496, + "p95": 309.6959888935089, + "p99": 313.9199912548065 }, "isolatedSum": { - "p50": 280.9919975697994, - "p90": 323.199987411499, - "p95": 342.97599643468857, - "p99": 401.792012155056 + "p50": 322.01600074768066, + "p90": 348.09599816799164, + "p95": 351.74399614334106, + "p99": 361.59999668598175 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2487296, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, + "dispatchLogicalBytes": 179503104, + "combineLogicalBytes": 179503104, + "fanoutMean": 5.349609375, + "recvTokensMax": 
2772, "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 16, - "globalTokens": 128, + "tokensPerRank": 1024, + "globalTokens": 8192, "dispatch": { - "p50": 219.29599344730377, - "p90": 267.61600375175476, - "p95": 287.00798749923706, - "p99": 346.8160033226013 + "p50": 219.26400065422058, + "p90": 230.71999847888947, + "p95": 234.9119931459427, + "p99": 238.62400650978088 }, "combine": { - "p50": 63.840001821517944, - "p90": 79.77599650621414, - "p95": 84.95999872684479, - "p99": 98.49599748849869 + "p50": 274.04800057411194, + "p90": 280.5440127849579, + "p95": 281.69599175453186, + "p99": 284.1919958591461 }, "roundtrip": { - "p50": 265.4719948768616, - "p90": 309.9519908428192, - "p95": 323.8399922847748, - "p99": 397.8559970855713 + "p50": 467.4240052700043, + "p90": 473.2159972190857, + "p95": 475.8079946041107, + "p99": 479.2639911174774 }, "isolatedSum": { - "p50": 283.1359952688217, - "p90": 347.3920002579689, - "p95": 371.96798622608185, - "p99": 445.3120008111 + "p50": 493.3120012283325, + "p90": 511.26401126384735, + "p95": 516.6079849004745, + "p99": 522.816002368927 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4960256, - "combineLogicalBytes": 9920512, - "fanoutMean": 5.40625, - "recvTokensMax": 92, - "stragglerRank": 0, + "dispatchLogicalBytes": 359022592, + "combineLogicalBytes": 359022592, + "fanoutMean": 5.349853515625, + "recvTokensMax": 5558, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 32, - "globalTokens": 256, + "tokensPerRank": 2048, + "globalTokens": 16384, "dispatch": { - "p50": 219.10400688648224, - "p90": 245.5040067434311, - "p95": 260.3200078010559, - "p99": 308.0959916114807 + "p50": 360.79999804496765, + "p90": 374.36801195144653, + "p95": 376.5760064125061, + "p99": 380.2880048751831 }, "combine": { - "p50": 69.50400024652481, - "p90": 78.33600044250488, - "p95": 83.96799862384796, - "p99": 95.8079993724823 + "p50": 
465.88799357414246, + "p90": 475.77598690986633, + "p95": 478.4319996833801, + "p99": 481.53600096702576 }, "roundtrip": { - "p50": 275.2319872379303, - "p90": 308.9599907398224, - "p95": 331.07200264930725, - "p99": 425.6319999694824 + "p50": 799.1999983787537, + "p90": 816.6720271110535, + "p95": 819.8080062866211, + "p99": 824.7680068016052 }, "isolatedSum": { - "p50": 288.60800713300705, - "p90": 323.840007185936, - "p95": 344.28800642490387, - "p99": 403.903990983963 + "p50": 826.6879916191101, + "p90": 850.1439988613129, + "p95": 855.0080060958862, + "p99": 861.8240058422089 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 9863168, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 182, - "stragglerRank": 1, + "dispatchLogicalBytes": 716111872, + "combineLogicalBytes": 716111872, + "fanoutMean": 5.33544921875, + "recvTokensMax": 10982, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 64, - "globalTokens": 512, + "tokensPerRank": 4096, + "globalTokens": 32768, "dispatch": { - "p50": 221.27999365329742, - "p90": 263.90400528907776, - "p95": 282.20799565315247, - "p99": 368.51200461387634 + "p50": 638.975977897644, + "p90": 648.1279730796814, + "p95": 652.7040004730225, + "p99": 661.1520051956177 }, "combine": { - "p50": 79.77599650621414, - "p90": 91.32800251245499, - "p95": 96.6079980134964, - "p99": 106.52799904346466 + "p50": 848.4799861907959, + "p90": 856.8000197410583, + "p95": 859.5520257949829, + "p99": 898.5919952392578 }, "roundtrip": { - "p50": 288.4159982204437, - "p90": 336.41600608825684, - "p95": 353.7920117378235, - "p99": 471.1360037326813 + "p50": 1462.623953819275, + "p90": 1474.079966545105, + "p95": 1478.4959554672241, + "p99": 1489.3120527267456 }, "isolatedSum": { - "p50": 301.05599015951157, - "p90": 355.23200780153275, - "p95": 378.81599366664886, - "p99": 475.040003657341 + "p50": 1487.45596408844, + "p90": 1504.9279928207397, + "p95": 1512.2560262680054, 
+ "p99": 1559.7440004348755 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19496960, - "combineLogicalBytes": 38993920, - "fanoutMean": 5.3125, - "recvTokensMax": 367, + "dispatchLogicalBytes": 1432395776, + "combineLogicalBytes": 1432395776, + "fanoutMean": 5.336090087890625, + "recvTokensMax": 21939, "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 233.024001121521, - "p90": 284.4479978084564, - "p95": 301.63198709487915, - "p99": 392.5760090351105 - }, - "combine": { - "p50": 97.50399738550186, - "p90": 109.76000130176544, - "p95": 115.99999666213989, - "p99": 127.93600559234619 - }, - "roundtrip": { - "p50": 316.6399896144867, - "p90": 356.06399178504944, - "p95": 368.5759902000427, - "p99": 464.352011680603 - }, - "isolatedSum": { - "p50": 330.52799850702286, - "p90": 394.20799911022186, - "p95": 417.63198375701904, - "p99": 520.5120146274567 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 38836224, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 } ] }, { - "id": "cx-180681db", - "identity": "h200|deepep|7168|8|384|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||d6c49ae98878760", - "colorKey": "h200_87683f6c", - "comparisonKey": "3006922c66758d92", + "id": "cx-2dcc1e5c", + "identity": "h100|deepep|5120|8|160|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||0c022a63bbcbf42", + "colorKey": "h100_ff7906f8", + "comparisonKey": "69b861c40f88be42", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:53:15.049258+00:00", + "generatedAt": "2026-06-26T23:51:59.492832+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h200-dgxc-slurm_9", - "sku": "h200", + "runner": "h100-dgxc-slurm_06", + "sku": "h100", "backend": "deepep", - "phase": "decode", + "phase": 
"prefill", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "runtime-visible-v1", - "topologyClass": "h200-nvlink-island", + "topologyClass": "h100-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H200 EP8 · deepep · fp8", + "label": "H100 EP8 · deepep · bf16", + "model": "shape 5120/8/160", "shape": { - "hidden": 7168, + "hidden": 5120, "topk": 8, - "experts": 384, + "experts": 160, "routing": "uniform", "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, - "dispatchDtype": "fp8", + "dispatchDtype": "bf16", "activationProfile": "normal", "combineQuantMode": "none" }, @@ -56546,8 +58512,8 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "d6c49ae98878760", - "workloadId": "set:8:9a27d0df4b17fa09", + "traceSignature": "0c022a63bbcbf42", + "workloadId": "set:6:28c0c09b13ff0acf", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -56555,304 +58521,230 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271721386", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271721386", - "createdAt": "2026-06-26T23:51:33Z", + "id": "28271702702", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271702702", + "createdAt": "2026-06-26T23:51:59.492832+00:00", "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" }, "rows": [ { - "tokensPerRank": 1, - "globalTokens": 8, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 212.44800090789795, - "p90": 272.8320062160492, - "p95": 292.32001304626465, - "p99": 382.752001285553 + "p50": 99.45599734783173, + "p90": 105.05600273609161, + "p95": 106.04800283908844, + "p99": 110.23999750614166 }, "combine": { - "p50": 58.75200033187866, - "p90": 73.40800017118454, - 
"p95": 78.5600021481514, - "p99": 96.12800180912018 + "p50": 95.58399766683578, + "p90": 97.47199714183807, + "p95": 98.39999675750732, + "p99": 102.9760017991066 }, "roundtrip": { - "p50": 247.26399779319763, - "p90": 306.36799335479736, - "p95": 325.1200020313263, - "p99": 389.8560106754303 + "p50": 170.33599317073822, + "p90": 175.10400712490082, + "p95": 177.85599827766418, + "p99": 179.58399653434753 }, "isolatedSum": { - "p50": 271.2000012397766, - "p90": 346.24000638723373, - "p95": 370.88001519441605, - "p99": 478.88000309467316 + "p50": 195.0399950146675, + "p90": 202.5279998779297, + "p95": 204.44799959659576, + "p99": 213.21599930524826 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 301056, - "combineLogicalBytes": 602112, - "fanoutMean": 5.25, - "recvTokensMax": 8, - "stragglerRank": 4, + "dispatchLogicalBytes": 55674880, + "combineLogicalBytes": 55674880, + "fanoutMean": 5.3095703125, + "recvTokensMax": 699, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2, - "globalTokens": 16, + "tokensPerRank": 256, + "globalTokens": 2048, "dispatch": { - "p50": 212.09600567817688, - "p90": 273.69600534439087, - "p95": 297.791987657547, - "p99": 586.5920186042786 + "p50": 119.29599940776825, + "p90": 124.22399967908859, + "p95": 126.30400061607361, + "p99": 130.5599957704544 }, "combine": { - "p50": 58.17599967122078, - "p90": 74.81600344181061, - "p95": 79.71200346946716, - "p99": 97.120001912117 + "p50": 122.079998254776, + "p90": 127.80800461769104, + "p95": 128.67200374603271, + "p99": 132.9919993877411 }, "roundtrip": { - "p50": 265.3760015964508, - "p90": 339.6799862384796, - "p95": 375.5840063095093, - "p99": 458.8159918785095 + "p50": 219.32800114154816, + "p90": 223.1680005788803, + "p95": 224.5440036058426, + "p99": 228.7359982728958 }, "isolatedSum": { - "p50": 270.27200534939766, - "p90": 348.5120087862015, - "p95": 377.50399112701416, - "p99": 683.7120205163956 + "p50": 241.37599766254425, + 
"p90": 252.03200429677963, + "p95": 254.97600436210632, + "p99": 263.5519951581955 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 609280, - "combineLogicalBytes": 1218560, - "fanoutMean": 5.3125, - "recvTokensMax": 14, - "stragglerRank": 3, + "dispatchLogicalBytes": 111104000, + "combineLogicalBytes": 111104000, + "fanoutMean": 5.2978515625, + "recvTokensMax": 1387, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 4, - "globalTokens": 32, + "tokensPerRank": 512, + "globalTokens": 4096, "dispatch": { - "p50": 197.6960003376007, - "p90": 252.8960108757019, - "p95": 267.64801144599915, - "p99": 318.59201192855835 + "p50": 165.53600132465363, + "p90": 178.1120002269745, + "p95": 180.12799322605133, + "p99": 184.25600230693817 }, "combine": { - "p50": 57.920001447200775, - "p90": 70.49600034952164, - "p95": 76.4160007238388, - "p99": 87.36000210046768 + "p50": 190.46400487422943, + "p90": 198.71999323368073, + "p95": 200.9280025959015, + "p99": 213.79199624061584 }, "roundtrip": { - "p50": 246.91200256347656, - "p90": 306.2080144882202, - "p95": 339.1680121421814, - "p99": 585.1519703865051 + "p50": 325.76000690460205, + "p90": 331.07200264930725, + "p95": 332.73598551750183, + "p99": 336.1920118331909 }, "isolatedSum": { - "p50": 255.61600178480148, - "p90": 323.39201122522354, - "p95": 344.06401216983795, - "p99": 405.95201402902603 + "p50": 356.00000619888306, + "p90": 376.8319934606552, + "p95": 381.0559958219528, + "p99": 398.047998547554 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1204224, - "combineLogicalBytes": 2408448, - "fanoutMean": 5.25, - "recvTokensMax": 26, - "stragglerRank": 0, + "dispatchLogicalBytes": 223098880, + "combineLogicalBytes": 223098880, + "fanoutMean": 5.319091796875, + "recvTokensMax": 2762, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 1024, + "globalTokens": 8192, "dispatch": 
{ - "p50": 211.93599700927734, - "p90": 265.1520073413849, - "p95": 276.6079902648926, - "p99": 336.5760147571564 + "p50": 244.57600712776184, + "p90": 249.439999461174, + "p95": 253.56799364089966, + "p99": 409.56801176071167 }, "combine": { - "p50": 59.647999703884125, - "p90": 77.02399790287018, - "p95": 82.94399827718735, - "p99": 96.54399752616882 + "p50": 299.1040050983429, + "p90": 303.9360046386719, + "p95": 305.759996175766, + "p99": 311.0719919204712 }, "roundtrip": { - "p50": 259.5840096473694, - "p90": 317.6639974117279, - "p95": 331.9680094718933, - "p99": 400.06399154663086 - }, - "isolatedSum": { - "p50": 271.58399671316147, - "p90": 342.17600524425507, - "p95": 359.5519885420799, - "p99": 433.1200122833252 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2415616, - "combineLogicalBytes": 4831232, - "fanoutMean": 5.265625, - "recvTokensMax": 48, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 214.01600539684296, - "p90": 275.90399980545044, - "p95": 303.9039969444275, - "p99": 374.30399656295776 - }, - "combine": { - "p50": 61.76000088453293, - "p90": 80.4160013794899, - "p95": 84.79999750852585, - "p99": 99.16800260543823 - }, - "roundtrip": { - "p50": 258.59200954437256, - "p90": 322.9120075702667, - "p95": 347.104012966156, - "p99": 422.39999771118164 - }, - "isolatedSum": { - "p50": 275.7760062813759, - "p90": 356.32000118494034, - "p95": 388.70399445295334, - "p99": 473.471999168396 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4924416, - "combineLogicalBytes": 9848832, - "fanoutMean": 5.3671875, - "recvTokensMax": 91, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 209.9200040102005, - "p90": 263.7439966201782, - "p95": 275.2639949321747, - "p99": 311.13600730895996 - }, - "combine": { - "p50": 
67.58400052785873, - "p90": 84.09599959850311, - "p95": 87.42400258779526, - "p99": 103.90400141477585 - }, - "roundtrip": { - "p50": 263.5520100593567, - "p90": 318.30400228500366, - "p95": 334.5920145511627, - "p99": 403.80799770355225 + "p50": 515.7759785652161, + "p90": 522.2399830818176, + "p95": 524.1600275039673, + "p99": 528.8959741592407 }, "isolatedSum": { - "p50": 277.50400453805923, - "p90": 347.83999621868134, - "p95": 362.68799751996994, - "p99": 415.0400087237358 + "p50": 543.6800122261047, + "p90": 553.3760040998459, + "p95": 559.3279898166656, + "p99": 720.6400036811829 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 9748480, - "combineLogicalBytes": 19496960, - "fanoutMean": 5.3125, - "recvTokensMax": 178, - "stragglerRank": 4, + "dispatchLogicalBytes": 446730240, + "combineLogicalBytes": 446730240, + "fanoutMean": 5.325439453125, + "recvTokensMax": 5518, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 64, - "globalTokens": 512, + "tokensPerRank": 2048, + "globalTokens": 16384, "dispatch": { - "p50": 205.82400262355804, - "p90": 253.02401185035706, - "p95": 266.36800169944763, - "p99": 311.5200102329254 + "p50": 413.1520092487335, + "p90": 423.0720102787018, + "p95": 426.2399971485138, + "p99": 432.5760006904602 }, "combine": { - "p50": 78.40000092983246, - "p90": 92.76799857616425, - "p95": 98.04800152778625, - "p99": 111.07199639081955 + "p50": 515.7439708709717, + "p90": 523.7119793891907, + "p95": 526.4319777488708, + "p99": 530.3360223770142 }, "roundtrip": { - "p50": 272.7360129356384, - "p90": 325.50400495529175, - "p95": 342.6879942417145, - "p99": 378.6559998989105 + "p50": 898.2080221176147, + "p90": 911.0400080680847, + "p95": 915.2960181236267, + "p99": 921.6639995574951 }, "isolatedSum": { - "p50": 284.2240035533905, - "p90": 345.7920104265213, - "p95": 364.4160032272339, - "p99": 422.59200662374496 + "p50": 928.8959801197052, + "p90": 946.7839896678925, + "p95": 
952.6719748973846, + "p99": 962.9120230674744 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19418112, - "combineLogicalBytes": 38836224, - "fanoutMean": 5.291015625, - "recvTokensMax": 372, - "stragglerRank": 4, + "dispatchLogicalBytes": 893634560, + "combineLogicalBytes": 893634560, + "fanoutMean": 5.32647705078125, + "recvTokensMax": 11032, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 4096, + "globalTokens": 32768, "dispatch": { - "p50": 223.23200106620789, - "p90": 271.61601185798645, - "p95": 281.98400139808655, - "p99": 319.96798515319824 + "p50": 745.0559735298157, + "p90": 758.2719922065735, + "p95": 762.112021446228, + "p99": 772.4159955978394 }, "combine": { - "p50": 96.25600278377533, - "p90": 112.44799941778183, - "p95": 115.61600118875504, - "p99": 127.36000120639801 + "p50": 933.247983455658, + "p90": 941.9839978218079, + "p95": 945.1839923858643, + "p99": 951.3279795646667 }, "roundtrip": { - "p50": 324.864000082016, - "p90": 388.63998651504517, - "p95": 415.3279960155487, - "p99": 494.3999946117401 + "p50": 1646.2719440460205, + "p90": 1661.9199514389038, + "p95": 1667.3599481582642, + "p99": 1685.7600212097168 }, "isolatedSum": { - "p50": 319.4880038499832, - "p90": 384.0640112757683, - "p95": 397.6000025868416, - "p99": 447.32798635959625 + "p50": 1678.3039569854736, + "p90": 1700.2559900283813, + "p95": 1707.2960138320923, + "p99": 1723.743975162506 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 38757376, - "combineLogicalBytes": 77514752, - "fanoutMean": 5.2802734375, - "recvTokensMax": 707, - "stragglerRank": 4, + "dispatchLogicalBytes": 1786265600, + "combineLogicalBytes": 1786265600, + "fanoutMean": 5.323486328125, + "recvTokensMax": 21895, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 @@ -56860,30 +58752,31 @@ ] }, { - "id": "cx-b1b077c8", - "identity": 
"h200|deepep|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|ac583971f94b176", - "colorKey": "h200_3a17d46b", - "comparisonKey": "f29f35383c05d38b", + "id": "cx-8f627a86", + "identity": "h100|deepep|6144|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "h100_42947950", + "comparisonKey": "68eaec6b4043581a", "schemaVersion": 3, - "generatedAt": "2026-06-26T17:30:04.228393+00:00", + "generatedAt": "2026-06-27T11:13:20.359016+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h200-dgxc-slurm_9", - "sku": "h200", + "runner": "h100-dgxc-slurm_06", + "sku": "h100", "backend": "deepep", - "phase": "decode", + "phase": "prefill", "mode": "normal", - "resourceMode": "normalized", - "suite": "resource-constrained", + "resourceMode": "tuned", + "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", + "topologyClass": "h100-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H200 EP8 · deepep · fp8 (norm)", + "label": "H100 EP8 · deepep · bf16", + "model": "MiniMax-M3", "shape": { - "hidden": 7168, + "hidden": 6144, "topk": 8, "experts": 256, "routing": "uniform", @@ -56891,19 +58784,19 @@ "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, - "dispatchDtype": "fp8", + "dispatchDtype": "bf16", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { - "requestedFraction": 0.18, - "achievedFraction": 0.1818, - "configuredUnits": 24, + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, "deviceUnits": 132, - "resourceClass": "resource-constrained", - "conformanceClass": "resource-conforming", + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", "fixedKernel": false, - "paretoEligible": true + "paretoEligible": false }, 
"placement": { "kind": "packed", @@ -56912,8 +58805,8 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "ac583971f94b176", - "workloadId": "set:8:d8d49658059863f2", + "traceSignature": "64d989e2e2a6b31", + "workloadId": "set:6:9f5e1e005a35e937", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -56921,304 +58814,230 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28254401482", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254401482", - "createdAt": "2026-06-26T17:28:31Z", - "sha": "60dec7d70f554e252fec87709e2be52752947db1" + "id": "28287492752", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28287492752", + "createdAt": "2026-06-27T11:13:20.359016+00:00", + "sha": "df7fddee0a275156d4c72fa006cd2b73bce72613" }, "rows": [ { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 70.23999840021133, - "p90": 92.38400310277939, - "p95": 101.88800096511841, - "p99": 121.15199863910675 - }, - "combine": { - "p50": 58.88000130653381, - "p90": 70.3359991312027, - "p95": 78.65600287914276, - "p99": 101.43999755382538 - }, - "roundtrip": { - "p50": 159.32799875736237, - "p90": 200.3840059041977, - "p95": 213.69600296020508, - "p99": 243.58400702476501 - }, - "isolatedSum": { - "p50": 129.11999970674515, - "p90": 162.7200022339821, - "p95": 180.54400384426117, - "p99": 222.59199619293213 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 315392, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 67.55200028419495, - "p90": 91.45600348711014, - "p95": 102.33599692583084, - "p99": 144.57599818706512 - }, - "combine": { - "p50": 59.42400172352791, - "p90": 
71.6480016708374, - "p95": 81.24800026416779, - "p99": 105.43999820947647 - }, - "roundtrip": { - "p50": 156.12800419330597, - "p90": 199.13600385189056, - "p95": 215.32799303531647, - "p99": 382.4000060558319 - }, - "isolatedSum": { - "p50": 126.97600200772285, - "p90": 163.10400515794754, - "p95": 183.58399718999863, - "p99": 250.0159963965416 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 616448, - "combineLogicalBytes": 1232896, - "fanoutMean": 5.375, - "recvTokensMax": 13, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 69.72800195217133, - "p90": 88.54400366544724, - "p95": 98.24000298976898, - "p99": 228.60799729824066 + "p50": 111.35999858379364, + "p90": 115.58400094509125, + "p95": 116.35199934244156, + "p99": 121.56800180673599 }, "combine": { - "p50": 60.92799827456474, - "p90": 72.92799651622772, - "p95": 77.7600035071373, - "p99": 90.91199934482574 + "p50": 97.72799909114838, + "p90": 103.45599800348282, + "p95": 104.3199971318245, + "p99": 108.25599730014801 }, "roundtrip": { - "p50": 160.67199409008026, - "p90": 186.20799481868744, - "p95": 196.44799828529358, - "p99": 242.14400351047516 + "p50": 183.9359998703003, + "p90": 187.96800076961517, + "p95": 189.31199610233307, + "p99": 192.76799261569977 }, "isolatedSum": { - "p50": 130.65600022673607, - "p90": 161.47200018167496, - "p95": 176.00000649690628, - "p99": 319.5199966430664 + "p50": 209.08799767494202, + "p90": 219.03999894857407, + "p95": 220.67199647426605, + "p99": 229.823999106884 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1240064, - "combineLogicalBytes": 2480128, - "fanoutMean": 5.40625, - "recvTokensMax": 29, - "stragglerRank": 6, + "dispatchLogicalBytes": 66576384, + "combineLogicalBytes": 66576384, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 5, "correct": true, "samplesPooled": 
600, "trials": 3 }, { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 256, + "globalTokens": 2048, "dispatch": { - "p50": 70.49600034952164, - "p90": 97.47199714183807, - "p95": 107.84000158309937, - "p99": 151.90400183200836 + "p50": 133.15199315547943, + "p90": 150.27199685573578, + "p95": 157.56799280643463, + "p99": 168.2240068912506 }, "combine": { - "p50": 61.47199869155884, - "p90": 76.89599692821503, - "p95": 85.28000116348267, - "p99": 107.64800012111664 + "p50": 137.05599308013916, + "p90": 144.03200149536133, + "p95": 145.50399780273438, + "p99": 152.79999375343323 }, "roundtrip": { - "p50": 155.8080017566681, - "p90": 187.45599687099457, - "p95": 205.24799823760986, - "p99": 242.88000166416168 + "p50": 239.74399268627167, + "p90": 252.70399451255798, + "p95": 254.17599081993103, + "p99": 258.2400143146515 }, "isolatedSum": { - "p50": 131.96799904108047, - "p90": 174.3679940700531, - "p95": 193.12000274658203, - "p99": 259.552001953125 + "p50": 270.2079862356186, + "p90": 294.3039983510971, + "p95": 303.071990609169, + "p99": 321.02400064468384 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2487296, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 6, + "dispatchLogicalBytes": 133619712, + "combineLogicalBytes": 133619712, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 16, - "globalTokens": 128, + "tokensPerRank": 512, + "globalTokens": 4096, "dispatch": { - "p50": 68.4799998998642, - "p90": 86.94399893283844, - "p95": 95.58399766683578, - "p99": 126.08000636100769 + "p50": 182.49599635601044, + "p90": 195.23200392723083, + "p95": 198.14400374889374, + "p99": 200.95999538898468 }, "combine": { - "p50": 63.391998410224915, - "p90": 77.34400033950806, - "p95": 86.62399649620056, - "p99": 119.55200135707855 + "p50": 208.44799280166626, + "p90": 217.98400580883026, + "p95": 
219.10400688648224, + "p99": 253.76001000404358 }, "roundtrip": { - "p50": 164.2879992723465, - "p90": 188.09600174427032, - "p95": 203.64800095558167, - "p99": 272.7999985218048 + "p50": 361.6960048675537, + "p90": 376.0319948196411, + "p95": 379.71198558807373, + "p99": 384.6080005168915 }, "isolatedSum": { - "p50": 131.8719983100891, - "p90": 164.2879992723465, - "p95": 182.20799416303635, - "p99": 245.63200771808624 + "p50": 390.9439891576767, + "p90": 413.2160097360611, + "p95": 417.248010635376, + "p99": 454.72000539302826 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4960256, - "combineLogicalBytes": 9920512, - "fanoutMean": 5.40625, - "recvTokensMax": 92, - "stragglerRank": 6, + "dispatchLogicalBytes": 267657216, + "combineLogicalBytes": 267657216, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 32, - "globalTokens": 256, + "tokensPerRank": 1024, + "globalTokens": 8192, "dispatch": { - "p50": 68.25599819421768, - "p90": 91.13600105047226, - "p95": 98.91200065612793, - "p99": 114.78400230407715 + "p50": 274.1119861602783, + "p90": 283.3920121192932, + "p95": 286.1120104789734, + "p99": 290.8160090446472 }, "combine": { - "p50": 66.27199798822403, - "p90": 78.84799689054489, - "p95": 85.40800213813782, - "p99": 92.73599833250046 + "p50": 330.1120102405548, + "p90": 336.0320031642914, + "p95": 336.89600229263306, + "p99": 341.8560028076172 }, "roundtrip": { - "p50": 165.0879979133606, - "p90": 203.45599949359894, - "p95": 221.15199267864227, - "p99": 462.911993265152 + "p50": 577.344000339508, + "p90": 583.9359760284424, + "p95": 586.0480070114136, + "p99": 589.3440246582031 }, "isolatedSum": { - "p50": 134.5279961824417, - "p90": 169.98399794101715, - "p95": 184.32000279426575, - "p99": 207.5200006365776 + "p50": 604.2239964008331, + "p90": 619.4240152835846, + "p95": 623.0080127716064, + "p99": 632.6720118522644 }, "roundtripMeasured": true, - 
"dispatchLogicalBytes": 9863168, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 182, - "stragglerRank": 5, + "dispatchLogicalBytes": 534380544, + "combineLogicalBytes": 534380544, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 64, - "globalTokens": 512, + "tokensPerRank": 2048, + "globalTokens": 16384, "dispatch": { - "p50": 74.46400076150894, - "p90": 89.21600133180618, - "p95": 99.32799637317657, - "p99": 120.57600170373917 + "p50": 464.03199434280396, + "p90": 478.59200835227966, + "p95": 481.6960096359253, + "p99": 491.5519952774048 }, "combine": { - "p50": 80.44800162315369, - "p90": 89.75999802350998, - "p95": 94.65599805116653, - "p99": 122.30399996042252 + "p50": 581.4080238342285, + "p90": 591.5840268135071, + "p95": 594.6879982948303, + "p99": 603.5839915275574 }, "roundtrip": { - "p50": 183.45600366592407, - "p90": 210.78400313854218, - "p95": 228.5439968109131, - "p99": 287.4239981174469 + "p50": 1013.3440494537354, + "p90": 1023.2000350952148, + "p95": 1027.008056640625, + "p99": 1076.6079425811768 }, "isolatedSum": { - "p50": 154.91200238466263, - "p90": 178.97599935531616, - "p95": 193.9839944243431, - "p99": 242.88000166416168 + "p50": 1045.4400181770325, + "p90": 1070.1760351657867, + "p95": 1076.3840079307556, + "p99": 1095.1359868049622 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19496960, - "combineLogicalBytes": 38993920, - "fanoutMean": 5.3125, - "recvTokensMax": 367, - "stragglerRank": 5, + "dispatchLogicalBytes": 1066119168, + "combineLogicalBytes": 1066119168, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 4096, + "globalTokens": 32768, "dispatch": { - "p50": 88.67199718952179, - "p90": 102.7199998497963, - "p95": 111.93600296974182, - "p99": 
128.9920061826706 + "p50": 886.1759901046753, + "p90": 910.2720022201538, + "p95": 915.5840277671814, + "p99": 928.8960099220276 }, "combine": { - "p50": 96.83199971914291, - "p90": 108.86400192975998, - "p95": 114.43199962377548, - "p99": 124.1919994354248 + "p50": 1059.2319965362549, + "p90": 1067.520022392273, + "p95": 1070.0160264968872, + "p99": 1076.8640041351318 }, "roundtrip": { - "p50": 208.99200439453125, - "p90": 229.34399545192719, - "p95": 239.9040013551712, - "p99": 260.22401452064514 + "p50": 1908.6079597473145, + "p90": 1929.2479753494263, + "p95": 1936.3199472427368, + "p99": 1965.440034866333 }, "isolatedSum": { - "p50": 185.5039969086647, - "p90": 211.58400177955627, - "p95": 226.3680025935173, - "p99": 253.1840056180954 + "p50": 1945.4079866409302, + "p90": 1977.7920246124268, + "p95": 1985.6000542640686, + "p99": 2005.7600140571594 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 38836224, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 6, + "dispatchLogicalBytes": 2131722240, + "combineLogicalBytes": 2131722240, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 @@ -57226,30 +59045,31 @@ ] }, { - "id": "cx-a2649fd4", - "identity": "h200|deepep|7168|8|256|fp8|normal|cached-layout-comm-only-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|ac583971f94b176", - "colorKey": "h200_50a9ee63", - "comparisonKey": "aae31d5755e4ce66", + "id": "cx-29bbdbee", + "identity": "h100|deepep|6144|8|256|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "h100_ff7906f8", + "comparisonKey": "4401899311d5e08c", "schemaVersion": 3, - "generatedAt": "2026-06-26T17:30:20.768220+00:00", + "generatedAt": "2026-06-26T23:52:30.177352+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h200-dgxc-slurm_1", - "sku": "h200", + "runner": 
"h100-dgxc-slurm_05", + "sku": "h100", "backend": "deepep", - "phase": "decode", + "phase": "prefill", "mode": "normal", - "resourceMode": "normalized", - "suite": "resource-constrained", + "resourceMode": "tuned", + "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "cached-layout-comm-only-v1", - "topologyClass": "h200-nvlink-island", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h100-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H200 EP8 · deepep · fp8 (norm) [cl]", + "label": "H100 EP8 · deepep · bf16", + "model": "MiniMax-M3", "shape": { - "hidden": 7168, + "hidden": 6144, "topk": 8, "experts": 256, "routing": "uniform", @@ -57257,19 +59077,19 @@ "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, - "dispatchDtype": "fp8", + "dispatchDtype": "bf16", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { - "requestedFraction": 0.18, - "achievedFraction": 0.1818, - "configuredUnits": 24, + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, "deviceUnits": 132, - "resourceClass": "resource-constrained", - "conformanceClass": "resource-conforming", + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", "fixedKernel": false, - "paretoEligible": true + "paretoEligible": false }, "placement": { "kind": "packed", @@ -57278,8 +59098,8 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "ac583971f94b176", - "workloadId": "set:8:d8d49658059863f2", + "traceSignature": "64d989e2e2a6b31", + "workloadId": "set:6:9f5e1e005a35e937", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -57287,304 +59107,230 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28254418007", - "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254418007", - "createdAt": "2026-06-26T17:28:51Z", - "sha": "60dec7d70f554e252fec87709e2be52752947db1" + "id": "28271717621", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271717621", + "createdAt": "2026-06-26T23:52:30.177352+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" }, "rows": [ { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 56.09599873423576, - "p90": 86.7839977145195, - "p95": 94.97600048780441, - "p99": 109.98400300741196 - }, - "combine": { - "p50": 60.864001512527466, - "p90": 79.64800298213959, - "p95": 85.7279971241951, - "p99": 109.24799740314484 - }, - "roundtrip": { - "p50": 148.60799908638, - "p90": 199.42399859428406, - "p95": 207.45599269866943, - "p99": 260.5440020561218 - }, - "isolatedSum": { - "p50": 116.96000024676323, - "p90": 166.4320006966591, - "p95": 180.7039976119995, - "p99": 219.2320004105568 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 315392, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 3, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 52.06400156021118, - "p90": 83.42400193214417, - "p95": 88.99199962615967, - "p99": 123.80799651145935 - }, - "combine": { - "p50": 59.808000922203064, - "p90": 77.91999727487564, - "p95": 84.48000252246857, - "p99": 130.78400492668152 - }, - "roundtrip": { - "p50": 145.82400023937225, - "p90": 194.91200149059296, - "p95": 215.10399878025055, - "p99": 273.79199862480164 - }, - "isolatedSum": { - "p50": 111.87200248241425, - "p90": 161.3439992070198, - "p95": 173.47200214862823, - "p99": 254.59200143814087 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 616448, - "combineLogicalBytes": 1232896, - "fanoutMean": 5.375, - "recvTokensMax": 13, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - 
}, - { - "tokensPerRank": 4, - "globalTokens": 32, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 56.60799890756607, - "p90": 89.08800035715103, - "p95": 98.91200065612793, - "p99": 111.7440015077591 + "p50": 111.42399907112122, + "p90": 114.94400352239609, + "p95": 116.03199690580368, + "p99": 119.61600184440613 }, "combine": { - "p50": 60.7680007815361, - "p90": 78.52800190448761, - "p95": 84.22400057315826, - "p99": 97.95200079679489 + "p50": 98.33600372076035, + "p90": 103.71199995279312, + "p95": 104.67199981212616, + "p99": 106.4319983124733 }, "roundtrip": { - "p50": 143.74400675296783, - "p90": 192.7040070295334, - "p95": 212.0320051908493, - "p99": 294.46399211883545 + "p50": 184.9599927663803, + "p90": 188.63999843597412, + "p95": 189.66400623321533, + "p99": 194.11200284957886 }, "isolatedSum": { - "p50": 117.37599968910217, - "p90": 167.61600226163864, - "p95": 183.1360012292862, - "p99": 209.69600230455399 + "p50": 209.76000279188156, + "p90": 218.6560034751892, + "p95": 220.70399671792984, + "p99": 226.04800015687943 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1240064, - "combineLogicalBytes": 2480128, - "fanoutMean": 5.40625, - "recvTokensMax": 29, - "stragglerRank": 5, + "dispatchLogicalBytes": 66576384, + "combineLogicalBytes": 66576384, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 256, + "globalTokens": 2048, "dispatch": { - "p50": 52.25599929690361, - "p90": 80.09599894285202, - "p95": 88.35200220346451, - "p99": 109.37599837779999 + "p50": 132.86399841308594, + "p90": 137.7599984407425, + "p95": 139.3280029296875, + "p99": 142.4960047006607 }, "combine": { - "p50": 60.736000537872314, - "p90": 79.48800176382065, - "p95": 85.60000360012054, - "p99": 108.64000022411346 + "p50": 137.69599795341492, + "p90": 140.4159963130951, + "p95": 141.37600362300873, + "p99": 
145.53600549697876 }, "roundtrip": { - "p50": 141.12000167369843, - "p90": 183.87199938297272, - "p95": 195.23200392723083, - "p99": 286.24001145362854 + "p50": 237.2480034828186, + "p90": 242.08000302314758, + "p95": 243.1039959192276, + "p99": 246.24000489711761 }, "isolatedSum": { - "p50": 112.99199983477592, - "p90": 159.58400070667267, - "p95": 173.95200580358505, - "p99": 218.01599860191345 + "p50": 270.55999636650085, + "p90": 278.1759947538376, + "p95": 280.7040065526962, + "p99": 288.03201019763947 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2487296, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 6, + "dispatchLogicalBytes": 133619712, + "combineLogicalBytes": 133619712, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 16, - "globalTokens": 128, + "tokensPerRank": 512, + "globalTokens": 4096, "dispatch": { - "p50": 53.47200110554695, - "p90": 77.60000228881836, - "p95": 85.05599945783615, - "p99": 93.9520001411438 + "p50": 184.00000035762787, + "p90": 197.31199741363525, + "p95": 200.15999674797058, + "p99": 204.12799715995789 }, "combine": { - "p50": 62.49599903821945, - "p90": 77.34400033950806, - "p95": 82.11199939250946, - "p99": 95.77599912881851 + "p50": 209.6959948539734, + "p90": 216.86400473117828, + "p95": 217.92000532150269, + "p99": 221.95200622081757 }, "roundtrip": { - "p50": 142.17600226402283, - "p90": 183.77600610256195, - "p95": 197.79199361801147, - "p99": 241.5360063314438 + "p50": 365.02400040626526, + "p90": 377.21601128578186, + "p95": 380.5760145187378, + "p99": 388.12801241874695 }, "isolatedSum": { - "p50": 115.9680001437664, - "p90": 154.94400262832642, - "p95": 167.1679988503456, - "p99": 189.7279992699623 + "p50": 393.69599521160126, + "p90": 414.17600214481354, + "p95": 418.08000206947327, + "p99": 426.08000338077545 }, "roundtripMeasured": true, - 
"dispatchLogicalBytes": 4960256, - "combineLogicalBytes": 9920512, - "fanoutMean": 5.40625, - "recvTokensMax": 92, - "stragglerRank": 4, + "dispatchLogicalBytes": 267657216, + "combineLogicalBytes": 267657216, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 32, - "globalTokens": 256, + "tokensPerRank": 1024, + "globalTokens": 8192, "dispatch": { - "p50": 55.39200082421303, - "p90": 81.05599880218506, - "p95": 89.15200084447861, - "p99": 109.6000000834465 + "p50": 273.21600914001465, + "p90": 277.44001150131226, + "p95": 279.87200021743774, + "p99": 289.3120050430298 }, "combine": { - "p50": 66.39999896287918, - "p90": 84.927998483181, - "p95": 88.3840024471283, - "p99": 101.3759970664978 + "p50": 332.41599798202515, + "p90": 337.119996547699, + "p95": 338.20798993110657, + "p99": 341.66398644447327 }, "roundtrip": { - "p50": 148.15999567508698, - "p90": 191.23199582099915, - "p95": 200.57600736618042, - "p99": 228.4799963235855 + "p50": 577.6320099830627, + "p90": 582.751989364624, + "p95": 584.7679972648621, + "p99": 588.7680053710938 }, "isolatedSum": { - "p50": 121.79199978709221, - "p90": 165.98399728536606, - "p95": 177.5360032916069, - "p99": 210.9759971499443 + "p50": 605.6320071220398, + "p90": 614.5600080490112, + "p95": 618.0799901485443, + "p99": 630.975991487503 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 9863168, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 182, - "stragglerRank": 5, + "dispatchLogicalBytes": 534380544, + "combineLogicalBytes": 534380544, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 64, - "globalTokens": 512, + "tokensPerRank": 2048, + "globalTokens": 16384, "dispatch": { - "p50": 65.08799642324448, - "p90": 90.97599983215332, - "p95": 100.63999891281128, - "p99": 148.28799664974213 + 
"p50": 464.32000398635864, + "p90": 473.60000014305115, + "p95": 477.3760139942169, + "p99": 648.8320231437683 }, "combine": { - "p50": 81.05599880218506, - "p90": 96.54399752616882, - "p95": 99.23200309276581, - "p99": 106.52799904346466 + "p50": 584.384024143219, + "p90": 590.9119844436646, + "p95": 593.0560231208801, + "p99": 596.8000292778015 }, "roundtrip": { - "p50": 171.424001455307, - "p90": 216.8000042438507, - "p95": 232.1919947862625, - "p99": 288.38399052619934 + "p50": 1019.2320346832275, + "p90": 1029.6640396118164, + "p95": 1033.7599515914917, + "p99": 1037.984013557434 }, "isolatedSum": { - "p50": 146.14399522542953, - "p90": 187.51999735832214, - "p95": 199.8720020055771, - "p99": 254.8159956932068 + "p50": 1048.7040281295776, + "p90": 1064.5119845867157, + "p95": 1070.432037115097, + "p99": 1245.6320524215698 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19496960, - "combineLogicalBytes": 38993920, - "fanoutMean": 5.3125, - "recvTokensMax": 367, + "dispatchLogicalBytes": 1066119168, + "combineLogicalBytes": 1066119168, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 4096, + "globalTokens": 32768, "dispatch": { - "p50": 75.00799745321274, - "p90": 94.14400160312653, - "p95": 99.04000163078308, - "p99": 115.23199826478958 + "p50": 879.423975944519, + "p90": 904.6720266342163, + "p95": 913.2480025291443, + "p99": 928.991973400116 }, "combine": { - "p50": 97.34400361776352, - "p90": 115.84000289440155, - "p95": 119.03999745845795, - "p99": 133.56800377368927 + "p50": 1065.6960010528564, + "p90": 1075.3920078277588, + "p95": 1078.3040523529053, + "p99": 1084.2560529708862 }, "roundtrip": { - "p50": 197.79199361801147, - "p90": 227.80799865722656, - "p95": 237.8239929676056, - "p99": 276.8320143222809 + "p50": 1901.9520282745361, + "p90": 1920.7359552383423, + "p95": 1926.5919923782349, + 
"p99": 1940.1600360870361 }, "isolatedSum": { - "p50": 172.35200107097626, - "p90": 209.98400449752808, - "p95": 218.07999908924103, - "p99": 248.80000203847885 + "p50": 1945.1199769973755, + "p90": 1980.064034461975, + "p95": 1991.5520548820496, + "p99": 2013.2480263710022 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 38836224, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 5, + "dispatchLogicalBytes": 2131722240, + "combineLogicalBytes": 2131722240, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 @@ -57592,28 +59338,29 @@ ] }, { - "id": "cx-fdd09e42", - "identity": "h200|deepep|7168|8|256|fp8|normal|cached-layout-comm-only-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", - "colorKey": "h200_4f483b60", - "comparisonKey": "95dcff383339100e", + "id": "cx-d524fd7e", + "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||157ca81687ddb63", + "colorKey": "h100_42947950", + "comparisonKey": "4c920ba7523ac63b", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:50:13.723754+00:00", + "generatedAt": "2026-06-26T23:54:28.917588+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h200-dgxc-slurm_10", - "sku": "h200", + "runner": "h100-dgxc-slurm_08", + "sku": "h100", "backend": "deepep", - "phase": "decode", + "phase": "prefill", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "cached-layout-comm-only-v1", - "topologyClass": "h200-nvlink-island", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H200 EP8 · deepep · fp8 [cl]", + "label": "H100 EP8 · deepep · bf16", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, @@ -57623,7 
+59370,7 @@ "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, - "dispatchDtype": "fp8", + "dispatchDtype": "bf16", "activationProfile": "normal", "combineQuantMode": "none" }, @@ -57644,8 +59391,8 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "ac583971f94b176", - "workloadId": "set:8:d8d49658059863f2", + "traceSignature": "157ca81687ddb63", + "workloadId": "set:3:a426d66e479dc893", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -57653,304 +59400,412 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271629782", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271629782", - "createdAt": "2026-06-26T23:48:49Z", + "id": "28271785174", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271785174", + "createdAt": "2026-06-26T23:54:28.917588+00:00", "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" }, "rows": [ { - "tokensPerRank": 1, - "globalTokens": 8, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 51.04000121355057, - "p90": 76.64000242948532, - "p95": 84.48000252246857, - "p99": 115.32799899578094 + "p50": 130.52800297737122, + "p90": 135.55200397968292, + "p95": 138.43199610710144, + "p99": 176.79999768733978 }, "combine": { - "p50": 59.20000001788139, - "p90": 77.47200131416321, - "p95": 87.13600039482117, - "p99": 133.85599851608276 + "p50": 113.8560026884079, + "p90": 120.86399644613266, + "p95": 122.11199849843979, + "p99": 145.50399780273438 }, "roundtrip": { - "p50": 140.73599874973297, - "p90": 177.18400061130524, - "p95": 189.60000574588776, - "p99": 239.3919974565506 + "p50": 209.05600488185883, + "p90": 217.56799519062042, + "p95": 219.200000166893, + "p99": 275.04000067710876 }, "isolatedSum": { - "p50": 110.24000123143196, - "p90": 154.11200374364853, - "p95": 171.61600291728973, - "p99": 
249.1839975118637 + "p50": 244.3840056657791, + "p90": 256.4160004258156, + "p95": 260.54399460554123, + "p99": 322.30399549007416 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 315392, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 2, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2, - "globalTokens": 16, + "tokensPerRank": 512, + "globalTokens": 4096, "dispatch": { - "p50": 54.71999943256378, - "p90": 82.40000158548355, - "p95": 88.16000074148178, - "p99": 115.10399729013443 + "p50": 210.27199923992157, + "p90": 217.056006193161, + "p95": 220.22399306297302, + "p99": 256.99201226234436 }, "combine": { - "p50": 60.19200012087822, - "p90": 74.78400319814682, - "p95": 81.44000172615051, - "p99": 106.84800148010254 + "p50": 234.9119931459427, + "p90": 241.40800535678864, + "p95": 244.9920028448105, + "p99": 262.9759907722473 }, "roundtrip": { - "p50": 147.13600277900696, - "p90": 190.75199961662292, - "p95": 217.79200434684753, - "p99": 253.79198789596558 + "p50": 412.54401206970215, + "p90": 420.9280014038086, + "p95": 423.0720102787018, + "p99": 427.35999822616577 }, "isolatedSum": { - "p50": 114.911999553442, - "p90": 157.18400478363037, - "p95": 169.6000024676323, - "p99": 221.95199877023697 + "p50": 445.18399238586426, + "p90": 458.46401154994965, + "p95": 465.2159959077835, + "p99": 519.9680030345917 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 616448, - "combineLogicalBytes": 1232896, - "fanoutMean": 5.375, - "recvTokensMax": 13, - "stragglerRank": 2, - "correct": true, + "dispatchLogicalBytes": 312266752, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 5, + "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 4, - "globalTokens": 32, + 
"tokensPerRank": 2048, + "globalTokens": 16384, "dispatch": { - "p50": 54.048001766204834, - "p90": 77.53600180149078, - "p95": 84.99199897050858, - "p99": 106.4319983124733 + "p50": 526.5920162200928, + "p90": 541.4720177650452, + "p95": 545.9200143814087, + "p99": 552.3520112037659 }, "combine": { - "p50": 60.70400029420853, - "p90": 75.83999633789062, - "p95": 82.36800134181976, - "p99": 106.84800148010254 + "p50": 637.5679969787598, + "p90": 649.6959924697876, + "p95": 652.6079773902893, + "p99": 661.0879898071289 }, "roundtrip": { - "p50": 144.31999623775482, - "p90": 184.4799965620041, - "p95": 193.9840018749237, - "p99": 240.83200097084045 + "p50": 1134.6240043640137, + "p90": 1146.880030632019, + "p95": 1151.2320041656494, + "p99": 1158.5919857025146 }, "isolatedSum": { - "p50": 114.75200206041336, - "p90": 153.3759981393814, - "p95": 167.36000031232834, - "p99": 213.27999979257584 + "p50": 1164.1600131988525, + "p90": 1191.1680102348328, + "p95": 1198.527991771698, + "p99": 1213.4400010108948 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1240064, - "combineLogicalBytes": 2480128, - "fanoutMean": 5.40625, - "recvTokensMax": 29, - "stragglerRank": 2, + "dispatchLogicalBytes": 1243805696, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-efe3a643", + "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "h100_42947950", + "comparisonKey": "cca7a3f5d9dbba36", + "schemaVersion": 3, + "generatedAt": "2026-06-27T10:12:09.407437+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h100-dgxc-slurm_12", + "sku": "h100", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + 
"measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": "set:6:a426d66e479dc893", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28286083501", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28286083501", + "createdAt": "2026-06-27T10:12:09.407437+00:00", + "sha": "76a3032d20288ee17220eb6099346f74d56ce005" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 112.5119999051094, + "p90": 118.01599711179733, + "p95": 119.39200013875961, + "p99": 123.4240010380745 + }, + "combine": { + "p50": 107.77600109577179, + "p90": 113.40799927711487, + "p95": 114.1119971871376, + "p99": 116.2559986114502 + }, + "roundtrip": { + "p50": 200.57600736618042, + "p90": 204.73599433898926, + "p95": 206.36799931526184, + "p99": 209.85600352287292 + }, + "isolatedSum": { + "p50": 220.2880010008812, + "p90": 
231.4239963889122, + "p95": 233.50399732589722, + "p99": 239.6799996495247 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 256, + "globalTokens": 2048, "dispatch": { - "p50": 54.687999188899994, - "p90": 88.25600147247314, - "p95": 94.46399658918381, - "p99": 120.19199877977371 + "p50": 143.77599954605103, + "p90": 148.12800288200378, + "p95": 149.6960073709488, + "p99": 152.51199901103973 }, "combine": { - "p50": 61.824001371860504, - "p90": 77.02399790287018, - "p95": 83.26400071382523, - "p99": 101.88800096511841 + "p50": 151.10400319099426, + "p90": 155.74400126934052, + "p95": 156.76799416542053, + "p99": 158.11200439929962 }, "roundtrip": { - "p50": 140.35199582576752, - "p90": 180.09600043296814, - "p95": 193.53599846363068, - "p99": 230.5919975042343 + "p50": 265.53601026535034, + "p90": 269.79199051856995, + "p95": 270.9760069847107, + "p99": 274.01599287986755 }, "isolatedSum": { - "p50": 116.5120005607605, - "p90": 165.27999937534332, - "p95": 177.72799730300903, - "p99": 222.07999974489212 + "p50": 294.8800027370453, + "p90": 303.8720041513443, + "p95": 306.4640015363693, + "p99": 310.62400341033936 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2487296, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, + "dispatchLogicalBytes": 155889664, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 16, - "globalTokens": 128, + "tokensPerRank": 512, + "globalTokens": 4096, "dispatch": { - "p50": 54.17599901556969, - "p90": 81.11999928951263, - "p95": 88.8959988951683, - "p99": 129.4720023870468 + "p50": 199.96799528598785, + "p90": 
204.12799715995789, + "p95": 205.31199872493744, + "p99": 209.72800254821777 }, "combine": { - "p50": 62.3680017888546, - "p90": 78.36800068616867, - "p95": 82.56000280380249, - "p99": 101.21600329875946 + "p50": 229.0560007095337, + "p90": 232.2559952735901, + "p95": 235.80799996852875, + "p99": 239.19999599456787 }, "roundtrip": { - "p50": 140.47999680042267, - "p90": 177.66399681568146, - "p95": 196.99199497699738, - "p99": 237.7600073814392 + "p50": 401.5359878540039, + "p90": 406.0159921646118, + "p95": 407.6800048351288, + "p99": 412.1280014514923 }, "isolatedSum": { - "p50": 116.54400080442429, - "p90": 159.4879999756813, - "p95": 171.4560016989708, - "p99": 230.68800568580627 + "p50": 429.02399599552155, + "p90": 436.383992433548, + "p95": 441.1199986934662, + "p99": 448.92799854278564 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4960256, - "combineLogicalBytes": 9920512, - "fanoutMean": 5.40625, - "recvTokensMax": 92, + "dispatchLogicalBytes": 312266752, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 32, - "globalTokens": 256, + "tokensPerRank": 1024, + "globalTokens": 8192, "dispatch": { - "p50": 57.24800005555153, - "p90": 79.64800298213959, - "p95": 85.91999858617783, - "p99": 104.67199981212616 + "p50": 304.9600124359131, + "p90": 310.016006231308, + "p95": 311.3279938697815, + "p99": 313.2160007953644 }, "combine": { - "p50": 68.41599941253662, - "p90": 82.33600109815598, - "p95": 85.7279971241951, - "p99": 99.10400211811066 + "p50": 367.39200353622437, + "p90": 373.3440041542053, + "p95": 375.90399384498596, + "p99": 378.81600856781006 }, "roundtrip": { - "p50": 145.1520025730133, - "p90": 178.1120002269745, - "p95": 187.6479983329773, - "p99": 228.7359982728958 + "p50": 645.4079747200012, + "p90": 652.5760293006897, + "p95": 654.7200083732605, + "p99": 659.0719819068909 }, "isolatedSum": { - "p50": 
125.66399946808815, - "p90": 161.98400408029556, - "p95": 171.64799571037292, - "p99": 203.77600193023682 + "p50": 672.3520159721375, + "p90": 683.3600103855133, + "p95": 687.2319877147675, + "p99": 692.0320093631744 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 9863168, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 182, - "stragglerRank": 4, + "dispatchLogicalBytes": 623443968, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 64, - "globalTokens": 512, + "tokensPerRank": 2048, + "globalTokens": 16384, "dispatch": { - "p50": 66.30399823188782, - "p90": 83.90399813652039, - "p95": 90.17600119113922, - "p99": 149.1840034723282 + "p50": 532.0320129394531, + "p90": 541.8559908866882, + "p95": 545.4720258712769, + "p99": 554.0480017662048 }, "combine": { - "p50": 78.72000336647034, - "p90": 93.79199892282486, - "p95": 98.88000041246414, - "p99": 114.01599645614624 + "p50": 637.9200220108032, + "p90": 645.7599997520447, + "p95": 647.9679942131042, + "p99": 653.6639928817749 }, "roundtrip": { - "p50": 164.8319959640503, - "p90": 199.48799908161163, - "p95": 211.2639993429184, - "p99": 271.93599939346313 + "p50": 1139.6479606628418, + "p90": 1149.888038635254, + "p95": 1154.3359756469727, + "p99": 1160.032033920288 }, "isolatedSum": { - "p50": 145.02400159835815, - "p90": 177.69599705934525, - "p95": 189.05600160360336, - "p99": 263.1999999284744 + "p50": 1169.9520349502563, + "p90": 1187.615990638733, + "p95": 1193.440020084381, + "p99": 1207.7119946479797 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19496960, - "combineLogicalBytes": 38993920, - "fanoutMean": 5.3125, - "recvTokensMax": 367, - "stragglerRank": 2, + "dispatchLogicalBytes": 1243805696, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 4, "correct": true, 
"samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 4096, + "globalTokens": 32768, "dispatch": { - "p50": 73.82400333881378, - "p90": 90.62399715185165, - "p95": 95.39200365543365, - "p99": 114.52800035476685 + "p50": 999.4239807128906, + "p90": 1017.2480344772339, + "p95": 1023.8080024719238, + "p99": 1035.040020942688 }, "combine": { - "p50": 97.24800288677216, - "p90": 112.31999844312668, - "p95": 115.77600240707397, - "p99": 130.49599528312683 + "p50": 1168.544054031372, + "p90": 1176.8640279769897, + "p95": 1180.5119514465332, + "p99": 1186.1759424209595 }, "roundtrip": { - "p50": 199.77599382400513, - "p90": 228.32000255584717, - "p95": 247.29600548744202, - "p99": 297.88801074028015 + "p50": 2132.4799060821533, + "p90": 2148.47993850708, + "p95": 2154.9439430236816, + "p99": 2171.5519428253174 }, "isolatedSum": { - "p50": 171.07200622558594, - "p90": 202.94399559497833, - "p95": 211.16800606250763, - "p99": 245.02399563789368 + "p50": 2167.9680347442627, + "p90": 2194.1120624542236, + "p95": 2204.319953918457, + "p99": 2221.2159633636475 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 38836224, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 6, + "dispatchLogicalBytes": 2487009280, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -57958,28 +59813,29 @@ ] }, { - "id": "cx-39796825", - "identity": "h200|deepep|7168|8|256|fp8|ll|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", - "colorKey": "h200_ff232ea5", - "comparisonKey": "643e1b15925a53af", + "id": "cx-8a96205b", + "identity": "h100|deepep|7168|8|256|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "h100_ff7906f8", + "comparisonKey": "6a625438eb544ee8", "schemaVersion": 3, - 
"generatedAt": "2026-06-26T23:51:34.222899+00:00", + "generatedAt": "2026-06-26T23:48:12.079136+00:00", "status": "valid", - "publicationStatus": "diagnostic", - "runner": "h200-dgxc-slurm_4", - "sku": "h200", + "publicationStatus": "official", + "runner": "h100-dgxc-slurm_06", + "sku": "h100", "backend": "deepep", - "phase": "decode", - "mode": "ll", + "phase": "prefill", + "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h100-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H200 EP8 · deepep · fp8 LL", + "label": "H100 EP8 · deepep · bf16", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, @@ -57989,18 +59845,18 @@ "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, - "dispatchDtype": "fp8", + "dispatchDtype": "bf16", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": null, - "configuredUnits": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, "deviceUnits": 132, - "resourceClass": "fixed-kernel", - "conformanceClass": "not-applicable", - "fixedKernel": true, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, "paretoEligible": false }, "placement": { @@ -58010,8 +59866,8 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "ac583971f94b176", - "workloadId": "set:8:d8d49658059863f2", + "traceSignature": "64d989e2e2a6b31", + "workloadId": "set:6:a426d66e479dc893", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -58019,304 +59875,230 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": 
"28271653486", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271653486", - "createdAt": "2026-06-26T23:49:28Z", + "id": "28271563151", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271563151", + "createdAt": "2026-06-26T23:48:12.079136+00:00", "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" }, "rows": [ { - "tokensPerRank": 1, - "globalTokens": 8, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 29.08799983561039, - "p90": 36.41600161790848, - "p95": 44.28799822926521, - "p99": 63.551999628543854 + "p50": 111.61600053310394, + "p90": 117.3119992017746, + "p95": 118.81600320339203, + "p99": 123.74400347471237 }, "combine": { - "p50": 40.95999896526337, - "p90": 64.70400094985962, - "p95": 74.8480036854744, - "p99": 125.69600343704224 + "p50": 105.85600137710571, + "p90": 107.07200318574905, + "p95": 111.16799712181091, + "p99": 113.8560026884079 }, "roundtrip": { - "p50": 1856.8320274353027, - "p90": 1879.7760009765625, - "p95": 1894.495964050293, - "p99": 2116.607904434204 + "p50": 193.02399456501007, + "p90": 199.52000677585602, + "p95": 200.9280025959015, + "p99": 204.96000349521637 }, "isolatedSum": { - "p50": 70.04799880087376, - "p90": 101.1200025677681, - "p95": 119.13600191473961, - "p99": 189.2480030655861 + "p50": 217.47200191020966, + "p90": 224.38400238752365, + "p95": 229.98400032520294, + "p99": 237.60000616312027 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 315392, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 14, - "stragglerRank": 7, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2, - "globalTokens": 16, + "tokensPerRank": 256, + "globalTokens": 2048, "dispatch": { - "p50": 28.76799926161766, - "p90": 36.22400015592575, - "p95": 42.11200028657913, - "p99": 
48.767998814582825 + "p50": 143.23200285434723, + "p90": 147.5200057029724, + "p95": 148.6400067806244, + "p99": 152.28800475597382 }, "combine": { - "p50": 36.06399893760681, - "p90": 45.75999826192856, - "p95": 52.2879995405674, - "p99": 84.1279998421669 + "p50": 148.76799285411835, + "p90": 154.4640064239502, + "p95": 155.29599785804749, + "p99": 156.76799416542053 }, "roundtrip": { - "p50": 1847.4880456924438, - "p90": 1861.0880374908447, - "p95": 1871.3279962539673, - "p99": 2004.607915878296 + "p50": 262.33598589897156, + "p90": 266.431987285614, + "p95": 268.12800765037537, + "p99": 271.1679935455322 }, "isolatedSum": { - "p50": 64.83199819922447, - "p90": 81.98399841785431, - "p95": 94.39999982714653, - "p99": 132.89599865674973 + "p50": 291.9999957084656, + "p90": 301.9840121269226, + "p95": 303.9360046386719, + "p99": 309.05599892139435 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 616448, - "combineLogicalBytes": 1232896, - "fanoutMean": 5.375, - "recvTokensMax": 21, - "stragglerRank": 7, + "dispatchLogicalBytes": 155889664, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 4, - "globalTokens": 32, + "tokensPerRank": 512, + "globalTokens": 4096, "dispatch": { - "p50": 28.575999662280083, - "p90": 38.816001266241074, - "p95": 45.632001012563705, - "p99": 57.95200169086456 + "p50": 196.25599682331085, + "p90": 201.1840045452118, + "p95": 202.72000133991241, + "p99": 214.84799683094025 }, "combine": { - "p50": 41.69600084424019, - "p90": 59.93599817156792, - "p95": 68.06399673223495, - "p99": 170.30400037765503 + "p50": 230.49600422382355, + "p90": 236.12800240516663, + "p95": 237.2799962759018, + "p99": 241.15200340747833 }, "roundtrip": { - "p50": 1848.3840227127075, - "p90": 1869.920015335083, - "p95": 1881.9199800491333, - "p99": 1995.0400590896606 + "p50": 403.0719995498657, + "p90": 408.3839952945709, + 
"p95": 410.14400124549866, + "p99": 412.76800632476807 }, "isolatedSum": { - "p50": 70.27200050652027, - "p90": 98.75199943780899, - "p95": 113.69599774479866, - "p99": 228.2560020685196 + "p50": 426.7520010471344, + "p90": 437.3120069503784, + "p95": 439.9999976158142, + "p99": 456.0000002384186 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1240064, - "combineLogicalBytes": 2480128, - "fanoutMean": 5.40625, - "recvTokensMax": 39, - "stragglerRank": 5, + "dispatchLogicalBytes": 312266752, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 1024, + "globalTokens": 8192, "dispatch": { - "p50": 29.37600016593933, - "p90": 37.21600025892258, - "p95": 50.65599828958511, - "p99": 62.65600025653839 + "p50": 301.6960024833679, + "p90": 306.43200874328613, + "p95": 307.9040050506592, + "p99": 312.1280074119568 }, "combine": { - "p50": 47.520000487565994, - "p90": 61.664000153541565, - "p95": 68.57600063085556, - "p99": 103.2319962978363 + "p50": 364.1279935836792, + "p90": 369.4399893283844, + "p95": 372.0319867134094, + "p99": 374.9760091304779 }, "roundtrip": { - "p50": 1859.2000007629395, - "p90": 1878.6879777908325, - "p95": 1886.1440420150757, - "p99": 1924.1600036621094 + "p50": 640.064001083374, + "p90": 646.8160152435303, + "p95": 648.5120058059692, + "p99": 653.6960005760193 }, "isolatedSum": { - "p50": 76.89600065350533, - "p90": 98.88000041246414, - "p95": 119.23199892044067, - "p99": 165.8879965543747 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2487296, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 74, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 31.039999797940254, - "p90": 43.83999854326248, - "p95": 
53.63199859857559, - "p99": 66.01600348949432 - }, - "combine": { - "p50": 52.25599929690361, - "p90": 69.43999975919724, - "p95": 82.40000158548355, - "p99": 131.99999928474426 - }, - "roundtrip": { - "p50": 1864.0960454940796, - "p90": 1884.160041809082, - "p95": 1898.1759548187256, - "p99": 1969.1519737243652 - }, - "isolatedSum": { - "p50": 83.29599909484386, - "p90": 113.27999830245972, - "p95": 136.03200018405914, - "p99": 198.0160027742386 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4960256, - "combineLogicalBytes": 9920512, - "fanoutMean": 5.40625, - "recvTokensMax": 145, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 33.79200026392937, - "p90": 45.27999833226204, - "p95": 49.31199923157692, - "p99": 58.14399942755699 - }, - "combine": { - "p50": 47.839999198913574, - "p90": 64.25599753856659, - "p95": 70.36799937486649, - "p99": 101.53599828481674 - }, - "roundtrip": { - "p50": 1865.056037902832, - "p90": 1881.5360069274902, - "p95": 1888.8959884643555, - "p99": 1917.7600145339966 - }, - "isolatedSum": { - "p50": 81.63199946284294, - "p90": 109.53599587082863, - "p95": 119.6799986064434, - "p99": 159.67999771237373 + "p50": 665.8239960670471, + "p90": 675.8719980716705, + "p95": 679.9359917640686, + "p99": 687.1040165424347 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 9863168, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 287, - "stragglerRank": 7, + "dispatchLogicalBytes": 623443968, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 64, - "globalTokens": 512, + "tokensPerRank": 2048, + "globalTokens": 16384, "dispatch": { - "p50": 40.44799879193306, - "p90": 49.6320016682148, - "p95": 52.799999713897705, - "p99": 64.96000289916992 + "p50": 
532.1599841117859, + "p90": 540.7040119171143, + "p95": 544.1280007362366, + "p99": 549.2799878120422 }, "combine": { - "p50": 63.58399987220764, - "p90": 81.31200075149536, - "p95": 98.7199991941452, - "p99": 231.1680018901825 + "p50": 637.503981590271, + "p90": 645.5039978027344, + "p95": 647.7760076522827, + "p99": 653.9520025253296 }, "roundtrip": { - "p50": 1885.632038116455, - "p90": 1903.3279418945312, - "p95": 1914.080023765564, - "p99": 2039.776086807251 + "p50": 1141.9199705123901, + "p90": 1154.4320583343506, + "p95": 1160.1920127868652, + "p99": 1180.9600591659546 }, "isolatedSum": { - "p50": 104.0319986641407, - "p90": 130.94400241971016, - "p95": 151.5199989080429, - "p99": 296.1280047893524 + "p50": 1169.6639657020569, + "p90": 1186.2080097198486, + "p95": 1191.9040083885193, + "p99": 1203.2319903373718 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19496960, - "combineLogicalBytes": 38993920, - "fanoutMean": 5.3125, - "recvTokensMax": 564, - "stragglerRank": 5, + "dispatchLogicalBytes": 1243805696, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 4096, + "globalTokens": 32768, "dispatch": { - "p50": 51.552001386880875, - "p90": 60.60799956321716, - "p95": 62.65600025653839, - "p99": 73.82400333881378 + "p50": 993.9200282096863, + "p90": 1017.2799825668335, + "p95": 1023.4240293502808, + "p99": 1036.8319749832153 }, "combine": { - "p50": 86.81599795818329, - "p90": 96.19200229644775, - "p95": 108.47999900579453, - "p99": 146.7839926481247 + "p50": 1165.0559902191162, + "p90": 1175.3599643707275, + "p95": 1177.9520511627197, + "p99": 1283.2640409469604 }, "roundtrip": { - "p50": 1922.6560592651367, - "p90": 1938.4959936141968, - "p95": 1957.0879936218262, - "p99": 2130.3679943084717 + "p50": 2117.6319122314453, + "p90": 2134.848117828369, + "p95": 
2139.6799087524414, + "p99": 2151.5839099884033 }, "isolatedSum": { - "p50": 138.36799934506416, - "p90": 156.80000185966492, - "p95": 171.13599926233292, - "p99": 220.60799598693848 + "p50": 2158.9760184288025, + "p90": 2192.639946937561, + "p95": 2201.3760805130005, + "p99": 2320.096015930176 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 38836224, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 1104, - "stragglerRank": 5, + "dispatchLogicalBytes": 2487009280, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -58324,49 +60106,50 @@ ] }, { - "id": "cx-dbb437b5", - "identity": "h200|deepep|7168|8|256|fp8|ll|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", - "colorKey": "h200_7ec76e6d", - "comparisonKey": "9a87b27b98bf2d7a", + "id": "cx-27ce5700", + "identity": "h100|deepep|7168|8|384|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||cd50548525dafdf", + "colorKey": "h100_42947950", + "comparisonKey": "4106e8f613d56fa1", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:51:35.330044+00:00", + "generatedAt": "2026-06-27T11:16:10.577708+00:00", "status": "valid", - "publicationStatus": "diagnostic", - "runner": "h200-dgxc-slurm_13", - "sku": "h200", + "publicationStatus": "official", + "runner": "h100-dgxc-slurm_04", + "sku": "h100", "backend": "deepep", - "phase": "decode", - "mode": "ll", + "phase": "prefill", + "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "runtime-visible-v1", - "topologyClass": "h200-nvlink-island", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H200 EP8 · deepep · fp8 LL", + "label": "H100 EP8 · deepep · bf16", + "model": "Kimi-K2", 
"shape": { "hidden": 7168, "topk": 8, - "experts": 256, + "experts": 384, "routing": "uniform", "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, - "dispatchDtype": "fp8", + "dispatchDtype": "bf16", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": null, - "configuredUnits": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, "deviceUnits": 132, - "resourceClass": "fixed-kernel", - "conformanceClass": "not-applicable", - "fixedKernel": true, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, "paretoEligible": false }, "placement": { @@ -58376,8 +60159,8 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "ac583971f94b176", - "workloadId": "set:8:d8d49658059863f2", + "traceSignature": "cd50548525dafdf", + "workloadId": "set:6:b23bc0c4b6402c69", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -58385,304 +60168,230 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271656517", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271656517", - "createdAt": "2026-06-26T23:49:35Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28287499275", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28287499275", + "createdAt": "2026-06-27T11:16:10.577708+00:00", + "sha": "df7fddee0a275156d4c72fa006cd2b73bce72613" }, "rows": [ { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 28.76799926161766, - "p90": 38.88000175356865, - "p95": 44.73600164055824, - "p99": 61.15199998021126 - }, - "combine": { - "p50": 36.768000572919846, - "p90": 48.287998884916306, - "p95": 57.53599852323532, - "p99": 90.81599861383438 - }, - "roundtrip": { - "p50": 1847.7439880371094, - 
"p90": 1855.6159734725952, - "p95": 1860.543966293335, - "p99": 1893.2160139083862 - }, - "isolatedSum": { - "p50": 65.5359998345375, - "p90": 87.16800063848495, - "p95": 102.27200016379356, - "p99": 151.96799859404564 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 315392, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 14, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 28.991999104619026, - "p90": 33.376000821590424, - "p95": 37.02399879693985, - "p99": 41.05599969625473 - }, - "combine": { - "p50": 37.59999945759773, - "p90": 49.375999718904495, - "p95": 58.62399935722351, - "p99": 235.83999276161194 - }, - "roundtrip": { - "p50": 1847.6799726486206, - "p90": 1855.936050415039, - "p95": 1861.4720106124878, - "p99": 1959.007978439331 - }, - "isolatedSum": { - "p50": 66.59199856221676, - "p90": 82.75200054049492, - "p95": 95.64799815416336, - "p99": 276.89599245786667 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 616448, - "combineLogicalBytes": 1232896, - "fanoutMean": 5.375, - "recvTokensMax": 21, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 28.16000021994114, - "p90": 52.41600051522255, - "p95": 58.59199911355972, - "p99": 83.23200047016144 + "p50": 111.07199639081955, + "p90": 116.38399958610535, + "p95": 117.8240031003952, + "p99": 120.70400267839432 }, "combine": { - "p50": 36.959998309612274, - "p90": 48.06400090456009, - "p95": 54.59199845790863, - "p99": 94.59199756383896 + "p50": 106.36799782514572, + "p90": 107.64800012111664, + "p95": 111.77600175142288, + "p99": 114.656001329422 }, "roundtrip": { - "p50": 1848.3200073242188, - "p90": 1858.62398147583, - "p95": 1864.5440340042114, - "p99": 1925.9519577026367 + "p50": 195.10400295257568, + 
"p90": 200.6399929523468, + "p95": 202.2079974412918, + "p99": 206.68800175189972 }, "isolatedSum": { - "p50": 65.11999852955341, - "p90": 100.48000141978264, - "p95": 113.18399757146835, - "p99": 177.8239980340004 + "p50": 217.43999421596527, + "p90": 224.03199970722198, + "p95": 229.60000485181808, + "p99": 235.36000400781631 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1240064, - "combineLogicalBytes": 2480128, - "fanoutMean": 5.40625, - "recvTokensMax": 39, + "dispatchLogicalBytes": 77514752, + "combineLogicalBytes": 77514752, + "fanoutMean": 5.2802734375, + "recvTokensMax": 707, "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 256, + "globalTokens": 2048, "dispatch": { - "p50": 29.7279991209507, - "p90": 36.3520011305809, - "p95": 41.08799993991852, - "p99": 52.191998809576035 + "p50": 145.34400403499603, + "p90": 151.10400319099426, + "p95": 152.25599706172943, + "p99": 155.29599785804749 }, "combine": { - "p50": 37.88800165057182, - "p90": 50.52800104022026, - "p95": 61.24800071120262, - "p99": 175.7120043039322 + "p50": 149.63200688362122, + "p90": 155.16799688339233, + "p95": 155.71199357509613, + "p99": 156.76799416542053 }, "roundtrip": { - "p50": 1849.4080305099487, - "p90": 1862.7519607543945, - "p95": 1875.4240274429321, - "p99": 1930.5599927902222 + "p50": 268.12800765037537, + "p90": 272.99201488494873, + "p95": 274.6880054473877, + "p99": 278.78400683403015 }, "isolatedSum": { - "p50": 67.61600077152252, - "p90": 86.88000217080116, - "p95": 102.33600065112114, - "p99": 227.90400311350822 + "p50": 294.97601091861725, + "p90": 306.2720000743866, + "p95": 307.96799063682556, + "p99": 312.063992023468 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2487296, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 74, - "stragglerRank": 7, + "dispatchLogicalBytes": 154570752, + "combineLogicalBytes": 154570752, + 
"fanoutMean": 5.2646484375, + "recvTokensMax": 1391, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 16, - "globalTokens": 128, + "tokensPerRank": 512, + "globalTokens": 4096, "dispatch": { - "p50": 31.775999814271927, - "p90": 37.856001406908035, - "p95": 43.007999658584595, - "p99": 52.2879995405674 + "p50": 199.42399859428406, + "p90": 204.41600680351257, + "p95": 207.87200331687927, + "p99": 219.93599832057953 }, "combine": { - "p50": 41.280001401901245, - "p90": 52.319999784231186, - "p95": 64.41599875688553, - "p99": 140.28799533843994 + "p50": 228.99200022220612, + "p90": 234.6239984035492, + "p95": 236.06400191783905, + "p99": 237.69600689411163 }, "roundtrip": { - "p50": 1854.848027229309, - "p90": 1876.3200044631958, - "p95": 1915.3599739074707, - "p99": 1982.6879501342773 + "p50": 400.89601278305054, + "p90": 405.11998534202576, + "p95": 406.49598836898804, + "p99": 409.88799929618835 }, "isolatedSum": { - "p50": 73.05600121617317, - "p90": 90.17600119113922, - "p95": 107.42399841547012, - "p99": 192.57599487900734 + "p50": 428.4159988164902, + "p90": 439.04000520706177, + "p95": 443.9360052347183, + "p99": 457.63200521469116 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4960256, - "combineLogicalBytes": 9920512, - "fanoutMean": 5.40625, - "recvTokensMax": 145, - "stragglerRank": 7, + "dispatchLogicalBytes": 309772288, + "combineLogicalBytes": 309772288, + "fanoutMean": 5.275390625, + "recvTokensMax": 2754, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 32, - "globalTokens": 256, + "tokensPerRank": 1024, + "globalTokens": 8192, "dispatch": { - "p50": 33.344000577926636, - "p90": 36.159999668598175, - "p95": 38.30400109291077, - "p99": 46.14400118589401 + "p50": 304.86398935317993, + "p90": 336.38399839401245, + "p95": 347.51999378204346, + "p99": 513.0239725112915 }, "combine": { - "p50": 46.30399867892265, - "p90": 56.223999708890915, - "p95": 
66.49599969387054, - "p99": 109.24799740314484 + "p50": 366.33598804473877, + "p90": 372.0960021018982, + "p95": 374.91199374198914, + "p99": 474.7520089149475 }, "roundtrip": { - "p50": 1862.8159761428833, - "p90": 1875.2959966659546, - "p95": 1890.6559944152832, - "p99": 1946.6559886932373 + "p50": 644.0640091896057, + "p90": 650.9439945220947, + "p95": 653.823971748352, + "p99": 658.5919857025146 }, "isolatedSum": { - "p50": 79.64799925684929, - "p90": 92.38399937748909, - "p95": 104.80000078678131, - "p99": 155.39199858903885 + "p50": 671.1999773979187, + "p90": 708.4800004959106, + "p95": 722.4319875240326, + "p99": 987.775981426239 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 9863168, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 287, + "dispatchLogicalBytes": 619501568, + "combineLogicalBytes": 619501568, + "fanoutMean": 5.2750244140625, + "recvTokensMax": 5469, "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 64, - "globalTokens": 512, + "tokensPerRank": 2048, + "globalTokens": 16384, "dispatch": { - "p50": 39.68000039458275, - "p90": 51.58400163054466, - "p95": 57.72799998521805, - "p99": 97.63199836015701 + "p50": 524.5440006256104, + "p90": 540.8959984779358, + "p95": 543.7120199203491, + "p99": 551.6800284385681 }, "combine": { - "p50": 60.70400029420853, - "p90": 75.29599964618683, - "p95": 94.2080020904541, - "p99": 319.7759985923767 + "p50": 630.8159828186035, + "p90": 639.8720145225525, + "p95": 642.4000263214111, + "p99": 648.1599807739258 }, "roundtrip": { - "p50": 1882.3360204696655, - "p90": 1892.0639753341675, - "p95": 1907.5520038604736, - "p99": 1997.3440170288086 + "p50": 1125.1519918441772, + "p90": 1138.8800144195557, + "p95": 1142.2719955444336, + "p99": 1151.6799926757812 }, "isolatedSum": { - "p50": 100.38400068879128, - "p90": 126.88000127673149, - "p95": 151.93600207567215, - "p99": 417.4079969525337 + "p50": 1155.3599834442139, + "p90": 
1180.7680130004883, + "p95": 1186.1120462417603, + "p99": 1199.840009212494 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19496960, - "combineLogicalBytes": 38993920, - "fanoutMean": 5.3125, - "recvTokensMax": 564, + "dispatchLogicalBytes": 1239375872, + "combineLogicalBytes": 1239375872, + "fanoutMean": 5.276611328125, + "recvTokensMax": 10883, "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 4096, + "globalTokens": 32768, "dispatch": { - "p50": 51.552001386880875, - "p90": 55.07199838757515, - "p95": 59.007998555898666, - "p99": 66.11199676990509 + "p50": 1012.2560262680054, + "p90": 1040.9280061721802, + "p95": 1049.504041671753, + "p99": 1060.0320100784302 }, "combine": { - "p50": 86.43200248479843, - "p90": 93.08800101280212, - "p95": 100.89600086212158, - "p99": 167.10400581359863 + "p50": 1154.3359756469727, + "p90": 1163.2000207901, + "p95": 1166.8479442596436, + "p99": 1173.7279891967773 }, "roundtrip": { - "p50": 1921.3759899139404, - "p90": 1930.4640293121338, - "p95": 1935.968041419983, - "p99": 1968.6399698257446 + "p50": 2117.1839237213135, + "p90": 2141.8559551239014, + "p95": 2147.104024887085, + "p99": 2157.1199893951416 }, "isolatedSum": { - "p50": 137.9840038716793, - "p90": 148.15999940037727, - "p95": 159.90399941802025, - "p99": 233.21600258350372 + "p50": 2166.592001914978, + "p90": 2204.1280269622803, + "p95": 2216.3519859313965, + "p99": 2233.7599992752075 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 38836224, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 1104, - "stragglerRank": 0, + "dispatchLogicalBytes": 2479669248, + "combineLogicalBytes": 2479669248, + "fanoutMean": 5.278564453125, + "recvTokensMax": 21730, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 @@ -58690,49 +60399,50 @@ ] }, { - "id": "cx-1caa7ff5", - "identity": 
"h200|deepep|7168|8|256|fp8|ll|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|ac583971f94b176", - "colorKey": "h200_df102230", - "comparisonKey": "2ce1d8f2e79d5005", + "id": "cx-32c90de8", + "identity": "h100|deepep|7168|8|384|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||cd50548525dafdf", + "colorKey": "h100_ff7906f8", + "comparisonKey": "db866d0065c2a509", "schemaVersion": 3, - "generatedAt": "2026-06-26T17:31:08.227503+00:00", + "generatedAt": "2026-06-26T23:51:05.825406+00:00", "status": "valid", - "publicationStatus": "diagnostic", - "runner": "h200-dgxc-slurm_3", - "sku": "h200", + "publicationStatus": "official", + "runner": "h100-dgxc-slurm_05", + "sku": "h100", "backend": "deepep", - "phase": "decode", - "mode": "ll", - "resourceMode": "normalized", - "suite": "resource-constrained", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h100-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H200 EP8 · deepep · fp8 LL (norm)", + "label": "H100 EP8 · deepep · bf16", + "model": "Kimi-K2", "shape": { "hidden": 7168, "topk": 8, - "experts": 256, + "experts": 384, "routing": "uniform", "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, - "dispatchDtype": "fp8", + "dispatchDtype": "bf16", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { - "requestedFraction": 0.18, - "achievedFraction": null, - "configuredUnits": null, + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, "deviceUnits": 132, - "resourceClass": "fixed-kernel", - "conformanceClass": "not-applicable", - "fixedKernel": true, + "resourceClass": "backend-tuned", + 
"conformanceClass": "backend-default", + "fixedKernel": false, "paretoEligible": false }, "placement": { @@ -58742,8 +60452,8 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "ac583971f94b176", - "workloadId": "set:8:d8d49658059863f2", + "traceSignature": "cd50548525dafdf", + "workloadId": "set:6:b23bc0c4b6402c69", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -58751,304 +60461,230 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28254435010", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254435010", - "createdAt": "2026-06-26T17:29:12Z", - "sha": "60dec7d70f554e252fec87709e2be52752947db1" + "id": "28271671786", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271671786", + "createdAt": "2026-06-26T23:51:05.825406+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" }, "rows": [ { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 28.736000880599022, - "p90": 42.24000126123428, - "p95": 44.76799815893173, - "p99": 50.97600072622299 - }, - "combine": { - "p50": 37.087999284267426, - "p90": 44.256001710891724, - "p95": 49.6320016682148, - "p99": 65.60000032186508 - }, - "roundtrip": { - "p50": 1824.4800567626953, - "p90": 1831.7760229110718, - "p95": 1838.3680582046509, - "p99": 1884.1919898986816 - }, - "isolatedSum": { - "p50": 65.82400016486645, - "p90": 86.496002972126, - "p95": 94.39999982714653, - "p99": 116.57600104808807 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 315392, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 14, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 28.00000086426735, - "p90": 33.31200033426285, - "p95": 37.66399994492531, - "p99": 
50.36799982190132 - }, - "combine": { - "p50": 36.86400130391121, - "p90": 45.27999833226204, - "p95": 51.29599943757057, - "p99": 124.1919994354248 - }, - "roundtrip": { - "p50": 1824.9599933624268, - "p90": 1835.4239463806152, - "p95": 1843.8400030136108, - "p99": 1961.7279767990112 - }, - "isolatedSum": { - "p50": 64.86400216817856, - "p90": 78.59199866652489, - "p95": 88.95999938249588, - "p99": 174.55999925732613 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 616448, - "combineLogicalBytes": 1232896, - "fanoutMean": 5.375, - "recvTokensMax": 21, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 28.48000079393387, - "p90": 33.59999880194664, - "p95": 36.41600161790848, - "p99": 42.33599826693535 + "p50": 112.96000331640244, + "p90": 118.78400295972824, + "p95": 120.28799951076508, + "p99": 130.40000200271606 }, "combine": { - "p50": 37.53599897027016, - "p90": 47.839999198913574, - "p95": 62.144000083208084, - "p99": 136.4479959011078 + "p50": 106.1440035700798, + "p90": 109.15199667215347, + "p95": 110.30399799346924, + "p99": 114.49600011110306 }, "roundtrip": { - "p50": 1825.8240222930908, - "p90": 1833.9519500732422, - "p95": 1842.0480489730835, - "p99": 1925.0880479812622 + "p50": 196.99199497699738, + "p90": 201.34399831295013, + "p95": 202.94399559497833, + "p99": 206.04799687862396 }, "isolatedSum": { - "p50": 66.01599976420403, - "p90": 81.43999800086021, - "p95": 98.56000170111656, - "p99": 178.78399416804314 + "p50": 219.10400688648224, + "p90": 227.9359996318817, + "p95": 230.5919975042343, + "p99": 244.89600211381912 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1240064, - "combineLogicalBytes": 2480128, - "fanoutMean": 5.40625, - "recvTokensMax": 39, - "stragglerRank": 4, + "dispatchLogicalBytes": 77514752, + "combineLogicalBytes": 77514752, + "fanoutMean": 5.2802734375, + 
"recvTokensMax": 707, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 256, + "globalTokens": 2048, "dispatch": { - "p50": 28.28799933195114, - "p90": 32.00000151991844, - "p95": 34.55999866127968, - "p99": 39.744000881910324 + "p50": 149.21599626541138, + "p90": 155.03999590873718, + "p95": 157.05600380897522, + "p99": 159.4880074262619 }, "combine": { - "p50": 37.43999823927879, - "p90": 46.78399860858917, - "p95": 53.69599908590317, - "p99": 124.64000284671783 + "p50": 153.50399911403656, + "p90": 158.62399339675903, + "p95": 160.25599837303162, + "p99": 165.15199840068817 }, "roundtrip": { - "p50": 1826.3360261917114, - "p90": 1834.1439962387085, - "p95": 1840.1600122451782, - "p99": 1865.6320571899414 + "p50": 270.3999876976013, + "p90": 284.0000092983246, + "p95": 285.69599986076355, + "p99": 288.9600098133087 }, "isolatedSum": { - "p50": 65.72799757122993, - "p90": 78.78400012850761, - "p95": 88.25599774718285, - "p99": 164.38400372862816 + "p50": 302.71999537944794, + "p90": 313.6639893054962, + "p95": 317.31200218200684, + "p99": 324.6400058269501 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2487296, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 74, - "stragglerRank": 5, + "dispatchLogicalBytes": 154570752, + "combineLogicalBytes": 154570752, + "fanoutMean": 5.2646484375, + "recvTokensMax": 1391, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 16, - "globalTokens": 128, + "tokensPerRank": 512, + "globalTokens": 4096, "dispatch": { - "p50": 31.136000528931618, - "p90": 34.94400158524513, - "p95": 37.856001406908035, - "p99": 46.39999940991402 + "p50": 201.92000269889832, + "p90": 212.5760018825531, + "p95": 214.59199488162994, + "p99": 217.8560048341751 }, "combine": { - "p50": 39.264000952243805, - "p90": 44.28799822926521, - "p95": 46.46399989724159, - "p99": 
77.85599678754807 + "p50": 229.5999974012375, + "p90": 237.92000114917755, + "p95": 241.2479966878891, + "p99": 245.2159970998764 }, "roundtrip": { - "p50": 1830.4959535598755, - "p90": 1838.304042816162, - "p95": 1842.78404712677, - "p99": 1957.919955253601 + "p50": 404.2240083217621, + "p90": 417.5359904766083, + "p95": 419.3919897079468, + "p99": 424.1600036621094 }, "isolatedSum": { - "p50": 70.40000148117542, - "p90": 79.23199981451035, - "p95": 84.32000130414963, - "p99": 124.25599619746208 + "p50": 431.5200001001358, + "p90": 450.49600303173065, + "p95": 455.83999156951904, + "p99": 463.0720019340515 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4960256, - "combineLogicalBytes": 9920512, - "fanoutMean": 5.40625, - "recvTokensMax": 145, - "stragglerRank": 5, + "dispatchLogicalBytes": 309772288, + "combineLogicalBytes": 309772288, + "fanoutMean": 5.275390625, + "recvTokensMax": 2754, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 32, - "globalTokens": 256, + "tokensPerRank": 1024, + "globalTokens": 8192, "dispatch": { - "p50": 32.896000891923904, - "p90": 35.96799820661545, - "p95": 39.135999977588654, - "p99": 45.56800052523613 + "p50": 304.83201146125793, + "p90": 315.39198756217957, + "p95": 317.6319897174835, + "p99": 320.51199674606323 }, "combine": { - "p50": 45.791998505592346, - "p90": 54.016001522541046, - "p95": 83.0719992518425, - "p99": 153.56799960136414 + "p50": 367.48799681663513, + "p90": 376.96000933647156, + "p95": 381.9200098514557, + "p99": 392.192006111145 }, "roundtrip": { - "p50": 1840.1600122451782, - "p90": 1847.5840091705322, - "p95": 1853.9199829101562, - "p99": 1896.1600065231323 + "p50": 644.7039842605591, + "p90": 655.456006526947, + "p95": 677.951991558075, + "p99": 919.8399782180786 }, "isolatedSum": { - "p50": 78.68799939751625, - "p90": 89.9839997291565, - "p95": 122.20799922943115, - "p99": 199.13600012660027 + "p50": 672.3200082778931, + "p90": 692.3519968986511, + 
"p95": 699.5519995689392, + "p99": 712.7040028572083 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 9863168, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 287, - "stragglerRank": 0, + "dispatchLogicalBytes": 619501568, + "combineLogicalBytes": 619501568, + "fanoutMean": 5.2750244140625, + "recvTokensMax": 5469, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 64, - "globalTokens": 512, + "tokensPerRank": 2048, + "globalTokens": 16384, "dispatch": { - "p50": 38.84800150990486, - "p90": 42.43199899792671, - "p95": 47.16800153255463, - "p99": 62.144000083208084 + "p50": 521.2799906730652, + "p90": 536.4800095558167, + "p95": 540.224015712738, + "p99": 549.3119955062866 }, "combine": { - "p50": 59.67999994754791, - "p90": 66.14399701356888, - "p95": 83.16799998283386, - "p99": 121.21599912643433 + "p50": 632.4160099029541, + "p90": 640.7679915428162, + "p95": 643.3600187301636, + "p99": 651.4559984207153 }, "roundtrip": { - "p50": 1859.5199584960938, - "p90": 1866.495966911316, - "p95": 1875.264048576355, - "p99": 1916.1280393600464 + "p50": 1126.431941986084, + "p90": 1137.8240585327148, + "p95": 1141.5679454803467, + "p99": 1157.6000452041626 }, "isolatedSum": { - "p50": 98.52800145745277, - "p90": 108.57599601149559, - "p95": 130.3360015153885, - "p99": 183.3599992096424 + "p50": 1153.6960005760193, + "p90": 1177.2480010986328, + "p95": 1183.5840344429016, + "p99": 1200.767993927002 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19496960, - "combineLogicalBytes": 38993920, - "fanoutMean": 5.3125, - "recvTokensMax": 564, - "stragglerRank": 4, + "dispatchLogicalBytes": 1239375872, + "combineLogicalBytes": 1239375872, + "fanoutMean": 5.276611328125, + "recvTokensMax": 10883, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 4096, + "globalTokens": 32768, "dispatch": { - "p50": 
51.80799961090088, - "p90": 55.135998874902725, - "p95": 59.776000678539276, - "p99": 68.83200258016586 + "p50": 1011.2960338592529, + "p90": 1036.895990371704, + "p95": 1044.3840026855469, + "p99": 1057.088017463684 }, "combine": { - "p50": 86.40000224113464, - "p90": 92.03200042247772, - "p95": 95.74399888515472, - "p99": 156.41599893569946 + "p50": 1154.8160314559937, + "p90": 1163.9360189437866, + "p95": 1166.5279865264893, + "p99": 1172.160029411316 }, "roundtrip": { - "p50": 1899.392008781433, - "p90": 1905.2480459213257, - "p95": 1909.440040588379, - "p99": 1973.3760356903076 + "p50": 2122.7200031280518, + "p90": 2144.9921131134033, + "p95": 2150.559902191162, + "p99": 2167.6158905029297 }, "isolatedSum": { - "p50": 138.20800185203552, - "p90": 147.16799929738045, - "p95": 155.519999563694, - "p99": 225.24800151586533 + "p50": 2166.1120653152466, + "p90": 2200.8320093154907, + "p95": 2210.911989212036, + "p99": 2229.248046875 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 38836224, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 1104, - "stragglerRank": 4, + "dispatchLogicalBytes": 2479669248, + "combineLogicalBytes": 2479669248, + "fanoutMean": 5.278564453125, + "recvTokensMax": 21730, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -59056,34 +60692,35 @@ ] }, { - "id": "cx-5888aff1", - "identity": "h200|deepep|4096|8|128|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||dc27c5e0894e569", - "colorKey": "h200_3a47b6c9", - "comparisonKey": "a14fc35e02b01662", + "id": "cx-3c52549e", + "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|prefill|normal|none|none|0|tuned||0a3064a2af0dd39", + "colorKey": "h100_16047c28", + "comparisonKey": "987d0ef30063bb5c", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:53:49.842184+00:00", + "generatedAt": "2026-06-26T23:59:36.290170+00:00", "status": "valid", "publicationStatus": "official", - 
"runner": "h200-dgxc-slurm_12", - "sku": "h200", + "runner": "h100-dgxc-slurm_11", + "sku": "h100", "backend": "deepep", "phase": "prefill", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "runtime-visible-v1", - "topologyClass": "h200-nvlink-island", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H200 EP8 · deepep · bf16", + "label": "H100 EP8 · deepep · bf16 · balanced", + "model": "DeepSeek-V3/V4", "shape": { - "hidden": 4096, + "hidden": 7168, "topk": 8, - "experts": 128, - "routing": "uniform", - "routingLabel": "uniform", + "experts": 256, + "routing": "balanced", + "routingLabel": "balanced", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, @@ -59108,8 +60745,8 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "dc27c5e0894e569", - "workloadId": "set:6:76d8142d69406335", + "traceSignature": "0a3064a2af0dd39", + "workloadId": "set:6:2dad1a73ff872905", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -59117,9 +60754,9 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271748233", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271748233", - "createdAt": "2026-06-26T23:52:22Z", + "id": "28271938768", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271938768", + "createdAt": "2026-06-26T23:59:36.290170+00:00", "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" }, "rows": [ @@ -59127,35 +60764,35 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 91.96799993515015, - "p90": 112.70400136709213, - "p95": 120.70400267839432, - "p99": 143.8400000333786 + "p50": 127.23200023174286, + "p90": 131.52000308036804, + "p95": 
133.08799266815186, + "p99": 136.3839954137802 }, "combine": { - "p50": 83.29600095748901, - "p90": 93.40800344944, - "p95": 99.29600358009338, - "p99": 117.44000017642975 + "p50": 126.11199915409088, + "p90": 130.62399625778198, + "p95": 131.48799538612366, + "p99": 133.98399949073792 }, "roundtrip": { - "p50": 151.2639969587326, - "p90": 170.78399658203125, - "p95": 179.32799458503723, - "p99": 211.93599700927734 + "p50": 233.43999683856964, + "p90": 236.76800727844238, + "p95": 237.40799725055695, + "p99": 240.4160052537918 }, "isolatedSum": { - "p50": 175.26400089263916, - "p90": 206.11200481653214, - "p95": 220.0000062584877, - "p99": 261.28000020980835 + "p50": 253.34399938583374, + "p90": 262.14399933815, + "p95": 264.5759880542755, + "p99": 270.3679949045181 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 44564480, - "combineLogicalBytes": 44564480, - "fanoutMean": 5.3125, - "recvTokensMax": 699, - "stragglerRank": 5, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 8, + "recvTokensMax": 1024, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 @@ -59164,35 +60801,35 @@ "tokensPerRank": 256, "globalTokens": 2048, "dispatch": { - "p50": 111.7120012640953, - "p90": 129.82399761676788, - "p95": 141.59999787807465, - "p99": 159.58400070667267 + "p50": 180.7039976119995, + "p90": 191.3280040025711, + "p95": 193.08799505233765, + "p99": 197.28000462055206 }, "combine": { - "p50": 104.35199737548828, - "p90": 119.93599683046341, - "p95": 123.83999675512314, - "p99": 136.22400164604187 + "p50": 183.26400220394135, + "p90": 190.97599387168884, + "p95": 192.3840045928955, + "p99": 197.66399264335632 }, "roundtrip": { - "p50": 195.42400538921356, - "p90": 218.4000015258789, - "p95": 231.51999711990356, - "p99": 307.16800689697266 + "p50": 332.15999603271484, + "p90": 344.35200691223145, + "p95": 346.3680148124695, + "p99": 348.83201122283936 }, "isolatedSum": { - "p50": 216.0639986395836, - "p90": 
249.7599944472313, - "p95": 265.4399946331978, - "p99": 295.80800235271454 + "p50": 363.96799981594086, + "p90": 382.30399787425995, + "p95": 385.47199964523315, + "p99": 394.9439972639084 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 89726976, - "combineLogicalBytes": 89726976, - "fanoutMean": 5.34814453125, - "recvTokensMax": 1385, - "stragglerRank": 5, + "dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 8, + "recvTokensMax": 2048, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 @@ -59201,35 +60838,35 @@ "tokensPerRank": 512, "globalTokens": 4096, "dispatch": { - "p50": 147.23199605941772, - "p90": 165.12000560760498, - "p95": 172.992005944252, - "p99": 204.6079933643341 + "p50": 272.41599559783936, + "p90": 284.0000092983246, + "p95": 286.46400570869446, + "p99": 290.1439964771271 }, "combine": { - "p50": 153.53600680828094, - "p90": 168.2240068912506, - "p95": 175.90400576591492, - "p99": 192.09599494934082 + "p50": 276.2239873409271, + "p90": 285.0880026817322, + "p95": 286.8799865245819, + "p99": 294.624000787735 }, "roundtrip": { - "p50": 270.8800137042999, - "p90": 295.1680123806, - "p95": 303.77599596977234, - "p99": 446.8800127506256 + "p50": 519.648015499115, + "p90": 533.2159996032715, + "p95": 535.1999998092651, + "p99": 538.0480289459229 }, "isolatedSum": { - "p50": 300.76800286769867, - "p90": 333.3440124988556, - "p95": 348.89601171016693, - "p99": 396.7039883136749 + "p50": 548.6399829387665, + "p90": 569.0880119800568, + "p95": 573.3439922332764, + "p99": 584.7679972648621 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 179503104, - "combineLogicalBytes": 179503104, - "fanoutMean": 5.349609375, - "recvTokensMax": 2772, - "stragglerRank": 4, + "dispatchLogicalBytes": 469762048, + "combineLogicalBytes": 469762048, + "fanoutMean": 8, + "recvTokensMax": 4096, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 @@ -59238,34 +60875,34 @@ "tokensPerRank": 
1024, "globalTokens": 8192, "dispatch": { - "p50": 214.52799439430237, - "p90": 237.63200640678406, - "p95": 244.9920028448105, - "p99": 282.5919985771179 + "p50": 450.3679871559143, + "p90": 462.14398741722107, + "p95": 464.2559885978699, + "p99": 469.34399008750916 }, "combine": { - "p50": 249.08800423145294, - "p90": 261.0880136489868, - "p95": 267.8079903125763, - "p99": 287.7439856529236 + "p50": 469.11999583244324, + "p90": 477.53599286079407, + "p95": 479.0720045566559, + "p99": 484.0959906578064 }, "roundtrip": { - "p50": 438.27199935913086, - "p90": 458.24000239372253, - "p95": 469.88800168037415, - "p99": 508.1599950790405 + "p50": 892.3839926719666, + "p90": 904.3520092964172, + "p95": 909.0560078620911, + "p99": 1079.967975616455 }, "isolatedSum": { - "p50": 463.6159986257553, - "p90": 498.7200200557709, - "p95": 512.7999931573868, - "p99": 570.3359842300415 + "p50": 919.4879829883575, + "p90": 939.6799802780151, + "p95": 943.3279931545258, + "p99": 953.4399807453156 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 359022592, - "combineLogicalBytes": 359022592, - "fanoutMean": 5.349853515625, - "recvTokensMax": 5558, + "dispatchLogicalBytes": 939524096, + "combineLogicalBytes": 939524096, + "fanoutMean": 8, + "recvTokensMax": 8192, "stragglerRank": 6, "correct": true, "samplesPooled": 600, @@ -59275,35 +60912,35 @@ "tokensPerRank": 2048, "globalTokens": 16384, "dispatch": { - "p50": 352.1279990673065, - "p90": 375.0720024108887, - "p95": 387.1999979019165, - "p99": 523.360013961792 + "p50": 810.7200264930725, + "p90": 828.607976436615, + "p95": 831.3599824905396, + "p99": 837.2480273246765 }, "combine": { - "p50": 419.9039936065674, - "p90": 433.8560104370117, - "p95": 441.536009311676, - "p99": 501.6319751739502 + "p50": 854.8160195350647, + "p90": 863.6159896850586, + "p95": 865.9840226173401, + "p99": 870.3359961509705 }, "roundtrip": { - "p50": 744.5759773254395, - "p90": 766.4960026741028, - "p95": 777.3119807243347, - "p99": 
837.7919793128967 + "p50": 1635.583996772766, + "p90": 1645.0239419937134, + "p95": 1648.095965385437, + "p99": 1656.7679643630981 }, "isolatedSum": { - "p50": 772.0319926738739, - "p90": 808.9280128479004, - "p95": 828.7360072135925, - "p99": 1024.9919891357422 + "p50": 1665.5360460281372, + "p90": 1692.2239661216736, + "p95": 1697.3440051078796, + "p99": 1707.584023475647 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 716111872, - "combineLogicalBytes": 716111872, - "fanoutMean": 5.33544921875, - "recvTokensMax": 10982, - "stragglerRank": 4, + "dispatchLogicalBytes": 1879048192, + "combineLogicalBytes": 1879048192, + "fanoutMean": 8, + "recvTokensMax": 16384, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 @@ -59312,35 +60949,35 @@ "tokensPerRank": 4096, "globalTokens": 32768, "dispatch": { - "p50": 629.6319961547852, - "p90": 648.9279866218567, - "p95": 656.2560200691223, - "p99": 715.1039838790894 + "p50": 1546.623945236206, + "p90": 1554.0159940719604, + "p95": 1556.3839673995972, + "p99": 1562.559962272644 }, "combine": { - "p50": 754.368007183075, - "p90": 767.1359777450562, - "p95": 774.5919823646545, - "p99": 917.5040125846863 + "p50": 1599.552035331726, + "p90": 1609.2480421066284, + "p95": 1612.4800443649292, + "p99": 1621.6000318527222 }, "roundtrip": { - "p50": 1354.0480136871338, - "p90": 1376.4159679412842, - "p95": 1387.8079652786255, - "p99": 1428.8320541381836 + "p50": 3122.015953063965, + "p90": 3132.4799060821533, + "p95": 3136.352062225342, + "p99": 3144.4480419158936 }, "isolatedSum": { - "p50": 1384.00000333786, - "p90": 1416.0639643669128, - "p95": 1430.8480024337769, - "p99": 1632.6079964637756 + "p50": 3146.175980567932, + "p90": 3163.264036178589, + "p95": 3168.8640117645264, + "p99": 3184.159994125366 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1432395776, - "combineLogicalBytes": 1432395776, - "fanoutMean": 5.336090087890625, - "recvTokensMax": 21939, - "stragglerRank": 4, + 
"dispatchLogicalBytes": 3758096384, + "combineLogicalBytes": 3758096384, + "fanoutMean": 8, + "recvTokensMax": 32768, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 @@ -59348,34 +60985,35 @@ ] }, { - "id": "cx-b183f57f", - "identity": "h200|deepep|5120|8|160|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||0c022a63bbcbf42", - "colorKey": "h200_3a47b6c9", - "comparisonKey": "6953183723230449", + "id": "cx-05271e8a", + "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|prefill|normal|none|none|0|tuned||9e6ac678a09f7f8", + "colorKey": "h100_16047c28", + "comparisonKey": "987d0ef30063bb5c", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:54:18.715974+00:00", + "generatedAt": "2026-06-26T23:55:32.762651+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h200-dgxc-slurm_0", - "sku": "h200", + "runner": "h100-dgxc-slurm_15", + "sku": "h100", "backend": "deepep", "phase": "prefill", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "runtime-visible-v1", - "topologyClass": "h200-nvlink-island", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H200 EP8 · deepep · bf16", + "label": "H100 EP8 · deepep · bf16 · balanced", + "model": "DeepSeek-V3/V4", "shape": { - "hidden": 5120, + "hidden": 7168, "topk": 8, - "experts": 160, - "routing": "uniform", - "routingLabel": "uniform", + "experts": 256, + "routing": "balanced", + "routingLabel": "balanced", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, @@ -59400,8 +61038,8 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "0c022a63bbcbf42", - "workloadId": "set:6:28c0c09b13ff0acf", + "traceSignature": "9e6ac678a09f7f8", + "workloadId": "set:3:2dad1a73ff872905", "workloadSource": "canonical-serialized", 
"eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -59409,9 +61047,9 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271763623", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271763623", - "createdAt": "2026-06-26T23:52:49Z", + "id": "28271791847", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271791847", + "createdAt": "2026-06-26T23:55:32.762651+00:00", "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" }, "rows": [ @@ -59419,72 +61057,35 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 97.69599884748459, - "p90": 105.95200210809708, - "p95": 110.68800091743469, - "p99": 117.37599968910217 - }, - "combine": { - "p50": 90.33600240945816, - "p90": 95.64799815416336, - "p95": 98.65599870681763, - "p99": 108.03200304508209 - }, - "roundtrip": { - "p50": 164.32000696659088, - "p90": 174.01599884033203, - "p95": 181.0240000486374, - "p99": 201.56799256801605 - }, - "isolatedSum": { - "p50": 188.03200125694275, - "p90": 201.60000026226044, - "p95": 209.34399962425232, - "p99": 225.40800273418427 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 55674880, - "combineLogicalBytes": 55674880, - "fanoutMean": 5.3095703125, - "recvTokensMax": 699, - "stragglerRank": 3, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 256, - "globalTokens": 2048, - "dispatch": { - "p50": 124.79999661445618, - "p90": 143.96800100803375, - "p95": 151.96800231933594, - "p99": 176.57600343227386 + "p50": 141.9840008020401, + "p90": 148.15999567508698, + "p95": 150.43200552463531, + "p99": 159.71200168132782 }, "combine": { - "p50": 119.71200257539749, - "p90": 133.56800377368927, - "p95": 140.09599387645721, - "p99": 156.70399367809296 + "p50": 131.77600502967834, + "p90": 138.7840062379837, + "p95": 139.80799913406372, + "p99": 147.07200229167938 }, "roundtrip": { - 
"p50": 216.48000180721283, - "p90": 235.35999655723572, - "p95": 243.00800263881683, - "p99": 263.71198892593384 + "p50": 243.1039959192276, + "p90": 250.71999430656433, + "p95": 252.03201174736023, + "p99": 257.9840123653412 }, "isolatedSum": { - "p50": 244.51199918985367, - "p90": 277.536004781723, - "p95": 292.06399619579315, - "p99": 333.2799971103668 + "p50": 273.76000583171844, + "p90": 286.9440019130707, + "p95": 290.24000465869904, + "p99": 306.7840039730072 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 111104000, - "combineLogicalBytes": 111104000, - "fanoutMean": 5.2978515625, - "recvTokensMax": 1387, - "stragglerRank": 3, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 8, + "recvTokensMax": 1024, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 @@ -59493,72 +61094,35 @@ "tokensPerRank": 512, "globalTokens": 4096, "dispatch": { - "p50": 161.6320013999939, - "p90": 176.54399573802948, - "p95": 185.47199666500092, - "p99": 204.96000349521637 - }, - "combine": { - "p50": 177.47199535369873, - "p90": 187.74400651454926, - "p95": 193.88799369335175, - "p99": 218.27200055122375 - }, - "roundtrip": { - "p50": 309.2159926891327, - "p90": 327.2320032119751, - "p95": 333.1199884414673, - "p99": 373.1519877910614 - }, - "isolatedSum": { - "p50": 339.1039967536926, - "p90": 364.28800225257874, - "p95": 379.35999035835266, - "p99": 423.2320040464401 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 223098880, - "combineLogicalBytes": 223098880, - "fanoutMean": 5.319091796875, - "recvTokensMax": 2762, - "stragglerRank": 3, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 1024, - "globalTokens": 8192, - "dispatch": { - "p50": 242.97599494457245, - "p90": 263.264000415802, - "p95": 271.10400795936584, - "p99": 296.54398560523987 + "p50": 282.20799565315247, + "p90": 291.04000329971313, + "p95": 293.3439910411835, + "p99": 299.3920147418976 }, 
"combine": { - "p50": 279.6800136566162, - "p90": 291.55200719833374, - "p95": 296.7039942741394, - "p99": 321.82401418685913 + "p50": 282.71999955177307, + "p90": 287.4560058116913, + "p95": 288.9600098133087, + "p99": 297.5040078163147 }, "roundtrip": { - "p50": 498.30400943756104, - "p90": 516.0959959030151, - "p95": 529.4719934463501, - "p99": 696.6400146484375 + "p50": 530.239999294281, + "p90": 536.9600057601929, + "p95": 540.0320291519165, + "p99": 549.3119955062866 }, "isolatedSum": { - "p50": 522.6560086011887, - "p90": 554.8160076141357, - "p95": 567.8080022335052, - "p99": 618.367999792099 + "p50": 564.9279952049255, + "p90": 578.4960091114044, + "p95": 582.3040008544922, + "p99": 596.8960225582123 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 446730240, - "combineLogicalBytes": 446730240, - "fanoutMean": 5.325439453125, - "recvTokensMax": 5518, - "stragglerRank": 6, + "dispatchLogicalBytes": 469762048, + "combineLogicalBytes": 469762048, + "fanoutMean": 8, + "recvTokensMax": 4096, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -59567,107 +61131,71 @@ "tokensPerRank": 2048, "globalTokens": 16384, "dispatch": { - "p50": 402.52798795700073, - "p90": 414.46399688720703, - "p95": 420.28799653053284, - "p99": 450.72001218795776 + "p50": 815.7439827919006, + "p90": 825.2800107002258, + "p95": 828.5760283470154, + "p99": 835.0080251693726 }, "combine": { - "p50": 478.7839949131012, - "p90": 488.22399973869324, - "p95": 490.4960095882416, - "p99": 499.07198548316956 + "p50": 857.9840064048767, + "p90": 866.27197265625, + "p95": 869.6320056915283, + "p99": 877.8560161590576 }, "roundtrip": { - "p50": 857.6639890670776, - "p90": 869.3439960479736, - "p95": 882.3680281639099, - "p99": 1592.25594997406 + "p50": 1642.5280570983887, + "p90": 1654.5920372009277, + "p95": 1658.944010734558, + "p99": 1692.7039623260498 }, "isolatedSum": { - "p50": 881.3119828701019, - "p90": 902.6879966259003, - "p95": 910.7840061187744, - "p99": 
949.7919976711273 + "p50": 1673.7279891967773, + "p90": 1691.5519833564758, + "p95": 1698.2080340385437, + "p99": 1712.8640413284302 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 893634560, - "combineLogicalBytes": 893634560, - "fanoutMean": 5.32647705078125, - "recvTokensMax": 11032, + "dispatchLogicalBytes": 1879048192, + "combineLogicalBytes": 1879048192, + "fanoutMean": 8, + "recvTokensMax": 16384, "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 - }, - { - "tokensPerRank": 4096, - "globalTokens": 32768, - "dispatch": { - "p50": 750.4640221595764, - "p90": 770.9119915962219, - "p95": 780.6079983711243, - "p99": 812.3199939727783 - }, - "combine": { - "p50": 873.1840252876282, - "p90": 885.6319785118103, - "p95": 893.4080004692078, - "p99": 941.9839978218079 - }, - "roundtrip": { - "p50": 1586.143970489502, - "p90": 1606.112003326416, - "p95": 1623.5840320587158, - "p99": 1662.7839803695679 - }, - "isolatedSum": { - "p50": 1623.6480474472046, - "p90": 1656.5439701080322, - "p95": 1674.015998840332, - "p99": 1754.3039917945862 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1786265600, - "combineLogicalBytes": 1786265600, - "fanoutMean": 5.323486328125, - "recvTokensMax": 21895, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 } ] }, { - "id": "cx-96267e21", - "identity": "h200|deepep|6144|8|256|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", - "colorKey": "h200_3a47b6c9", - "comparisonKey": "27afbf0ad63e86ca", + "id": "cx-06b4b084", + "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|8|prefill|normal|none|none|0|tuned||7aa44c7b86748b9", + "colorKey": "h100_0c515f8b", + "comparisonKey": "e2c5b47e428e10b6", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:55:01.688428+00:00", + "generatedAt": "2026-06-26T23:54:50.950252+00:00", "status": "valid", "publicationStatus": "official", - "runner": 
"h200-dgxc-slurm_9", - "sku": "h200", + "runner": "h100-dgxc-slurm_04", + "sku": "h100", "backend": "deepep", "phase": "prefill", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "runtime-visible-v1", - "topologyClass": "h200-nvlink-island", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H200 EP8 · deepep · bf16", + "label": "H100 EP8 · deepep · bf16 · balanced-rank-local", + "model": "DeepSeek-V3/V4", "shape": { - "hidden": 6144, + "hidden": 7168, "topk": 8, "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", + "routing": "balanced-rank-local", + "routingLabel": "balanced-rank-local", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, @@ -59692,8 +61220,8 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "64d989e2e2a6b31", - "workloadId": "set:6:9f5e1e005a35e937", + "traceSignature": "7aa44c7b86748b9", + "workloadId": "set:3:388ff74baef05c72", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -59701,9 +61229,9 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271778692", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271778692", - "createdAt": "2026-06-26T23:53:16Z", + "id": "28271798809", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271798809", + "createdAt": "2026-06-26T23:54:50.950252+00:00", "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" }, "rows": [ @@ -59711,146 +61239,72 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 108.64000022411346, - "p90": 120.70400267839432, - "p95": 131.58400356769562, - "p99": 146.2399959564209 + "p50": 102.52799838781357, + "p90": 106.52799904346466, + "p95": 
108.31999778747559, + "p99": 112.44799941778183 }, "combine": { - "p50": 95.71199864149094, - "p90": 103.67999970912933, - "p95": 112.73600161075592, - "p99": 121.50400131940842 + "p50": 81.31200075149536, + "p90": 88.128000497818, + "p95": 88.48000317811966, + "p99": 90.4960036277771 }, "roundtrip": { - "p50": 181.0240000486374, - "p90": 199.2959976196289, - "p95": 207.16799795627594, - "p99": 244.9280023574829 + "p50": 155.32800555229187, + "p90": 160.92799603939056, + "p95": 161.79199516773224, + "p99": 165.40800034999847 }, "isolatedSum": { - "p50": 204.3519988656044, - "p90": 224.38400238752365, - "p95": 244.32000517845154, - "p99": 267.7439972758293 + "p50": 183.83999913930893, + "p90": 194.65599954128265, + "p95": 196.80000096559525, + "p99": 202.94400304555893 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 66576384, - "combineLogicalBytes": 66576384, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 1, + "recvTokensMax": 128, "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 256, - "globalTokens": 2048, + "tokensPerRank": 512, + "globalTokens": 4096, "dispatch": { - "p50": 135.29600203037262, - "p90": 148.00000190734863, - "p95": 157.72800147533417, - "p99": 182.20800161361694 + "p50": 124.95999783277512, + "p90": 130.36799430847168, + "p95": 131.9040060043335, + "p99": 142.17600226402283 }, "combine": { - "p50": 128.31999361515045, - "p90": 139.74399864673615, - "p95": 145.7280069589615, - "p99": 158.75199437141418 - }, - "roundtrip": { - "p50": 235.6480062007904, - "p90": 248.6400008201599, - "p95": 259.16799902915955, - "p99": 301.60000920295715 - }, - "isolatedSum": { - "p50": 263.61599564552307, - "p90": 287.7440005540848, - "p95": 303.45600843429565, - "p99": 340.9599959850311 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 133619712, - "combineLogicalBytes": 133619712, - "fanoutMean": 
5.3095703125, - "recvTokensMax": 1422, - "stragglerRank": 3, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 512, - "globalTokens": 4096, - "dispatch": { - "p50": 184.1920018196106, - "p90": 199.8080015182495, - "p95": 208.48000049591064, - "p99": 231.90400004386902 - }, - "combine": { - "p50": 198.62399995326996, - "p90": 212.0320051908493, - "p95": 221.18400037288666, - "p99": 289.7599935531616 - }, - "roundtrip": { - "p50": 349.4719862937927, - "p90": 366.3040101528168, - "p95": 376.8320083618164, - "p99": 431.2959909439087 - }, - "isolatedSum": { - "p50": 382.81600177288055, - "p90": 411.8400067090988, - "p95": 429.6640008687973, - "p99": 521.6639935970306 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 267657216, - "combineLogicalBytes": 267657216, - "fanoutMean": 5.31787109375, - "recvTokensMax": 2779, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 1024, - "globalTokens": 8192, - "dispatch": { - "p50": 278.0799865722656, - "p90": 296.671986579895, - "p95": 305.759996175766, - "p99": 346.8799889087677 - }, - "combine": { - "p50": 313.1519854068756, - "p90": 324.6079981327057, - "p95": 331.9680094718933, - "p99": 350.5600094795227 + "p50": 128.7039965391159, + "p90": 130.43199479579926, + "p95": 136.80000603199005, + "p99": 147.67999947071075 }, "roundtrip": { - "p50": 563.1999969482422, - "p90": 577.9839754104614, - "p95": 589.5040035247803, - "p99": 688.9920234680176 + "p50": 216.25599265098572, + "p90": 220.57600319385529, + "p95": 223.4880030155182, + "p99": 267.8399980068207 }, "isolatedSum": { - "p50": 591.2319719791412, - "p90": 621.2799847126007, - "p95": 637.7280056476593, - "p99": 697.4399983882904 + "p50": 253.66399437189102, + "p90": 260.79998910427094, + "p95": 268.70401203632355, + "p99": 289.8560017347336 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 534380544, - "combineLogicalBytes": 534380544, - "fanoutMean": 5.30859375, - 
"recvTokensMax": 5505, - "stragglerRank": 4, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 1, + "recvTokensMax": 512, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -59859,72 +61313,35 @@ "tokensPerRank": 2048, "globalTokens": 16384, "dispatch": { - "p50": 465.8240079879761, - "p90": 484.3200147151947, - "p95": 496.2559938430786, - "p99": 558.8799715042114 - }, - "combine": { - "p50": 544.3519949913025, - "p90": 560.1599812507629, - "p95": 564.9600028991699, - "p99": 624.0959763526917 - }, - "roundtrip": { - "p50": 981.0879826545715, - "p90": 996.3520169258118, - "p95": 1007.7439546585083, - "p99": 1077.1839618682861 - }, - "isolatedSum": { - "p50": 1010.1760029792786, - "p90": 1044.4799959659576, - "p95": 1061.2159967422485, - "p99": 1182.975947856903 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1066119168, - "combineLogicalBytes": 1066119168, - "fanoutMean": 5.29547119140625, - "recvTokensMax": 10952, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4096, - "globalTokens": 32768, - "dispatch": { - "p50": 858.2080006599426, - "p90": 877.9839873313904, - "p95": 884.0000033378601, - "p99": 925.6640076637268 + "p50": 205.6639939546585, + "p90": 211.13599836826324, + "p95": 216.48000180721283, + "p99": 269.1200077533722 }, "combine": { - "p50": 981.98401927948, - "p90": 994.4959878921509, - "p95": 1000.9280443191528, - "p99": 1111.9040250778198 + "p50": 295.80798745155334, + "p90": 300.54399371147156, + "p95": 305.2160143852234, + "p99": 337.3439908027649 }, "roundtrip": { - "p50": 1810.1119995117188, - "p90": 1826.0159492492676, - "p95": 1833.7279558181763, - "p99": 1947.551965713501 + "p50": 464.4800126552582, + "p90": 471.45599126815796, + "p95": 474.047988653183, + "p99": 503.35997343063354 }, "isolatedSum": { - "p50": 1840.1920199394226, - "p90": 1872.4799752235413, - "p95": 1884.928047657013, - "p99": 2037.5680327415466 + 
"p50": 501.47198140621185, + "p90": 511.6799920797348, + "p95": 521.6960161924362, + "p99": 606.4639985561371 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2131722240, - "combineLogicalBytes": 2131722240, - "fanoutMean": 5.294189453125, - "recvTokensMax": 21781, - "stragglerRank": 5, + "dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 1, + "recvTokensMax": 2048, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -59932,16 +61349,16 @@ ] }, { - "id": "cx-bc48bfe5", - "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||157ca81687ddb63", - "colorKey": "h200_d982b749", - "comparisonKey": "6da1f9e2ab025dbe", + "id": "cx-4058f6f5", + "identity": "h100|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|balanced+eplb|8|prefill|normal|none|none|0|tuned||df54a9510825f71", + "colorKey": "h100_c0c0ad86", + "comparisonKey": "252e0af9287be53d", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:56:28.417730+00:00", + "generatedAt": "2026-06-26T23:59:35.979250+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h200-dgxc-slurm_0", - "sku": "h200", + "runner": "h100-dgxc-slurm_07", + "sku": "h100", "backend": "deepep", "phase": "prefill", "mode": "normal", @@ -59949,20 +61366,21 @@ "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", + "topologyClass": "h100-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H200 EP8 · deepep · bf16", + "label": "H100 EP8 · deepep · bf16 · balanced+eplb", + "model": "DeepSeek-V3 (EPLB physical)", "shape": { "hidden": 7168, "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", + "experts": 288, + "routing": "balanced", + "routingLabel": "balanced+eplb", "routingStep": 0, "unevenTokens": "none", - "eplbEnabled": false, + "eplbEnabled": 
true, "dispatchDtype": "bf16", "activationProfile": "normal", "combineQuantMode": "none" @@ -59984,18 +61402,18 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "157ca81687ddb63", - "workloadId": "set:3:a426d66e479dc893", + "traceSignature": "df54a9510825f71", + "workloadId": "set:6:2dad1a73ff872905", "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, + "eplbImbalanceBefore": 1, + "eplbImbalanceAfter": 1, "backendVersion": "1.2.1", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271827040", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271827040", - "createdAt": "2026-06-26T23:54:52Z", + "id": "28271942138", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271942138", + "createdAt": "2026-06-26T23:59:35.979250+00:00", "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" }, "rows": [ @@ -60003,35 +61421,72 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 116.80000275373459, - "p90": 135.80800592899323, - "p95": 142.14399456977844, - "p99": 172.7679967880249 + "p50": 94.14400160312653, + "p90": 98.01600128412247, + "p95": 99.74399954080582, + "p99": 103.29599678516388 }, "combine": { - "p50": 104.35199737548828, - "p90": 121.56800180673599, - "p95": 125.72799623012543, - "p99": 150.65599977970123 + "p50": 83.03999900817871, + "p90": 88.22400122880936, + "p95": 89.15200084447861, + "p99": 90.81599861383438 }, "roundtrip": { - "p50": 195.77600061893463, - "p90": 216.22399985790253, - "p95": 222.9440063238144, - "p99": 267.67998933792114 + "p50": 157.79200196266174, + "p90": 161.9199961423874, + "p95": 163.5199934244156, + "p99": 167.67999529838562 }, "isolatedSum": { - "p50": 221.15200012922287, - "p90": 257.3760077357292, - "p95": 267.87199079990387, - "p99": 323.42399656772614 + "p50": 177.18400061130524, + "p90": 
186.24000251293182, + "p95": 188.89600038528442, + "p99": 194.11199539899826 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 0, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 2, + "recvTokensMax": 384, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 122.40000069141388, + "p90": 129.95199859142303, + "p95": 143.10400187969208, + "p99": 173.95199835300446 + }, + "combine": { + "p50": 104.41599786281586, + "p90": 106.65600001811981, + "p95": 120.51200121641159, + "p99": 144.28800344467163 + }, + "roundtrip": { + "p50": 198.43199849128723, + "p90": 202.36800611019135, + "p95": 205.1839977502823, + "p99": 235.32800376415253 + }, + "isolatedSum": { + "p50": 226.81599855422974, + "p90": 236.60799860954285, + "p95": 263.61600309610367, + "p99": 318.2400017976761 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 2, + "recvTokensMax": 768, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -60040,71 +61495,145 @@ "tokensPerRank": 512, "globalTokens": 4096, "dispatch": { - "p50": 202.36800611019135, - "p90": 223.1999933719635, - "p95": 231.58399760723114, - "p99": 263.7439966201782 + "p50": 150.43200552463531, + "p90": 154.7199934720993, + "p95": 158.4320068359375, + "p99": 386.1120045185089 }, "combine": { - "p50": 223.93600642681122, - "p90": 236.32000386714935, - "p95": 241.88800156116486, - "p99": 258.7839961051941 + "p50": 141.15199446678162, + "p90": 145.91999351978302, + "p95": 146.55999839305878, + "p99": 147.5200057029724 }, "roundtrip": { - "p50": 399.58399534225464, - "p90": 417.279988527298, - "p95": 424.4160056114197, - "p99": 459.77601408958435 + "p50": 266.1440074443817, + "p90": 
274.9119997024536, + "p95": 278.3679962158203, + "p99": 286.9440019130707 }, "isolatedSum": { - "p50": 426.30401253700256, - "p90": 459.51999723911285, - "p95": 473.471999168396, - "p99": 522.5279927253723 + "p50": 291.58399999141693, + "p90": 300.6399869918823, + "p95": 304.9920052289963, + "p99": 533.6320102214813 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 312266752, - "combineLogicalBytes": 312266752, - "fanoutMean": 5.31787109375, - "recvTokensMax": 2779, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 2, + "recvTokensMax": 1536, "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 207.61600136756897, + "p90": 213.44000101089478, + "p95": 217.98400580883026, + "p99": 245.5040067434311 + }, + "combine": { + "p50": 219.93599832057953, + "p90": 225.0880002975464, + "p95": 227.2000014781952, + "p99": 244.86400187015533 + }, + "roundtrip": { + "p50": 405.023992061615, + "p90": 410.0480079650879, + "p95": 412.31998801231384, + "p99": 437.6640021800995 + }, + "isolatedSum": { + "p50": 427.5519996881485, + "p90": 438.52800130844116, + "p95": 445.18400728702545, + "p99": 490.3680086135864 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 2, + "recvTokensMax": 3072, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, { "tokensPerRank": 2048, "globalTokens": 16384, "dispatch": { - "p50": 542.5919890403748, - "p90": 557.5039982795715, - "p95": 563.3280277252197, - "p99": 587.8080129623413 + "p50": 329.47200536727905, + "p90": 336.60799264907837, + "p95": 339.04001116752625, + "p99": 460.4159891605377 }, "combine": { - "p50": 619.1999912261963, - "p90": 634.5599889755249, - "p95": 646.3040113449097, - "p99": 683.8080286979675 + "p50": 368.3199882507324, + "p90": 375.2639889717102, + "p95": 377.6960074901581, + "p99": 
383.07198882102966 }, "roundtrip": { - "p50": 1131.1999559402466, - "p90": 1146.720051765442, - "p95": 1155.743956565857, - "p99": 1289.952039718628 + "p50": 670.0159907341003, + "p90": 675.8400201797485, + "p95": 678.3360242843628, + "p99": 682.3359727859497 }, "isolatedSum": { - "p50": 1161.791980266571, - "p90": 1192.0639872550964, - "p95": 1209.6320390701294, - "p99": 1271.6160416603088 + "p50": 697.7919936180115, + "p90": 711.8719816207886, + "p95": 716.7360186576843, + "p99": 843.4879779815674 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1243805696, - "combineLogicalBytes": 1243805696, - "fanoutMean": 5.29547119140625, - "recvTokensMax": 10952, + "dispatchLogicalBytes": 469762048, + "combineLogicalBytes": 469762048, + "fanoutMean": 2, + "recvTokensMax": 6144, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 580.8960199356079, + "p90": 590.1119709014893, + "p95": 592.6079750061035, + "p99": 597.5040197372437 + }, + "combine": { + "p50": 647.9039788246155, + "p90": 655.0719738006592, + "p95": 657.2480201721191, + "p99": 660.863995552063 + }, + "roundtrip": { + "p50": 1207.4559926986694, + "p90": 1217.087984085083, + "p95": 1224.0639925003052, + "p99": 1241.312026977539 + }, + "isolatedSum": { + "p50": 1228.7999987602234, + "p90": 1245.1839447021484, + "p95": 1249.8559951782227, + "p99": 1258.3680152893066 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 939524096, + "combineLogicalBytes": 939524096, + "fanoutMean": 2, + "recvTokensMax": 12288, "stragglerRank": 4, "correct": true, "samplesPooled": 600, @@ -60113,16 +61642,16 @@ ] }, { - "id": "cx-5553e87c", - "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", - "colorKey": "h200_d982b749", - "comparisonKey": "6da1f9e2ab025dbe", + "id": "cx-b89c63a5", + "identity": 
"h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|prefill|normal|none|none|0|tuned||38fd0bcf7109c32", + "colorKey": "h100_b654f9b2", + "comparisonKey": "37db9a5137981152", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:49:31.030615+00:00", + "generatedAt": "2026-06-26T23:55:36.358305+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h200-dgxc-slurm_13", - "sku": "h200", + "runner": "h100-dgxc-slurm_13", + "sku": "h100", "backend": "deepep", "phase": "prefill", "mode": "normal", @@ -60130,17 +61659,18 @@ "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", + "topologyClass": "h100-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H200 EP8 · deepep · bf16", + "label": "H100 EP8 · deepep · bf16 · hotspot-single", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", + "routing": "hotspot-single", + "routingLabel": "hotspot-single", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, @@ -60165,8 +61695,8 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "64d989e2e2a6b31", - "workloadId": "set:6:a426d66e479dc893", + "traceSignature": "38fd0bcf7109c32", + "workloadId": "set:3:b952d4a43d688b50", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -60174,9 +61704,9 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271605214", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271605214", - "createdAt": "2026-06-26T23:47:59Z", + "id": "28271820121", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271820121", + "createdAt": "2026-06-26T23:55:36.358305+00:00", "sha": 
"45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" }, "rows": [ @@ -60184,72 +61714,35 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 116.64000153541565, - "p90": 132.9600065946579, - "p95": 139.80799913406372, - "p99": 183.1039935350418 + "p50": 123.61600250005722, + "p90": 127.48800218105316, + "p95": 131.1040073633194, + "p99": 136.19199395179749 }, "combine": { - "p50": 106.11200332641602, - "p90": 121.08799815177917, - "p95": 127.61600315570831, - "p99": 162.7199947834015 + "p50": 116.95999652147293, + "p90": 122.46400117874146, + "p95": 124.95999783277512, + "p99": 131.26400113105774 }, "roundtrip": { - "p50": 197.11999595165253, - "p90": 216.67200326919556, - "p95": 225.2800017595291, - "p99": 246.75199389457703 + "p50": 217.72800385951996, + "p90": 224.89599883556366, + "p95": 229.24800217151642, + "p99": 245.37600576877594 }, "isolatedSum": { - "p50": 222.75200486183167, - "p90": 254.04800474643707, - "p95": 267.42400228977203, - "p99": 345.8239883184433 + "p50": 240.57599902153015, + "p90": 249.95200335979462, + "p95": 256.0640051960945, + "p99": 267.4559950828552 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 256, - "globalTokens": 2048, - "dispatch": { - "p50": 143.8719928264618, - "p90": 166.143998503685, - "p95": 172.7360039949417, - "p99": 195.8719938993454 - }, - "combine": { - "p50": 143.327996134758, - "p90": 159.743994474411, - "p95": 162.81600296497345, - "p99": 171.7119961977005 - }, - "roundtrip": { - "p50": 260.70401072502136, - "p90": 280.8319926261902, - "p95": 286.27198934555054, - "p99": 329.3119966983795 - }, - "isolatedSum": { - "p50": 287.1999889612198, - "p90": 325.887992978096, - "p95": 335.55200695991516, - "p99": 367.5839900970459 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 155889664, - 
"combineLogicalBytes": 155889664, - "fanoutMean": 5.3095703125, - "recvTokensMax": 1422, - "stragglerRank": 5, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1024, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -60258,71 +61751,34 @@ "tokensPerRank": 512, "globalTokens": 4096, "dispatch": { - "p50": 203.23200523853302, - "p90": 227.00800001621246, - "p95": 239.07199501991272, - "p99": 277.1199941635132 - }, - "combine": { - "p50": 224.60800409317017, - "p90": 241.31199717521667, - "p95": 248.44799935817719, - "p99": 268.22400093078613 - }, - "roundtrip": { - "p50": 403.0719995498657, - "p90": 426.68798565864563, - "p95": 434.4640076160431, - "p99": 486.01600527763367 - }, - "isolatedSum": { - "p50": 427.8400093317032, - "p90": 468.31999719142914, - "p95": 487.5199943780899, - "p99": 545.3439950942993 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 312266752, - "combineLogicalBytes": 312266752, - "fanoutMean": 5.31787109375, - "recvTokensMax": 2779, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 1024, - "globalTokens": 8192, - "dispatch": { - "p50": 314.7520124912262, - "p90": 335.90400218963623, - "p95": 347.51999378204346, - "p99": 390.9119963645935 + "p50": 238.91200125217438, + "p90": 257.24801421165466, + "p95": 259.2960000038147, + "p99": 261.9520127773285 }, "combine": { - "p50": 357.9519987106323, - "p90": 372.1280097961426, - "p95": 378.9440095424652, - "p99": 416.6080057621002 + "p50": 271.93599939346313, + "p90": 282.1759879589081, + "p95": 284.8320007324219, + "p99": 288.5119915008545 }, "roundtrip": { - "p50": 646.7199921607971, - "p90": 668.3200001716614, - "p95": 684.4800114631653, - "p99": 754.4959783554077 + "p50": 486.04801297187805, + "p90": 500.8959770202637, + "p95": 503.55201959609985, + "p99": 509.2160105705261 }, "isolatedSum": { - "p50": 672.7040112018585, - "p90": 
708.0320119857788, - "p95": 726.4640033245087, - "p99": 807.5200021266937 + "p50": 510.8480006456375, + "p90": 539.4240021705627, + "p95": 544.1280007362366, + "p99": 550.464004278183 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 623443968, - "combineLogicalBytes": 623443968, - "fanoutMean": 5.30859375, - "recvTokensMax": 5505, + "dispatchLogicalBytes": 311091200, + "combineLogicalBytes": 311091200, + "fanoutMean": 5.2978515625, + "recvTokensMax": 4096, "stragglerRank": 5, "correct": true, "samplesPooled": 600, @@ -60332,72 +61788,35 @@ "tokensPerRank": 2048, "globalTokens": 16384, "dispatch": { - "p50": 542.0799851417542, - "p90": 560.1279735565186, - "p95": 575.3600001335144, - "p99": 736.2880110740662 - }, - "combine": { - "p50": 621.8879818916321, - "p90": 636.031985282898, - "p95": 641.6959762573242, - "p99": 732.7359914779663 - }, - "roundtrip": { - "p50": 1137.279987335205, - "p90": 1170.591950416565, - "p95": 1213.7600183486938, - "p99": 1369.6320056915283 - }, - "isolatedSum": { - "p50": 1163.9679670333862, - "p90": 1196.1599588394165, - "p95": 1217.0559763908386, - "p99": 1469.0240025520325 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1243805696, - "combineLogicalBytes": 1243805696, - "fanoutMean": 5.29547119140625, - "recvTokensMax": 10952, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4096, - "globalTokens": 32768, - "dispatch": { - "p50": 997.3120093345642, - "p90": 1021.28005027771, - "p95": 1029.7919511795044, - "p99": 1212.8000259399414 + "p50": 718.3039784431458, + "p90": 732.3840260505676, + "p95": 736.3520264625549, + "p99": 740.4159903526306 }, "combine": { - "p50": 1121.6000318527222, - "p90": 1139.456033706665, - "p95": 1149.2160558700562, - "p99": 1185.4079961776733 + "p50": 829.9520015716553, + "p90": 838.047981262207, + "p95": 840.2559757232666, + "p99": 846.6879725456238 }, "roundtrip": { - "p50": 2089.888095855713, - "p90": 2112.6720905303955, - "p95": 
2126.431941986084, - "p99": 2277.951955795288 + "p50": 1516.2559747695923, + "p90": 1525.3759622573853, + "p95": 1528.223991394043, + "p99": 1535.2319478988647 }, "isolatedSum": { - "p50": 2118.9120411872864, - "p90": 2160.736083984375, - "p95": 2179.0080070495605, - "p99": 2398.2080221176147 + "p50": 1548.255980014801, + "p90": 1570.4320073127747, + "p95": 1576.6080021858215, + "p99": 1587.1039628982544 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2487009280, - "combineLogicalBytes": 2487009280, - "fanoutMean": 5.294189453125, - "recvTokensMax": 21781, - "stragglerRank": 5, + "dispatchLogicalBytes": 1241511936, + "combineLogicalBytes": 1241511936, + "fanoutMean": 5.28570556640625, + "recvTokensMax": 16384, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -60405,34 +61824,35 @@ ] }, { - "id": "cx-71f62108", - "identity": "h200|deepep|7168|8|256|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", - "colorKey": "h200_3a47b6c9", - "comparisonKey": "c80c3e7446de9680", + "id": "cx-fa73d33e", + "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|prefill|normal|none|none|0|tuned||bfbb64a166e9f1c", + "colorKey": "h100_b654f9b2", + "comparisonKey": "37db9a5137981152", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:50:05.486154+00:00", + "generatedAt": "2026-06-27T00:01:55.460957+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h200-dgxc-slurm_12", - "sku": "h200", + "runner": "h100-dgxc-slurm_13", + "sku": "h100", "backend": "deepep", "phase": "prefill", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "runtime-visible-v1", - "topologyClass": "h200-nvlink-island", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H200 EP8 · deepep · bf16", + 
"label": "H100 EP8 · deepep · bf16 · hotspot-single", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", + "routing": "hotspot-single", + "routingLabel": "hotspot-single", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, @@ -60457,8 +61877,8 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "64d989e2e2a6b31", - "workloadId": "set:6:a426d66e479dc893", + "traceSignature": "bfbb64a166e9f1c", + "workloadId": "set:6:b952d4a43d688b50", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -60466,9 +61886,9 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271618490", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271618490", - "createdAt": "2026-06-26T23:48:27Z", + "id": "28272012738", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272012738", + "createdAt": "2026-06-27T00:01:55.460957+00:00", "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" }, "rows": [ @@ -60476,35 +61896,35 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 118.68800222873688, - "p90": 127.3919939994812, - "p95": 133.31200182437897, - "p99": 144.57599818706512 + "p50": 129.08799946308136, + "p90": 131.42399489879608, + "p95": 132.03200697898865, + "p99": 135.903999209404 }, "combine": { - "p50": 105.8880016207695, - "p90": 112.76800185441971, - "p95": 117.79200285673141, - "p99": 129.72800433635712 + "p50": 119.87199634313583, + "p90": 121.98399752378464, + "p95": 122.36800044775009, + "p99": 125.72799623012543 }, "roundtrip": { - "p50": 199.35999810695648, - "p90": 209.4399929046631, - "p95": 215.7440036535263, - "p99": 257.82400369644165 + "p50": 219.200000166893, + "p90": 223.80800545215607, + "p95": 224.7679978609085, + "p99": 228.0000001192093 }, "isolatedSum": 
{ - "p50": 224.57600384950638, - "p90": 240.1599958539009, - "p95": 251.10400468111038, - "p99": 274.30400252342224 + "p50": 248.9599958062172, + "p90": 253.40799242258072, + "p95": 254.40000742673874, + "p99": 261.6319954395294 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 1, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1024, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -60513,35 +61933,35 @@ "tokensPerRank": 256, "globalTokens": 2048, "dispatch": { - "p50": 145.4080045223236, - "p90": 154.91199493408203, - "p95": 159.2639982700348, - "p99": 170.6559956073761 + "p50": 163.96799683570862, + "p90": 168.19199919700623, + "p95": 170.43200135231018, + "p99": 173.12000691890717 }, "combine": { - "p50": 144.3520039319992, - "p90": 150.59199929237366, - "p95": 153.05599570274353, - "p99": 167.4879938364029 + "p50": 171.55200242996216, + "p90": 176.83200538158417, + "p95": 178.3680021762848, + "p99": 180.60800433158875 }, "roundtrip": { - "p50": 263.5200023651123, - "p90": 270.3680098056793, - "p95": 274.7200131416321, - "p99": 291.1039888858795 + "p50": 306.7840039730072, + "p90": 310.94399094581604, + "p95": 312.3199939727783, + "p99": 314.7839903831482 }, "isolatedSum": { - "p50": 289.7600084543228, - "p90": 305.5039942264557, - "p95": 312.3199939727783, - "p99": 338.143989443779 + "p50": 335.5199992656708, + "p90": 345.0240045785904, + "p95": 348.80000352859497, + "p99": 353.7280112504959 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 155889664, - "combineLogicalBytes": 155889664, - "fanoutMean": 5.3095703125, - "recvTokensMax": 1422, - "stragglerRank": 5, + "dispatchLogicalBytes": 156090368, + "combineLogicalBytes": 156090368, + "fanoutMean": 5.31640625, + "recvTokensMax": 2048, + "stragglerRank": 7, "correct": true, "samplesPooled": 
600, "trials": 3 @@ -60550,35 +61970,35 @@ "tokensPerRank": 512, "globalTokens": 4096, "dispatch": { - "p50": 205.1839977502823, - "p90": 219.04000639915466, - "p95": 227.743998169899, - "p99": 242.5920069217682 + "p50": 237.34399676322937, + "p90": 242.11199581623077, + "p95": 244.1920042037964, + "p99": 248.28800559043884 }, "combine": { - "p50": 221.50400280952454, - "p90": 232.96000063419342, - "p95": 239.58399891853333, - "p99": 263.0400061607361 + "p50": 268.22400093078613, + "p90": 273.53599667549133, + "p95": 274.84801411628723, + "p99": 277.69601345062256 }, "roundtrip": { - "p50": 397.8239893913269, - "p90": 412.03200817108154, - "p95": 421.08801007270813, - "p99": 463.8400077819824 + "p50": 482.7519953250885, + "p90": 488.44799399375916, + "p95": 490.4319941997528, + "p99": 495.07200717926025 }, "isolatedSum": { - "p50": 426.6880005598068, - "p90": 452.0000070333481, - "p95": 467.3279970884323, - "p99": 505.6320130825043 + "p50": 505.5679976940155, + "p90": 515.6479924917221, + "p95": 519.0400183200836, + "p99": 525.9840190410614 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 312266752, - "combineLogicalBytes": 312266752, - "fanoutMean": 5.31787109375, - "recvTokensMax": 2779, - "stragglerRank": 5, + "dispatchLogicalBytes": 311091200, + "combineLogicalBytes": 311091200, + "fanoutMean": 5.2978515625, + "recvTokensMax": 4096, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -60587,35 +62007,35 @@ "tokensPerRank": 1024, "globalTokens": 8192, "dispatch": { - "p50": 316.0000145435333, - "p90": 324.0959942340851, - "p95": 328.99200916290283, - "p99": 351.6159951686859 + "p50": 391.61598682403564, + "p90": 397.5679874420166, + "p95": 399.9040126800537, + "p99": 407.1039855480194 }, "combine": { - "p50": 350.17600655555725, - "p90": 358.5599958896637, - "p95": 363.2960021495819, - "p99": 392.8639888763428 + "p50": 455.6480050086975, + "p90": 461.5359902381897, + "p95": 463.0720019340515, + "p99": 466.5600061416626 }, 
"roundtrip": { - "p50": 639.4559741020203, - "p90": 655.1039814949036, - "p95": 665.3760075569153, - "p99": 768.8000202178955 + "p50": 823.2960104942322, + "p90": 829.5040130615234, + "p95": 831.5839767456055, + "p99": 835.4560136795044 }, "isolatedSum": { - "p50": 666.1760210990906, - "p90": 682.6559901237488, - "p95": 692.2880113124847, - "p99": 744.4799840450287 + "p50": 847.2639918327332, + "p90": 859.1039776802063, + "p95": 862.9760146141052, + "p99": 873.663991689682 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 623443968, - "combineLogicalBytes": 623443968, - "fanoutMean": 5.30859375, - "recvTokensMax": 5505, - "stragglerRank": 1, + "dispatchLogicalBytes": 620648448, + "combineLogicalBytes": 620648448, + "fanoutMean": 5.2847900390625, + "recvTokensMax": 8192, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 @@ -60624,35 +62044,35 @@ "tokensPerRank": 2048, "globalTokens": 16384, "dispatch": { - "p50": 527.4559855461121, - "p90": 543.936014175415, - "p95": 551.3280034065247, - "p99": 568.5439705848694 + "p50": 720.6720113754272, + "p90": 733.6320281028748, + "p95": 737.5680208206177, + "p99": 744.9280023574829 }, "combine": { - "p50": 612.384021282196, - "p90": 627.3279786109924, - "p95": 639.519989490509, - "p99": 984.5119714736938 + "p50": 825.7279992103577, + "p90": 834.559977054596, + "p95": 837.3759984970093, + "p99": 841.2479758262634 }, "roundtrip": { - "p50": 1111.6160154342651, - "p90": 1130.6240558624268, - "p95": 1139.7759914398193, - "p99": 1297.5679636001587 + "p50": 1514.240026473999, + "p90": 1523.7120389938354, + "p95": 1526.6239643096924, + "p99": 1534.3999862670898 }, "isolatedSum": { - "p50": 1139.840006828308, - "p90": 1171.2639927864075, - "p95": 1190.8479928970337, - "p99": 1553.0559420585632 + "p50": 1546.400010585785, + "p90": 1568.1920051574707, + "p95": 1574.944019317627, + "p99": 1586.1759781837463 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1243805696, - "combineLogicalBytes": 
1243805696, - "fanoutMean": 5.29547119140625, - "recvTokensMax": 10952, - "stragglerRank": 1, + "dispatchLogicalBytes": 1241511936, + "combineLogicalBytes": 1241511936, + "fanoutMean": 5.28570556640625, + "recvTokensMax": 16384, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -60661,35 +62081,35 @@ "tokensPerRank": 4096, "globalTokens": 32768, "dispatch": { - "p50": 994.4959878921509, - "p90": 1017.6960229873657, - "p95": 1024.7360467910767, - "p99": 1044.8640584945679 + "p50": 1379.1359663009644, + "p90": 1390.1439905166626, + "p95": 1393.280029296875, + "p99": 1400.480031967163 }, "combine": { - "p50": 1103.9680242538452, - "p90": 1115.7439947128296, - "p95": 1122.3039627075195, - "p99": 1306.1439990997314 + "p50": 1540.5759811401367, + "p90": 1547.4879741668701, + "p95": 1549.7599840164185, + "p99": 1553.1519651412964 }, "roundtrip": { - "p50": 2064.448118209839, - "p90": 2089.344024658203, - "p95": 2106.0800552368164, - "p99": 2285.504102706909 + "p50": 2893.3119773864746, + "p90": 2902.30393409729, + "p95": 2905.695915222168, + "p99": 2912.480115890503 }, "isolatedSum": { - "p50": 2098.464012145996, - "p90": 2133.4400177001953, - "p95": 2147.040009498596, - "p99": 2351.0080575942993 + "p50": 2919.711947441101, + "p90": 2937.6319646835327, + "p95": 2943.0400133132935, + "p99": 2953.6319971084595 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2487009280, - "combineLogicalBytes": 2487009280, - "fanoutMean": 5.294189453125, - "recvTokensMax": 21781, - "stragglerRank": 5, + "dispatchLogicalBytes": 2484242432, + "combineLogicalBytes": 2484242432, + "fanoutMean": 5.288299560546875, + "recvTokensMax": 32768, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 @@ -60697,37 +62117,38 @@ ] }, { - "id": "cx-a8fb4d9b", - "identity": "h200|deepep|7168|8|384|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||cd50548525dafdf", - "colorKey": "h200_3a47b6c9", - "comparisonKey": "f6581a3621ac6cd2", 
+ "id": "cx-e91dfe75", + "identity": "h100|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|hotspot-single+eplb|8|prefill|normal|none|none|0|tuned||29ae5ace13636f8", + "colorKey": "h100_456a963c", + "comparisonKey": "54b53207b090a644", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:53:25.459367+00:00", + "generatedAt": "2026-06-27T00:01:57.841646+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h200-dgxc-slurm_13", - "sku": "h200", + "runner": "h100-dgxc-slurm_17", + "sku": "h100", "backend": "deepep", "phase": "prefill", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "runtime-visible-v1", - "topologyClass": "h200-nvlink-island", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H200 EP8 · deepep · bf16", + "label": "H100 EP8 · deepep · bf16 · hotspot-single+eplb", + "model": "DeepSeek-V3 (EPLB physical)", "shape": { "hidden": 7168, "topk": 8, - "experts": 384, - "routing": "uniform", - "routingLabel": "uniform", + "experts": 288, + "routing": "hotspot-single", + "routingLabel": "hotspot-single+eplb", "routingStep": 0, "unevenTokens": "none", - "eplbEnabled": false, + "eplbEnabled": true, "dispatchDtype": "bf16", "activationProfile": "normal", "combineQuantMode": "none" @@ -60749,18 +62170,18 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "cd50548525dafdf", - "workloadId": "set:6:b23bc0c4b6402c69", + "traceSignature": "29ae5ace13636f8", + "workloadId": "set:6:b952d4a43d688b50", "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, + "eplbImbalanceBefore": 1.8466796875, + "eplbImbalanceAfter": 1.0002700343276514, "backendVersion": "1.2.1", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", 
"run": { - "id": "28271732597", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271732597", - "createdAt": "2026-06-26T23:51:54Z", + "id": "28272016505", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272016505", + "createdAt": "2026-06-27T00:01:57.841646+00:00", "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" }, "rows": [ @@ -60768,35 +62189,35 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 115.90400338172913, - "p90": 134.14399325847626, - "p95": 140.35199582576752, - "p99": 160.38399934768677 + "p50": 109.69600081443787, + "p90": 113.98400366306305, + "p95": 115.77600240707397, + "p99": 122.43200093507767 }, "combine": { - "p50": 104.09600287675858, - "p90": 119.71200257539749, - "p95": 124.64000284671783, - "p99": 145.31199634075165 + "p50": 105.50399869680405, + "p90": 111.10399663448334, + "p95": 112.31999844312668, + "p99": 114.27199840545654 }, "roundtrip": { - "p50": 195.64799964427948, - "p90": 212.8639966249466, - "p95": 219.9999988079071, - "p99": 230.3680032491684 + "p50": 196.6720074415207, + "p90": 203.2960057258606, + "p95": 204.0960043668747, + "p99": 207.64799416065216 }, "isolatedSum": { - "p50": 220.0000062584877, - "p90": 253.85599583387375, - "p95": 264.99199867248535, - "p99": 305.6959956884384 + "p50": 215.1999995112419, + "p90": 225.0880002975464, + "p95": 228.09600085020065, + "p99": 236.7039993405342 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77514752, - "combineLogicalBytes": 77514752, - "fanoutMean": 5.2802734375, - "recvTokensMax": 707, - "stragglerRank": 7, + "dispatchLogicalBytes": 77701120, + "combineLogicalBytes": 77701120, + "fanoutMean": 5.29296875, + "recvTokensMax": 697, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 @@ -60805,35 +62226,35 @@ "tokensPerRank": 256, "globalTokens": 2048, "dispatch": { - "p50": 145.53600549697876, - "p90": 162.30399906635284, - "p95": 170.3999936580658, - "p99": 184.64000523090363 + "p50": 
144.44799721240997, + "p90": 149.63200688362122, + "p95": 151.2320041656494, + "p99": 155.83999454975128 }, "combine": { - "p50": 143.77599954605103, - "p90": 157.21599757671356, - "p95": 162.27200627326965, - "p99": 175.64800381660461 + "p50": 152.0639955997467, + "p90": 153.60000729560852, + "p95": 154.4640064239502, + "p99": 158.52800011634827 }, "roundtrip": { - "p50": 265.1199996471405, - "p90": 283.90398621559143, - "p95": 289.0239953994751, - "p99": 302.0159900188446 + "p50": 265.0560140609741, + "p90": 268.92799139022827, + "p95": 270.687997341156, + "p99": 273.21600914001465 }, "isolatedSum": { - "p50": 289.3120050430298, - "p90": 319.5199966430664, - "p95": 332.67199993133545, - "p99": 360.28800904750824 + "p50": 296.5119928121567, + "p90": 303.23201417922974, + "p95": 305.6960105895996, + "p99": 314.36799466609955 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 154570752, - "combineLogicalBytes": 154570752, - "fanoutMean": 5.2646484375, - "recvTokensMax": 1391, - "stragglerRank": 5, + "dispatchLogicalBytes": 155187200, + "combineLogicalBytes": 155187200, + "fanoutMean": 5.28564453125, + "recvTokensMax": 1372, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 @@ -60842,35 +62263,35 @@ "tokensPerRank": 512, "globalTokens": 4096, "dispatch": { - "p50": 205.4399996995926, - "p90": 224.0000069141388, - "p95": 232.06399381160736, - "p99": 262.719988822937 + "p50": 199.64799284934998, + "p90": 203.45599949359894, + "p95": 204.79999482631683, + "p99": 208.76799523830414 }, "combine": { - "p50": 225.0880002975464, - "p90": 243.96799504756927, - "p95": 250.0160038471222, - "p99": 335.55200695991516 + "p50": 228.5120040178299, + "p90": 234.23999547958374, + "p95": 235.167995095253, + "p99": 236.95999383926392 }, "roundtrip": { - "p50": 403.55199575424194, - "p90": 432.8959882259369, - "p95": 447.1360146999359, - "p99": 589.6000266075134 + "p50": 403.80799770355225, + "p90": 408.35198760032654, + "p95": 410.0799858570099, + "p99": 
413.88800740242004 }, "isolatedSum": { - "p50": 430.527999997139, - "p90": 467.96800196170807, - "p95": 482.07999765872955, - "p99": 598.2719957828522 + "p50": 428.15999686717987, + "p90": 437.6959949731827, + "p95": 439.9679899215698, + "p99": 445.72798907756805 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 309772288, - "combineLogicalBytes": 309772288, - "fanoutMean": 5.275390625, - "recvTokensMax": 2754, - "stragglerRank": 5, + "dispatchLogicalBytes": 311162880, + "combineLogicalBytes": 311162880, + "fanoutMean": 5.299072265625, + "recvTokensMax": 2761, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 @@ -60879,35 +62300,35 @@ "tokensPerRank": 1024, "globalTokens": 8192, "dispatch": { - "p50": 312.79999017715454, - "p90": 334.7199857234955, - "p95": 340.1919901371002, - "p99": 368.73599886894226 + "p50": 305.4080009460449, + "p90": 310.016006231308, + "p95": 311.7760121822357, + "p99": 316.76799058914185 }, "combine": { - "p50": 356.1280071735382, - "p90": 367.45598912239075, - "p95": 372.6719915866852, - "p99": 395.77600359916687 + "p50": 367.19998717308044, + "p90": 374.0159869194031, + "p95": 375.5199909210205, + "p99": 379.2960047721863 }, "roundtrip": { - "p50": 643.1999802589417, - "p90": 657.3759913444519, - "p95": 663.7439727783203, - "p99": 708.1599831581116 + "p50": 649.1199731826782, + "p90": 655.6479930877686, + "p95": 658.4640145301819, + "p99": 661.9840264320374 }, "isolatedSum": { - "p50": 668.9279973506927, - "p90": 702.1759748458862, - "p95": 712.8639817237854, - "p99": 764.5120024681091 + "p50": 672.6079881191254, + "p90": 684.0319931507111, + "p95": 687.2960031032562, + "p99": 696.0639953613281 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 619501568, - "combineLogicalBytes": 619501568, - "fanoutMean": 5.2750244140625, - "recvTokensMax": 5469, - "stragglerRank": 6, + "dispatchLogicalBytes": 619974656, + "combineLogicalBytes": 619974656, + "fanoutMean": 5.279052734375, + "recvTokensMax": 5481, + 
"stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -60916,35 +62337,35 @@ "tokensPerRank": 2048, "globalTokens": 16384, "dispatch": { - "p50": 523.2639908790588, - "p90": 541.9520139694214, - "p95": 552.2559881210327, - "p99": 611.3280057907104 + "p50": 528.8640260696411, + "p90": 539.3919944763184, + "p95": 543.8079833984375, + "p99": 805.9520125389099 }, "combine": { - "p50": 611.0079884529114, - "p90": 623.0080127716064, - "p95": 630.3359866142273, - "p99": 657.2480201721191 + "p50": 633.184015750885, + "p90": 640.9919857978821, + "p95": 643.9039707183838, + "p99": 648.5440135002136 }, "roundtrip": { - "p50": 1108.7679862976074, - "p90": 1123.9999532699585, - "p95": 1132.3200464248657, - "p99": 1233.63196849823 + "p50": 1132.032036781311, + "p90": 1143.8720226287842, + "p95": 1147.3920345306396, + "p99": 1154.8160314559937 }, "isolatedSum": { - "p50": 1134.2719793319702, - "p90": 1164.9600267410278, - "p95": 1182.59197473526, - "p99": 1268.5760259628296 + "p50": 1162.0480418205261, + "p90": 1180.3839802742004, + "p95": 1187.7119541168213, + "p99": 1454.4960260391235 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1239375872, - "combineLogicalBytes": 1239375872, - "fanoutMean": 5.276611328125, + "dispatchLogicalBytes": 1240020992, + "combineLogicalBytes": 1240020992, + "fanoutMean": 5.27935791015625, "recvTokensMax": 10883, - "stragglerRank": 5, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -60953,35 +62374,35 @@ "tokensPerRank": 4096, "globalTokens": 32768, "dispatch": { - "p50": 984.5119714736938, - "p90": 1019.4560289382935, - "p95": 1036.128044128418, - "p99": 1103.0399799346924 + "p50": 985.8880043029785, + "p90": 1005.5680274963379, + "p95": 1010.9119415283203, + "p99": 1020.5440521240234 }, "combine": { - "p50": 1114.6559715270996, - "p90": 1129.472017288208, - "p95": 1136.896014213562, - "p99": 1180.3200244903564 + "p50": 1144.1919803619385, + "p90": 1153.92005443573, + "p95": 1157.439947128296, 
+ "p99": 1163.6799573898315 }, "roundtrip": { - "p50": 2057.408094406128, - "p90": 2091.423988342285, - "p95": 2103.264093399048, - "p99": 2406.8479537963867 + "p50": 2094.464063644409, + "p90": 2109.8880767822266, + "p95": 2115.295886993408, + "p99": 2124.5760917663574 }, "isolatedSum": { - "p50": 2099.1679430007935, - "p90": 2148.9280462265015, - "p95": 2173.02405834198, - "p99": 2283.360004425049 + "p50": 2130.079984664917, + "p90": 2159.488081932068, + "p95": 2168.351888656616, + "p99": 2184.224009513855 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2479669248, - "combineLogicalBytes": 2479669248, - "fanoutMean": 5.278564453125, - "recvTokensMax": 21730, - "stragglerRank": 6, + "dispatchLogicalBytes": 2480414720, + "combineLogicalBytes": 2480414720, + "fanoutMean": 5.2801513671875, + "recvTokensMax": 21702, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -60989,16 +62410,16 @@ ] }, { - "id": "cx-ad612267", - "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|prefill|normal|none|none|0|tuned||0a3064a2af0dd39", - "colorKey": "h200_b5c683eb", - "comparisonKey": "b18bebc70bf6167d", + "id": "cx-f8095d72", + "identity": "h100|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|uniform+eplb|8|prefill|normal|none|none|0|tuned||2225dbbdab9bf2d", + "colorKey": "h100_fb5b86de", + "comparisonKey": "cd6da73322e03923", "schemaVersion": 3, - "generatedAt": "2026-06-27T00:03:03.036669+00:00", + "generatedAt": "2026-06-26T23:59:17.404659+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h200-dgxc-slurm_13", - "sku": "h200", + "runner": "h100-dgxc-slurm_04", + "sku": "h100", "backend": "deepep", "phase": "prefill", "mode": "normal", @@ -61006,20 +62427,21 @@ "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", + "topologyClass": "h100-nvlink-island", "transport": "nvlink", "worldSize": 8, 
"epSize": 8, - "label": "H200 EP8 · deepep · bf16 · balanced", + "label": "H100 EP8 · deepep · bf16 · uniform+eplb", + "model": "DeepSeek-V3 (EPLB physical)", "shape": { "hidden": 7168, "topk": 8, - "experts": 256, - "routing": "balanced", - "routingLabel": "balanced", + "experts": 288, + "routing": "uniform", + "routingLabel": "uniform+eplb", "routingStep": 0, "unevenTokens": "none", - "eplbEnabled": false, + "eplbEnabled": true, "dispatchDtype": "bf16", "activationProfile": "normal", "combineQuantMode": "none" @@ -61041,18 +62463,18 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "0a3064a2af0dd39", - "workloadId": "set:6:2dad1a73ff872905", + "traceSignature": "2225dbbdab9bf2d", + "workloadId": "set:6:a426d66e479dc893", "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, + "eplbImbalanceBefore": 1.006072998046875, + "eplbImbalanceAfter": 1.0000152587890625, "backendVersion": "1.2.1", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28272035224", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272035224", - "createdAt": "2026-06-27T00:01:30Z", + "id": "28271927356", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271927356", + "createdAt": "2026-06-26T23:59:17.404659+00:00", "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" }, "rows": [ @@ -61060,35 +62482,35 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 132.60799646377563, - "p90": 151.8400013446808, - "p95": 157.3760062456131, - "p99": 181.47200345993042 + "p50": 113.02399635314941, + "p90": 118.40000003576279, + "p95": 120.12799829244614, + "p99": 126.0479986667633 }, "combine": { - "p50": 125.40799379348755, - "p90": 146.59200608730316, - "p95": 152.73599326610565, - "p99": 228.5439968109131 + "p50": 105.66399991512299, + "p90": 108.89600217342377, + "p95": 
112.06399649381638, + "p99": 115.9679964184761 }, "roundtrip": { - "p50": 230.20799458026886, - "p90": 244.51200664043427, - "p95": 253.4080147743225, - "p99": 302.2719919681549 + "p50": 195.8719938993454, + "p90": 201.24800503253937, + "p95": 202.62399315834045, + "p99": 207.39200711250305 }, "isolatedSum": { - "p50": 258.0159902572632, - "p90": 298.43200743198395, - "p95": 310.11199951171875, - "p99": 410.0160002708435 + "p50": 218.6879962682724, + "p90": 227.29600220918655, + "p95": 232.1919947862625, + "p99": 242.0159950852394 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 117440512, - "combineLogicalBytes": 117440512, - "fanoutMean": 8, - "recvTokensMax": 1024, - "stragglerRank": 5, + "dispatchLogicalBytes": 77041664, + "combineLogicalBytes": 77041664, + "fanoutMean": 5.248046875, + "recvTokensMax": 686, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 @@ -61097,35 +62519,35 @@ "tokensPerRank": 256, "globalTokens": 2048, "dispatch": { - "p50": 175.99999904632568, - "p90": 196.83200120925903, - "p95": 202.2400051355362, - "p99": 229.5680046081543 + "p50": 146.97599411010742, + "p90": 150.91200172901154, + "p95": 151.7760008573532, + "p99": 155.39200603961945 }, "combine": { - "p50": 175.58400332927704, - "p90": 189.82400000095367, - "p95": 193.79200041294098, - "p99": 265.5999958515167 + "p50": 148.3519971370697, + "p90": 153.82400155067444, + "p95": 154.4959992170334, + "p99": 156.67200088500977 }, "roundtrip": { - "p50": 323.0719864368439, - "p90": 339.29601311683655, - "p95": 345.3119993209839, - "p99": 369.4399893283844 + "p50": 265.9200131893158, + "p90": 270.9760069847107, + "p95": 273.1199860572815, + "p99": 278.4000039100647 }, "isolatedSum": { - "p50": 351.5840023756027, - "p90": 386.6560012102127, - "p95": 396.0320055484772, - "p99": 495.168000459671 + "p50": 295.3279912471771, + "p90": 304.736003279686, + "p95": 306.2720000743866, + "p99": 312.0640069246292 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 
234881024, - "combineLogicalBytes": 234881024, - "fanoutMean": 8, - "recvTokensMax": 2048, - "stragglerRank": 5, + "dispatchLogicalBytes": 154542080, + "combineLogicalBytes": 154542080, + "fanoutMean": 5.263671875, + "recvTokensMax": 1365, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 @@ -61134,35 +62556,35 @@ "tokensPerRank": 512, "globalTokens": 4096, "dispatch": { - "p50": 275.35998821258545, - "p90": 301.472008228302, - "p95": 311.19999289512634, - "p99": 359.0080142021179 + "p50": 200.32000541687012, + "p90": 203.3279985189438, + "p95": 204.57600057125092, + "p99": 208.28799903392792 }, "combine": { - "p50": 268.5120105743408, - "p90": 284.38401222229004, - "p95": 289.3120050430298, - "p99": 321.6319978237152 + "p50": 229.8559993505478, + "p90": 235.4239970445633, + "p95": 236.4480048418045, + "p99": 237.98400163650513 }, "roundtrip": { - "p50": 519.9040174484253, - "p90": 549.2479801177979, - "p95": 559.6160292625427, - "p99": 602.4960279464722 + "p50": 402.46400237083435, + "p90": 407.9360067844391, + "p95": 410.0480079650879, + "p99": 413.1839871406555 }, "isolatedSum": { - "p50": 543.8719987869263, - "p90": 585.856020450592, - "p95": 600.5119979381561, - "p99": 680.6400120258331 + "p50": 430.1760047674179, + "p90": 438.7519955635071, + "p95": 441.0240054130554, + "p99": 446.27200067043304 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 469762048, - "combineLogicalBytes": 469762048, - "fanoutMean": 8, - "recvTokensMax": 4096, - "stragglerRank": 4, + "dispatchLogicalBytes": 310589440, + "combineLogicalBytes": 310589440, + "fanoutMean": 5.289306640625, + "recvTokensMax": 2746, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 @@ -61171,35 +62593,35 @@ "tokensPerRank": 1024, "globalTokens": 8192, "dispatch": { - "p50": 458.75200629234314, - "p90": 610.4320287704468, - "p95": 643.1999802589417, - "p99": 663.7120246887207 + "p50": 303.51999402046204, + "p90": 308.6720108985901, + "p95": 310.2720081806183, 
+ "p99": 315.8400058746338 }, "combine": { - "p50": 451.3919949531555, - "p90": 462.911993265152, - "p95": 471.23199701309204, - "p99": 480.8639883995056 + "p50": 366.3040101528168, + "p90": 374.33600425720215, + "p95": 375.99998712539673, + "p99": 380.0320029258728 }, "roundtrip": { - "p50": 882.0160031318665, - "p90": 899.4879722595215, - "p95": 906.6879749298096, - "p99": 926.688015460968 + "p50": 643.9679861068726, + "p90": 650.9119868278503, + "p95": 653.4720063209534, + "p99": 656.9280028343201 }, "isolatedSum": { - "p50": 910.1440012454987, - "p90": 1073.3440220355988, - "p95": 1114.4319772720337, - "p99": 1144.5760130882263 + "p50": 669.8240041732788, + "p90": 683.0080151557922, + "p95": 686.271995306015, + "p99": 695.8720088005066 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 939524096, - "combineLogicalBytes": 939524096, - "fanoutMean": 8, - "recvTokensMax": 8192, - "stragglerRank": 5, + "dispatchLogicalBytes": 619171840, + "combineLogicalBytes": 619171840, + "fanoutMean": 5.272216796875, + "recvTokensMax": 5467, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 @@ -61208,35 +62630,35 @@ "tokensPerRank": 2048, "globalTokens": 16384, "dispatch": { - "p50": 819.5520043373108, - "p90": 837.664008140564, - "p95": 856.3200235366821, - "p99": 920.5440282821655 + "p50": 526.6559720039368, + "p90": 533.8879823684692, + "p95": 536.0000133514404, + "p99": 542.4000024795532 }, "combine": { - "p50": 816.6080117225647, - "p90": 834.879994392395, - "p95": 846.9439744949341, - "p99": 919.264018535614 + "p50": 628.607988357544, + "p90": 636.5759968757629, + "p95": 639.3600106239319, + "p99": 643.455982208252 }, "roundtrip": { - "p50": 1605.247974395752, - "p90": 1634.1760158538818, - "p95": 1654.9760103225708, - "p99": 1745.8560466766357 + "p50": 1128.5760402679443, + "p90": 1137.984037399292, + "p95": 1141.5679454803467, + "p99": 1146.1759805679321 }, "isolatedSum": { - "p50": 1636.1600160598755, - "p90": 1672.544002532959, - "p95": 
1703.2639980316162, - "p99": 1839.8080468177795 + "p50": 1155.2639603614807, + "p90": 1170.4639792442322, + "p95": 1175.3600239753723, + "p99": 1185.8559846878052 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1879048192, - "combineLogicalBytes": 1879048192, - "fanoutMean": 8, - "recvTokensMax": 16384, - "stragglerRank": 5, + "dispatchLogicalBytes": 1238945792, + "combineLogicalBytes": 1238945792, + "fanoutMean": 5.2747802734375, + "recvTokensMax": 10913, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 @@ -61245,35 +62667,35 @@ "tokensPerRank": 4096, "globalTokens": 32768, "dispatch": { - "p50": 1564.7679567337036, - "p90": 1586.0799551010132, - "p95": 1601.8879413604736, - "p99": 1723.0720520019531 + "p50": 1018.4320211410522, + "p90": 1046.496033668518, + "p95": 1056.1920404434204, + "p99": 1073.5039710998535 }, "combine": { - "p50": 1521.9199657440186, - "p90": 1538.7840270996094, - "p95": 1547.104001045227, - "p99": 1626.911997795105 + "p50": 1148.5120058059692, + "p90": 1156.3199758529663, + "p95": 1158.784031867981, + "p99": 1164.031982421875 }, "roundtrip": { - "p50": 3057.663917541504, - "p90": 3078.3679485321045, - "p95": 3098.1760025024414, - "p99": 3246.783971786499 + "p50": 2113.408088684082, + "p90": 2138.5281085968018, + "p95": 2143.807888031006, + "p99": 2155.679941177368 }, "isolatedSum": { - "p50": 3086.687922477722, - "p90": 3124.8639822006226, - "p95": 3148.9919424057007, - "p99": 3349.984049797058 + "p50": 2166.9440269470215, + "p90": 2202.8160095214844, + "p95": 2214.9760723114014, + "p99": 2237.5359535217285 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 3758096384, - "combineLogicalBytes": 3758096384, - "fanoutMean": 8, - "recvTokensMax": 32768, - "stragglerRank": 5, + "dispatchLogicalBytes": 2481747968, + "combineLogicalBytes": 2481747968, + "fanoutMean": 5.282989501953125, + "recvTokensMax": 21789, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 @@ -61281,16 +62703,16 @@ ] 
}, { - "id": "cx-30f874f3", - "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|prefill|normal|none|none|0|tuned||9e6ac678a09f7f8", - "colorKey": "h200_b5c683eb", - "comparisonKey": "b18bebc70bf6167d", + "id": "cx-ff5c49bb", + "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|prefill|normal|none|none|0|tuned||4caecd33bedf786", + "colorKey": "h100_aa268d13", + "comparisonKey": "927a6d7282665742", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:56:38.753854+00:00", + "generatedAt": "2026-06-26T23:55:17.079494+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h200-dgxc-slurm_11", - "sku": "h200", + "runner": "h100-dgxc-slurm_02", + "sku": "h100", "backend": "deepep", "phase": "prefill", "mode": "normal", @@ -61298,17 +62720,18 @@ "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", + "topologyClass": "h100-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H200 EP8 · deepep · bf16 · balanced", + "label": "H100 EP8 · deepep · bf16 · zipf", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, "experts": 256, - "routing": "balanced", - "routingLabel": "balanced", + "routing": "zipf", + "routingLabel": "zipf", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, @@ -61333,8 +62756,8 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "9e6ac678a09f7f8", - "workloadId": "set:3:2dad1a73ff872905", + "traceSignature": "4caecd33bedf786", + "workloadId": "set:3:830e36e88869e222", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -61342,9 +62765,9 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271834221", - "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271834221", - "createdAt": "2026-06-26T23:55:06Z", + "id": "28271806404", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271806404", + "createdAt": "2026-06-26T23:55:17.079494+00:00", "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" }, "rows": [ @@ -61352,35 +62775,35 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 131.99999928474426, - "p90": 147.74399995803833, - "p95": 155.68000078201294, - "p99": 168.7680035829544 + "p50": 125.37600100040436, + "p90": 131.9040060043335, + "p95": 132.7359974384308, + "p99": 137.08800077438354 }, "combine": { - "p50": 126.01600587368011, - "p90": 139.74399864673615, - "p95": 146.08000218868256, - "p99": 156.73600137233734 + "p50": 113.0559965968132, + "p90": 114.04799669981003, + "p95": 114.56000059843063, + "p99": 120.67200243473053 }, "roundtrip": { - "p50": 229.8559993505478, - "p90": 251.583993434906, - "p95": 260.0319981575012, - "p99": 275.07200837135315 + "p50": 216.2880003452301, + "p90": 219.67999637126923, + "p95": 221.15199267864227, + "p99": 226.17599368095398 }, "isolatedSum": { - "p50": 258.0160051584244, - "p90": 287.4879986047745, - "p95": 301.7600029706955, - "p99": 325.50400495529175 + "p50": 238.43199759721756, + "p90": 245.95200270414352, + "p95": 247.29599803686142, + "p99": 257.7600032091141 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 117440512, - "combineLogicalBytes": 117440512, - "fanoutMean": 8, - "recvTokensMax": 1024, - "stragglerRank": 5, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -61389,34 +62812,34 @@ "tokensPerRank": 512, "globalTokens": 4096, "dispatch": { - "p50": 270.7520127296448, - "p90": 292.89600253105164, - "p95": 304.9600124359131, - "p99": 352.6400029659271 + "p50": 239.42400515079498, + "p90": 255.5519938468933, + 
"p95": 258.14399123191833, + "p99": 261.9200050830841 }, "combine": { - "p50": 268.5759961605072, - "p90": 281.76000714302063, - "p95": 287.200003862381, - "p99": 301.31199955940247 + "p50": 267.07199215888977, + "p90": 276.63999795913696, + "p95": 277.536004781723, + "p99": 279.90400791168213 }, "roundtrip": { - "p50": 514.4960284233093, - "p90": 532.7360033988953, - "p95": 542.1119928359985, - "p99": 571.615993976593 + "p50": 476.22400522232056, + "p90": 492.3520088195801, + "p95": 495.03999948501587, + "p99": 499.55201148986816 }, "isolatedSum": { - "p50": 539.328008890152, - "p90": 574.6560096740723, - "p95": 592.1600162982941, - "p99": 653.9520025253296 + "p50": 506.49599730968475, + "p90": 532.1919918060303, + "p95": 535.6799960136414, + "p99": 541.8240129947662 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 469762048, - "combineLogicalBytes": 469762048, - "fanoutMean": 8, - "recvTokensMax": 4096, + "dispatchLogicalBytes": 201678848, + "combineLogicalBytes": 201678848, + "fanoutMean": 3.4345703125, + "recvTokensMax": 4094, "stragglerRank": 5, "correct": true, "samplesPooled": 600, @@ -61426,34 +62849,34 @@ "tokensPerRank": 2048, "globalTokens": 16384, "dispatch": { - "p50": 822.0800161361694, - "p90": 845.1840281486511, - "p95": 862.5919818878174, - "p99": 1313.3759498596191 + "p50": 677.183985710144, + "p90": 691.3599967956543, + "p95": 694.8800086975098, + "p99": 701.2479901313782 }, "combine": { - "p50": 820.032000541687, - "p90": 837.7919793128967, - "p95": 846.3680148124695, - "p99": 873.3440041542053 + "p50": 816.2879943847656, + "p90": 828.607976436615, + "p95": 832.5759768486023, + "p99": 837.8239870071411 }, "roundtrip": { - "p50": 1605.9520244598389, - "p90": 1629.3439865112305, - "p95": 1645.1200246810913, - "p99": 1737.1840476989746 + "p50": 1460.4159593582153, + "p90": 1474.176049232483, + "p95": 1478.4640073776245, + "p99": 1485.8880043029785 }, "isolatedSum": { - "p50": 1642.1120166778564, - "p90": 1682.9760074615479, - "p95": 
1708.9599967002869, - "p99": 2186.7199540138245 + "p50": 1493.4719800949097, + "p90": 1519.9679732322693, + "p95": 1527.455985546112, + "p99": 1539.0719771385193 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1879048192, - "combineLogicalBytes": 1879048192, - "fanoutMean": 8, - "recvTokensMax": 16384, + "dispatchLogicalBytes": 808822784, + "combineLogicalBytes": 808822784, + "fanoutMean": 3.44354248046875, + "recvTokensMax": 16380, "stragglerRank": 5, "correct": true, "samplesPooled": 600, @@ -61462,16 +62885,16 @@ ] }, { - "id": "cx-a2c76343", - "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|8|prefill|normal|none|none|0|tuned||7aa44c7b86748b9", - "colorKey": "h200_d0dfa19a", - "comparisonKey": "4ade4ca52869383d", + "id": "cx-f5264491", + "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|prefill|normal|none|none|0|tuned||b5217e990b95f86", + "colorKey": "h100_aa268d13", + "comparisonKey": "927a6d7282665742", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:56:42.077253+00:00", + "generatedAt": "2026-06-27T00:00:04.176924+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h200-dgxc-slurm_7", - "sku": "h200", + "runner": "h100-dgxc-slurm_08", + "sku": "h100", "backend": "deepep", "phase": "prefill", "mode": "normal", @@ -61479,17 +62902,18 @@ "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", + "topologyClass": "h100-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H200 EP8 · deepep · bf16 · balanced-rank-local", + "label": "H100 EP8 · deepep · bf16 · zipf", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, "experts": 256, - "routing": "balanced-rank-local", - "routingLabel": "balanced-rank-local", + "routing": "zipf", + "routingLabel": "zipf", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, @@ 
-61514,8 +62938,8 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "7aa44c7b86748b9", - "workloadId": "set:3:388ff74baef05c72", + "traceSignature": "b5217e990b95f86", + "workloadId": "set:6:830e36e88869e222", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -61523,9 +62947,9 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271841288", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271841288", - "createdAt": "2026-06-26T23:55:19Z", + "id": "28271951888", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271951888", + "createdAt": "2026-06-27T00:00:04.176924+00:00", "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" }, "rows": [ @@ -61533,35 +62957,72 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 85.08799970149994, - "p90": 109.40799862146378, - "p95": 117.47200042009354, - "p99": 164.38399255275726 + "p50": 124.70400333404541, + "p90": 128.86400520801544, + "p95": 131.071999669075, + "p99": 132.9600065946579 }, "combine": { - "p50": 71.45600020885468, - "p90": 86.496002972126, - "p95": 91.26400202512741, - "p99": 106.20799660682678 + "p50": 112.5119999051094, + "p90": 114.01599645614624, + "p95": 114.3679991364479, + "p99": 116.5120005607605 }, "roundtrip": { - "p50": 132.51200318336487, - "p90": 162.7199947834015, - "p95": 173.8560050725937, - "p99": 221.5680032968521 + "p50": 216.22399985790253, + "p90": 219.90400552749634, + "p95": 221.02400660514832, + "p99": 223.90399873256683 }, "isolatedSum": { - "p50": 156.54399991035461, - "p90": 195.90400159358978, - "p95": 208.73600244522095, - "p99": 270.59198915958405 + "p50": 237.21600323915482, + "p90": 242.88000166416168, + "p95": 245.43999880552292, + "p99": 249.4720071554184 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 14680064, - "combineLogicalBytes": 
14680064, - "fanoutMean": 1, - "recvTokensMax": 128, - "stragglerRank": 6, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 164.57599401474, + "p90": 167.93599724769592, + "p95": 169.5680022239685, + "p99": 229.15199398994446 + }, + "combine": { + "p50": 162.6559942960739, + "p90": 168.64000260829926, + "p95": 169.98399794101715, + "p99": 171.29600048065186 + }, + "roundtrip": { + "p50": 299.80799555778503, + "p90": 305.11999130249023, + "p95": 306.71998858451843, + "p99": 308.9919984340668 + }, + "isolatedSum": { + "p50": 327.2319883108139, + "p90": 336.5759998559952, + "p95": 339.55200016498566, + "p99": 400.4479944705963 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 100509696, + "combineLogicalBytes": 100509696, + "fanoutMean": 3.42333984375, + "recvTokensMax": 2046, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -61570,72 +63031,146 @@ "tokensPerRank": 512, "globalTokens": 4096, "dispatch": { - "p50": 111.00800335407257, - "p90": 130.43199479579926, - "p95": 136.80000603199005, - "p99": 165.75999557971954 + "p50": 237.92000114917755, + "p90": 242.3039972782135, + "p95": 244.4159984588623, + "p99": 250.14400482177734 }, "combine": { - "p50": 118.1119978427887, - "p90": 134.62400436401367, - "p95": 139.67999815940857, - "p99": 149.6639996767044 + "p50": 260.9280049800873, + "p90": 265.6640112400055, + "p95": 267.67998933792114, + "p99": 272.7360129356384 }, "roundtrip": { - "p50": 202.30400562286377, - "p90": 223.83999824523926, - "p95": 241.85599386692047, - "p99": 296.25600576400757 + "p50": 471.77600860595703, + "p90": 476.8959879875183, + "p95": 479.2639911174774, + "p99": 495.2000081539154 }, "isolatedSum": { - "p50": 229.12000119686127, - "p90": 265.0559991598129, - "p95": 
276.4800041913986, - "p99": 315.42399525642395 + "p50": 498.84800612926483, + "p90": 507.968008518219, + "p95": 512.0959877967834, + "p99": 522.8800177574158 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 58720256, - "combineLogicalBytes": 58720256, - "fanoutMean": 1, - "recvTokensMax": 512, + "dispatchLogicalBytes": 201678848, + "combineLogicalBytes": 201678848, + "fanoutMean": 3.4345703125, + "recvTokensMax": 4094, "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 377.85598635673523, + "p90": 384.38400626182556, + "p95": 385.8239948749542, + "p99": 390.6880021095276 + }, + "combine": { + "p50": 442.1760141849518, + "p90": 447.80799746513367, + "p95": 449.3120014667511, + "p99": 452.86399126052856 + }, + "roundtrip": { + "p50": 795.6799864768982, + "p90": 803.167998790741, + "p95": 806.3039779663086, + "p99": 813.0559921264648 + }, + "isolatedSum": { + "p50": 820.032000541687, + "p90": 832.1920037269592, + "p95": 835.1359963417053, + "p99": 843.5519933700562 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 405035008, + "combineLogicalBytes": 405035008, + "fanoutMean": 3.4488525390625, + "recvTokensMax": 8189, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, { "tokensPerRank": 2048, "globalTokens": 16384, "dispatch": { - "p50": 189.63199853897095, - "p90": 209.72800254821777, - "p95": 218.4319943189621, - "p99": 254.14401292800903 + "p50": 675.3919720649719, + "p90": 690.4320120811462, + "p95": 694.1120028495789, + "p99": 700.8320093154907 }, "combine": { - "p50": 284.960001707077, - "p90": 298.7520098686218, - "p95": 303.2959997653961, - "p99": 331.9999873638153 + "p50": 806.1439990997314, + "p90": 816.5119886398315, + "p95": 818.5279965400696, + "p99": 824.5440125465393 }, "roundtrip": { - "p50": 447.3919868469238, - "p90": 475.42399168014526, - "p95": 484.8639965057373, - "p99": 529.9519896507263 + "p50": 
1447.1999406814575, + "p90": 1458.143949508667, + "p95": 1462.5600576400757, + "p99": 1468.991994857788 }, "isolatedSum": { - "p50": 474.592000246048, - "p90": 508.4800124168396, - "p95": 521.7279940843582, - "p99": 586.1440002918243 + "p50": 1481.5359711647034, + "p90": 1506.9440007209778, + "p95": 1512.6399993896484, + "p99": 1525.37602186203 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 234881024, - "combineLogicalBytes": 234881024, - "fanoutMean": 1, - "recvTokensMax": 2048, - "stragglerRank": 3, + "dispatchLogicalBytes": 808822784, + "combineLogicalBytes": 808822784, + "fanoutMean": 3.44354248046875, + "recvTokensMax": 16380, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1273.7280130386353, + "p90": 1286.1759662628174, + "p95": 1290.2400493621826, + "p99": 1300.3519773483276 + }, + "combine": { + "p50": 1515.6480073928833, + "p90": 1529.1199684143066, + "p95": 1554.6239614486694, + "p99": 1575.2639770507812 + }, + "roundtrip": { + "p50": 2763.0081176757812, + "p90": 2772.9599475860596, + "p95": 2776.3519287109375, + "p99": 2782.464027404785 + }, + "isolatedSum": { + "p50": 2789.3760204315186, + "p90": 2815.295934677124, + "p95": 2844.864010810852, + "p99": 2875.615954399109 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1619795968, + "combineLogicalBytes": 1619795968, + "fanoutMean": 3.4481201171875, + "recvTokensMax": 32761, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -61643,16 +63178,16 @@ ] }, { - "id": "cx-79209ba6", - "identity": "h200|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|balanced+eplb|8|prefill|normal|none|none|0|tuned||df54a9510825f71", - "colorKey": "h200_06544e53", - "comparisonKey": "822fd37c7222ef9b", + "id": "cx-f680673f", + "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|prefill|normal|none|none|0|tuned||3dd868cb33839a3", + 
"colorKey": "h100_002beb29", + "comparisonKey": "3715210183d38757", "schemaVersion": 3, - "generatedAt": "2026-06-27T00:03:05.638717+00:00", + "generatedAt": "2026-06-26T23:55:20.108988+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h200-dgxc-slurm_8", - "sku": "h200", + "runner": "h100-dgxc-slurm_06", + "sku": "h100", "backend": "deepep", "phase": "prefill", "mode": "normal", @@ -61660,20 +63195,21 @@ "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", + "topologyClass": "h100-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H200 EP8 · deepep · bf16 · balanced+eplb", + "label": "H100 EP8 · deepep · bf16 · zipf-heavy", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, - "experts": 288, - "routing": "balanced", - "routingLabel": "balanced+eplb", + "experts": 256, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy", "routingStep": 0, "unevenTokens": "none", - "eplbEnabled": true, + "eplbEnabled": false, "dispatchDtype": "bf16", "activationProfile": "normal", "combineQuantMode": "none" @@ -61695,18 +63231,18 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "df54a9510825f71", - "workloadId": "set:6:2dad1a73ff872905", + "traceSignature": "3dd868cb33839a3", + "workloadId": "set:3:1ca614e23cc66be1", "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": 1, - "eplbImbalanceAfter": 1, + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, "backendVersion": "1.2.1", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28272038593", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272038593", - "createdAt": "2026-06-27T00:01:37Z", + "id": "28271813470", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271813470", + 
"createdAt": "2026-06-26T23:55:20.108988+00:00", "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" }, "rows": [ @@ -61714,146 +63250,72 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 95.83999961614609, - "p90": 112.73600161075592, - "p95": 121.15199863910675, - "p99": 153.4080058336258 + "p50": 117.63200163841248, + "p90": 122.43200093507767, + "p95": 123.55200201272964, + "p99": 126.5919953584671 }, "combine": { - "p50": 83.99999886751175, - "p90": 97.6639986038208, - "p95": 104.22399640083313, - "p99": 116.89600348472595 + "p50": 106.62399977445602, + "p90": 112.31999844312668, + "p95": 113.27999830245972, + "p99": 115.9679964184761 }, "roundtrip": { - "p50": 154.1759967803955, - "p90": 176.32000148296356, - "p95": 183.45600366592407, - "p99": 211.29600703716278 + "p50": 207.58399367332458, + "p90": 211.84000372886658, + "p95": 213.18399906158447, + "p99": 216.35200083255768 }, "isolatedSum": { - "p50": 179.83999848365784, - "p90": 210.40000021457672, - "p95": 225.37599503993988, - "p99": 270.30400931835175 + "p50": 224.2560014128685, + "p90": 234.75199937820435, + "p95": 236.83200031518936, + "p99": 242.5599917769432 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 29360128, - "combineLogicalBytes": 29360128, - "fanoutMean": 2, - "recvTokensMax": 384, - "stragglerRank": 5, + "dispatchLogicalBytes": 22650880, + "combineLogicalBytes": 22650880, + "fanoutMean": 1.54296875, + "recvTokensMax": 1024, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 256, - "globalTokens": 2048, + "tokensPerRank": 512, + "globalTokens": 4096, "dispatch": { - "p50": 115.55200070142746, - "p90": 135.68000495433807, - "p95": 143.39199662208557, - "p99": 163.26400637626648 + "p50": 236.35199666023254, + "p90": 249.82400238513947, + "p95": 253.88801097869873, + "p99": 257.02399015426636 }, "combine": { - "p50": 103.35999727249146, - "p90": 120.03199756145477, - "p95": 127.32799351215363, - "p99": 154.4319987297058 
+ "p50": 251.583993434906, + "p90": 259.7759962081909, + "p95": 260.47998666763306, + "p99": 262.2080147266388 }, "roundtrip": { - "p50": 197.2160041332245, - "p90": 215.58399498462677, - "p95": 223.26399385929108, - "p99": 242.5599992275238 + "p50": 459.29598808288574, + "p90": 472.1919894218445, + "p95": 474.88000988960266, + "p99": 478.5279929637909 }, "isolatedSum": { - "p50": 218.91199797391891, - "p90": 255.71200251579285, - "p95": 270.7199901342392, - "p99": 317.6960051059723 + "p50": 487.93599009513855, + "p90": 509.5999985933304, + "p95": 514.3679976463318, + "p99": 519.2320048809052 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 58720256, - "combineLogicalBytes": 58720256, - "fanoutMean": 2, - "recvTokensMax": 768, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 512, - "globalTokens": 4096, - "dispatch": { - "p50": 144.6080058813095, - "p90": 169.0559983253479, - "p95": 176.7680048942566, - "p99": 208.064004778862 - }, - "combine": { - "p50": 140.47999680042267, - "p90": 155.74400126934052, - "p95": 161.6639941930771, - "p99": 184.1920018196106 - }, - "roundtrip": { - "p50": 262.9440128803253, - "p90": 282.24000334739685, - "p95": 290.6560003757477, - "p99": 320.0640082359314 - }, - "isolatedSum": { - "p50": 285.0880026817322, - "p90": 324.7999995946884, - "p95": 338.4319990873337, - "p99": 392.2560065984726 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 117440512, - "combineLogicalBytes": 117440512, - "fanoutMean": 2, - "recvTokensMax": 1536, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 1024, - "globalTokens": 8192, - "dispatch": { - "p50": 205.4080069065094, - "p90": 229.21599447727203, - "p95": 238.49600553512573, - "p99": 255.67999482154846 - }, - "combine": { - "p50": 214.7199958562851, - "p90": 231.90400004386902, - "p95": 236.86400055885315, - "p99": 255.64798712730408 - }, - "roundtrip": { - "p50": 
399.4239866733551, - "p90": 420.48001289367676, - "p95": 429.6000003814697, - "p99": 593.7280058860779 - }, - "isolatedSum": { - "p50": 420.1280027627945, - "p90": 461.11999452114105, - "p95": 475.3600060939789, - "p99": 511.32798194885254 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 234881024, - "combineLogicalBytes": 234881024, - "fanoutMean": 2, - "recvTokensMax": 3072, - "stragglerRank": 6, + "dispatchLogicalBytes": 91521024, + "combineLogicalBytes": 91521024, + "fanoutMean": 1.55859375, + "recvTokensMax": 4096, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -61862,72 +63324,35 @@ "tokensPerRank": 2048, "globalTokens": 16384, "dispatch": { - "p50": 331.743985414505, - "p90": 350.23999214172363, - "p95": 361.08800768852234, - "p99": 406.0479998588562 - }, - "combine": { - "p50": 360.54399609565735, - "p90": 375.90399384498596, - "p95": 382.78400897979736, - "p99": 404.4159948825836 - }, - "roundtrip": { - "p50": 664.0639901161194, - "p90": 679.9039840698242, - "p95": 693.5679912567139, - "p99": 743.1359887123108 - }, - "isolatedSum": { - "p50": 692.2879815101624, - "p90": 726.1439859867096, - "p95": 743.8720166683197, - "p99": 810.4639947414398 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 469762048, - "combineLogicalBytes": 469762048, - "fanoutMean": 2, - "recvTokensMax": 6144, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4096, - "globalTokens": 32768, - "dispatch": { - "p50": 580.5439949035645, - "p90": 593.3759808540344, - "p95": 602.3679971694946, - "p99": 637.6640200614929 + "p50": 659.3279838562012, + "p90": 669.0239906311035, + "p95": 672.0960140228271, + "p99": 678.4319877624512 }, "combine": { - "p50": 628.3519864082336, - "p90": 641.1839723587036, - "p95": 648.3839750289917, - "p99": 680.9279918670654 + "p50": 783.456027507782, + "p90": 794.6239709854126, + "p95": 799.0720272064209, + "p99": 807.6800107955933 }, "roundtrip": { - 
"p50": 1181.7920207977295, - "p90": 1199.295997619629, - "p95": 1210.3359699249268, - "p99": 1255.4240226745605 + "p50": 1412.6399755477905, + "p90": 1421.8239784240723, + "p95": 1426.0480403900146, + "p99": 1434.0159893035889 }, "isolatedSum": { - "p50": 1208.895981311798, - "p90": 1234.559953212738, - "p95": 1250.7519721984863, - "p99": 1318.5920119285583 + "p50": 1442.7840113639832, + "p90": 1463.647961616516, + "p95": 1471.168041229248, + "p99": 1486.1119985580444 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 939524096, - "combineLogicalBytes": 939524096, - "fanoutMean": 2, - "recvTokensMax": 12288, - "stragglerRank": 5, + "dispatchLogicalBytes": 368062464, + "combineLogicalBytes": 368062464, + "fanoutMean": 1.5670166015625, + "recvTokensMax": 16384, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 @@ -61935,16 +63360,16 @@ ] }, { - "id": "cx-c14326f0", - "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|prefill|normal|none|none|0|tuned||38fd0bcf7109c32", - "colorKey": "h200_189562cd", - "comparisonKey": "b9475bb176588857", + "id": "cx-329395ff", + "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|prefill|normal|none|none|0|tuned||bbcd1d9d8d1e4fe", + "colorKey": "h100_002beb29", + "comparisonKey": "3715210183d38757", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:57:32.803411+00:00", + "generatedAt": "2026-06-27T00:01:29.454209+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h200-dgxc-slurm_13", - "sku": "h200", + "runner": "h100-dgxc-slurm_15", + "sku": "h100", "backend": "deepep", "phase": "prefill", "mode": "normal", @@ -61952,17 +63377,18 @@ "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", + "topologyClass": "h100-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H200 EP8 · deepep · bf16 · 
hotspot-single", + "label": "H100 EP8 · deepep · bf16 · zipf-heavy", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, "experts": 256, - "routing": "hotspot-single", - "routingLabel": "hotspot-single", + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, @@ -61987,8 +63413,8 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "38fd0bcf7109c32", - "workloadId": "set:3:b952d4a43d688b50", + "traceSignature": "bbcd1d9d8d1e4fe", + "workloadId": "set:6:1ca614e23cc66be1", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -61996,9 +63422,9 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271862413", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271862413", - "createdAt": "2026-06-26T23:56:00Z", + "id": "28271996602", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271996602", + "createdAt": "2026-06-27T00:01:29.454209+00:00", "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" }, "rows": [ @@ -62006,35 +63432,72 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 125.11999905109406, - "p90": 140.00000059604645, - "p95": 149.88799393177032, - "p99": 171.23199999332428 + "p50": 115.87200313806534, + "p90": 122.14399874210358, + "p95": 123.10399860143661, + "p99": 127.16799974441528 }, "combine": { - "p50": 118.65600198507309, - "p90": 132.64000415802002, - "p95": 137.60000467300415, - "p99": 164.95999693870544 + "p50": 106.72000050544739, + "p90": 111.7120012640953, + "p95": 112.57600039243698, + "p99": 114.46399986743927 }, "roundtrip": { - "p50": 221.5680032968521, - "p90": 238.14399540424347, - "p95": 251.71199440956116, - "p99": 291.6480004787445 + "p50": 207.07200467586517, + "p90": 210.91200411319733, + "p95": 212.54399418830872, + "p99": 
243.52000653743744 }, "isolatedSum": { - "p50": 243.77600103616714, - "p90": 272.64000475406647, - "p95": 287.4879986047745, - "p99": 336.1919969320297 + "p50": 222.59200364351273, + "p90": 233.85600000619888, + "p95": 235.6799989938736, + "p99": 241.63199961185455 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 78102528, - "combineLogicalBytes": 78102528, - "fanoutMean": 5.3203125, + "dispatchLogicalBytes": 22650880, + "combineLogicalBytes": 22650880, + "fanoutMean": 1.54296875, "recvTokensMax": 1024, - "stragglerRank": 7, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 159.29600596427917, + "p90": 166.62399470806122, + "p95": 167.4560010433197, + "p99": 169.21600699424744 + }, + "combine": { + "p50": 154.65599298477173, + "p90": 163.10399770736694, + "p95": 163.7759953737259, + "p99": 165.0560051202774 + }, + "roundtrip": { + "p50": 289.44000601768494, + "p90": 301.66399478912354, + "p95": 303.5840094089508, + "p99": 308.03200602531433 + }, + "isolatedSum": { + "p50": 313.9519989490509, + "p90": 329.72799241542816, + "p95": 331.2319964170456, + "p99": 334.27201211452484 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 45688832, + "combineLogicalBytes": 45688832, + "fanoutMean": 1.55615234375, + "recvTokensMax": 2048, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -62043,35 +63506,72 @@ "tokensPerRank": 512, "globalTokens": 4096, "dispatch": { - "p50": 245.02399563789368, - "p90": 257.31199979782104, - "p95": 265.8880054950714, - "p99": 298.72000217437744 + "p50": 231.1680018901825, + "p90": 236.4799976348877, + "p95": 237.40799725055695, + "p99": 240.7039999961853 }, "combine": { - "p50": 263.68001103401184, - "p90": 275.32801032066345, - "p95": 281.9199860095978, - "p99": 299.1679906845093 + "p50": 252.73600220680237, + "p90": 260.8639895915985, + "p95": 261.8879973888397, + "p99": 263.64800333976746 }, 
"roundtrip": { - "p50": 481.9839894771576, - "p90": 495.6800043582916, - "p95": 506.1759948730469, - "p99": 808.3199858665466 + "p50": 461.34400367736816, + "p90": 475.39201378822327, + "p95": 476.639986038208, + "p99": 479.45600748062134 }, "isolatedSum": { - "p50": 508.7040066719055, - "p90": 532.6400101184845, - "p95": 547.8079915046692, - "p99": 597.8879928588867 + "p50": 483.90400409698486, + "p90": 497.3439872264862, + "p95": 499.29599463939667, + "p99": 504.35200333595276 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 311091200, - "combineLogicalBytes": 311091200, - "fanoutMean": 5.2978515625, + "dispatchLogicalBytes": 91521024, + "combineLogicalBytes": 91521024, + "fanoutMean": 1.55859375, "recvTokensMax": 4096, - "stragglerRank": 5, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 374.2400109767914, + "p90": 379.040002822876, + "p95": 381.98399543762207, + "p99": 387.4559998512268 + }, + "combine": { + "p50": 431.2640130519867, + "p90": 439.8399889469147, + "p95": 443.07199120521545, + "p99": 446.78398966789246 + }, + "roundtrip": { + "p50": 779.2320251464844, + "p90": 791.3600206375122, + "p95": 794.0160036087036, + "p99": 801.0240197181702 + }, + "isolatedSum": { + "p50": 805.5040240287781, + "p90": 818.8799917697906, + "p95": 825.0559866428375, + "p99": 834.2399895191193 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 183916544, + "combineLogicalBytes": 183916544, + "fanoutMean": 1.5660400390625, + "recvTokensMax": 8192, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -62080,35 +63580,72 @@ "tokensPerRank": 2048, "globalTokens": 16384, "dispatch": { - "p50": 725.1200079917908, - "p90": 748.0959892272949, - "p95": 763.0079984664917, - "p99": 812.0959997177124 + "p50": 661.1520051956177, + "p90": 676.1919856071472, + "p95": 679.6479821205139, + "p99": 685.9520077705383 }, "combine": { - "p50": 
799.3280291557312, - "p90": 813.9839768409729, - "p95": 823.5200047492981, - "p99": 875.6160140037537 + "p50": 789.9519801139832, + "p90": 800.0959753990173, + "p95": 803.1359910964966, + "p99": 808.7360262870789 }, "roundtrip": { - "p50": 1494.3679571151733, - "p90": 1516.1919593811035, - "p95": 1528.2560586929321, - "p99": 1709.8560333251953 + "p50": 1422.271966934204, + "p90": 1435.1680278778076, + "p95": 1439.1039609909058, + "p99": 1454.367995262146 }, "isolatedSum": { - "p50": 1524.448037147522, - "p90": 1562.0799660682678, - "p95": 1586.5280032157898, - "p99": 1687.712013721466 + "p50": 1451.1039853096008, + "p90": 1476.2879610061646, + "p95": 1482.7839732170105, + "p99": 1494.6880340576172 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1241511936, - "combineLogicalBytes": 1241511936, - "fanoutMean": 5.28570556640625, + "dispatchLogicalBytes": 368062464, + "combineLogicalBytes": 368062464, + "fanoutMean": 1.5670166015625, "recvTokensMax": 16384, - "stragglerRank": 5, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1241.8559789657593, + "p90": 1251.871943473816, + "p95": 1256.4799785614014, + "p99": 1264.0639543533325 + }, + "combine": { + "p50": 1471.4560508728027, + "p90": 1480.1599979400635, + "p95": 1482.6240539550781, + "p99": 1489.8879528045654 + }, + "roundtrip": { + "p50": 2687.9680156707764, + "p90": 2698.848009109497, + "p95": 2703.104019165039, + "p99": 2708.928108215332 + }, + "isolatedSum": { + "p50": 2713.312029838562, + "p90": 2732.0319414138794, + "p95": 2739.1040325164795, + "p99": 2753.951907157898 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 734720000, + "combineLogicalBytes": 734720000, + "fanoutMean": 1.56402587890625, + "recvTokensMax": 32768, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -62116,16 +63653,16 @@ ] }, { - "id": "cx-17171887", - "identity": 
"h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|prefill|normal|none|none|0|tuned||bfbb64a166e9f1c", - "colorKey": "h200_189562cd", - "comparisonKey": "b9475bb176588857", + "id": "cx-c90a67e2", + "identity": "h100|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-heavy+eplb|8|prefill|normal|none|none|0|tuned||46855e7fa6754eb", + "colorKey": "h100_c44978e5", + "comparisonKey": "6c5c69e3474ec552", "schemaVersion": 3, - "generatedAt": "2026-06-27T00:05:28.346517+00:00", + "generatedAt": "2026-06-27T00:01:29.771027+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h200-dgxc-slurm_4", - "sku": "h200", + "runner": "h100-dgxc-slurm_05", + "sku": "h100", "backend": "deepep", "phase": "prefill", "mode": "normal", @@ -62133,20 +63670,21 @@ "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", + "topologyClass": "h100-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H200 EP8 · deepep · bf16 · hotspot-single", + "label": "H100 EP8 · deepep · bf16 · zipf-heavy+eplb", + "model": "DeepSeek-V3 (EPLB physical)", "shape": { "hidden": 7168, "topk": 8, - "experts": 256, - "routing": "hotspot-single", - "routingLabel": "hotspot-single", + "experts": 288, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy+eplb", "routingStep": 0, "unevenTokens": "none", - "eplbEnabled": false, + "eplbEnabled": true, "dispatchDtype": "bf16", "activationProfile": "normal", "combineQuantMode": "none" @@ -62168,18 +63706,18 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "bfbb64a166e9f1c", - "workloadId": "set:6:b952d4a43d688b50", + "traceSignature": "46855e7fa6754eb", + "workloadId": "set:6:1ca614e23cc66be1", "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, + "eplbImbalanceBefore": 7.38995361328125, + "eplbImbalanceAfter": 
1.0000210716610862, "backendVersion": "1.2.1", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28272106904", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272106904", - "createdAt": "2026-06-27T00:03:47Z", + "id": "28272000459", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272000459", + "createdAt": "2026-06-27T00:01:29.771027+00:00", "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" }, "rows": [ @@ -62187,35 +63725,35 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 124.41600114107132, - "p90": 159.9999964237213, - "p95": 167.39200055599213, - "p99": 211.42399311065674 + "p50": 110.75200140476227, + "p90": 114.97599631547928, + "p95": 116.95999652147293, + "p99": 122.01599776744843 }, "combine": { - "p50": 118.01599711179733, - "p90": 146.5280055999756, - "p95": 150.27199685573578, - "p99": 162.9759967327118 + "p50": 105.92000186443329, + "p90": 109.56799983978271, + "p95": 111.23199760913849, + "p99": 114.14399743080139 }, "roundtrip": { - "p50": 220.2560007572174, - "p90": 253.91998887062073, - "p95": 258.432000875473, - "p99": 271.42399549484253 + "p50": 193.1840032339096, + "p90": 198.7520009279251, + "p95": 200.19200444221497, + "p99": 204.44799959659576 }, "isolatedSum": { - "p50": 242.43199825286865, - "p90": 306.5280020236969, - "p95": 317.6639974117279, - "p99": 374.39998984336853 + "p50": 216.67200326919556, + "p90": 224.543996155262, + "p95": 228.19199413061142, + "p99": 236.15999519824982 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 78102528, - "combineLogicalBytes": 78102528, - "fanoutMean": 5.3203125, - "recvTokensMax": 1024, - "stragglerRank": 7, + "dispatchLogicalBytes": 79206400, + "combineLogicalBytes": 79206400, + "fanoutMean": 5.3955078125, + "recvTokensMax": 713, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -62224,35 +63762,35 @@ 
"tokensPerRank": 256, "globalTokens": 2048, "dispatch": { - "p50": 162.432000041008, - "p90": 170.6240028142929, - "p95": 175.04000663757324, - "p99": 188.38399648666382 + "p50": 146.81600034236908, + "p90": 151.48800611495972, + "p95": 152.44799852371216, + "p99": 156.80000185966492 }, "combine": { - "p50": 165.0879979133606, - "p90": 175.7120043039322, - "p95": 179.83999848365784, - "p99": 191.77600741386414 + "p50": 150.62400698661804, + "p90": 154.7520011663437, + "p95": 155.39200603961945, + "p99": 161.31199896335602 }, "roundtrip": { - "p50": 301.66399478912354, - "p90": 317.3759877681732, - "p95": 322.6880133152008, - "p99": 333.69600772857666 + "p50": 266.59199595451355, + "p90": 270.4640030860901, + "p95": 271.64798974990845, + "p99": 274.84801411628723 }, "isolatedSum": { - "p50": 327.5199979543686, - "p90": 346.3360071182251, - "p95": 354.8800051212311, - "p99": 380.16000390052795 + "p50": 297.4400073289871, + "p90": 306.2400072813034, + "p95": 307.8400045633316, + "p99": 318.11200082302094 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 156090368, - "combineLogicalBytes": 156090368, - "fanoutMean": 5.31640625, - "recvTokensMax": 2048, - "stragglerRank": 7, + "dispatchLogicalBytes": 159330304, + "combineLogicalBytes": 159330304, + "fanoutMean": 5.4267578125, + "recvTokensMax": 1436, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -62261,35 +63799,35 @@ "tokensPerRank": 512, "globalTokens": 4096, "dispatch": { - "p50": 238.97600173950195, - "p90": 247.0400035381317, - "p95": 250.0160038471222, - "p99": 259.39199328422546 + "p50": 201.05600357055664, + "p90": 204.70400154590607, + "p95": 205.63200116157532, + "p99": 209.1200053691864 }, "combine": { - "p50": 261.9200050830841, - "p90": 275.2000093460083, - "p95": 279.58399057388306, - "p99": 300.4480004310608 + "p50": 227.64800488948822, + "p90": 231.99999332427979, + "p95": 234.17599499225616, + "p99": 235.83999276161194 }, "roundtrip": { - "p50": 482.33601450920105, - 
"p90": 499.1999864578247, - "p95": 507.3919892311096, - "p99": 570.527970790863 + "p50": 403.55199575424194, + "p90": 408.160001039505, + "p95": 409.15200114250183, + "p99": 411.77600622177124 }, "isolatedSum": { - "p50": 500.89600682258606, - "p90": 522.24001288414, - "p95": 529.5999944210052, - "p99": 559.8399937152863 + "p50": 428.70400846004486, + "p90": 436.70399487018585, + "p95": 439.8079961538315, + "p99": 444.95999813079834 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 311091200, - "combineLogicalBytes": 311091200, - "fanoutMean": 5.2978515625, - "recvTokensMax": 4096, - "stragglerRank": 5, + "dispatchLogicalBytes": 319535104, + "combineLogicalBytes": 319535104, + "fanoutMean": 5.441650390625, + "recvTokensMax": 2897, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -62298,35 +63836,35 @@ "tokensPerRank": 1024, "globalTokens": 8192, "dispatch": { - "p50": 392.8639888763428, - "p90": 402.72000432014465, - "p95": 406.23998641967773, - "p99": 445.3760087490082 + "p50": 309.1840147972107, + "p90": 313.2160007953644, + "p95": 314.62401151657104, + "p99": 317.79199838638306 }, "combine": { - "p50": 443.1680142879486, - "p90": 455.80801367759705, - "p95": 461.5040123462677, - "p99": 481.53600096702576 + "p50": 368.5440123081207, + "p90": 374.9440014362335, + "p95": 376.22401118278503, + "p99": 380.7680010795593 }, "roundtrip": { - "p50": 817.5680041313171, - "p90": 835.2320194244385, - "p95": 845.3760147094727, - "p99": 893.887996673584 + "p50": 652.2560119628906, + "p90": 658.9760184288025, + "p95": 661.3759994506836, + "p99": 665.2479767799377 }, "isolatedSum": { - "p50": 836.0320031642914, - "p90": 858.5280179977417, - "p95": 867.7439987659454, - "p99": 926.9120097160339 + "p50": 677.7280271053314, + "p90": 688.1600022315979, + "p95": 690.8480226993561, + "p99": 698.5599994659424 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 620648448, - "combineLogicalBytes": 620648448, - "fanoutMean": 5.2847900390625, - 
"recvTokensMax": 8192, - "stragglerRank": 5, + "dispatchLogicalBytes": 638410752, + "combineLogicalBytes": 638410752, + "fanoutMean": 5.43603515625, + "recvTokensMax": 5815, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -62335,35 +63873,35 @@ "tokensPerRank": 2048, "globalTokens": 16384, "dispatch": { - "p50": 718.783974647522, - "p90": 730.3680181503296, - "p95": 737.280011177063, - "p99": 808.1920146942139 + "p50": 532.6079726219177, + "p90": 546.5599894523621, + "p95": 550.495982170105, + "p99": 557.7600002288818 }, "combine": { - "p50": 797.4399924278259, - "p90": 810.8800053596497, - "p95": 820.032000541687, - "p99": 849.3760228157043 + "p50": 642.5279974937439, + "p90": 649.9519944190979, + "p95": 652.2560119628906, + "p99": 658.8159799575806 }, "roundtrip": { - "p50": 1490.3680086135864, - "p90": 1507.5839757919312, - "p95": 1519.2960500717163, - "p99": 1630.944013595581 + "p50": 1146.399974822998, + "p90": 1156.9600105285645, + "p95": 1160.9920263290405, + "p99": 1168.511986732483 }, "isolatedSum": { - "p50": 1516.223967075348, - "p90": 1541.2480235099792, - "p95": 1557.31201171875, - "p99": 1657.5680375099182 + "p50": 1175.1359701156616, + "p90": 1196.51198387146, + "p95": 1202.7519941329956, + "p99": 1216.5759801864624 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1241511936, - "combineLogicalBytes": 1241511936, - "fanoutMean": 5.28570556640625, - "recvTokensMax": 16384, - "stragglerRank": 5, + "dispatchLogicalBytes": 1275144192, + "combineLogicalBytes": 1275144192, + "fanoutMean": 5.42889404296875, + "recvTokensMax": 11606, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -62372,35 +63910,35 @@ "tokensPerRank": 4096, "globalTokens": 32768, "dispatch": { - "p50": 1386.0160112380981, - "p90": 1401.0560512542725, - "p95": 1406.1440229415894, - "p99": 1621.7279434204102 + "p50": 1024.351954460144, + "p90": 1048.5440492630005, + "p95": 1056.9599866867065, + "p99": 1069.3119764328003 }, 
"combine": { - "p50": 1483.199954032898, - "p90": 1497.5999593734741, - "p95": 1502.17604637146, - "p99": 1538.0480289459229 + "p50": 1185.9840154647827, + "p90": 1194.1759586334229, + "p95": 1196.5759992599487, + "p99": 1201.5680074691772 }, "roundtrip": { - "p50": 2845.855951309204, - "p90": 2863.840103149414, - "p95": 2879.647970199585, - "p99": 3068.063974380493 + "p50": 2167.520046234131, + "p90": 2183.3600997924805, + "p95": 2188.8959407806396, + "p99": 2197.727918624878 }, "isolatedSum": { - "p50": 2869.215965270996, - "p90": 2898.6560106277466, - "p95": 2908.3200693130493, - "p99": 3159.775972366333 + "p50": 2210.3359699249268, + "p90": 2242.7200078964233, + "p95": 2253.5359859466553, + "p99": 2270.8799839019775 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2484242432, - "combineLogicalBytes": 2484242432, - "fanoutMean": 5.288299560546875, - "recvTokensMax": 32768, - "stragglerRank": 6, + "dispatchLogicalBytes": 2546374656, + "combineLogicalBytes": 2546374656, + "fanoutMean": 5.420562744140625, + "recvTokensMax": 23170, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 @@ -62408,16 +63946,16 @@ ] }, { - "id": "cx-f354b9c6", - "identity": "h200|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|hotspot-single+eplb|8|prefill|normal|none|none|0|tuned||29ae5ace13636f8", - "colorKey": "h200_80a72891", - "comparisonKey": "52b3ac7f405659bf", + "id": "cx-fe520015", + "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-mild|8|prefill|normal|none|none|0|tuned||cf93f8f6b52e428", + "colorKey": "h100_9aa30544", + "comparisonKey": "212a6f0661f5d2d6", "schemaVersion": 3, - "generatedAt": "2026-06-27T00:05:25.966329+00:00", + "generatedAt": "2026-06-27T00:00:29.937355+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h200-dgxc-slurm_7", - "sku": "h200", + "runner": "h100-dgxc-slurm_09", + "sku": "h100", "backend": "deepep", "phase": "prefill", "mode": "normal", @@ -62425,20 +63963,21 @@ 
"suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", + "topologyClass": "h100-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H200 EP8 · deepep · bf16 · hotspot-single+eplb", + "label": "H100 EP8 · deepep · bf16 · zipf-mild", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, - "experts": 288, - "routing": "hotspot-single", - "routingLabel": "hotspot-single+eplb", + "experts": 256, + "routing": "zipf-mild", + "routingLabel": "zipf-mild", "routingStep": 0, "unevenTokens": "none", - "eplbEnabled": true, + "eplbEnabled": false, "dispatchDtype": "bf16", "activationProfile": "normal", "combineQuantMode": "none" @@ -62460,18 +63999,18 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "29ae5ace13636f8", - "workloadId": "set:6:b952d4a43d688b50", + "traceSignature": "cf93f8f6b52e428", + "workloadId": "set:6:a224603e5a1640b8", "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": 1.8466796875, - "eplbImbalanceAfter": 1.0002700343276514, + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, "backendVersion": "1.2.1", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28272110404", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272110404", - "createdAt": "2026-06-27T00:03:54Z", + "id": "28271965088", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271965088", + "createdAt": "2026-06-27T00:00:29.937355+00:00", "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" }, "rows": [ @@ -62479,34 +64018,34 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 117.24799871444702, - "p90": 134.17600095272064, - "p95": 140.25600254535675, - "p99": 158.84800255298615 + "p50": 123.71200323104858, + "p90": 127.6479959487915, + "p95": 
131.20000064373016, + "p99": 133.7279975414276 }, "combine": { - "p50": 107.68000036478043, - "p90": 119.39200013875961, - "p95": 123.99999797344208, - "p99": 129.82399761676788 + "p50": 113.76000195741653, + "p90": 115.13599753379822, + "p95": 119.48800086975098, + "p99": 121.56800180673599 }, "roundtrip": { - "p50": 196.60800695419312, - "p90": 215.16799926757812, - "p95": 223.07200729846954, - "p99": 271.232008934021 + "p50": 214.65599536895752, + "p90": 219.29599344730377, + "p95": 220.12799978256226, + "p99": 223.61600399017334 }, "isolatedSum": { - "p50": 224.92799907922745, - "p90": 253.56800109148026, - "p95": 264.2560005187988, - "p99": 288.672000169754 + "p50": 237.47200518846512, + "p90": 242.78399348258972, + "p95": 250.68800151348114, + "p99": 255.2959993481636 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77701120, - "combineLogicalBytes": 77701120, - "fanoutMean": 5.29296875, - "recvTokensMax": 697, + "dispatchLogicalBytes": 70160384, + "combineLogicalBytes": 70160384, + "fanoutMean": 4.779296875, + "recvTokensMax": 987, "stragglerRank": 2, "correct": true, "samplesPooled": 600, @@ -62516,35 +64055,35 @@ "tokensPerRank": 256, "globalTokens": 2048, "dispatch": { - "p50": 145.9520012140274, - "p90": 165.69599509239197, - "p95": 176.35199427604675, - "p99": 214.49600160121918 + "p50": 160.19199788570404, + "p90": 166.4000004529953, + "p95": 167.61599481105804, + "p99": 170.43200135231018 }, "combine": { - "p50": 143.61600577831268, - "p90": 153.28000485897064, - "p95": 157.3439985513687, - "p99": 169.91999745368958 + "p50": 169.37600076198578, + "p90": 172.5119948387146, + "p95": 173.40800166130066, + "p99": 177.50400304794312 }, "roundtrip": { - "p50": 263.7439966201782, - "p90": 279.1680097579956, - "p95": 287.07200288772583, - "p99": 316.0960078239441 + "p50": 299.5840013027191, + "p90": 303.42400074005127, + "p95": 305.1519989967346, + "p99": 310.8479976654053 }, "isolatedSum": { - "p50": 289.5680069923401, - "p90": 318.9759999513626, - 
"p95": 333.69599282741547, - "p99": 384.41599905490875 + "p50": 329.5679986476898, + "p90": 338.9119952917099, + "p95": 341.0239964723587, + "p99": 347.9360044002533 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 155187200, - "combineLogicalBytes": 155187200, - "fanoutMean": 5.28564453125, - "recvTokensMax": 1372, - "stragglerRank": 4, + "dispatchLogicalBytes": 140879872, + "combineLogicalBytes": 140879872, + "fanoutMean": 4.79833984375, + "recvTokensMax": 1972, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 @@ -62553,35 +64092,35 @@ "tokensPerRank": 512, "globalTokens": 4096, "dispatch": { - "p50": 204.96000349521637, - "p90": 222.81600534915924, - "p95": 232.1919947862625, - "p99": 259.552001953125 + "p50": 233.18399488925934, + "p90": 239.26399648189545, + "p95": 240.28800427913666, + "p99": 242.94400215148926 }, "combine": { - "p50": 222.4320024251938, - "p90": 239.51999843120575, - "p95": 245.2480047941208, - "p99": 269.3760097026825 + "p50": 263.5839879512787, + "p90": 268.70399713516235, + "p95": 270.27198672294617, + "p99": 274.1760015487671 }, "roundtrip": { - "p50": 400.83199739456177, - "p90": 421.7279851436615, - "p95": 431.3279986381531, - "p99": 482.14399814605713 + "p50": 471.71199321746826, + "p90": 476.639986038208, + "p95": 478.5600006580353, + "p99": 481.3440144062042 }, "isolatedSum": { - "p50": 427.39200592041016, - "p90": 462.336003780365, - "p95": 477.4399995803833, - "p99": 528.9280116558075 + "p50": 496.767982840538, + "p90": 507.9679936170578, + "p95": 510.5599910020828, + "p99": 517.1200037002563 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 311162880, - "combineLogicalBytes": 311162880, - "fanoutMean": 5.299072265625, - "recvTokensMax": 2761, - "stragglerRank": 4, + "dispatchLogicalBytes": 282333184, + "combineLogicalBytes": 282333184, + "fanoutMean": 4.80810546875, + "recvTokensMax": 3936, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 @@ -62590,35 +64129,35 @@ 
"tokensPerRank": 1024, "globalTokens": 8192, "dispatch": { - "p50": 313.76001238822937, - "p90": 342.24000573158264, - "p95": 361.11998558044434, - "p99": 480.3520143032074 + "p50": 377.27999687194824, + "p90": 383.35999846458435, + "p95": 385.18399000167847, + "p99": 387.84000277519226 }, "combine": { - "p50": 359.20000076293945, - "p90": 373.79199266433716, - "p95": 381.9519877433777, - "p99": 407.77599811553955 + "p50": 446.30399346351624, + "p90": 453.44001054763794, + "p95": 455.52000403404236, + "p99": 460.89598536491394 }, "roundtrip": { - "p50": 644.2880034446716, - "p90": 664.1600131988525, - "p95": 676.4799952507019, - "p99": 748.8639950752258 + "p50": 797.0240116119385, + "p90": 804.4800162315369, + "p95": 807.1039915084839, + "p99": 811.6480112075806 }, "isolatedSum": { - "p50": 672.9600131511688, - "p90": 716.0319983959198, - "p95": 743.071973323822, - "p99": 888.128012418747 + "p50": 823.5839903354645, + "p90": 836.8000090122223, + "p95": 840.7039940357208, + "p99": 848.7359881401062 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 619974656, - "combineLogicalBytes": 619974656, - "fanoutMean": 5.279052734375, - "recvTokensMax": 5481, - "stragglerRank": 5, + "dispatchLogicalBytes": 566716416, + "combineLogicalBytes": 566716416, + "fanoutMean": 4.8255615234375, + "recvTokensMax": 7855, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 @@ -62627,35 +64166,35 @@ "tokensPerRank": 2048, "globalTokens": 16384, "dispatch": { - "p50": 538.3679866790771, - "p90": 557.1839809417725, - "p95": 566.0160183906555, - "p99": 608.0639958381653 + "p50": 676.4479875564575, + "p90": 686.8799924850464, + "p95": 690.5279755592346, + "p99": 791.9679880142212 }, "combine": { - "p50": 618.9759969711304, - "p90": 630.3359866142273, - "p95": 636.2559795379639, - "p99": 653.5680294036865 + "p50": 796.3520288467407, + "p90": 808.4160089492798, + "p95": 811.3920092582703, + "p99": 820.5440044403076 }, "roundtrip": { - "p50": 1131.2960386276245, - 
"p90": 1151.263952255249, - "p95": 1159.0080261230469, - "p99": 1297.9520559310913 + "p50": 1445.5360174179077, + "p90": 1457.311987876892, + "p95": 1460.6399536132812, + "p99": 1468.2879447937012 }, "isolatedSum": { - "p50": 1157.3439836502075, - "p90": 1187.5199675559998, - "p95": 1202.2719979286194, - "p99": 1261.6320252418518 + "p50": 1472.8000164031982, + "p90": 1495.2960014343262, + "p95": 1501.9199848175049, + "p99": 1612.5119924545288 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1240020992, - "combineLogicalBytes": 1240020992, - "fanoutMean": 5.27935791015625, - "recvTokensMax": 10883, - "stragglerRank": 5, + "dispatchLogicalBytes": 1132285952, + "combineLogicalBytes": 1132285952, + "fanoutMean": 4.8206787109375, + "recvTokensMax": 15694, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 @@ -62664,35 +64203,35 @@ "tokensPerRank": 4096, "globalTokens": 32768, "dispatch": { - "p50": 984.2240214347839, - "p90": 1003.5840272903442, - "p95": 1015.2319669723511, - "p99": 1056.480050086975 + "p50": 1284.8639488220215, + "p90": 1296.3199615478516, + "p95": 1299.7759580612183, + "p99": 1306.5279722213745 }, "combine": { - "p50": 1093.9840078353882, - "p90": 1107.9679727554321, - "p95": 1119.9040412902832, - "p99": 1297.055959701538 + "p50": 1503.5840272903442, + "p90": 1517.2799825668335, + "p95": 1524.2880582809448, + "p99": 1540.0960445404053 }, "roundtrip": { - "p50": 2046.5600490570068, - "p90": 2070.3680515289307, - "p95": 2092.5118923187256, - "p99": 2573.024034500122 + "p50": 2760.960102081299, + "p90": 2775.10404586792, + "p95": 2783.936023712158, + "p99": 2810.0481033325195 }, "isolatedSum": { - "p50": 2078.208029270172, - "p90": 2111.5520000457764, - "p95": 2135.1360082626343, - "p99": 2353.536009788513 + "p50": 2788.4479761123657, + "p90": 2813.599944114685, + "p95": 2824.064016342163, + "p99": 2846.62401676178 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2480414720, - "combineLogicalBytes": 2480414720, - 
"fanoutMean": 5.2801513671875, - "recvTokensMax": 21702, - "stragglerRank": 4, + "dispatchLogicalBytes": 2267840512, + "combineLogicalBytes": 2267840512, + "fanoutMean": 4.82763671875, + "recvTokensMax": 31357, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 @@ -62700,16 +64239,16 @@ ] }, { - "id": "cx-db979d37", - "identity": "h200|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|uniform+eplb|8|prefill|normal|none|none|0|tuned||2225dbbdab9bf2d", - "colorKey": "h200_580d7b05", - "comparisonKey": "b1de1efab41abbdf", + "id": "cx-2b98c773", + "identity": "h100|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-mild+eplb|8|prefill|normal|none|none|0|tuned||27ddc85ded0add9", + "colorKey": "h100_e8b903ea", + "comparisonKey": "5961b4bc09451ca4", "schemaVersion": 3, - "generatedAt": "2026-06-27T00:02:37.856020+00:00", + "generatedAt": "2026-06-27T00:00:35.470349+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h200-dgxc-slurm_0", - "sku": "h200", + "runner": "h100-dgxc-slurm_16", + "sku": "h100", "backend": "deepep", "phase": "prefill", "mode": "normal", @@ -62717,17 +64256,18 @@ "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", + "topologyClass": "h100-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H200 EP8 · deepep · bf16 · uniform+eplb", + "label": "H100 EP8 · deepep · bf16 · zipf-mild+eplb", + "model": "DeepSeek-V3 (EPLB physical)", "shape": { "hidden": 7168, "topk": 8, "experts": 288, - "routing": "uniform", - "routingLabel": "uniform+eplb", + "routing": "zipf-mild", + "routingLabel": "zipf-mild+eplb", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": true, @@ -62752,18 +64292,18 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "2225dbbdab9bf2d", - "workloadId": "set:6:a426d66e479dc893", + "traceSignature": "27ddc85ded0add9", + "workloadId": 
"set:6:a224603e5a1640b8", "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": 1.006072998046875, - "eplbImbalanceAfter": 1.0000152587890625, + "eplbImbalanceBefore": 2.545684814453125, + "eplbImbalanceAfter": 1.0001495361328125, "backendVersion": "1.2.1", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28272024348", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272024348", - "createdAt": "2026-06-27T00:01:10Z", + "id": "28271968791", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271968791", + "createdAt": "2026-06-27T00:00:35.470349+00:00", "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" }, "rows": [ @@ -62771,35 +64311,35 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 115.87200313806534, - "p90": 125.88800489902496, - "p95": 131.71200454235077, - "p99": 142.46399700641632 + "p50": 112.41599917411804, + "p90": 117.18399822711945, + "p95": 118.9119964838028, + "p99": 122.91199713945389 }, "combine": { - "p50": 103.96800190210342, - "p90": 115.48800021409988, - "p95": 122.68800288438797, - "p99": 204.3199986219406 + "p50": 106.33599758148193, + "p90": 112.12799698114395, + "p95": 113.0559965968132, + "p99": 114.43199962377548 }, "roundtrip": { - "p50": 195.5839991569519, - "p90": 206.65599405765533, - "p95": 212.25599944591522, - "p99": 236.03199422359467 + "p50": 198.81600141525269, + "p90": 204.03200387954712, + "p95": 205.4080069065094, + "p99": 207.58399367332458 }, "isolatedSum": { - "p50": 219.84000504016876, - "p90": 241.37600511312485, - "p95": 254.40000742673874, - "p99": 346.78399562835693 + "p50": 218.75199675559998, + "p90": 229.3119952082634, + "p95": 231.967993080616, + "p99": 237.34399676322937 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77041664, - "combineLogicalBytes": 77041664, - "fanoutMean": 5.248046875, - "recvTokensMax": 686, - "stragglerRank": 
6, + "dispatchLogicalBytes": 78159872, + "combineLogicalBytes": 78159872, + "fanoutMean": 5.32421875, + "recvTokensMax": 702, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -62808,35 +64348,35 @@ "tokensPerRank": 256, "globalTokens": 2048, "dispatch": { - "p50": 147.39200472831726, - "p90": 165.50399363040924, - "p95": 174.20800030231476, - "p99": 197.11999595165253 + "p50": 148.44800531864166, + "p90": 151.99999511241913, + "p95": 153.3759981393814, + "p99": 156.3519984483719 }, "combine": { - "p50": 146.7839926481247, - "p90": 158.55999290943146, - "p95": 162.9440039396286, - "p99": 175.20000040531158 + "p50": 149.47199821472168, + "p90": 155.39200603961945, + "p95": 159.39199924468994, + "p99": 164.06400501728058 }, "roundtrip": { - "p50": 266.7520046234131, - "p90": 286.24001145362854, - "p95": 293.1840121746063, - "p99": 322.33598828315735 + "p50": 267.4880027770996, + "p90": 272.2879946231842, + "p95": 274.04800057411194, + "p99": 279.4879972934723 }, "isolatedSum": { - "p50": 294.17599737644196, - "p90": 324.0639865398407, - "p95": 337.15200424194336, - "p99": 372.3199963569641 + "p50": 297.92000353336334, + "p90": 307.3920011520386, + "p95": 312.76799738407135, + "p99": 320.41600346565247 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 154542080, - "combineLogicalBytes": 154542080, - "fanoutMean": 5.263671875, - "recvTokensMax": 1365, - "stragglerRank": 2, + "dispatchLogicalBytes": 156563456, + "combineLogicalBytes": 156563456, + "fanoutMean": 5.33251953125, + "recvTokensMax": 1393, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -62845,35 +64385,35 @@ "tokensPerRank": 512, "globalTokens": 4096, "dispatch": { - "p50": 203.74399423599243, - "p90": 221.11999988555908, - "p95": 229.95199263095856, - "p99": 253.08799743652344 + "p50": 201.12000405788422, + "p90": 204.48000729084015, + "p95": 206.04799687862396, + "p99": 212.22400665283203 }, "combine": { - "p50": 222.52799570560455, - "p90": 
234.72000658512115, - "p95": 238.24000358581543, - "p99": 259.3280076980591 + "p50": 229.0239930152893, + "p90": 233.95200073719025, + "p95": 236.4480048418045, + "p99": 238.52799832820892 }, "roundtrip": { - "p50": 398.17601442337036, - "p90": 415.74400663375854, - "p95": 422.04800248146057, - "p99": 459.26401019096375 + "p50": 404.06399965286255, + "p90": 408.86399149894714, + "p95": 411.0719859600067, + "p99": 431.5840005874634 }, "isolatedSum": { - "p50": 426.271989941597, - "p90": 455.84000647068024, - "p95": 468.191996216774, - "p99": 512.4160051345825 + "p50": 430.1439970731735, + "p90": 438.4320080280304, + "p95": 442.49600172042847, + "p99": 450.75200498104095 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 310589440, - "combineLogicalBytes": 310589440, - "fanoutMean": 5.289306640625, - "recvTokensMax": 2746, - "stragglerRank": 0, + "dispatchLogicalBytes": 312410112, + "combineLogicalBytes": 312410112, + "fanoutMean": 5.3203125, + "recvTokensMax": 2773, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -62882,35 +64422,35 @@ "tokensPerRank": 1024, "globalTokens": 8192, "dispatch": { - "p50": 310.36800146102905, - "p90": 327.93599367141724, - "p95": 333.72798562049866, - "p99": 371.8079924583435 + "p50": 304.0960133075714, + "p90": 309.28000807762146, + "p95": 311.64801120758057, + "p99": 479.5520007610321 }, "combine": { - "p50": 355.9679985046387, - "p90": 369.4719970226288, - "p95": 383.07198882102966, - "p99": 431.4880073070526 + "p50": 366.11199378967285, + "p90": 372.8959858417511, + "p95": 374.55999851226807, + "p99": 383.4559917449951 }, "roundtrip": { - "p50": 641.9199705123901, - "p90": 660.9920263290405, - "p95": 668.9280271530151, - "p99": 718.9760208129883 + "p50": 644.0640091896057, + "p90": 650.1439809799194, + "p95": 652.1919965744019, + "p99": 656.5120220184326 }, "isolatedSum": { - "p50": 666.3359999656677, - "p90": 697.407990694046, - "p95": 716.7999744415283, - "p99": 803.2959997653961 + "p50": 
670.2080070972443, + "p90": 682.1759939193726, + "p95": 686.2080097198486, + "p99": 863.0079925060272 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 619171840, - "combineLogicalBytes": 619171840, - "fanoutMean": 5.272216796875, - "recvTokensMax": 5467, - "stragglerRank": 0, + "dispatchLogicalBytes": 622712832, + "combineLogicalBytes": 622712832, + "fanoutMean": 5.3023681640625, + "recvTokensMax": 5498, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -62919,35 +64459,35 @@ "tokensPerRank": 2048, "globalTokens": 16384, "dispatch": { - "p50": 537.1519923210144, - "p90": 553.5680055618286, - "p95": 562.6559853553772, - "p99": 586.9759917259216 + "p50": 521.5039849281311, + "p90": 530.1120281219482, + "p95": 533.3759784698486, + "p99": 540.5120253562927 }, "combine": { - "p50": 612.1600270271301, - "p90": 625.0240206718445, - "p95": 633.8880062103271, - "p99": 660.863995552063 + "p50": 632.1920156478882, + "p90": 639.3280029296875, + "p95": 640.9599781036377, + "p99": 647.2960114479065 }, "roundtrip": { - "p50": 1119.968056678772, - "p90": 1136.064052581787, - "p95": 1145.2480554580688, - "p99": 1263.4880542755127 + "p50": 1123.9999532699585, + "p90": 1132.8959465026855, + "p95": 1135.807991027832, + "p99": 1143.5840129852295 }, "isolatedSum": { - "p50": 1149.3120193481445, - "p90": 1178.592026233673, - "p95": 1196.5439915657043, - "p99": 1247.8399872779846 + "p50": 1153.6960005760193, + "p90": 1169.4400310516357, + "p95": 1174.3359565734863, + "p99": 1187.8080368041992 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1238945792, - "combineLogicalBytes": 1238945792, - "fanoutMean": 5.2747802734375, - "recvTokensMax": 10913, - "stragglerRank": 0, + "dispatchLogicalBytes": 1245038592, + "combineLogicalBytes": 1245038592, + "fanoutMean": 5.30072021484375, + "recvTokensMax": 10955, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 @@ -62956,35 +64496,35 @@ "tokensPerRank": 4096, "globalTokens": 32768, 
"dispatch": { - "p50": 1003.8080215454102, - "p90": 1027.008056640625, - "p95": 1034.432053565979, - "p99": 1060.1919889450073 + "p50": 990.2399778366089, + "p90": 1009.4720125198364, + "p95": 1016.1279439926147, + "p99": 1026.8160104751587 }, "combine": { - "p50": 1111.0399961471558, - "p90": 1125.8879899978638, - "p95": 1135.3280544281006, - "p99": 1165.727972984314 + "p50": 1164.736032485962, + "p90": 1174.015998840332, + "p95": 1177.2799491882324, + "p99": 1183.9359998703003 }, "roundtrip": { - "p50": 2077.5039196014404, - "p90": 2101.6640663146973, - "p95": 2114.016056060791, - "p99": 2324.8000144958496 + "p50": 2116.895914077759, + "p90": 2137.7599239349365, + "p95": 2143.712043762207, + "p99": 2157.8240394592285 }, "isolatedSum": { - "p50": 2114.848017692566, - "p90": 2152.8960466384888, - "p95": 2169.7601079940796, - "p99": 2225.9199619293213 + "p50": 2154.976010322571, + "p90": 2183.4880113601685, + "p95": 2193.407893180847, + "p99": 2210.752010345459 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2481747968, - "combineLogicalBytes": 2481747968, - "fanoutMean": 5.282989501953125, - "recvTokensMax": 21789, - "stragglerRank": 0, + "dispatchLogicalBytes": 2489460736, + "combineLogicalBytes": 2489460736, + "fanoutMean": 5.299407958984375, + "recvTokensMax": 21864, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -62992,16 +64532,16 @@ ] }, { - "id": "cx-59b7e35e", - "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|prefill|normal|none|none|0|tuned||4caecd33bedf786", - "colorKey": "h200_b6aa6110", - "comparisonKey": "b89b8b0279afe699", + "id": "cx-0a66c8a3", + "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-moderate|8|prefill|normal|none|none|0|tuned||b5217e990b95f86", + "colorKey": "h100_552a4b73", + "comparisonKey": "44cbfb11e1668dc5", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:56:59.891356+00:00", + "generatedAt": "2026-06-27T00:01:00.044863+00:00", "status": 
"valid", "publicationStatus": "official", - "runner": "h200-dgxc-slurm_2", - "sku": "h200", + "runner": "h100-dgxc-slurm_03", + "sku": "h100", "backend": "deepep", "phase": "prefill", "mode": "normal", @@ -63009,17 +64549,18 @@ "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", + "topologyClass": "h100-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H200 EP8 · deepep · bf16 · zipf", + "label": "H100 EP8 · deepep · bf16 · zipf-moderate", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, "experts": 256, - "routing": "zipf", - "routingLabel": "zipf", + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, @@ -63044,8 +64585,8 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "4caecd33bedf786", - "workloadId": "set:3:830e36e88869e222", + "traceSignature": "b5217e990b95f86", + "workloadId": "set:6:6709a02c31933a9f", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -63053,9 +64594,9 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271848591", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271848591", - "createdAt": "2026-06-26T23:55:33Z", + "id": "28271978834", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271978834", + "createdAt": "2026-06-27T00:01:00.044863+00:00", "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" }, "rows": [ @@ -63063,35 +64604,72 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 122.46400117874146, - "p90": 133.85599851608276, - "p95": 141.15199446678162, - "p99": 168.12799870967865 + "p50": 124.7360035777092, + "p90": 130.68799674510956, + "p95": 132.03200697898865, + "p99": 
136.4479959011078 }, "combine": { - "p50": 112.5440001487732, - "p90": 125.791996717453, - "p95": 132.1599930524826, - "p99": 143.327996134758 + "p50": 112.5119999051094, + "p90": 114.17599767446518, + "p95": 115.07199704647064, + "p99": 120.67200243473053 }, "roundtrip": { - "p50": 215.7440036535263, - "p90": 240.03200232982635, - "p95": 247.13599681854248, - "p99": 281.5360128879547 + "p50": 215.16799926757812, + "p90": 219.35999393463135, + "p95": 221.11999988555908, + "p99": 229.18400168418884 }, "isolatedSum": { - "p50": 235.00800132751465, - "p90": 259.64799523353577, - "p95": 273.3119875192642, - "p99": 311.45599484443665 + "p50": 237.2480034828186, + "p90": 244.86399441957474, + "p95": 247.1040040254593, + "p99": 257.1199983358383 }, "roundtripMeasured": true, "dispatchLogicalBytes": 49946624, "combineLogicalBytes": 49946624, "fanoutMean": 3.40234375, "recvTokensMax": 1022, - "stragglerRank": 5, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 163.7440025806427, + "p90": 167.26399958133698, + "p95": 168.44800114631653, + "p99": 174.6560037136078 + }, + "combine": { + "p50": 164.51199352741241, + "p90": 169.50400173664093, + "p95": 170.1440066099167, + "p99": 174.14399981498718 + }, + "roundtrip": { + "p50": 297.91998863220215, + "p90": 302.72001028060913, + "p95": 304.32000756263733, + "p99": 306.5600097179413 + }, + "isolatedSum": { + "p50": 328.2559961080551, + "p90": 336.7680013179779, + "p95": 338.5920077562332, + "p99": 348.80000352859497 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 100509696, + "combineLogicalBytes": 100509696, + "fanoutMean": 3.42333984375, + "recvTokensMax": 2046, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -63100,28 +64678,28 @@ "tokensPerRank": 512, "globalTokens": 4096, "dispatch": { - "p50": 238.0480021238327, - "p90": 249.439999461174, - "p95": 253.34399938583374, - 
"p99": 271.39198780059814 + "p50": 237.44000494480133, + "p90": 241.82400107383728, + "p95": 243.0720031261444, + "p99": 247.74399399757385 }, "combine": { - "p50": 259.3280076980591, - "p90": 273.6639976501465, - "p95": 278.1440019607544, - "p99": 748.5759854316711 + "p50": 264.51200246810913, + "p90": 268.41598749160767, + "p95": 271.5519964694977, + "p99": 281.6320061683655 }, "roundtrip": { - "p50": 472.7039933204651, - "p90": 492.76798963546753, - "p95": 497.5360035896301, - "p99": 524.8640179634094 + "p50": 475.5840003490448, + "p90": 482.59198665618896, + "p95": 490.30399322509766, + "p99": 504.96000051498413 }, "isolatedSum": { - "p50": 497.3760098218918, - "p90": 523.1039971113205, - "p95": 531.4880013465881, - "p99": 1019.9679732322693 + "p50": 501.95200741291046, + "p90": 510.23998856544495, + "p95": 514.6239995956421, + "p99": 529.3760001659393 }, "roundtripMeasured": true, "dispatchLogicalBytes": 201678848, @@ -63133,39 +64711,113 @@ "samplesPooled": 600, "trials": 3 }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 379.040002822876, + "p90": 385.72800159454346, + "p95": 388.2240056991577, + "p99": 414.3359959125519 + }, + "combine": { + "p50": 447.00801372528076, + "p90": 452.4799883365631, + "p95": 453.5039961338043, + "p99": 456.89600706100464 + }, + "roundtrip": { + "p50": 800.2240061759949, + "p90": 805.791974067688, + "p95": 807.744026184082, + "p99": 811.680018901825 + }, + "isolatedSum": { + "p50": 826.0480165481567, + "p90": 838.2079899311066, + "p95": 841.728001832962, + "p99": 871.2320029735565 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 405035008, + "combineLogicalBytes": 405035008, + "fanoutMean": 3.4488525390625, + "recvTokensMax": 8189, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, { "tokensPerRank": 2048, "globalTokens": 16384, "dispatch": { - "p50": 671.0079908370972, - "p90": 682.7840209007263, - "p95": 691.6159987449646, - "p99": 782.4000120162964 + 
"p50": 675.3919720649719, + "p90": 695.6800222396851, + "p95": 707.8400254249573, + "p99": 910.8160138130188 }, "combine": { - "p50": 788.0319952964783, - "p90": 803.0400276184082, - "p95": 810.4000091552734, - "p99": 879.2639970779419 + "p50": 819.2319869995117, + "p90": 829.6639919281006, + "p95": 833.2160115242004, + "p99": 841.3439989089966 }, "roundtrip": { - "p50": 1432.5439929962158, - "p90": 1457.2800397872925, - "p95": 1470.2719449996948, - "p99": 1641.3120031356812 + "p50": 1459.9679708480835, + "p90": 1476.9599437713623, + "p95": 1481.8559885025024, + "p99": 1501.2799501419067 }, "isolatedSum": { - "p50": 1459.0399861335754, - "p90": 1485.8240485191345, - "p95": 1502.016007900238, - "p99": 1661.6640090942383 + "p50": 1494.6239590644836, + "p90": 1525.3440141677856, + "p95": 1541.0560369491577, + "p99": 1752.1600127220154 }, "roundtripMeasured": true, "dispatchLogicalBytes": 808822784, "combineLogicalBytes": 808822784, "fanoutMean": 3.44354248046875, "recvTokensMax": 16380, - "stragglerRank": 5, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1275.10404586792, + "p90": 1287.5200510025024, + "p95": 1291.8399572372437, + "p99": 1346.0479974746704 + }, + "combine": { + "p50": 1538.7200117111206, + "p90": 1550.3679513931274, + "p95": 1555.232048034668, + "p99": 1607.9360246658325 + }, + "roundtrip": { + "p50": 2787.168025970459, + "p90": 2798.784017562866, + "p95": 2802.9439449310303, + "p99": 2818.4640407562256 + }, + "isolatedSum": { + "p50": 2813.8240575790405, + "p90": 2837.88800239563, + "p95": 2847.0720052719116, + "p99": 2953.984022140503 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1619795968, + "combineLogicalBytes": 1619795968, + "fanoutMean": 3.4481201171875, + "recvTokensMax": 32761, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 @@ -63173,16 +64825,16 @@ ] }, { - "id": "cx-520b6c38", - 
"identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|prefill|normal|none|none|0|tuned||b5217e990b95f86", - "colorKey": "h200_b6aa6110", - "comparisonKey": "b89b8b0279afe699", + "id": "cx-7114a01f", + "identity": "h100|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-moderate+eplb|8|prefill|normal|none|none|0|tuned||2b57a75d27f5b39", + "colorKey": "h100_106a51ab", + "comparisonKey": "80b7db884aaf5a8c", "schemaVersion": 3, - "generatedAt": "2026-06-27T00:03:30.997265+00:00", + "generatedAt": "2026-06-27T00:01:17.822701+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h200-dgxc-slurm_9", - "sku": "h200", + "runner": "h100-dgxc-slurm_10", + "sku": "h100", "backend": "deepep", "phase": "prefill", "mode": "normal", @@ -63190,20 +64842,21 @@ "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", + "topologyClass": "h100-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H200 EP8 · deepep · bf16 · zipf", + "label": "H100 EP8 · deepep · bf16 · zipf-moderate+eplb", + "model": "DeepSeek-V3 (EPLB physical)", "shape": { "hidden": 7168, "topk": 8, - "experts": 256, - "routing": "zipf", - "routingLabel": "zipf", + "experts": 288, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate+eplb", "routingStep": 0, "unevenTokens": "none", - "eplbEnabled": false, + "eplbEnabled": true, "dispatchDtype": "bf16", "activationProfile": "normal", "combineQuantMode": "none" @@ -63225,18 +64878,18 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "b5217e990b95f86", - "workloadId": "set:6:830e36e88869e222", + "traceSignature": "2b57a75d27f5b39", + "workloadId": "set:6:6709a02c31933a9f", "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, + "eplbImbalanceBefore": 4.895263671875, + "eplbImbalanceAfter": 1.0000902811686199, 
"backendVersion": "1.2.1", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28272049186", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272049186", - "createdAt": "2026-06-27T00:01:57Z", + "id": "28271982260", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271982260", + "createdAt": "2026-06-27T00:01:17.822701+00:00", "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" }, "rows": [ @@ -63244,35 +64897,35 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 122.14399874210358, - "p90": 134.14399325847626, - "p95": 146.01600170135498, - "p99": 162.62400150299072 + "p50": 114.30399864912033, + "p90": 120.31999975442886, + "p95": 121.56800180673599, + "p99": 125.02400577068329 }, "combine": { - "p50": 112.92800307273865, - "p90": 121.11999839544296, - "p95": 126.68800354003906, - "p99": 141.50400459766388 + "p50": 106.27199709415436, + "p90": 111.48799955844879, + "p95": 111.77600175142288, + "p99": 114.1119971871376 }, "roundtrip": { - "p50": 214.30400013923645, - "p90": 228.28799486160278, - "p95": 232.57599771022797, - "p99": 247.48800694942474 + "p50": 198.0160027742386, + "p90": 201.82399451732635, + "p95": 203.36000621318817, + "p99": 207.35999941825867 }, "isolatedSum": { - "p50": 235.07200181484222, - "p90": 255.26399165391922, - "p95": 272.70400524139404, - "p99": 304.1280061006546 + "p50": 220.5759957432747, + "p90": 231.80799931287766, + "p95": 233.34400355815887, + "p99": 239.1360029578209 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 49946624, - "combineLogicalBytes": 49946624, - "fanoutMean": 3.40234375, - "recvTokensMax": 1022, - "stragglerRank": 0, + "dispatchLogicalBytes": 77385728, + "combineLogicalBytes": 77385728, + "fanoutMean": 5.271484375, + "recvTokensMax": 691, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -63281,35 +64934,35 @@ 
"tokensPerRank": 256, "globalTokens": 2048, "dispatch": { - "p50": 170.43200135231018, - "p90": 187.71199882030487, - "p95": 196.0960030555725, - "p99": 223.00800681114197 + "p50": 144.9279934167862, + "p90": 149.85600113868713, + "p95": 151.45599842071533, + "p99": 155.87200224399567 }, "combine": { - "p50": 163.87200355529785, - "p90": 181.60000443458557, - "p95": 186.36800348758698, - "p99": 197.02400267124176 + "p50": 151.19999647140503, + "p90": 154.84799444675446, + "p95": 156.63999319076538, + "p99": 160.73599457740784 }, "roundtrip": { - "p50": 303.8400113582611, - "p90": 328.000009059906, - "p95": 333.0560028553009, - "p99": 366.2079870700836 + "p50": 266.11199975013733, + "p90": 271.5519964694977, + "p95": 273.6000120639801, + "p99": 277.1199941635132 }, "isolatedSum": { - "p50": 334.30400490760803, - "p90": 369.31200325489044, - "p95": 382.4640065431595, - "p99": 420.0320094823837 + "p50": 296.1279898881912, + "p90": 304.7039955854416, + "p95": 308.0959916114807, + "p99": 316.6079968214035 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 100509696, - "combineLogicalBytes": 100509696, - "fanoutMean": 3.42333984375, - "recvTokensMax": 2046, - "stragglerRank": 4, + "dispatchLogicalBytes": 155172864, + "combineLogicalBytes": 155172864, + "fanoutMean": 5.28515625, + "recvTokensMax": 1378, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -63318,34 +64971,34 @@ "tokensPerRank": 512, "globalTokens": 4096, "dispatch": { - "p50": 235.83999276161194, - "p90": 244.6720004081726, - "p95": 248.86399507522583, - "p99": 265.4080092906952 + "p50": 200.32000541687012, + "p90": 204.12799715995789, + "p95": 205.4399996995926, + "p99": 208.38400721549988 }, "combine": { - "p50": 259.90399718284607, - "p90": 269.6639895439148, - "p95": 276.06400847435, - "p99": 299.0399897098541 + "p50": 227.58400440216064, + "p90": 233.75999927520752, + "p95": 234.55999791622162, + "p99": 238.3359968662262 }, "roundtrip": { - "p50": 473.471999168396, - 
"p90": 492.12801456451416, - "p95": 498.3679950237274, - "p99": 528.544008731842 + "p50": 402.0479917526245, + "p90": 407.1039855480194, + "p95": 408.735990524292, + "p99": 412.06398606300354 }, "isolatedSum": { - "p50": 495.743989944458, - "p90": 514.3359899520874, - "p95": 524.9280035495758, - "p99": 564.4479990005493 + "p50": 427.90400981903076, + "p90": 437.8879964351654, + "p95": 439.9999976158142, + "p99": 446.7200040817261 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 201678848, - "combineLogicalBytes": 201678848, - "fanoutMean": 3.4345703125, - "recvTokensMax": 4094, + "dispatchLogicalBytes": 310546432, + "combineLogicalBytes": 310546432, + "fanoutMean": 5.28857421875, + "recvTokensMax": 2745, "stragglerRank": 7, "correct": true, "samplesPooled": 600, @@ -63355,35 +65008,35 @@ "tokensPerRank": 1024, "globalTokens": 8192, "dispatch": { - "p50": 381.087988615036, - "p90": 397.47199416160583, - "p95": 404.35200929641724, - "p99": 493.4079945087433 + "p50": 303.16799879074097, + "p90": 307.3920011520386, + "p95": 308.76800417900085, + "p99": 313.27998638153076 }, "combine": { - "p50": 437.27999925613403, - "p90": 450.8799910545349, - "p95": 458.3039879798889, - "p99": 476.25601291656494 + "p50": 362.2399866580963, + "p90": 368.76800656318665, + "p95": 370.3039884567261, + "p99": 372.70399928092957 }, "roundtrip": { - "p50": 790.5600070953369, - "p90": 804.9920201301575, - "p95": 813.9200210571289, - "p99": 841.5359854698181 + "p50": 641.1839723587036, + "p90": 647.9359865188599, + "p95": 650.7520079612732, + "p99": 656.6399931907654 }, "isolatedSum": { - "p50": 818.36798787117, - "p90": 848.3519852161407, - "p95": 862.6559972763062, - "p99": 969.6640074253082 + "p50": 665.4079854488373, + "p90": 676.1600077152252, + "p95": 679.0719926357269, + "p99": 685.9839856624603 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 405035008, - "combineLogicalBytes": 405035008, - "fanoutMean": 3.4488525390625, - "recvTokensMax": 8189, - "stragglerRank": 5, + 
"dispatchLogicalBytes": 620619776, + "combineLogicalBytes": 620619776, + "fanoutMean": 5.2845458984375, + "recvTokensMax": 5526, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -63392,35 +65045,35 @@ "tokensPerRank": 2048, "globalTokens": 16384, "dispatch": { - "p50": 672.1280217170715, - "p90": 685.0879788398743, - "p95": 689.9200081825256, - "p99": 743.4560060501099 + "p50": 520.9919810295105, + "p90": 531.4239859580994, + "p95": 534.4640016555786, + "p99": 541.1840081214905 }, "combine": { - "p50": 783.1360101699829, - "p90": 793.0560111999512, - "p95": 796.6399788856506, - "p99": 806.5599799156189 + "p50": 639.3600106239319, + "p90": 650.592029094696, + "p95": 654.5600295066833, + "p99": 660.4800224304199 }, "roundtrip": { - "p50": 1425.7919788360596, - "p90": 1442.0160055160522, - "p95": 1455.4879665374756, - "p99": 1550.75204372406 + "p50": 1128.864049911499, + "p90": 1138.2720470428467, + "p95": 1141.2479877471924, + "p99": 1146.3040113449097 }, "isolatedSum": { - "p50": 1455.2640318870544, - "p90": 1478.1439900398254, - "p95": 1486.5599870681763, - "p99": 1550.0159859657288 + "p50": 1160.3519916534424, + "p90": 1182.0160150527954, + "p95": 1189.024031162262, + "p99": 1201.6640305519104 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 808822784, - "combineLogicalBytes": 808822784, - "fanoutMean": 3.44354248046875, - "recvTokensMax": 16380, - "stragglerRank": 7, + "dispatchLogicalBytes": 1239175168, + "combineLogicalBytes": 1239175168, + "fanoutMean": 5.2757568359375, + "recvTokensMax": 11165, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -63429,35 +65082,35 @@ "tokensPerRank": 4096, "globalTokens": 32768, "dispatch": { - "p50": 1269.1839933395386, - "p90": 1284.1919660568237, - "p95": 1291.8720245361328, - "p99": 1339.2640352249146 + "p50": 1005.5999755859375, + "p90": 1031.7120552062988, + "p95": 1038.3360385894775, + "p99": 1051.103949546814 }, "combine": { - "p50": 1472.8000164031982, - 
"p90": 1489.8879528045654, - "p95": 1502.17604637146, - "p99": 1692.639946937561 + "p50": 1158.9759588241577, + "p90": 1167.8719520568848, + "p95": 1169.9199676513672, + "p99": 1174.6560335159302 }, "roundtrip": { - "p50": 2711.7760181427, - "p90": 2730.015993118286, - "p95": 2753.5040378570557, - "p99": 2926.464080810547 + "p50": 2121.5360164642334, + "p90": 2138.2720470428467, + "p95": 2142.6239013671875, + "p99": 2150.0160694122314 }, "isolatedSum": { - "p50": 2741.984009742737, - "p90": 2774.079918861389, - "p95": 2794.048070907593, - "p99": 3031.9039821624756 + "p50": 2164.575934410095, + "p90": 2199.5840072631836, + "p95": 2208.2560062408447, + "p99": 2225.759983062744 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1619795968, - "combineLogicalBytes": 1619795968, - "fanoutMean": 3.4481201171875, - "recvTokensMax": 32761, - "stragglerRank": 2, + "dispatchLogicalBytes": 2481604608, + "combineLogicalBytes": 2481604608, + "fanoutMean": 5.282684326171875, + "recvTokensMax": 22165, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -63465,16 +65118,16 @@ ] }, { - "id": "cx-f5907eae", - "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|prefill|normal|none|none|0|tuned||3dd868cb33839a3", - "colorKey": "h200_c5b3365a", - "comparisonKey": "d19848fb38a35ed8", + "id": "cx-71b6107f", + "identity": "h100|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|prefill|normal|none|none|0|tuned||2b57a75d27f5b39", + "colorKey": "h100_769b9c4b", + "comparisonKey": "24fc2cc385891299", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:57:20.998823+00:00", + "generatedAt": "2026-06-27T00:00:08.090138+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h200-dgxc-slurm_3", - "sku": "h200", + "runner": "h100-dgxc-slurm_05", + "sku": "h100", "backend": "deepep", "phase": "prefill", "mode": "normal", @@ -63482,20 +65135,21 @@ "suite": "backend-default", "comparisonClass": "standardized", 
"measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", + "topologyClass": "h100-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H200 EP8 · deepep · bf16 · zipf-heavy", + "label": "H100 EP8 · deepep · bf16 · zipf+eplb", + "model": "DeepSeek-V3 (EPLB physical)", "shape": { "hidden": 7168, "topk": 8, - "experts": 256, - "routing": "zipf-heavy", - "routingLabel": "zipf-heavy", + "experts": 288, + "routing": "zipf", + "routingLabel": "zipf+eplb", "routingStep": 0, "unevenTokens": "none", - "eplbEnabled": false, + "eplbEnabled": true, "dispatchDtype": "bf16", "activationProfile": "normal", "combineQuantMode": "none" @@ -63517,18 +65171,18 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "3dd868cb33839a3", - "workloadId": "set:3:1ca614e23cc66be1", + "traceSignature": "2b57a75d27f5b39", + "workloadId": "set:6:830e36e88869e222", "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, + "eplbImbalanceBefore": 4.895263671875, + "eplbImbalanceAfter": 1.0000902811686199, "backendVersion": "1.2.1", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271855852", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271855852", - "createdAt": "2026-06-26T23:55:47Z", + "id": "28271955196", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271955196", + "createdAt": "2026-06-27T00:00:08.090138+00:00", "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" }, "rows": [ @@ -63536,35 +65190,72 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 117.72800236940384, - "p90": 127.48800218105316, - "p95": 133.91999900341034, - "p99": 146.11199498176575 + "p50": 111.07199639081955, + "p90": 115.93600362539291, + "p95": 118.14399808645248, + "p99": 121.08799815177917 }, "combine": { - "p50": 
107.29599744081497, - "p90": 117.3119992017746, - "p95": 122.43200093507767, - "p99": 134.11200046539307 + "p50": 106.08000308275223, + "p90": 111.26399785280228, + "p95": 112.38399893045425, + "p99": 114.14399743080139 }, "roundtrip": { - "p50": 205.85599541664124, - "p90": 220.09600698947906, - "p95": 228.5120040178299, - "p99": 244.09599602222443 + "p50": 195.68000733852386, + "p90": 201.1840045452118, + "p95": 202.39999890327454, + "p99": 204.96000349521637 }, "isolatedSum": { - "p50": 225.0239998102188, - "p90": 244.80000138282776, - "p95": 256.351999938488, - "p99": 280.2239954471588 + "p50": 217.15199947357178, + "p90": 227.2000014781952, + "p95": 230.52799701690674, + "p99": 235.23199558258057 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 22650880, - "combineLogicalBytes": 22650880, - "fanoutMean": 1.54296875, - "recvTokensMax": 1024, - "stragglerRank": 4, + "dispatchLogicalBytes": 77385728, + "combineLogicalBytes": 77385728, + "fanoutMean": 5.271484375, + "recvTokensMax": 691, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 144.48000490665436, + "p90": 148.0640023946762, + "p95": 149.6960073709488, + "p99": 153.60000729560852 + }, + "combine": { + "p50": 148.92800152301788, + "p90": 154.33600544929504, + "p95": 155.008003115654, + "p99": 157.8879952430725 + }, + "roundtrip": { + "p50": 262.81601190567017, + "p90": 266.975998878479, + "p95": 268.3199942111969, + "p99": 272.44800329208374 + }, + "isolatedSum": { + "p50": 293.40800642967224, + "p90": 302.40000784397125, + "p95": 304.7040104866028, + "p99": 311.48800253868103 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155172864, + "combineLogicalBytes": 155172864, + "fanoutMean": 5.28515625, + "recvTokensMax": 1378, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -63573,35 +65264,72 @@ "tokensPerRank": 512, "globalTokens": 4096, "dispatch": { - 
"p50": 232.89600014686584, - "p90": 245.12000381946564, - "p95": 253.4399926662445, - "p99": 292.03200340270996 + "p50": 199.68000054359436, + "p90": 203.42400670051575, + "p95": 205.47200739383698, + "p99": 222.52799570560455 }, "combine": { - "p50": 245.34399807453156, - "p90": 260.25599241256714, - "p95": 269.27998661994934, - "p99": 297.37600684165955 + "p50": 227.80799865722656, + "p90": 232.9919934272766, + "p95": 234.3679964542389, + "p99": 237.34399676322937 }, "roundtrip": { - "p50": 454.68801259994507, - "p90": 472.6080000400543, - "p95": 486.6560101509094, - "p99": 522.4639773368835 + "p50": 399.83999729156494, + "p90": 405.023992061615, + "p95": 406.3040018081665, + "p99": 414.43198919296265 }, "isolatedSum": { - "p50": 478.2399982213974, - "p90": 505.3759962320328, - "p95": 522.7199792861938, - "p99": 589.4080102443695 + "p50": 427.4879992008209, + "p90": 436.41600012779236, + "p95": 439.84000384807587, + "p99": 459.8719924688339 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 91521024, - "combineLogicalBytes": 91521024, - "fanoutMean": 1.55859375, - "recvTokensMax": 4096, - "stragglerRank": 7, + "dispatchLogicalBytes": 310546432, + "combineLogicalBytes": 310546432, + "fanoutMean": 5.28857421875, + "recvTokensMax": 2745, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 303.5840094089508, + "p90": 309.471994638443, + "p95": 310.4960024356842, + "p99": 313.82399797439575 + }, + "combine": { + "p50": 362.8480136394501, + "p90": 367.74399876594543, + "p95": 369.6320056915283, + "p99": 523.7119793891907 + }, + "roundtrip": { + "p50": 640.8320069313049, + "p90": 648.576021194458, + "p95": 651.2960195541382, + "p99": 733.4399819374084 + }, + "isolatedSum": { + "p50": 666.4320230484009, + "p90": 677.2159934043884, + "p95": 680.1280081272125, + "p99": 837.5359773635864 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 620619776, + 
"combineLogicalBytes": 620619776, + "fanoutMean": 5.2845458984375, + "recvTokensMax": 5526, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -63610,70 +65338,108 @@ "tokensPerRank": 2048, "globalTokens": 16384, "dispatch": { - "p50": 662.335991859436, - "p90": 673.632025718689, - "p95": 681.2160015106201, - "p99": 744.5759773254395 + "p50": 524.3200063705444, + "p90": 533.5680246353149, + "p95": 536.191999912262, + "p99": 542.2080159187317 }, "combine": { - "p50": 772.5759744644165, - "p90": 791.8720245361328, - "p95": 806.6239953041077, - "p99": 855.2640080451965 + "p50": 643.9039707183838, + "p90": 653.1839966773987, + "p95": 655.8719873428345, + "p99": 661.1520051956177 }, "roundtrip": { - "p50": 1405.9840440750122, - "p90": 1435.2960586547852, - "p95": 1455.7119607925415, - "p99": 1716.3519859313965 + "p50": 1135.2959871292114, + "p90": 1144.8320150375366, + "p95": 1148.4800577163696, + "p99": 1153.92005443573 }, "isolatedSum": { - "p50": 1434.9119663238525, - "p90": 1465.5040502548218, - "p95": 1487.8399968147278, - "p99": 1599.839985370636 + "p50": 1168.2239770889282, + "p90": 1186.7520213127136, + "p95": 1192.0639872550964, + "p99": 1203.3600211143494 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 368062464, - "combineLogicalBytes": 368062464, - "fanoutMean": 1.5670166015625, - "recvTokensMax": 16384, - "stragglerRank": 4, + "dispatchLogicalBytes": 1239175168, + "combineLogicalBytes": 1239175168, + "fanoutMean": 5.2757568359375, + "recvTokensMax": 11165, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 - } - ] - }, + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1020.4800367355347, + "p90": 1048.8959550857544, + "p95": 1056.2560558319092, + "p99": 1071.4880228042603 + }, + "combine": { + "p50": 1164.6720170974731, + "p90": 1173.375964164734, + "p95": 1177.024006843567, + "p99": 1183.135986328125 + }, + "roundtrip": { + "p50": 2140.575885772705, + "p90": 
2157.248020172119, + "p95": 2164.031982421875, + "p99": 2171.4560985565186 + }, + "isolatedSum": { + "p50": 2185.152053833008, + "p90": 2222.2719192504883, + "p95": 2233.280062675476, + "p99": 2254.6240091323853 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2481604608, + "combineLogicalBytes": 2481604608, + "fanoutMean": 5.282684326171875, + "recvTokensMax": 22165, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, { - "id": "cx-75dcaec2", - "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|prefill|normal|none|none|0|tuned||bbcd1d9d8d1e4fe", - "colorKey": "h200_c5b3365a", - "comparisonKey": "d19848fb38a35ed8", + "id": "cx-19a8d159", + "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|normalized|0.18|64d989e2e2a6b31", + "colorKey": "h100_7b3247bf", + "comparisonKey": "0ac8f8817cb63abb", "schemaVersion": 3, - "generatedAt": "2026-06-27T00:04:55.820445+00:00", + "generatedAt": "2026-06-26T17:30:47.651979+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h200-dgxc-slurm_8", - "sku": "h200", + "runner": "h100-dgxc-slurm_17", + "sku": "h100", "backend": "deepep", "phase": "prefill", "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", + "resourceMode": "normalized", + "suite": "resource-constrained", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", + "topologyClass": "h100-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H200 EP8 · deepep · bf16 · zipf-heavy", + "label": "H100 EP8 · deepep · bf16 (norm)", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, "experts": 256, - "routing": "zipf-heavy", - "routingLabel": "zipf-heavy", + "routing": "uniform", + "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, @@ -63682,14 
+65448,14 @@ "combineQuantMode": "none" }, "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, + "requestedFraction": 0.18, + "achievedFraction": 0.1818, + "configuredUnits": 24, "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", + "resourceClass": "resource-constrained", + "conformanceClass": "resource-conforming", "fixedKernel": false, - "paretoEligible": false + "paretoEligible": true }, "placement": { "kind": "packed", @@ -63698,8 +65464,8 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "bbcd1d9d8d1e4fe", - "workloadId": "set:6:1ca614e23cc66be1", + "traceSignature": "64d989e2e2a6b31", + "workloadId": "set:6:a426d66e479dc893", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -63707,45 +65473,45 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28272093905", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272093905", - "createdAt": "2026-06-27T00:03:20Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28254315809", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254315809", + "createdAt": "2026-06-26T17:30:47.651979+00:00", + "sha": "60dec7d70f554e252fec87709e2be52752947db1" }, "rows": [ { "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 118.14399808645248, - "p90": 130.17599284648895, - "p95": 135.5839967727661, - "p99": 147.07200229167938 + "p50": 110.46399921178818, + "p90": 116.35199934244156, + "p95": 117.8240031003952, + "p99": 166.01599752902985 }, "combine": { - "p50": 108.83200168609619, - "p90": 120.57600170373917, - "p95": 127.55200266838074, - "p99": 140.73599874973297 + "p50": 106.1440035700798, + "p90": 111.51999980211258, + "p95": 112.06399649381638, + "p99": 114.07999694347382 }, "roundtrip": { - 
"p50": 206.65599405765533, - "p90": 219.04000639915466, - "p95": 224.48000311851501, - "p99": 242.0479953289032 + "p50": 197.40800559520721, + "p90": 200.9280025959015, + "p95": 203.0400037765503, + "p99": 206.01600408554077 }, "isolatedSum": { - "p50": 226.97599977254868, - "p90": 250.75199455022812, - "p95": 263.13599944114685, - "p99": 287.80800104141235 + "p50": 216.60800278186798, + "p90": 227.87199914455414, + "p95": 229.88799959421158, + "p99": 280.09599447250366 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 22650880, - "combineLogicalBytes": 22650880, - "fanoutMean": 1.54296875, - "recvTokensMax": 1024, - "stragglerRank": 1, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 @@ -63754,35 +65520,35 @@ "tokensPerRank": 256, "globalTokens": 2048, "dispatch": { - "p50": 155.45600652694702, - "p90": 165.95199704170227, - "p95": 173.92000555992126, - "p99": 202.39999890327454 + "p50": 147.39200472831726, + "p90": 150.68799257278442, + "p95": 151.7760008573532, + "p99": 154.33600544929504 }, "combine": { - "p50": 150.94399452209473, - "p90": 162.59199380874634, - "p95": 170.3680008649826, - "p99": 186.24000251293182 + "p50": 145.1839953660965, + "p90": 149.88799393177032, + "p95": 151.67999267578125, + "p99": 154.7199934720993 }, "roundtrip": { - "p50": 287.6800000667572, - "p90": 302.94400453567505, - "p95": 309.7279965877533, - "p99": 357.7919900417328 + "p50": 262.4000012874603, + "p90": 267.2640085220337, + "p95": 269.27998661994934, + "p99": 357.34400153160095 }, "isolatedSum": { - "p50": 306.40000104904175, - "p90": 328.5439908504486, - "p95": 344.28800642490387, - "p99": 388.64000141620636 + "p50": 292.57600009441376, + "p90": 300.57598650455475, + "p95": 303.45599353313446, + "p99": 309.05599892139435 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 45688832, - "combineLogicalBytes": 45688832, - 
"fanoutMean": 1.55615234375, - "recvTokensMax": 2048, - "stragglerRank": 5, + "dispatchLogicalBytes": 155889664, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 @@ -63791,34 +65557,34 @@ "tokensPerRank": 512, "globalTokens": 4096, "dispatch": { - "p50": 230.9119999408722, - "p90": 242.11199581623077, - "p95": 249.66399371623993, - "p99": 269.8880136013031 + "p50": 204.92799580097198, + "p90": 219.39200162887573, + "p95": 221.76000475883484, + "p99": 226.4000028371811 }, "combine": { - "p50": 247.16800451278687, - "p90": 260.5760097503662, - "p95": 264.6400034427643, - "p99": 289.66400027275085 + "p50": 217.15199947357178, + "p90": 221.3120013475418, + "p95": 224.57599639892578, + "p99": 227.743998169899 }, "roundtrip": { - "p50": 456.86399936676025, - "p90": 473.28001260757446, - "p95": 481.1519980430603, - "p99": 534.8799824714661 + "p50": 392.60798692703247, + "p90": 397.47199416160583, + "p95": 400.09599924087524, + "p99": 421.37598991394043 }, "isolatedSum": { - "p50": 478.08000445365906, - "p90": 502.688005566597, - "p95": 514.3039971590042, - "p99": 559.552013874054 + "p50": 422.07999527454376, + "p90": 440.70400297641754, + "p95": 446.3360011577606, + "p99": 454.1440010070801 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 91521024, - "combineLogicalBytes": 91521024, - "fanoutMean": 1.55859375, - "recvTokensMax": 4096, + "dispatchLogicalBytes": 312266752, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, "stragglerRank": 7, "correct": true, "samplesPooled": 600, @@ -63828,35 +65594,35 @@ "tokensPerRank": 1024, "globalTokens": 8192, "dispatch": { - "p50": 374.2719888687134, - "p90": 386.6559863090515, - "p95": 397.5679874420166, - "p99": 506.0480237007141 + "p50": 319.93600726127625, + "p90": 324.8960077762604, + "p95": 327.1679878234863, + "p99": 330.55999875068665 }, "combine": { - "p50": 
423.1039881706238, - "p90": 436.0319972038269, - "p95": 440.8319890499115, - "p99": 470.97599506378174 + "p50": 330.01598715782166, + "p90": 335.1680040359497, + "p95": 336.64000034332275, + "p99": 340.2239978313446 }, "roundtrip": { - "p50": 771.232008934021, - "p90": 783.9679718017578, - "p95": 795.5520153045654, - "p99": 828.4800052642822 + "p50": 624.064028263092, + "p90": 629.2480230331421, + "p95": 631.6159963607788, + "p99": 638.2399797439575 }, "isolatedSum": { - "p50": 797.3759770393372, - "p90": 822.6879835128784, - "p95": 838.3999764919281, - "p99": 977.0240187644958 + "p50": 649.9519944190979, + "p90": 660.0640118122101, + "p95": 663.8079881668091, + "p99": 670.7839965820312 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 183916544, - "combineLogicalBytes": 183916544, - "fanoutMean": 1.5660400390625, - "recvTokensMax": 8192, - "stragglerRank": 7, + "dispatchLogicalBytes": 623443968, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -63865,35 +65631,35 @@ "tokensPerRank": 2048, "globalTokens": 16384, "dispatch": { - "p50": 661.8559956550598, - "p90": 673.9199757575989, - "p95": 679.6159744262695, - "p99": 697.5039839744568 + "p50": 570.9440112113953, + "p90": 584.5119953155518, + "p95": 589.1519784927368, + "p99": 593.9199924468994 }, "combine": { - "p50": 770.6559896469116, - "p90": 781.1520099639893, - "p95": 786.7839932441711, - "p99": 830.560028553009 + "p50": 564.9920105934143, + "p90": 574.3039846420288, + "p95": 576.7999887466431, + "p99": 583.5199952125549 }, "roundtrip": { - "p50": 1405.791997909546, - "p90": 1421.280026435852, - "p95": 1432.2559833526611, - "p99": 1481.6319942474365 + "p50": 1105.5680513381958, + "p90": 1120.1599836349487, + "p95": 1124.7680187225342, + "p99": 1134.719967842102 }, "isolatedSum": { - "p50": 1432.5119853019714, - "p90": 1455.0719857215881, - "p95": 1466.3999676704407, - "p99": 
1528.0640125274658 + "p50": 1135.9360218048096, + "p90": 1158.8159799575806, + "p95": 1165.9519672393799, + "p99": 1177.4399876594543 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 368062464, - "combineLogicalBytes": 368062464, - "fanoutMean": 1.5670166015625, - "recvTokensMax": 16384, - "stragglerRank": 1, + "dispatchLogicalBytes": 1243805696, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -63902,35 +65668,35 @@ "tokensPerRank": 4096, "globalTokens": 32768, "dispatch": { - "p50": 1246.2400197982788, - "p90": 1261.631965637207, - "p95": 1269.5679664611816, - "p99": 1482.5600385665894 + "p50": 1075.8719444274902, + "p90": 1088.703989982605, + "p95": 1093.5360193252563, + "p99": 1102.463960647583 }, "combine": { - "p50": 1440.384030342102, - "p90": 1459.455966949463, - "p95": 1471.519947052002, - "p99": 1634.0479850769043 + "p50": 1031.872034072876, + "p90": 1041.3119792938232, + "p95": 1044.4799661636353, + "p99": 1055.359959602356 }, "roundtrip": { - "p50": 2662.400007247925, - "p90": 2688.096046447754, - "p95": 2712.4478816986084, - "p99": 2846.719980239868 + "p50": 2082.304000854492, + "p90": 2096.640110015869, + "p95": 2100.895881652832, + "p99": 2108.031988143921 }, "isolatedSum": { - "p50": 2686.624050140381, - "p90": 2721.08793258667, - "p95": 2741.0879135131836, - "p99": 3116.6080236434937 + "p50": 2107.743978500366, + "p90": 2130.015969276428, + "p95": 2138.0159854888916, + "p99": 2157.823920249939 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 734720000, - "combineLogicalBytes": 734720000, - "fanoutMean": 1.56402587890625, - "recvTokensMax": 32768, - "stragglerRank": 6, + "dispatchLogicalBytes": 2487009280, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -63938,50 +65704,51 @@ ] }, { - "id": 
"cx-9bcc6cfd", - "identity": "h200|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-heavy+eplb|8|prefill|normal|none|none|0|tuned||46855e7fa6754eb", - "colorKey": "h200_06aa1194", - "comparisonKey": "fe01776775c5fb5e", + "id": "cx-107dd39c", + "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|prefill|normal|none|none|0|normalized|0.18|0a3064a2af0dd39", + "colorKey": "h100_716e65b9", + "comparisonKey": "ea5a5b6f1b74dc9d", "schemaVersion": 3, - "generatedAt": "2026-06-27T00:05:23.968491+00:00", + "generatedAt": "2026-06-26T17:31:48.643579+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h200-dgxc-slurm_3", - "sku": "h200", + "runner": "h100-dgxc-slurm_04", + "sku": "h100", "backend": "deepep", "phase": "prefill", "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", + "resourceMode": "normalized", + "suite": "resource-constrained", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", + "topologyClass": "h100-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H200 EP8 · deepep · bf16 · zipf-heavy+eplb", + "label": "H100 EP8 · deepep · bf16 (norm) · balanced", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, - "experts": 288, - "routing": "zipf-heavy", - "routingLabel": "zipf-heavy+eplb", + "experts": 256, + "routing": "balanced", + "routingLabel": "balanced", "routingStep": 0, "unevenTokens": "none", - "eplbEnabled": true, + "eplbEnabled": false, "dispatchDtype": "bf16", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, + "requestedFraction": 0.18, + "achievedFraction": 0.1818, + "configuredUnits": 24, "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", + "resourceClass": "resource-constrained", + 
"conformanceClass": "resource-conforming", "fixedKernel": false, - "paretoEligible": false + "paretoEligible": true }, "placement": { "kind": "packed", @@ -63990,54 +65757,54 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "46855e7fa6754eb", - "workloadId": "set:6:1ca614e23cc66be1", + "traceSignature": "0a3064a2af0dd39", + "workloadId": "set:6:2dad1a73ff872905", "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": 7.38995361328125, - "eplbImbalanceAfter": 1.0000210716610862, + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, "backendVersion": "1.2.1", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28272097307", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272097307", - "createdAt": "2026-06-27T00:03:27Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28254367516", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254367516", + "createdAt": "2026-06-26T17:31:48.643579+00:00", + "sha": "60dec7d70f554e252fec87709e2be52752947db1" }, "rows": [ { "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 117.79200285673141, - "p90": 122.94399738311768, - "p95": 127.96799838542938, - "p99": 138.33600282669067 + "p50": 126.65599584579468, + "p90": 131.74399733543396, + "p95": 132.83200562000275, + "p99": 139.80799913406372 }, "combine": { - "p50": 104.38399761915207, - "p90": 111.35999858379364, - "p95": 117.79200285673141, - "p99": 128.63999605178833 + "p50": 120.4800009727478, + "p90": 122.40000069141388, + "p95": 124.28800016641617, + "p99": 129.12000715732574 }, "roundtrip": { - "p50": 197.82400131225586, - "p90": 205.85599541664124, - "p95": 212.351992726326, - "p99": 252.86400318145752 + "p50": 221.40799462795258, + "p90": 226.49599611759186, + "p95": 227.77600586414337, + "p99": 232.16000199317932 }, "isolatedSum": { - "p50": 222.17600047588348, - 
"p90": 234.30399596691132, - "p95": 245.7600012421608, - "p99": 266.975998878479 + "p50": 247.13599681854248, + "p90": 254.14399802684784, + "p95": 257.1200057864189, + "p99": 268.92800629138947 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 79206400, - "combineLogicalBytes": 79206400, - "fanoutMean": 5.3955078125, - "recvTokensMax": 713, - "stragglerRank": 0, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 8, + "recvTokensMax": 1024, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -64046,35 +65813,35 @@ "tokensPerRank": 256, "globalTokens": 2048, "dispatch": { - "p50": 145.60000598430634, - "p90": 154.14400398731232, - "p95": 158.39999914169312, - "p99": 173.63199591636658 + "p50": 174.04800653457642, + "p90": 177.5359958410263, + "p95": 179.29600179195404, + "p99": 190.0160014629364 }, "combine": { - "p50": 145.6959992647171, - "p90": 150.56000649929047, - "p95": 155.2640050649643, - "p99": 165.56799411773682 + "p50": 172.67200350761414, + "p90": 174.52800273895264, + "p95": 175.4239946603775, + "p99": 180.28800189495087 }, "roundtrip": { - "p50": 267.520010471344, - "p90": 276.99199318885803, - "p95": 283.03998708724976, - "p99": 307.3599934577942 + "p50": 317.05600023269653, + "p90": 321.3759958744049, + "p95": 322.4320113658905, + "p99": 326.04798674583435 }, "isolatedSum": { - "p50": 291.29600524902344, - "p90": 304.7040104866028, - "p95": 313.6640042066574, - "p99": 339.1999900341034 + "p50": 346.72001004219055, + "p90": 352.06399857997894, + "p95": 354.71999645233154, + "p99": 370.30400335788727 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 159330304, - "combineLogicalBytes": 159330304, - "fanoutMean": 5.4267578125, - "recvTokensMax": 1436, - "stragglerRank": 0, + "dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 8, + "recvTokensMax": 2048, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -64083,35 +65850,35 
@@ "tokensPerRank": 512, "globalTokens": 4096, "dispatch": { - "p50": 204.79999482631683, - "p90": 213.85599672794342, - "p95": 218.27200055122375, - "p99": 238.52799832820892 + "p50": 260.70401072502136, + "p90": 264.41600918769836, + "p95": 265.76000452041626, + "p99": 269.6639895439148 }, "combine": { - "p50": 219.4879949092865, - "p90": 226.9439995288849, - "p95": 233.66400599479675, - "p99": 274.944007396698 + "p50": 255.13601303100586, + "p90": 258.2080066204071, + "p95": 259.5840096473694, + "p99": 263.5520100593567 }, "roundtrip": { - "p50": 400.160014629364, - "p90": 409.7279906272888, - "p95": 419.16799545288086, - "p99": 445.6320106983185 + "p50": 489.3760085105896, + "p90": 493.696004152298, + "p95": 495.0079917907715, + "p99": 498.9120066165924 }, "isolatedSum": { - "p50": 424.28798973560333, - "p90": 440.7999962568283, - "p95": 451.9360065460205, - "p99": 513.4720057249069 + "p50": 515.8400237560272, + "p90": 522.6240158081055, + "p95": 525.3440141677856, + "p99": 533.2159996032715 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 319535104, - "combineLogicalBytes": 319535104, - "fanoutMean": 5.441650390625, - "recvTokensMax": 2897, - "stragglerRank": 0, + "dispatchLogicalBytes": 469762048, + "combineLogicalBytes": 469762048, + "fanoutMean": 8, + "recvTokensMax": 4096, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -64120,35 +65887,35 @@ "tokensPerRank": 1024, "globalTokens": 8192, "dispatch": { - "p50": 317.7599906921387, - "p90": 327.87200808525085, - "p95": 340.06398916244507, - "p99": 393.3440148830414 + "p50": 437.6640021800995, + "p90": 443.7119960784912, + "p95": 445.248007774353, + "p99": 449.50398802757263 }, "combine": { - "p50": 356.1600148677826, - "p90": 364.6079897880554, - "p95": 369.82399225234985, - "p99": 396.8319892883301 + "p50": 422.14399576187134, + "p90": 426.07998847961426, + "p95": 427.90400981903076, + "p99": 431.0399889945984 }, "roundtrip": { - "p50": 649.6959924697876, - "p90": 
660.3519916534424, - "p95": 664.7040247917175, - "p99": 683.4239959716797 + "p50": 834.0799808502197, + "p90": 840.3199911117554, + "p95": 842.8159952163696, + "p99": 852.512001991272 }, "isolatedSum": { - "p50": 673.9200055599213, - "p90": 692.4799978733063, - "p95": 709.8879814147949, - "p99": 790.1760041713715 + "p50": 859.8079979419708, + "p90": 869.7919845581055, + "p95": 873.1520175933838, + "p99": 880.543977022171 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 638410752, - "combineLogicalBytes": 638410752, - "fanoutMean": 5.43603515625, - "recvTokensMax": 5815, - "stragglerRank": 1, + "dispatchLogicalBytes": 939524096, + "combineLogicalBytes": 939524096, + "fanoutMean": 8, + "recvTokensMax": 8192, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -64157,35 +65924,35 @@ "tokensPerRank": 2048, "globalTokens": 16384, "dispatch": { - "p50": 541.1199927330017, - "p90": 549.8560070991516, - "p95": 555.4239749908447, - "p99": 643.6160206794739 + "p50": 802.623987197876, + "p90": 819.7439908981323, + "p95": 822.3680257797241, + "p99": 830.3359746932983 }, "combine": { - "p50": 614.8800253868103, - "p90": 626.3039708137512, - "p95": 632.2240233421326, - "p99": 680.8639764785767 + "p50": 751.9360184669495, + "p90": 759.6160173416138, + "p95": 762.0480060577393, + "p99": 765.5680179595947 }, "roundtrip": { - "p50": 1131.7440271377563, - "p90": 1142.7839994430542, - "p95": 1148.192048072815, - "p99": 1196.768045425415 + "p50": 1521.9520330429077, + "p90": 1534.208059310913, + "p95": 1541.4400100708008, + "p99": 1552.5120496749878 }, "isolatedSum": { - "p50": 1156.000018119812, - "p90": 1176.1599779129028, - "p95": 1187.6479983329773, - "p99": 1324.4799971580505 + "p50": 1554.5600056648254, + "p90": 1579.360008239746, + "p95": 1584.4160318374634, + "p99": 1595.903992652893 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1275144192, - "combineLogicalBytes": 1275144192, - "fanoutMean": 5.42889404296875, - "recvTokensMax": 11606, - 
"stragglerRank": 6, + "dispatchLogicalBytes": 1879048192, + "combineLogicalBytes": 1879048192, + "fanoutMean": 8, + "recvTokensMax": 16384, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -64194,34 +65961,34 @@ "tokensPerRank": 4096, "globalTokens": 32768, "dispatch": { - "p50": 1027.9680490493774, - "p90": 1046.720027923584, - "p95": 1055.4239749908447, - "p99": 1100.000023841858 + "p50": 1529.0240049362183, + "p90": 1539.5519733428955, + "p95": 1543.4880256652832, + "p99": 1549.504041671753 }, "combine": { - "p50": 1124.384045600891, - "p90": 1135.9679698944092, - "p95": 1140.8640146255493, - "p99": 1170.9760427474976 + "p50": 1399.6479511260986, + "p90": 1406.7840576171875, + "p95": 1409.440040588379, + "p99": 1416.767954826355 }, "roundtrip": { - "p50": 2114.5920753479004, - "p90": 2138.495922088623, - "p95": 2152.127981185913, - "p99": 2480.2560806274414 + "p50": 2903.520107269287, + "p90": 2916.3520336151123, + "p95": 2920.2558994293213, + "p99": 2930.016040802002 }, "isolatedSum": { - "p50": 2152.3520946502686, - "p90": 2182.687997817993, - "p95": 2196.287989616394, - "p99": 2270.9760665893555 + "p50": 2928.671956062317, + "p90": 2946.336030960083, + "p95": 2952.928066253662, + "p99": 2966.271996498108 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2546374656, - "combineLogicalBytes": 2546374656, - "fanoutMean": 5.420562744140625, - "recvTokensMax": 23170, + "dispatchLogicalBytes": 3758096384, + "combineLogicalBytes": 3758096384, + "fanoutMean": 8, + "recvTokensMax": 32768, "stragglerRank": 5, "correct": true, "samplesPooled": 600, @@ -64230,34 +65997,35 @@ ] }, { - "id": "cx-e075077e", - "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-mild|8|prefill|normal|none|none|0|tuned||cf93f8f6b52e428", - "colorKey": "h200_6a794fcd", - "comparisonKey": "b6c24dab2941895d", + "id": "cx-a1762095", + "identity": 
"h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|prefill|normal|none|none|0|normalized|0.18|b5217e990b95f86", + "colorKey": "h100_f7ec28aa", + "comparisonKey": "18d3cab3936a264e", "schemaVersion": 3, - "generatedAt": "2026-06-27T00:04:10.125267+00:00", + "generatedAt": "2026-06-26T17:29:07.856119+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h200-dgxc-slurm_2", - "sku": "h200", + "runner": "h100-dgxc-slurm_14", + "sku": "h100", "backend": "deepep", "phase": "prefill", "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", + "resourceMode": "normalized", + "suite": "resource-constrained", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", + "topologyClass": "h100-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H200 EP8 · deepep · bf16 · zipf-mild", + "label": "H100 EP8 · deepep · bf16 (norm) · zipf", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, "experts": 256, - "routing": "zipf-mild", - "routingLabel": "zipf-mild", + "routing": "zipf", + "routingLabel": "zipf", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, @@ -64266,14 +66034,14 @@ "combineQuantMode": "none" }, "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, + "requestedFraction": 0.18, + "achievedFraction": 0.1818, + "configuredUnits": 24, "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", + "resourceClass": "resource-constrained", + "conformanceClass": "resource-conforming", "fixedKernel": false, - "paretoEligible": false + "paretoEligible": true }, "placement": { "kind": "packed", @@ -64282,8 +66050,8 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "cf93f8f6b52e428", - "workloadId": "set:6:a224603e5a1640b8", + "traceSignature": "b5217e990b95f86", + "workloadId": 
"set:6:830e36e88869e222", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -64291,44 +66059,44 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28272065129", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272065129", - "createdAt": "2026-06-27T00:02:24Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28254376151", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254376151", + "createdAt": "2026-06-26T17:29:07.856119+00:00", + "sha": "60dec7d70f554e252fec87709e2be52752947db1" }, "rows": [ { "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 126.52799487113953, - "p90": 140.3840035200119, - "p95": 146.17599546909332, - "p99": 177.08800733089447 + "p50": 119.03999745845795, + "p90": 125.44000148773193, + "p95": 126.01600587368011, + "p99": 130.68799674510956 }, "combine": { - "p50": 116.73600226640701, - "p90": 128.86400520801544, - "p95": 133.63200426101685, - "p99": 143.8719928264618 + "p50": 111.32799834012985, + "p90": 113.92000317573547, + "p95": 114.33599889278412, + "p99": 119.77600306272507 }, "roundtrip": { - "p50": 216.35200083255768, - "p90": 234.3360036611557, - "p95": 240.25599658489227, - "p99": 277.3120105266571 + "p50": 207.42399990558624, + "p90": 212.351992726326, + "p95": 214.56000208854675, + "p99": 233.3119958639145 }, "isolatedSum": { - "p50": 243.26399713754654, - "p90": 269.24800872802734, - "p95": 279.80799973011017, - "p99": 320.96000015735626 + "p50": 230.3679957985878, + "p90": 239.3600046634674, + "p95": 240.35200476646423, + "p99": 250.46399980783463 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 70160384, - "combineLogicalBytes": 70160384, - "fanoutMean": 4.779296875, - "recvTokensMax": 987, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + 
"recvTokensMax": 1022, "stragglerRank": 4, "correct": true, "samplesPooled": 600, @@ -64338,34 +66106,34 @@ "tokensPerRank": 256, "globalTokens": 2048, "dispatch": { - "p50": 163.96799683570862, - "p90": 176.256000995636, - "p95": 180.4479956626892, - "p99": 201.50400698184967 + "p50": 159.42400693893433, + "p90": 165.8879965543747, + "p95": 166.6879951953888, + "p99": 169.69600319862366 }, "combine": { - "p50": 160.41600704193115, - "p90": 173.0560064315796, - "p95": 178.3680021762848, - "p99": 186.75200641155243 + "p50": 156.19200468063354, + "p90": 162.49600052833557, + "p95": 163.26400637626648, + "p99": 168.83200407028198 }, "roundtrip": { - "p50": 298.94399642944336, - "p90": 319.487988948822, - "p95": 328.0960023403168, - "p99": 354.65601086616516 + "p50": 290.336012840271, + "p90": 296.4160144329071, + "p95": 298.43199253082275, + "p99": 313.4399950504303 }, "isolatedSum": { - "p50": 324.38400387763977, - "p90": 349.3120074272156, - "p95": 358.815997838974, - "p99": 388.2560133934021 + "p50": 315.61601161956787, + "p90": 328.38399708271027, + "p95": 329.9520015716553, + "p99": 338.52800726890564 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 140879872, - "combineLogicalBytes": 140879872, - "fanoutMean": 4.79833984375, - "recvTokensMax": 1972, + "dispatchLogicalBytes": 100509696, + "combineLogicalBytes": 100509696, + "fanoutMean": 3.42333984375, + "recvTokensMax": 2046, "stragglerRank": 4, "correct": true, "samplesPooled": 600, @@ -64375,34 +66143,34 @@ "tokensPerRank": 512, "globalTokens": 4096, "dispatch": { - "p50": 237.72799968719482, - "p90": 252.48000025749207, - "p95": 263.8719975948334, - "p99": 307.16800689697266 + "p50": 234.78400707244873, + "p90": 240.22400379180908, + "p95": 242.20800399780273, + "p99": 246.2719976902008 }, "combine": { - "p50": 262.1760070323944, - "p90": 279.1999876499176, - "p95": 284.7999930381775, - "p99": 311.8399977684021 + "p50": 244.47999894618988, + "p90": 252.16001272201538, + "p95": 254.8159956932068, + 
"p99": 262.4959945678711 }, "roundtrip": { - "p50": 477.82400250434875, - "p90": 500.70399045944214, - "p95": 516.5759921073914, - "p99": 701.632022857666 + "p50": 450.81600546836853, + "p90": 456.83199167251587, + "p95": 458.624005317688, + "p99": 499.1680085659027 }, "isolatedSum": { - "p50": 499.90400671958923, - "p90": 531.6799879074097, - "p95": 548.6719906330109, - "p99": 619.0080046653748 + "p50": 479.2640060186386, + "p90": 492.38401651382446, + "p95": 497.0239996910095, + "p99": 508.7679922580719 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 282333184, - "combineLogicalBytes": 282333184, - "fanoutMean": 4.80810546875, - "recvTokensMax": 3936, + "dispatchLogicalBytes": 201678848, + "combineLogicalBytes": 201678848, + "fanoutMean": 3.4345703125, + "recvTokensMax": 4094, "stragglerRank": 4, "correct": true, "samplesPooled": 600, @@ -64412,34 +66180,34 @@ "tokensPerRank": 1024, "globalTokens": 8192, "dispatch": { - "p50": 378.495991230011, - "p90": 390.04799723625183, - "p95": 399.58399534225464, - "p99": 429.6320080757141 + "p50": 379.8399865627289, + "p90": 387.58400082588196, + "p95": 389.60000872612, + "p99": 392.9600119590759 }, "combine": { - "p50": 439.9360120296478, - "p90": 452.2880017757416, - "p95": 457.15200901031494, - "p99": 474.047988653183 + "p50": 402.72000432014465, + "p90": 408.35198760032654, + "p95": 410.5280041694641, + "p99": 414.2400026321411 }, "roundtrip": { - "p50": 797.4079847335815, - "p90": 816.32000207901, - "p95": 828.6399841308594, - "p99": 955.839991569519 + "p50": 753.600001335144, + "p90": 759.8080039024353, + "p95": 761.5039944648743, + "p99": 764.959990978241 }, "isolatedSum": { - "p50": 818.4320032596588, - "p90": 842.3359990119934, - "p95": 856.7360043525696, - "p99": 903.6799967288971 + "p50": 782.5599908828735, + "p90": 795.9359884262085, + "p95": 800.1280128955841, + "p99": 807.200014591217 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 566716416, - "combineLogicalBytes": 566716416, - "fanoutMean": 
4.8255615234375, - "recvTokensMax": 7855, + "dispatchLogicalBytes": 405035008, + "combineLogicalBytes": 405035008, + "fanoutMean": 3.4488525390625, + "recvTokensMax": 8189, "stragglerRank": 6, "correct": true, "samplesPooled": 600, @@ -64449,35 +66217,35 @@ "tokensPerRank": 2048, "globalTokens": 16384, "dispatch": { - "p50": 679.4559955596924, - "p90": 694.208025932312, - "p95": 704.255998134613, - "p99": 742.8159713745117 + "p50": 663.7120246887207, + "p90": 672.1919775009155, + "p95": 675.9359836578369, + "p99": 683.0080151557922 }, "combine": { - "p50": 780.7040214538574, - "p90": 795.1679825782776, - "p95": 804.7360181808472, - "p99": 879.7439932823181 + "p50": 711.5839719772339, + "p90": 725.5359888076782, + "p95": 729.8880219459534, + "p99": 740.0320172309875 }, "roundtrip": { - "p50": 1432.0640563964844, - "p90": 1453.279972076416, - "p95": 1465.8559560775757, - "p99": 1602.3039817810059 + "p50": 1344.383955001831, + "p90": 1357.5999736785889, + "p95": 1361.0880374908447, + "p99": 1368.6399459838867 }, "isolatedSum": { - "p50": 1460.1600170135498, - "p90": 1489.3760085105896, - "p95": 1508.9920163154602, - "p99": 1622.5599646568298 + "p50": 1375.2959966659546, + "p90": 1397.7279663085938, + "p95": 1405.8240056037903, + "p99": 1423.0400323867798 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1132285952, - "combineLogicalBytes": 1132285952, - "fanoutMean": 4.8206787109375, - "recvTokensMax": 15694, - "stragglerRank": 6, + "dispatchLogicalBytes": 808822784, + "combineLogicalBytes": 808822784, + "fanoutMean": 3.44354248046875, + "recvTokensMax": 16380, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -64486,35 +66254,35 @@ "tokensPerRank": 4096, "globalTokens": 32768, "dispatch": { - "p50": 1287.071943283081, - "p90": 1304.8959970474243, - "p95": 1310.7839822769165, - "p99": 1432.2240352630615 + "p50": 1251.1359453201294, + "p90": 1264.8320198059082, + "p95": 1269.6640491485596, + "p99": 1279.0080308914185 }, "combine": { - 
"p50": 1463.6160135269165, - "p90": 1483.8080406188965, - "p95": 1511.7119550704956, - "p99": 1699.0400552749634 + "p50": 1326.9120454788208, + "p90": 1337.3440504074097, + "p95": 1343.008041381836, + "p99": 1352.5439500808716 }, "roundtrip": { - "p50": 2723.9038944244385, - "p90": 2744.607925415039, - "p95": 2758.2719326019287, - "p99": 2967.616081237793 + "p50": 2547.0080375671387, + "p90": 2561.2800121307373, + "p95": 2564.863920211792, + "p99": 2581.696033477783 }, "isolatedSum": { - "p50": 2750.6879568099976, - "p90": 2788.704037666321, - "p95": 2822.495937347412, - "p99": 3131.264090538025 + "p50": 2578.04799079895, + "p90": 2602.176070213318, + "p95": 2612.6720905303955, + "p99": 2631.55198097229 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2267840512, - "combineLogicalBytes": 2267840512, - "fanoutMean": 4.82763671875, - "recvTokensMax": 31357, - "stragglerRank": 6, + "dispatchLogicalBytes": 1619795968, + "combineLogicalBytes": 1619795968, + "fanoutMean": 3.4481201171875, + "recvTokensMax": 32761, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -64522,34 +66290,35 @@ ] }, { - "id": "cx-f4768a96", - "identity": "h200|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-mild+eplb|8|prefill|normal|none|none|0|tuned||27ddc85ded0add9", - "colorKey": "h200_b2ffaf91", - "comparisonKey": "d826aaa5f1321f31", + "id": "cx-6339c695", + "identity": "h100|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|prefill|normal|none|none|0|normalized|0.18|2b57a75d27f5b39", + "colorKey": "h100_93503624", + "comparisonKey": "99696dfafd6d026a", "schemaVersion": 3, - "generatedAt": "2026-06-27T00:04:16.163335+00:00", + "generatedAt": "2026-06-26T17:46:27.794881+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h200-dgxc-slurm_12", - "sku": "h200", + "runner": "h100-dgxc-slurm_03", + "sku": "h100", "backend": "deepep", "phase": "prefill", "mode": "normal", - "resourceMode": "tuned", - "suite": 
"backend-default", + "resourceMode": "normalized", + "suite": "resource-constrained", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", + "topologyClass": "h100-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H200 EP8 · deepep · bf16 · zipf-mild+eplb", + "label": "H100 EP8 · deepep · bf16 (norm) · zipf+eplb", + "model": "DeepSeek-V3 (EPLB physical)", "shape": { "hidden": 7168, "topk": 8, "experts": 288, - "routing": "zipf-mild", - "routingLabel": "zipf-mild+eplb", + "routing": "zipf", + "routingLabel": "zipf+eplb", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": true, @@ -64558,14 +66327,14 @@ "combineQuantMode": "none" }, "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, + "requestedFraction": 0.18, + "achievedFraction": 0.1818, + "configuredUnits": 24, "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", + "resourceClass": "resource-constrained", + "conformanceClass": "resource-conforming", "fixedKernel": false, - "paretoEligible": false + "paretoEligible": true }, "placement": { "kind": "packed", @@ -64574,54 +66343,54 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "27ddc85ded0add9", - "workloadId": "set:6:a224603e5a1640b8", + "traceSignature": "2b57a75d27f5b39", + "workloadId": "set:6:830e36e88869e222", "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": 2.545684814453125, - "eplbImbalanceAfter": 1.0001495361328125, + "eplbImbalanceBefore": 4.895263671875, + "eplbImbalanceAfter": 1.0000902811686199, "backendVersion": "1.2.1", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28272068834", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272068834", - "createdAt": "2026-06-27T00:02:31Z", - 
"sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28255296001", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28255296001", + "createdAt": "2026-06-26T17:46:27.794881+00:00", + "sha": "36d3eb6c3c7386d3220c873d305410219c5c0f17" }, "rows": [ { "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 118.07999759912491, - "p90": 128.63999605178833, - "p95": 134.62400436401367, - "p99": 156.2879979610443 + "p50": 110.72000116109848, + "p90": 114.78400230407715, + "p95": 116.57600104808807, + "p99": 121.0239976644516 }, "combine": { - "p50": 105.47199845314026, - "p90": 114.43199962377548, - "p95": 119.19999867677689, - "p99": 136.09600067138672 + "p50": 105.8880016207695, + "p90": 111.35999858379364, + "p95": 112.0000034570694, + "p99": 114.56000059843063 }, "roundtrip": { - "p50": 197.24799692630768, - "p90": 206.01600408554077, - "p95": 211.0079973936081, - "p99": 226.01599991321564 + "p50": 195.99999487400055, + "p90": 200.00000298023224, + "p95": 201.24800503253937, + "p99": 205.59999346733093 }, "isolatedSum": { - "p50": 223.55199605226517, - "p90": 243.0719956755638, - "p95": 253.82400304079056, - "p99": 292.38399863243103 + "p50": 216.60800278186798, + "p90": 226.1440008878708, + "p95": 228.57600450515747, + "p99": 235.58399826288223 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 78159872, - "combineLogicalBytes": 78159872, - "fanoutMean": 5.32421875, - "recvTokensMax": 702, - "stragglerRank": 3, + "dispatchLogicalBytes": 77385728, + "combineLogicalBytes": 77385728, + "fanoutMean": 5.271484375, + "recvTokensMax": 691, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 @@ -64630,35 +66399,35 @@ "tokensPerRank": 256, "globalTokens": 2048, "dispatch": { - "p50": 149.08799529075623, - "p90": 157.27999806404114, - "p95": 161.56800091266632, - "p99": 172.83199727535248 + "p50": 144.31999623775482, + "p90": 148.0640023946762, + "p95": 149.24800395965576, + "p99": 152.0960032939911 }, "combine": { - 
"p50": 143.77599954605103, - "p90": 148.99200201034546, - "p95": 152.12799608707428, - "p99": 163.68000209331512 + "p50": 146.62399888038635, + "p90": 151.10400319099426, + "p95": 152.51199901103973, + "p99": 155.32800555229187 }, "roundtrip": { - "p50": 265.28000831604004, - "p90": 273.50398898124695, - "p95": 279.35999631881714, - "p99": 293.37599873542786 + "p50": 260.8959972858429, + "p90": 265.3760015964508, + "p95": 266.400009393692, + "p99": 270.7520127296448 }, "isolatedSum": { - "p50": 292.86399483680725, - "p90": 306.2720000743866, - "p95": 313.6959969997406, - "p99": 336.5119993686676 + "p50": 290.9439951181412, + "p90": 299.16800558567047, + "p95": 301.7600029706955, + "p99": 307.42400884628296 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 156563456, - "combineLogicalBytes": 156563456, - "fanoutMean": 5.33251953125, - "recvTokensMax": 1393, - "stragglerRank": 3, + "dispatchLogicalBytes": 155172864, + "combineLogicalBytes": 155172864, + "fanoutMean": 5.28515625, + "recvTokensMax": 1378, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 @@ -64667,35 +66436,35 @@ "tokensPerRank": 512, "globalTokens": 4096, "dispatch": { - "p50": 206.2080055475235, - "p90": 216.99200570583344, - "p95": 223.03999960422516, - "p99": 264.44798707962036 + "p50": 205.56800067424774, + "p90": 210.36800742149353, + "p95": 212.09600567817688, + "p99": 214.6880030632019 }, "combine": { - "p50": 225.40800273418427, - "p90": 233.37599635124207, - "p95": 238.65599930286407, - "p99": 253.56799364089966 + "p50": 214.78399634361267, + "p90": 219.13599967956543, + "p95": 220.70400416851044, + "p99": 225.2800017595291 }, "roundtrip": { - "p50": 404.4800102710724, - "p90": 415.2959883213043, - "p95": 423.552006483078, - "p99": 451.9039988517761 + "p50": 394.8799967765808, + "p90": 400.2879858016968, + "p95": 401.88801288604736, + "p99": 407.9680144786835 }, "isolatedSum": { - "p50": 431.61600828170776, - "p90": 450.3680020570755, - "p95": 461.69599890708923, 
- "p99": 518.01598072052 + "p50": 420.3519970178604, + "p90": 429.50400710105896, + "p95": 432.8000098466873, + "p99": 439.968004822731 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 312410112, - "combineLogicalBytes": 312410112, - "fanoutMean": 5.3203125, - "recvTokensMax": 2773, - "stragglerRank": 3, + "dispatchLogicalBytes": 310546432, + "combineLogicalBytes": 310546432, + "fanoutMean": 5.28857421875, + "recvTokensMax": 2745, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 @@ -64704,34 +66473,34 @@ "tokensPerRank": 1024, "globalTokens": 8192, "dispatch": { - "p50": 313.27998638153076, - "p90": 324.8960077762604, - "p95": 334.7199857234955, - "p99": 349.2160141468048 + "p50": 326.9760012626648, + "p90": 332.35201239585876, + "p95": 334.46401357650757, + "p99": 337.98399567604065 }, "combine": { - "p50": 357.05599188804626, - "p90": 370.59199810028076, - "p95": 381.4080059528351, - "p99": 418.43199729919434 + "p50": 338.75200152397156, + "p90": 346.0479974746704, + "p95": 347.4240005016327, + "p99": 379.5199990272522 }, "roundtrip": { - "p50": 643.7439918518066, - "p90": 656.0959815979004, - "p95": 666.2399768829346, - "p99": 702.9759883880615 + "p50": 642.8160071372986, + "p90": 650.6879925727844, + "p95": 652.895987033844, + "p99": 658.7520241737366 }, "isolatedSum": { - "p50": 670.335978269577, - "p90": 695.4880058765411, - "p95": 716.1279916763306, - "p99": 767.6480114459991 + "p50": 665.7280027866364, + "p90": 678.4000098705292, + "p95": 681.8880140781403, + "p99": 717.5039947032928 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 622712832, - "combineLogicalBytes": 622712832, - "fanoutMean": 5.3023681640625, - "recvTokensMax": 5498, + "dispatchLogicalBytes": 620619776, + "combineLogicalBytes": 620619776, + "fanoutMean": 5.2845458984375, + "recvTokensMax": 5526, "stragglerRank": 7, "correct": true, "samplesPooled": 600, @@ -64741,35 +66510,35 @@ "tokensPerRank": 2048, "globalTokens": 16384, "dispatch": { - "p50": 
530.1439762115479, - "p90": 539.5519733428955, - "p95": 543.008029460907, - "p99": 568.9600110054016 + "p50": 584.0640068054199, + "p90": 592.6719903945923, + "p95": 595.5520272254944, + "p99": 601.2160181999207 }, "combine": { - "p50": 611.5840077400208, - "p90": 622.048020362854, - "p95": 629.2799711227417, - "p99": 677.5040030479431 + "p50": 568.8639879226685, + "p90": 576.9280195236206, + "p95": 579.3920159339905, + "p99": 584.5119953155518 }, "roundtrip": { - "p50": 1115.488052368164, - "p90": 1129.248023033142, - "p95": 1135.583996772766, - "p99": 1275.6479978561401 + "p50": 1122.3679780960083, + "p90": 1133.8560581207275, + "p95": 1138.6239528656006, + "p99": 1146.783947944641 }, "isolatedSum": { - "p50": 1141.7279839515686, - "p90": 1161.5999937057495, - "p95": 1172.2880005836487, - "p99": 1246.4640140533447 + "p50": 1152.9279947280884, + "p90": 1169.600009918213, + "p95": 1174.9440431594849, + "p99": 1185.7280135154724 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1245038592, - "combineLogicalBytes": 1245038592, - "fanoutMean": 5.30072021484375, - "recvTokensMax": 10955, - "stragglerRank": 4, + "dispatchLogicalBytes": 1239175168, + "combineLogicalBytes": 1239175168, + "fanoutMean": 5.2757568359375, + "recvTokensMax": 11165, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 @@ -64778,35 +66547,35 @@ "tokensPerRank": 4096, "globalTokens": 32768, "dispatch": { - "p50": 986.1119985580444, - "p90": 1002.2720098495483, - "p95": 1011.0080242156982, - "p99": 1069.0239667892456 + "p50": 1107.200026512146, + "p90": 1119.0400123596191, + "p95": 1124.384045600891, + "p99": 1133.344054222107 }, "combine": { - "p50": 1125.3440380096436, - "p90": 1136.6080045700073, - "p95": 1142.3360109329224, - "p99": 1163.8400554656982 + "p50": 1020.6719636917114, + "p90": 1029.1839838027954, + "p95": 1032.1919918060303, + "p99": 1037.8559827804565 }, "roundtrip": { - "p50": 2081.088066101074, - "p90": 2097.9840755462646, - "p95": 2111.0079288482666, - 
"p99": 2311.743974685669 + "p50": 2098.4959602355957, + "p90": 2110.1760864257812, + "p95": 2113.856077194214, + "p99": 2120.60809135437 }, "isolatedSum": { - "p50": 2111.456036567688, - "p90": 2138.8800144195557, - "p95": 2153.3440351486206, - "p99": 2232.864022254944 + "p50": 2127.8719902038574, + "p90": 2148.2239961624146, + "p95": 2156.5760374069214, + "p99": 2171.2000370025635 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2489460736, - "combineLogicalBytes": 2489460736, - "fanoutMean": 5.299407958984375, - "recvTokensMax": 21864, - "stragglerRank": 7, + "dispatchLogicalBytes": 2481604608, + "combineLogicalBytes": 2481604608, + "fanoutMean": 5.282684326171875, + "recvTokensMax": 22165, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 @@ -64814,34 +66583,35 @@ ] }, { - "id": "cx-e1ecd1d4", - "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-moderate|8|prefill|normal|none|none|0|tuned||b5217e990b95f86", - "colorKey": "h200_f2b19f62", - "comparisonKey": "a7c9c0202574b9d0", + "id": "cx-96b1ca55", + "identity": "h100|deepep|7168|8|256|bf16|normal|cached-layout-comm-only-v1|uniform|8|prefill|normal|none|none|0|normalized|0.18|64d989e2e2a6b31", + "colorKey": "h100_5df912ff", + "comparisonKey": "9fdbd6763ea7346a", "schemaVersion": 3, - "generatedAt": "2026-06-27T00:04:45.749249+00:00", + "generatedAt": "2026-06-26T17:28:17.076570+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h200-dgxc-slurm_10", - "sku": "h200", + "runner": "h100-dgxc-slurm_08", + "sku": "h100", "backend": "deepep", "phase": "prefill", "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", + "resourceMode": "normalized", + "suite": "resource-constrained", "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", + "measurementContract": "cached-layout-comm-only-v1", + "topologyClass": "h100-nvlink-island", "transport": "nvlink", 
"worldSize": 8, "epSize": 8, - "label": "H200 EP8 · deepep · bf16 · zipf-moderate", + "label": "H100 EP8 · deepep · bf16 (norm) [cl]", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, "experts": 256, - "routing": "zipf-moderate", - "routingLabel": "zipf-moderate", + "routing": "uniform", + "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, @@ -64850,14 +66620,14 @@ "combineQuantMode": "none" }, "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, + "requestedFraction": 0.18, + "achievedFraction": 0.1818, + "configuredUnits": 24, "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", + "resourceClass": "resource-constrained", + "conformanceClass": "resource-conforming", "fixedKernel": false, - "paretoEligible": false + "paretoEligible": true }, "placement": { "kind": "packed", @@ -64866,8 +66636,8 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "b5217e990b95f86", - "workloadId": "set:6:6709a02c31933a9f", + "traceSignature": "64d989e2e2a6b31", + "workloadId": "set:6:a426d66e479dc893", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -64875,45 +66645,45 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28272079152", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272079152", - "createdAt": "2026-06-27T00:02:51Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28254332840", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254332840", + "createdAt": "2026-06-26T17:28:17.076570+00:00", + "sha": "60dec7d70f554e252fec87709e2be52752947db1" }, "rows": [ { "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 122.94399738311768, - "p90": 134.20799374580383, - "p95": 
138.87999951839447, - "p99": 150.87999403476715 + "p50": 101.31199657917023, + "p90": 105.69600015878677, + "p95": 107.55199939012527, + "p99": 110.84800213575363 }, "combine": { - "p50": 111.90400272607803, - "p90": 122.43200093507767, - "p95": 128.38399410247803, - "p99": 136.4479959011078 + "p50": 105.82400113344193, + "p90": 107.42399841547012, + "p95": 108.60799998044968, + "p99": 112.64000087976456 }, "roundtrip": { - "p50": 213.8880044221878, - "p90": 230.43200373649597, - "p95": 236.735999584198, - "p99": 261.4080011844635 + "p50": 183.1360012292862, + "p90": 188.03200125694275, + "p95": 188.960000872612, + "p99": 195.13599574565887 }, "isolatedSum": { - "p50": 234.8480001091957, - "p90": 256.6399946808815, - "p95": 267.2639936208725, - "p99": 287.32798993587494 + "p50": 207.13599771261215, + "p90": 213.1199985742569, + "p95": 216.15999937057495, + "p99": 223.4880030155182 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 49946624, - "combineLogicalBytes": 49946624, - "fanoutMean": 3.40234375, - "recvTokensMax": 1022, - "stragglerRank": 4, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 @@ -64922,35 +66692,35 @@ "tokensPerRank": 256, "globalTokens": 2048, "dispatch": { - "p50": 161.72799468040466, - "p90": 174.20800030231476, - "p95": 182.49599635601044, - "p99": 194.72000002861023 + "p50": 132.89600610733032, + "p90": 137.08800077438354, + "p95": 138.2399946451187, + "p99": 140.70400595664978 }, "combine": { - "p50": 158.27199816703796, - "p90": 174.8799979686737, - "p95": 179.58399653434753, - "p99": 191.26400351524353 + "p50": 144.96000111103058, + "p90": 147.5840061903, + "p95": 148.28799664974213, + "p99": 152.63999998569489 }, "roundtrip": { - "p50": 296.9920039176941, - "p90": 319.0079927444458, - "p95": 327.2320032119751, - "p99": 340.03201127052307 + "p50": 249.56800043582916, + "p90": 
253.53598594665527, + "p95": 254.59200143814087, + "p99": 256.73601031303406 }, "isolatedSum": { - "p50": 319.9999928474426, - "p90": 349.08799827098846, - "p95": 362.07999289035797, - "p99": 385.98400354385376 + "p50": 277.8560072183609, + "p90": 284.67200696468353, + "p95": 286.52799129486084, + "p99": 293.34400594234467 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 100509696, - "combineLogicalBytes": 100509696, - "fanoutMean": 3.42333984375, - "recvTokensMax": 2046, - "stragglerRank": 4, + "dispatchLogicalBytes": 155889664, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 @@ -64959,35 +66729,35 @@ "tokensPerRank": 512, "globalTokens": 4096, "dispatch": { - "p50": 237.34399676322937, - "p90": 252.19199061393738, - "p95": 259.48798656463623, - "p99": 274.0800082683563 + "p50": 193.4400051832199, + "p90": 202.68799364566803, + "p95": 203.87199521064758, + "p99": 209.9519968032837 }, "combine": { - "p50": 260.44800877571106, - "p90": 278.2079875469208, - "p95": 284.7999930381775, - "p99": 298.880010843277 + "p50": 216.8319970369339, + "p90": 220.92799842357635, + "p95": 223.55200350284576, + "p99": 226.04799270629883 }, "roundtrip": { - "p50": 475.1040041446686, - "p90": 495.2319860458374, - "p95": 509.3119740486145, - "p99": 531.8080186843872 + "p50": 382.4959993362427, + "p90": 387.7759873867035, + "p95": 388.7679874897003, + "p99": 392.767995595932 }, "isolatedSum": { - "p50": 497.79200553894043, - "p90": 530.3999781608582, - "p95": 544.2879796028137, - "p99": 572.9600191116333 + "p50": 410.2720022201538, + "p90": 423.6159920692444, + "p95": 427.42399871349335, + "p99": 435.9999895095825 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 201678848, - "combineLogicalBytes": 201678848, - "fanoutMean": 3.4345703125, - "recvTokensMax": 4094, - "stragglerRank": 5, + "dispatchLogicalBytes": 312266752, + "combineLogicalBytes": 312266752, + 
"fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 @@ -64996,35 +66766,35 @@ "tokensPerRank": 1024, "globalTokens": 8192, "dispatch": { - "p50": 379.4879913330078, - "p90": 389.60000872612, - "p95": 395.6800103187561, - "p99": 409.92000699043274 + "p50": 315.0720000267029, + "p90": 320.1279938220978, + "p95": 322.04800844192505, + "p99": 324.5759904384613 }, "combine": { - "p50": 438.1760060787201, - "p90": 452.06400752067566, - "p95": 457.69599080085754, - "p99": 494.59201097488403 + "p50": 329.27998900413513, + "p90": 333.3759903907776, + "p95": 335.61599254608154, + "p99": 338.9120101928711 }, "roundtrip": { - "p50": 794.2079901695251, - "p90": 809.7919821739197, - "p95": 823.6799836158752, - "p99": 875.6160140037537 + "p50": 619.0720200538635, + "p90": 625.2480149269104, + "p95": 627.839982509613, + "p99": 630.7839751243591 }, "isolatedSum": { - "p50": 817.6639974117279, - "p90": 841.6640162467957, - "p95": 853.3760011196136, - "p99": 904.5120179653168 + "p50": 644.351989030838, + "p90": 653.5039842128754, + "p95": 657.6640009880066, + "p99": 663.4880006313324 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 405035008, - "combineLogicalBytes": 405035008, - "fanoutMean": 3.4488525390625, - "recvTokensMax": 8189, - "stragglerRank": 6, + "dispatchLogicalBytes": 623443968, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 @@ -65033,35 +66803,35 @@ "tokensPerRank": 2048, "globalTokens": 16384, "dispatch": { - "p50": 671.2319850921631, - "p90": 682.6879978179932, - "p95": 689.2480254173279, - "p99": 929.0879964828491 + "p50": 560.8959794044495, + "p90": 569.8879957199097, + "p95": 572.1920132637024, + "p99": 577.2799849510193 }, "combine": { - "p50": 786.7839932441711, - "p90": 799.1999983787537, - "p95": 804.2880296707153, - "p99": 833.6960077285767 + "p50": 
563.3599758148193, + "p90": 573.248028755188, + "p95": 576.3840079307556, + "p99": 580.672025680542 }, "roundtrip": { - "p50": 1430.0800561904907, - "p90": 1449.9200582504272, - "p95": 1461.3120555877686, - "p99": 1667.8080558776855 + "p50": 1093.727946281433, + "p90": 1102.6240587234497, + "p95": 1105.5999994277954, + "p99": 1112.0959520339966 }, "isolatedSum": { - "p50": 1458.0159783363342, - "p90": 1481.8879961967468, - "p95": 1493.5360550880432, - "p99": 1762.7840042114258 + "p50": 1124.2559552192688, + "p90": 1143.1360244750977, + "p95": 1148.576021194458, + "p99": 1157.9520106315613 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 808822784, - "combineLogicalBytes": 808822784, - "fanoutMean": 3.44354248046875, - "recvTokensMax": 16380, - "stragglerRank": 5, + "dispatchLogicalBytes": 1243805696, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 @@ -65070,73 +66840,74 @@ "tokensPerRank": 4096, "globalTokens": 32768, "dispatch": { - "p50": 1269.2480087280273, - "p90": 1284.5439910888672, - "p95": 1292.9919958114624, - "p99": 1424.064040184021 + "p50": 1059.0720176696777, + "p90": 1071.7439651489258, + "p95": 1074.8480558395386, + "p99": 1091.2959575653076 }, "combine": { - "p50": 1480.6400537490845, - "p90": 1504.7039985656738, - "p95": 1519.10400390625, - "p99": 1724.0320444107056 + "p50": 1026.8800258636475, + "p90": 1036.2880229949951, + "p95": 1038.7840270996094, + "p99": 1047.4879741668701 }, "roundtrip": { - "p50": 2719.4879055023193, - "p90": 2740.70405960083, - "p95": 2764.8000717163086, - "p99": 3076.0960578918457 + "p50": 2055.1679134368896, + "p90": 2067.13604927063, + "p95": 2069.823980331421, + "p99": 2075.5200386047363 }, "isolatedSum": { - "p50": 2749.888062477112, - "p90": 2789.247989654541, - "p95": 2812.0959997177124, - "p99": 3148.0960845947266 + "p50": 2085.952043533325, + "p90": 2108.031988143921, + "p95": 
2113.632082939148, + "p99": 2138.7839317321777 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1619795968, - "combineLogicalBytes": 1619795968, - "fanoutMean": 3.4481201171875, - "recvTokensMax": 32761, - "stragglerRank": 4, - "correct": true, + "dispatchLogicalBytes": 2487009280, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 3, + "correct": true, "samplesPooled": 600, "trials": 3 } ] }, { - "id": "cx-f58892d6", - "identity": "h200|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-moderate+eplb|8|prefill|normal|none|none|0|tuned||2b57a75d27f5b39", - "colorKey": "h200_bac4102c", - "comparisonKey": "402825358de599a6", + "id": "cx-1ed69eb7", + "identity": "h100|deepep|7168|8|256|bf16|normal|cached-layout-comm-only-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "h100_17694d2c", + "comparisonKey": "379c3371e525c0fb", "schemaVersion": 3, - "generatedAt": "2026-06-27T00:04:49.601548+00:00", + "generatedAt": "2026-06-26T23:48:34.870060+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h200-dgxc-slurm_6", - "sku": "h200", + "runner": "h100-dgxc-slurm_15", + "sku": "h100", "backend": "deepep", "phase": "prefill", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", + "measurementContract": "cached-layout-comm-only-v1", + "topologyClass": "h100-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H200 EP8 · deepep · bf16 · zipf-moderate+eplb", + "label": "H100 EP8 · deepep · bf16 [cl]", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, - "experts": 288, - "routing": "zipf-moderate", - "routingLabel": "zipf-moderate+eplb", + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", - "eplbEnabled": 
true, + "eplbEnabled": false, "dispatchDtype": "bf16", "activationProfile": "normal", "combineQuantMode": "none" @@ -65158,18 +66929,18 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "2b57a75d27f5b39", - "workloadId": "set:6:6709a02c31933a9f", + "traceSignature": "64d989e2e2a6b31", + "workloadId": "set:6:a426d66e479dc893", "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": 4.895263671875, - "eplbImbalanceAfter": 1.0000902811686199, + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, "backendVersion": "1.2.1", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28272082600", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272082600", - "createdAt": "2026-06-27T00:02:58Z", + "id": "28271555838", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271555838", + "createdAt": "2026-06-26T23:48:34.870060+00:00", "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" }, "rows": [ @@ -65177,35 +66948,35 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 117.72800236940384, - "p90": 136.25599443912506, - "p95": 140.8960074186325, - "p99": 185.34399569034576 + "p50": 105.27999699115753, + "p90": 110.36799848079681, + "p95": 112.12799698114395, + "p99": 115.23199826478958 }, "combine": { - "p50": 103.61599922180176, - "p90": 115.9679964184761, - "p95": 122.49600142240524, - "p99": 137.7599984407425 + "p50": 106.175996363163, + "p90": 108.0000028014183, + "p95": 111.1999973654747, + "p99": 113.72800171375275 }, "roundtrip": { - "p50": 197.02400267124176, - "p90": 215.13600647449493, - "p95": 222.6240038871765, - "p99": 233.43999683856964 + "p50": 183.3599954843521, + "p90": 188.48000466823578, + "p95": 190.17599523067474, + "p99": 193.56800615787506 }, "isolatedSum": { - "p50": 221.3440015912056, - "p90": 252.22399085760117, - "p95": 263.39200884103775, - "p99": 
323.10399413108826 + "p50": 211.45599335432053, + "p90": 218.36800128221512, + "p95": 223.32799434661865, + "p99": 228.95999997854233 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77385728, - "combineLogicalBytes": 77385728, - "fanoutMean": 5.271484375, - "recvTokensMax": 691, - "stragglerRank": 6, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -65214,35 +66985,35 @@ "tokensPerRank": 256, "globalTokens": 2048, "dispatch": { - "p50": 145.21600306034088, - "p90": 166.1120057106018, - "p95": 175.1679927110672, - "p99": 194.91200149059296 + "p50": 134.5919966697693, + "p90": 140.06400108337402, + "p95": 142.46399700641632, + "p99": 146.88000082969666 }, "combine": { - "p50": 144.22400295734406, - "p90": 156.2879979610443, - "p95": 161.18399798870087, - "p99": 171.90399765968323 + "p50": 152.12799608707428, + "p90": 158.36800634860992, + "p95": 161.0880047082901, + "p99": 162.81600296497345 }, "roundtrip": { - "p50": 262.87999749183655, - "p90": 277.5999903678894, - "p95": 286.3999903202057, - "p99": 298.97600412368774 + "p50": 254.46400046348572, + "p90": 259.93600487709045, + "p95": 262.4639868736267, + "p99": 268.2560086250305 }, "isolatedSum": { - "p50": 289.44000601768494, - "p90": 322.4000036716461, - "p95": 336.35199069976807, - "p99": 366.8159991502762 + "p50": 286.71999275684357, + "p90": 298.43200743198395, + "p95": 303.5520017147064, + "p99": 309.6960037946701 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 155172864, - "combineLogicalBytes": 155172864, - "fanoutMean": 5.28515625, - "recvTokensMax": 1378, - "stragglerRank": 6, + "dispatchLogicalBytes": 155889664, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -65251,35 +67022,35 @@ "tokensPerRank": 512, "globalTokens": 
4096, "dispatch": { - "p50": 203.3279985189438, - "p90": 218.36799383163452, - "p95": 226.1440008878708, - "p99": 242.8479939699173 + "p50": 188.960000872612, + "p90": 194.97600197792053, + "p95": 198.11199605464935, + "p99": 202.5279998779297 }, "combine": { - "p50": 223.00800681114197, - "p90": 237.5359982252121, - "p95": 245.7599937915802, - "p99": 267.2959864139557 + "p50": 228.67199778556824, + "p90": 236.09599471092224, + "p95": 237.05600202083588, + "p99": 241.08800292015076 }, "roundtrip": { - "p50": 399.77601170539856, - "p90": 420.415997505188, - "p95": 433.1839978694916, - "p99": 505.40798902511597 + "p50": 391.90399646759033, + "p90": 399.80798959732056, + "p95": 402.3999869823456, + "p99": 424.0959882736206 }, "isolatedSum": { - "p50": 426.33600533008575, - "p90": 455.9039920568466, - "p95": 471.903994679451, - "p99": 510.143980383873 + "p50": 417.63199865818024, + "p90": 431.0719966888428, + "p95": 435.16799807548523, + "p99": 443.61600279808044 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 310546432, - "combineLogicalBytes": 310546432, - "fanoutMean": 5.28857421875, - "recvTokensMax": 2745, - "stragglerRank": 4, + "dispatchLogicalBytes": 312266752, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -65288,35 +67059,35 @@ "tokensPerRank": 1024, "globalTokens": 8192, "dispatch": { - "p50": 312.1280074119568, - "p90": 327.7119994163513, - "p95": 334.879994392395, - "p99": 400.4479944705963 + "p50": 294.0160036087036, + "p90": 311.3279938697815, + "p95": 315.20000100135803, + "p99": 326.07999444007874 }, "combine": { - "p50": 352.7680039405823, - "p90": 362.527996301651, - "p95": 367.6159977912903, - "p99": 386.0799968242645 + "p50": 366.1760091781616, + "p90": 382.9120099544525, + "p95": 391.32800698280334, + "p99": 407.039999961853 }, "roundtrip": { - "p50": 641.1839723587036, - "p90": 658.1119894981384, - "p95": 
666.0159826278687, - "p99": 719.5199728012085 + "p50": 632.9600214958191, + "p90": 674.3680238723755, + "p95": 687.3279809951782, + "p99": 835.3919982910156 }, "isolatedSum": { - "p50": 664.8960113525391, - "p90": 690.2399957180023, - "p95": 702.4959921836853, - "p99": 786.5279912948608 + "p50": 660.1920127868652, + "p90": 694.240003824234, + "p95": 706.5280079841614, + "p99": 733.1199944019318 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 620619776, - "combineLogicalBytes": 620619776, - "fanoutMean": 5.2845458984375, - "recvTokensMax": 5526, - "stragglerRank": 6, + "dispatchLogicalBytes": 623443968, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -65325,35 +67096,35 @@ "tokensPerRank": 2048, "globalTokens": 16384, "dispatch": { - "p50": 527.5200009346008, - "p90": 542.4320101737976, - "p95": 550.4639744758606, - "p99": 575.2959847450256 + "p50": 509.15199518203735, + "p90": 521.8560099601746, + "p95": 526.1120200157166, + "p99": 533.0560207366943 }, "combine": { - "p50": 620.3839778900146, - "p90": 633.5999965667725, - "p95": 639.2639875411987, - "p99": 673.8560199737549 + "p50": 635.2319717407227, + "p90": 645.5680131912231, + "p95": 649.4719982147217, + "p99": 656.3839912414551 }, "roundtrip": { - "p50": 1121.1520433425903, - "p90": 1137.0879411697388, - "p95": 1147.3599672317505, - "p99": 1174.7519969940186 + "p50": 1114.9760484695435, + "p90": 1128.0319690704346, + "p95": 1131.9680213928223, + "p99": 1147.711992263794 }, "isolatedSum": { - "p50": 1147.9039788246155, - "p90": 1176.03200674057, - "p95": 1189.7279620170593, - "p99": 1249.1520047187805 + "p50": 1144.38396692276, + "p90": 1167.4240231513977, + "p95": 1175.5840182304382, + "p99": 1189.4400119781494 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1239175168, - "combineLogicalBytes": 1239175168, - "fanoutMean": 5.2757568359375, - "recvTokensMax": 11165, - 
"stragglerRank": 6, + "dispatchLogicalBytes": 1243805696, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -65362,35 +67133,35 @@ "tokensPerRank": 4096, "globalTokens": 32768, "dispatch": { - "p50": 1003.3919811248779, - "p90": 1031.5200090408325, - "p95": 1040.4160022735596, - "p99": 1070.2400207519531 + "p50": 970.848023891449, + "p90": 995.136022567749, + "p95": 1001.7919540405273, + "p99": 1016.1279439926147 }, "combine": { - "p50": 1121.9840049743652, - "p90": 1135.7760429382324, - "p95": 1145.0239419937134, - "p99": 1167.8400039672852 + "p50": 1156.3199758529663, + "p90": 1167.1040058135986, + "p95": 1172.287940979004, + "p99": 1184.928059577942 }, "roundtrip": { - "p50": 2083.0399990081787, - "p90": 2113.568067550659, - "p95": 2122.431993484497, - "p99": 2277.791976928711 + "p50": 2089.279890060425, + "p90": 2105.664014816284, + "p95": 2110.431909561157, + "p99": 2118.0479526519775 }, "isolatedSum": { - "p50": 2125.375986099243, - "p90": 2167.296051979065, - "p95": 2185.439944267273, - "p99": 2238.0800247192383 + "p50": 2127.1679997444153, + "p90": 2162.2400283813477, + "p95": 2174.0798950195312, + "p99": 2201.0560035705566 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2481604608, - "combineLogicalBytes": 2481604608, - "fanoutMean": 5.282684326171875, - "recvTokensMax": 22165, - "stragglerRank": 7, + "dispatchLogicalBytes": 2487009280, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -65398,16 +67169,16 @@ ] }, { - "id": "cx-8c2088d8", - "identity": "h200|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|prefill|normal|none|none|0|tuned||2b57a75d27f5b39", - "colorKey": "h200_1eda221e", - "comparisonKey": "6ee0b18a3e276ae1", + "id": "cx-39ba4bd5", + "identity": 
"h100|deepep|4096|8|128|fp8|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||dc27c5e0894e569", + "colorKey": "h100_a96c99f3", + "comparisonKey": "b9c15d0905ec0061", "schemaVersion": 3, - "generatedAt": "2026-06-27T00:03:37.741116+00:00", + "generatedAt": "2026-06-27T11:13:58.971427+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h200-dgxc-slurm_4", - "sku": "h200", + "runner": "h100-dgxc-slurm_17", + "sku": "h100", "backend": "deepep", "phase": "prefill", "mode": "normal", @@ -65415,21 +67186,22 @@ "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", + "topologyClass": "h100-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H200 EP8 · deepep · bf16 · zipf+eplb", + "label": "H100 EP8 · deepep · fp8", + "model": "Qwen3.5", "shape": { - "hidden": 7168, + "hidden": 4096, "topk": 8, - "experts": 288, - "routing": "zipf", - "routingLabel": "zipf+eplb", + "experts": 128, + "routing": "uniform", + "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", - "eplbEnabled": true, - "dispatchDtype": "bf16", + "eplbEnabled": false, + "dispatchDtype": "fp8", "activationProfile": "normal", "combineQuantMode": "none" }, @@ -65450,54 +67222,54 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "2b57a75d27f5b39", - "workloadId": "set:6:830e36e88869e222", + "traceSignature": "dc27c5e0894e569", + "workloadId": "set:6:76d8142d69406335", "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": 4.895263671875, - "eplbImbalanceAfter": 1.0000902811686199, + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, "backendVersion": "1.2.1", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28272052634", - "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272052634", - "createdAt": "2026-06-27T00:02:03Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28287505969", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28287505969", + "createdAt": "2026-06-27T11:13:58.971427+00:00", + "sha": "df7fddee0a275156d4c72fa006cd2b73bce72613" }, "rows": [ { "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 118.65600198507309, - "p90": 133.56800377368927, - "p95": 139.1039937734604, - "p99": 146.97599411010742 + "p50": 79.03999835252762, + "p90": 84.63999629020691, + "p95": 88.28800171613693, + "p99": 108.89600217342377 }, "combine": { - "p50": 104.3199971318245, - "p90": 118.01599711179733, - "p95": 121.76000326871872, - "p99": 131.77600502967834 + "p50": 77.02399790287018, + "p90": 80.1599994301796, + "p95": 82.0159986615181, + "p99": 85.85599809885025 }, "roundtrip": { - "p50": 197.02400267124176, - "p90": 214.75200355052948, - "p95": 219.67999637126923, - "p99": 230.97600042819977 + "p50": 171.64799571037292, + "p90": 178.01600694656372, + "p95": 187.74400651454926, + "p99": 233.50399732589722 }, "isolatedSum": { - "p50": 222.97599911689758, - "p90": 251.5840008854866, - "p95": 260.8639970421791, - "p99": 278.75199913978577 + "p50": 156.0639962553978, + "p90": 164.7999957203865, + "p95": 170.30400037765503, + "p99": 194.75200027227402 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77385728, - "combineLogicalBytes": 77385728, - "fanoutMean": 5.271484375, - "recvTokensMax": 691, - "stragglerRank": 2, + "dispatchLogicalBytes": 22282240, + "combineLogicalBytes": 44564480, + "fanoutMean": 5.3125, + "recvTokensMax": 699, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -65506,35 +67278,35 @@ "tokensPerRank": 256, "globalTokens": 2048, "dispatch": { - "p50": 144.41600441932678, - "p90": 161.8880033493042, - "p95": 168.96000504493713, - "p99": 186.43200397491455 + "p50": 95.39200365543365, + 
"p90": 113.82400244474411, + "p95": 119.61600184440613, + "p99": 132.57600367069244 }, "combine": { - "p50": 143.19999516010284, - "p90": 153.08800339698792, - "p95": 157.4079990386963, - "p99": 164.60800170898438 + "p50": 103.74400019645691, + "p90": 114.43199962377548, + "p95": 121.24799937009811, + "p99": 155.2640050649643 }, "roundtrip": { - "p50": 262.87999749183655, - "p90": 275.32801032066345, - "p95": 282.4000120162964, - "p99": 291.00799560546875 + "p50": 235.9679937362671, + "p90": 250.62400102615356, + "p95": 265.1839852333069, + "p99": 275.2000093460083 }, "isolatedSum": { - "p50": 287.6159995794296, - "p90": 314.9760067462921, - "p95": 326.3680040836334, - "p99": 351.0400056838989 + "p50": 199.13600385189056, + "p90": 228.2560020685196, + "p95": 240.86400121450424, + "p99": 287.84000873565674 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 155172864, - "combineLogicalBytes": 155172864, - "fanoutMean": 5.28515625, - "recvTokensMax": 1378, - "stragglerRank": 2, + "dispatchLogicalBytes": 44863488, + "combineLogicalBytes": 89726976, + "fanoutMean": 5.34814453125, + "recvTokensMax": 1385, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 @@ -65543,35 +67315,35 @@ "tokensPerRank": 512, "globalTokens": 4096, "dispatch": { - "p50": 203.0079960823059, - "p90": 220.768004655838, - "p95": 227.55199670791626, - "p99": 253.63200902938843 + "p50": 126.8479973077774, + "p90": 132.7040046453476, + "p95": 135.74400544166565, + "p99": 147.45600521564484 }, "combine": { - "p50": 219.4879949092865, - "p90": 227.52000391483307, - "p95": 231.23200237751007, - "p99": 248.79999458789825 + "p50": 159.96800363063812, + "p90": 164.95999693870544, + "p95": 166.49599373340607, + "p99": 175.9359985589981 }, "roundtrip": { - "p50": 397.0560133457184, - "p90": 409.5039963722229, - "p95": 413.4719967842102, - "p99": 425.82398653030396 + "p50": 375.8719861507416, + "p90": 385.4080140590668, + "p95": 393.18400621414185, + "p99": 407.9680144786835 }, 
"isolatedSum": { - "p50": 422.4959909915924, - "p90": 448.2880085706711, - "p95": 458.78399908542633, - "p99": 502.4320036172867 + "p50": 286.8160009384155, + "p90": 297.66400158405304, + "p95": 302.2399991750717, + "p99": 323.39200377464294 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 310546432, - "combineLogicalBytes": 310546432, - "fanoutMean": 5.28857421875, - "recvTokensMax": 2745, - "stragglerRank": 6, + "dispatchLogicalBytes": 89751552, + "combineLogicalBytes": 179503104, + "fanoutMean": 5.349609375, + "recvTokensMax": 2772, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -65580,35 +67352,35 @@ "tokensPerRank": 1024, "globalTokens": 8192, "dispatch": { - "p50": 311.5839958190918, - "p90": 334.52799916267395, - "p95": 339.2319977283478, - "p99": 353.88800501823425 + "p50": 191.55199825763702, + "p90": 209.08799767494202, + "p95": 216.15999937057495, + "p99": 227.55199670791626 }, "combine": { - "p50": 350.20801424980164, - "p90": 362.0480000972748, - "p95": 365.9839928150177, - "p99": 423.71198534965515 + "p50": 267.2320008277893, + "p90": 272.3200023174286, + "p95": 273.6000120639801, + "p99": 275.84001421928406 }, "roundtrip": { - "p50": 636.7999911308289, - "p90": 650.1439809799194, - "p95": 654.2080044746399, - "p99": 711.4560008049011 + "p50": 636.2879872322083, + "p90": 641.8560147285461, + "p95": 644.1280245780945, + "p99": 809.0239763259888 }, "isolatedSum": { - "p50": 661.7920100688934, - "p90": 696.5759992599487, - "p95": 705.2159905433655, - "p99": 777.5999903678894 + "p50": 458.78399908542633, + "p90": 481.4079999923706, + "p95": 489.76001143455505, + "p99": 503.3920109272003 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 620619776, - "combineLogicalBytes": 620619776, - "fanoutMean": 5.2845458984375, - "recvTokensMax": 5526, - "stragglerRank": 1, + "dispatchLogicalBytes": 179511296, + "combineLogicalBytes": 359022592, + "fanoutMean": 5.349853515625, + "recvTokensMax": 5558, + "stragglerRank": 5, 
"correct": true, "samplesPooled": 600, "trials": 3 @@ -65617,35 +67389,35 @@ "tokensPerRank": 2048, "globalTokens": 16384, "dispatch": { - "p50": 528.3839702606201, - "p90": 545.7599759101868, - "p95": 551.9999861717224, - "p99": 572.2879767417908 + "p50": 307.776004076004, + "p90": 323.35999608039856, + "p95": 325.21599531173706, + "p99": 330.6879997253418 }, "combine": { - "p50": 608.959972858429, - "p90": 620.9920048713684, - "p95": 626.1119842529297, - "p99": 657.0559740066528 + "p50": 460.9279930591583, + "p90": 468.4160053730011, + "p95": 470.2720046043396, + "p99": 600.9600162506104 }, "roundtrip": { - "p50": 1110.2720499038696, - "p90": 1125.0239610671997, - "p95": 1132.032036781311, - "p99": 1183.0079555511475 + "p50": 1133.8239908218384, + "p90": 1142.5600051879883, + "p95": 1146.1759805679321, + "p99": 1151.3279676437378 }, "isolatedSum": { - "p50": 1137.343943119049, - "p90": 1166.7519807815552, - "p95": 1178.111970424652, - "p99": 1229.3439507484436 + "p50": 768.7039971351624, + "p90": 791.7760014533997, + "p95": 795.4879999160767, + "p99": 931.6480159759521 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1239175168, - "combineLogicalBytes": 1239175168, - "fanoutMean": 5.2757568359375, - "recvTokensMax": 11165, - "stragglerRank": 6, + "dispatchLogicalBytes": 358055936, + "combineLogicalBytes": 716111872, + "fanoutMean": 5.33544921875, + "recvTokensMax": 10982, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -65654,35 +67426,35 @@ "tokensPerRank": 4096, "globalTokens": 32768, "dispatch": { - "p50": 994.4639801979065, - "p90": 1016.1600112915039, - "p95": 1023.9039659500122, - "p99": 1042.0479774475098 + "p50": 546.3039875030518, + "p90": 551.2959957122803, + "p95": 553.4719824790955, + "p99": 559.6799850463867 }, "combine": { - "p50": 1103.2960414886475, - "p90": 1116.2559986114502, - "p95": 1121.7600107192993, - "p99": 1139.4879817962646 + "p50": 841.6640162467957, + "p90": 850.816011428833, + "p95": 
854.1120290756226, + "p99": 870.1440095901489 }, "roundtrip": { - "p50": 2056.544065475464, - "p90": 2077.9199600219727, - "p95": 2088.671922683716, - "p99": 2251.3279914855957 + "p50": 2148.0960845947266, + "p90": 2161.184072494507, + "p95": 2165.440082550049, + "p99": 2175.0400066375732 }, "isolatedSum": { - "p50": 2097.760021686554, - "p90": 2132.416009902954, - "p95": 2145.6639766693115, - "p99": 2181.5359592437744 + "p50": 1387.9680037498474, + "p90": 1402.1120071411133, + "p95": 1407.584011554718, + "p99": 1429.8239946365356 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2481604608, - "combineLogicalBytes": 2481604608, - "fanoutMean": 5.282684326171875, - "recvTokensMax": 22165, - "stragglerRank": 1, + "dispatchLogicalBytes": 716197888, + "combineLogicalBytes": 1432395776, + "fanoutMean": 5.336090087890625, + "recvTokensMax": 21939, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -65690,50 +67462,51 @@ ] }, { - "id": "cx-8e568434", - "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|normalized|0.18|64d989e2e2a6b31", - "colorKey": "h200_c851a534", - "comparisonKey": "1f9e00010b0d6e5b", + "id": "cx-8fb1cb65", + "identity": "h100|deepep|5120|8|160|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||0c022a63bbcbf42", + "colorKey": "h100_97196257", + "comparisonKey": "d361c128552b2ee8", "schemaVersion": 3, - "generatedAt": "2026-06-26T17:29:59.726916+00:00", + "generatedAt": "2026-06-26T23:51:51.842450+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h200-dgxc-slurm_11", - "sku": "h200", + "runner": "h100-dgxc-slurm_10", + "sku": "h100", "backend": "deepep", "phase": "prefill", "mode": "normal", - "resourceMode": "normalized", - "suite": "resource-constrained", + "resourceMode": "tuned", + "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": 
"h200-nvlink-island", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h100-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H200 EP8 · deepep · bf16 (norm)", + "label": "H100 EP8 · deepep · fp8", + "model": "shape 5120/8/160", "shape": { - "hidden": 7168, + "hidden": 5120, "topk": 8, - "experts": 256, + "experts": 160, "routing": "uniform", "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, - "dispatchDtype": "bf16", + "dispatchDtype": "fp8", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { - "requestedFraction": 0.18, - "achievedFraction": 0.1818, - "configuredUnits": 24, + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, "deviceUnits": 132, - "resourceClass": "resource-constrained", - "conformanceClass": "resource-conforming", + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", "fixedKernel": false, - "paretoEligible": true + "paretoEligible": false }, "placement": { "kind": "packed", @@ -65742,8 +67515,8 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "64d989e2e2a6b31", - "workloadId": "set:6:a426d66e479dc893", + "traceSignature": "0c022a63bbcbf42", + "workloadId": "set:6:28c0c09b13ff0acf", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -65751,45 +67524,45 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28254392935", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254392935", - "createdAt": "2026-06-26T17:28:22Z", - "sha": "60dec7d70f554e252fec87709e2be52752947db1" + "id": "28271695735", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271695735", + "createdAt": "2026-06-26T23:51:51.842450+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" }, "rows": [ 
{ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 116.44800007343292, - "p90": 126.97599828243256, - "p95": 137.92000710964203, - "p99": 159.96800363063812 + "p50": 196.28800451755524, + "p90": 202.33599841594696, + "p95": 203.96800339221954, + "p99": 210.07999777793884 }, "combine": { - "p50": 103.55199873447418, - "p90": 113.11999708414078, - "p95": 120.80000340938568, - "p99": 147.10399508476257 + "p50": 85.11999994516373, + "p90": 87.5839963555336, + "p95": 89.72799777984619, + "p99": 93.24800223112106 }, "roundtrip": { - "p50": 194.62400674819946, - "p90": 208.19200575351715, - "p95": 215.39199352264404, - "p99": 238.75199258327484 + "p50": 266.7520046234131, + "p90": 273.824006319046, + "p95": 277.5680124759674, + "p99": 291.83998703956604 }, "isolatedSum": { - "p50": 219.9999988079071, - "p90": 240.09599536657333, - "p95": 258.7200105190277, - "p99": 307.0719987154007 + "p50": 281.40800446271896, + "p90": 289.91999477148056, + "p95": 293.69600117206573, + "p99": 303.3280000090599 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 6, + "dispatchLogicalBytes": 27837440, + "combineLogicalBytes": 55674880, + "fanoutMean": 5.3095703125, + "recvTokensMax": 699, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -65798,35 +67571,35 @@ "tokensPerRank": 256, "globalTokens": 2048, "dispatch": { - "p50": 146.97599411010742, - "p90": 163.07200491428375, - "p95": 171.77599668502808, - "p99": 191.42399728298187 + "p50": 248.3839988708496, + "p90": 296.7680096626282, + "p95": 299.80799555778503, + "p99": 321.28000259399414 }, "combine": { - "p50": 142.84799993038177, - "p90": 154.78399395942688, - "p95": 165.12000560760498, - "p99": 172.28800058364868 + "p50": 118.81600320339203, + "p90": 125.15200674533844, + "p95": 126.17599964141846, + "p99": 128.06400656700134 }, "roundtrip": { - "p50": 267.0080065727234, 
- "p90": 288.9600098133087, - "p95": 295.77600955963135, - "p99": 315.71200489997864 + "p50": 353.85599732398987, + "p90": 407.9360067844391, + "p95": 410.3040099143982, + "p99": 414.40001130104065 }, "isolatedSum": { - "p50": 289.8239940404892, - "p90": 317.85599887371063, - "p95": 336.89600229263306, - "p99": 363.71199786663055 + "p50": 367.20000207424164, + "p90": 421.9200164079666, + "p95": 425.9839951992035, + "p99": 449.3440091609955 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 155889664, - "combineLogicalBytes": 155889664, - "fanoutMean": 5.3095703125, - "recvTokensMax": 1422, - "stragglerRank": 2, + "dispatchLogicalBytes": 55552000, + "combineLogicalBytes": 111104000, + "fanoutMean": 5.2978515625, + "recvTokensMax": 1387, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -65835,35 +67608,35 @@ "tokensPerRank": 512, "globalTokens": 4096, "dispatch": { - "p50": 207.68000185489655, - "p90": 228.64000499248505, - "p95": 236.92800104618073, - "p99": 267.90401339530945 + "p50": 341.43999218940735, + "p90": 394.0480053424835, + "p95": 396.5440094470978, + "p99": 400.41598677635193 }, "combine": { - "p50": 210.36800742149353, - "p90": 225.0239998102188, - "p95": 234.68799889087677, - "p99": 271.58400416374207 + "p50": 185.12000143527985, + "p90": 191.3280040025711, + "p95": 193.05600225925446, + "p99": 195.74399292469025 }, "roundtrip": { - "p50": 390.49598574638367, - "p90": 413.37600350379944, - "p95": 420.28799653053284, - "p99": 449.8240053653717 + "p50": 510.0160241127014, + "p90": 567.3919916152954, + "p95": 570.8479881286621, + "p99": 574.176013469696 }, "isolatedSum": { - "p50": 418.0480092763901, - "p90": 453.66400480270386, - "p95": 471.6159999370575, - "p99": 539.4880175590515 + "p50": 526.5599936246872, + "p90": 585.3760093450546, + "p95": 589.6000117063522, + "p99": 596.1599797010422 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 312266752, - "combineLogicalBytes": 312266752, - "fanoutMean": 5.31787109375, 
- "recvTokensMax": 2779, - "stragglerRank": 6, + "dispatchLogicalBytes": 111549440, + "combineLogicalBytes": 223098880, + "fanoutMean": 5.319091796875, + "recvTokensMax": 2762, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -65872,35 +67645,35 @@ "tokensPerRank": 1024, "globalTokens": 8192, "dispatch": { - "p50": 324.8960077762604, - "p90": 341.5679931640625, - "p95": 351.4559864997864, - "p99": 364.73599076271057 + "p50": 517.9839730262756, + "p90": 568.6720013618469, + "p95": 574.0159749984741, + "p99": 579.8079967498779 }, "combine": { - "p50": 328.0960023403168, - "p90": 339.6480083465576, - "p95": 345.95200419425964, - "p99": 362.8480136394501 + "p50": 291.26399755477905, + "p90": 295.80798745155334, + "p95": 297.08799719810486, + "p99": 299.96800422668457 }, "roundtrip": { - "p50": 628.9600133895874, - "p90": 643.231987953186, - "p95": 649.3120193481445, - "p99": 664.3199920654297 + "p50": 794.2079901695251, + "p90": 801.3120293617249, + "p95": 804.095983505249, + "p99": 814.4959807395935 }, "isolatedSum": { - "p50": 652.9920101165771, - "p90": 681.2160015106201, - "p95": 697.407990694046, - "p99": 727.5840044021606 + "p50": 809.2479705810547, + "p90": 864.4799888134003, + "p95": 871.103972196579, + "p99": 879.7760009765625 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 623443968, - "combineLogicalBytes": 623443968, - "fanoutMean": 5.30859375, - "recvTokensMax": 5505, - "stragglerRank": 6, + "dispatchLogicalBytes": 223365120, + "combineLogicalBytes": 446730240, + "fanoutMean": 5.325439453125, + "recvTokensMax": 5518, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -65909,34 +67682,34 @@ "tokensPerRank": 2048, "globalTokens": 16384, "dispatch": { - "p50": 566.3679838180542, - "p90": 581.0880064964294, - "p95": 587.2960090637207, - "p99": 609.1520190238953 + "p50": 962.5599980354309, + "p90": 971.2640047073364, + "p95": 973.0560183525085, + "p99": 978.3999919891357 }, "combine": { - "p50": 
560.9920024871826, - "p90": 573.0559825897217, - "p95": 578.2399773597717, - "p99": 609.7279787063599 + "p50": 513.1199955940247, + "p90": 523.5520005226135, + "p95": 526.0800123214722, + "p99": 531.9039821624756 }, "roundtrip": { - "p50": 1097.3440408706665, - "p90": 1114.400029182434, - "p95": 1121.791958808899, - "p99": 1286.6239547729492 + "p50": 1460.576057434082, + "p90": 1472.4160432815552, + "p95": 1476.6080379486084, + "p99": 1773.3759880065918 }, "isolatedSum": { - "p50": 1127.3599863052368, - "p90": 1154.1439890861511, - "p95": 1165.5359864234924, - "p99": 1218.8799977302551 + "p50": 1475.6799936294556, + "p90": 1494.81600522995, + "p95": 1499.1360306739807, + "p99": 1510.3039741516113 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1243805696, - "combineLogicalBytes": 1243805696, - "fanoutMean": 5.29547119140625, - "recvTokensMax": 10952, + "dispatchLogicalBytes": 446817280, + "combineLogicalBytes": 893634560, + "fanoutMean": 5.32647705078125, + "recvTokensMax": 11032, "stragglerRank": 5, "correct": true, "samplesPooled": 600, @@ -65946,35 +67719,35 @@ "tokensPerRank": 4096, "globalTokens": 32768, "dispatch": { - "p50": 1051.8079996109009, - "p90": 1067.8720474243164, - "p95": 1078.271985054016, - "p99": 1161.4079475402832 + "p50": 1818.5919523239136, + "p90": 1826.9439935684204, + "p95": 1829.7280073165894, + "p99": 1833.8559865951538 }, "combine": { - "p50": 1028.9920568466187, - "p90": 1044.0959930419922, - "p95": 1054.4320344924927, - "p99": 1218.783974647522 + "p50": 930.3359985351562, + "p90": 939.7119879722595, + "p95": 942.8160190582275, + "p99": 948.0640292167664 }, "roundtrip": { - "p50": 2049.3760108947754, - "p90": 2068.4800148010254, - "p95": 2079.200029373169, - "p99": 2593.600034713745 + "p50": 2736.9279861450195, + "p90": 2750.3039836883545, + "p95": 2755.199909210205, + "p99": 2763.64803314209 }, "isolatedSum": { - "p50": 2080.8000564575195, - "p90": 2111.9680404663086, - "p95": 2132.704019546509, - "p99": 2380.191922187805 + 
"p50": 2748.92795085907, + "p90": 2766.65598154068, + "p95": 2772.544026374817, + "p99": 2781.92001581192 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2487009280, - "combineLogicalBytes": 2487009280, - "fanoutMean": 5.294189453125, - "recvTokensMax": 21781, - "stragglerRank": 4, + "dispatchLogicalBytes": 893132800, + "combineLogicalBytes": 1786265600, + "fanoutMean": 5.323486328125, + "recvTokensMax": 21895, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -65982,50 +67755,51 @@ ] }, { - "id": "cx-6764a75f", - "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|prefill|normal|none|none|0|normalized|0.18|0a3064a2af0dd39", - "colorKey": "h200_a1e795ec", - "comparisonKey": "5a22622d9db14749", + "id": "cx-26196af1", + "identity": "h100|deepep|6144|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "h100_a96c99f3", + "comparisonKey": "fd1c952adc3abb43", "schemaVersion": 3, - "generatedAt": "2026-06-26T17:30:54.944678+00:00", + "generatedAt": "2026-06-27T11:13:32.456116+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h200-dgxc-slurm_8", - "sku": "h200", + "runner": "h100-dgxc-slurm_07", + "sku": "h100", "backend": "deepep", "phase": "prefill", "mode": "normal", - "resourceMode": "normalized", - "suite": "resource-constrained", + "resourceMode": "tuned", + "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", + "topologyClass": "h100-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H200 EP8 · deepep · bf16 (norm) · balanced", + "label": "H100 EP8 · deepep · fp8", + "model": "MiniMax-M3", "shape": { - "hidden": 7168, + "hidden": 6144, "topk": 8, "experts": 256, - "routing": "balanced", - "routingLabel": "balanced", + "routing": "uniform", + "routingLabel": "uniform", "routingStep": 0, 
"unevenTokens": "none", "eplbEnabled": false, - "dispatchDtype": "bf16", + "dispatchDtype": "fp8", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { - "requestedFraction": 0.18, - "achievedFraction": 0.1818, - "configuredUnits": 24, + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, "deviceUnits": 132, - "resourceClass": "resource-constrained", - "conformanceClass": "resource-conforming", + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", "fixedKernel": false, - "paretoEligible": true + "paretoEligible": false }, "placement": { "kind": "packed", @@ -66034,8 +67808,8 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "0a3064a2af0dd39", - "workloadId": "set:6:2dad1a73ff872905", + "traceSignature": "64d989e2e2a6b31", + "workloadId": "set:6:9f5e1e005a35e937", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -66043,44 +67817,44 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28254443915", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254443915", - "createdAt": "2026-06-26T17:29:22Z", - "sha": "60dec7d70f554e252fec87709e2be52752947db1" + "id": "28287494014", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28287494014", + "createdAt": "2026-06-27T11:13:32.456116+00:00", + "sha": "df7fddee0a275156d4c72fa006cd2b73bce72613" }, "rows": [ { "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 133.88800621032715, - "p90": 147.16799557209015, - "p95": 159.5200002193451, - "p99": 177.76000499725342 + "p50": 86.30400151014328, + "p90": 99.5199978351593, + "p95": 101.08800232410431, + "p99": 104.99200224876404 }, "combine": { - "p50": 119.39200013875961, - "p90": 131.80799782276154, - "p95": 139.74399864673615, - "p99": 152.48000621795654 + "p50": 
91.80799871683121, + "p90": 100.28800368309021, + "p95": 101.31199657917023, + "p99": 105.95200210809708 }, "roundtrip": { - "p50": 227.64800488948822, - "p90": 249.05599653720856, - "p95": 255.74401021003723, - "p99": 274.3679881095886 + "p50": 200.22399723529816, + "p90": 218.87999773025513, + "p95": 220.41599452495575, + "p99": 234.52800512313843 }, "isolatedSum": { - "p50": 253.28000634908676, - "p90": 278.9759933948517, - "p95": 299.26399886608124, - "p99": 330.24001121520996 + "p50": 178.1120002269745, + "p90": 199.8080015182495, + "p95": 202.39999890327454, + "p99": 210.94400435686111 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 117440512, - "combineLogicalBytes": 117440512, - "fanoutMean": 8, - "recvTokensMax": 1024, + "dispatchLogicalBytes": 33288192, + "combineLogicalBytes": 66576384, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, "stragglerRank": 1, "correct": true, "samplesPooled": 600, @@ -66090,34 +67864,34 @@ "tokensPerRank": 256, "globalTokens": 2048, "dispatch": { - "p50": 176.54399573802948, - "p90": 188.4160041809082, - "p95": 203.07199656963348, - "p99": 299.8400032520294 + "p50": 105.34399747848511, + "p90": 117.60000139474869, + "p95": 119.35999989509583, + "p99": 123.36000055074692 }, "combine": { - "p50": 169.91999745368958, - "p90": 175.48799514770508, - "p95": 180.16000092029572, - "p99": 187.51999735832214 + "p50": 131.9359987974167, + "p90": 142.87999272346497, + "p95": 148.70400726795197, + "p99": 308.4479868412018 }, "roundtrip": { - "p50": 319.4560110569, - "p90": 328.7679851055145, - "p95": 336.32001280784607, - "p99": 355.0400137901306 + "p50": 299.9039888381958, + "p90": 312.5759959220886, + "p95": 314.65598940849304, + "p99": 318.7200129032135 }, "isolatedSum": { - "p50": 346.46399319171906, - "p90": 363.9039993286133, - "p95": 383.2319974899292, - "p99": 487.36000061035156 + "p50": 237.2799962759018, + "p90": 260.47999411821365, + "p95": 268.0640071630478, + "p99": 431.8079873919487 }, "roundtripMeasured": true, 
- "dispatchLogicalBytes": 234881024, - "combineLogicalBytes": 234881024, - "fanoutMean": 8, - "recvTokensMax": 2048, + "dispatchLogicalBytes": 66809856, + "combineLogicalBytes": 133619712, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, "stragglerRank": 1, "correct": true, "samplesPooled": 600, @@ -66127,35 +67901,35 @@ "tokensPerRank": 512, "globalTokens": 4096, "dispatch": { - "p50": 269.567996263504, - "p90": 288.12798857688904, - "p95": 294.048011302948, - "p99": 315.3280019760132 + "p50": 139.3599957227707, + "p90": 143.0400013923645, + "p95": 144.51199769973755, + "p99": 150.94399452209473 }, "combine": { - "p50": 262.0159983634949, - "p90": 282.1120023727417, - "p95": 286.5920066833496, - "p99": 306.11199140548706 + "p50": 200.32000541687012, + "p90": 203.67999374866486, + "p95": 204.73599433898926, + "p99": 209.82399582862854 }, "roundtrip": { - "p50": 505.7920217514038, - "p90": 531.9039821624756, - "p95": 535.7760190963745, - "p99": 544.6720123291016 + "p50": 476.83200240135193, + "p90": 482.40000009536743, + "p95": 484.47999358177185, + "p99": 581.2479853630066 }, "isolatedSum": { - "p50": 531.5839946269989, - "p90": 570.2399909496307, - "p95": 580.6400179862976, - "p99": 621.4399933815002 + "p50": 339.6800011396408, + "p90": 346.71999514102936, + "p95": 349.2479920387268, + "p99": 360.76799035072327 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 469762048, - "combineLogicalBytes": 469762048, - "fanoutMean": 8, - "recvTokensMax": 4096, - "stragglerRank": 4, + "dispatchLogicalBytes": 133828608, + "combineLogicalBytes": 267657216, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 @@ -66164,35 +67938,35 @@ "tokensPerRank": 1024, "globalTokens": 8192, "dispatch": { - "p50": 443.87200474739075, - "p90": 459.55199003219604, - "p95": 467.74399280548096, - "p99": 487.199991941452 + "p50": 210.1760059595108, + "p90": 214.6880030632019, + "p95": 216.19200706481934, + 
"p99": 219.55199539661407 }, "combine": { - "p50": 427.64800786972046, - "p90": 442.81598925590515, - "p95": 451.58401131629944, - "p99": 483.13599824905396 + "p50": 324.95999336242676, + "p90": 330.1759958267212, + "p95": 332.2240114212036, + "p99": 353.4719944000244 }, "roundtrip": { - "p50": 844.7999954223633, - "p90": 860.0640296936035, - "p95": 867.0719861984253, - "p99": 924.67200756073 + "p50": 807.9040050506592, + "p90": 815.6800270080566, + "p95": 819.6160197257996, + "p99": 854.9759984016418 }, "isolatedSum": { - "p50": 871.5200126171112, - "p90": 902.3679792881012, - "p95": 919.3280041217804, - "p99": 970.335990190506 + "p50": 535.1359993219376, + "p90": 544.8639988899231, + "p95": 548.416018486023, + "p99": 573.0239897966385 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 939524096, - "combineLogicalBytes": 939524096, - "fanoutMean": 8, - "recvTokensMax": 8192, - "stragglerRank": 4, + "dispatchLogicalBytes": 267190272, + "combineLogicalBytes": 534380544, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 @@ -66201,35 +67975,35 @@ "tokensPerRank": 2048, "globalTokens": 16384, "dispatch": { - "p50": 806.1119914054871, - "p90": 823.7119913101196, - "p95": 832.4480056762695, - "p99": 892.3199772834778 + "p50": 352.60799527168274, + "p90": 362.5600039958954, + "p95": 365.85599184036255, + "p99": 394.9440121650696 }, "combine": { - "p50": 758.9120268821716, - "p90": 777.1199941635132, - "p95": 790.3040051460266, - "p99": 827.3919820785522 + "p50": 570.8479881286621, + "p90": 576.7679810523987, + "p95": 579.0719985961914, + "p99": 581.4719796180725 }, "roundtrip": { - "p50": 1534.5920324325562, - "p90": 1550.75204372406, - "p95": 1561.3759756088257, - "p99": 1597.9520082473755 + "p50": 1472.5439548492432, + "p90": 1483.680009841919, + "p95": 1486.4319562911987, + "p99": 1497.4080324172974 }, "isolatedSum": { - "p50": 1565.0240182876587, - "p90": 1600.8319854736328, - "p95": 
1622.7520108222961, - "p99": 1719.71195936203 + "p50": 923.4559834003448, + "p90": 939.3279850482941, + "p95": 944.927990436554, + "p99": 976.4159917831421 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1879048192, - "combineLogicalBytes": 1879048192, - "fanoutMean": 8, - "recvTokensMax": 16384, - "stragglerRank": 4, + "dispatchLogicalBytes": 533059584, + "combineLogicalBytes": 1066119168, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 @@ -66238,35 +68012,35 @@ "tokensPerRank": 4096, "globalTokens": 32768, "dispatch": { - "p50": 1534.7520112991333, - "p90": 1552.4159669876099, - "p95": 1570.9120035171509, - "p99": 1686.7519617080688 + "p50": 622.8160262107849, + "p90": 630.2400231361389, + "p95": 632.8639984130859, + "p99": 637.503981590271 }, "combine": { - "p50": 1415.2640104293823, - "p90": 1439.2000436782837, - "p95": 1449.120044708252, - "p99": 1643.1679725646973 + "p50": 1051.8399477005005, + "p90": 1060.4480504989624, + "p95": 1063.5839700698853, + "p99": 1077.728033065796 }, "roundtrip": { - "p50": 2922.528028488159, - "p90": 2943.743944168091, - "p95": 2957.535982131958, - "p99": 3040.5759811401367 + "p50": 2821.791887283325, + "p90": 2846.463918685913, + "p95": 2856.384038925171, + "p99": 2868.5760498046875 }, "isolatedSum": { - "p50": 2950.0160217285156, - "p90": 2991.6160106658936, - "p95": 3020.032048225403, - "p99": 3329.919934272766 + "p50": 1674.6559739112854, + "p90": 1690.6880736351013, + "p95": 1696.4479684829712, + "p99": 1715.232014656067 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 3758096384, - "combineLogicalBytes": 3758096384, - "fanoutMean": 8, - "recvTokensMax": 32768, - "stragglerRank": 4, + "dispatchLogicalBytes": 1065861120, + "combineLogicalBytes": 2131722240, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 @@ -66274,50 +68048,51 @@ ] }, { - "id": 
"cx-e63750d6", - "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|prefill|normal|none|none|0|normalized|0.18|b5217e990b95f86", - "colorKey": "h200_0a93a01f", - "comparisonKey": "f4911d0a95d49c62", + "id": "cx-db3c52ad", + "identity": "h100|deepep|6144|8|256|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "h100_97196257", + "comparisonKey": "d4fd66af6f4726f6", "schemaVersion": 3, - "generatedAt": "2026-06-26T17:31:03.582434+00:00", + "generatedAt": "2026-06-26T23:52:17.424978+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h200-dgxc-slurm_0", - "sku": "h200", + "runner": "h100-dgxc-slurm_09", + "sku": "h100", "backend": "deepep", "phase": "prefill", "mode": "normal", - "resourceMode": "normalized", - "suite": "resource-constrained", + "resourceMode": "tuned", + "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h100-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H200 EP8 · deepep · bf16 (norm) · zipf", + "label": "H100 EP8 · deepep · fp8", + "model": "MiniMax-M3", "shape": { - "hidden": 7168, + "hidden": 6144, "topk": 8, "experts": 256, - "routing": "zipf", - "routingLabel": "zipf", + "routing": "uniform", + "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, - "dispatchDtype": "bf16", + "dispatchDtype": "fp8", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { - "requestedFraction": 0.18, - "achievedFraction": 0.1818, - "configuredUnits": 24, + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, "deviceUnits": 132, - "resourceClass": "resource-constrained", - "conformanceClass": "resource-conforming", + "resourceClass": "backend-tuned", + 
"conformanceClass": "backend-default", "fixedKernel": false, - "paretoEligible": true + "paretoEligible": false }, "placement": { "kind": "packed", @@ -66326,8 +68101,8 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "b5217e990b95f86", - "workloadId": "set:6:830e36e88869e222", + "traceSignature": "64d989e2e2a6b31", + "workloadId": "set:6:9f5e1e005a35e937", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -66335,45 +68110,45 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28254452252", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254452252", - "createdAt": "2026-06-26T17:29:31Z", - "sha": "60dec7d70f554e252fec87709e2be52752947db1" + "id": "28271710412", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271710412", + "createdAt": "2026-06-26T23:52:17.424978+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" }, "rows": [ { "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 120.86399644613266, - "p90": 133.53599607944489, - "p95": 138.5280042886734, - "p99": 154.01600301265717 + "p50": 208.8640034198761, + "p90": 215.68000316619873, + "p95": 218.75199675559998, + "p99": 469.56801414489746 }, "combine": { - "p50": 112.64000087976456, - "p90": 124.86399710178375, - "p95": 130.5599957704544, - "p99": 142.7839994430542 + "p50": 90.33600240945816, + "p90": 93.21600198745728, + "p95": 95.551997423172, + "p99": 98.1760025024414 }, "roundtrip": { - "p50": 213.47199380397797, - "p90": 229.72799837589264, - "p95": 238.68800699710846, - "p99": 280.8000147342682 + "p50": 286.72000765800476, + "p90": 290.75199365615845, + "p95": 293.0240035057068, + "p99": 295.52000761032104 }, "isolatedSum": { - "p50": 233.50399732589722, - "p90": 258.39999318122864, - "p95": 269.0880000591278, - "p99": 296.80000245571136 + "p50": 
299.20000582933426, + "p90": 308.896005153656, + "p95": 314.303994178772, + "p99": 567.7440166473389 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 49946624, - "combineLogicalBytes": 49946624, - "fanoutMean": 3.40234375, - "recvTokensMax": 1022, - "stragglerRank": 4, + "dispatchLogicalBytes": 33288192, + "combineLogicalBytes": 66576384, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 @@ -66382,35 +68157,35 @@ "tokensPerRank": 256, "globalTokens": 2048, "dispatch": { - "p50": 163.55200111865997, - "p90": 173.7920045852661, - "p95": 181.66400492191315, - "p99": 202.87999510765076 + "p50": 267.2320008277893, + "p90": 273.4079957008362, + "p95": 275.64799785614014, + "p99": 286.8480086326599 }, "combine": { - "p50": 156.54399991035461, - "p90": 170.9119975566864, - "p95": 178.20799350738525, - "p99": 194.62400674819946 + "p50": 127.23200023174286, + "p90": 130.40000200271606, + "p95": 131.52000308036804, + "p99": 134.0479999780655 }, "roundtrip": { - "p50": 297.1839904785156, - "p90": 314.65598940849304, - "p95": 321.02400064468384, - "p99": 352.28800773620605 + "p50": 387.3920142650604, + "p90": 392.2879993915558, + "p95": 394.9440121650696, + "p99": 403.328001499176 }, "isolatedSum": { - "p50": 320.0960010290146, - "p90": 344.7040021419525, - "p95": 359.8719984292984, - "p99": 397.5040018558502 + "p50": 394.46400105953217, + "p90": 403.80799770355225, + "p95": 407.1680009365082, + "p99": 420.8960086107254 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 100509696, - "combineLogicalBytes": 100509696, - "fanoutMean": 3.42333984375, - "recvTokensMax": 2046, - "stragglerRank": 4, + "dispatchLogicalBytes": 66809856, + "combineLogicalBytes": 133619712, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 @@ -66419,35 +68194,35 @@ "tokensPerRank": 512, "globalTokens": 4096, "dispatch": { - "p50": 
237.56800591945648, - "p90": 246.72000110149384, - "p95": 252.44799256324768, - "p99": 262.2720003128052 + "p50": 377.6960074901581, + "p90": 423.93600940704346, + "p95": 426.4320135116577, + "p99": 432.6080083847046 }, "combine": { - "p50": 242.3039972782135, - "p90": 256.99201226234436, - "p95": 264.5759880542755, - "p99": 294.17601227760315 + "p50": 203.42400670051575, + "p90": 213.95200490951538, + "p95": 215.03999829292297, + "p99": 218.6879962682724 }, "roundtrip": { - "p50": 457.5679898262024, - "p90": 477.27999091148376, - "p95": 485.6959879398346, - "p99": 519.9679732322693 + "p50": 564.4479990005493, + "p90": 604.8960089683533, + "p95": 608.1920266151428, + "p99": 615.1360273361206 }, "isolatedSum": { - "p50": 479.87200319767, - "p90": 503.7120133638382, - "p95": 517.0239806175232, - "p99": 556.4480125904083 + "p50": 581.1200141906738, + "p90": 637.8880143165588, + "p95": 641.4720118045807, + "p99": 651.296004652977 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 201678848, - "combineLogicalBytes": 201678848, - "fanoutMean": 3.4345703125, - "recvTokensMax": 4094, - "stragglerRank": 4, + "dispatchLogicalBytes": 133828608, + "combineLogicalBytes": 267657216, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 @@ -66456,35 +68231,35 @@ "tokensPerRank": 1024, "globalTokens": 8192, "dispatch": { - "p50": 384.0320110321045, - "p90": 394.9120044708252, - "p95": 400.2879858016968, - "p99": 411.77600622177124 + "p50": 593.504011631012, + "p90": 597.9200005531311, + "p95": 599.295973777771, + "p99": 620.9279894828796 }, "combine": { - "p50": 408.2239866256714, - "p90": 420.22401094436646, - "p95": 427.39200592041016, - "p99": 457.5679898262024 + "p50": 322.59199023246765, + "p90": 326.78401470184326, + "p95": 328.7999927997589, + "p99": 331.36001229286194 }, "roundtrip": { - "p50": 765.9199833869934, - "p90": 785.9519720077515, - "p95": 798.2079982757568, - "p99": 844.543993473053 
+ "p50": 899.1680145263672, + "p90": 904.416024684906, + "p95": 906.6240191459656, + "p99": 913.0560159683228 }, "isolatedSum": { - "p50": 792.2559976577759, - "p90": 815.1360154151917, - "p95": 827.6799917221069, - "p99": 869.3439960479736 + "p50": 916.0960018634796, + "p90": 924.7040152549744, + "p95": 928.0959665775299, + "p99": 952.2880017757416 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 405035008, - "combineLogicalBytes": 405035008, - "fanoutMean": 3.4488525390625, - "recvTokensMax": 8189, - "stragglerRank": 4, + "dispatchLogicalBytes": 267190272, + "combineLogicalBytes": 534380544, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 @@ -66493,35 +68268,35 @@ "tokensPerRank": 2048, "globalTokens": 16384, "dispatch": { - "p50": 669.6959733963013, - "p90": 682.3359727859497, - "p95": 689.0559792518616, - "p99": 731.8080067634583 + "p50": 1106.943964958191, + "p90": 1117.9519891738892, + "p95": 1120.7040548324585, + "p99": 1126.2719631195068 }, "combine": { - "p50": 727.1360158920288, - "p90": 740.4800057411194, - "p95": 746.783971786499, - "p99": 762.8480195999146 + "p50": 574.4640231132507, + "p90": 583.0720067024231, + "p95": 584.991991519928, + "p99": 590.719997882843 }, "roundtrip": { - "p50": 1366.0800457000732, - "p90": 1389.631986618042, - "p95": 1405.6639671325684, - "p99": 1561.8239641189575 + "p50": 1684.0640306472778, + "p90": 1699.5840072631836, + "p95": 1705.1520347595215, + "p99": 1751.9680261611938 }, "isolatedSum": { - "p50": 1396.83198928833, - "p90": 1422.815978527069, - "p95": 1435.8399510383606, - "p99": 1494.6560263633728 + "p50": 1681.4079880714417, + "p90": 1701.0239958763123, + "p95": 1705.6960463523865, + "p99": 1716.9919610023499 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 808822784, - "combineLogicalBytes": 808822784, - "fanoutMean": 3.44354248046875, - "recvTokensMax": 16380, - "stragglerRank": 0, + "dispatchLogicalBytes": 533059584, + 
"combineLogicalBytes": 1066119168, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 @@ -66530,35 +68305,35 @@ "tokensPerRank": 4096, "globalTokens": 32768, "dispatch": { - "p50": 1259.1999769210815, - "p90": 1273.1839418411255, - "p95": 1278.5600423812866, - "p99": 1390.463948249817 + "p50": 2105.151891708374, + "p90": 2114.687919616699, + "p95": 2120.1279163360596, + "p99": 2217.0560359954834 }, "combine": { - "p50": 1366.8160438537598, - "p90": 1383.2319974899292, - "p95": 1391.2960290908813, - "p99": 1428.5119771957397 + "p50": 1052.9279708862305, + "p90": 1061.0560178756714, + "p95": 1063.264012336731, + "p99": 1068.0320262908936 }, "roundtrip": { - "p50": 2598.0799198150635, - "p90": 2617.0880794525146, - "p95": 2628.2238960266113, - "p99": 2879.9679279327393 + "p50": 3201.6959190368652, + "p90": 3233.1199645996094, + "p95": 3240.8320903778076, + "p99": 3259.615898132324 }, "isolatedSum": { - "p50": 2626.0160207748413, - "p90": 2656.4159393310547, - "p95": 2669.856071472168, - "p99": 2818.9759254455566 + "p50": 3158.0798625946045, + "p90": 3175.7439374923706, + "p95": 3183.3919286727905, + "p99": 3285.088062286377 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1619795968, - "combineLogicalBytes": 1619795968, - "fanoutMean": 3.4481201171875, - "recvTokensMax": 32761, - "stragglerRank": 4, + "dispatchLogicalBytes": 1065861120, + "combineLogicalBytes": 2131722240, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 @@ -66566,50 +68341,51 @@ ] }, { - "id": "cx-353049ec", - "identity": "h200|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|prefill|normal|none|none|0|normalized|0.18|2b57a75d27f5b39", - "colorKey": "h200_993777bf", - "comparisonKey": "cb74cc9ee6130bb2", + "id": "cx-bf310e7a", + "identity": 
"h100|deepep|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "h100_a96c99f3", + "comparisonKey": "3fc4c710187195cb", "schemaVersion": 3, - "generatedAt": "2026-06-26T17:47:04.200207+00:00", + "generatedAt": "2026-06-27T10:13:01.422194+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h200-dgxc-slurm_1", - "sku": "h200", + "runner": "h100-dgxc-slurm_18", + "sku": "h100", "backend": "deepep", "phase": "prefill", "mode": "normal", - "resourceMode": "normalized", - "suite": "resource-constrained", + "resourceMode": "tuned", + "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", + "topologyClass": "h100-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H200 EP8 · deepep · bf16 (norm) · zipf+eplb", + "label": "H100 EP8 · deepep · fp8", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, - "experts": 288, - "routing": "zipf", - "routingLabel": "zipf+eplb", + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", - "eplbEnabled": true, - "dispatchDtype": "bf16", + "eplbEnabled": false, + "dispatchDtype": "fp8", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { - "requestedFraction": 0.18, - "achievedFraction": 0.1818, - "configuredUnits": 24, + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, "deviceUnits": 132, - "resourceClass": "resource-constrained", - "conformanceClass": "resource-conforming", + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", "fixedKernel": false, - "paretoEligible": true + "paretoEligible": false }, "placement": { "kind": "packed", @@ -66618,53 +68394,53 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "2b57a75d27f5b39", - "workloadId": 
"set:6:830e36e88869e222", + "traceSignature": "64d989e2e2a6b31", + "workloadId": "set:6:a426d66e479dc893", "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": 4.895263671875, - "eplbImbalanceAfter": 1.0000902811686199, - "backendVersion": "1.2.1", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28255303840", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28255303840", - "createdAt": "2026-06-26T17:45:35Z", - "sha": "36d3eb6c3c7386d3220c873d305410219c5c0f17" + "id": "28286086353", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28286086353", + "createdAt": "2026-06-27T10:13:01.422194+00:00", + "sha": "76a3032d20288ee17220eb6099346f74d56ce005" }, "rows": [ { "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 117.40799993276596, - "p90": 132.54399597644806, - "p95": 140.06400108337402, - "p99": 154.27200496196747 + "p50": 89.53599631786346, + "p90": 94.30400282144547, + "p95": 95.87199985980988, + "p99": 98.7199991941452 }, "combine": { - "p50": 104.3199971318245, - "p90": 118.04799735546112, - "p95": 123.99999797344208, - "p99": 158.75199437141418 + "p50": 100.51199793815613, + "p90": 103.35999727249146, + "p95": 104.96000200510025, + "p99": 108.73600095510483 }, "roundtrip": { - "p50": 193.9840018749237, - "p90": 207.68000185489655, - "p95": 215.61600267887115, - "p99": 244.6720004081726 + "p50": 218.07999908924103, + "p90": 221.69600427150726, + "p95": 222.78399765491486, + "p99": 227.64800488948822 }, "isolatedSum": { - "p50": 221.72799706459045, - "p90": 250.59199333190918, - "p95": 264.0639990568161, - "p99": 313.02399933338165 + "p50": 190.0479942560196, + "p90": 197.66400009393692, + "p95": 200.83200186491013, + "p99": 207.45600014925003 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 
77385728, - "combineLogicalBytes": 77385728, - "fanoutMean": 5.271484375, - "recvTokensMax": 691, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, "stragglerRank": 7, "correct": true, "samplesPooled": 600, @@ -66674,35 +68450,35 @@ "tokensPerRank": 256, "globalTokens": 2048, "dispatch": { - "p50": 146.68799936771393, - "p90": 160.64000129699707, - "p95": 169.855996966362, - "p99": 192.06400215625763 + "p50": 108.73600095510483, + "p90": 128.83199751377106, + "p95": 132.64000415802002, + "p99": 136.83199882507324 }, "combine": { - "p50": 142.91200041770935, - "p90": 152.0320028066635, - "p95": 157.98400342464447, - "p99": 178.0479997396469 + "p50": 144.76799964904785, + "p90": 153.08800339698792, + "p95": 157.6319932937622, + "p99": 160.7999950647354 }, "roundtrip": { - "p50": 266.1440074443817, - "p90": 278.7199914455414, - "p95": 285.6000065803528, - "p99": 310.43198704719543 + "p50": 332.96000957489014, + "p90": 344.60800886154175, + "p95": 350.271999835968, + "p99": 356.86400532722473 }, "isolatedSum": { - "p50": 289.5999997854233, - "p90": 312.6720041036606, - "p95": 327.84000039100647, - "p99": 370.11200189590454 + "p50": 253.50400060415268, + "p90": 281.920000910759, + "p95": 290.2719974517822, + "p99": 297.63199388980865 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 155172864, - "combineLogicalBytes": 155172864, - "fanoutMean": 5.28515625, - "recvTokensMax": 1378, - "stragglerRank": 5, + "dispatchLogicalBytes": 77944832, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -66711,35 +68487,35 @@ "tokensPerRank": 512, "globalTokens": 4096, "dispatch": { - "p50": 208.12800526618958, - "p90": 229.8559993505478, - "p95": 237.34399676322937, - "p99": 272.5760042667389 + "p50": 144.22400295734406, + "p90": 159.29600596427917, + "p95": 160.863995552063, + "p99": 
166.6879951953888 }, "combine": { - "p50": 210.62399446964264, - "p90": 222.75200486183167, - "p95": 228.99200022220612, - "p99": 251.45599246025085 + "p50": 224.95999932289124, + "p90": 231.26399517059326, + "p95": 233.18399488925934, + "p99": 236.12800240516663 }, "roundtrip": { - "p50": 391.4879858493805, - "p90": 413.05598616600037, - "p95": 424.54400658607483, - "p99": 474.047988653183 + "p50": 525.2799987792969, + "p90": 531.4559936523438, + "p95": 534.6879959106445, + "p99": 546.2719798088074 }, "isolatedSum": { - "p50": 418.7519997358322, - "p90": 452.60800421237946, - "p95": 466.3359969854355, - "p99": 524.0319967269897 + "p50": 369.1840022802353, + "p90": 390.56000113487244, + "p95": 394.0479904413223, + "p99": 402.8159976005554 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 310546432, - "combineLogicalBytes": 310546432, - "fanoutMean": 5.28857421875, - "recvTokensMax": 2745, - "stragglerRank": 4, + "dispatchLogicalBytes": 156133376, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -66748,35 +68524,35 @@ "tokensPerRank": 1024, "globalTokens": 8192, "dispatch": { - "p50": 322.7840065956116, - "p90": 342.78398752212524, - "p95": 351.6800105571747, - "p99": 378.2399892807007 + "p50": 213.1199985742569, + "p90": 218.9760059118271, + "p95": 226.3679951429367, + "p99": 239.42400515079498 }, "combine": { - "p50": 330.1439881324768, - "p90": 345.0239896774292, - "p95": 349.8559892177582, - "p99": 379.13599610328674 + "p50": 360.8640134334564, + "p90": 365.79200625419617, + "p95": 367.3279881477356, + "p99": 371.7760145664215 }, "roundtrip": { - "p50": 626.2080073356628, - "p90": 646.8480229377747, - "p95": 661.1520051956177, - "p99": 823.4559893608093 + "p50": 894.208014011383, + "p90": 899.7120261192322, + "p95": 901.6320109367371, + "p99": 904.8320055007935 }, "isolatedSum": { - "p50": 652.9279947280884, - "p90": 687.8079771995544, - 
"p95": 701.5359997749329, - "p99": 757.3759853839874 + "p50": 573.9840120077133, + "p90": 584.7680121660233, + "p95": 593.6959832906723, + "p99": 611.2000197172165 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 620619776, - "combineLogicalBytes": 620619776, - "fanoutMean": 5.2845458984375, - "recvTokensMax": 5526, - "stragglerRank": 5, + "dispatchLogicalBytes": 311721984, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -66785,35 +68561,35 @@ "tokensPerRank": 2048, "globalTokens": 16384, "dispatch": { - "p50": 569.7280168533325, - "p90": 585.7920050621033, - "p95": 596.2240099906921, - "p99": 690.7520294189453 + "p50": 348.54400157928467, + "p90": 367.39200353622437, + "p95": 369.7600066661835, + "p99": 378.6559998989105 }, "combine": { - "p50": 569.1199898719788, - "p90": 583.1040143966675, - "p95": 591.0400152206421, - "p99": 609.503984451294 + "p50": 634.4000101089478, + "p90": 643.0079936981201, + "p95": 645.7599997520447, + "p99": 650.0160098075867 }, "roundtrip": { - "p50": 1109.8560094833374, - "p90": 1127.8719902038574, - "p95": 1138.335943222046, - "p99": 1191.648006439209 + "p50": 1619.871973991394, + "p90": 1633.5680484771729, + "p95": 1636.3840103149414, + "p99": 1644.09601688385 }, "isolatedSum": { - "p50": 1138.8480067253113, - "p90": 1168.8960194587708, - "p95": 1187.2640252113342, - "p99": 1300.2560138702393 + "p50": 982.9440116882324, + "p90": 1010.3999972343445, + "p95": 1015.5200064182281, + "p99": 1028.6720097064972 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1239175168, - "combineLogicalBytes": 1239175168, - "fanoutMean": 5.2757568359375, - "recvTokensMax": 11165, - "stragglerRank": 0, + "dispatchLogicalBytes": 621902848, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -66822,35 
+68598,35 @@ "tokensPerRank": 4096, "globalTokens": 32768, "dispatch": { - "p50": 1082.5920104980469, - "p90": 1103.16801071167, - "p95": 1116.927981376648, - "p99": 1311.8400573730469 + "p50": 631.3279867172241, + "p90": 641.6320204734802, + "p95": 644.9919939041138, + "p99": 654.4640064239502 }, "combine": { - "p50": 1018.3039903640747, - "p90": 1032.4480533599854, - "p95": 1047.5200414657593, - "p99": 1417.472004890442 + "p50": 1156.607985496521, + "p90": 1167.0080423355103, + "p95": 1169.7280406951904, + "p99": 1179.3279647827148 }, "roundtrip": { - "p50": 2072.60799407959, - "p90": 2096.7679023742676, - "p95": 2112.7359867095947, - "p99": 2388.000011444092 + "p50": 3077.791929244995, + "p90": 3088.8640880584717, + "p95": 3093.4720039367676, + "p99": 3101.408004760742 }, "isolatedSum": { - "p50": 2100.8960008621216, - "p90": 2135.6160640716553, - "p95": 2164.448022842407, - "p99": 2729.3120622634888 + "p50": 1787.9359722137451, + "p90": 1808.6400628089905, + "p95": 1814.7200345993042, + "p99": 1833.791971206665 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2481604608, - "combineLogicalBytes": 2481604608, - "fanoutMean": 5.282684326171875, - "recvTokensMax": 22165, - "stragglerRank": 4, + "dispatchLogicalBytes": 1243504640, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -66858,28 +68634,29 @@ ] }, { - "id": "cx-5c3f9114", - "identity": "h200|deepep|7168|8|256|bf16|normal|cached-layout-comm-only-v1|uniform|8|prefill|normal|none|none|0|normalized|0.18|64d989e2e2a6b31", - "colorKey": "h200_edd92e38", - "comparisonKey": "696a49bd5b0de953", + "id": "cx-9440251a", + "identity": "h100|deepep|7168|8|256|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "h100_97196257", + "comparisonKey": "2b50b361430bc4f6", "schemaVersion": 3, - "generatedAt": "2026-06-26T17:30:13.181201+00:00", + 
"generatedAt": "2026-06-26T23:48:40.278594+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h200-dgxc-slurm_4", - "sku": "h200", + "runner": "h100-dgxc-slurm_14", + "sku": "h100", "backend": "deepep", "phase": "prefill", "mode": "normal", - "resourceMode": "normalized", - "suite": "resource-constrained", + "resourceMode": "tuned", + "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "cached-layout-comm-only-v1", - "topologyClass": "h200-nvlink-island", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h100-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H200 EP8 · deepep · bf16 (norm) [cl]", + "label": "H100 EP8 · deepep · fp8", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, @@ -66889,19 +68666,19 @@ "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, - "dispatchDtype": "bf16", + "dispatchDtype": "fp8", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { - "requestedFraction": 0.18, - "achievedFraction": 0.1818, - "configuredUnits": 24, + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, "deviceUnits": 132, - "resourceClass": "resource-constrained", - "conformanceClass": "resource-conforming", + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", "fixedKernel": false, - "paretoEligible": true + "paretoEligible": false }, "placement": { "kind": "packed", @@ -66919,45 +68696,45 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28254409438", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254409438", - "createdAt": "2026-06-26T17:28:41Z", - "sha": "60dec7d70f554e252fec87709e2be52752947db1" + "id": "28271583505", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271583505", + "createdAt": 
"2026-06-26T23:48:40.278594+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" }, "rows": [ { "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 104.032002389431, - "p90": 116.12799763679504, - "p95": 120.83200365304947, - "p99": 131.00799918174744 + "p50": 211.2639993429184, + "p90": 217.28000044822693, + "p95": 219.32800114154816, + "p99": 226.78400576114655 }, "combine": { - "p50": 103.07200253009796, - "p90": 115.167997777462, - "p95": 120.95999717712402, - "p99": 125.76000392436981 + "p50": 97.15200215578079, + "p90": 100.41599720716476, + "p95": 102.27199643850327, + "p99": 105.59999942779541 }, "roundtrip": { - "p50": 182.23999440670013, - "p90": 196.48000597953796, - "p95": 200.095996260643, - "p99": 249.7600018978119 + "p50": 296.640008687973, + "p90": 303.26399207115173, + "p95": 305.82401156425476, + "p99": 313.9199912548065 }, "isolatedSum": { - "p50": 207.10400491952896, - "p90": 231.29599541425705, - "p95": 241.7920008301735, - "p99": 256.76800310611725 + "p50": 308.4160014986992, + "p90": 317.6959976553917, + "p95": 321.5999975800514, + "p99": 332.38400518894196 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, + "dispatchLogicalBytes": 38836224, "combineLogicalBytes": 77672448, "fanoutMean": 5.291015625, "recvTokensMax": 723, - "stragglerRank": 5, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -66966,31 +68743,31 @@ "tokensPerRank": 256, "globalTokens": 2048, "dispatch": { - "p50": 136.03200018405914, - "p90": 151.96800231933594, - "p95": 158.4639996290207, - "p99": 170.68800330162048 + "p50": 281.98400139808655, + "p90": 324.6400058269501, + "p95": 327.7760148048401, + "p99": 332.99198746681213 }, "combine": { - "p50": 142.59199798107147, - "p90": 157.53600001335144, - "p95": 161.18399798870087, - "p99": 179.6800047159195 + "p50": 141.53599739074707, + "p90": 147.87200093269348, + "p95": 149.9519944190979, + "p99": 152.70400047302246 }, "roundtrip": { - "p50": 252.8960108757019, 
- "p90": 265.28000831604004, - "p95": 271.232008934021, - "p99": 293.4400141239166 + "p50": 409.7920060157776, + "p90": 415.45599699020386, + "p95": 417.7280068397522, + "p99": 423.39199781417847 }, "isolatedSum": { - "p50": 278.6239981651306, - "p90": 309.5040023326874, - "p95": 319.64799761772156, - "p99": 350.36800801754 + "p50": 423.5199987888336, + "p90": 472.51200675964355, + "p95": 477.728009223938, + "p99": 485.6959879398346 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 155889664, + "dispatchLogicalBytes": 77944832, "combineLogicalBytes": 155889664, "fanoutMean": 5.3095703125, "recvTokensMax": 1422, @@ -67003,35 +68780,35 @@ "tokensPerRank": 512, "globalTokens": 4096, "dispatch": { - "p50": 194.87999379634857, - "p90": 210.33599972724915, - "p95": 215.87200462818146, - "p99": 243.9039945602417 + "p50": 402.24000811576843, + "p90": 444.64001059532166, + "p95": 447.3919868469238, + "p99": 454.1440010070801 }, "combine": { - "p50": 208.064004778862, - "p90": 222.04799950122833, - "p95": 230.14399409294128, - "p99": 255.42399287223816 + "p50": 224.16000068187714, + "p90": 233.0559939146042, + "p95": 235.23199558258057, + "p99": 239.29600417613983 }, "roundtrip": { - "p50": 378.84798645973206, - "p90": 394.9120044708252, - "p95": 405.5039882659912, - "p99": 434.27199125289917 + "p50": 613.9839887619019, + "p90": 657.7600240707397, + "p95": 661.9200110435486, + "p99": 734.7840070724487 }, "isolatedSum": { - "p50": 402.94399857521057, - "p90": 432.3839992284775, - "p95": 446.01599872112274, - "p99": 499.32798743247986 + "p50": 626.4000087976456, + "p90": 677.6960045099258, + "p95": 682.6239824295044, + "p99": 693.4400051832199 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 312266752, + "dispatchLogicalBytes": 156133376, "combineLogicalBytes": 312266752, "fanoutMean": 5.31787109375, "recvTokensMax": 2779, - "stragglerRank": 6, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -67040,35 +68817,35 @@ "tokensPerRank": 1024, 
"globalTokens": 8192, "dispatch": { - "p50": 312.99200654029846, - "p90": 334.1119885444641, - "p95": 342.9119884967804, - "p99": 389.15199041366577 + "p50": 664.7359728813171, + "p90": 669.5680022239685, + "p95": 671.1360216140747, + "p99": 674.7519969940186 }, "combine": { - "p50": 326.1120021343231, - "p90": 339.35999870300293, - "p95": 347.3280072212219, - "p99": 393.0560052394867 + "p50": 358.0799996852875, + "p90": 363.23198676109314, + "p95": 364.47998881340027, + "p99": 369.9199855327606 }, "roundtrip": { - "p50": 614.0159964561462, - "p90": 628.4800171852112, - "p95": 635.7759833335876, - "p99": 708.4479928016663 + "p50": 1005.2160024642944, + "p90": 1010.2720260620117, + "p95": 1012.287974357605, + "p99": 1019.9999809265137 }, "isolatedSum": { - "p50": 639.1040086746216, - "p90": 673.471987247467, - "p95": 690.2399957180023, - "p99": 782.2079956531525 + "p50": 1022.8159725666046, + "p90": 1032.7999889850616, + "p95": 1035.616010427475, + "p99": 1044.6719825267792 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 623443968, + "dispatchLogicalBytes": 311721984, "combineLogicalBytes": 623443968, "fanoutMean": 5.30859375, "recvTokensMax": 5505, - "stragglerRank": 0, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -67077,35 +68854,35 @@ "tokensPerRank": 2048, "globalTokens": 16384, "dispatch": { - "p50": 549.3760108947754, - "p90": 563.264012336731, - "p95": 569.2480206489563, - "p99": 593.1519865989685 + "p50": 1237.8560304641724, + "p90": 1245.919942855835, + "p95": 1249.0559816360474, + "p99": 1253.6319494247437 }, "combine": { - "p50": 560.8000159263611, - "p90": 573.2799768447876, - "p95": 579.8400044441223, - "p99": 591.871976852417 + "p50": 632.0639848709106, + "p90": 639.0720009803772, + "p95": 641.5359973907471, + "p99": 646.9119787216187 }, "roundtrip": { - "p50": 1080.9600353240967, - "p90": 1097.5359678268433, - "p95": 1106.0800552368164, - "p99": 1136.512041091919 + "p50": 1845.0239896774292, + "p90": 
1854.3039560317993, + "p95": 1857.983946800232, + "p99": 1862.720012664795 }, "isolatedSum": { - "p50": 1110.1760268211365, - "p90": 1136.5439891815186, - "p95": 1149.0880250930786, - "p99": 1185.0239634513855 + "p50": 1869.920015335083, + "p90": 1884.9919438362122, + "p95": 1890.5919790267944, + "p99": 1900.5439281463623 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1243805696, + "dispatchLogicalBytes": 621902848, "combineLogicalBytes": 1243805696, "fanoutMean": 5.29547119140625, "recvTokensMax": 10952, - "stragglerRank": 0, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -67114,31 +68891,31 @@ "tokensPerRank": 4096, "globalTokens": 32768, "dispatch": { - "p50": 1017.7919864654541, - "p90": 1032.1600437164307, - "p95": 1039.6480560302734, - "p99": 1061.1519813537598 + "p50": 2360.80002784729, + "p90": 2371.2639808654785, + "p95": 2375.3280639648438, + "p99": 2383.3279609680176 }, "combine": { - "p50": 1013.0879878997803, - "p90": 1025.823950767517, - "p95": 1031.775951385498, - "p99": 1097.7599620819092 + "p50": 1150.8159637451172, + "p90": 1160.032033920288, + "p95": 1162.9120111465454, + "p99": 1171.6159582138062 }, "roundtrip": { - "p50": 2001.5358924865723, - "p90": 2015.7439708709717, - "p95": 2029.7598838806152, - "p99": 2119.1039085388184 + "p50": 3508.7039470672607, + "p90": 3525.631904602051, + "p95": 3531.615972518921, + "p99": 3547.4560260772705 }, "isolatedSum": { - "p50": 2030.8799743652344, - "p90": 2057.9839944839478, - "p95": 2071.4240074157715, - "p99": 2158.911943435669 + "p50": 3511.615991592407, + "p90": 3531.2960147857666, + "p95": 3538.240075111389, + "p99": 3554.9439191818237 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2487009280, + "dispatchLogicalBytes": 1243504640, "combineLogicalBytes": 2487009280, "fanoutMean": 5.294189453125, "recvTokensMax": 21781, @@ -67150,38 +68927,39 @@ ] }, { - "id": "cx-e1047fdc", - "identity": 
"h200|deepep|7168|8|256|bf16|normal|cached-layout-comm-only-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", - "colorKey": "h200_76bb7d5d", - "comparisonKey": "174936235ac15d2c", + "id": "cx-4bac404d", + "identity": "h100|deepep|7168|8|384|fp8|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||cd50548525dafdf", + "colorKey": "h100_a96c99f3", + "comparisonKey": "773edc302de99204", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:49:44.261568+00:00", + "generatedAt": "2026-06-27T11:16:09.188835+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h200-dgxc-slurm_2", - "sku": "h200", + "runner": "h100-dgxc-slurm_15", + "sku": "h100", "backend": "deepep", "phase": "prefill", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "cached-layout-comm-only-v1", - "topologyClass": "h200-nvlink-island", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H200 EP8 · deepep · bf16 [cl]", + "label": "H100 EP8 · deepep · fp8", + "model": "Kimi-K2", "shape": { "hidden": 7168, "topk": 8, - "experts": 256, + "experts": 384, "routing": "uniform", "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, - "dispatchDtype": "bf16", + "dispatchDtype": "fp8", "activationProfile": "normal", "combineQuantMode": "none" }, @@ -67202,8 +68980,8 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "64d989e2e2a6b31", - "workloadId": "set:6:a426d66e479dc893", + "traceSignature": "cd50548525dafdf", + "workloadId": "set:6:b23bc0c4b6402c69", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -67211,45 +68989,45 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - 
"id": "28271611947", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271611947", - "createdAt": "2026-06-26T23:48:13Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28287500362", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28287500362", + "createdAt": "2026-06-27T11:16:09.188835+00:00", + "sha": "df7fddee0a275156d4c72fa006cd2b73bce72613" }, "rows": [ { "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 104.3199971318245, - "p90": 121.50400131940842, - "p95": 125.50400197505951, - "p99": 141.76000654697418 + "p50": 89.9839997291565, + "p90": 93.9520001411438, + "p95": 96.44799679517746, + "p99": 158.87999534606934 }, "combine": { - "p50": 104.032002389431, - "p90": 119.71200257539749, - "p95": 123.96799772977829, - "p99": 145.4080045223236 + "p50": 98.04800152778625, + "p90": 100.5759984254837, + "p95": 102.11200267076492, + "p99": 104.67199981212616 }, "roundtrip": { - "p50": 184.4799965620041, - "p90": 197.24799692630768, - "p95": 202.11200416088104, - "p99": 221.91999852657318 + "p50": 217.75999665260315, + "p90": 221.66399657726288, + "p95": 223.00800681114197, + "p99": 227.7120053768158 }, "isolatedSum": { - "p50": 208.3519995212555, - "p90": 241.2160038948059, - "p95": 249.4719997048378, - "p99": 287.1680110692978 + "p50": 188.03200125694275, + "p90": 194.5279985666275, + "p95": 198.55999946594238, + "p99": 263.5519951581955 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 2, + "dispatchLogicalBytes": 38757376, + "combineLogicalBytes": 77514752, + "fanoutMean": 5.2802734375, + "recvTokensMax": 707, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 @@ -67258,35 +69036,35 @@ "tokensPerRank": 256, "globalTokens": 2048, "dispatch": { - "p50": 134.49600338935852, - "p90": 149.59999918937683, - "p95": 156.63999319076538, - "p99": 
199.0080028772354 + "p50": 107.4879989027977, + "p90": 111.42399907112122, + "p95": 114.59200084209442, + "p99": 170.30400037765503 }, "combine": { - "p50": 143.71199905872345, - "p90": 156.51200711727142, - "p95": 161.6639941930771, - "p99": 174.14399981498718 + "p50": 143.39199662208557, + "p90": 146.4959979057312, + "p95": 147.8399932384491, + "p99": 150.56000649929047 }, "roundtrip": { - "p50": 254.88001108169556, - "p90": 277.50399708747864, - "p95": 284.09600257873535, - "p99": 315.20000100135803 + "p50": 329.24801111221313, + "p90": 333.5680067539215, + "p95": 335.32801270484924, + "p99": 338.7199938297272 }, "isolatedSum": { - "p50": 278.20800244808197, - "p90": 306.11200630664825, - "p95": 318.30398738384247, - "p99": 373.1520026922226 + "p50": 250.87999552488327, + "p90": 257.9199969768524, + "p95": 262.4319940805435, + "p99": 320.8640068769455 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 155889664, - "combineLogicalBytes": 155889664, - "fanoutMean": 5.3095703125, - "recvTokensMax": 1422, - "stragglerRank": 7, + "dispatchLogicalBytes": 77285376, + "combineLogicalBytes": 154570752, + "fanoutMean": 5.2646484375, + "recvTokensMax": 1391, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 @@ -67295,35 +69073,35 @@ "tokensPerRank": 512, "globalTokens": 4096, "dispatch": { - "p50": 192.89599359035492, - "p90": 207.39200711250305, - "p95": 213.53599429130554, - "p99": 229.8240065574646 + "p50": 146.97599411010742, + "p90": 158.9439958333969, + "p95": 163.39200735092163, + "p99": 460.86400747299194 }, "combine": { - "p50": 222.88000583648682, - "p90": 239.77600038051605, - "p95": 244.06400322914124, - "p99": 276.16000175476074 + "p50": 223.80800545215607, + "p90": 229.40799593925476, + "p95": 230.81600666046143, + "p99": 233.40800404548645 }, "roundtrip": { - "p50": 388.51198554039, - "p90": 405.08800745010376, - "p95": 412.6400053501129, - "p99": 470.43201327323914 + "p50": 523.967981338501, + "p90": 529.1839838027954, + "p95": 
530.6879878044128, + "p99": 534.6559882164001 }, "isolatedSum": { - "p50": 415.77599942684174, - "p90": 447.1680074930191, - "p95": 457.5999975204468, - "p99": 505.98400831222534 + "p50": 370.7839995622635, + "p90": 388.3519917726517, + "p95": 394.20801401138306, + "p99": 694.2720115184784 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 312266752, - "combineLogicalBytes": 312266752, - "fanoutMean": 5.31787109375, - "recvTokensMax": 2779, - "stragglerRank": 5, + "dispatchLogicalBytes": 154886144, + "combineLogicalBytes": 309772288, + "fanoutMean": 5.275390625, + "recvTokensMax": 2754, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -67332,35 +69110,35 @@ "tokensPerRank": 1024, "globalTokens": 8192, "dispatch": { - "p50": 304.32000756263733, - "p90": 328.2560110092163, - "p95": 334.6239924430847, - "p99": 354.8159897327423 + "p50": 215.32799303531647, + "p90": 219.55199539661407, + "p95": 220.7999974489212, + "p99": 223.51999580860138 }, "combine": { - "p50": 352.35199332237244, - "p90": 364.1279935836792, - "p95": 372.44799733161926, - "p99": 391.80800318717957 + "p50": 361.11998558044434, + "p90": 366.5280044078827, + "p95": 367.8719997406006, + "p99": 371.7760145664215 }, "roundtrip": { - "p50": 630.1760077476501, - "p90": 646.7840075492859, - "p95": 655.135989189148, - "p99": 679.5520186424255 + "p50": 895.6800103187561, + "p90": 901.2479782104492, + "p95": 903.1360149383545, + "p99": 906.8480134010315 }, "isolatedSum": { - "p50": 656.6720008850098, - "p90": 692.3840045928955, - "p95": 707.071989774704, - "p99": 746.6239929199219 + "p50": 576.4479786157608, + "p90": 586.0799998044968, + "p95": 588.6719971895218, + "p99": 595.2960103750229 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 623443968, - "combineLogicalBytes": 623443968, - "fanoutMean": 5.30859375, - "recvTokensMax": 5505, - "stragglerRank": 7, + "dispatchLogicalBytes": 309750784, + "combineLogicalBytes": 619501568, + "fanoutMean": 5.2750244140625, + 
"recvTokensMax": 5469, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 @@ -67369,35 +69147,35 @@ "tokensPerRank": 2048, "globalTokens": 16384, "dispatch": { - "p50": 515.2000188827515, - "p90": 530.0800204277039, - "p95": 538.9119982719421, - "p99": 611.7119789123535 + "p50": 354.8479974269867, + "p90": 371.8079924583435, + "p95": 374.2400109767914, + "p99": 380.2880048751831 }, "combine": { - "p50": 611.2319827079773, - "p90": 623.5520243644714, - "p95": 633.2160234451294, - "p99": 764.1919851303101 + "p50": 633.0239772796631, + "p90": 642.304003238678, + "p95": 644.927978515625, + "p99": 650.7840156555176 }, "roundtrip": { - "p50": 1099.4880199432373, - "p90": 1118.4959411621094, - "p95": 1131.1999559402466, - "p99": 1154.2079448699951 + "p50": 1625.4080533981323, + "p90": 1642.7520513534546, + "p95": 1652.6720523834229, + "p99": 1691.4559602737427 }, "isolatedSum": { - "p50": 1126.4320015907288, - "p90": 1153.6320447921753, - "p95": 1172.1280217170715, - "p99": 1375.9039640426636 + "p50": 987.8719747066498, + "p90": 1014.1119956970215, + "p95": 1019.1679894924164, + "p99": 1031.0720205307007 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1243805696, - "combineLogicalBytes": 1243805696, - "fanoutMean": 5.29547119140625, - "recvTokensMax": 10952, - "stragglerRank": 7, + "dispatchLogicalBytes": 619687936, + "combineLogicalBytes": 1239375872, + "fanoutMean": 5.276611328125, + "recvTokensMax": 10883, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -67406,35 +69184,35 @@ "tokensPerRank": 4096, "globalTokens": 32768, "dispatch": { - "p50": 964.959979057312, - "p90": 992.2879934310913, - "p95": 1000.3199577331543, - "p99": 1034.4959497451782 + "p50": 641.3760185241699, + "p90": 647.5200057029724, + "p95": 649.5040059089661, + "p99": 655.4880142211914 }, "combine": { - "p50": 1105.7920455932617, - "p90": 1125.1840591430664, - "p95": 1137.5679969787598, - "p99": 1247.26402759552 + "p50": 1148.4800577163696, + 
"p90": 1157.5679779052734, + "p95": 1160.2239608764648, + "p99": 1163.807988166809 }, "roundtrip": { - "p50": 2036.895990371704, - "p90": 2068.3839321136475, - "p95": 2084.383964538574, - "p99": 2168.4799194335938 + "p50": 3082.240104675293, + "p90": 3094.0160751342773, + "p95": 3098.112106323242, + "p99": 3109.055995941162 }, "isolatedSum": { - "p50": 2070.7520246505737, - "p90": 2117.4720525741577, - "p95": 2137.887954711914, - "p99": 2281.7599773406982 + "p50": 1789.8560762405396, + "p90": 1805.0879836082458, + "p95": 1809.727966785431, + "p99": 1819.2960023880005 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2487009280, - "combineLogicalBytes": 2487009280, - "fanoutMean": 5.294189453125, - "recvTokensMax": 21781, - "stragglerRank": 5, + "dispatchLogicalBytes": 1239834624, + "combineLogicalBytes": 2479669248, + "fanoutMean": 5.278564453125, + "recvTokensMax": 21730, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -67442,16 +69220,16 @@ ] }, { - "id": "cx-26de8d70", - "identity": "h200|deepep|4096|8|128|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||dc27c5e0894e569", - "colorKey": "h200_87683f6c", - "comparisonKey": "b7adcc489d58bf89", + "id": "cx-0ee3ca7d", + "identity": "h100|deepep|7168|8|384|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||cd50548525dafdf", + "colorKey": "h100_97196257", + "comparisonKey": "7f26f72cd9fff78c", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:53:37.273038+00:00", + "generatedAt": "2026-06-26T23:50:56.826066+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h200-dgxc-slurm_5", - "sku": "h200", + "runner": "h100-dgxc-slurm_15", + "sku": "h100", "backend": "deepep", "phase": "prefill", "mode": "normal", @@ -67459,15 +69237,16 @@ "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "runtime-visible-v1", - "topologyClass": "h200-nvlink-island", + "topologyClass": "h100-nvlink-island", 
"transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H200 EP8 · deepep · fp8", + "label": "H100 EP8 · deepep · fp8", + "model": "Kimi-K2", "shape": { - "hidden": 4096, + "hidden": 7168, "topk": 8, - "experts": 128, + "experts": 384, "routing": "uniform", "routingLabel": "uniform", "routingStep": 0, @@ -67494,8 +69273,8 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "dc27c5e0894e569", - "workloadId": "set:6:76d8142d69406335", + "traceSignature": "cd50548525dafdf", + "workloadId": "set:6:b23bc0c4b6402c69", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -67503,9 +69282,9 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271739849", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271739849", - "createdAt": "2026-06-26T23:52:08Z", + "id": "28271663775", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271663775", + "createdAt": "2026-06-26T23:50:56.826066+00:00", "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" }, "rows": [ @@ -67513,35 +69292,35 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 233.2800030708313, - "p90": 296.25600576400757, - "p95": 315.45600295066833, - "p99": 387.84000277519226 - }, - "combine": { - "p50": 74.72000271081924, - "p90": 92.96000003814697, - "p95": 97.98400104045868, - "p99": 124.86399710178375 + "p50": 211.93599700927734, + "p90": 218.33600103855133, + "p95": 220.5120027065277, + "p99": 225.055992603302 + }, + "combine": { + "p50": 97.59999811649323, + "p90": 100.0640019774437, + "p95": 101.85600072145462, + "p99": 104.5759990811348 }, "roundtrip": { - "p50": 278.9759933948517, - "p90": 337.44001388549805, - "p95": 363.5840117931366, - "p99": 408.9600145816803 + "p50": 297.91998863220215, + "p90": 303.9360046386719, + "p95": 306.5600097179413, + "p99": 328.000009059906 }, 
"isolatedSum": { - "p50": 308.00000578165054, - "p90": 389.21600580215454, - "p95": 413.440003991127, - "p99": 512.703999876976 + "p50": 309.53599512577057, + "p90": 318.400003015995, + "p95": 322.36800342798233, + "p99": 329.6319916844368 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 22282240, - "combineLogicalBytes": 44564480, - "fanoutMean": 5.3125, - "recvTokensMax": 699, - "stragglerRank": 6, + "dispatchLogicalBytes": 38757376, + "combineLogicalBytes": 77514752, + "fanoutMean": 5.2802734375, + "recvTokensMax": 707, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 @@ -67550,35 +69329,35 @@ "tokensPerRank": 256, "globalTokens": 2048, "dispatch": { - "p50": 240.28800427913666, - "p90": 292.03200340270996, - "p95": 306.97599053382874, - "p99": 329.5679986476898 + "p50": 278.3359885215759, + "p90": 284.5759987831116, + "p95": 285.8560085296631, + "p99": 292.03200340270996 }, "combine": { - "p50": 98.30400347709656, - "p90": 115.07199704647064, - "p95": 119.00799721479416, - "p99": 131.9359987974167 + "p50": 141.88799262046814, + "p90": 145.1520025730133, + "p95": 146.88000082969666, + "p99": 151.39199793338776 }, "roundtrip": { - "p50": 325.408011674881, - "p90": 376.67199969291687, - "p95": 392.8639888763428, - "p99": 439.520001411438 + "p50": 404.4800102710724, + "p90": 410.7840061187744, + "p95": 413.9519929885864, + "p99": 420.51199078559875 }, "isolatedSum": { - "p50": 338.5920077562332, - "p90": 407.1040004491806, - "p95": 425.9839877486229, - "p99": 461.5039974451065 + "p50": 420.22398114204407, + "p90": 429.7280013561249, + "p95": 432.73600935935974, + "p99": 443.4240013360977 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 44863488, - "combineLogicalBytes": 89726976, - "fanoutMean": 5.34814453125, - "recvTokensMax": 1385, - "stragglerRank": 6, + "dispatchLogicalBytes": 77285376, + "combineLogicalBytes": 154570752, + "fanoutMean": 5.2646484375, + "recvTokensMax": 1391, + "stragglerRank": 2, "correct": true, 
"samplesPooled": 600, "trials": 3 @@ -67587,35 +69366,35 @@ "tokensPerRank": 512, "globalTokens": 4096, "dispatch": { - "p50": 315.45600295066833, - "p90": 357.08799958229065, - "p95": 369.9199855327606, - "p99": 407.039999961853 + "p50": 397.2800076007843, + "p90": 402.8480052947998, + "p95": 405.44000267982483, + "p99": 410.71999073028564 }, "combine": { - "p50": 147.45600521564484, - "p90": 164.67200219631195, - "p95": 168.16000640392303, - "p99": 182.52800405025482 + "p50": 221.02400660514832, + "p90": 225.0880002975464, + "p95": 226.01599991321564, + "p99": 229.50400412082672 }, "roundtrip": { - "p50": 460.4479968547821, - "p90": 508.575975894928, - "p95": 523.360013961792, - "p99": 576.0959982872009 + "p50": 601.4400124549866, + "p90": 608.1600189208984, + "p95": 610.4000210762024, + "p99": 616.8000102043152 }, "isolatedSum": { - "p50": 462.91200816631317, - "p90": 521.7600017786026, - "p95": 538.0799919366837, - "p99": 589.5680040121078 + "p50": 618.3040142059326, + "p90": 627.9360055923462, + "p95": 631.4560025930405, + "p99": 640.2239948511124 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 89751552, - "combineLogicalBytes": 179503104, - "fanoutMean": 5.349609375, - "recvTokensMax": 2772, - "stragglerRank": 0, + "dispatchLogicalBytes": 154886144, + "combineLogicalBytes": 309772288, + "fanoutMean": 5.275390625, + "recvTokensMax": 2754, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 @@ -67624,35 +69403,35 @@ "tokensPerRank": 1024, "globalTokens": 8192, "dispatch": { - "p50": 458.2720100879669, - "p90": 501.5680193901062, - "p95": 517.632007598877, - "p99": 562.1119737625122 + "p50": 659.5199704170227, + "p90": 663.2959842681885, + "p95": 665.0239825248718, + "p99": 667.2319769859314 }, "combine": { - "p50": 241.2160038948059, - "p90": 252.06398963928223, - "p95": 257.34400749206543, - "p99": 279.83999252319336 + "p50": 360.22400856018066, + "p90": 364.9280071258545, + "p95": 366.3040101528168, + "p99": 369.85599994659424 }, 
"roundtrip": { - "p50": 681.9199919700623, - "p90": 713.4079933166504, - "p95": 728.8320064544678, - "p99": 805.8239817619324 + "p50": 1002.9439926147461, + "p90": 1008.3839893341064, + "p95": 1010.0159645080566, + "p99": 1013.856053352356 }, "isolatedSum": { - "p50": 699.4880139827728, - "p90": 753.6320090293884, - "p95": 774.9760150909424, - "p99": 841.9519662857056 + "p50": 1019.7439789772034, + "p90": 1028.223991394043, + "p95": 1031.3279926776886, + "p99": 1037.0879769325256 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 179511296, - "combineLogicalBytes": 359022592, - "fanoutMean": 5.349853515625, - "recvTokensMax": 5558, - "stragglerRank": 6, + "dispatchLogicalBytes": 309750784, + "combineLogicalBytes": 619501568, + "fanoutMean": 5.2750244140625, + "recvTokensMax": 5469, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 @@ -67661,35 +69440,35 @@ "tokensPerRank": 2048, "globalTokens": 16384, "dispatch": { - "p50": 734.112024307251, - "p90": 769.8879837989807, - "p95": 783.7439775466919, - "p99": 899.9680280685425 + "p50": 1226.9760370254517, + "p90": 1235.1679801940918, + "p95": 1236.8320226669312, + "p99": 1242.143988609314 }, "combine": { - "p50": 410.17600893974304, - "p90": 422.4640130996704, - "p95": 427.64800786972046, - "p99": 457.72799849510193 + "p50": 624.5120167732239, + "p90": 631.8399906158447, + "p95": 634.1120004653931, + "p99": 675.8400201797485 }, "roundtrip": { - "p50": 1137.4399662017822, - "p90": 1176.416039466858, - "p95": 1203.328013420105, - "p99": 1318.8159465789795 + "p50": 1831.455945968628, + "p90": 1840.831995010376, + "p95": 1843.775987625122, + "p99": 1848.2880592346191 }, "isolatedSum": { - "p50": 1144.288033246994, - "p90": 1192.3519968986511, - "p95": 1211.3919854164124, - "p99": 1357.6960265636444 + "p50": 1851.4880537986755, + "p90": 1867.0079708099365, + "p95": 1870.9440231323242, + "p99": 1917.9840087890625 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 358055936, - 
"combineLogicalBytes": 716111872, - "fanoutMean": 5.33544921875, - "recvTokensMax": 10982, - "stragglerRank": 6, + "dispatchLogicalBytes": 619687936, + "combineLogicalBytes": 1239375872, + "fanoutMean": 5.276611328125, + "recvTokensMax": 10883, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 @@ -67698,35 +69477,35 @@ "tokensPerRank": 4096, "globalTokens": 32768, "dispatch": { - "p50": 1373.792052268982, - "p90": 1396.7679738998413, - "p95": 1406.9440364837646, - "p99": 1577.5359869003296 + "p50": 2344.1600799560547, + "p90": 2350.719928741455, + "p95": 2352.9601097106934, + "p99": 2358.0799102783203 }, "combine": { - "p50": 750.3679990768433, - "p90": 762.6879811286926, - "p95": 770.3359723091125, - "p99": 788.0319952964783 + "p50": 1141.4719820022583, + "p90": 1150.9439945220947, + "p95": 1153.7920236587524, + "p99": 1162.592053413391 }, "roundtrip": { - "p50": 2134.335994720459, - "p90": 2161.439895629883, - "p95": 2178.2400608062744, - "p99": 2561.3439083099365 + "p50": 3469.856023788452, + "p90": 3481.6958904266357, + "p95": 3484.3521118164062, + "p99": 3490.528106689453 }, "isolatedSum": { - "p50": 2124.160051345825, - "p90": 2159.455955028534, - "p95": 2177.280008792877, - "p99": 2365.567982196808 + "p50": 3485.632061958313, + "p90": 3501.66392326355, + "p95": 3506.752133369446, + "p99": 3520.6719636917114 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 716197888, - "combineLogicalBytes": 1432395776, - "fanoutMean": 5.336090087890625, - "recvTokensMax": 21939, - "stragglerRank": 6, + "dispatchLogicalBytes": 1239834624, + "combineLogicalBytes": 2479669248, + "fanoutMean": 5.278564453125, + "recvTokensMax": 21730, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 @@ -67734,32 +69513,33 @@ ] }, { - "id": "cx-2e0e49b4", - "identity": "h200|deepep|5120|8|160|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||0c022a63bbcbf42", - "colorKey": "h200_87683f6c", - "comparisonKey": 
"dcdf4b262ed1d48f", + "id": "cx-560e55e7", + "identity": "h100|deepep|4096|8|128|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|normalized|0.18|dc27c5e0894e569", + "colorKey": "h100_7f10961a", + "comparisonKey": "6a3a9660e48371b3", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:54:08.323229+00:00", + "generatedAt": "2026-06-27T00:45:34.307375+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h200-dgxc-slurm_10", - "sku": "h200", + "runner": "h100-dgxc-slurm_08", + "sku": "h100", "backend": "deepep", "phase": "prefill", "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", + "resourceMode": "normalized", + "suite": "resource-constrained", "comparisonClass": "standardized", "measurementContract": "runtime-visible-v1", - "topologyClass": "h200-nvlink-island", + "topologyClass": "h100-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H200 EP8 · deepep · fp8", + "label": "H100 EP8 · deepep · fp8 (norm)", + "model": "Qwen3.5", "shape": { - "hidden": 5120, + "hidden": 4096, "topk": 8, - "experts": 160, + "experts": 128, "routing": "uniform", "routingLabel": "uniform", "routingStep": 0, @@ -67770,14 +69550,14 @@ "combineQuantMode": "none" }, "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, + "requestedFraction": 0.18, + "achievedFraction": 0.1818, + "configuredUnits": 24, "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", + "resourceClass": "resource-constrained", + "conformanceClass": "resource-conforming", "fixedKernel": false, - "paretoEligible": false + "paretoEligible": true }, "placement": { "kind": "packed", @@ -67786,8 +69566,8 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "0c022a63bbcbf42", - "workloadId": "set:6:28c0c09b13ff0acf", + "traceSignature": "dc27c5e0894e569", + "workloadId": "set:6:76d8142d69406335", "workloadSource": 
"canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -67795,9 +69575,9 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271755854", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271755854", - "createdAt": "2026-06-26T23:52:36Z", + "id": "28273218274", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28273218274", + "createdAt": "2026-06-27T00:45:34.307375+00:00", "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" }, "rows": [ @@ -67805,35 +69585,35 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 219.84000504016876, - "p90": 274.01599287986755, - "p95": 289.5039916038513, - "p99": 343.77598762512207 + "p50": 196.79999351501465, + "p90": 203.80799472332, + "p95": 205.79199492931366, + "p99": 214.11199867725372 }, "combine": { - "p50": 81.08799904584885, - "p90": 91.90399944782257, - "p95": 99.55199807882309, - "p99": 105.79200088977814 + "p50": 75.71200281381607, + "p90": 78.5600021481514, + "p95": 80.54400235414505, + "p99": 84.6719965338707 }, "roundtrip": { - "p50": 288.57600688934326, - "p90": 340.2239978313446, - "p95": 353.95199060440063, - "p99": 388.0319893360138 + "p50": 255.64798712730408, + "p90": 264.41600918769836, + "p95": 274.1119861602783, + "p99": 321.9519853591919 }, "isolatedSum": { - "p50": 300.9280040860176, - "p90": 365.9199923276901, - "p95": 389.0559896826744, - "p99": 449.5679885149002 + "p50": 272.5119963288307, + "p90": 282.3679968714714, + "p95": 286.3359972834587, + "p99": 298.7839952111244 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 27837440, - "combineLogicalBytes": 55674880, - "fanoutMean": 5.3095703125, + "dispatchLogicalBytes": 22282240, + "combineLogicalBytes": 44564480, + "fanoutMean": 5.3125, "recvTokensMax": 699, - "stragglerRank": 4, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -67842,34 
+69622,34 @@ "tokensPerRank": 256, "globalTokens": 2048, "dispatch": { - "p50": 254.62400913238525, - "p90": 299.74400997161865, - "p95": 313.2160007953644, - "p99": 335.6480002403259 + "p50": 231.26399517059326, + "p90": 269.6639895439148, + "p95": 272.19200134277344, + "p99": 278.01600098609924 }, "combine": { - "p50": 112.60800063610077, - "p90": 124.57600235939026, - "p95": 128.31999361515045, - "p99": 137.472003698349 + "p50": 100.99200159311295, + "p90": 109.82400178909302, + "p95": 110.81600189208984, + "p99": 113.3119985461235 }, "roundtrip": { - "p50": 357.88801312446594, - "p90": 402.78398990631104, - "p95": 418.7839925289154, - "p99": 468.3839976787567 + "p50": 315.8720135688782, + "p90": 327.39201188087463, + "p95": 355.679988861084, + "p99": 369.53601241111755 }, "isolatedSum": { - "p50": 367.232009768486, - "p90": 424.3200123310089, - "p95": 441.53599441051483, - "p99": 473.1200039386749 + "p50": 332.2559967637062, + "p90": 379.4879913330078, + "p95": 383.0080032348633, + "p99": 391.32799953222275 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 55552000, - "combineLogicalBytes": 111104000, - "fanoutMean": 5.2978515625, - "recvTokensMax": 1387, + "dispatchLogicalBytes": 44863488, + "combineLogicalBytes": 89726976, + "fanoutMean": 5.34814453125, + "recvTokensMax": 1385, "stragglerRank": 4, "correct": true, "samplesPooled": 600, @@ -67879,35 +69659,35 @@ "tokensPerRank": 512, "globalTokens": 4096, "dispatch": { - "p50": 338.17601203918457, - "p90": 376.48001313209534, - "p95": 391.80800318717957, - "p99": 431.71200156211853 + "p50": 309.59999561309814, + "p90": 352.1279990673065, + "p95": 355.00800609588623, + "p99": 361.1519932746887 }, "combine": { - "p50": 170.43200135231018, - "p90": 182.8480064868927, - "p95": 187.77599930763245, - "p99": 198.46400618553162 + "p50": 147.90399372577667, + "p90": 156.99200332164764, + "p95": 158.24000537395477, + "p99": 162.08000481128693 }, "roundtrip": { - "p50": 509.5679759979248, - "p90": 
558.2079887390137, - "p95": 577.6960253715515, - "p99": 617.7600026130676 + "p50": 442.4000084400177, + "p90": 483.3280146121979, + "p95": 487.8399968147278, + "p99": 518.4000134468079 }, "isolatedSum": { - "p50": 508.60801339149475, - "p90": 559.328019618988, - "p95": 579.584002494812, - "p99": 630.1760077476501 + "p50": 457.5039893388748, + "p90": 509.12000238895416, + "p95": 513.248011469841, + "p99": 523.2319980859756 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 111549440, - "combineLogicalBytes": 223098880, - "fanoutMean": 5.319091796875, - "recvTokensMax": 2762, - "stragglerRank": 5, + "dispatchLogicalBytes": 89751552, + "combineLogicalBytes": 179503104, + "fanoutMean": 5.349609375, + "recvTokensMax": 2772, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -67916,35 +69696,35 @@ "tokensPerRank": 1024, "globalTokens": 8192, "dispatch": { - "p50": 505.0879716873169, - "p90": 540.7040119171143, - "p95": 552.6720285415649, - "p99": 595.1679944992065 + "p50": 453.98399233818054, + "p90": 459.3600034713745, + "p95": 461.88798546791077, + "p99": 466.623991727829 }, "combine": { - "p50": 273.75999093055725, - "p90": 285.66399216651917, - "p95": 291.4240062236786, - "p99": 313.05599212646484 + "p50": 235.29599606990814, + "p90": 239.00799453258514, + "p95": 240.51199853420258, + "p99": 242.46400594711304 }, "roundtrip": { - "p50": 780.2879810333252, - "p90": 834.7839713096619, - "p95": 867.3920035362244, - "p99": 1058.9760541915894 + "p50": 673.3120083808899, + "p90": 678.8480281829834, + "p95": 680.6079745292664, + "p99": 684.544026851654 }, "isolatedSum": { - "p50": 778.8479626178741, - "p90": 826.3680040836334, - "p95": 844.0960347652435, - "p99": 908.2239866256714 + "p50": 689.2799884080887, + "p90": 698.3679980039597, + "p95": 702.3999840021133, + "p99": 709.087997674942 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 223365120, - "combineLogicalBytes": 446730240, - "fanoutMean": 5.325439453125, - "recvTokensMax": 
5518, - "stragglerRank": 7, + "dispatchLogicalBytes": 179511296, + "combineLogicalBytes": 359022592, + "fanoutMean": 5.349853515625, + "recvTokensMax": 5558, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -67953,35 +69733,35 @@ "tokensPerRank": 2048, "globalTokens": 16384, "dispatch": { - "p50": 859.1039776802063, - "p90": 874.3680119514465, - "p95": 884.447991847992, - "p99": 1000.8000135421753 + "p50": 784.928023815155, + "p90": 799.8719811439514, + "p95": 803.2000064849854, + "p99": 809.0239763259888 }, "combine": { - "p50": 476.0960042476654, - "p90": 487.5839948654175, - "p95": 495.9680140018463, - "p99": 551.2639880180359 + "p50": 405.4720103740692, + "p90": 416.06399416923523, + "p95": 418.3039963245392, + "p99": 422.4959909915924 }, "roundtrip": { - "p50": 1315.2320384979248, - "p90": 1342.4960374832153, - "p95": 1364.9920225143433, - "p99": 1437.1839761734009 + "p50": 1170.1120138168335, + "p90": 1179.58402633667, + "p95": 1183.6479902267456, + "p99": 1192.7679777145386 }, "isolatedSum": { - "p50": 1335.1999819278717, - "p90": 1361.952006816864, - "p95": 1380.4160058498383, - "p99": 1552.0640015602112 + "p50": 1190.4000341892242, + "p90": 1215.9359753131866, + "p95": 1221.5040028095245, + "p99": 1231.5199673175812 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 446817280, - "combineLogicalBytes": 893634560, - "fanoutMean": 5.32647705078125, - "recvTokensMax": 11032, - "stragglerRank": 7, + "dispatchLogicalBytes": 358055936, + "combineLogicalBytes": 716111872, + "fanoutMean": 5.33544921875, + "recvTokensMax": 10982, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -67990,34 +69770,34 @@ "tokensPerRank": 4096, "globalTokens": 32768, "dispatch": { - "p50": 1618.3040142059326, - "p90": 1638.8479471206665, - "p95": 1650.3679752349854, - "p99": 1797.8880405426025 + "p50": 1483.3279848098755, + "p90": 1490.496039390564, + "p95": 1493.6319589614868, + "p99": 1501.5679597854614 }, "combine": { - "p50": 
871.5839982032776, - "p90": 885.4719996452332, - "p95": 893.7280178070068, - "p99": 936.1280202865601 + "p50": 732.2880029678345, + "p90": 738.8160228729248, + "p95": 740.8000230789185, + "p99": 745.9840178489685 }, "roundtrip": { - "p50": 2472.0640182495117, - "p90": 2496.8960285186768, - "p95": 2517.6639556884766, - "p99": 2775.1998901367188 + "p50": 2199.039936065674, + "p90": 2209.439992904663, + "p95": 2212.5439643859863, + "p99": 2217.087984085083 }, "isolatedSum": { - "p50": 2489.88801240921, - "p90": 2524.3199467658997, - "p95": 2544.095993041992, - "p99": 2734.0160608291626 + "p50": 2215.61598777771, + "p90": 2229.3120622634888, + "p95": 2234.4319820404053, + "p99": 2247.55197763443 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 893132800, - "combineLogicalBytes": 1786265600, - "fanoutMean": 5.323486328125, - "recvTokensMax": 21895, + "dispatchLogicalBytes": 716197888, + "combineLogicalBytes": 1432395776, + "fanoutMean": 5.336090087890625, + "recvTokensMax": 21939, "stragglerRank": 4, "correct": true, "samplesPooled": 600, @@ -68026,30 +69806,31 @@ ] }, { - "id": "cx-92d6dac4", - "identity": "h200|deepep|6144|8|256|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", - "colorKey": "h200_87683f6c", - "comparisonKey": "5878390fb0ef3ac0", + "id": "cx-de081cfe", + "identity": "h100|deepep|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|normalized|0.18|64d989e2e2a6b31", + "colorKey": "h100_91aa6e56", + "comparisonKey": "e439d265ee12c9f2", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:54:33.209811+00:00", + "generatedAt": "2026-06-26T17:30:20.983875+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h200-dgxc-slurm_1", - "sku": "h200", + "runner": "h100-dgxc-slurm_03", + "sku": "h100", "backend": "deepep", "phase": "prefill", "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", + "resourceMode": "normalized", + "suite": 
"resource-constrained", "comparisonClass": "standardized", - "measurementContract": "runtime-visible-v1", - "topologyClass": "h200-nvlink-island", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H200 EP8 · deepep · fp8", + "label": "H100 EP8 · deepep · fp8 (norm)", + "model": "DeepSeek-V3/V4", "shape": { - "hidden": 6144, + "hidden": 7168, "topk": 8, "experts": 256, "routing": "uniform", @@ -68062,14 +69843,14 @@ "combineQuantMode": "none" }, "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, + "requestedFraction": 0.18, + "achievedFraction": 0.1818, + "configuredUnits": 24, "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", + "resourceClass": "resource-constrained", + "conformanceClass": "resource-conforming", "fixedKernel": false, - "paretoEligible": false + "paretoEligible": true }, "placement": { "kind": "packed", @@ -68079,7 +69860,7 @@ }, "routingConsistent": true, "traceSignature": "64d989e2e2a6b31", - "workloadId": "set:6:9f5e1e005a35e937", + "workloadId": "set:6:a426d66e479dc893", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, @@ -68087,45 +69868,45 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271771597", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271771597", - "createdAt": "2026-06-26T23:53:03Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28254323956", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254323956", + "createdAt": "2026-06-26T17:30:20.983875+00:00", + "sha": "60dec7d70f554e252fec87709e2be52752947db1" }, "rows": [ { "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 237.12000250816345, - "p90": 
447.00801372528076, - "p95": 466.2080109119415, - "p99": 509.2800259590149 + "p50": 89.59999680519104, + "p90": 93.72799843549728, + "p95": 95.36000341176987, + "p99": 100.832000374794 }, "combine": { - "p50": 89.59999680519104, - "p90": 118.20799857378006, - "p95": 120.38400024175644, - "p99": 131.55199587345123 + "p50": 98.14400225877762, + "p90": 100.60799866914749, + "p95": 102.11200267076492, + "p99": 105.0880029797554 }, "roundtrip": { - "p50": 299.51998591423035, - "p90": 465.9839868545532, - "p95": 490.01601338386536, - "p99": 533.9199900627136 + "p50": 215.13600647449493, + "p90": 218.55999529361725, + "p95": 220.12799978256226, + "p99": 228.06400060653687 }, "isolatedSum": { - "p50": 326.7199993133545, - "p90": 565.2160122990608, - "p95": 586.592011153698, - "p99": 640.8320218324661 + "p50": 187.74399906396866, + "p90": 194.33599710464478, + "p95": 197.4720060825348, + "p99": 205.9200033545494 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 33288192, - "combineLogicalBytes": 66576384, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, "fanoutMean": 5.291015625, "recvTokensMax": 723, - "stragglerRank": 5, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -68134,35 +69915,35 @@ "tokensPerRank": 256, "globalTokens": 2048, "dispatch": { - "p50": 272.96000719070435, - "p90": 312.19199299812317, - "p95": 322.7840065956116, - "p99": 376.6080141067505 + "p50": 106.175996363163, + "p90": 121.47200107574463, + "p95": 122.52800166606903, + "p99": 125.91999769210815 }, "combine": { - "p50": 121.91999703645706, - "p90": 133.34399461746216, - "p95": 139.1039937734604, - "p99": 144.48000490665436 + "p50": 139.48799669742584, + "p90": 146.17599546909332, + "p95": 147.61599898338318, + "p99": 149.82399344444275 }, "roundtrip": { - "p50": 388.5760009288788, - "p90": 429.28001284599304, - "p95": 448.5439956188202, - "p99": 507.87198543548584 + "p50": 320.92800736427307, + "p90": 336.41600608825684, + "p95": 
337.92001008987427, + "p99": 341.2800133228302 }, "isolatedSum": { - "p50": 394.8800042271614, - "p90": 445.5359876155853, - "p95": 461.88800036907196, - "p99": 521.0880190134048 + "p50": 245.66399306058884, + "p90": 267.64799654483795, + "p95": 270.1440006494522, + "p99": 275.7439911365509 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 66809856, - "combineLogicalBytes": 133619712, + "dispatchLogicalBytes": 77944832, + "combineLogicalBytes": 155889664, "fanoutMean": 5.3095703125, "recvTokensMax": 1422, - "stragglerRank": 5, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -68171,35 +69952,35 @@ "tokensPerRank": 512, "globalTokens": 4096, "dispatch": { - "p50": 375.61601400375366, - "p90": 427.4879992008209, - "p95": 443.77601146698, - "p99": 500.4799962043762 + "p50": 138.46400380134583, + "p90": 182.8799992799759, + "p95": 190.97599387168884, + "p99": 197.28000462055206 }, "combine": { - "p50": 192.9599940776825, - "p90": 205.08800446987152, - "p95": 213.47199380397797, - "p99": 237.92000114917755 + "p50": 208.3200067281723, + "p90": 223.00800681114197, + "p95": 231.83999955654144, + "p99": 242.01600253582 }, "roundtrip": { - "p50": 553.5680055618286, - "p90": 599.2000102996826, - "p95": 623.583972454071, - "p99": 716.1920070648193 + "p50": 509.69600677490234, + "p90": 521.5680003166199, + "p95": 523.4879851341248, + "p99": 528.9599895477295 }, "isolatedSum": { - "p50": 568.5760080814362, - "p90": 632.5760036706924, - "p95": 657.248005270958, - "p99": 738.3999973535538 + "p50": 346.7840105295181, + "p90": 405.88800609111786, + "p95": 422.8159934282303, + "p99": 439.29600715637207 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 133828608, - "combineLogicalBytes": 267657216, + "dispatchLogicalBytes": 156133376, + "combineLogicalBytes": 312266752, "fanoutMean": 5.31787109375, "recvTokensMax": 2779, - "stragglerRank": 0, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -68208,35 +69989,35 @@ 
"tokensPerRank": 1024, "globalTokens": 8192, "dispatch": { - "p50": 557.6000213623047, - "p90": 596.7360138893127, - "p95": 607.3920130729675, - "p99": 644.9599862098694 + "p50": 197.4399983882904, + "p90": 210.87999641895294, + "p95": 213.31200003623962, + "p99": 216.2880003452301 }, "combine": { - "p50": 306.335985660553, - "p90": 316.3520097732544, - "p95": 320.51199674606323, - "p99": 334.52799916267395 + "p50": 325.82399249076843, + "p90": 330.1120102405548, + "p95": 331.6799998283386, + "p99": 335.80800890922546 }, "roundtrip": { - "p50": 853.1839847564697, - "p90": 880.8959722518921, - "p95": 895.3920006752014, - "p99": 966.7840003967285 + "p50": 847.4879860877991, + "p90": 858.0160140991211, + "p95": 861.0879778862, + "p99": 869.2799806594849 }, "isolatedSum": { - "p50": 863.9360070228577, - "p90": 913.0880236625671, - "p95": 927.9040098190308, - "p99": 979.4879853725433 + "p50": 523.2639908790588, + "p90": 540.9920066595078, + "p95": 544.9919998645782, + "p99": 552.0960092544556 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 267190272, - "combineLogicalBytes": 534380544, + "dispatchLogicalBytes": 311721984, + "combineLogicalBytes": 623443968, "fanoutMean": 5.30859375, "recvTokensMax": 5505, - "stragglerRank": 0, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -68245,35 +70026,35 @@ "tokensPerRank": 2048, "globalTokens": 16384, "dispatch": { - "p50": 987.8720045089722, - "p90": 1001.9840002059937, - "p95": 1013.2479667663574, - "p99": 1395.5520391464233 + "p50": 318.65599751472473, + "p90": 335.29600501060486, + "p95": 338.0799889564514, + "p99": 347.29599952697754 }, "combine": { - "p50": 540.9280061721802, - "p90": 573.7280249595642, - "p95": 584.6400260925293, - "p99": 626.0480284690857 + "p50": 559.7760081291199, + "p90": 566.815972328186, + "p95": 569.5040225982666, + "p99": 573.311984539032 }, "roundtrip": { - "p50": 1523.6799716949463, - "p90": 1545.408010482788, - "p95": 1558.1120252609253, - "p99": 
1704.2880058288574 + "p50": 1524.0000486373901, + "p90": 1544.0640449523926, + "p95": 1550.7839918136597, + "p99": 1576.7359733581543 }, "isolatedSum": { - "p50": 1528.8000106811523, - "p90": 1575.7120251655579, - "p95": 1597.8879928588867, - "p99": 2021.600067615509 + "p50": 878.4320056438446, + "p90": 902.1119773387909, + "p95": 907.584011554718, + "p99": 920.6079840660095 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 533059584, - "combineLogicalBytes": 1066119168, + "dispatchLogicalBytes": 621902848, + "combineLogicalBytes": 1243805696, "fanoutMean": 5.29547119140625, "recvTokensMax": 10952, - "stragglerRank": 0, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -68282,35 +70063,35 @@ "tokensPerRank": 4096, "globalTokens": 32768, "dispatch": { - "p50": 1865.3759956359863, - "p90": 1883.2000494003296, - "p95": 1893.02396774292, - "p99": 1925.7279634475708 + "p50": 574.7519731521606, + "p90": 593.1839942932129, + "p95": 598.1760025024414, + "p99": 604.7999858856201 }, "combine": { - "p50": 981.823980808258, - "p90": 994.0800070762634, - "p95": 1002.7199983596802, - "p99": 1096.3200330734253 + "p50": 1025.056004524231, + "p90": 1033.5359573364258, + "p95": 1036.1920595169067, + "p99": 1042.847990989685 }, "roundtrip": { - "p50": 2907.2320461273193, - "p90": 2933.151960372925, - "p95": 2943.104028701782, - "p99": 3191.3599967956543 + "p50": 2880.863904953003, + "p90": 2894.5279121398926, + "p95": 2899.9040126800537, + "p99": 2908.3518981933594 }, "isolatedSum": { - "p50": 2847.1999764442444, - "p90": 2877.280056476593, - "p95": 2895.7439661026, - "p99": 3022.047996520996 + "p50": 1599.8079776763916, + "p90": 1626.7199516296387, + "p95": 1634.3680620193481, + "p99": 1647.6479768753052 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1065861120, - "combineLogicalBytes": 2131722240, + "dispatchLogicalBytes": 1243504640, + "combineLogicalBytes": 2487009280, "fanoutMean": 5.294189453125, "recvTokensMax": 21781, - 
"stragglerRank": 2, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -68318,28 +70099,29 @@ ] }, { - "id": "cx-e6cb64c3", - "identity": "h200|deepep|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", - "colorKey": "h200_9979edfc", - "comparisonKey": "e1fcecbd9bd8ede3", + "id": "cx-e8c2a4d2", + "identity": "h100|deepep|7168|8|256|fp8|normal|cached-layout-comm-only-v1|uniform|8|prefill|normal|none|none|0|normalized|0.18|64d989e2e2a6b31", + "colorKey": "h100_eddc3af6", + "comparisonKey": "fd73340f2af530d5", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:50:14.800894+00:00", + "generatedAt": "2026-06-26T17:30:48.926445+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h200-dgxc-slurm_0", - "sku": "h200", + "runner": "h100-dgxc-slurm_19", + "sku": "h100", "backend": "deepep", "phase": "prefill", "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", + "resourceMode": "normalized", + "suite": "resource-constrained", "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", + "measurementContract": "cached-layout-comm-only-v1", + "topologyClass": "h100-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H200 EP8 · deepep · fp8", + "label": "H100 EP8 · deepep · fp8 (norm) [cl]", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, @@ -68354,14 +70136,14 @@ "combineQuantMode": "none" }, "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, + "requestedFraction": 0.18, + "achievedFraction": 0.1818, + "configuredUnits": 24, "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", + "resourceClass": "resource-constrained", + "conformanceClass": "resource-conforming", "fixedKernel": false, - "paretoEligible": false + "paretoEligible": true }, 
"placement": { "kind": "packed", @@ -68379,45 +70161,45 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271625900", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271625900", - "createdAt": "2026-06-26T23:48:41Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28254341346", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254341346", + "createdAt": "2026-06-26T17:30:48.926445+00:00", + "sha": "60dec7d70f554e252fec87709e2be52752947db1" }, "rows": [ { "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 87.74399757385254, - "p90": 110.55999994277954, - "p95": 120.25599926710129, - "p99": 134.36800241470337 + "p50": 77.60000228881836, + "p90": 81.4720019698143, + "p95": 83.52000266313553, + "p99": 102.7199998497963 }, "combine": { - "p50": 96.73599898815155, - "p90": 114.01599645614624, - "p95": 121.0239976644516, - "p99": 137.34400272369385 + "p50": 98.08000177145004, + "p90": 102.01600193977356, + "p95": 115.35999923944473, + "p99": 344.0319895744324 }, "roundtrip": { - "p50": 209.24800634384155, - "p90": 246.39999866485596, - "p95": 260.0319981575012, - "p99": 304.22401428222656 + "p50": 205.1520049571991, + "p90": 208.19200575351715, + "p95": 209.85600352287292, + "p99": 214.9440050125122 }, "isolatedSum": { - "p50": 184.4799965620041, - "p90": 224.57599639892578, - "p95": 241.2799969315529, - "p99": 271.7120051383972 + "p50": 175.6800040602684, + "p90": 183.48800390958786, + "p95": 198.88000190258026, + "p99": 446.75198942422867 }, "roundtripMeasured": true, "dispatchLogicalBytes": 38836224, "combineLogicalBytes": 77672448, "fanoutMean": 5.291015625, "recvTokensMax": 723, - "stragglerRank": 4, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -68426,28 +70208,28 @@ "tokensPerRank": 256, "globalTokens": 2048, "dispatch": { - "p50": 102.24000364542007, - "p90": 
118.97599697113037, - "p95": 129.7920048236847, - "p99": 143.42400431632996 + "p50": 95.74399888515472, + "p90": 110.07999628782272, + "p95": 111.13599687814713, + "p99": 114.81600254774094 }, "combine": { - "p50": 136.86400651931763, - "p90": 150.43200552463531, - "p95": 156.3200056552887, - "p99": 173.92000555992126 + "p50": 141.7279988527298, + "p90": 148.8959938287735, + "p95": 150.4960060119629, + "p99": 153.02400290966034 }, "roundtrip": { - "p50": 310.88000535964966, - "p90": 332.73598551750183, - "p95": 338.78400921821594, - "p99": 370.11200189590454 + "p50": 311.45599484443665, + "p90": 319.5840120315552, + "p95": 321.696013212204, + "p99": 324.67201352119446 }, "isolatedSum": { - "p50": 239.1040101647377, - "p90": 269.4080024957657, - "p95": 286.1120104789734, - "p99": 317.3440098762512 + "p50": 237.47199773788452, + "p90": 258.9759901165962, + "p95": 261.63200289011, + "p99": 267.8400054574013 }, "roundtripMeasured": true, "dispatchLogicalBytes": 77944832, @@ -68463,35 +70245,35 @@ "tokensPerRank": 512, "globalTokens": 4096, "dispatch": { - "p50": 138.14400136470795, - "p90": 162.59199380874634, - "p95": 168.47999393939972, - "p99": 190.3039962053299 + "p50": 129.56799566745758, + "p90": 144.57599818706512, + "p95": 146.14400267601013, + "p99": 148.8959938287735 }, "combine": { - "p50": 214.88000452518463, - "p90": 230.17600178718567, - "p95": 236.32000386714935, - "p99": 254.4960081577301 + "p50": 213.4079933166504, + "p90": 218.36799383163452, + "p95": 219.7760045528412, + "p99": 224.2240011692047 }, "roundtrip": { - "p50": 494.4959878921509, - "p90": 515.6800150871277, - "p95": 529.6000242233276, - "p99": 559.8719716072083 + "p50": 500.70399045944214, + "p90": 508.1599950790405, + "p95": 510.81597805023193, + "p99": 514.8159861564636 }, "isolatedSum": { - "p50": 353.0240058898926, - "p90": 392.767995595932, - "p95": 404.7999978065491, - "p99": 444.80000436306 + "p50": 342.97598898410797, + "p90": 362.94399201869965, + "p95": 365.9200072288513, + 
"p99": 373.1199949979782 }, "roundtripMeasured": true, "dispatchLogicalBytes": 156133376, "combineLogicalBytes": 312266752, "fanoutMean": 5.31787109375, "recvTokensMax": 2779, - "stragglerRank": 4, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -68500,28 +70282,28 @@ "tokensPerRank": 1024, "globalTokens": 8192, "dispatch": { - "p50": 206.1759978532791, - "p90": 232.9919934272766, - "p95": 242.49599874019623, - "p99": 273.98398518562317 + "p50": 186.49600446224213, + "p90": 196.0960030555725, + "p95": 197.50399887561798, + "p99": 202.55999267101288 }, "combine": { - "p50": 348.63999485969543, - "p90": 365.7279908657074, - "p95": 373.1519877910614, - "p99": 415.2640104293823 + "p50": 327.7760148048401, + "p90": 333.18400382995605, + "p95": 334.3679904937744, + "p99": 337.72799372673035 }, "roundtrip": { - "p50": 835.8079791069031, - "p90": 859.5200181007385, - "p95": 871.6480135917664, - "p99": 925.055980682373 + "p50": 835.2640271186829, + "p90": 841.69602394104, + "p95": 844.0639972686768, + "p99": 848.2879996299744 }, "isolatedSum": { - "p50": 554.8159927129745, - "p90": 598.719984292984, - "p95": 615.6479865312576, - "p99": 689.2479956150055 + "p50": 514.2720192670822, + "p90": 529.2800068855286, + "p95": 531.8719893693924, + "p99": 540.2879863977432 }, "roundtripMeasured": true, "dispatchLogicalBytes": 311721984, @@ -68537,35 +70319,35 @@ "tokensPerRank": 2048, "globalTokens": 16384, "dispatch": { - "p50": 338.8800024986267, - "p90": 359.6799969673157, - "p95": 372.0000088214874, - "p99": 397.5360095500946 + "p50": 306.62399530410767, + "p90": 320.19200921058655, + "p95": 322.7519989013672, + "p99": 327.1679878234863 }, "combine": { - "p50": 606.4640283584595, - "p90": 624.895989894867, - "p95": 636.7679834365845, - "p99": 693.5359835624695 + "p50": 559.6479773521423, + "p90": 567.296028137207, + "p95": 570.1119899749756, + "p99": 574.5919942855835 }, "roundtrip": { - "p50": 1500, - "p90": 1528.9280414581299, - "p95": 
1547.0080375671387, - "p99": 1667.6160097122192 + "p50": 1509.6960067749023, + "p90": 1522.7199792861938, + "p95": 1525.6320238113403, + "p99": 1585.9839916229248 }, "isolatedSum": { - "p50": 945.3440308570862, - "p90": 984.5759868621826, - "p95": 1008.7679922580719, - "p99": 1091.071993112564 + "p50": 866.27197265625, + "p90": 887.4880373477936, + "p95": 892.8639888763428, + "p99": 901.7599821090698 }, "roundtripMeasured": true, "dispatchLogicalBytes": 621902848, "combineLogicalBytes": 1243805696, "fanoutMean": 5.29547119140625, "recvTokensMax": 10952, - "stragglerRank": 4, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -68574,35 +70356,35 @@ "tokensPerRank": 4096, "globalTokens": 32768, "dispatch": { - "p50": 615.9039735794067, - "p90": 634.0479850769043, - "p95": 644.2239880561829, - "p99": 707.9039812088013 + "p50": 539.8719906806946, + "p90": 550.7839918136597, + "p95": 555.7119846343994, + "p99": 564.7040009498596 }, "combine": { - "p50": 1102.112054824829, - "p90": 1116.7999505996704, - "p95": 1128.767967224121, - "p99": 1167.2320365905762 + "p50": 1024.9279737472534, + "p90": 1034.3040227890015, + "p95": 1037.11998462677, + "p99": 1047.0720529556274 }, "roundtrip": { - "p50": 2840.384006500244, - "p90": 2870.07999420166, - "p95": 2894.5279121398926, - "p99": 3452.9600143432617 + "p50": 2850.719928741455, + "p90": 2861.407995223999, + "p95": 2864.9280071258545, + "p99": 2870.176076889038 }, "isolatedSum": { - "p50": 1718.0160284042358, - "p90": 1750.8479356765747, - "p95": 1772.991955280304, - "p99": 1875.1360177993774 + "p50": 1564.799964427948, + "p90": 1585.0880146026611, + "p95": 1592.8319692611694, + "p99": 1611.776053905487 }, "roundtripMeasured": true, "dispatchLogicalBytes": 1243504640, "combineLogicalBytes": 2487009280, "fanoutMean": 5.294189453125, "recvTokensMax": 21781, - "stragglerRank": 6, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -68610,28 +70392,29 @@ ] }, { - "id": "cx-4da6f6db", 
- "identity": "h200|deepep|7168|8|256|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", - "colorKey": "h200_87683f6c", - "comparisonKey": "90a8a7fc3b314f23", + "id": "cx-f6d2d196", + "identity": "h100|deepep|7168|8|256|fp8|normal|cached-layout-comm-only-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "h100_ec72792b", + "comparisonKey": "39b4bc74c45641cb", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:50:44.259181+00:00", + "generatedAt": "2026-06-26T23:48:09.793091+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h200-dgxc-slurm_3", - "sku": "h200", + "runner": "h100-dgxc-slurm_09", + "sku": "h100", "backend": "deepep", "phase": "prefill", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "runtime-visible-v1", - "topologyClass": "h200-nvlink-island", + "measurementContract": "cached-layout-comm-only-v1", + "topologyClass": "h100-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H200 EP8 · deepep · fp8", + "label": "H100 EP8 · deepep · fp8 [cl]", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, @@ -68671,9 +70454,9 @@ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271640687", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271640687", - "createdAt": "2026-06-26T23:49:09Z", + "id": "28271576503", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271576503", + "createdAt": "2026-06-26T23:48:09.793091+00:00", "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" }, "rows": [ @@ -68681,35 +70464,35 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 239.3600046634674, - "p90": 286.52799129486084, - "p95": 313.79199028015137, - "p99": 391.2000060081482 + "p50": 76.73600316047668, + 
"p90": 80.19199967384338, + "p95": 82.17599987983704, + "p99": 85.4720026254654 }, "combine": { - "p50": 97.21600264310837, - "p90": 110.59200018644333, - "p95": 116.67200177907944, - "p99": 134.783998131752 + "p50": 98.68799895048141, + "p90": 100.8640006184578, + "p95": 102.84800082445145, + "p99": 113.27999830245972 }, "roundtrip": { - "p50": 309.9519908428192, - "p90": 360.48001050949097, - "p95": 381.5680146217346, - "p99": 466.94400906562805 + "p50": 204.25599813461304, + "p90": 206.84799551963806, + "p95": 208.0959975719452, + "p99": 211.32799983024597 }, "isolatedSum": { - "p50": 336.5760073065758, - "p90": 397.11999148130417, - "p95": 430.4639920592308, - "p99": 525.9840041399002 + "p50": 175.4240021109581, + "p90": 181.05600029230118, + "p95": 185.02400070428848, + "p99": 198.7520009279251 }, "roundtripMeasured": true, "dispatchLogicalBytes": 38836224, "combineLogicalBytes": 77672448, "fanoutMean": 5.291015625, "recvTokensMax": 723, - "stragglerRank": 0, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -68718,35 +70501,35 @@ "tokensPerRank": 256, "globalTokens": 2048, "dispatch": { - "p50": 291.0720109939575, - "p90": 340.5759930610657, - "p95": 355.19999265670776, - "p99": 430.30399084091187 + "p50": 95.551997423172, + "p90": 98.65599870681763, + "p95": 100.44799745082855, + "p99": 104.63999956846237 }, "combine": { - "p50": 137.7599984407425, - "p90": 154.30399775505066, - "p95": 160.41600704193115, - "p99": 182.3360025882721 + "p50": 143.51999759674072, + "p90": 146.04799449443817, + "p95": 147.2640037536621, + "p99": 150.07999539375305 }, "roundtrip": { - "p50": 415.8079922199249, - "p90": 464.0960097312927, - "p95": 484.5759868621826, - "p99": 556.8320155143738 + "p50": 317.05600023269653, + "p90": 320.67200541496277, + "p95": 322.07998633384705, + "p99": 325.56799054145813 }, "isolatedSum": { - "p50": 428.8320094347, - "p90": 494.87999081611633, - "p95": 515.6159996986389, - "p99": 612.639993429184 + "p50": 
239.07199501991272, + "p90": 244.7039932012558, + "p95": 247.71200120449066, + "p99": 254.71999496221542 }, "roundtripMeasured": true, "dispatchLogicalBytes": 77944832, "combineLogicalBytes": 155889664, "fanoutMean": 5.3095703125, "recvTokensMax": 1422, - "stragglerRank": 5, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -68755,35 +70538,35 @@ "tokensPerRank": 512, "globalTokens": 4096, "dispatch": { - "p50": 408.28800201416016, - "p90": 486.4000082015991, - "p95": 495.7759976387024, - "p99": 554.3680191040039 + "p50": 132.4159950017929, + "p90": 136.09600067138672, + "p95": 137.40800321102142, + "p99": 140.19200205802917 }, "combine": { - "p50": 219.10400688648224, - "p90": 233.37599635124207, - "p95": 239.48800563812256, - "p99": 266.07999205589294 + "p50": 224.16000068187714, + "p90": 228.2239943742752, + "p95": 229.312002658844, + "p99": 232.03200101852417 }, "roundtrip": { - "p50": 607.4560284614563, - "p90": 650.2400040626526, - "p95": 670.5920100212097, - "p99": 729.3760180473328 + "p50": 517.5039768218994, + "p90": 522.5920081138611, + "p95": 523.8400101661682, + "p99": 534.1759920120239 }, "isolatedSum": { - "p50": 627.3920089006424, - "p90": 719.7760045528412, - "p95": 735.264003276825, - "p99": 820.4480111598969 + "p50": 356.57599568367004, + "p90": 364.3199950456619, + "p95": 366.7200058698654, + "p99": 372.22400307655334 }, "roundtripMeasured": true, "dispatchLogicalBytes": 156133376, "combineLogicalBytes": 312266752, "fanoutMean": 5.31787109375, "recvTokensMax": 2779, - "stragglerRank": 5, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -68792,35 +70575,35 @@ "tokensPerRank": 1024, "globalTokens": 8192, "dispatch": { - "p50": 621.9840049743652, - "p90": 667.8720116615295, - "p95": 696.0639953613281, - "p99": 765.0880217552185 + "p50": 203.5199999809265, + "p90": 207.39200711250305, + "p95": 208.95999670028687, + "p99": 213.1199985742569 }, "combine": { - "p50": 346.8480110168457, - "p90": 
362.08000779151917, - "p95": 368.47999691963196, - "p99": 384.89601016044617 + "p50": 359.0719997882843, + "p90": 364.25599455833435, + "p95": 365.4080033302307, + "p99": 367.35999584198 }, "roundtrip": { - "p50": 955.2639722824097, - "p90": 1010.1120471954346, - "p95": 1039.4879579544067, - "p99": 1108.6399555206299 + "p50": 883.679986000061, + "p90": 889.6960020065308, + "p95": 891.5839791297913, + "p99": 897.7599740028381 }, "isolatedSum": { - "p50": 968.8320159912109, - "p90": 1029.9520194530487, - "p95": 1064.54399228096, - "p99": 1149.9840319156647 + "p50": 562.5919997692108, + "p90": 571.6480016708374, + "p95": 574.3680000305176, + "p99": 580.4799944162369 }, "roundtripMeasured": true, "dispatchLogicalBytes": 311721984, "combineLogicalBytes": 623443968, "fanoutMean": 5.30859375, "recvTokensMax": 5505, - "stragglerRank": 0, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -68829,35 +70612,35 @@ "tokensPerRank": 2048, "globalTokens": 16384, "dispatch": { - "p50": 1107.7439785003662, - "p90": 1126.9439458847046, - "p95": 1137.887954711914, - "p99": 1176.8319606781006 + "p50": 341.0240113735199, + "p90": 352.9280126094818, + "p95": 354.7840118408203, + "p99": 361.31200194358826 }, "combine": { - "p50": 609.9200248718262, - "p90": 624.4159936904907, - "p95": 631.8399906158447, - "p99": 652.1919965744019 + "p50": 631.2000155448914, + "p90": 639.136016368866, + "p95": 641.5359973907471, + "p99": 644.1599726676941 }, "roundtrip": { - "p50": 1692.2240257263184, - "p90": 1713.1520509719849, - "p95": 1732.5439453125, - "p99": 1810.7199668884277 + "p50": 1616.5440082550049, + "p90": 1624.9920129776, + "p95": 1627.3599863052368, + "p99": 1631.9680213928223 }, "isolatedSum": { - "p50": 1717.6640033721924, - "p90": 1751.3599395751953, - "p95": 1769.7279453277588, - "p99": 1829.0239572525024 + "p50": 972.2240269184113, + "p90": 992.0640289783478, + "p95": 996.3200092315674, + "p99": 1005.4719746112823 }, "roundtripMeasured": true, 
"dispatchLogicalBytes": 621902848, "combineLogicalBytes": 1243805696, "fanoutMean": 5.29547119140625, "recvTokensMax": 10952, - "stragglerRank": 5, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -68866,35 +70649,35 @@ "tokensPerRank": 4096, "globalTokens": 32768, "dispatch": { - "p50": 2100.4478931427, - "p90": 2129.312038421631, - "p95": 2148.47993850708, - "p99": 2358.464002609253 + "p50": 611.6160154342651, + "p90": 621.0240125656128, + "p95": 624.0959763526917, + "p99": 790.3040051460266 }, "combine": { - "p50": 1102.6560068130493, - "p90": 1120.0640201568604, - "p95": 1132.8959465026855, - "p99": 1158.560037612915 + "p50": 1165.503978729248, + "p90": 1175.487995147705, + "p95": 1177.664041519165, + "p99": 1188.9280080795288 }, "roundtrip": { - "p50": 3193.376064300537, - "p90": 3219.615936279297, - "p95": 3229.9840450286865, - "p99": 3288.5758876800537 + "p50": 3078.4640312194824, + "p90": 3095.8399772644043, + "p95": 3103.071928024292, + "p99": 3115.9679889678955 }, "isolatedSum": { - "p50": 3203.1038999557495, - "p90": 3249.376058578491, - "p95": 3281.3758850097656, - "p99": 3517.024040222168 + "p50": 1777.1199941635132, + "p90": 1796.5120077133179, + "p95": 1801.7600178718567, + "p99": 1979.2320132255554 }, "roundtripMeasured": true, "dispatchLogicalBytes": 1243504640, "combineLogicalBytes": 2487009280, "fanoutMean": 5.294189453125, "recvTokensMax": 21781, - "stragglerRank": 5, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -68902,49 +70685,50 @@ ] }, { - "id": "cx-d2673258", - "identity": "h200|deepep|7168|8|384|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||cd50548525dafdf", - "colorKey": "h200_87683f6c", - "comparisonKey": "ae4528707b5ffd7f", + "id": "cx-0f748c2f", + "identity": "h100|deepep-hybrid|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h100_ec8c28a9", + "comparisonKey": 
"04d8dc12f0898400", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:53:16.316846+00:00", + "generatedAt": "2026-06-28T02:32:47.489418+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h200-dgxc-slurm_3", - "sku": "h200", - "backend": "deepep", - "phase": "prefill", + "runner": "h100-dgxc-slurm_08", + "sku": "h100", + "backend": "deepep-hybrid", + "phase": "decode", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "runtime-visible-v1", - "topologyClass": "h200-nvlink-island", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H200 EP8 · deepep · fp8", + "label": "H100 EP8 · deepep-hybrid · bf16", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, - "experts": 384, + "experts": 256, "routing": "uniform", "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, - "dispatchDtype": "fp8", + "dispatchDtype": "bf16", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, + "achievedFraction": null, + "configuredUnits": null, "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, "paretoEligible": false }, "placement": { @@ -68954,239 +70738,313 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "cd50548525dafdf", - "workloadId": "set:6:b23bc0c4b6402c69", + "traceSignature": "ac583971f94b176", + "workloadId": "set:8:d8d49658059863f2", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", + "backendVersion": "hybrid-e0a5b1d", "imageDigest": 
"sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271725115", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271725115", - "createdAt": "2026-06-26T23:51:41Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28308875809", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28308875809", + "createdAt": "2026-06-28T02:32:47.489418+00:00", + "sha": "02ef8d2d9b6fd7519504810daae202e88ee66360" }, "rows": [ { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 1, + "globalTokens": 8, "dispatch": { - "p50": 221.27999365329742, - "p90": 242.20800399780273, - "p95": 255.3279995918274, - "p99": 294.94398832321167 + "p50": 167.1680063009262, + "p90": 219.7120040655136, + "p95": 222.01600670814514, + "p99": 227.84000635147095 }, "combine": { - "p50": 96.67199850082397, - "p90": 103.20000350475311, - "p95": 107.32799768447876, - "p99": 117.85600334405899 + "p50": 36.896001547575, + "p90": 52.2879995405674, + "p95": 52.799999713897705, + "p99": 57.34400078654289 }, "roundtrip": { - "p50": 306.8479895591736, - "p90": 331.07200264930725, - "p95": 352.31998562812805, - "p99": 409.05600786209106 + "p50": 195.3279972076416, + "p90": 256.76798820495605, + "p95": 260.51199436187744, + "p99": 266.2079930305481 }, "isolatedSum": { - "p50": 317.9519921541214, - "p90": 345.40800750255585, - "p95": 362.65599727630615, - "p99": 412.79999166727066 + "p50": 204.0640078485012, + "p90": 272.000003606081, + "p95": 274.81600642204285, + "p99": 285.18400713801384 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 38757376, - "combineLogicalBytes": 77514752, - "fanoutMean": 5.2802734375, - "recvTokensMax": 707, - "stragglerRank": 3, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 256, - 
"globalTokens": 2048, + "tokensPerRank": 2, + "globalTokens": 16, "dispatch": { - "p50": 282.04798698425293, - "p90": 307.3279857635498, - "p95": 327.2320032119751, - "p99": 442.68798828125 + "p50": 165.27999937534332, + "p90": 173.08799922466278, + "p95": 185.98400056362152, + "p99": 224.48000311851501 }, "combine": { - "p50": 138.87999951839447, - "p90": 145.05599439144135, - "p95": 152.73599326610565, - "p99": 170.01600563526154 + "p50": 35.93600168824196, + "p90": 39.264000952243805, + "p95": 42.047999799251556, + "p99": 47.93599992990494 }, "roundtrip": { - "p50": 410.46398878097534, - "p90": 435.39199233055115, - "p95": 465.6960070133209, - "p99": 525.2479910850525 + "p50": 193.34399700164795, + "p90": 199.64799284934998, + "p95": 202.72000133991241, + "p99": 207.58399367332458 }, "isolatedSum": { - "p50": 420.9279865026474, - "p90": 452.38398015499115, - "p95": 479.96799647808075, - "p99": 612.7039939165115 + "p50": 201.21600106358528, + "p90": 212.35200017690659, + "p95": 228.03200036287308, + "p99": 272.41600304841995 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77285376, - "combineLogicalBytes": 154570752, - "fanoutMean": 5.2646484375, - "recvTokensMax": 1391, - "stragglerRank": 0, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 512, - "globalTokens": 4096, + "tokensPerRank": 4, + "globalTokens": 32, "dispatch": { - "p50": 390.9760117530823, - "p90": 407.8719913959503, - "p95": 414.3039882183075, - "p99": 448.2240080833435 + "p50": 166.97600483894348, + "p90": 218.55999529361725, + "p95": 221.72799706459045, + "p99": 226.17599368095398 }, "combine": { - "p50": 212.3199999332428, - "p90": 220.2560007572174, - "p95": 229.08799350261688, - "p99": 299.71200227737427 + "p50": 39.64800015091896, + "p90": 52.15999856591225, + "p95": 52.76799947023392, + "p99": 55.93600124120712 }, 
"roundtrip": { - "p50": 589.3120169639587, - "p90": 609.9839806556702, - "p95": 625.5040168762207, - "p99": 686.6880059242249 + "p50": 195.0400024652481, + "p90": 255.90398907661438, + "p95": 258.432000875473, + "p99": 266.1759853363037 }, "isolatedSum": { - "p50": 603.2960116863251, - "p90": 628.1279921531677, - "p95": 643.3919817209244, - "p99": 747.9360103607178 + "p50": 206.62400498986244, + "p90": 270.7199938595295, + "p95": 274.49599653482437, + "p99": 282.1119949221611 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 154886144, - "combineLogicalBytes": 309772288, - "fanoutMean": 5.275390625, - "recvTokensMax": 2754, - "stragglerRank": 3, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 1024, - "globalTokens": 8192, + "tokensPerRank": 8, + "globalTokens": 64, "dispatch": { - "p50": 601.7919778823853, - "p90": 624.064028263092, - "p95": 640.0960087776184, - "p99": 705.2800059318542 + "p50": 168.44800114631653, + "p90": 221.02400660514832, + "p95": 223.39199483394623, + "p99": 229.34399545192719 }, "combine": { - "p50": 343.29599142074585, - "p90": 351.39200091362, - "p95": 357.02401399612427, - "p99": 386.01601123809814 + "p50": 39.744000881910324, + "p90": 52.352000027894974, + "p95": 53.18399891257286, + "p99": 58.079998940229416 }, "roundtrip": { - "p50": 930.400013923645, - "p90": 953.1520009040833, - "p95": 967.1040177345276, - "p99": 1069.5680379867554 + "p50": 195.77600061893463, + "p90": 259.68000292778015, + "p95": 262.14399933815, + "p99": 267.64801144599915 }, "isolatedSum": { - "p50": 945.0879693031311, - "p90": 975.456029176712, - "p95": 997.1200227737427, - "p99": 1091.2960171699524 + "p50": 208.19200202822685, + "p90": 273.3760066330433, + "p95": 276.5759937465191, + "p99": 287.4239943921566 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 309750784, - 
"combineLogicalBytes": 619501568, - "fanoutMean": 5.2750244140625, - "recvTokensMax": 5469, - "stragglerRank": 0, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2048, - "globalTokens": 16384, + "tokensPerRank": 16, + "globalTokens": 128, "dispatch": { - "p50": 1100.0959873199463, - "p90": 1113.9520406723022, - "p95": 1130.784034729004, - "p99": 1221.2159633636475 + "p50": 180.28800189495087, + "p90": 222.1119999885559, + "p95": 225.37599503993988, + "p99": 237.0239943265915 }, "combine": { - "p50": 596.3199734687805, - "p90": 606.9440245628357, - "p95": 612.6400232315063, - "p99": 648.5120058059692 + "p50": 42.94399917125702, + "p90": 53.727999329566956, + "p95": 57.0559985935688, + "p99": 143.96800100803375 }, "roundtrip": { - "p50": 1675.5199432373047, - "p90": 1687.999963760376, - "p95": 1695.3599452972412, - "p99": 2014.2719745635986 + "p50": 211.07199788093567, + "p90": 258.84801149368286, + "p95": 261.85598969459534, + "p99": 270.7520127296448 }, "isolatedSum": { - "p50": 1696.4159607887268, - "p90": 1720.896065235138, - "p95": 1743.4240579605103, - "p99": 1869.7279691696167 + "p50": 223.23200106620789, + "p90": 275.83999931812286, + "p95": 282.4319936335087, + "p99": 380.99199533462524 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 619687936, - "combineLogicalBytes": 1239375872, - "fanoutMean": 5.276611328125, - "recvTokensMax": 10883, - "stragglerRank": 5, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 4096, - "globalTokens": 32768, + "tokensPerRank": 32, + "globalTokens": 256, "dispatch": { - "p50": 2087.3920917510986, - "p90": 2099.519968032837, - "p95": 2110.6879711151123, - "p99": 2213.7598991394043 + "p50": 
205.4399996995926, + "p90": 242.5599992275238, + "p95": 244.89599466323853, + "p99": 249.31199848651886 }, "combine": { - "p50": 1087.4559879302979, - "p90": 1099.4240045547485, - "p95": 1103.5200357437134, - "p99": 1151.8080234527588 + "p50": 45.21600157022476, + "p90": 54.55999821424484, + "p95": 55.48800155520439, + "p99": 59.13599953055382 }, "roundtrip": { - "p50": 3166.016101837158, - "p90": 3187.0079040527344, - "p95": 3196.5761184692383, - "p99": 3422.0480918884277 + "p50": 241.66400730609894, + "p90": 279.9679934978485, + "p95": 282.20799565315247, + "p99": 286.3680124282837 }, "isolatedSum": { - "p50": 3174.8480796813965, - "p90": 3198.9439725875854, - "p95": 3214.2080068588257, - "p99": 3365.567922592163 + "p50": 250.65600126981735, + "p90": 297.11999744176865, + "p95": 300.3839962184429, + "p99": 308.4479980170727 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1239834624, - "combineLogicalBytes": 2479669248, - "fanoutMean": 5.278564453125, - "recvTokensMax": 21730, - "stragglerRank": 3, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 258.36798548698425, + "p90": 366.784006357193, + "p95": 370.36800384521484, + "p99": 381.98399543762207 + }, + "combine": { + "p50": 56.352000683546066, + "p90": 67.29599833488464, + "p95": 68.09599697589874, + "p99": 72.9919970035553 + }, + "roundtrip": { + "p50": 305.88799715042114, + "p90": 344.9920117855072, + "p95": 346.78399562835693, + "p99": 349.8559892177582 + }, + "isolatedSum": { + "p50": 314.7199861705303, + "p90": 434.08000469207764, + "p95": 438.4640008211136, + "p99": 454.97599244117737 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 7, + "correct": 
true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 262.0159983634949, + "p90": 300.86401104927063, + "p95": 303.99999022483826, + "p99": 463.1359875202179 + }, + "combine": { + "p50": 69.85600292682648, + "p90": 80.73599636554718, + "p95": 81.53600245714188, + "p99": 84.95999872684479 + }, + "roundtrip": { + "p50": 325.0240087509155, + "p90": 364.4160032272339, + "p95": 366.36799573898315, + "p99": 370.11200189590454 + }, + "isolatedSum": { + "p50": 331.87200129032135, + "p90": 381.6000074148178, + "p95": 385.53599268198013, + "p99": 548.0959862470627 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -69194,28 +71052,29 @@ ] }, { - "id": "cx-5a82a4d9", - "identity": "h200|deepep|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|normalized|0.18|64d989e2e2a6b31", - "colorKey": "h200_3a17d46b", - "comparisonKey": "680e15fb3428bab0", + "id": "cx-402bdadc", + "identity": "h100|deepep-hybrid|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "h100_ec8c28a9", + "comparisonKey": "2d8d821b3680de8a", "schemaVersion": 3, - "generatedAt": "2026-06-26T17:30:05.917629+00:00", + "generatedAt": "2026-06-28T02:32:51.441168+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h200-dgxc-slurm_10", - "sku": "h200", - "backend": "deepep", + "runner": "h100-dgxc-slurm_17", + "sku": "h100", + "backend": "deepep-hybrid", "phase": "prefill", "mode": "normal", - "resourceMode": "normalized", - "suite": "resource-constrained", + "resourceMode": "tuned", + "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", + 
"topologyClass": "h100-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H200 EP8 · deepep · fp8 (norm)", + "label": "H100 EP8 · deepep-hybrid · bf16", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, @@ -69225,19 +71084,19 @@ "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, - "dispatchDtype": "fp8", + "dispatchDtype": "bf16", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { - "requestedFraction": 0.18, - "achievedFraction": 0.1818, - "configuredUnits": 24, + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, "deviceUnits": 132, - "resourceClass": "resource-constrained", - "conformanceClass": "resource-conforming", - "fixedKernel": false, - "paretoEligible": true + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false }, "placement": { "kind": "packed", @@ -69251,49 +71110,49 @@ "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", + "backendVersion": "hybrid-e0a5b1d", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28254401482", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254401482", - "createdAt": "2026-06-26T17:28:31Z", - "sha": "60dec7d70f554e252fec87709e2be52752947db1" + "id": "28308875809", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28308875809", + "createdAt": "2026-06-28T02:32:51.441168+00:00", + "sha": "02ef8d2d9b6fd7519504810daae202e88ee66360" }, "rows": [ { "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 86.81599795818329, - "p90": 108.2879975438118, - "p95": 115.26399850845337, - "p99": 141.79199934005737 + "p50": 257.79199600219727, + "p90": 262.65600323677063, + "p95": 264.95999097824097, + "p99": 272.2240090370178 }, 
"combine": { - "p50": 96.38399630784988, - "p90": 114.68800157308578, - "p95": 119.55200135707855, - "p99": 138.72000575065613 + "p50": 69.21599805355072, + "p90": 71.19999825954437, + "p95": 73.27999919652939, + "p99": 79.83999699354172 }, "roundtrip": { - "p50": 210.59200167655945, - "p90": 242.94400215148926, - "p95": 254.17599081993103, - "p99": 313.27998638153076 + "p50": 320.47998905181885, + "p90": 324.3519961833954, + "p95": 327.07199454307556, + "p99": 332.3200047016144 }, "isolatedSum": { - "p50": 183.19999426603317, - "p90": 222.97599911689758, - "p95": 234.81599986553192, - "p99": 280.5120050907135 + "p50": 327.007994055748, + "p90": 333.856001496315, + "p95": 338.23999017477036, + "p99": 352.06400603055954 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 38836224, + "dispatchLogicalBytes": 77672448, "combineLogicalBytes": 77672448, "fanoutMean": 5.291015625, "recvTokensMax": 723, - "stragglerRank": 4, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 @@ -69302,35 +71161,35 @@ "tokensPerRank": 256, "globalTokens": 2048, "dispatch": { - "p50": 103.2319962978363, - "p90": 128.28800082206726, - "p95": 134.8160058259964, - "p99": 155.07200360298157 + "p50": 269.6320116519928, + "p90": 274.6559977531433, + "p95": 276.70401334762573, + "p99": 282.6240062713623 }, "combine": { - "p50": 133.66399705410004, - "p90": 149.79200065135956, - "p95": 157.21599757671356, - "p99": 173.37599396705627 + "p50": 104.51199859380722, + "p90": 106.59199953079224, + "p95": 107.45599865913391, + "p99": 110.36799848079681 }, "roundtrip": { - "p50": 304.22401428222656, - "p90": 332.41599798202515, - "p95": 337.92001008987427, - "p99": 353.2800078392029 + "p50": 368.3199882507324, + "p90": 372.79999256134033, + "p95": 375.0399947166443, + "p99": 377.85598635673523 }, "isolatedSum": { - "p50": 236.89599335193634, - "p90": 278.0800014734268, - "p95": 292.03200340270996, - "p99": 328.44799757003784 + "p50": 374.1440102458, + "p90": 381.24799728393555, + 
"p95": 384.16001200675964, + "p99": 392.9920047521591 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77944832, + "dispatchLogicalBytes": 155889664, "combineLogicalBytes": 155889664, "fanoutMean": 5.3095703125, "recvTokensMax": 1422, - "stragglerRank": 2, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -69339,35 +71198,35 @@ "tokensPerRank": 512, "globalTokens": 4096, "dispatch": { - "p50": 135.77599823474884, - "p90": 162.30399906635284, - "p95": 169.95200514793396, - "p99": 237.98400163650513 + "p50": 293.15200448036194, + "p90": 298.5279858112335, + "p95": 300.7360100746155, + "p99": 305.9839904308319 }, "combine": { - "p50": 203.2960057258606, - "p90": 220.41599452495575, - "p95": 226.55999660491943, - "p99": 257.31199979782104 + "p50": 172.31999337673187, + "p90": 174.78400468826294, + "p95": 175.9680062532425, + "p99": 179.1680008172989 }, "roundtrip": { - "p50": 476.9600033760071, - "p90": 496.63999676704407, - "p95": 511.55197620391846, - "p99": 544.7999835014343 + "p50": 464.4159972667694, + "p90": 468.8960015773773, + "p95": 470.8159863948822, + "p99": 480.76799511909485 }, "isolatedSum": { - "p50": 339.07200396060944, - "p90": 382.7199935913086, - "p95": 396.5120017528534, - "p99": 495.2960014343262 + "p50": 465.4719978570938, + "p90": 473.31199049949646, + "p95": 476.70401632785797, + "p99": 485.1519912481308 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 156133376, + "dispatchLogicalBytes": 312266752, "combineLogicalBytes": 312266752, "fanoutMean": 5.31787109375, "recvTokensMax": 2779, - "stragglerRank": 5, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 @@ -69376,31 +71235,31 @@ "tokensPerRank": 1024, "globalTokens": 8192, "dispatch": { - "p50": 196.57599925994873, - "p90": 218.87999773025513, - "p95": 225.3119945526123, - "p99": 253.7280023097992 + "p50": 448.0000138282776, + "p90": 458.8800072669983, + "p95": 461.1839950084686, + "p99": 468.4799909591675 }, "combine": { - "p50": 
320.607990026474, - "p90": 335.2319896221161, - "p95": 344.4800078868866, - "p99": 365.9519851207733 + "p50": 299.1679906845093, + "p90": 301.7280101776123, + "p95": 302.4959862232208, + "p99": 305.6960105895996 }, "roundtrip": { - "p50": 794.7199940681458, - "p90": 817.6959753036499, - "p95": 837.0879888534546, - "p99": 910.5280041694641 + "p50": 749.9840259552002, + "p90": 761.568009853363, + "p95": 765.2480006217957, + "p99": 789.8880243301392 }, "isolatedSum": { - "p50": 517.1839892864227, - "p90": 554.1119873523712, - "p95": 569.7920024394989, - "p99": 619.6799874305725 + "p50": 747.1680045127869, + "p90": 760.6080174446106, + "p95": 763.6799812316895, + "p99": 774.1760015487671 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 311721984, + "dispatchLogicalBytes": 623443968, "combineLogicalBytes": 623443968, "fanoutMean": 5.30859375, "recvTokensMax": 5505, @@ -69413,35 +71272,35 @@ "tokensPerRank": 2048, "globalTokens": 16384, "dispatch": { - "p50": 320.16000151634216, - "p90": 343.55199337005615, - "p95": 363.45601081848145, - "p99": 439.9999976158142 + "p50": 729.3440103530884, + "p90": 734.2399954795837, + "p95": 737.824022769928, + "p99": 864.9600148200989 }, "combine": { - "p50": 554.8160076141357, - "p90": 569.7919726371765, - "p95": 577.6000022888184, - "p99": 639.3280029296875 + "p50": 555.8080077171326, + "p90": 558.6240291595459, + "p95": 559.935986995697, + "p99": 565.7600164413452 }, "roundtrip": { - "p50": 1425.7279634475708, - "p90": 1448.3519792556763, - "p95": 1468.4480428695679, - "p99": 1752.8959512710571 + "p50": 1285.599946975708, + "p90": 1290.560007095337, + "p95": 1292.7680015563965, + "p99": 1297.9520559310913 }, "isolatedSum": { - "p50": 874.9760091304779, - "p90": 913.3439660072327, - "p95": 941.0560131072998, - "p99": 1079.3280005455017 + "p50": 1285.152018070221, + "p90": 1292.8640246391296, + "p95": 1297.760009765625, + "p99": 1430.720031261444 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 621902848, + 
"dispatchLogicalBytes": 1243805696, "combineLogicalBytes": 1243805696, "fanoutMean": 5.29547119140625, "recvTokensMax": 10952, - "stragglerRank": 7, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 @@ -69450,31 +71309,31 @@ "tokensPerRank": 4096, "globalTokens": 32768, "dispatch": { - "p50": 572.4160075187683, - "p90": 584.447979927063, - "p95": 591.6479825973511, - "p99": 629.6640038490295 + "p50": 1308.0320358276367, + "p90": 1330.7839632034302, + "p95": 1333.2480192184448, + "p99": 1338.43195438385 }, "combine": { - "p50": 1012.6080513000488, - "p90": 1025.696039199829, - "p95": 1030.2400588989258, - "p99": 1060.1279735565186 + "p50": 1069.3119764328003, + "p90": 1073.2159614562988, + "p95": 1074.7519731521606, + "p99": 1078.3040523529053 }, "roundtrip": { - "p50": 2698.7199783325195, - "p90": 2725.055932998657, - "p95": 2745.215892791748, - "p99": 2952.064037322998 + "p50": 2376.9280910491943, + "p90": 2398.9760875701904, + "p95": 2401.3121128082275, + "p99": 2405.503988265991 }, "isolatedSum": { - "p50": 1585.0240588188171, - "p90": 1610.144019126892, - "p95": 1621.8880414962769, - "p99": 1689.791977405548 + "p50": 2377.344012260437, + "p90": 2403.999924659729, + "p95": 2407.9999923706055, + "p99": 2416.7360067367554 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1243504640, + "dispatchLogicalBytes": 2487009280, "combineLogicalBytes": 2487009280, "fanoutMean": 5.294189453125, "recvTokensMax": 21781, @@ -69486,28 +71345,29 @@ ] }, { - "id": "cx-da3555d5", - "identity": "h200|deepep|7168|8|256|fp8|normal|cached-layout-comm-only-v1|uniform|8|prefill|normal|none|none|0|normalized|0.18|64d989e2e2a6b31", - "colorKey": "h200_50a9ee63", - "comparisonKey": "ee1a607167629f55", + "id": "cx-f1858975", + "identity": "h100|flashinfer|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h100_6c33dc8f", + "comparisonKey": "5205049e72237a92", "schemaVersion": 3, - 
"generatedAt": "2026-06-26T17:30:23.809590+00:00", + "generatedAt": "2026-06-27T17:24:08.744102+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h200-dgxc-slurm_13", - "sku": "h200", - "backend": "deepep", - "phase": "prefill", + "runner": "h100-dgxc-slurm_07", + "sku": "h100", + "backend": "flashinfer", + "phase": "decode", "mode": "normal", - "resourceMode": "normalized", - "suite": "resource-constrained", + "resourceMode": "tuned", + "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "cached-layout-comm-only-v1", - "topologyClass": "h200-nvlink-island", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H200 EP8 · deepep · fp8 (norm) [cl]", + "label": "H100 EP8 · flashinfer · bf16", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, @@ -69517,19 +71377,19 @@ "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, - "dispatchDtype": "fp8", + "dispatchDtype": "bf16", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { - "requestedFraction": 0.18, - "achievedFraction": 0.1818, - "configuredUnits": 24, + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, "deviceUnits": 132, - "resourceClass": "resource-constrained", - "conformanceClass": "resource-conforming", - "fixedKernel": false, - "paretoEligible": true + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false }, "placement": { "kind": "packed", @@ -69538,239 +71398,313 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "64d989e2e2a6b31", - "workloadId": "set:6:a426d66e479dc893", + "traceSignature": "ac583971f94b176", + "workloadId": "set:8:d8d49658059863f2", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, - "backendVersion": 
"1.2.1", + "backendVersion": null, "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28254418007", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254418007", - "createdAt": "2026-06-26T17:28:51Z", - "sha": "60dec7d70f554e252fec87709e2be52752947db1" + "id": "28296376857", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28296376857", + "createdAt": "2026-06-27T17:24:08.744102+00:00", + "sha": "2ebeba9134a8c84f7a80ac87742d57f7cdf1cf18" }, "rows": [ { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 1, + "globalTokens": 8, "dispatch": { - "p50": 73.69600236415863, - "p90": 84.63999629020691, - "p95": 90.08000046014786, - "p99": 106.6880002617836 + "p50": 99.35999661684036, + "p90": 103.29599678516388, + "p95": 105.53599894046783, + "p99": 110.81600189208984 }, "combine": { - "p50": 95.20000219345093, - "p90": 106.97600245475769, - "p95": 112.28799819946289, - "p99": 135.77599823474884 + "p50": 99.35999661684036, + "p90": 103.29599678516388, + "p95": 105.53599894046783, + "p99": 110.81600189208984 }, "roundtrip": { - "p50": 196.70400023460388, - "p90": 213.79199624061584, - "p95": 224.16000068187714, - "p99": 281.0240089893341 + "p50": 99.35999661684036, + "p90": 103.29599678516388, + "p95": 105.53599894046783, + "p99": 110.81600189208984 }, "isolatedSum": { - "p50": 168.89600455760956, - "p90": 191.6159987449646, - "p95": 202.36799865961075, - "p99": 242.46399849653244 + "p50": 198.71999323368073, + "p90": 206.59199357032776, + "p95": 211.07199788093567, + "p99": 221.6320037841797 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 38836224, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 7, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 8, + "stragglerRank": 3, "correct": true, 
"samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 256, - "globalTokens": 2048, + "tokensPerRank": 2, + "globalTokens": 16, "dispatch": { - "p50": 91.71199798583984, - "p90": 108.0000028014183, - "p95": 111.87200248241425, - "p99": 124.57600235939026 + "p50": 99.20000284910202, + "p90": 102.78400033712387, + "p95": 105.53599894046783, + "p99": 109.63200032711029 }, "combine": { - "p50": 132.7359974384308, - "p90": 146.2399959564209, - "p95": 151.8400013446808, - "p99": 165.56799411773682 + "p50": 99.20000284910202, + "p90": 102.78400033712387, + "p95": 105.53599894046783, + "p99": 109.63200032711029 }, "roundtrip": { - "p50": 291.456013917923, - "p90": 308.57598781585693, - "p95": 313.34400177001953, - "p99": 330.78399300575256 + "p50": 99.20000284910202, + "p90": 102.78400033712387, + "p95": 105.53599894046783, + "p99": 109.63200032711029 }, "isolatedSum": { - "p50": 224.44799542427063, - "p90": 254.2399987578392, - "p95": 263.71200382709503, - "p99": 290.1439964771271 + "p50": 198.40000569820404, + "p90": 205.56800067424774, + "p95": 211.07199788093567, + "p99": 219.26400065422058 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77944832, - "combineLogicalBytes": 155889664, - "fanoutMean": 5.3095703125, - "recvTokensMax": 1422, - "stragglerRank": 7, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 8, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 512, - "globalTokens": 4096, + "tokensPerRank": 4, + "globalTokens": 32, "dispatch": { - "p50": 125.50400197505951, - "p90": 144.3520039319992, - "p95": 149.85600113868713, - "p99": 213.6639952659607 + "p50": 96.96000069379807, + "p90": 101.08800232410431, + "p95": 103.42399775981903, + "p99": 108.86400192975998 }, "combine": { - "p50": 203.10400426387787, - "p90": 215.64799547195435, - "p95": 220.47999501228333, - "p99": 236.92800104618073 + "p50": 96.96000069379807, + "p90": 101.08800232410431, 
+ "p95": 103.42399775981903, + "p99": 108.86400192975998 }, "roundtrip": { - "p50": 464.7040069103241, - "p90": 485.5999946594238, - "p95": 495.64799666404724, - "p99": 524.3520140647888 + "p50": 96.96000069379807, + "p90": 101.08800232410431, + "p95": 103.42399775981903, + "p99": 108.86400192975998 }, "isolatedSum": { - "p50": 328.6080062389374, - "p90": 359.99999940395355, - "p95": 370.33599615097046, - "p99": 450.5919963121414 + "p50": 193.92000138759613, + "p90": 202.17600464820862, + "p95": 206.84799551963806, + "p99": 217.72800385951996 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 156133376, - "combineLogicalBytes": 312266752, - "fanoutMean": 5.31787109375, - "recvTokensMax": 2779, - "stragglerRank": 7, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 8, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 1024, - "globalTokens": 8192, + "tokensPerRank": 8, + "globalTokens": 64, "dispatch": { - "p50": 184.1599941253662, - "p90": 198.94400238990784, - "p95": 204.352006316185, - "p99": 232.12799429893494 + "p50": 98.68799895048141, + "p90": 102.30399668216705, + "p95": 104.25599664449692, + "p99": 109.21599715948105 }, "combine": { - "p50": 318.39999556541443, - "p90": 328.96000146865845, - "p95": 333.15199613571167, - "p99": 352.7359962463379 + "p50": 98.68799895048141, + "p90": 102.30399668216705, + "p95": 104.25599664449692, + "p99": 109.21599715948105 }, "roundtrip": { - "p50": 782.4640274047852, - "p90": 796.064019203186, - "p95": 802.4960160255432, - "p99": 826.4960050582886 + "p50": 98.68799895048141, + "p90": 102.30399668216705, + "p95": 104.25599664449692, + "p99": 109.21599715948105 }, "isolatedSum": { - "p50": 502.55998969078064, - "p90": 527.9040038585663, - "p95": 537.5040024518967, - "p99": 584.8639905452728 + "p50": 197.37599790096283, + "p90": 204.6079933643341, + "p95": 208.51199328899384, + "p99": 218.4319943189621 }, 
"roundtripMeasured": true, - "dispatchLogicalBytes": 311721984, - "combineLogicalBytes": 623443968, - "fanoutMean": 5.30859375, - "recvTokensMax": 5505, - "stragglerRank": 7, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 8, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2048, - "globalTokens": 16384, + "tokensPerRank": 16, + "globalTokens": 128, "dispatch": { - "p50": 304.3519854545593, - "p90": 320.8320140838623, - "p95": 336.2559974193573, - "p99": 371.42398953437805 + "p50": 96.54399752616882, + "p90": 101.72799974679947, + "p95": 102.94400155544281, + "p99": 107.42399841547012 }, "combine": { - "p50": 550.4000186920166, - "p90": 560.2880120277405, - "p95": 567.7760243415833, - "p99": 656.8959951400757 + "p50": 96.54399752616882, + "p90": 101.72799974679947, + "p95": 102.94400155544281, + "p99": 107.42399841547012 }, "roundtrip": { - "p50": 1410.4959964752197, - "p90": 1427.456021308899, - "p95": 1436.4160299301147, - "p99": 1585.2479934692383 + "p50": 96.54399752616882, + "p90": 101.72799974679947, + "p95": 102.94400155544281, + "p99": 107.42399841547012 }, "isolatedSum": { - "p50": 854.7520041465759, - "p90": 881.1200261116028, - "p95": 904.0320217609406, - "p99": 1028.3199846744537 + "p50": 193.08799505233765, + "p90": 203.45599949359894, + "p95": 205.88800311088562, + "p99": 214.84799683094025 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 621902848, - "combineLogicalBytes": 1243805696, - "fanoutMean": 5.29547119140625, - "recvTokensMax": 10952, - "stragglerRank": 4, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 8, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 4096, - "globalTokens": 32768, + "tokensPerRank": 32, + "globalTokens": 256, "dispatch": { - "p50": 542.8479909896851, - "p90": 557.5680136680603, - "p95": 
565.5360221862793, - "p99": 587.7760052680969 + "p50": 97.72799909114838, + "p90": 101.6639992594719, + "p95": 105.0880029797554, + "p99": 111.84000223875046 }, "combine": { - "p50": 1013.5680437088013, - "p90": 1026.4320373535156, - "p95": 1031.999945640564, - "p99": 1048.192024230957 + "p50": 97.72799909114838, + "p90": 101.6639992594719, + "p95": 105.0880029797554, + "p99": 111.84000223875046 }, "roundtrip": { - "p50": 2668.4160232543945, - "p90": 2694.3039894104004, - "p95": 2716.320037841797, - "p99": 3019.615888595581 + "p50": 97.72799909114838, + "p90": 101.6639992594719, + "p95": 105.0880029797554, + "p99": 111.84000223875046 }, "isolatedSum": { - "p50": 1556.4160346984863, - "p90": 1584.000051021576, - "p95": 1597.5359678268433, - "p99": 1635.968029499054 + "p50": 195.45599818229675, + "p90": 203.3279985189438, + "p95": 210.1760059595108, + "p99": 223.68000447750092 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1243504640, - "combineLogicalBytes": 2487009280, - "fanoutMean": 5.294189453125, - "recvTokensMax": 21781, - "stragglerRank": 7, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 8, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 98.4639972448349, + "p90": 104.44799810647964, + "p95": 122.75200337171555, + "p99": 401.5359878540039 + }, + "combine": { + "p50": 98.4639972448349, + "p90": 104.44799810647964, + "p95": 122.75200337171555, + "p99": 401.5359878540039 + }, + "roundtrip": { + "p50": 98.4639972448349, + "p90": 104.44799810647964, + "p95": 122.75200337171555, + "p99": 401.5359878540039 + }, + "isolatedSum": { + "p50": 196.9279944896698, + "p90": 208.8959962129593, + "p95": 245.5040067434311, + "p99": 803.0719757080078 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 
8, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 110.62400043010712, + "p90": 115.84000289440155, + "p95": 118.23999881744385, + "p99": 130.52800297737122 + }, + "combine": { + "p50": 110.62400043010712, + "p90": 115.84000289440155, + "p95": 118.23999881744385, + "p99": 130.52800297737122 + }, + "roundtrip": { + "p50": 110.62400043010712, + "p90": 115.84000289440155, + "p95": 118.23999881744385, + "p99": 130.52800297737122 + }, + "isolatedSum": { + "p50": 221.24800086021423, + "p90": 231.6800057888031, + "p95": 236.4799976348877, + "p99": 261.05600595474243 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 8, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 @@ -69778,28 +71712,29 @@ ] }, { - "id": "cx-4a1bc537", - "identity": "h200|deepep|7168|8|256|fp8|normal|cached-layout-comm-only-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", - "colorKey": "h200_4f483b60", - "comparisonKey": "ac62097ce902c24f", + "id": "cx-236b5900", + "identity": "h100|flashinfer|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h100_865f08c6", + "comparisonKey": "63f2ed34d1d8c7db", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:50:33.490755+00:00", + "generatedAt": "2026-06-28T01:38:24.466545+00:00", "status": "valid", "publicationStatus": "official", - "runner": "h200-dgxc-slurm_1", - "sku": "h200", - "backend": "deepep", - "phase": "prefill", + "runner": "h100-dgxc-slurm_15", + "sku": "h100", + "backend": "flashinfer", + "phase": "decode", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "cached-layout-comm-only-v1", - "topologyClass": "h200-nvlink-island", + 
"measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "H200 EP8 · deepep · fp8 [cl]", + "label": "H100 EP8 · flashinfer · fp8", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, @@ -69815,12 +71750,12 @@ }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, + "achievedFraction": null, + "configuredUnits": null, "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, "paretoEligible": false }, "placement": { @@ -69830,239 +71765,313 @@ "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "64d989e2e2a6b31", - "workloadId": "set:6:a426d66e479dc893", + "traceSignature": "ac583971f94b176", + "workloadId": "set:8:d8d49658059863f2", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", + "backendVersion": null, "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271633476", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271633476", - "createdAt": "2026-06-26T23:48:55Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28307778986", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28307778986", + "createdAt": "2026-06-28T01:38:24.466545+00:00", + "sha": "510fc17001789ae4f32b99b60b9a0a0aa53ab6b5" }, "rows": [ { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 1, + "globalTokens": 8, "dispatch": { - "p50": 75.71200281381607, - "p90": 95.29600292444229, - "p95": 102.11200267076492, - "p99": 128.83199751377106 + "p50": 109.11999642848969, + "p90": 113.8560026884079, + "p95": 116.19199812412262, 
+ "p99": 121.34400010108948 }, "combine": { - "p50": 97.31200337409973, - "p90": 115.93600362539291, - "p95": 120.80000340938568, - "p99": 140.44800400733948 + "p50": 109.11999642848969, + "p90": 113.8560026884079, + "p95": 116.19199812412262, + "p99": 121.34400010108948 }, "roundtrip": { - "p50": 200.8959949016571, - "p90": 248.28800559043884, - "p95": 261.24799251556396, - "p99": 302.5600016117096 + "p50": 109.11999642848969, + "p90": 113.8560026884079, + "p95": 116.19199812412262, + "p99": 121.34400010108948 }, "isolatedSum": { - "p50": 173.0240061879158, - "p90": 211.2320065498352, - "p95": 222.9120060801506, - "p99": 269.28000152111053 + "p50": 218.23999285697937, + "p90": 227.7120053768158, + "p95": 232.38399624824524, + "p99": 242.68800020217896 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 38836224, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 5, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 8, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 256, - "globalTokens": 2048, + "tokensPerRank": 2, + "globalTokens": 16, "dispatch": { - "p50": 91.61599725484848, - "p90": 110.33599823713303, - "p95": 116.35199934244156, - "p99": 134.17600095272064 + "p50": 108.96000266075134, + "p90": 116.80000275373459, + "p95": 122.30399996042252, + "p99": 135.3279948234558 }, "combine": { - "p50": 136.76799833774567, - "p90": 151.5199989080429, - "p95": 159.04000401496887, - "p99": 170.6240028142929 + "p50": 108.96000266075134, + "p90": 116.80000275373459, + "p95": 122.30399996042252, + "p99": 135.3279948234558 }, "roundtrip": { - "p50": 299.45600032806396, - "p90": 324.38400387763977, - "p95": 331.07200264930725, - "p99": 365.7279908657074 + "p50": 108.96000266075134, + "p90": 116.80000275373459, + "p95": 122.30399996042252, + "p99": 135.3279948234558 }, "isolatedSum": { - "p50": 228.38399559259415, 
- "p90": 261.85599714517593, - "p95": 275.39200335741043, - "p99": 304.80000376701355 + "p50": 217.92000532150269, + "p90": 233.60000550746918, + "p95": 244.60799992084503, + "p99": 270.6559896469116 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77944832, - "combineLogicalBytes": 155889664, - "fanoutMean": 5.3095703125, - "recvTokensMax": 1422, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 8, "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 512, - "globalTokens": 4096, + "tokensPerRank": 4, + "globalTokens": 32, "dispatch": { - "p50": 127.83999741077423, - "p90": 142.94399321079254, - "p95": 150.4960060119629, - "p99": 162.7199947834015 + "p50": 109.18399691581726, + "p90": 115.99999666213989, + "p95": 119.55200135707855, + "p99": 376.6399919986725 }, "combine": { - "p50": 214.62400257587433, - "p90": 226.78400576114655, - "p95": 231.51999711990356, - "p99": 242.14400351047516 + "p50": 109.18399691581726, + "p90": 115.99999666213989, + "p95": 119.55200135707855, + "p99": 376.6399919986725 }, "roundtrip": { - "p50": 483.5200011730194, - "p90": 497.2800016403198, - "p95": 504.5120120048523, - "p99": 540.831983089447 + "p50": 109.18399691581726, + "p90": 115.99999666213989, + "p95": 119.55200135707855, + "p99": 376.6399919986725 }, "isolatedSum": { - "p50": 342.46399998664856, - "p90": 369.7279989719391, - "p95": 382.01600313186646, - "p99": 404.86399829387665 + "p50": 218.36799383163452, + "p90": 231.99999332427979, + "p95": 239.1040027141571, + "p99": 753.279983997345 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 156133376, - "combineLogicalBytes": 312266752, - "fanoutMean": 5.31787109375, - "recvTokensMax": 2779, - "stragglerRank": 6, + "dispatchLogicalBytes": 1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 8, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - 
"tokensPerRank": 1024, - "globalTokens": 8192, + "tokensPerRank": 8, + "globalTokens": 64, "dispatch": { - "p50": 194.75199282169342, - "p90": 214.88000452518463, - "p95": 220.2879935503006, - "p99": 243.74400079250336 + "p50": 108.70400071144104, + "p90": 114.30399864912033, + "p95": 116.12799763679504, + "p99": 120.64000219106674 }, "combine": { - "p50": 346.3360071182251, - "p90": 362.8160059452057, - "p95": 374.4960129261017, - "p99": 426.56001448631287 + "p50": 108.70400071144104, + "p90": 114.30399864912033, + "p95": 116.12799763679504, + "p99": 120.64000219106674 }, "roundtrip": { - "p50": 824.5440125465393, - "p90": 852.5760173797607, - "p95": 862.2400164604187, - "p99": 896.6720104217529 + "p50": 108.70400071144104, + "p90": 114.30399864912033, + "p95": 116.12799763679504, + "p99": 120.64000219106674 }, "isolatedSum": { - "p50": 541.0879999399185, - "p90": 577.6960104703903, - "p95": 594.7840064764023, - "p99": 670.3040152788162 + "p50": 217.40800142288208, + "p90": 228.60799729824066, + "p95": 232.2559952735901, + "p99": 241.28000438213348 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 311721984, - "combineLogicalBytes": 623443968, - "fanoutMean": 5.30859375, - "recvTokensMax": 5505, - "stragglerRank": 5, + "dispatchLogicalBytes": 2487296, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 8, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2048, - "globalTokens": 16384, + "tokensPerRank": 16, + "globalTokens": 128, "dispatch": { - "p50": 325.0879943370819, - "p90": 342.52798557281494, - "p95": 348.9919900894165, - "p99": 374.9440014362335 + "p50": 108.86400192975998, + "p90": 114.656001329422, + "p95": 119.03999745845795, + "p99": 151.19999647140503 }, "combine": { - "p50": 603.8720011711121, - "p90": 613.6959791183472, - "p95": 618.1120276451111, - "p99": 640.3520107269287 + "p50": 108.86400192975998, + "p90": 114.656001329422, + "p95": 119.03999745845795, + "p99": 
151.19999647140503 }, "roundtrip": { - "p50": 1486.36794090271, - "p90": 1510.7519626617432, - "p95": 1524.1600275039673, - "p99": 1566.3679838180542 + "p50": 108.86400192975998, + "p90": 114.656001329422, + "p95": 119.03999745845795, + "p99": 151.19999647140503 }, "isolatedSum": { - "p50": 928.959995508194, - "p90": 956.2239646911621, - "p95": 967.1040177345276, - "p99": 1015.2960121631622 + "p50": 217.72800385951996, + "p90": 229.312002658844, + "p95": 238.0799949169159, + "p99": 302.39999294281006 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 621902848, - "combineLogicalBytes": 1243805696, - "fanoutMean": 5.29547119140625, - "recvTokensMax": 10952, - "stragglerRank": 5, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 8, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 4096, - "globalTokens": 32768, + "tokensPerRank": 32, + "globalTokens": 256, "dispatch": { - "p50": 586.624026298523, - "p90": 618.9759969711304, - "p95": 627.6800036430359, - "p99": 654.7200083732605 + "p50": 109.21599715948105, + "p90": 121.18399888277054, + "p95": 127.3919939994812, + "p99": 205.56800067424774 }, "combine": { - "p50": 1108.8639497756958, - "p90": 1126.1119842529297, - "p95": 1134.2079639434814, - "p99": 1169.376015663147 + "p50": 109.21599715948105, + "p90": 121.18399888277054, + "p95": 127.3919939994812, + "p99": 205.56800067424774 }, "roundtrip": { - "p50": 2817.1839714050293, - "p90": 2849.3120670318604, - "p95": 2871.0079193115234, - "p99": 3254.4960975646973 + "p50": 109.21599715948105, + "p90": 121.18399888277054, + "p95": 127.3919939994812, + "p99": 205.56800067424774 }, "isolatedSum": { - "p50": 1695.4879760742188, - "p90": 1745.08798122406, - "p95": 1761.8879675865173, - "p99": 1824.0960240364075 + "p50": 218.4319943189621, + "p90": 242.36799776554108, + "p95": 254.7839879989624, + "p99": 411.1360013484955 }, "roundtripMeasured": true, - 
"dispatchLogicalBytes": 1243504640, - "combineLogicalBytes": 2487009280, - "fanoutMean": 5.294189453125, - "recvTokensMax": 21781, - "stragglerRank": 5, + "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 110.97600311040878, + "p90": 115.93600362539291, + "p95": 118.27199906110764, + "p99": 126.88000500202179 + }, + "combine": { + "p50": 110.97600311040878, + "p90": 115.93600362539291, + "p95": 118.27199906110764, + "p99": 126.88000500202179 + }, + "roundtrip": { + "p50": 110.97600311040878, + "p90": 115.93600362539291, + "p95": 118.27199906110764, + "p99": 126.88000500202179 + }, + "isolatedSum": { + "p50": 221.95200622081757, + "p90": 231.87200725078583, + "p95": 236.54399812221527, + "p99": 253.76001000404358 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 124.41600114107132, + "p90": 129.05600666999817, + "p95": 130.5599957704544, + "p99": 136.4479959011078 + }, + "combine": { + "p50": 124.41600114107132, + "p90": 129.05600666999817, + "p95": 130.5599957704544, + "p99": 136.4479959011078 + }, + "roundtrip": { + "p50": 124.41600114107132, + "p90": 129.05600666999817, + "p95": 130.5599957704544, + "p99": 136.4479959011078 + }, + "isolatedSum": { + "p50": 248.83200228214264, + "p90": 258.11201333999634, + "p95": 261.1199915409088, + "p99": 272.8959918022156 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 8, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ 
-70070,28 +72079,29 @@ ] }, { - "id": "cx-279043f8", - "identity": "mi355x|mori|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|fp8-saturation|none|none|0|tuned||c774c8e4abb34da", - "colorKey": "mi355x_4ec24046", - "comparisonKey": "5776ea979804ef91", + "id": "cx-0d201725", + "identity": "h100|flashinfer|7168|8|256|mxfp8|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h100_1686fbdd", + "comparisonKey": "27114da636b19722", "schemaVersion": 3, - "generatedAt": "2026-06-27T00:08:32.534640+00:00", + "generatedAt": "2026-06-28T01:37:57.511914+00:00", "status": "valid", "publicationStatus": "official", - "runner": "mi355x-amds_05", - "sku": "mi355x", - "backend": "mori", + "runner": "h100-dgxc-slurm_17", + "sku": "h100", + "backend": "flashinfer", "phase": "decode", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "mi355x-xgmi", - "transport": "xgmi", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "MI355X EP8 · mori · bf16", + "label": "H100 EP8 · flashinfer · mxfp8", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, @@ -70101,75 +72111,75 @@ "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "fp8-saturation", + "dispatchDtype": "mxfp8", + "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.3125, - "configuredUnits": 80, - "deviceUnits": 256, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, "paretoEligible": false }, "placement": { 
"kind": "packed", - "nodes": 2, + "nodes": 1, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "c774c8e4abb34da", - "workloadId": "set:5:d8d49658059863f2", + "traceSignature": "ac583971f94b176", + "workloadId": "set:8:d8d49658059863f2", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, - "backendVersion": "image:rocm/sgl-dev:sglang-0.5.9-rocm720-mi35x-mori-0227-2", + "backendVersion": null, "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28272169530", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272169530", - "createdAt": "2026-06-27T00:05:44Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28307780015", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28307780015", + "createdAt": "2026-06-28T01:37:57.511914+00:00", + "sha": "510fc17001789ae4f32b99b60b9a0a0aa53ab6b5" }, "rows": [ { "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 40.19999876618385, - "p90": 43.000999838113785, - "p95": 44.56000030040741, - "p99": 47.880999743938446 + "p50": 109.56799983978271, + "p90": 113.98400366306305, + "p95": 117.85600334405899, + "p99": 129.72800433635712 }, "combine": { - "p50": 17.760999500751495, - "p90": 19.360000267624855, - "p95": 20.959999412298203, - "p99": 23.080000653862953 + "p50": 109.56799983978271, + "p90": 113.98400366306305, + "p95": 117.85600334405899, + "p99": 129.72800433635712 }, "roundtrip": { - "p50": 56.04099854826927, - "p90": 59.00000035762787, - "p95": 60.201000422239304, - "p99": 62.24000081419945 + "p50": 109.56799983978271, + "p90": 113.98400366306305, + "p95": 117.85600334405899, + "p99": 129.72800433635712 }, "isolatedSum": { - "p50": 57.96099826693535, - "p90": 62.36100010573864, - "p95": 65.51999971270561, - "p99": 70.9610003978014 + "p50": 219.13599967956543, + "p90": 
227.9680073261261, + "p95": 235.71200668811798, + "p99": 259.45600867271423 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 630784, + "dispatchLogicalBytes": 315392, "combineLogicalBytes": 630784, "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 0, + "recvTokensMax": 8, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 @@ -70178,35 +72188,35 @@ "tokensPerRank": 2, "globalTokens": 16, "dispatch": { - "p50": 42.64099895954132, - "p90": 45.52000015974045, - "p95": 47.07999899983406, - "p99": 49.76100102066994 + "p50": 109.11999642848969, + "p90": 113.27999830245972, + "p95": 115.35999923944473, + "p99": 119.84000355005264 }, "combine": { - "p50": 16.599999740719795, - "p90": 18.60000006854534, - "p95": 19.79999989271164, - "p99": 23.080000653862953 + "p50": 109.11999642848969, + "p90": 113.27999830245972, + "p95": 115.35999923944473, + "p99": 119.84000355005264 }, "roundtrip": { - "p50": 58.96100029349327, - "p90": 62.39999830722809, - "p95": 64.32099640369415, - "p99": 102.64100134372711 + "p50": 109.11999642848969, + "p90": 113.27999830245972, + "p95": 115.35999923944473, + "p99": 119.84000355005264 }, "isolatedSum": { - "p50": 59.240998700261116, - "p90": 64.12000022828579, - "p95": 66.8799988925457, - "p99": 72.84100167453289 + "p50": 218.23999285697937, + "p90": 226.55999660491943, + "p95": 230.71999847888947, + "p99": 239.68000710010529 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1232896, + "dispatchLogicalBytes": 616448, "combineLogicalBytes": 1232896, "fanoutMean": 5.375, - "recvTokensMax": 13, - "stragglerRank": 0, + "recvTokensMax": 8, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 @@ -70215,35 +72225,35 @@ "tokensPerRank": 4, "globalTokens": 32, "dispatch": { - "p50": 42.160000652074814, - "p90": 44.76099833846092, - "p95": 46.20100185275078, - "p99": 48.5600009560585 + "p50": 108.99200290441513, + "p90": 112.99200356006622, + "p95": 116.64000153541565, + "p99": 
122.36800044775009 }, "combine": { - "p50": 19.759999588131905, - "p90": 21.27999998629093, - "p95": 22.5210003554821, - "p99": 25.200000032782555 + "p50": 108.99200290441513, + "p90": 112.99200356006622, + "p95": 116.64000153541565, + "p99": 122.36800044775009 }, "roundtrip": { - "p50": 62.001001089811325, - "p90": 65.32099843025208, - "p95": 66.16000086069107, - "p99": 69.15999948978424 + "p50": 108.99200290441513, + "p90": 112.99200356006622, + "p95": 116.64000153541565, + "p99": 122.36800044775009 }, "isolatedSum": { - "p50": 61.92000024020672, - "p90": 66.04099832475185, - "p95": 68.72200220823288, - "p99": 73.76000098884106 + "p50": 217.98400580883026, + "p90": 225.98400712013245, + "p95": 233.2800030708313, + "p99": 244.73600089550018 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2480128, + "dispatchLogicalBytes": 1240064, "combineLogicalBytes": 2480128, "fanoutMean": 5.40625, - "recvTokensMax": 29, - "stragglerRank": 0, + "recvTokensMax": 8, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 @@ -70252,35 +72262,35 @@ "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 42.399998754262924, - "p90": 45.35999894142151, - "p95": 47.15999960899353, - "p99": 49.52000081539154 + "p50": 108.67200046777725, + "p90": 112.92800307273865, + "p95": 115.1999980211258, + "p99": 121.79200351238251 }, "combine": { - "p50": 20.880000665783882, - "p90": 23.08100089430809, - "p95": 24.04000051319599, - "p99": 26.441000401973724 + "p50": 108.67200046777725, + "p90": 112.92800307273865, + "p95": 115.1999980211258, + "p99": 121.79200351238251 }, "roundtrip": { - "p50": 62.52100318670273, - "p90": 65.64100086688995, - "p95": 66.56000018119812, - "p99": 68.84100288152695 + "p50": 108.67200046777725, + "p90": 112.92800307273865, + "p95": 115.1999980211258, + "p99": 121.79200351238251 }, "isolatedSum": { - "p50": 63.279999420046806, - "p90": 68.4409998357296, - "p95": 71.20000012218952, - "p99": 75.96100121736526 + "p50": 217.3440009355545, + 
"p90": 225.8560061454773, + "p95": 230.3999960422516, + "p99": 243.58400702476501 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, + "dispatchLogicalBytes": 2487296, "combineLogicalBytes": 4974592, "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 0, + "recvTokensMax": 8, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 @@ -70289,35 +72299,146 @@ "tokensPerRank": 16, "globalTokens": 128, "dispatch": { - "p50": 42.52000153064728, - "p90": 45.32000049948692, - "p95": 46.640001237392426, - "p99": 49.04000088572502 + "p50": 108.96000266075134, + "p90": 113.47199976444244, + "p95": 116.80000275373459, + "p99": 125.40799379348755 }, "combine": { - "p50": 25.599999353289604, - "p90": 27.799999341368675, - "p95": 29.239999130368233, - "p99": 31.520001590251923 + "p50": 108.96000266075134, + "p90": 113.47199976444244, + "p95": 116.80000275373459, + "p99": 125.40799379348755 }, "roundtrip": { - "p50": 67.63999909162521, - "p90": 70.60100138187408, - "p95": 71.68100029230118, - "p99": 74.36099648475647 + "p50": 108.96000266075134, + "p90": 113.47199976444244, + "p95": 116.80000275373459, + "p99": 125.40799379348755 }, "isolatedSum": { - "p50": 68.12000088393688, - "p90": 73.1199998408556, - "p95": 75.88000036776066, - "p99": 80.56000247597694 + "p50": 217.92000532150269, + "p90": 226.9439995288849, + "p95": 233.60000550746918, + "p99": 250.8159875869751 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 9920512, + "dispatchLogicalBytes": 4960256, "combineLogicalBytes": 9920512, "fanoutMean": 5.40625, - "recvTokensMax": 92, - "stragglerRank": 0, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 108.86400192975998, + "p90": 113.21599781513214, + "p95": 116.60800129175186, + "p99": 119.00799721479416 + }, + "combine": { + "p50": 108.86400192975998, + "p90": 113.21599781513214, + "p95": 
116.60800129175186, + "p99": 119.00799721479416 + }, + "roundtrip": { + "p50": 108.86400192975998, + "p90": 113.21599781513214, + "p95": 116.60800129175186, + "p99": 119.00799721479416 + }, + "isolatedSum": { + "p50": 217.72800385951996, + "p90": 226.43199563026428, + "p95": 233.21600258350372, + "p99": 238.01599442958832 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 110.33599823713303, + "p90": 115.42399972677231, + "p95": 118.78400295972824, + "p99": 129.88799810409546 + }, + "combine": { + "p50": 110.33599823713303, + "p90": 115.42399972677231, + "p95": 118.78400295972824, + "p99": 129.88799810409546 + }, + "roundtrip": { + "p50": 110.33599823713303, + "p90": 115.42399972677231, + "p95": 118.78400295972824, + "p99": 129.88799810409546 + }, + "isolatedSum": { + "p50": 220.67199647426605, + "p90": 230.84799945354462, + "p95": 237.56800591945648, + "p99": 259.7759962081909 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 122.84799665212631, + "p90": 128.28800082206726, + "p95": 130.048006772995, + "p99": 133.08799266815186 + }, + "combine": { + "p50": 122.84799665212631, + "p90": 128.28800082206726, + "p95": 130.048006772995, + "p99": 133.08799266815186 + }, + "roundtrip": { + "p50": 122.84799665212631, + "p90": 128.28800082206726, + "p95": 130.048006772995, + "p99": 133.08799266815186 + }, + "isolatedSum": { + "p50": 245.69599330425262, + "p90": 256.5760016441345, + "p95": 260.09601354599, + "p99": 266.1759853363037 + }, + "roundtripMeasured": true, 
+ "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 8, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 @@ -70325,28 +72446,29 @@ ] }, { - "id": "cx-60c60832", - "identity": "mi355x|mori|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||c774c8e4abb34da", - "colorKey": "mi355x_4ec24046", - "comparisonKey": "3677ee6ace04ac65", + "id": "cx-6fee4962", + "identity": "h100|flashinfer|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "h100_6c33dc8f", + "comparisonKey": "ab2d6ab146526e25", "schemaVersion": 3, - "generatedAt": "2026-06-27T00:53:59.155172+00:00", + "generatedAt": "2026-06-27T17:55:14.883072+00:00", "status": "valid", "publicationStatus": "official", - "runner": "mi355x-amds_05", - "sku": "mi355x", - "backend": "mori", - "phase": "decode", + "runner": "h100-dgxc-slurm_16", + "sku": "h100", + "backend": "flashinfer", + "phase": "prefill", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "mi355x-xgmi", - "transport": "xgmi", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "MI355X EP8 · mori · bf16", + "label": "H100 EP8 · flashinfer · bf16", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, @@ -70362,217 +72484,254 @@ }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.3125, - "configuredUnits": 80, - "deviceUnits": 256, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, "paretoEligible": false }, "placement": { "kind": 
"packed", - "nodes": 2, + "nodes": 1, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "c774c8e4abb34da", - "workloadId": "set:5:d8d49658059863f2", + "traceSignature": "64d989e2e2a6b31", + "workloadId": "set:6:a426d66e479dc893", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, - "backendVersion": "image:rocm/sgl-dev:sglang-0.5.9-rocm720-mi35x-mori-0227-2", + "backendVersion": null, "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28273516714", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28273516714", - "createdAt": "2026-06-27T00:53:08Z", - "sha": "2c15d9415503e9ccb84cd49cf446a122796efc1e" + "id": "28297139240", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28297139240", + "createdAt": "2026-06-27T17:55:14.883072+00:00", + "sha": "cfa1ec56258b94b4a173844810a163a832bcb07e" }, "rows": [ { - "tokensPerRank": 1, - "globalTokens": 8, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 40.6000018119812, - "p90": 43.76000165939331, - "p95": 45.239999890327454, - "p99": 54.71999943256378 + "p50": 88.92799913883209, + "p90": 92.38400310277939, + "p95": 93.66399794816971, + "p99": 97.50399738550186 }, "combine": { - "p50": 17.920000478625298, - "p90": 19.039999693632126, - "p95": 20.999999716877937, - "p99": 22.87999913096428 + "p50": 88.92799913883209, + "p90": 92.38400310277939, + "p95": 93.66399794816971, + "p99": 97.50399738550186 }, "roundtrip": { - "p50": 56.32000043988228, - "p90": 59.4400018453598, - "p95": 60.64099818468094, - "p99": 63.19999694824219 + "p50": 88.92799913883209, + "p90": 92.38400310277939, + "p95": 93.66399794816971, + "p99": 97.50399738550186 }, "isolatedSum": { - "p50": 58.5200022906065, - "p90": 62.800001353025436, - "p95": 66.23999960720539, - "p99": 77.59999856352806 + "p50": 
177.85599827766418, + "p90": 184.76800620555878, + "p95": 187.32799589633942, + "p99": 195.00799477100372 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 630784, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 0, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 8, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2, - "globalTokens": 16, + "tokensPerRank": 256, + "globalTokens": 2048, "dispatch": { - "p50": 42.64000058174133, - "p90": 45.35999894142151, - "p95": 46.76000028848648, - "p99": 50.23999884724617 + "p50": 149.9519944190979, + "p90": 152.51199901103973, + "p95": 153.50399911403656, + "p99": 157.56799280643463 }, "combine": { - "p50": 16.759999096393585, - "p90": 18.68000067770481, - "p95": 19.801000133156776, - "p99": 22.08000048995018 + "p50": 149.9519944190979, + "p90": 152.51199901103973, + "p95": 153.50399911403656, + "p99": 157.56799280643463 }, "roundtrip": { - "p50": 58.9199997484684, - "p90": 61.799999326467514, - "p95": 62.95999884605408, - "p99": 65.20000100135803 + "p50": 149.9519944190979, + "p90": 152.51199901103973, + "p95": 153.50399911403656, + "p99": 157.56799280643463 }, "isolatedSum": { - "p50": 59.39999967813492, - "p90": 64.03999961912632, - "p95": 66.56100042164326, - "p99": 72.31999933719635 + "p50": 299.9039888381958, + "p90": 305.02399802207947, + "p95": 307.0079982280731, + "p99": 315.13598561286926 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1232896, - "combineLogicalBytes": 1232896, - "fanoutMean": 5.375, - "recvTokensMax": 13, - "stragglerRank": 0, + "dispatchLogicalBytes": 155889664, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 8, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 4, - "globalTokens": 32, + "tokensPerRank": 512, + "globalTokens": 4096, 
"dispatch": { - "p50": 42.44000092148781, - "p90": 45.281000435352325, - "p95": 46.4400015771389, - "p99": 47.919999808073044 + "p50": 264.70398902893066, + "p90": 268.38400959968567, + "p95": 269.9199914932251, + "p99": 279.07198667526245 }, "combine": { - "p50": 19.999999552965164, - "p90": 21.99999988079071, - "p95": 23.360000923275948, - "p99": 25.72000026702881 + "p50": 264.70398902893066, + "p90": 268.38400959968567, + "p95": 269.9199914932251, + "p99": 279.07198667526245 }, "roundtrip": { - "p50": 61.91999837756157, - "p90": 65.20099937915802, - "p95": 66.3599967956543, - "p99": 67.84100085496902 + "p50": 264.70398902893066, + "p90": 268.38400959968567, + "p95": 269.9199914932251, + "p99": 279.07198667526245 }, "isolatedSum": { - "p50": 62.44000047445297, - "p90": 67.28100031614304, - "p95": 69.80000250041485, - "p99": 73.64000007510185 + "p50": 529.4079780578613, + "p90": 536.7680191993713, + "p95": 539.8399829864502, + "p99": 558.1439733505249 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2480128, - "combineLogicalBytes": 2480128, - "fanoutMean": 5.40625, - "recvTokensMax": 29, - "stragglerRank": 0, + "dispatchLogicalBytes": 312266752, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 8, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 1024, + "globalTokens": 8192, "dispatch": { - "p50": 42.44000092148781, - "p90": 45.00100016593933, - "p95": 46.88100144267082, - "p99": 49.27999898791313 + "p50": 498.6239969730377, + "p90": 502.4319887161255, + "p95": 504.2240023612976, + "p99": 506.9440007209778 }, "combine": { - "p50": 20.880000665783882, - "p90": 22.840000689029694, - "p95": 24.240000173449516, - "p99": 26.399999856948853 + "p50": 498.6239969730377, + "p90": 502.4319887161255, + "p95": 504.2240023612976, + "p99": 506.9440007209778 }, "roundtrip": { - "p50": 62.401000410318375, - "p90": 65.48000127077103, - "p95": 
66.28099828958511, - "p99": 68.00000369548798 + "p50": 498.6239969730377, + "p90": 502.4319887161255, + "p95": 504.2240023612976, + "p99": 506.9440007209778 }, "isolatedSum": { - "p50": 63.32000158727169, - "p90": 67.84100085496902, - "p95": 71.12100161612034, - "p99": 75.67999884486198 + "p50": 997.2479939460754, + "p90": 1004.863977432251, + "p95": 1008.4480047225952, + "p99": 1013.8880014419556 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 0, + "dispatchLogicalBytes": 623443968, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 8, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 16, - "globalTokens": 128, + "tokensPerRank": 2048, + "globalTokens": 16384, "dispatch": { - "p50": 42.520999908447266, - "p90": 45.1200008392334, - "p95": 46.59999907016754, - "p99": 49.04000088572502 + "p50": 953.6640048027039, + "p90": 962.0800018310547, + "p95": 964.2559885978699, + "p99": 967.9039716720581 }, "combine": { - "p50": 25.8799996227026, - "p90": 27.879999950528145, - "p95": 29.239999130368233, - "p99": 31.800001859664917 + "p50": 953.6640048027039, + "p90": 962.0800018310547, + "p95": 964.2559885978699, + "p99": 967.9039716720581 }, "roundtrip": { - "p50": 67.80099868774414, - "p90": 71.16000354290009, - "p95": 72.2000002861023, - "p99": 74.47999715805054 + "p50": 953.6640048027039, + "p90": 962.0800018310547, + "p95": 964.2559885978699, + "p99": 967.9039716720581 }, "isolatedSum": { - "p50": 68.40099953114986, - "p90": 73.00000078976154, - "p95": 75.83999820053577, - "p99": 80.84000274538994 + "p50": 1907.3280096054077, + "p90": 1924.1600036621094, + "p95": 1928.5119771957397, + "p99": 1935.8079433441162 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 9920512, - "combineLogicalBytes": 9920512, - "fanoutMean": 5.40625, - "recvTokensMax": 92, - "stragglerRank": 0, + 
"dispatchLogicalBytes": 1243805696, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1854.8799753189087, + "p90": 1862.4320030212402, + "p95": 1864.2560243606567, + "p99": 1869.7919845581055 + }, + "combine": { + "p50": 1854.8799753189087, + "p90": 1862.4320030212402, + "p95": 1864.2560243606567, + "p99": 1869.7919845581055 + }, + "roundtrip": { + "p50": 1854.8799753189087, + "p90": 1862.4320030212402, + "p95": 1864.2560243606567, + "p99": 1869.7919845581055 + }, + "isolatedSum": { + "p50": 3709.7599506378174, + "p90": 3724.8640060424805, + "p95": 3728.5120487213135, + "p99": 3739.583969116211 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487009280, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 8, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -70580,28 +72739,29 @@ ] }, { - "id": "cx-f513e0f0", - "identity": "mi355x|mori|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|small-amplitude|none|none|0|tuned||c774c8e4abb34da", - "colorKey": "mi355x_4ec24046", - "comparisonKey": "43eedfb9c3cc2b53", + "id": "cx-6d37a6fd", + "identity": "h100|flashinfer|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "h100_865f08c6", + "comparisonKey": "7ac85b4ec0b69909", "schemaVersion": 3, - "generatedAt": "2026-06-27T00:07:01.734617+00:00", + "generatedAt": "2026-06-28T01:37:55.644705+00:00", "status": "valid", "publicationStatus": "official", - "runner": "mi355x-amds_01", - "sku": "mi355x", - "backend": "mori", - "phase": "decode", + "runner": "h100-dgxc-slurm_00", + "sku": "h100", + "backend": "flashinfer", + "phase": "prefill", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", 
"comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "mi355x-xgmi", - "transport": "xgmi", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "MI355X EP8 · mori · bf16", + "label": "H100 EP8 · flashinfer · fp8", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, @@ -70611,222 +72771,259 @@ "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "small-amplitude", + "dispatchDtype": "fp8", + "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.3125, - "configuredUnits": 80, - "deviceUnits": 256, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, "paretoEligible": false }, "placement": { "kind": "packed", - "nodes": 2, + "nodes": 1, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "c774c8e4abb34da", - "workloadId": "set:5:d8d49658059863f2", + "traceSignature": "64d989e2e2a6b31", + "workloadId": "set:6:a426d66e479dc893", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, - "backendVersion": "image:rocm/sgl-dev:sglang-0.5.9-rocm720-mi35x-mori-0227-2", + "backendVersion": null, "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28272162006", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272162006", - "createdAt": "2026-06-27T00:05:30Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28307778986", + "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28307778986", + "createdAt": "2026-06-28T01:37:55.644705+00:00", + "sha": "510fc17001789ae4f32b99b60b9a0a0aa53ab6b5" }, "rows": [ { - "tokensPerRank": 1, - "globalTokens": 8, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 40.44099897146225, - "p90": 43.72100159525871, - "p95": 45.1200008392334, - "p99": 51.600001752376556 + "p50": 101.15200281143188, + "p90": 120.99199742078781, + "p95": 122.17599898576736, + "p99": 127.87200510501862 }, "combine": { - "p50": 15.960000455379486, - "p90": 18.160000443458557, - "p95": 19.279999658465385, - "p99": 21.159999072551727 + "p50": 101.15200281143188, + "p90": 120.99199742078781, + "p95": 122.17599898576736, + "p99": 127.87200510501862 }, "roundtrip": { - "p50": 55.56099861860275, - "p90": 58.75999853014946, - "p95": 60.120001435279846, - "p99": 63.63999843597412 + "p50": 101.15200281143188, + "p90": 120.99199742078781, + "p95": 122.17599898576736, + "p99": 127.87200510501862 }, "isolatedSum": { - "p50": 56.400999426841736, - "p90": 61.88100203871727, - "p95": 64.40000049769878, - "p99": 72.76000082492828 + "p50": 202.30400562286377, + "p90": 241.98399484157562, + "p95": 244.35199797153473, + "p99": 255.74401021003723 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 630784, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 5, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 8, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2, - "globalTokens": 16, + "tokensPerRank": 256, + "globalTokens": 2048, "dispatch": { - "p50": 42.08099842071533, - "p90": 45.0810007750988, - "p95": 46.39999940991402, - "p99": 49.76100102066994 + "p50": 139.5840048789978, + "p90": 168.60799491405487, + "p95": 170.97599804401398, + "p99": 179.1359931230545 }, "combine": { - "p50": 16.00000075995922, - 
"p90": 18.60000006854534, - "p95": 19.55999992787838, - "p99": 21.920999512076378 + "p50": 139.5840048789978, + "p90": 168.60799491405487, + "p95": 170.97599804401398, + "p99": 179.1359931230545 }, "roundtrip": { - "p50": 58.32099914550781, - "p90": 61.64000183343887, - "p95": 63.600003719329834, - "p99": 67.59999692440033 + "p50": 139.5840048789978, + "p90": 168.60799491405487, + "p95": 170.97599804401398, + "p99": 179.1359931230545 }, "isolatedSum": { - "p50": 58.08099918067455, - "p90": 63.68100084364414, - "p95": 65.9599993377924, - "p99": 71.68200053274632 + "p50": 279.1680097579956, + "p90": 337.21598982810974, + "p95": 341.95199608802795, + "p99": 358.271986246109 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1232896, - "combineLogicalBytes": 1232896, - "fanoutMean": 5.375, - "recvTokensMax": 13, - "stragglerRank": 5, + "dispatchLogicalBytes": 77944832, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 8, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 4, - "globalTokens": 32, + "tokensPerRank": 512, + "globalTokens": 4096, "dispatch": { - "p50": 41.839998215436935, - "p90": 44.920001178979874, - "p95": 46.28000035881996, - "p99": 49.40100014209747 + "p50": 221.66399657726288, + "p90": 232.7360063791275, + "p95": 234.46400463581085, + "p99": 239.00799453258514 }, "combine": { - "p50": 19.31999996304512, - "p90": 21.75999991595745, - "p95": 22.5600004196167, - "p99": 24.43999983370304 + "p50": 221.66399657726288, + "p90": 232.7360063791275, + "p95": 234.46400463581085, + "p99": 239.00799453258514 }, "roundtrip": { - "p50": 60.80099940299988, - "p90": 64.03999775648117, - "p95": 65.56099653244019, - "p99": 69.92000341415405 + "p50": 221.66399657726288, + "p90": 232.7360063791275, + "p95": 234.46400463581085, + "p99": 239.00799453258514 }, "isolatedSum": { - "p50": 61.159998178482056, - "p90": 66.68000109493732, - "p95": 68.84000077843666, - "p99": 73.84099997580051 + 
"p50": 443.32799315452576, + "p90": 465.472012758255, + "p95": 468.9280092716217, + "p99": 478.0159890651703 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2480128, - "combineLogicalBytes": 2480128, - "fanoutMean": 5.40625, - "recvTokensMax": 29, - "stragglerRank": 5, + "dispatchLogicalBytes": 156133376, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 8, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 1024, + "globalTokens": 8192, "dispatch": { - "p50": 42.080000042915344, - "p90": 45.20000144839287, - "p95": 46.64099961519241, - "p99": 48.43999817967415 + "p50": 406.43200278282166, + "p90": 415.19999504089355, + "p95": 416.9920086860657, + "p99": 419.74401473999023 }, "combine": { - "p50": 20.16099914908409, - "p90": 22.280000150203705, - "p95": 23.04000034928322, - "p99": 24.960000067949295 + "p50": 406.43200278282166, + "p90": 415.19999504089355, + "p95": 416.9920086860657, + "p99": 419.74401473999023 }, "roundtrip": { - "p50": 62.199998646974564, - "p90": 65.36100059747696, - "p95": 66.72099977731705, - "p99": 68.71999800205231 + "p50": 406.43200278282166, + "p90": 415.19999504089355, + "p95": 416.9920086860657, + "p99": 419.74401473999023 }, "isolatedSum": { - "p50": 62.240999191999435, - "p90": 67.48000159859657, - "p95": 69.68099996447563, - "p99": 73.39999824762344 + "p50": 812.8640055656433, + "p90": 830.3999900817871, + "p95": 833.9840173721313, + "p99": 839.4880294799805 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 5, + "dispatchLogicalBytes": 311721984, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 8, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 16, - "globalTokens": 128, + "tokensPerRank": 2048, + 
"globalTokens": 16384, "dispatch": { - "p50": 42.24099963903427, - "p90": 45.239999890327454, - "p95": 46.36099934577942, - "p99": 48.40100184082985 + "p50": 764.959990978241, + "p90": 773.6319899559021, + "p95": 775.5839824676514, + "p99": 795.3600287437439 }, "combine": { - "p50": 24.639999493956566, - "p90": 26.88100002706051, - "p95": 27.881000190973282, - "p99": 30.079999938607216 + "p50": 764.959990978241, + "p90": 773.6319899559021, + "p95": 775.5839824676514, + "p99": 795.3600287437439 }, "roundtrip": { - "p50": 67.47999787330627, - "p90": 70.60100138187408, - "p95": 72.28100299835205, - "p99": 75.20099729299545 + "p50": 764.959990978241, + "p90": 773.6319899559021, + "p95": 775.5839824676514, + "p99": 795.3600287437439 }, "isolatedSum": { - "p50": 66.88099913299084, - "p90": 72.12099991738796, - "p95": 74.2419995367527, - "p99": 78.48100177943707 + "p50": 1529.919981956482, + "p90": 1547.2639799118042, + "p95": 1551.1679649353027, + "p99": 1590.7200574874878 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 9920512, - "combineLogicalBytes": 9920512, - "fanoutMean": 5.40625, - "recvTokensMax": 92, + "dispatchLogicalBytes": 621902848, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1479.904055595398, + "p90": 1490.8479452133179, + "p95": 1496.7039823532104, + "p99": 1506.6879987716675 + }, + "combine": { + "p50": 1479.904055595398, + "p90": 1490.8479452133179, + "p95": 1496.7039823532104, + "p99": 1506.6879987716675 + }, + "roundtrip": { + "p50": 1479.904055595398, + "p90": 1490.8479452133179, + "p95": 1496.7039823532104, + "p99": 1506.6879987716675 + }, + "isolatedSum": { + "p50": 2959.808111190796, + "p90": 2981.6958904266357, + "p95": 2993.407964706421, + "p99": 3013.375997543335 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 
1243504640, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 8, "stragglerRank": 4, "correct": true, "samplesPooled": 600, @@ -70835,28 +73032,29 @@ ] }, { - "id": "cx-67074ab6", - "identity": "mi355x|mori|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|wide-dynamic-range|none|none|0|tuned||c774c8e4abb34da", - "colorKey": "mi355x_4ec24046", - "comparisonKey": "2ccb7553c969aafc", + "id": "cx-00728192", + "identity": "h100|flashinfer|7168|8|256|mxfp8|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "h100_1686fbdd", + "comparisonKey": "f82129f37146e350", "schemaVersion": 3, - "generatedAt": "2026-06-27T00:07:48.076161+00:00", + "generatedAt": "2026-06-28T01:37:59.531491+00:00", "status": "valid", "publicationStatus": "official", - "runner": "mi355x-amds_06", - "sku": "mi355x", - "backend": "mori", - "phase": "decode", + "runner": "h100-dgxc-slurm_18", + "sku": "h100", + "backend": "flashinfer", + "phase": "prefill", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "mi355x-xgmi", - "transport": "xgmi", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "MI355X EP8 · mori · bf16", + "label": "H100 EP8 · flashinfer · mxfp8", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, @@ -70866,223 +73064,260 @@ "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "wide-dynamic-range", + "dispatchDtype": "mxfp8", + "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.3125, - "configuredUnits": 80, - "deviceUnits": 256, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, + "achievedFraction": 
null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, "paretoEligible": false }, "placement": { "kind": "packed", - "nodes": 2, + "nodes": 1, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "c774c8e4abb34da", - "workloadId": "set:5:d8d49658059863f2", + "traceSignature": "64d989e2e2a6b31", + "workloadId": "set:6:a426d66e479dc893", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, - "backendVersion": "image:rocm/sgl-dev:sglang-0.5.9-rocm720-mi35x-mori-0227-2", + "backendVersion": null, "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28272165928", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272165928", - "createdAt": "2026-06-27T00:05:37Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28307780015", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28307780015", + "createdAt": "2026-06-28T01:37:59.531491+00:00", + "sha": "510fc17001789ae4f32b99b60b9a0a0aa53ab6b5" }, "rows": [ { - "tokensPerRank": 1, - "globalTokens": 8, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 40.240999311208725, - "p90": 43.43999922275543, - "p95": 44.76099833846092, - "p99": 48.11999946832657 + "p50": 100.19200295209885, + "p90": 127.6479959487915, + "p95": 128.7039965391159, + "p99": 135.68000495433807 }, "combine": { - "p50": 16.839999705553055, - "p90": 18.319999799132347, - "p95": 19.600000232458115, - "p99": 23.399999365210533 + "p50": 100.19200295209885, + "p90": 127.6479959487915, + "p95": 128.7039965391159, + "p99": 135.68000495433807 }, "roundtrip": { - "p50": 56.120000779628754, - "p90": 59.48000028729439, - "p95": 60.76100096106529, - "p99": 65.24000316858292 + "p50": 100.19200295209885, + "p90": 127.6479959487915, + 
"p95": 128.7039965391159, + "p99": 135.68000495433807 }, "isolatedSum": { - "p50": 57.08099901676178, - "p90": 61.75999902188778, - "p95": 64.36099857091904, - "p99": 71.5199988335371 + "p50": 200.3840059041977, + "p90": 255.295991897583, + "p95": 257.4079930782318, + "p99": 271.36000990867615 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 630784, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 2, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 8, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2, - "globalTokens": 16, + "tokensPerRank": 256, + "globalTokens": 2048, "dispatch": { - "p50": 42.44000092148781, - "p90": 45.48000171780586, - "p95": 46.51999846100807, - "p99": 49.19999837875366 + "p50": 139.26400244235992, + "p90": 165.12000560760498, + "p95": 166.62399470806122, + "p99": 172.41600155830383 }, "combine": { - "p50": 16.201000660657883, - "p90": 18.479999154806137, - "p95": 19.55999992787838, - "p99": 21.800000220537186 + "p50": 139.26400244235992, + "p90": 165.12000560760498, + "p95": 166.62399470806122, + "p99": 172.41600155830383 }, "roundtrip": { - "p50": 58.80099907517433, - "p90": 61.96000054478645, - "p95": 62.76000291109085, - "p99": 64.19999897480011 + "p50": 139.26400244235992, + "p90": 165.12000560760498, + "p95": 166.62399470806122, + "p99": 172.41600155830383 }, "isolatedSum": { - "p50": 58.64100158214569, - "p90": 63.960000872612, - "p95": 66.07999838888645, - "p99": 70.99999859929085 + "p50": 278.52800488471985, + "p90": 330.24001121520996, + "p95": 333.24798941612244, + "p99": 344.83200311660767 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1232896, - "combineLogicalBytes": 1232896, - "fanoutMean": 5.375, - "recvTokensMax": 13, - "stragglerRank": 2, + "dispatchLogicalBytes": 77944832, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + 
"recvTokensMax": 8, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 4, - "globalTokens": 32, + "tokensPerRank": 512, + "globalTokens": 4096, "dispatch": { - "p50": 42.04000160098076, - "p90": 44.280000030994415, - "p95": 45.921001583337784, - "p99": 49.28100109100342 + "p50": 223.90399873256683, + "p90": 230.3999960422516, + "p95": 232.12799429893494, + "p99": 235.29599606990814 }, "combine": { - "p50": 19.039999693632126, - "p90": 21.51999995112419, - "p95": 22.801000624895096, - "p99": 24.560000747442245 + "p50": 223.90399873256683, + "p90": 230.3999960422516, + "p95": 232.12799429893494, + "p99": 235.29599606990814 }, "roundtrip": { - "p50": 61.601001769304276, - "p90": 64.92000073194504, - "p95": 66.00099802017212, - "p99": 67.72000342607498 + "p50": 223.90399873256683, + "p90": 230.3999960422516, + "p95": 232.12799429893494, + "p99": 235.29599606990814 }, "isolatedSum": { - "p50": 61.080001294612885, - "p90": 65.7999999821186, - "p95": 68.72200220823288, - "p99": 73.84100183844566 + "p50": 447.80799746513367, + "p90": 460.7999920845032, + "p95": 464.2559885978699, + "p99": 470.5919921398163 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2480128, - "combineLogicalBytes": 2480128, - "fanoutMean": 5.40625, - "recvTokensMax": 29, - "stragglerRank": 2, + "dispatchLogicalBytes": 156133376, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 8, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 1024, + "globalTokens": 8192, "dispatch": { - "p50": 42.27999970316887, - "p90": 45.00000178813934, - "p95": 46.23999819159508, - "p99": 48.16000163555145 + "p50": 407.26399421691895, + "p90": 413.5040044784546, + "p95": 415.3600037097931, + "p99": 419.0399944782257 }, "combine": { - "p50": 20.320000126957893, - "p90": 23.32100085914135, - "p95": 25.439999997615814, - "p99": 57.88100138306618 + 
"p50": 407.26399421691895, + "p90": 413.5040044784546, + "p95": 415.3600037097931, + "p99": 419.0399944782257 }, "roundtrip": { - "p50": 62.3599998652935, - "p90": 65.0399997830391, - "p95": 66.0799965262413, - "p99": 68.00100207328796 + "p50": 407.26399421691895, + "p90": 413.5040044784546, + "p95": 415.3600037097931, + "p99": 419.0399944782257 }, "isolatedSum": { - "p50": 62.59999983012676, - "p90": 68.3210026472807, - "p95": 71.67999818921089, - "p99": 106.04100301861763 + "p50": 814.5279884338379, + "p90": 827.0080089569092, + "p95": 830.7200074195862, + "p99": 838.0799889564514 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 2, + "dispatchLogicalBytes": 311721984, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 8, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 16, - "globalTokens": 128, + "tokensPerRank": 2048, + "globalTokens": 16384, "dispatch": { - "p50": 42.399998754262924, - "p90": 45.0810007750988, - "p95": 46.23999819159508, - "p99": 48.8400012254715 + "p50": 767.7760124206543, + "p90": 772.8639841079712, + "p95": 775.1359939575195, + "p99": 777.8880000114441 }, "combine": { - "p50": 25.120999664068222, - "p90": 27.2000003606081, - "p95": 28.161000460386276, - "p99": 30.319999903440475 + "p50": 767.7760124206543, + "p90": 772.8639841079712, + "p95": 775.1359939575195, + "p99": 777.8880000114441 }, "roundtrip": { - "p50": 67.63999909162521, - "p90": 70.79999893903732, - "p95": 71.68000191450119, - "p99": 73.72000068426132 + "p50": 767.7760124206543, + "p90": 772.8639841079712, + "p95": 775.1359939575195, + "p99": 777.8880000114441 }, "isolatedSum": { - "p50": 67.52099841833115, - "p90": 72.2810011357069, - "p95": 74.40099865198135, - "p99": 79.16000112891197 + "p50": 1535.5520248413086, + "p90": 1545.7279682159424, + "p95": 1550.271987915039, + "p99": 
1555.7760000228882 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 9920512, - "combineLogicalBytes": 9920512, - "fanoutMean": 5.40625, - "recvTokensMax": 92, - "stragglerRank": 2, + "dispatchLogicalBytes": 621902848, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1483.0399751663208, + "p90": 1491.7759895324707, + "p95": 1494.2400455474854, + "p99": 1497.5039958953857 + }, + "combine": { + "p50": 1483.0399751663208, + "p90": 1491.7759895324707, + "p95": 1494.2400455474854, + "p99": 1497.5039958953857 + }, + "roundtrip": { + "p50": 1483.0399751663208, + "p90": 1491.7759895324707, + "p95": 1494.2400455474854, + "p99": 1497.5039958953857 + }, + "isolatedSum": { + "p50": 2966.0799503326416, + "p90": 2983.5519790649414, + "p95": 2988.4800910949707, + "p99": 2995.0079917907715 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243504640, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 8, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -71090,28 +73325,29 @@ ] }, { - "id": "cx-23f1ecd4", - "identity": "mi355x|mori|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|zeros|none|none|0|tuned||c774c8e4abb34da", - "colorKey": "mi355x_4ec24046", - "comparisonKey": "1ab1f06166250146", + "id": "cx-5657eb6e", + "identity": "h100|uccl|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h100_7104d5f0", + "comparisonKey": "d2fd76f5ec2f3d88", "schemaVersion": 3, - "generatedAt": "2026-06-27T00:06:16.763261+00:00", + "generatedAt": "2026-06-27T17:35:51.567423+00:00", "status": "valid", "publicationStatus": "official", - "runner": "mi355x-amds_02", - "sku": "mi355x", - "backend": "mori", + "runner": 
"h100-dgxc-slurm_14", + "sku": "h100", + "backend": "uccl", "phase": "decode", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "mi355x-xgmi", - "transport": "xgmi", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "MI355X EP8 · mori · bf16", + "label": "H100 EP8 · uccl · bf16", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, @@ -71122,14 +73358,14 @@ "unevenTokens": "none", "eplbEnabled": false, "dispatchDtype": "bf16", - "activationProfile": "zeros", + "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.3125, - "configuredUnits": 80, - "deviceUnits": 256, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -71137,59 +73373,59 @@ }, "placement": { "kind": "packed", - "nodes": 2, + "nodes": 1, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "c774c8e4abb34da", - "workloadId": "set:5:d8d49658059863f2", + "traceSignature": "ac583971f94b176", + "workloadId": "set:8:d8d49658059863f2", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, - "backendVersion": "image:rocm/sgl-dev:sglang-0.5.9-rocm720-mi35x-mori-0227-2", + "backendVersion": null, "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28272158268", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272158268", - "createdAt": "2026-06-27T00:05:24Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28296667411", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28296667411", + 
"createdAt": "2026-06-27T17:35:51.567423+00:00", + "sha": "cfa1ec56258b94b4a173844810a163a832bcb07e" }, "rows": [ { "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 40.240999311208725, - "p90": 43.5199998319149, - "p95": 44.920001178979874, - "p99": 54.32000011205673 + "p50": 127.68000364303589, + "p90": 134.3040019273758, + "p95": 136.60800457000732, + "p99": 143.93599331378937 }, "combine": { - "p50": 17.680000513792038, - "p90": 19.401000812649727, - "p95": 20.759999752044678, - "p99": 23.80100078880787 + "p50": 88.0960002541542, + "p90": 90.36800265312195, + "p95": 91.32800251245499, + "p99": 270.30399441719055 }, "roundtrip": { - "p50": 56.040000170469284, - "p90": 59.12100151181221, - "p95": 60.47999858856201, - "p99": 63.040003180503845 + "p50": 200.28799772262573, + "p90": 205.56800067424774, + "p95": 207.42399990558624, + "p99": 212.79999613761902 }, "isolatedSum": { - "p50": 57.92099982500076, - "p90": 62.92100064456463, - "p95": 65.68000093102455, - "p99": 78.1210009008646 + "p50": 215.7760038971901, + "p90": 224.67200458049774, + "p95": 227.9360070824623, + "p99": 414.2399877309799 }, "roundtripMeasured": true, "dispatchLogicalBytes": 630784, "combineLogicalBytes": 630784, "fanoutMean": 5.5, "recvTokensMax": 7, - "stragglerRank": 1, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 @@ -71198,35 +73434,35 @@ "tokensPerRank": 2, "globalTokens": 16, "dispatch": { - "p50": 42.319998145103455, - "p90": 44.87999901175499, - "p95": 46.480998396873474, - "p99": 49.320999532938 + "p50": 179.51999604701996, + "p90": 186.17600202560425, + "p95": 189.2160028219223, + "p99": 194.91200149059296 }, "combine": { - "p50": 16.720000654459, - "p90": 18.240999430418015, - "p95": 19.401000812649727, - "p99": 23.240000009536743 + "p50": 99.20000284910202, + "p90": 105.82400113344193, + "p95": 107.19999670982361, + "p99": 191.64800643920898 }, "roundtrip": { - "p50": 58.479998260736465, - "p90": 61.879999935626984, - "p95": 
62.880001962184906, - "p99": 65.99999964237213 + "p50": 254.84800338745117, + "p90": 262.7840042114258, + "p95": 265.4719948768616, + "p99": 418.8799858093262 }, "isolatedSum": { - "p50": 59.039998799562454, - "p90": 63.120998442173004, - "p95": 65.8819992095232, - "p99": 72.56099954247475 + "p50": 278.719998896122, + "p90": 292.0000031590462, + "p95": 296.4159995317459, + "p99": 386.56000792980194 }, "roundtripMeasured": true, "dispatchLogicalBytes": 1232896, "combineLogicalBytes": 1232896, "fanoutMean": 5.375, "recvTokensMax": 13, - "stragglerRank": 1, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 @@ -71235,35 +73471,35 @@ "tokensPerRank": 4, "globalTokens": 32, "dispatch": { - "p50": 41.919998824596405, - "p90": 45.120999217033386, - "p95": 46.59999907016754, - "p99": 50.84000155329704 + "p50": 130.87999820709229, + "p90": 193.4400051832199, + "p95": 196.57599925994873, + "p99": 200.95999538898468 }, "combine": { - "p50": 19.79999989271164, - "p90": 21.27999998629093, - "p95": 23.16099964082241, - "p99": 25.400999933481216 + "p50": 89.72799777984619, + "p90": 107.26399719715118, + "p95": 108.06400328874588, + "p99": 112.31999844312668 }, "roundtrip": { - "p50": 61.51999905705452, - "p90": 64.40100073814392, - "p95": 65.80100208520889, - "p99": 68.24000179767609 + "p50": 204.67199385166168, + "p90": 272.352010011673, + "p95": 274.78399872779846, + "p99": 282.30398893356323 }, "isolatedSum": { - "p50": 61.719998717308044, - "p90": 66.40099920332432, - "p95": 69.76099871098995, - "p99": 76.24100148677826 + "p50": 220.60799598693848, + "p90": 300.7040023803711, + "p95": 304.6400025486946, + "p99": 313.27999383211136 }, "roundtripMeasured": true, "dispatchLogicalBytes": 2480128, "combineLogicalBytes": 2480128, "fanoutMean": 5.40625, "recvTokensMax": 29, - "stragglerRank": 1, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 @@ -71272,35 +73508,35 @@ "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 
42.121000587940216, - "p90": 45.04000023007393, - "p95": 46.31999880075455, - "p99": 50.641000270843506 + "p50": 133.05599987506866, + "p90": 187.23200261592865, + "p95": 195.93599438667297, + "p99": 479.0079891681671 }, "combine": { - "p50": 21.04100026190281, - "p90": 22.95999974012375, - "p95": 24.6799997985363, - "p99": 26.920000091195107 + "p50": 89.75999802350998, + "p90": 104.73600029945374, + "p95": 105.92000186443329, + "p99": 108.0000028014183 }, "roundtrip": { - "p50": 62.20100075006485, - "p90": 66.39999896287918, - "p95": 68.59999895095825, - "p99": 95.88100016117096 + "p50": 205.63200116157532, + "p90": 260.44800877571106, + "p95": 262.36799359321594, + "p99": 269.79199051856995 }, "isolatedSum": { - "p50": 63.162000849843025, - "p90": 67.99999997019768, - "p95": 70.99999859929085, - "p99": 77.56100036203861 + "p50": 222.81599789857864, + "p90": 291.9680029153824, + "p95": 301.85599625110626, + "p99": 587.0079919695854 }, "roundtripMeasured": true, "dispatchLogicalBytes": 4974592, "combineLogicalBytes": 4974592, "fanoutMean": 5.421875, "recvTokensMax": 47, - "stragglerRank": 1, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 @@ -71309,35 +73545,146 @@ "tokensPerRank": 16, "globalTokens": 128, "dispatch": { - "p50": 42.281001806259155, - "p90": 45.27999833226204, - "p95": 46.51999846100807, - "p99": 49.320001155138016 + "p50": 129.60000336170197, + "p90": 195.16800343990326, + "p95": 197.60000705718994, + "p99": 203.2960057258606 }, "combine": { - "p50": 25.919999927282333, - "p90": 28.080999851226807, - "p95": 29.559999704360962, - "p99": 32.35999867320061 + "p50": 90.52799642086029, + "p90": 107.29599744081497, + "p95": 108.15999656915665, + "p99": 114.30399864912033 }, "roundtrip": { - "p50": 67.31999665498734, - "p90": 70.2809989452362, - "p95": 71.40100002288818, - "p99": 74.16000217199326 + "p50": 206.59199357032776, + "p90": 274.6559977531433, + "p95": 275.9360074996948, + "p99": 280.7680070400238 }, "isolatedSum": { - 
"p50": 68.20100173354149, - "p90": 73.36099818348885, - "p95": 76.07999816536903, - "p99": 81.67999982833862 + "p50": 220.12799978256226, + "p90": 302.46400088071823, + "p95": 305.7600036263466, + "p99": 317.6000043749809 }, "roundtripMeasured": true, "dispatchLogicalBytes": 9920512, "combineLogicalBytes": 9920512, "fanoutMean": 5.40625, "recvTokensMax": 92, - "stragglerRank": 1, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 181.7920058965683, + "p90": 195.68000733852386, + "p95": 198.81600141525269, + "p99": 324.47999715805054 + }, + "combine": { + "p50": 108.12799632549286, + "p90": 115.39199948310852, + "p95": 116.19199812412262, + "p99": 118.97599697113037 + }, + "roundtrip": { + "p50": 263.7439966201782, + "p90": 279.83999252319336, + "p95": 281.43998980522156, + "p99": 286.20800375938416 + }, + "isolatedSum": { + "p50": 289.92000222206116, + "p90": 311.0720068216324, + "p95": 315.0079995393753, + "p99": 443.4559941291809 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 147.71200716495514, + "p90": 197.11999595165253, + "p95": 200.3840059041977, + "p99": 211.67999505996704 + }, + "combine": { + "p50": 105.95200210809708, + "p90": 124.32000041007996, + "p95": 125.2799928188324, + "p99": 129.98400628566742 + }, + "roundtrip": { + "p50": 221.0880070924759, + "p90": 289.40799832344055, + "p95": 292.28800535202026, + "p99": 295.77600955963135 + }, + "isolatedSum": { + "p50": 253.66400927305222, + "p90": 321.4399963617325, + "p95": 325.6639987230301, + "p99": 341.66400134563446 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 
5.3125, + "recvTokensMax": 367, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 186.24000251293182, + "p90": 199.74400103092194, + "p95": 202.55999267101288, + "p99": 208.03199708461761 + }, + "combine": { + "p50": 134.20799374580383, + "p90": 139.96799290180206, + "p95": 141.15199446678162, + "p99": 147.2640037536621 + }, + "roundtrip": { + "p50": 292.32001304626465, + "p90": 306.62399530410767, + "p95": 309.63200330734253, + "p99": 314.5279884338379 + }, + "isolatedSum": { + "p50": 320.44799625873566, + "p90": 339.711993932724, + "p95": 343.7119871377945, + "p99": 355.2960008382797 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 @@ -71345,34 +73692,35 @@ ] }, { - "id": "cx-83a44089", - "identity": "mi355x|mori|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|decode|normal|none|none|0|tuned||2c22646e864c27e", - "colorKey": "mi355x_eb5b377e", - "comparisonKey": "5bbe7a250a72d8b4", + "id": "cx-8af55e63", + "identity": "h100|uccl|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "h100_7104d5f0", + "comparisonKey": "4f16a23c02cdc2c5", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:58:24.839410+00:00", + "generatedAt": "2026-06-27T17:35:56.194527+00:00", "status": "valid", "publicationStatus": "official", - "runner": "mi355x-amds_01", - "sku": "mi355x", - "backend": "mori", - "phase": "decode", + "runner": "h100-dgxc-slurm_07", + "sku": "h100", + "backend": "uccl", + "phase": "prefill", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "mi355x-xgmi", - "transport": "xgmi", 
+ "topologyClass": "h100-nvlink-island", + "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "MI355X EP8 · mori · bf16 · balanced", + "label": "H100 EP8 · uccl · bf16", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, "experts": 256, - "routing": "balanced", - "routingLabel": "balanced", + "routing": "uniform", + "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, @@ -71382,9 +73730,9 @@ }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.3125, - "configuredUnits": 80, - "deviceUnits": 256, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -71392,207 +73740,244 @@ }, "placement": { "kind": "packed", - "nodes": 2, + "nodes": 1, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "2c22646e864c27e", - "workloadId": "set:5:7af12818400d6348", + "traceSignature": "64d989e2e2a6b31", + "workloadId": "set:6:a426d66e479dc893", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, - "backendVersion": "image:rocm/sgl-dev:sglang-0.5.9-rocm720-mi35x-mori-0227-2", + "backendVersion": null, "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271906612", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271906612", - "createdAt": "2026-06-26T23:57:30Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28296667411", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28296667411", + "createdAt": "2026-06-27T17:35:56.194527+00:00", + "sha": "cfa1ec56258b94b4a173844810a163a832bcb07e" }, "rows": [ { - "tokensPerRank": 1, - "globalTokens": 8, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 40.36099836230278, - "p90": 
43.44100132584572, - "p95": 44.60100084543228, - "p99": 48.920001834630966 + "p50": 161.1199975013733, + "p90": 165.24800658226013, + "p95": 166.9120043516159, + "p99": 170.84799706935883 }, "combine": { - "p50": 16.3199994713068, - "p90": 18.880000337958336, - "p95": 19.88000050187111, - "p99": 21.880999207496643 + "p50": 120.99199742078781, + "p90": 123.10399860143661, + "p95": 128.03199887275696, + "p99": 143.99999380111694 }, "roundtrip": { - "p50": 57.20100179314613, - "p90": 60.63999980688095, - "p95": 61.72100082039833, - "p99": 64.56000357866287 + "p50": 242.01600253582, + "p90": 246.7840015888214, + "p95": 248.86399507522583, + "p99": 252.70399451255798 }, "isolatedSum": { - "p50": 56.68099783360958, - "p90": 62.321001663804054, - "p95": 64.48100134730339, - "p99": 70.80100104212761 + "p50": 282.1119949221611, + "p90": 288.35200518369675, + "p95": 294.94400322437286, + "p99": 314.84799087047577 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 917504, - "combineLogicalBytes": 917504, - "fanoutMean": 8, - "recvTokensMax": 8, - "stragglerRank": 6, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2, - "globalTokens": 16, + "tokensPerRank": 256, + "globalTokens": 2048, "dispatch": { - "p50": 42.64099895954132, - "p90": 45.680999755859375, - "p95": 47.2010001540184, - "p99": 49.47999864816666 + "p50": 186.5919977426529, + "p90": 191.42399728298187, + "p95": 193.15199553966522, + "p99": 197.31199741363525 }, "combine": { - "p50": 16.519999131560326, - "p90": 18.92000064253807, - "p95": 20.080000162124634, - "p99": 21.801000460982323 + "p50": 164.67200219631195, + "p90": 170.04799842834473, + "p95": 171.23199999332428, + "p99": 175.04000663757324 }, "roundtrip": { - "p50": 59.52100083231926, - "p90": 62.67999857664108, - "p95": 63.84100019931793, - "p99": 66.96099787950516 + "p50": 
305.08801341056824, + "p90": 309.56798791885376, + "p95": 310.9759986400604, + "p99": 315.42399525642395 }, "isolatedSum": { - "p50": 59.160998091101646, - "p90": 64.60100039839745, - "p95": 67.28100031614304, - "p99": 71.28099910914898 + "p50": 351.26399993896484, + "p90": 361.4719957113266, + "p95": 364.3839955329895, + "p99": 372.3520040512085 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1835008, - "combineLogicalBytes": 1835008, - "fanoutMean": 8, - "recvTokensMax": 16, - "stragglerRank": 6, + "dispatchLogicalBytes": 155889664, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 4, - "globalTokens": 32, + "tokensPerRank": 512, + "globalTokens": 4096, "dispatch": { - "p50": 42.64000058174133, - "p90": 45.8809994161129, - "p95": 47.00100049376488, - "p99": 49.959998577833176 + "p50": 239.55200612545013, + "p90": 243.68000030517578, + "p95": 245.31200528144836, + "p99": 250.62400102615356 }, "combine": { - "p50": 20.759999752044678, - "p90": 23.600000888109207, - "p95": 24.480000138282776, - "p99": 26.760000735521317 + "p50": 242.78399348258972, + "p90": 246.848002076149, + "p95": 248.60799312591553, + "p99": 251.8720030784607 }, "roundtrip": { - "p50": 64.12000209093094, - "p90": 67.08099693059921, - "p95": 67.88100302219391, - "p99": 70.36100327968597 + "p50": 442.4000084400177, + "p90": 448.35200905799866, + "p95": 450.20800828933716, + "p99": 453.92000675201416 }, "isolatedSum": { - "p50": 63.40000033378601, - "p90": 69.4810003042221, - "p95": 71.48100063204765, - "p99": 76.71999931335449 + "p50": 482.33599960803986, + "p90": 490.52800238132477, + "p95": 493.9199984073639, + "p99": 502.49600410461426 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 3670016, - "combineLogicalBytes": 3670016, - "fanoutMean": 8, - "recvTokensMax": 32, - "stragglerRank": 6, + "dispatchLogicalBytes": 312266752, + 
"combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 1024, + "globalTokens": 8192, "dispatch": { - "p50": 42.7200011909008, - "p90": 45.88000103831291, - "p95": 47.36100137233734, - "p99": 49.60000142455101 + "p50": 346.8799889087677, + "p90": 351.4240086078644, + "p95": 353.4719944000244, + "p99": 358.0799996852875 }, "combine": { - "p50": 22.679999470710754, - "p90": 25.280000641942024, - "p95": 26.159999892115593, - "p99": 27.240000665187836 + "p50": 376.6399919986725, + "p90": 383.4240138530731, + "p95": 385.79198718070984, + "p99": 474.2400050163269 }, "roundtrip": { - "p50": 65.72099775075912, - "p90": 68.64099949598312, - "p95": 69.64000314474106, - "p99": 72.2000002861023 + "p50": 684.0000152587891, + "p90": 691.3920044898987, + "p95": 693.8560009002686, + "p99": 700.4479765892029 }, "isolatedSum": { - "p50": 65.40000066161156, - "p90": 71.16000168025494, - "p95": 73.52100126445293, - "p99": 76.84000208973885 + "p50": 723.5199809074402, + "p90": 734.8480224609375, + "p95": 739.2639815807343, + "p99": 832.3200047016144 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 7340032, - "combineLogicalBytes": 7340032, - "fanoutMean": 8, - "recvTokensMax": 64, - "stragglerRank": 6, + "dispatchLogicalBytes": 623443968, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 16, - "globalTokens": 128, + "tokensPerRank": 2048, + "globalTokens": 16384, "dispatch": { - "p50": 42.87999868392944, - "p90": 45.88000103831291, - "p95": 46.959999948740005, - "p99": 48.79999905824661 + "p50": 571.1039900779724, + "p90": 600.8960008621216, + "p95": 606.1760187149048, + "p99": 621.1839914321899 }, "combine": { - "p50": 28.119999915361404, - "p90": 30.44000081717968, - 
"p95": 31.401000916957855, - "p99": 33.640000969171524 + "p50": 647.5840210914612, + "p90": 655.7440161705017, + "p95": 657.9520106315613, + "p99": 664.9919748306274 }, "roundtrip": { - "p50": 71.80000096559525, - "p90": 75.15999674797058, - "p95": 76.39999687671661, - "p99": 78.31999659538269 + "p50": 1174.720048904419, + "p90": 1189.0239715576172, + "p95": 1194.3999528884888, + "p99": 1201.1200189590454 }, "isolatedSum": { - "p50": 70.99999859929085, - "p90": 76.32000185549259, - "p95": 78.36100086569786, - "p99": 82.44000002741814 + "p50": 1218.6880111694336, + "p90": 1256.6400170326233, + "p95": 1264.128029346466, + "p99": 1286.1759662628174 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 14680064, - "combineLogicalBytes": 14680064, - "fanoutMean": 8, - "recvTokensMax": 128, - "stragglerRank": 6, + "dispatchLogicalBytes": 1243805696, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1035.5839729309082, + "p90": 1058.0799579620361, + "p95": 1064.9919509887695, + "p99": 1074.463963508606 + }, + "combine": { + "p50": 1176.1280298233032, + "p90": 1185.5679750442505, + "p95": 1188.6399984359741, + "p99": 1197.376012802124 + }, + "roundtrip": { + "p50": 2155.263900756836, + "p90": 2171.488046646118, + "p95": 2174.815893173218, + "p99": 2184.2238903045654 + }, + "isolatedSum": { + "p50": 2211.7120027542114, + "p90": 2243.6479330062866, + "p95": 2253.6319494247437, + "p99": 2271.83997631073 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487009280, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -71600,34 +73985,35 @@ ] }, { - "id": "cx-c1291ad7", - "identity": 
"mi355x|mori|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|8|decode|normal|none|none|0|tuned||15d7289bb70ed17", - "colorKey": "mi355x_ae729691", - "comparisonKey": "730c294e090417f2", + "id": "cx-7d1c49e4", + "identity": "h200|deepep|4096|8|128|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||75530960a30b452", + "colorKey": "h200_d982b749", + "comparisonKey": "d546c8db19c82066", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:59:10.167624+00:00", + "generatedAt": "2026-06-27T11:14:25.842054+00:00", "status": "valid", "publicationStatus": "official", - "runner": "mi355x-amds_06", - "sku": "mi355x", - "backend": "mori", + "runner": "h200-dgxc-slurm_5", + "sku": "h200", + "backend": "deepep", "phase": "decode", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "mi355x-xgmi", - "transport": "xgmi", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "MI355X EP8 · mori · bf16 · balanced-rank-local", + "label": "H200 EP8 · deepep · bf16", + "model": "Qwen3.5", "shape": { - "hidden": 7168, + "hidden": 4096, "topk": 8, - "experts": 256, - "routing": "balanced-rank-local", - "routingLabel": "balanced-rank-local", + "experts": 128, + "routing": "uniform", + "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, @@ -71637,9 +74023,9 @@ }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.3125, - "configuredUnits": 80, - "deviceUnits": 256, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -71647,59 +74033,59 @@ }, "placement": { "kind": "packed", - "nodes": 2, + "nodes": 1, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": 
"15d7289bb70ed17", - "workloadId": "set:5:2eebbed158fe1320", + "traceSignature": "75530960a30b452", + "workloadId": "set:8:d1b92539bddfb570", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, - "backendVersion": "image:rocm/sgl-dev:sglang-0.5.9-rocm720-mi35x-mori-0227-2", + "backendVersion": "1.2.1", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271910050", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271910050", - "createdAt": "2026-06-26T23:57:37Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28287506806", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28287506806", + "createdAt": "2026-06-27T11:14:25.842054+00:00", + "sha": "df7fddee0a275156d4c72fa006cd2b73bce72613" }, "rows": [ { "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 36.80099919438362, - "p90": 39.80100154876709, - "p95": 40.76100140810013, - "p99": 43.63999888300896 + "p50": 68.64000111818314, + "p90": 105.82400113344193, + "p95": 117.15199798345566, + "p99": 156.89599514007568 }, "combine": { - "p50": 15.320000238716602, - "p90": 17.480000853538513, - "p95": 18.68000067770481, - "p99": 20.999999716877937 + "p50": 59.87200140953064, + "p90": 75.13599842786789, + "p95": 80.83199709653854, + "p99": 98.75199943780899 }, "roundtrip": { - "p50": 49.07999932765961, - "p90": 51.80000141263008, - "p95": 52.76099964976311, - "p99": 53.76100167632103 + "p50": 113.79200220108032, + "p90": 150.01599490642548, + "p95": 160.73599457740784, + "p99": 198.7520009279251 }, "isolatedSum": { - "p50": 52.12099943310022, - "p90": 57.2810024023056, - "p95": 59.44100208580494, - "p99": 64.6399985998869 + "p50": 128.51200252771378, + "p90": 180.95999956130981, + "p95": 197.9839950799942, + "p99": 255.64799457788467 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 114688, - 
"combineLogicalBytes": 114688, - "fanoutMean": 1, - "recvTokensMax": 4, - "stragglerRank": 5, + "dispatchLogicalBytes": 344064, + "combineLogicalBytes": 344064, + "fanoutMean": 5.25, + "recvTokensMax": 6, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 @@ -71708,35 +74094,35 @@ "tokensPerRank": 2, "globalTokens": 16, "dispatch": { - "p50": 38.07999938726425, - "p90": 40.39999842643738, - "p95": 41.20099917054176, - "p99": 42.80000180006027 + "p50": 69.37599927186966, + "p90": 103.67999970912933, + "p95": 114.3679991364479, + "p99": 147.96799421310425 }, "combine": { - "p50": 15.799999237060547, - "p90": 17.999999225139618, - "p95": 19.279999658465385, - "p99": 21.040000021457672 + "p50": 59.67999994754791, + "p90": 74.40000027418137, + "p95": 81.66400343179703, + "p99": 100.80000013113022 }, "roundtrip": { - "p50": 51.600001752376556, - "p90": 53.92000079154968, - "p95": 55.24099990725517, - "p99": 57.32100084424019 + "p50": 113.82400244474411, + "p90": 150.56000649929047, + "p95": 163.16799819469452, + "p99": 199.74400103092194 }, "isolatedSum": { - "p50": 53.8799986243248, - "p90": 58.399997651576996, - "p95": 60.48099882900715, - "p99": 63.840001821517944 + "p50": 129.05599921941757, + "p90": 178.0799999833107, + "p95": 196.03200256824493, + "p99": 248.76799434423447 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 229376, - "combineLogicalBytes": 229376, - "fanoutMean": 1, - "recvTokensMax": 4, - "stragglerRank": 5, + "dispatchLogicalBytes": 704512, + "combineLogicalBytes": 704512, + "fanoutMean": 5.375, + "recvTokensMax": 12, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 @@ -71745,35 +74131,35 @@ "tokensPerRank": 4, "globalTokens": 32, "dispatch": { - "p50": 32.71999955177307, - "p90": 35.5600006878376, - "p95": 36.559998989105225, - "p99": 39.000000804662704 + "p50": 67.10399687290192, + "p90": 91.45600348711014, + "p95": 103.90400141477585, + "p99": 139.615997672081 }, "combine": { - "p50": 
13.72000016272068, - "p90": 15.799999237060547, - "p95": 16.599999740719795, - "p99": 18.120000138878822 + "p50": 59.39200147986412, + "p90": 71.87200337648392, + "p95": 76.09599828720093, + "p99": 94.52799707651138 }, "roundtrip": { - "p50": 45.71999981999397, - "p90": 49.04000088572502, - "p95": 49.96100068092346, - "p99": 51.44000053405762 + "p50": 110.81600189208984, + "p90": 141.59999787807465, + "p95": 150.39999783039093, + "p99": 204.12799715995789 }, "isolatedSum": { - "p50": 46.43999971449375, - "p90": 51.35999992489815, - "p95": 53.15999872982502, - "p99": 57.12000094354153 + "p50": 126.49599835276604, + "p90": 163.32800686359406, + "p95": 179.99999970197678, + "p99": 234.14399474859238 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 458752, - "combineLogicalBytes": 458752, - "fanoutMean": 1, - "recvTokensMax": 4, - "stragglerRank": 5, + "dispatchLogicalBytes": 1384448, + "combineLogicalBytes": 1384448, + "fanoutMean": 5.28125, + "recvTokensMax": 26, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -71782,34 +74168,34 @@ "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 37.84099966287613, - "p90": 40.92000052332878, - "p95": 41.999999433755875, - "p99": 43.880000710487366 + "p50": 72.12799787521362, + "p90": 104.22399640083313, + "p95": 114.46399986743927, + "p99": 165.72800278663635 }, "combine": { - "p50": 14.919999986886978, - "p90": 17.27999933063984, - "p95": 18.039999529719353, - "p99": 19.55999992787838 + "p50": 60.47999858856201, + "p90": 74.20799881219864, + "p95": 82.30400085449219, + "p99": 100.09600222110748 }, "roundtrip": { - "p50": 52.241001278162, - "p90": 55.75999990105629, - "p95": 56.68000131845474, - "p99": 58.35999920964241 + "p50": 112.5119999051094, + "p90": 143.71199905872345, + "p95": 156.25600516796112, + "p99": 205.53599298000336 }, "isolatedSum": { - "p50": 52.76099964976311, - "p90": 58.19999985396862, - "p95": 60.03999896347523, - "p99": 63.440000638365746 + "p50": 
132.60799646377563, + "p90": 178.43199521303177, + "p95": 196.76800072193146, + "p99": 265.82400500774384 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 917504, - "combineLogicalBytes": 917504, - "fanoutMean": 1, - "recvTokensMax": 8, + "dispatchLogicalBytes": 2744320, + "combineLogicalBytes": 2744320, + "fanoutMean": 5.234375, + "recvTokensMax": 49, "stragglerRank": 5, "correct": true, "samplesPooled": 600, @@ -71819,70 +74205,182 @@ "tokensPerRank": 16, "globalTokens": 128, "dispatch": { - "p50": 38.24099898338318, - "p90": 40.92000052332878, - "p95": 41.839998215436935, - "p99": 44.16000097990036 + "p50": 73.56800138950348, + "p90": 99.35999661684036, + "p95": 106.33599758148193, + "p99": 118.81600320339203 }, "combine": { - "p50": 16.24000072479248, - "p90": 18.841000273823738, - "p95": 19.88000050187111, - "p99": 22.280000150203705 + "p50": 60.736000537872314, + "p90": 74.94399696588516, + "p95": 80.79999685287476, + "p99": 96.63999825716019 }, "roundtrip": { - "p50": 54.28000167012215, - "p90": 57.840000838041306, - "p95": 58.800000697374344, - "p99": 60.96100062131882 + "p50": 116.67200177907944, + "p90": 153.9199948310852, + "p95": 182.3360025882721, + "p99": 242.97599494457245 }, "isolatedSum": { - "p50": 54.48099970817566, - "p90": 59.76100079715252, - "p95": 61.719998717308044, - "p99": 66.44000113010406 + "p50": 134.3040019273758, + "p90": 174.30399358272552, + "p95": 187.1359944343567, + "p99": 215.45600146055222 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1835008, - "combineLogicalBytes": 1835008, - "fanoutMean": 1, - "recvTokensMax": 16, + "dispatchLogicalBytes": 5464064, + "combineLogicalBytes": 5464064, + "fanoutMean": 5.2109375, + "recvTokensMax": 94, "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 74.72000271081924, + "p90": 101.98400169610977, + "p95": 109.79200154542923, + "p99": 140.28799533843994 + }, + "combine": { + 
"p50": 62.68800050020218, + "p90": 80.38400113582611, + "p95": 86.91199868917465, + "p99": 119.71200257539749 + }, + "roundtrip": { + "p50": 116.83200299739838, + "p90": 152.19199657440186, + "p95": 162.56000101566315, + "p99": 194.75199282169342 + }, + "isolatedSum": { + "p50": 137.40800321102142, + "p90": 182.36800283193588, + "p95": 196.70400023460388, + "p99": 259.99999791383743 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 11124736, + "combineLogicalBytes": 11124736, + "fanoutMean": 5.3046875, + "recvTokensMax": 186, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 83.48800241947174, + "p90": 127.96799838542938, + "p95": 135.3919953107834, + "p99": 240.1919960975647 + }, + "combine": { + "p50": 70.3359991312027, + "p90": 83.96799862384796, + "p95": 89.9519994854927, + "p99": 99.61599856615067 + }, + "roundtrip": { + "p50": 128.4160017967224, + "p90": 151.74399316310883, + "p95": 159.42400693893433, + "p99": 176.12800002098083 + }, + "isolatedSum": { + "p50": 153.82400155067444, + "p90": 211.93599700927734, + "p95": 225.3439947962761, + "p99": 339.80799466371536 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22192128, + "combineLogicalBytes": 22192128, + "fanoutMean": 5.291015625, + "recvTokensMax": 358, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 90.46400338411331, + "p90": 113.47199976444244, + "p95": 120.19199877977371, + "p99": 153.6960005760193 + }, + "combine": { + "p50": 84.06399935483932, + "p90": 97.6639986038208, + "p95": 102.30399668216705, + "p99": 120.31999975442886 + }, + "roundtrip": { + "p50": 152.6080071926117, + "p90": 178.24000120162964, + "p95": 190.72000682353973, + "p99": 231.99999332427979 + }, + "isolatedSum": { + "p50": 174.52800273895264, + "p90": 211.13599836826324, + "p95": 
222.49599546194077, + "p99": 274.01600033044815 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 44564480, + "combineLogicalBytes": 44564480, + "fanoutMean": 5.3125, + "recvTokensMax": 699, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 } ] }, { - "id": "cx-ace78f17", - "identity": "mi355x|mori|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|decode|normal|none|none|0|tuned||c8b7839b4895c1a", - "colorKey": "mi355x_62dc5cd4", - "comparisonKey": "316ae2638347880f", + "id": "cx-ab8f0534", + "identity": "h200|deepep|4096|8|128|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||75530960a30b452", + "colorKey": "h200_3a47b6c9", + "comparisonKey": "40ee6d196d286895", "schemaVersion": 3, - "generatedAt": "2026-06-27T00:01:29.418642+00:00", + "generatedAt": "2026-06-26T23:53:38.574880+00:00", "status": "valid", "publicationStatus": "official", - "runner": "mi355x-amds_00", - "sku": "mi355x", - "backend": "mori", + "runner": "h200-dgxc-slurm_6", + "sku": "h200", + "backend": "deepep", "phase": "decode", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "mi355x-xgmi", - "transport": "xgmi", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "MI355X EP8 · mori · bf16 · hotspot-single", + "label": "H200 EP8 · deepep · bf16", + "model": "Qwen3.5", "shape": { - "hidden": 7168, + "hidden": 4096, "topk": 8, - "experts": 256, - "routing": "hotspot-single", - "routingLabel": "hotspot-single", + "experts": 128, + "routing": "uniform", + "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, @@ -71892,9 +74390,9 @@ }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.3125, - "configuredUnits": 80, - "deviceUnits": 256, + 
"achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -71902,23 +74400,23 @@ }, "placement": { "kind": "packed", - "nodes": 2, + "nodes": 1, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "c8b7839b4895c1a", - "workloadId": "set:5:286be993cd819ed9", + "traceSignature": "75530960a30b452", + "workloadId": "set:8:d1b92539bddfb570", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, - "backendVersion": "image:rocm/sgl-dev:sglang-0.5.9-rocm720-mi35x-mori-0227-2", + "backendVersion": "1.2.1", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271920340", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271920340", - "createdAt": "2026-06-26T23:57:58Z", + "id": "28271743900", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271743900", + "createdAt": "2026-06-26T23:53:38.574880+00:00", "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" }, "rows": [ @@ -71926,35 +74424,35 @@ "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 39.84000161290169, - "p90": 42.55999997258186, - "p95": 44.08000037074089, - "p99": 48.601001501083374 + "p50": 68.64000111818314, + "p90": 89.56799656152725, + "p95": 96.41599655151367, + "p99": 126.36800110340118 }, "combine": { - "p50": 16.200000420212746, - "p90": 17.960000783205032, - "p95": 19.07999999821186, - "p99": 21.640000864863396 + "p50": 58.04799869656563, + "p90": 69.60000097751617, + "p95": 74.52800124883652, + "p99": 91.80799871683121 }, "roundtrip": { - "p50": 55.44000118970871, - "p90": 58.27999860048294, - "p95": 59.20099839568138, - "p99": 60.920000076293945 + "p50": 112.73600161075592, + "p90": 135.93600690364838, + "p95": 145.7280069589615, + "p99": 215.26400744915009 }, 
"isolatedSum": { - "p50": 56.04000203311443, - "p90": 60.520000755786896, - "p95": 63.16000036895275, - "p99": 70.24100236594677 + "p50": 126.68799981474876, + "p90": 159.16799753904343, + "p95": 170.9439978003502, + "p99": 218.1759998202324 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 602112, - "combineLogicalBytes": 602112, + "dispatchLogicalBytes": 344064, + "combineLogicalBytes": 344064, "fanoutMean": 5.25, - "recvTokensMax": 8, - "stragglerRank": 5, + "recvTokensMax": 6, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 @@ -71963,35 +74461,35 @@ "tokensPerRank": 2, "globalTokens": 16, "dispatch": { - "p50": 42.1609990298748, - "p90": 44.920001178979874, - "p95": 45.80099880695343, - "p99": 47.800999134778976 + "p50": 69.34399902820587, + "p90": 88.22400122880936, + "p95": 94.68799829483032, + "p99": 116.15999788045883 }, "combine": { - "p50": 16.07999950647354, - "p90": 18.401000648736954, - "p95": 19.279999658465385, - "p99": 20.880000665783882 + "p50": 58.94400179386139, + "p90": 68.70400160551071, + "p95": 72.03199714422226, + "p99": 83.52000266313553 }, "roundtrip": { - "p50": 58.35999920964241, - "p90": 61.56099960207939, - "p95": 62.60000169277191, - "p99": 64.7599995136261 + "p50": 112.89600282907486, + "p90": 138.3039951324463, + "p95": 150.52799880504608, + "p99": 196.51199877262115 }, "isolatedSum": { - "p50": 58.24099853634834, - "p90": 63.32100182771683, - "p95": 65.08099846541882, - "p99": 68.68099980056286 + "p50": 128.28800082206726, + "p90": 156.92800283432007, + "p95": 166.71999543905258, + "p99": 199.68000054359436 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1247232, - "combineLogicalBytes": 1247232, - "fanoutMean": 5.4375, - "recvTokensMax": 16, - "stragglerRank": 6, + "dispatchLogicalBytes": 704512, + "combineLogicalBytes": 704512, + "fanoutMean": 5.375, + "recvTokensMax": 12, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 @@ -72000,35 +74498,35 @@ "tokensPerRank": 4, 
"globalTokens": 32, "dispatch": { - "p50": 41.96000099182129, - "p90": 44.599998742341995, - "p95": 45.96000164747238, - "p99": 48.16100001335144 + "p50": 70.46400010585785, + "p90": 84.63999629020691, + "p95": 92.0960009098053, + "p99": 110.78400164842606 }, "combine": { - "p50": 19.401000812649727, - "p90": 21.880000829696655, - "p95": 23.080000653862953, - "p99": 24.12099950015545 + "p50": 60.28800085186958, + "p90": 70.91200351715088, + "p95": 75.16799867153168, + "p99": 87.5839963555336 }, "roundtrip": { - "p50": 61.68099865317345, - "p90": 65.20099937915802, - "p95": 65.99999964237213, - "p99": 67.4000009894371 + "p50": 114.20799791812897, + "p90": 135.68000495433807, + "p95": 147.64800667762756, + "p99": 195.5520063638687 }, "isolatedSum": { - "p50": 61.361001804471016, - "p90": 66.47999957203865, - "p95": 69.04000230133533, - "p99": 72.28199951350689 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2451456, - "combineLogicalBytes": 2451456, - "fanoutMean": 5.34375, - "recvTokensMax": 32, - "stragglerRank": 5, + "p50": 130.75200095772743, + "p90": 155.5519998073578, + "p95": 167.26399958133698, + "p99": 198.36799800395966 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1384448, + "combineLogicalBytes": 1384448, + "fanoutMean": 5.28125, + "recvTokensMax": 26, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -72037,35 +74535,35 @@ "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 41.80099815130234, - "p90": 44.2809984087944, - "p95": 45.559998601675034, - "p99": 48.39999973773956 + "p50": 72.38399982452393, + "p90": 103.71199995279312, + "p95": 118.72000247240067, + "p99": 215.61600267887115 }, "combine": { - "p50": 21.239999681711197, - "p90": 23.19999970495701, - "p95": 24.080000817775726, - "p99": 26.040000841021538 + "p50": 61.055999249219894, + "p90": 76.03199779987335, + "p95": 81.7599967122078, + "p99": 112.57600039243698 }, "roundtrip": { - "p50": 62.960997223854065, - "p90": 
66.041000187397, - "p95": 66.91999733448029, - "p99": 68.71999800205231 + "p50": 115.84000289440155, + "p90": 143.51999759674072, + "p95": 151.67999267578125, + "p99": 190.46400487422943 }, "isolatedSum": { - "p50": 63.040997833013535, - "p90": 67.48099811375141, - "p95": 69.63999941945076, - "p99": 74.4400005787611 + "p50": 133.43999907374382, + "p90": 179.74399775266647, + "p95": 200.47999918460846, + "p99": 328.19200307130814 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4859904, - "combineLogicalBytes": 4859904, - "fanoutMean": 5.296875, - "recvTokensMax": 64, - "stragglerRank": 6, + "dispatchLogicalBytes": 2744320, + "combineLogicalBytes": 2744320, + "fanoutMean": 5.234375, + "recvTokensMax": 49, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 @@ -72074,35 +74572,146 @@ "tokensPerRank": 16, "globalTokens": 128, "dispatch": { - "p50": 42.1609990298748, - "p90": 45.00000178813934, - "p95": 45.96000164747238, - "p99": 50.40000006556511 + "p50": 75.23199915885925, + "p90": 102.04800218343735, + "p95": 107.87200182676315, + "p99": 130.20800054073334 }, "combine": { - "p50": 26.599999517202377, - "p90": 28.68100069463253, - "p95": 29.96000088751316, - "p99": 31.720001250505447 + "p50": 61.792001128196716, + "p90": 71.16799801588058, + "p95": 76.64000242948532, + "p99": 86.84799820184708 }, "roundtrip": { - "p50": 69.20100003480911, - "p90": 71.76099717617035, - "p95": 72.7199986577034, - "p99": 74.16000217199326 + "p50": 116.92799627780914, + "p90": 138.2399946451187, + "p95": 147.96799421310425, + "p99": 179.967999458313 }, "isolatedSum": { - "p50": 68.76099854707718, - "p90": 73.68100248277187, - "p95": 75.92000253498554, - "p99": 82.12000131607056 + "p50": 137.02400028705597, + "p90": 173.21600019931793, + "p95": 184.51200425624847, + "p99": 217.0559987425804 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 9748480, - "combineLogicalBytes": 9748480, + "dispatchLogicalBytes": 5464064, + "combineLogicalBytes": 5464064, + 
"fanoutMean": 5.2109375, + "recvTokensMax": 94, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 74.75200295448303, + "p90": 90.52799642086029, + "p95": 98.75199943780899, + "p99": 135.48800349235535 + }, + "combine": { + "p50": 63.74400109052658, + "p90": 71.71200215816498, + "p95": 78.78399640321732, + "p99": 91.07200056314468 + }, + "roundtrip": { + "p50": 119.9679970741272, + "p90": 145.47200500965118, + "p95": 149.50400590896606, + "p99": 165.8879965543747 + }, + "isolatedSum": { + "p50": 138.4960040450096, + "p90": 162.23999857902527, + "p95": 177.5359958410263, + "p99": 226.56000405550003 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 11124736, + "combineLogicalBytes": 11124736, + "fanoutMean": 5.3046875, + "recvTokensMax": 186, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 84.60800349712372, + "p90": 103.13600301742554, + "p95": 112.22399771213531, + "p99": 138.11199367046356 + }, + "combine": { + "p50": 72.03199714422226, + "p90": 82.78399705886841, + "p95": 89.56799656152725, + "p99": 104.92800176143646 + }, + "roundtrip": { + "p50": 131.48799538612366, + "p90": 145.50399780273438, + "p95": 155.8080017566681, + "p99": 189.66400623321533 + }, + "isolatedSum": { + "p50": 156.64000064134598, + "p90": 185.92000007629395, + "p95": 201.79199427366257, + "p99": 243.03999543190002 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22192128, + "combineLogicalBytes": 22192128, + "fanoutMean": 5.291015625, + "recvTokensMax": 358, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 94.2080020904541, + "p90": 120.2239990234375, + "p95": 133.82400572299957, + "p99": 215.68000316619873 + }, + "combine": { + "p50": 
82.8159973025322, + "p90": 92.70399808883667, + "p95": 96.12800180912018, + "p99": 107.04000294208527 + }, + "roundtrip": { + "p50": 152.22400426864624, + "p90": 168.32000017166138, + "p95": 176.2239933013916, + "p99": 196.03200256824493 + }, + "isolatedSum": { + "p50": 177.0239993929863, + "p90": 212.92799711227417, + "p95": 229.95200753211975, + "p99": 322.720006108284 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 44564480, + "combineLogicalBytes": 44564480, "fanoutMean": 5.3125, - "recvTokensMax": 128, - "stragglerRank": 5, + "recvTokensMax": 699, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -72110,34 +74719,35 @@ ] }, { - "id": "cx-2129d47b", - "identity": "mi355x|mori|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|decode|normal|none|none|0|tuned||4d5546b3fb85130", - "colorKey": "mi355x_570d6605", - "comparisonKey": "1ea3da47c00f36f8", + "id": "cx-3d690e39", + "identity": "h200|deepep|5120|8|160|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||be1b44a963bd4ef", + "colorKey": "h200_3a47b6c9", + "comparisonKey": "540c08b08c068f8c", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:59:55.992554+00:00", + "generatedAt": "2026-06-26T23:54:06.885074+00:00", "status": "valid", "publicationStatus": "official", - "runner": "mi355x-amds_07", - "sku": "mi355x", - "backend": "mori", + "runner": "h200-dgxc-slurm_4", + "sku": "h200", + "backend": "deepep", "phase": "decode", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "mi355x-xgmi", - "transport": "xgmi", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "MI355X EP8 · mori · bf16 · zipf", + "label": "H200 EP8 · deepep · bf16", + "model": "shape 5120/8/160", "shape": { - "hidden": 7168, + "hidden": 5120, "topk": 8, - 
"experts": 256, - "routing": "zipf", - "routingLabel": "zipf", + "experts": 160, + "routing": "uniform", + "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, @@ -72147,9 +74757,9 @@ }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.3125, - "configuredUnits": 80, - "deviceUnits": 256, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -72157,23 +74767,23 @@ }, "placement": { "kind": "packed", - "nodes": 2, + "nodes": 1, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "4d5546b3fb85130", - "workloadId": "set:5:f5576e2b712d38c3", + "traceSignature": "be1b44a963bd4ef", + "workloadId": "set:8:34e5874082f8ea8f", "workloadSource": "canonical-serialized", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, - "backendVersion": "image:rocm/sgl-dev:sglang-0.5.9-rocm720-mi35x-mori-0227-2", + "backendVersion": "1.2.1", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271913592", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271913592", - "createdAt": "2026-06-26T23:57:44Z", + "id": "28271759919", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271759919", + "createdAt": "2026-06-26T23:54:06.885074+00:00", "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" }, "rows": [ @@ -72181,35 +74791,35 @@ "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 39.03999924659729, - "p90": 41.76099970936775, - "p95": 43.40000078082085, - "p99": 47.15999960899353 + "p50": 70.49600034952164, + "p90": 102.1760031580925, + "p95": 111.90400272607803, + "p99": 133.34399461746216 }, "combine": { - "p50": 16.359999775886536, - "p90": 18.519999459385872, - "p95": 20.12000046670437, - "p99": 23.40099960565567 + "p50": 
60.5119988322258, + "p90": 72.9919970035553, + "p95": 79.55200225114822, + "p99": 90.55999666452408 }, "roundtrip": { - "p50": 53.95999923348427, - "p90": 57.20100179314613, - "p95": 58.75999853014946, - "p99": 61.20099872350693 + "p50": 113.8560026884079, + "p90": 143.5839980840683, + "p95": 150.94399452209473, + "p99": 190.14400243759155 }, "isolatedSum": { - "p50": 55.399999022483826, - "p90": 60.280999168753624, - "p95": 63.520001247525215, - "p99": 70.5609992146492 + "p50": 131.00799918174744, + "p90": 175.1680001616478, + "p95": 191.45600497722626, + "p99": 223.90399128198624 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 444416, - "combineLogicalBytes": 444416, - "fanoutMean": 3.875, + "dispatchLogicalBytes": 430080, + "combineLogicalBytes": 430080, + "fanoutMean": 5.25, "recvTokensMax": 8, - "stragglerRank": 3, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 @@ -72218,35 +74828,35 @@ "tokensPerRank": 2, "globalTokens": 16, "dispatch": { - "p50": 40.640998631715775, - "p90": 43.99999976158142, - "p95": 44.840000569820404, - "p99": 48.0009987950325 + "p50": 73.88799637556076, + "p90": 109.43999886512756, + "p95": 123.74400347471237, + "p99": 176.2239933013916 }, "combine": { - "p50": 16.519999131560326, - "p90": 18.561000004410744, - "p95": 20.24099975824356, - "p99": 23.520000278949738 + "p50": 62.463998794555664, + "p90": 76.4480009675026, + "p95": 81.37600123882294, + "p99": 89.6959975361824 }, "roundtrip": { - "p50": 55.52000179886818, - "p90": 59.321001172065735, - "p95": 60.72099879384041, - "p99": 68.88099759817123 + "p50": 118.40000003576279, + "p90": 146.7839926481247, + "p95": 154.88000214099884, + "p99": 198.0160027742386 }, "isolatedSum": { - "p50": 57.1609977632761, - "p90": 62.560999765992165, - "p95": 65.08100032806396, - "p99": 71.52099907398224 + "p50": 136.35199517011642, + "p90": 185.88799983263016, + "p95": 205.1200047135353, + "p99": 265.919990837574 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 
845824, - "combineLogicalBytes": 845824, - "fanoutMean": 3.6875, - "recvTokensMax": 16, - "stragglerRank": 2, + "dispatchLogicalBytes": 880640, + "combineLogicalBytes": 880640, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 @@ -72255,35 +74865,35 @@ "tokensPerRank": 4, "globalTokens": 32, "dispatch": { - "p50": 41.28099977970123, - "p90": 44.16000097990036, - "p95": 45.00000178813934, - "p99": 47.68000170588493 + "p50": 72.12799787521362, + "p90": 100.8640006184578, + "p95": 107.84000158309937, + "p99": 182.5920045375824 }, "combine": { - "p50": 17.640000209212303, - "p90": 20.160000771284103, - "p95": 21.479999646544456, - "p99": 24.6799997985363 + "p50": 62.24000081419945, + "p90": 77.504001557827, + "p95": 82.36800134181976, + "p99": 100.22400319576263 }, "roundtrip": { - "p50": 59.04100090265274, - "p90": 63.07999789714813, - "p95": 64.87999856472015, - "p99": 68.83999705314636 + "p50": 116.64000153541565, + "p90": 148.3840048313141, + "p95": 158.49600732326508, + "p99": 193.34399700164795 }, "isolatedSum": { - "p50": 58.920999988913536, - "p90": 64.32000175118446, - "p95": 66.4800014346838, - "p99": 72.36000150442123 + "p50": 134.36799868941307, + "p90": 178.3680021762848, + "p95": 190.20800292491913, + "p99": 282.81600773334503 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1691648, - "combineLogicalBytes": 1691648, - "fanoutMean": 3.6875, - "recvTokensMax": 32, - "stragglerRank": 2, + "dispatchLogicalBytes": 1740800, + "combineLogicalBytes": 1740800, + "fanoutMean": 5.3125, + "recvTokensMax": 25, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 @@ -72292,34 +74902,34 @@ "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 41.919998824596405, - "p90": 44.801000505685806, - "p95": 46.84000089764595, - "p99": 50.880998373031616 + "p50": 72.60800153017044, + "p90": 101.1200025677681, + "p95": 114.1119971871376, + "p99": 128.06400656700134 }, 
"combine": { - "p50": 19.600000232458115, - "p90": 22.120000794529915, - "p95": 23.520000278949738, - "p99": 26.799999177455902 + "p50": 63.74400109052658, + "p90": 79.26400005817413, + "p95": 85.50400286912918, + "p99": 120.03199756145477 }, "roundtrip": { - "p50": 61.000000685453415, - "p90": 64.56000357866287, - "p95": 65.88099896907806, - "p99": 69.52100247144699 + "p50": 117.53600090742111, + "p90": 147.74399995803833, + "p95": 156.8319946527481, + "p99": 184.54399704933167 }, "isolatedSum": { - "p50": 61.51999905705452, - "p90": 66.92100130021572, - "p95": 70.36000117659569, - "p99": 77.68099755048752 + "p50": 136.35200262069702, + "p90": 180.38400262594223, + "p95": 199.61600005626678, + "p99": 248.09600412845612 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 3354624, - "combineLogicalBytes": 3354624, - "fanoutMean": 3.65625, - "recvTokensMax": 64, + "dispatchLogicalBytes": 3471360, + "combineLogicalBytes": 3471360, + "fanoutMean": 5.296875, + "recvTokensMax": 50, "stragglerRank": 3, "correct": true, "samplesPooled": 600, @@ -72329,290 +74939,146 @@ "tokensPerRank": 16, "globalTokens": 128, "dispatch": { - "p50": 41.839998215436935, - "p90": 44.920001178979874, - "p95": 46.92000150680542, - "p99": 50.1599982380867 + "p50": 75.9039968252182, + "p90": 101.79200023412704, + "p95": 111.77600175142288, + "p99": 127.9039978981018 }, "combine": { - "p50": 24.481000378727913, - "p90": 27.720000594854355, - "p95": 30.561000108718872, - "p99": 59.321001172065735 + "p50": 64.41599875688553, + "p90": 79.68000322580338, + "p95": 84.06399935483932, + "p99": 103.61599922180176 }, "roundtrip": { - "p50": 66.23999774456024, - "p90": 69.36100125312805, - "p95": 70.47999650239944, - "p99": 73.36000353097916 + "p50": 124.09599870443344, + "p90": 154.91199493408203, + "p95": 167.35999286174774, + "p99": 218.6560034751892 }, "isolatedSum": { - "p50": 66.32099859416485, - "p90": 72.64000177383423, - "p95": 77.48100161552429, - "p99": 109.48099941015244 + "p50": 
140.31999558210373, + "p90": 181.47200345993042, + "p95": 195.8400011062622, + "p99": 231.51999711990356 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 6537216, - "combineLogicalBytes": 6537216, - "fanoutMean": 3.5625, - "recvTokensMax": 127, - "stragglerRank": 3, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-47886ba2", - "identity": "mi355x|mori|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|decode|normal|none|none|0|tuned||5c00b1a0c13aa3e", - "colorKey": "mi355x_6fd30e97", - "comparisonKey": "41d88b5d4da0110a", - "schemaVersion": 3, - "generatedAt": "2026-06-27T00:00:43.491121+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "mi355x-amds_03", - "sku": "mi355x", - "backend": "mori", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "mi355x-xgmi", - "transport": "xgmi", - "worldSize": 8, - "epSize": 8, - "label": "MI355X EP8 · mori · bf16 · zipf-heavy", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "zipf-heavy", - "routingLabel": "zipf-heavy", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.3125, - "configuredUnits": 80, - "deviceUnits": 256, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 2, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "5c00b1a0c13aa3e", - "workloadId": "set:5:6b84350720aa8233", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": 
"image:rocm/sgl-dev:sglang-0.5.9-rocm720-mi35x-mori-0227-2", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28271916622", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271916622", - "createdAt": "2026-06-26T23:57:51Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 38.32000121474266, - "p90": 40.28100147843361, - "p95": 41.600000113248825, - "p99": 46.31999880075455 - }, - "combine": { - "p50": 15.720000490546227, - "p90": 17.03999936580658, - "p95": 18.640000373125076, - "p99": 20.800000056624413 - }, - "roundtrip": { - "p50": 51.16099864244461, - "p90": 53.55999991297722, - "p95": 54.96000126004219, - "p99": 57.760998606681824 - }, - "isolatedSum": { - "p50": 54.04000170528889, - "p90": 57.32100084424019, - "p95": 60.2400004863739, - "p99": 67.11999885737896 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 172032, - "combineLogicalBytes": 172032, - "fanoutMean": 1.5, - "recvTokensMax": 8, - "stragglerRank": 2, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 40.47999903559685, - "p90": 42.64099895954132, - "p95": 44.47999969124794, - "p99": 48.760998994112015 - }, - "combine": { - "p50": 16.00099913775921, - "p90": 17.160000279545784, - "p95": 18.039999529719353, - "p99": 20.800000056624413 - }, - "roundtrip": { - "p50": 53.16000059247017, - "p90": 56.07999861240387, - "p95": 57.64099955558777, - "p99": 60.08100137114525 - }, - "isolatedSum": { - "p50": 56.480998173356056, - "p90": 59.800999239087105, - "p95": 62.51999922096729, - "p99": 69.56099905073643 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 315392, - "combineLogicalBytes": 315392, - "fanoutMean": 1.375, - "recvTokensMax": 16, - "stragglerRank": 2, + 
"dispatchLogicalBytes": 6912000, + "combineLogicalBytes": 6912000, + "fanoutMean": 5.2734375, + "recvTokensMax": 93, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 4, - "globalTokens": 32, + "tokensPerRank": 32, + "globalTokens": 256, "dispatch": { - "p50": 41.200000792741776, - "p90": 43.241001665592194, - "p95": 44.52100023627281, - "p99": 48.280999064445496 + "p50": 77.63200253248215, + "p90": 102.08000242710114, + "p95": 110.1439967751503, + "p99": 138.5280042886734 }, "combine": { - "p50": 17.240000888705254, - "p90": 18.519999459385872, - "p95": 20.19999921321869, - "p99": 22.5210003554821 + "p50": 68.4799998998642, + "p90": 83.45600217580795, + "p95": 89.50400352478027, + "p99": 97.82399982213974 }, "roundtrip": { - "p50": 56.561000645160675, - "p90": 59.241000562906265, - "p95": 60.440998524427414, - "p99": 64.4410029053688 + "p50": 122.81599640846252, + "p90": 153.50399911403656, + "p95": 163.13600540161133, + "p99": 190.5599981546402 }, "isolatedSum": { - "p50": 58.44000168144703, - "p90": 61.761001124978065, - "p95": 64.7209994494915, - "p99": 70.8019994199276 + "p50": 146.11200243234634, + "p90": 185.5360046029091, + "p95": 199.64800029993057, + "p99": 236.35200411081314 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 616448, - "combineLogicalBytes": 616448, - "fanoutMean": 1.34375, - "recvTokensMax": 32, - "stragglerRank": 2, + "dispatchLogicalBytes": 13977600, + "combineLogicalBytes": 13977600, + "fanoutMean": 5.33203125, + "recvTokensMax": 179, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 64, + "globalTokens": 512, "dispatch": { - "p50": 41.31999984383583, - "p90": 43.28100010752678, - "p95": 44.679999351501465, - "p99": 46.480000019073486 + "p50": 91.90399944782257, + "p90": 113.08799684047699, + "p95": 123.52000176906586, + "p99": 162.9759967327118 }, "combine": { - "p50": 18.8400000333786, - 
"p90": 20.041000097990036, - "p95": 21.240999922156334, - "p99": 24.441000074148178 + "p50": 77.15199887752533, + "p90": 91.13600105047226, + "p95": 97.59999811649323, + "p99": 112.06399649381638 }, "roundtrip": { - "p50": 58.761000633239746, - "p90": 61.43999844789505, - "p95": 63.1600022315979, - "p99": 65.52000343799591 + "p50": 140.47999680042267, + "p90": 166.75199568271637, + "p95": 175.9359985589981, + "p99": 250.20799040794373 }, "isolatedSum": { - "p50": 60.15999987721443, - "p90": 63.322000205516815, - "p95": 65.9209992736578, - "p99": 70.92100009322166 + "p50": 169.0559983253479, + "p90": 204.22399789094925, + "p95": 221.11999988555908, + "p99": 275.03999322652817 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1376256, - "combineLogicalBytes": 1376256, - "fanoutMean": 1.5, - "recvTokensMax": 64, - "stragglerRank": 2, + "dispatchLogicalBytes": 27975680, + "combineLogicalBytes": 27975680, + "fanoutMean": 5.3359375, + "recvTokensMax": 355, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 16, - "globalTokens": 128, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 41.40099883079529, - "p90": 43.480001389980316, - "p95": 44.440001249313354, - "p99": 46.00000008940697 + "p50": 99.07200187444687, + "p90": 122.27199971675873, + "p95": 127.42400169372559, + "p99": 146.7519998550415 }, "combine": { - "p50": 22.87999913096428, - "p90": 24.6799997985363, - "p95": 26.559999212622643, - "p99": 29.40100058913231 + "p50": 90.87999910116196, + "p90": 105.3759977221489, + "p95": 109.37599837779999, + "p99": 125.37600100040436 }, "roundtrip": { - "p50": 63.19999694824219, - "p90": 65.76000154018402, - "p95": 67.28000193834305, - "p99": 69.64100152254105 + "p50": 166.4319932460785, + "p90": 186.5919977426529, + "p95": 193.12000274658203, + "p99": 222.01600670814514 }, "isolatedSum": { - "p50": 64.28099796175957, - "p90": 68.16000118851662, - "p95": 71.000000461936, - "p99": 75.40100067853928 + 
"p50": 189.95200097560883, + "p90": 227.64799743890762, + "p95": 236.80000007152557, + "p99": 272.12800085544586 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2781184, - "combineLogicalBytes": 2781184, - "fanoutMean": 1.515625, - "recvTokensMax": 128, - "stragglerRank": 2, + "dispatchLogicalBytes": 55674880, + "combineLogicalBytes": 55674880, + "fanoutMean": 5.3095703125, + "recvTokensMax": 699, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 @@ -72620,46 +75086,47 @@ ] }, { - "id": "cx-8d163d45", - "identity": "mi355x|mori|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|decode|normal|none|none|0|tuned||d42040086b5de07", - "colorKey": "mi355x_65e339f9", - "comparisonKey": "2ba4cba3af48c2b3", + "id": "cx-59f585e0", + "identity": "h200|deepep|6144|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h200_d982b749", + "comparisonKey": "6df8e885c58ea75d", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:39:01.384245+00:00", + "generatedAt": "2026-06-27T11:13:46.508858+00:00", "status": "valid", "publicationStatus": "official", - "runner": "mi355x-amds_07", - "sku": "mi355x", - "backend": "mori", + "runner": "h200-dgxc-slurm_12", + "sku": "h200", + "backend": "deepep", "phase": "decode", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "mi355x-xgmi", - "transport": "xgmi", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", "worldSize": 8, "epSize": 8, - "label": "MI355X EP8 · mori · bf16 · zipf+eplb", + "label": "H200 EP8 · deepep · bf16", + "model": "MiniMax-M3", "shape": { - "hidden": 7168, + "hidden": 6144, "topk": 8, - "experts": 288, - "routing": "zipf", - "routingLabel": "zipf+eplb", + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", - "eplbEnabled": true, 
+ "eplbEnabled": false, "dispatchDtype": "bf16", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.3125, - "configuredUnits": 80, - "deviceUnits": 256, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -72667,59 +75134,59 @@ }, "placement": { "kind": "packed", - "nodes": 2, + "nodes": 1, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "d42040086b5de07", - "workloadId": "set:5:f5576e2b712d38c3", + "traceSignature": "ac583971f94b176", + "workloadId": "set:8:2e0df6a62cd0143e", "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": 4.875, - "eplbImbalanceAfter": 1.0033482142857144, - "backendVersion": "image:rocm/sgl-dev:sglang-0.5.9-rocm720-mi35x-mori-0227-2", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271245352", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271245352", - "createdAt": "2026-06-26T23:36:55Z", - "sha": "ee4ffe77871d0200cb4a78c96d3ae9f692e9af02" + "id": "28287495061", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28287495061", + "createdAt": "2026-06-27T11:13:46.508858+00:00", + "sha": "df7fddee0a275156d4c72fa006cd2b73bce72613" }, "rows": [ { "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 39.319999516010284, - "p90": 42.11999848484993, - "p95": 43.15999895334244, - "p99": 46.52100056409836 + "p50": 71.96799665689468, + "p90": 97.9200005531311, + "p95": 106.1440035700798, + "p99": 119.10399794578552 }, "combine": { - "p50": 15.399999916553497, - "p90": 17.601000145077705, - "p95": 18.75999942421913, - "p99": 21.320000290870667 + "p50": 
65.95200300216675, + "p90": 76.12799853086472, + "p95": 81.56800270080566, + "p99": 110.07999628782272 }, "roundtrip": { - "p50": 54.23999950289726, - "p90": 57.440001517534256, - "p95": 58.921001851558685, - "p99": 60.95999851822853 + "p50": 118.6240017414093, + "p90": 145.50399780273438, + "p95": 153.9520025253296, + "p99": 180.63999712467194 }, "isolatedSum": { - "p50": 54.71999943256378, - "p90": 59.720998629927635, - "p95": 61.91999837756157, - "p99": 67.84100085496902 + "p50": 137.91999965906143, + "p90": 174.04799908399582, + "p95": 187.71200627088547, + "p99": 229.18399423360825 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 587776, - "combineLogicalBytes": 587776, - "fanoutMean": 5.125, + "dispatchLogicalBytes": 540672, + "combineLogicalBytes": 540672, + "fanoutMean": 5.5, "recvTokensMax": 7, - "stragglerRank": 2, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 @@ -72728,35 +75195,35 @@ "tokensPerRank": 2, "globalTokens": 16, "dispatch": { - "p50": 41.71999916434288, - "p90": 44.84099894762039, - "p95": 46.4400015771389, - "p99": 49.15999993681908 + "p50": 72.95999675989151, + "p90": 100.09600222110748, + "p95": 107.84000158309937, + "p99": 141.76000654697418 }, "combine": { - "p50": 15.599999576807022, - "p90": 17.839999869465828, - "p95": 19.88000050187111, - "p99": 22.5600004196167 + "p50": 66.17599725723267, + "p90": 77.37600058317184, + "p95": 85.02399921417236, + "p99": 103.13600301742554 }, "roundtrip": { - "p50": 57.08099901676178, - "p90": 60.67999824881554, - "p95": 61.59999966621399, - "p99": 63.48100304603577 + "p50": 120.60800194740295, + "p90": 148.41599762439728, + "p95": 158.1439971923828, + "p99": 177.5359958410263 }, "isolatedSum": { - "p50": 57.3199987411499, - "p90": 62.68099881708622, - "p95": 66.32000207901001, - "p99": 71.72000035643578 + "p50": 139.13599401712418, + "p90": 177.47200280427933, + "p95": 192.86400079727173, + "p99": 244.89600956439972 }, "roundtripMeasured": true, - 
"dispatchLogicalBytes": 1161216, - "combineLogicalBytes": 1161216, - "fanoutMean": 5.0625, + "dispatchLogicalBytes": 1056768, + "combineLogicalBytes": 1056768, + "fanoutMean": 5.375, "recvTokensMax": 13, - "stragglerRank": 2, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 @@ -72765,35 +75232,35 @@ "tokensPerRank": 4, "globalTokens": 32, "dispatch": { - "p50": 41.88000038266182, - "p90": 44.08099874854088, - "p95": 45.120999217033386, - "p99": 48.239998519420624 + "p50": 73.60000163316727, + "p90": 98.49599748849869, + "p95": 112.76800185441971, + "p99": 134.8479986190796 }, "combine": { - "p50": 18.719999119639397, - "p90": 21.04100026190281, - "p95": 22.760000079870224, - "p99": 26.760000735521317 + "p50": 67.00800359249115, + "p90": 76.54400169849396, + "p95": 85.50400286912918, + "p99": 110.17599701881409 }, "roundtrip": { - "p50": 61.43999844789505, - "p90": 64.43999707698822, - "p95": 65.68100303411484, - "p99": 67.87999719381332 + "p50": 118.8800036907196, + "p90": 144.57599818706512, + "p95": 156.44800662994385, + "p99": 188.83199989795685 }, "isolatedSum": { - "p50": 60.599999502301216, - "p90": 65.12199901044369, - "p95": 67.88099929690361, - "p99": 74.99999925494194 + "p50": 140.60800522565842, + "p90": 175.03999918699265, + "p95": 198.2720047235489, + "p99": 245.02399563789368 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2394112, - "combineLogicalBytes": 2394112, - "fanoutMean": 5.21875, - "recvTokensMax": 23, - "stragglerRank": 2, + "dispatchLogicalBytes": 2125824, + "combineLogicalBytes": 2125824, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -72802,35 +75269,35 @@ "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 41.839998215436935, - "p90": 44.79999840259552, - "p95": 46.23999819159508, - "p99": 48.36000129580498 + "p50": 73.18399846553802, + "p90": 94.24000233411789, + "p95": 103.00800204277039, + "p99": 124.57600235939026 
}, "combine": { - "p50": 21.199999377131462, - "p90": 22.95999974012375, - "p95": 24.19999986886978, - "p99": 26.040000841021538 + "p50": 67.1359971165657, + "p90": 76.67200267314911, + "p95": 87.42400258779526, + "p99": 107.26399719715118 }, "roundtrip": { - "p50": 61.51999905705452, - "p90": 64.92000073194504, - "p95": 65.92000275850296, - "p99": 68.08000057935715 + "p50": 122.3360002040863, + "p90": 178.39999496936798, + "p95": 188.54400515556335, + "p99": 224.31999444961548 }, "isolatedSum": { - "p50": 63.0399975925684, - "p90": 67.75999814271927, - "p95": 70.43999806046486, - "p99": 74.40000213682652 + "p50": 140.31999558210373, + "p90": 170.912005007267, + "p95": 190.43200463056564, + "p99": 231.83999955654144 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4845568, - "combineLogicalBytes": 4845568, - "fanoutMean": 5.28125, - "recvTokensMax": 45, - "stragglerRank": 2, + "dispatchLogicalBytes": 4263936, + "combineLogicalBytes": 4263936, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 @@ -72839,142 +75306,254 @@ "tokensPerRank": 16, "globalTokens": 128, "dispatch": { - "p50": 42.319998145103455, - "p90": 44.759999960660934, - "p95": 46.28000035881996, - "p99": 49.240998923778534 + "p50": 74.27199929952621, + "p90": 96.76799923181534, + "p95": 109.18399691581726, + "p99": 126.97599828243256 }, "combine": { - "p50": 24.879999458789825, - "p90": 27.079999446868896, - "p95": 28.440000489354134, - "p99": 56.88000097870827 + "p50": 68.1919977068901, + "p90": 80.51200211048126, + "p95": 88.51200342178345, + "p99": 103.84000092744827 }, "roundtrip": { - "p50": 66.3599967956543, - "p90": 69.95999813079834, - "p95": 70.91999799013138, - "p99": 73.00099730491638 + "p50": 122.75200337171555, + "p90": 148.70400726795197, + "p95": 161.3440066576004, + "p99": 200.6080001592636 }, "isolatedSum": { - "p50": 67.19999760389328, - "p90": 71.83999940752983, - "p95": 74.7200008481741, - "p99": 
106.1209999024868 + "p50": 142.46399700641632, + "p90": 177.2800013422966, + "p95": 197.6960003376007, + "p99": 230.81599920988083 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 9676800, - "combineLogicalBytes": 9676800, - "fanoutMean": 5.2734375, - "recvTokensMax": 88, - "stragglerRank": 2, + "dispatchLogicalBytes": 8503296, + "combineLogicalBytes": 8503296, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 - } - ] - }, - { - "id": "cx-2d0599c0", - "identity": "mi355x|mori|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|c774c8e4abb34da", - "colorKey": "mi355x_2fa43515", - "comparisonKey": "2796ed88af4b14b0", - "schemaVersion": 3, - "generatedAt": "2026-06-26T15:40:45.756534+00:00", - "status": "valid", - "publicationStatus": "diagnostic", - "runner": "mi355x-amds_04", - "sku": "mi355x", - "backend": "mori", - "phase": "decode", - "mode": "normal", - "resourceMode": "normalized", - "suite": "resource-constrained", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "mi355x-xgmi", - "transport": "xgmi", - "worldSize": 8, - "epSize": 8, - "label": "MI355X EP8 · mori · bf16 (norm)", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": 0.18, - "achievedFraction": 0.3125, - "configuredUnits": 80, - "deviceUnits": 256, - "resourceClass": "unknown", - "conformanceClass": "minimum-functional", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 2, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "c774c8e4abb34da", - "workloadId": 
"set:5:d8d49658059863f2", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "image:rocm/sgl-dev:sglang-0.5.9-rocm720-mi35x-mori-0227-2", + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 85.79199761152267, + "p90": 118.30399930477142, + "p95": 128.28800082206726, + "p99": 147.87200093269348 + }, + "combine": { + "p50": 74.40000027418137, + "p90": 87.55200356245041, + "p95": 92.0960009098053, + "p99": 110.07999628782272 + }, + "roundtrip": { + "p50": 130.72000443935394, + "p90": 155.20000457763672, + "p95": 167.4560010433197, + "p99": 208.48000049591064 + }, + "isolatedSum": { + "p50": 160.19199788570404, + "p90": 205.85600286722183, + "p95": 220.38400173187256, + "p99": 257.9519972205162 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 16908288, + "combineLogicalBytes": 16908288, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 91.07200056314468, + "p90": 110.81600189208984, + "p95": 117.53600090742111, + "p99": 137.1839940547943 + }, + "combine": { + "p50": 83.26400071382523, + "p90": 94.36800330877304, + "p95": 99.71199929714203, + "p99": 128.00000607967377 + }, + "roundtrip": { + "p50": 149.9200016260147, + "p90": 169.5680022239685, + "p95": 179.29600179195404, + "p99": 200.41599869728088 + }, + "isolatedSum": { + "p50": 174.3360012769699, + "p90": 205.18400520086288, + "p95": 217.24800020456314, + "p99": 265.1840001344681 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 33423360, + "combineLogicalBytes": 33423360, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 110.55999994277954, + "p90": 124.09599870443344, + "p95": 
132.192000746727, + "p99": 179.00800704956055 + }, + "combine": { + "p50": 96.41599655151367, + "p90": 108.51199924945831, + "p95": 115.84000289440155, + "p99": 140.79999923706055 + }, + "roundtrip": { + "p50": 180.38399517536163, + "p90": 201.02399587631226, + "p95": 209.75999534130096, + "p99": 226.6560047864914 + }, + "isolatedSum": { + "p50": 206.9759964942932, + "p90": 232.60799795389175, + "p95": 248.03200364112854, + "p99": 319.8080062866211 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 66576384, + "combineLogicalBytes": 66576384, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-e3311b84", + "identity": "h200|deepep|6144|8|256|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h200_3a47b6c9", + "comparisonKey": "fc31c0a33afa32cc", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:54:56.726240+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_7", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16", + "model": "MiniMax-M3", + "shape": { + "hidden": 6144, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + 
"paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": "set:8:2e0df6a62cd0143e", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28247575150", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28247575150", - "createdAt": "2026-06-26T15:22:26Z", - "sha": "fd23d02b65dba6f1ed963342b188022fc27263d1" + "id": "28271775418", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271775418", + "createdAt": "2026-06-26T23:54:56.726240+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" }, "rows": [ { "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 40.55999964475632, - "p90": 43.15999895334244, - "p95": 44.881001114845276, - "p99": 47.55999892950058 + "p50": 73.05599749088287, + "p90": 102.7199998497963, + "p95": 111.35999858379364, + "p99": 123.00799787044525 }, "combine": { - "p50": 16.119999811053276, - "p90": 18.719999119639397, - "p95": 19.840000197291374, - "p99": 22.520000115036964 + "p50": 65.92000275850296, + "p90": 79.77599650621414, + "p95": 88.44800293445587, + "p99": 126.30400061607361 }, "roundtrip": { - "p50": 56.040000170469284, - "p90": 59.20000001788139, - "p95": 60.80099940299988, - "p99": 63.120998442173004 + "p50": 118.78400295972824, + "p90": 148.28799664974213, + "p95": 155.8080017566681, + "p99": 184.64000523090363 }, "isolatedSum": { - "p50": 56.67999945580959, - "p90": 61.879998072981834, - "p95": 64.72100131213665, - "p99": 70.07999904453754 + "p50": 138.97600024938583, + "p90": 182.49599635601044, + "p95": 199.8080015182495, + "p99": 249.31199848651886 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 
630784, - "combineLogicalBytes": 630784, + "dispatchLogicalBytes": 540672, + "combineLogicalBytes": 540672, "fanoutMean": 5.5, "recvTokensMax": 7, - "stragglerRank": 0, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -72983,35 +75562,35 @@ "tokensPerRank": 2, "globalTokens": 16, "dispatch": { - "p50": 42.55999997258186, - "p90": 45.441001653671265, - "p95": 47.040000557899475, - "p99": 49.959998577833176 + "p50": 74.5920017361641, + "p90": 107.07200318574905, + "p95": 120.51200121641159, + "p99": 142.87999272346497 }, "combine": { - "p50": 16.16000011563301, - "p90": 18.360000103712082, - "p95": 19.600000232458115, - "p99": 22.63999916613102 + "p50": 67.03999638557434, + "p90": 84.73599702119827, + "p95": 92.12800115346909, + "p99": 114.07999694347382 }, "roundtrip": { - "p50": 58.83999913930893, - "p90": 61.88099831342697, - "p95": 63.48100304603577, - "p99": 65.40100276470184 + "p50": 120.38400024175644, + "p90": 157.18400478363037, + "p95": 169.24799978733063, + "p99": 195.68000733852386 }, "isolatedSum": { - "p50": 58.720000088214874, - "p90": 63.80100175738335, - "p95": 66.64000079035759, - "p99": 72.5999977439642 + "p50": 141.63199812173843, + "p90": 191.80800020694733, + "p95": 212.64000236988068, + "p99": 256.9599896669388 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1232896, - "combineLogicalBytes": 1232896, + "dispatchLogicalBytes": 1056768, + "combineLogicalBytes": 1056768, "fanoutMean": 5.375, "recvTokensMax": 13, - "stragglerRank": 0, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -73020,35 +75599,35 @@ "tokensPerRank": 4, "globalTokens": 32, "dispatch": { - "p50": 42.160000652074814, - "p90": 44.840000569820404, - "p95": 46.28000035881996, - "p99": 49.84100162982941 + "p50": 72.95999675989151, + "p90": 101.05600208044052, + "p95": 114.68800157308578, + "p99": 137.472003698349 }, "combine": { - "p50": 19.039999693632126, - "p90": 22.1599992364645, - "p95": 23.48100021481514, - "p99": 
54.63999882340431 + "p50": 66.14399701356888, + "p90": 79.23199981451035, + "p95": 84.06399935483932, + "p99": 93.50399672985077 }, "roundtrip": { - "p50": 61.59999966621399, - "p90": 64.71999734640121, - "p95": 65.76000154018402, - "p99": 68.36000084877014 + "p50": 120.99199742078781, + "p90": 154.81600165367126, + "p95": 165.95199704170227, + "p99": 220.41599452495575 }, "isolatedSum": { - "p50": 61.20000034570694, - "p90": 66.9999998062849, - "p95": 69.7610005736351, - "p99": 104.48100045323372 + "p50": 139.1039937734604, + "p90": 180.28800189495087, + "p95": 198.7520009279251, + "p99": 230.97600042819977 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2480128, - "combineLogicalBytes": 2480128, + "dispatchLogicalBytes": 2125824, + "combineLogicalBytes": 2125824, "fanoutMean": 5.40625, "recvTokensMax": 29, - "stragglerRank": 0, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -73057,35 +75636,35 @@ "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 42.52000153064728, - "p90": 45.1200008392334, - "p95": 46.080999076366425, - "p99": 48.8400012254715 + "p50": 73.08799773454666, + "p90": 102.78400033712387, + "p95": 110.88000237941742, + "p99": 142.17600226402283 }, "combine": { - "p50": 20.479999482631683, - "p90": 22.520000115036964, - "p95": 23.479999974370003, - "p99": 25.800000876188278 + "p50": 67.90400296449661, + "p90": 83.29600095748901, + "p95": 89.31200206279755, + "p99": 102.30399668216705 }, "roundtrip": { - "p50": 62.67999857664108, - "p90": 65.5599981546402, - "p95": 66.880002617836, - "p99": 68.56100261211395 + "p50": 120.95999717712402, + "p90": 156.73600137233734, + "p95": 165.56799411773682, + "p99": 189.43999707698822 }, "isolatedSum": { - "p50": 63.00000101327896, - "p90": 67.64000095427036, - "p95": 69.56099905073643, - "p99": 74.64000210165977 + "p50": 140.99200069904327, + "p90": 186.08000129461288, + "p95": 200.19200444221497, + "p99": 244.47999894618988 }, "roundtripMeasured": true, - 
"dispatchLogicalBytes": 4974592, - "combineLogicalBytes": 4974592, + "dispatchLogicalBytes": 4263936, + "combineLogicalBytes": 4263936, "fanoutMean": 5.421875, "recvTokensMax": 47, - "stragglerRank": 0, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 @@ -73094,179 +75673,54592 @@ "tokensPerRank": 16, "globalTokens": 128, "dispatch": { - "p50": 42.67999902367592, - "p90": 45.27999833226204, - "p95": 46.799998730421066, - "p99": 49.720000475645065 + "p50": 74.81600344181061, + "p90": 102.65599936246872, + "p95": 110.68800091743469, + "p99": 122.49600142240524 }, "combine": { - "p50": 24.921000003814697, - "p90": 27.240000665187836, - "p95": 28.07999961078167, - "p99": 30.27999959886074 + "p50": 68.2239979505539, + "p90": 86.14400029182434, + "p95": 90.4960036277771, + "p99": 105.95200210809708 }, "roundtrip": { - "p50": 67.9209977388382, - "p90": 71.04100286960602, - "p95": 72.12000340223312, - "p99": 74.08100366592407 + "p50": 121.44000083208084, + "p90": 152.25599706172943, + "p95": 161.40800714492798, + "p99": 200.9280025959015 }, "isolatedSum": { - "p50": 67.60099902749062, - "p90": 72.51999899744987, - "p95": 74.87999834120274, - "p99": 80.0000000745058 + "p50": 143.0400013923645, + "p90": 188.79999965429306, + "p95": 201.1840045452118, + "p99": 228.44800353050232 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 9920512, - "combineLogicalBytes": 9920512, + "dispatchLogicalBytes": 8503296, + "combineLogicalBytes": 8503296, "fanoutMean": 5.40625, "recvTokensMax": 92, - "stragglerRank": 0, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 82.49600231647491, + "p90": 114.01599645614624, + "p95": 123.74400347471237, + "p99": 148.3519971370697 + }, + "combine": { + "p50": 74.14399832487106, + "p90": 88.60799670219421, + "p95": 94.11200135946274, + "p99": 106.81600123643875 + }, + "roundtrip": { + "p50": 128.54400277137756, + 
"p90": 162.33600676059723, + "p95": 178.20799350738525, + "p99": 222.30400145053864 + }, + "isolatedSum": { + "p50": 156.64000064134598, + "p90": 202.62399315834045, + "p95": 217.8560048341751, + "p99": 255.16799837350845 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 16908288, + "combineLogicalBytes": 16908288, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 94.36800330877304, + "p90": 133.02400708198547, + "p95": 138.49599659442902, + "p99": 182.20800161361694 + }, + "combine": { + "p50": 81.44000172615051, + "p90": 95.42399644851685, + "p95": 100.5759984254837, + "p99": 123.74400347471237 + }, + "roundtrip": { + "p50": 151.2320041656494, + "p90": 172.03199863433838, + "p95": 182.17599391937256, + "p99": 404.1599929332733 + }, + "isolatedSum": { + "p50": 175.80800503492355, + "p90": 228.44800353050232, + "p95": 239.07199501991272, + "p99": 305.9520050883293 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 33423360, + "combineLogicalBytes": 33423360, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 109.0880036354065, + "p90": 134.68800485134125, + "p95": 142.752006649971, + "p99": 173.3119934797287 + }, + "combine": { + "p50": 97.43999689817429, + "p90": 114.97599631547928, + "p95": 121.08799815177917, + "p99": 138.75199854373932 + }, + "roundtrip": { + "p50": 180.1919937133789, + "p90": 205.56800067424774, + "p95": 210.07999777793884, + "p99": 237.7600073814392 + }, + "isolatedSum": { + "p50": 206.52800053358078, + "p90": 249.66400116682053, + "p95": 263.8400048017502, + "p99": 312.063992023468 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 66576384, + "combineLogicalBytes": 66576384, + "fanoutMean": 5.291015625, + 
"recvTokensMax": 723, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 } ] - } - ], - "failures": [ - { - "id": "cxf-25e7e895", - "identity": "h100|deepep|7168|8|256|fp8|ll|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", - "generatedAt": "2026-06-26T23:49:09.827299+00:00", - "publicationStatus": "diagnostic", - "status": "valid", - "sku": "h100", - "backend": "deepep", - "phase": "decode", - "config": "fp8/ll/layout-and-dispatch", - "reason": "anomaly:roundtrip_gt_isolated_sum", - "returnCode": null, - "run": { - "id": "28271594334", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271594334", - "createdAt": "2026-06-26T23:47:39Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - } - }, - { - "id": "cxf-6e691abd", - "identity": "h100|deepep|7168|8|256|fp8|ll|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|ac583971f94b176", - "generatedAt": "2026-06-26T17:32:59.549027+00:00", - "publicationStatus": "diagnostic", - "status": "valid", - "sku": "h100", - "backend": "deepep", - "phase": "decode", - "config": "fp8/ll/layout-and-dispatch", - "reason": "anomaly:roundtrip_gt_isolated_sum", - "returnCode": null, - "run": { - "id": "28254359089", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254359089", - "createdAt": "2026-06-26T17:27:42Z", - "sha": "60dec7d70f554e252fec87709e2be52752947db1" - } }, { - "id": "cxf-433580a5", - "identity": "h100|deepep|7168|8|256|fp8|ll|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", - "generatedAt": "2026-06-26T23:49:16.484836+00:00", - "publicationStatus": "diagnostic", + "id": "cx-a3bb3bd5", + "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|fp8-saturation|none|none|0|tuned||8c8497a77d9085d", + "colorKey": "h200_d982b749", + "comparisonKey": "1e550a8055ce0039", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:06:16.783949+00:00", 
"status": "valid", - "sku": "h100", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_12", + "sku": "h200", "backend": "deepep", "phase": "decode", - "config": "fp8/ll/runtime-visible", - "reason": "anomaly:roundtrip_gt_isolated_sum", - "returnCode": null, - "run": { - "id": "28271598000", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271598000", - "createdAt": "2026-06-26T23:47:46Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - } + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "fp8-saturation", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "8c8497a77d9085d", + "workloadId": "set:4:d8d49658059863f2", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28272139795", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272139795", + "createdAt": 
"2026-06-27T00:06:16.783949+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 82.65600353479385, + "p90": 133.59999656677246, + "p95": 142.59199798107147, + "p99": 158.4320068359375 + }, + "combine": { + "p50": 76.38400048017502, + "p90": 99.61599856615067, + "p95": 103.84000092744827, + "p99": 158.1760048866272 + }, + "roundtrip": { + "p50": 128.35200130939484, + "p90": 157.21599757671356, + "p95": 169.63200271129608, + "p99": 325.6959915161133 + }, + "isolatedSum": { + "p50": 159.04000401496887, + "p90": 233.21599513292313, + "p95": 246.43199890851974, + "p99": 316.6080117225647 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 73.60000163316727, + "p90": 94.81599926948547, + "p95": 101.82400047779083, + "p99": 127.32799351215363 + }, + "combine": { + "p50": 70.23999840021133, + "p90": 99.16800260543823, + "p95": 101.34399682283401, + "p99": 121.34400010108948 + }, + "roundtrip": { + "p50": 130.5599957704544, + "p90": 186.46399676799774, + "p95": 191.3280040025711, + "p99": 227.48799622058868 + }, + "isolatedSum": { + "p50": 143.8400000333786, + "p90": 193.9840018749237, + "p95": 203.16799730062485, + "p99": 248.6719936132431 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 84.70399677753448, + "p90": 100.44799745082855, + "p95": 106.55999928712845, + "p99": 121.18399888277054 + }, + "combine": { + "p50": 77.47200131416321, + "p90": 89.47200328111649, + "p95": 
95.32800316810608, + "p99": 106.1440035700798 + }, + "roundtrip": { + "p50": 137.37599551677704, + "p90": 158.49600732326508, + "p95": 167.42399334907532, + "p99": 188.54400515556335 + }, + "isolatedSum": { + "p50": 162.1759980916977, + "p90": 189.92000073194504, + "p95": 201.88800245523453, + "p99": 227.32800245285034 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 117.69600212574005, + "p90": 133.15199315547943, + "p95": 140.25600254535675, + "p99": 154.7199934720993 + }, + "combine": { + "p50": 105.82400113344193, + "p90": 123.55200201272964, + "p95": 129.50399518013, + "p99": 141.85599982738495 + }, + "roundtrip": { + "p50": 196.83200120925903, + "p90": 213.69600296020508, + "p95": 222.04799950122833, + "p99": 265.8880054950714 + }, + "isolatedSum": { + "p50": 223.52000325918198, + "p90": 256.7039951682091, + "p95": 269.75999772548676, + "p99": 296.57599329948425 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-9ca51f4f", + "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h200_d982b749", + "comparisonKey": "a8d7aa1ea70e9702", + "schemaVersion": 3, + "generatedAt": "2026-06-27T10:26:23.408406+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_7", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": 
"layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": "set:8:d8d49658059863f2", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28286432534", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28286432534", + "createdAt": "2026-06-27T10:26:23.408406+00:00", + "sha": "91c7acf59a5e524f37742922ec67721d86a03f6b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 72.80000299215317, + "p90": 86.7839977145195, + "p95": 94.08000111579895, + "p99": 129.12000715732574 + }, + "combine": { + "p50": 69.82400268316269, + "p90": 75.68000257015228, + "p95": 78.75200361013412, + "p99": 84.927998483181 + }, + "roundtrip": { + "p50": 124.64000284671783, + "p90": 133.88800621032715, + "p95": 138.65600526332855, + "p99": 154.62400019168854 + }, + "isolatedSum": { + "p50": 142.62400567531586, + "p90": 162.46400028467178, + "p95": 
172.83200472593307, + "p99": 214.04800564050674 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 72.25599884986877, + "p90": 81.53600245714188, + "p95": 87.64799684286118, + "p99": 115.68000167608261 + }, + "combine": { + "p50": 69.7920024394989, + "p90": 73.72800260782242, + "p95": 78.40000092983246, + "p99": 85.40800213813782 + }, + "roundtrip": { + "p50": 123.74400347471237, + "p90": 142.84799993038177, + "p95": 166.30400717258453, + "p99": 190.2720034122467 + }, + "isolatedSum": { + "p50": 142.04800128936768, + "p90": 155.2640050649643, + "p95": 166.04799777269363, + "p99": 201.08800381422043 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 73.11999797821045, + "p90": 78.87999713420868, + "p95": 81.82399719953537, + "p99": 103.71199995279312 + }, + "combine": { + "p50": 70.46400010585785, + "p90": 76.92799717187881, + "p95": 79.48800176382065, + "p99": 91.07200056314468 + }, + "roundtrip": { + "p50": 126.08000636100769, + "p90": 152.41600573062897, + "p95": 166.55999422073364, + "p99": 194.0159946680069 + }, + "isolatedSum": { + "p50": 143.5839980840683, + "p90": 155.8079943060875, + "p95": 161.31199896335602, + "p99": 194.7840005159378 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 74.49600100517273, + 
"p90": 88.41600269079208, + "p95": 94.65599805116653, + "p99": 108.92800241708755 + }, + "combine": { + "p50": 71.07199728488922, + "p90": 86.68799698352814, + "p95": 92.22400188446045, + "p99": 99.42399710416794 + }, + "roundtrip": { + "p50": 125.47199428081512, + "p90": 137.9839926958084, + "p95": 149.98400211334229, + "p99": 160.35200655460358 + }, + "isolatedSum": { + "p50": 145.56799829006195, + "p90": 175.10399967432022, + "p95": 186.87999993562698, + "p99": 208.3519995212555 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 79.52000200748444, + "p90": 95.10400146245956, + "p95": 101.56799852848053, + "p99": 126.11199915409088 + }, + "combine": { + "p50": 74.5920017361641, + "p90": 89.4400030374527, + "p95": 96.79999947547913, + "p99": 107.13600367307663 + }, + "roundtrip": { + "p50": 130.17599284648895, + "p90": 145.05599439144135, + "p95": 155.2319973707199, + "p99": 176.32000148296356 + }, + "isolatedSum": { + "p50": 154.11200374364853, + "p90": 184.54400449991226, + "p95": 198.36799800395966, + "p99": 233.2480028271675 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 81.15199953317642, + "p90": 91.67999774217606, + "p95": 97.24800288677216, + "p99": 135.3919953107834 + }, + "combine": { + "p50": 78.59200239181519, + "p90": 85.66399663686752, + "p95": 87.90399879217148, + "p99": 94.78399902582169 + }, + "roundtrip": { + "p50": 136.7039978504181, + "p90": 147.74399995803833, + "p95": 156.51200711727142, + "p99": 208.48000049591064 + }, + 
"isolatedSum": { + "p50": 159.7440019249916, + "p90": 177.34399437904358, + "p95": 185.15200167894363, + "p99": 230.17599433660507 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 96.54399752616882, + "p90": 109.72800105810165, + "p95": 117.63200163841248, + "p99": 141.59999787807465 + }, + "combine": { + "p50": 88.57599645853043, + "p90": 97.82399982213974, + "p95": 102.04800218343735, + "p99": 136.4160031080246 + }, + "roundtrip": { + "p50": 163.26400637626648, + "p90": 180.92800676822662, + "p95": 191.13600254058838, + "p99": 213.56800198554993 + }, + "isolatedSum": { + "p50": 185.11999398469925, + "p90": 207.5520008802414, + "p95": 219.68000382184982, + "p99": 278.01600098609924 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 117.79200285673141, + "p90": 128.00000607967377, + "p95": 133.63200426101685, + "p99": 155.74400126934052 + }, + "combine": { + "p50": 104.89600151777267, + "p90": 113.15199732780457, + "p95": 118.75200271606445, + "p99": 133.40799510478973 + }, + "roundtrip": { + "p50": 196.79999351501465, + "p90": 208.8959962129593, + "p95": 215.10399878025055, + "p99": 228.35199534893036 + }, + "isolatedSum": { + "p50": 222.6880043745041, + "p90": 241.15200340747833, + "p95": 252.3840069770813, + "p99": 289.15199637413025 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 
600, + "trials": 3 + } + ] + }, + { + "id": "cx-b7604172", + "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||b029c1a6fded400", + "colorKey": "h200_d982b749", + "comparisonKey": "4dde4e46080a91eb", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:14:07.082435+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_2", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b029c1a6fded400", + "workloadId": "set:3:07d544ac2af401ec", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28272379468", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272379468", + "createdAt": 
"2026-06-27T00:14:07.082435+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 72.86400347948074, + "p90": 82.24000036716461, + "p95": 88.73599767684937, + "p99": 117.66400188207626 + }, + "combine": { + "p50": 70.01599669456482, + "p90": 75.39200037717819, + "p95": 80.6720033288002, + "p99": 96.0640013217926 + }, + "roundtrip": { + "p50": 123.90399724245071, + "p90": 139.74399864673615, + "p95": 148.47999811172485, + "p99": 178.75200510025024 + }, + "isolatedSum": { + "p50": 142.88000017404556, + "p90": 157.6320007443428, + "p95": 169.40800100564957, + "p99": 213.72800320386887 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 81.66400343179703, + "p90": 93.53599697351456, + "p95": 100.70399940013885, + "p99": 128.09599936008453 + }, + "combine": { + "p50": 78.11199873685837, + "p90": 84.51200276613235, + "p95": 89.02399986982346, + "p99": 123.6800029873848 + }, + "roundtrip": { + "p50": 135.13599336147308, + "p90": 146.7200070619583, + "p95": 153.9199948310852, + "p99": 176.89600586891174 + }, + "isolatedSum": { + "p50": 159.7760021686554, + "p90": 178.0479997396469, + "p95": 189.7279992699623, + "p99": 251.77600234746933 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 117.63200163841248, + "p90": 127.3919939994812, + "p95": 134.0479999780655, + "p99": 154.94400262832642 + }, + "combine": { + "p50": 104.67199981212616, + "p90": 115.42399972677231, + 
"p95": 121.98399752378464, + "p99": 159.93599593639374 + }, + "roundtrip": { + "p50": 196.25599682331085, + "p90": 206.08000457286835, + "p95": 214.08000588417053, + "p99": 245.27999758720398 + }, + "isolatedSum": { + "p50": 222.30400145053864, + "p90": 242.8159937262535, + "p95": 256.0319975018501, + "p99": 314.87999856472015 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-875c4f49", + "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||c774c8e4abb34da", + "colorKey": "h200_d982b749", + "comparisonKey": "c8b8b28ca3d145bb", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:54:14.463003+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_0", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 
+ }, + "routingConsistent": true, + "traceSignature": "c774c8e4abb34da", + "workloadId": "set:5:d8d49658059863f2", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28273509838", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28273509838", + "createdAt": "2026-06-27T00:54:14.463003+00:00", + "sha": "2c15d9415503e9ccb84cd49cf446a122796efc1e" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 71.3919997215271, + "p90": 87.5839963555336, + "p95": 96.3520035147667, + "p99": 139.55199718475342 + }, + "combine": { + "p50": 68.09599697589874, + "p90": 79.55200225114822, + "p95": 84.95999872684479, + "p99": 111.32799834012985 + }, + "roundtrip": { + "p50": 119.55200135707855, + "p90": 147.20000326633453, + "p95": 157.18400478363037, + "p99": 204.6079933643341 + }, + "isolatedSum": { + "p50": 139.48799669742584, + "p90": 167.13599860668182, + "p95": 181.31200224161148, + "p99": 250.87999552488327 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 72.9919970035553, + "p90": 94.36800330877304, + "p95": 103.13600301742554, + "p99": 130.68799674510956 + }, + "combine": { + "p50": 68.12799721956253, + "p90": 80.9599980711937, + "p95": 88.19200098514557, + "p99": 105.15200346708298 + }, + "roundtrip": { + "p50": 121.5360015630722, + "p90": 147.16799557209015, + "p95": 157.98400342464447, + "p99": 185.92000007629395 + }, + "isolatedSum": { + "p50": 141.11999422311783, + "p90": 175.32800137996674, + "p95": 191.3280040025711, + "p99": 
235.84000021219254 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 73.02399724721909, + "p90": 95.48799693584442, + "p95": 104.86400127410889, + "p99": 133.08799266815186 + }, + "combine": { + "p50": 68.76800209283829, + "p90": 80.57600259780884, + "p95": 86.30400151014328, + "p99": 105.92000186443329 + }, + "roundtrip": { + "p50": 120.12799829244614, + "p90": 145.56799829006195, + "p95": 155.64799308776855, + "p99": 182.68799781799316 + }, + "isolatedSum": { + "p50": 141.79199934005737, + "p90": 176.06399953365326, + "p95": 191.16800278425217, + "p99": 239.00799453258514 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 73.66400212049484, + "p90": 93.75999867916107, + "p95": 98.65599870681763, + "p99": 113.18399757146835 + }, + "combine": { + "p50": 68.76800209283829, + "p90": 80.54400235414505, + "p95": 82.49600231647491, + "p99": 91.77599847316742 + }, + "roundtrip": { + "p50": 121.08799815177917, + "p90": 145.9839940071106, + "p95": 156.99200332164764, + "p99": 216.35200083255768 + }, + "isolatedSum": { + "p50": 142.43200421333313, + "p90": 174.30400103330612, + "p95": 181.15200102329254, + "p99": 204.95999604463577 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 73.91999661922455, + "p90": 
96.67199850082397, + "p95": 103.2319962978363, + "p99": 125.34399330615997 + }, + "combine": { + "p50": 70.75200229883194, + "p90": 84.03199911117554, + "p95": 89.59999680519104, + "p99": 103.87200117111206 + }, + "roundtrip": { + "p50": 123.9359974861145, + "p90": 155.8080017566681, + "p95": 170.49600183963776, + "p99": 205.6960016489029 + }, + "isolatedSum": { + "p50": 144.6719989180565, + "p90": 180.7039976119995, + "p95": 192.83199310302734, + "p99": 229.21599447727203 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-19b41153", + "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|small-amplitude|none|none|0|tuned||8c8497a77d9085d", + "colorKey": "h200_d982b749", + "comparisonKey": "fb9666d12f9a34f8", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:05:55.021886+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_0", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "small-amplitude", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": 
"backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "8c8497a77d9085d", + "workloadId": "set:4:d8d49658059863f2", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28272132556", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272132556", + "createdAt": "2026-06-27T00:05:55.021886+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 72.31999933719635, + "p90": 95.0080007314682, + "p95": 99.93600100278854, + "p99": 117.69600212574005 + }, + "combine": { + "p50": 68.00000369548798, + "p90": 79.55200225114822, + "p95": 85.79199761152267, + "p99": 114.04799669981003 + }, + "roundtrip": { + "p50": 120.70400267839432, + "p90": 148.60799908638, + "p95": 156.54399991035461, + "p99": 199.0399956703186 + }, + "isolatedSum": { + "p50": 140.32000303268433, + "p90": 174.56000298261642, + "p95": 185.72799861431122, + "p99": 231.74399882555008 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 73.02399724721909, + "p90": 95.87199985980988, + "p95": 102.91200131177902, + "p99": 124.35200065374374 + }, + "combine": { + "p50": 68.67200136184692, + "p90": 82.75199681520462, + "p95": 89.53599631786346, + "p99": 112.96000331640244 + }, + "roundtrip": { + "p50": 123.10399860143661, + "p90": 151.39199793338776, + "p95": 
160.19199788570404, + "p99": 189.69599902629852 + }, + "isolatedSum": { + "p50": 141.695998609066, + "p90": 178.6239966750145, + "p95": 192.4479976296425, + "p99": 237.31200397014618 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 87.52000331878662, + "p90": 135.23200154304504, + "p95": 142.04800128936768, + "p99": 161.21600568294525 + }, + "combine": { + "p50": 77.504001557827, + "p90": 92.38400310277939, + "p95": 97.120001912117, + "p99": 111.77600175142288 + }, + "roundtrip": { + "p50": 135.77599823474884, + "p90": 158.81599485874176, + "p95": 168.92799735069275, + "p99": 212.67199516296387 + }, + "isolatedSum": { + "p50": 165.02400487661362, + "p90": 227.61600464582443, + "p95": 239.16800320148468, + "p99": 272.99200743436813 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 116.67200177907944, + "p90": 136.83199882507324, + "p95": 145.79200744628906, + "p99": 161.6320013999939 + }, + "combine": { + "p50": 105.76000064611435, + "p90": 121.63200229406357, + "p95": 128.06400656700134, + "p99": 140.60799777507782 + }, + "roundtrip": { + "p50": 195.93599438667297, + "p90": 217.3759937286377, + "p95": 223.4240025281906, + "p99": 252.9279887676239 + }, + "isolatedSum": { + "p50": 222.4320024251938, + "p90": 258.4640011191368, + "p95": 273.8560140132904, + "p99": 302.2399991750717 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + 
"stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-6b3584db", + "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|wide-dynamic-range|none|none|0|tuned||8c8497a77d9085d", + "colorKey": "h200_d982b749", + "comparisonKey": "0dade16dc8be5c94", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:06:19.346761+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_11", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "wide-dynamic-range", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "8c8497a77d9085d", + "workloadId": "set:4:d8d49658059863f2", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28272136313", + "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272136313", + "createdAt": "2026-06-27T00:06:19.346761+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 75.13599842786789, + "p90": 99.84000027179718, + "p95": 112.38399893045425, + "p99": 175.48799514770508 + }, + "combine": { + "p50": 69.95200365781784, + "p90": 81.95199817419052, + "p95": 87.3280018568039, + "p99": 117.95199662446976 + }, + "roundtrip": { + "p50": 127.51999497413635, + "p90": 157.9200029373169, + "p95": 171.7119961977005, + "p99": 223.26399385929108 + }, + "isolatedSum": { + "p50": 145.08800208568573, + "p90": 181.7919984459877, + "p95": 199.71200078725815, + "p99": 293.43999177217484 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 76.03199779987335, + "p90": 112.73600161075592, + "p95": 125.37600100040436, + "p99": 209.4080001115799 + }, + "combine": { + "p50": 70.91200351715088, + "p90": 86.30400151014328, + "p95": 95.13600170612335, + "p99": 123.16799908876419 + }, + "roundtrip": { + "p50": 125.11999905109406, + "p90": 156.99200332164764, + "p95": 177.47199535369873, + "p99": 251.64800882339478 + }, + "isolatedSum": { + "p50": 146.94400131702423, + "p90": 199.0400031208992, + "p95": 220.5120027065277, + "p99": 332.5759992003441 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 84.28800106048584, + "p90": 108.15999656915665, + "p95": 115.29599875211716, + "p99": 152.70400047302246 
+ }, + "combine": { + "p50": 78.36800068616867, + "p90": 91.87199920415878, + "p95": 98.55999797582626, + "p99": 110.17599701881409 + }, + "roundtrip": { + "p50": 138.46400380134583, + "p90": 167.23200678825378, + "p95": 179.45599555969238, + "p99": 238.91200125217438 + }, + "isolatedSum": { + "p50": 162.6560017466545, + "p90": 200.03199577331543, + "p95": 213.85599672794342, + "p99": 262.87999749183655 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 116.09599739313126, + "p90": 137.56799697875977, + "p95": 140.76800644397736, + "p99": 157.47199952602386 + }, + "combine": { + "p50": 104.35199737548828, + "p90": 122.97599762678146, + "p95": 125.50400197505951, + "p99": 148.5760062932968 + }, + "roundtrip": { + "p50": 198.7520009279251, + "p90": 219.2319929599762, + "p95": 227.58400440216064, + "p99": 269.3440020084381 + }, + "isolatedSum": { + "p50": 220.44799476861954, + "p90": 260.54399460554123, + "p95": 266.27200841903687, + "p99": 306.0480058193207 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-f4f3e72f", + "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|zeros|none|none|0|tuned||8c8497a77d9085d", + "colorKey": "h200_d982b749", + "comparisonKey": "c5d592397744e4a1", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:05:52.426268+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_2", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": 
"backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "zeros", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "8c8497a77d9085d", + "workloadId": "set:4:d8d49658059863f2", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28272129001", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272129001", + "createdAt": "2026-06-27T00:05:52.426268+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 73.7600028514862, + "p90": 98.59199821949005, + "p95": 107.77600109577179, + "p99": 133.31200182437897 + }, + "combine": { + "p50": 70.592001080513, + "p90": 84.54400300979614, + "p95": 90.43200314044952, + "p99": 139.26400244235992 + }, + "roundtrip": { + "p50": 125.59999525547028, + "p90": 159.87199544906616, + "p95": 172.57599532604218, + "p99": 367.2960102558136 + }, + "isolatedSum": { + 
"p50": 144.3520039319992, + "p90": 183.1360012292862, + "p95": 198.2080042362213, + "p99": 272.5760042667389 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 74.87999647855759, + "p90": 107.04000294208527, + "p95": 128.80000472068787, + "p99": 359.391987323761 + }, + "combine": { + "p50": 70.49600034952164, + "p90": 84.06399935483932, + "p95": 89.88799899816513, + "p99": 102.9760017991066 + }, + "roundtrip": { + "p50": 124.70400333404541, + "p90": 155.10399639606476, + "p95": 165.72800278663635, + "p99": 202.7519941329956 + }, + "isolatedSum": { + "p50": 145.37599682807922, + "p90": 191.1040022969246, + "p95": 218.688003718853, + "p99": 462.3679891228676 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 83.90399813652039, + "p90": 107.07200318574905, + "p95": 115.9679964184761, + "p99": 136.51199638843536 + }, + "combine": { + "p50": 78.33600044250488, + "p90": 91.93599969148636, + "p95": 97.69599884748459, + "p99": 108.83200168609619 + }, + "roundtrip": { + "p50": 137.2160017490387, + "p90": 170.23999989032745, + "p95": 181.37599527835846, + "p99": 215.36000072956085 + }, + "isolatedSum": { + "p50": 162.23999857902527, + "p90": 199.0080028772354, + "p95": 213.6639952659607, + "p99": 245.34399807453156 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, 
+ "globalTokens": 1024, + "dispatch": { + "p50": 115.99999666213989, + "p90": 135.71199774742126, + "p95": 143.8400000333786, + "p99": 168.67199540138245 + }, + "combine": { + "p50": 104.73600029945374, + "p90": 121.47200107574463, + "p95": 125.47199428081512, + "p99": 163.00800442695618 + }, + "roundtrip": { + "p50": 196.6720074415207, + "p90": 216.19200706481934, + "p95": 220.5120027065277, + "p99": 240.1919960975647 + }, + "isolatedSum": { + "p50": 220.73599696159363, + "p90": 257.1839988231659, + "p95": 269.3119943141937, + "p99": 331.6799998283386 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-eb6d6f9b", + "identity": "h200|deepep|7168|8|256|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h200_3a47b6c9", + "comparisonKey": "4a72e21e2f542236", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:49:45.031759+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_7", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + 
"resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": "set:8:d8d49658059863f2", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271615137", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271615137", + "createdAt": "2026-06-26T23:49:45.031759+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 70.97599655389786, + "p90": 85.50400286912918, + "p95": 95.36000341176987, + "p99": 316.79999828338623 + }, + "combine": { + "p50": 68.70400160551071, + "p90": 74.72000271081924, + "p95": 78.72000336647034, + "p99": 94.2080020904541 + }, + "roundtrip": { + "p50": 122.56000190973282, + "p90": 143.26399564743042, + "p95": 153.1199961900711, + "p99": 172.2240000963211 + }, + "isolatedSum": { + "p50": 139.67999815940857, + "p90": 160.22400557994843, + "p95": 174.0800067782402, + "p99": 411.00800037384033 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 72.12799787521362, + "p90": 85.24800091981888, + "p95": 91.93599969148636, + "p99": 119.48800086975098 + }, + "combine": { + "p50": 68.57600063085556, + "p90": 72.83200323581696, + "p95": 77.15199887752533, + "p99": 83.45600217580795 + }, + "roundtrip": { + "p50": 
120.83200365304947, + "p90": 129.2160004377365, + "p95": 133.215993642807, + "p99": 145.75999975204468 + }, + "isolatedSum": { + "p50": 140.70399850606918, + "p90": 158.08000415563583, + "p95": 169.0879985690117, + "p99": 202.94400304555893 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 73.63200187683105, + "p90": 90.84799885749817, + "p95": 103.64799946546555, + "p99": 133.02400708198547 + }, + "combine": { + "p50": 70.52800059318542, + "p90": 80.86399734020233, + "p95": 87.74399757385254, + "p99": 105.6319996714592 + }, + "roundtrip": { + "p50": 123.64800274372101, + "p90": 149.59999918937683, + "p95": 158.33599865436554, + "p99": 186.0480010509491 + }, + "isolatedSum": { + "p50": 144.16000247001648, + "p90": 171.7119961977005, + "p95": 191.39199703931808, + "p99": 238.65600675344467 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 73.18399846553802, + "p90": 83.03999900817871, + "p95": 94.91200000047684, + "p99": 104.09600287675858 + }, + "combine": { + "p50": 69.2799985408783, + "p90": 77.82399654388428, + "p95": 83.10399949550629, + "p99": 110.04800349473953 + }, + "roundtrip": { + "p50": 123.52000176906586, + "p90": 143.19999516010284, + "p95": 152.0960032939911, + "p99": 205.08800446987152 + }, + "isolatedSum": { + "p50": 142.46399700641632, + "p90": 160.863995552063, + "p95": 178.01599949598312, + "p99": 214.1440063714981 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, 
+ "recvTokensMax": 47, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 71.16799801588058, + "p90": 83.36000144481659, + "p95": 94.11200135946274, + "p99": 106.46399855613708 + }, + "combine": { + "p50": 70.04799693822861, + "p90": 78.07999849319458, + "p95": 83.20000022649765, + "p99": 95.71199864149094 + }, + "roundtrip": { + "p50": 124.54400211572647, + "p90": 144.0960019826889, + "p95": 155.008003115654, + "p99": 204.3839991092682 + }, + "isolatedSum": { + "p50": 141.2159949541092, + "p90": 161.43999993801117, + "p95": 177.3120015859604, + "p99": 202.17599719762802 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 80.60800284147263, + "p90": 89.59999680519104, + "p95": 94.81599926948547, + "p99": 117.53600090742111 + }, + "combine": { + "p50": 77.08799839019775, + "p90": 81.95199817419052, + "p95": 87.3280018568039, + "p99": 95.0080007314682 + }, + "roundtrip": { + "p50": 135.19999384880066, + "p90": 148.47999811172485, + "p95": 156.63999319076538, + "p99": 188.57599794864655 + }, + "isolatedSum": { + "p50": 157.69600123167038, + "p90": 171.55199497938156, + "p95": 182.14400112628937, + "p99": 212.5440016388893 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 94.68799829483032, + "p90": 114.97599631547928, + "p95": 122.36800044775009, + "p99": 148.03199470043182 + }, + "combine": { + "p50": 87.39200234413147, + "p90": 97.59999811649323, + 
"p95": 102.9760017991066, + "p99": 113.95200341939926 + }, + "roundtrip": { + "p50": 158.87999534606934, + "p90": 176.15999281406403, + "p95": 185.2159947156906, + "p99": 225.600004196167 + }, + "isolatedSum": { + "p50": 182.0800006389618, + "p90": 212.5759944319725, + "p95": 225.3440022468567, + "p99": 261.9839981198311 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 116.22399836778641, + "p90": 125.18399953842163, + "p95": 132.32000172138214, + "p99": 140.83200693130493 + }, + "combine": { + "p50": 105.34399747848511, + "p90": 111.32799834012985, + "p95": 116.28799885511398, + "p99": 123.83999675512314 + }, + "roundtrip": { + "p50": 197.60000705718994, + "p90": 207.2640061378479, + "p95": 214.81600403785706, + "p99": 241.05599522590637 + }, + "isolatedSum": { + "p50": 221.56799584627151, + "p90": 236.51199787855148, + "p95": 248.60800057649612, + "p99": 264.67200368642807 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-55459bb6", + "identity": "h200|deepep|7168|8|384|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||d6c49ae98878760", + "colorKey": "h200_d982b749", + "comparisonKey": "a923f4d59c22dd5b", + "schemaVersion": 3, + "generatedAt": "2026-06-27T11:14:11.699427+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_3", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": 
"layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16", + "model": "Kimi-K2", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 384, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "d6c49ae98878760", + "workloadId": "set:8:9a27d0df4b17fa09", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28287501303", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28287501303", + "createdAt": "2026-06-27T11:14:11.699427+00:00", + "sha": "df7fddee0a275156d4c72fa006cd2b73bce72613" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 72.89600372314453, + "p90": 89.9519994854927, + "p95": 103.16800326108932, + "p99": 117.63200163841248 + }, + "combine": { + "p50": 68.80000233650208, + "p90": 81.34400099515915, + "p95": 87.26400136947632, + "p99": 99.84000027179718 + }, + "roundtrip": { + "p50": 125.2799928188324, + "p90": 158.52800011634827, + "p95": 173.2800006866455, + "p99": 205.02400398254395 + }, + "isolatedSum": { + "p50": 141.6960060596466, + "p90": 171.29600048065186, + "p95": 190.43200463056564, + "p99": 
217.47200191020966 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 76.31999999284744, + "p90": 108.83200168609619, + "p95": 118.65600198507309, + "p99": 149.9519944190979 + }, + "combine": { + "p50": 70.23999840021133, + "p90": 85.85599809885025, + "p95": 93.98400038480759, + "p99": 114.656001329422 + }, + "roundtrip": { + "p50": 127.77599692344666, + "p90": 164.44799304008484, + "p95": 174.23999309539795, + "p99": 215.96799790859222 + }, + "isolatedSum": { + "p50": 146.55999839305878, + "p90": 194.68799978494644, + "p95": 212.64000236988068, + "p99": 264.6079957485199 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1218560, + "combineLogicalBytes": 1218560, + "fanoutMean": 5.3125, + "recvTokensMax": 14, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 76.89599692821503, + "p90": 109.8560020327568, + "p95": 119.07199770212173, + "p99": 162.11199760437012 + }, + "combine": { + "p50": 71.00799679756165, + "p90": 83.90399813652039, + "p95": 90.08000046014786, + "p99": 109.15199667215347 + }, + "roundtrip": { + "p50": 130.43199479579926, + "p90": 159.39199924468994, + "p95": 176.57600343227386, + "p99": 223.51999580860138 + }, + "isolatedSum": { + "p50": 147.90399372577667, + "p90": 193.7600001692772, + "p95": 209.1519981622696, + "p99": 271.2639942765236 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2408448, + "combineLogicalBytes": 2408448, + "fanoutMean": 5.25, + "recvTokensMax": 26, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 77.02399790287018, + "p90": 101.31199657917023, 
+ "p95": 112.44799941778183, + "p99": 129.85600531101227 + }, + "combine": { + "p50": 71.71200215816498, + "p90": 84.927998483181, + "p95": 90.97599983215332, + "p99": 101.95200145244598 + }, + "roundtrip": { + "p50": 128.76799702644348, + "p90": 155.13600409030914, + "p95": 164.32000696659088, + "p99": 192.51200556755066 + }, + "isolatedSum": { + "p50": 148.73600006103516, + "p90": 186.23999506235123, + "p95": 203.42399924993515, + "p99": 231.80800676345825 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4831232, + "combineLogicalBytes": 4831232, + "fanoutMean": 5.265625, + "recvTokensMax": 48, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 79.6160027384758, + "p90": 105.47199845314026, + "p95": 115.23199826478958, + "p99": 141.53599739074707 + }, + "combine": { + "p50": 72.41600006818771, + "p90": 86.14400029182434, + "p95": 92.00000017881393, + "p99": 109.40799862146378 + }, + "roundtrip": { + "p50": 131.20000064373016, + "p90": 157.27999806404114, + "p95": 168.64000260829926, + "p99": 207.74400234222412 + }, + "isolatedSum": { + "p50": 152.0320028066635, + "p90": 191.6159987449646, + "p95": 207.23199844360352, + "p99": 250.94399601221085 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9848832, + "combineLogicalBytes": 9848832, + "fanoutMean": 5.3671875, + "recvTokensMax": 91, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 88.86399865150452, + "p90": 114.72000181674957, + "p95": 122.8799968957901, + "p99": 132.47999548912048 + }, + "combine": { + "p50": 79.26400005817413, + "p90": 90.01599997282028, + "p95": 98.01600128412247, + "p99": 109.92000252008438 + }, + "roundtrip": { + "p50": 139.77600634098053, + "p90": 164.2879992723465, + "p95": 177.63200402259827, + "p99": 194.59199905395508 + }, + "isolatedSum": { + 
"p50": 168.12799870967865, + "p90": 204.73600178956985, + "p95": 220.89599817991257, + "p99": 242.39999800920486 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 19496960, + "fanoutMean": 5.3125, + "recvTokensMax": 178, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 98.88000041246414, + "p90": 118.01599711179733, + "p95": 126.81600451469421, + "p99": 194.2719966173172 + }, + "combine": { + "p50": 88.22400122880936, + "p90": 102.27199643850327, + "p95": 108.2879975438118, + "p99": 123.07199835777283 + }, + "roundtrip": { + "p50": 163.55200111865997, + "p90": 186.11200153827667, + "p95": 196.83200120925903, + "p99": 244.3840056657791 + }, + "isolatedSum": { + "p50": 187.1040016412735, + "p90": 220.2879935503006, + "p95": 235.104002058506, + "p99": 317.34399497509 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 38836224, + "fanoutMean": 5.291015625, + "recvTokensMax": 372, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 119.03999745845795, + "p90": 139.42399621009827, + "p95": 145.4080045223236, + "p99": 166.24000668525696 + }, + "combine": { + "p50": 106.36799782514572, + "p90": 121.37600034475327, + "p95": 126.3359934091568, + "p99": 143.0400013923645 + }, + "roundtrip": { + "p50": 199.072003364563, + "p90": 215.26400744915009, + "p95": 224.48000311851501, + "p99": 243.3599978685379 + }, + "isolatedSum": { + "p50": 225.40799528360367, + "p90": 260.79999655485153, + "p95": 271.7439979314804, + "p99": 309.28000807762146 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77514752, + "combineLogicalBytes": 77514752, + "fanoutMean": 5.2802734375, + "recvTokensMax": 707, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + 
} + ] + }, + { + "id": "cx-dea4952a", + "identity": "h200|deepep|7168|8|384|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||d6c49ae98878760", + "colorKey": "h200_3a47b6c9", + "comparisonKey": "f2cda8ef40003c42", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:53:13.205485+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_7", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16", + "model": "Kimi-K2", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 384, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "d6c49ae98878760", + "workloadId": "set:8:9a27d0df4b17fa09", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271728983", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271728983", + "createdAt": "2026-06-26T23:53:13.205485+00:00", + "sha": 
"45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 73.53600114583969, + "p90": 98.14400225877762, + "p95": 107.45599865913391, + "p99": 121.63200229406357 + }, + "combine": { + "p50": 68.2239979505539, + "p90": 82.24000036716461, + "p95": 87.26400136947632, + "p99": 110.07999628782272 + }, + "roundtrip": { + "p50": 125.59999525547028, + "p90": 155.39200603961945, + "p95": 163.68000209331512, + "p99": 201.6959935426712 + }, + "isolatedSum": { + "p50": 141.75999909639359, + "p90": 180.38400262594223, + "p95": 194.72000002861023, + "p99": 231.7119985818863 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 75.87199658155441, + "p90": 103.2319962978363, + "p95": 112.15999722480774, + "p99": 193.05600225925446 + }, + "combine": { + "p50": 68.60800087451935, + "p90": 83.5840031504631, + "p95": 90.30400216579437, + "p99": 129.60000336170197 + }, + "roundtrip": { + "p50": 123.23199957609177, + "p90": 153.31199765205383, + "p95": 164.38399255275726, + "p99": 185.37600338459015 + }, + "isolatedSum": { + "p50": 144.47999745607376, + "p90": 186.8159994482994, + "p95": 202.4639993906021, + "p99": 322.6560056209564 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1218560, + "combineLogicalBytes": 1218560, + "fanoutMean": 5.3125, + "recvTokensMax": 14, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 74.87999647855759, + "p90": 96.92800045013428, + "p95": 105.85600137710571, + "p99": 121.15199863910675 + }, + "combine": { + "p50": 69.2799985408783, + "p90": 82.56000280380249, + "p95": 90.30400216579437, + "p99": 102.04800218343735 + }, + 
"roundtrip": { + "p50": 125.2799928188324, + "p90": 152.28800475597382, + "p95": 160.8320027589798, + "p99": 174.55999553203583 + }, + "isolatedSum": { + "p50": 144.15999501943588, + "p90": 179.48800325393677, + "p95": 196.16000354290009, + "p99": 223.2000008225441 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2408448, + "combineLogicalBytes": 2408448, + "fanoutMean": 5.25, + "recvTokensMax": 26, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 76.54400169849396, + "p90": 104.2879968881607, + "p95": 111.42399907112122, + "p99": 138.5599970817566 + }, + "combine": { + "p50": 70.52800059318542, + "p90": 85.66399663686752, + "p95": 91.67999774217606, + "p99": 102.59199887514114 + }, + "roundtrip": { + "p50": 126.39999389648438, + "p90": 154.55999970436096, + "p95": 166.97600483894348, + "p99": 208.67200195789337 + }, + "isolatedSum": { + "p50": 147.07200229167938, + "p90": 189.95199352502823, + "p95": 203.10399681329727, + "p99": 241.15199595689774 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4831232, + "combineLogicalBytes": 4831232, + "fanoutMean": 5.265625, + "recvTokensMax": 48, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 78.40000092983246, + "p90": 99.07200187444687, + "p95": 105.98400235176086, + "p99": 126.3359934091568 + }, + "combine": { + "p50": 71.61600142717361, + "p90": 85.40800213813782, + "p95": 90.27200192213058, + "p99": 109.40799862146378 + }, + "roundtrip": { + "p50": 129.02399897575378, + "p90": 156.2879979610443, + "p95": 166.143998503685, + "p99": 196.51199877262115 + }, + "isolatedSum": { + "p50": 150.01600235700607, + "p90": 184.4800040125847, + "p95": 196.25600427389145, + "p99": 235.74399203062057 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9848832, + "combineLogicalBytes": 
9848832, + "fanoutMean": 5.3671875, + "recvTokensMax": 91, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 85.40800213813782, + "p90": 105.12000322341919, + "p95": 114.04799669981003, + "p99": 131.71200454235077 + }, + "combine": { + "p50": 77.91999727487564, + "p90": 90.59199690818787, + "p95": 96.63999825716019, + "p99": 105.18400371074677 + }, + "roundtrip": { + "p50": 137.2160017490387, + "p90": 163.07200491428375, + "p95": 172.35200107097626, + "p99": 208.064004778862 + }, + "isolatedSum": { + "p50": 163.32799941301346, + "p90": 195.71200013160706, + "p95": 210.68799495697021, + "p99": 236.89600825309753 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 19496960, + "fanoutMean": 5.3125, + "recvTokensMax": 178, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 99.55199807882309, + "p90": 126.65599584579468, + "p95": 135.00800728797913, + "p99": 167.10400581359863 + }, + "combine": { + "p50": 89.24800157546997, + "p90": 106.1440035700798, + "p95": 111.23199760913849, + "p99": 126.65599584579468 + }, + "roundtrip": { + "p50": 162.9759967327118, + "p90": 185.88800728321075, + "p95": 193.6960071325302, + "p99": 255.87201118469238 + }, + "isolatedSum": { + "p50": 188.79999965429306, + "p90": 232.79999941587448, + "p95": 246.24000489711761, + "p99": 293.7600016593933 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 38836224, + "fanoutMean": 5.291015625, + "recvTokensMax": 372, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 118.367999792099, + "p90": 135.42400300502777, + "p95": 143.5520052909851, + "p99": 181.88799917697906 + }, + "combine": { + 
"p50": 105.34399747848511, + "p90": 119.99999731779099, + "p95": 126.78399682044983, + "p99": 139.0399932861328 + }, + "roundtrip": { + "p50": 197.53600656986237, + "p90": 215.83999693393707, + "p95": 224.48000311851501, + "p99": 253.1839907169342 + }, + "isolatedSum": { + "p50": 223.7119972705841, + "p90": 255.42400032281876, + "p95": 270.33600211143494, + "p99": 320.9279924631119 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77514752, + "combineLogicalBytes": 77514752, + "fanoutMean": 5.2802734375, + "recvTokensMax": 707, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-14a4cdc0", + "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|alternating-groups@s1|8|decode|normal|none|none|1|tuned||f8662de0b3559f9", + "colorKey": "h200_b02e4015", + "comparisonKey": "7784b2ab75c0721c", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:13:31.348412+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_7", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · alternating-groups@s1", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "alternating-groups", + "routingLabel": "alternating-groups@s1", + "routingStep": 1, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + 
"fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "f8662de0b3559f9", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28272358996", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272358996", + "createdAt": "2026-06-27T00:13:31.348412+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 70.75200229883194, + "p90": 98.39999675750732, + "p95": 108.57599973678589, + "p99": 136.03200018405914 + }, + "combine": { + "p50": 67.52000004053116, + "p90": 79.83999699354172, + "p95": 84.09599959850311, + "p99": 104.09600287675858 + }, + "roundtrip": { + "p50": 122.8799968957901, + "p90": 146.62399888038635, + "p95": 155.32800555229187, + "p99": 178.3359944820404 + }, + "isolatedSum": { + "p50": 138.2720023393631, + "p90": 178.23999375104904, + "p95": 192.671999335289, + "p99": 240.12800306081772 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3297280, + "combineLogicalBytes": 3297280, + "fanoutMean": 3.59375, + "recvTokensMax": 61, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 84.44800227880478, + "p90": 110.30399799346924, + "p95": 138.20800185203552, + "p99": 196.22400403022766 + }, + "combine": { + "p50": 75.16799867153168, + "p90": 85.34400165081024, + "p95": 91.00800007581711, + "p99": 101.02400183677673 + }, + "roundtrip": { + "p50": 135.3919953107834, + "p90": 156.3200056552887, + "p95": 166.4000004529953, + "p99": 
198.36799800395966 + }, + "isolatedSum": { + "p50": 159.61600095033646, + "p90": 195.64799964427948, + "p95": 229.21600192785263, + "p99": 297.2480058670044 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13275136, + "combineLogicalBytes": 13275136, + "fanoutMean": 3.6171875, + "recvTokensMax": 236, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 128.48000228405, + "p90": 142.91200041770935, + "p95": 151.36000514030457, + "p99": 290.0159955024719 + }, + "combine": { + "p50": 111.455999314785, + "p90": 123.6800029873848, + "p95": 127.93600559234619, + "p99": 143.71199905872345 + }, + "roundtrip": { + "p50": 210.81599593162537, + "p90": 223.26399385929108, + "p95": 229.34399545192719, + "p99": 257.79199600219727 + }, + "isolatedSum": { + "p50": 239.936001598835, + "p90": 266.59200340509415, + "p95": 279.29601073265076, + "p99": 433.7279945611954 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 53172224, + "combineLogicalBytes": 53172224, + "fanoutMean": 3.6220703125, + "recvTokensMax": 934, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-4bdc0b92", + "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|alternating-groups@s2|8|decode|normal|none|none|2|tuned||3cd13eac5b27759", + "colorKey": "h200_ad2e3b5c", + "comparisonKey": "7784b2ab75c0721c", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:13:31.907403+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_4", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 
EP8 · deepep · bf16 · alternating-groups@s2", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "alternating-groups", + "routingLabel": "alternating-groups@s2", + "routingStep": 2, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "3cd13eac5b27759", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28272362308", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272362308", + "createdAt": "2026-06-27T00:13:31.907403+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 71.32799923419952, + "p90": 93.82399916648865, + "p95": 109.72800105810165, + "p99": 145.1520025730133 + }, + "combine": { + "p50": 66.880002617836, + "p90": 72.25599884986877, + "p95": 80.32000064849854, + "p99": 91.39200299978256 + }, + "roundtrip": { + "p50": 123.48800152540207, + "p90": 140.51200449466705, + "p95": 156.8319946527481, + "p99": 195.64799964427948 + }, + "isolatedSum": { + "p50": 138.20800185203552, + "p90": 166.07999801635742, + "p95": 190.0480017066002, + "p99": 236.54400557279587 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3297280, + 
"combineLogicalBytes": 3297280, + "fanoutMean": 3.59375, + "recvTokensMax": 61, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 81.4720019698143, + "p90": 136.48000359535217, + "p95": 151.13599598407745, + "p99": 198.04799556732178 + }, + "combine": { + "p50": 75.80800354480743, + "p90": 89.47200328111649, + "p95": 102.91200131177902, + "p99": 122.36800044775009 + }, + "roundtrip": { + "p50": 134.5279961824417, + "p90": 149.31200444698334, + "p95": 162.9440039396286, + "p99": 204.73599433898926 + }, + "isolatedSum": { + "p50": 157.28000551462173, + "p90": 225.95200687646866, + "p95": 254.04799729585648, + "p99": 320.41599601507187 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13275136, + "combineLogicalBytes": 13275136, + "fanoutMean": 3.6171875, + "recvTokensMax": 236, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 124.60800260305405, + "p90": 131.80799782276154, + "p95": 137.2160017490387, + "p99": 164.35199975967407 + }, + "combine": { + "p50": 111.00800335407257, + "p90": 119.39200013875961, + "p95": 125.5359947681427, + "p99": 155.03999590873718 + }, + "roundtrip": { + "p50": 208.41600000858307, + "p90": 218.6560034751892, + "p95": 229.72799837589264, + "p99": 263.3279860019684 + }, + "isolatedSum": { + "p50": 235.61600595712662, + "p90": 251.19999796152115, + "p95": 262.7519965171814, + "p99": 319.39199566841125 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 53172224, + "combineLogicalBytes": 53172224, + "fanoutMean": 3.6220703125, + "recvTokensMax": 934, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-fcadbf18", + "identity": 
"h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|alternating-groups@s3|8|decode|normal|none|none|3|tuned||f8662de0b3559f9", + "colorKey": "h200_ae2e3cef", + "comparisonKey": "7784b2ab75c0721c", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:13:36.495887+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_1", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · alternating-groups@s3", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "alternating-groups", + "routingLabel": "alternating-groups@s3", + "routingStep": 3, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "f8662de0b3559f9", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28272365812", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272365812", + "createdAt": "2026-06-27T00:13:36.495887+00:00", + "sha": 
"45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 71.48800045251846, + "p90": 94.46399658918381, + "p95": 99.29600358009338, + "p99": 110.23999750614166 + }, + "combine": { + "p50": 67.10399687290192, + "p90": 80.09599894285202, + "p95": 84.54400300979614, + "p99": 108.31999778747559 + }, + "roundtrip": { + "p50": 119.61600184440613, + "p90": 148.83199334144592, + "p95": 158.01599621772766, + "p99": 279.9359858036041 + }, + "isolatedSum": { + "p50": 138.59199732542038, + "p90": 174.55999553203583, + "p95": 183.84000658988953, + "p99": 218.55999529361725 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3297280, + "combineLogicalBytes": 3297280, + "fanoutMean": 3.59375, + "recvTokensMax": 61, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 80.83199709653854, + "p90": 99.45599734783173, + "p95": 105.05600273609161, + "p99": 118.07999759912491 + }, + "combine": { + "p50": 75.23199915885925, + "p90": 87.52000331878662, + "p95": 92.0960009098053, + "p99": 108.51199924945831 + }, + "roundtrip": { + "p50": 133.91999900341034, + "p90": 154.78399395942688, + "p95": 162.04799711704254, + "p99": 176.1920005083084 + }, + "isolatedSum": { + "p50": 156.0639962553978, + "p90": 186.97600066661835, + "p95": 197.1520036458969, + "p99": 226.59199684858322 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13275136, + "combineLogicalBytes": 13275136, + "fanoutMean": 3.6171875, + "recvTokensMax": 236, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 125.15200674533844, + "p90": 135.96799969673157, + "p95": 141.79199934005737, + "p99": 205.34400641918182 + }, + "combine": { + "p50": 109.72800105810165, + "p90": 120.15999853610992, + "p95": 123.36000055074692, + "p99": 
136.7039978504181 + }, + "roundtrip": { + "p50": 207.96799659729004, + "p90": 225.50399601459503, + "p95": 231.77599906921387, + "p99": 246.20799720287323 + }, + "isolatedSum": { + "p50": 234.8800078034401, + "p90": 256.1279982328415, + "p95": 265.1519998908043, + "p99": 342.0480042695999 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 53172224, + "combineLogicalBytes": 53172224, + "fanoutMean": 3.6220703125, + "recvTokensMax": 934, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-f361a9a4", + "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|decode|normal|none|none|0|tuned||2279937619f3971", + "colorKey": "h200_b5c683eb", + "comparisonKey": "d82096ba4baa0cd5", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:56:27.284944+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_8", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · balanced", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced", + "routingLabel": "balanced", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + 
"routingConsistent": true, + "traceSignature": "2279937619f3971", + "workloadId": "set:4:7af12818400d6348", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271830346", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271830346", + "createdAt": "2026-06-26T23:56:27.284944+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 76.73600316047668, + "p90": 126.24000012874603, + "p95": 134.46399569511414, + "p99": 156.63999319076538 + }, + "combine": { + "p50": 72.41600006818771, + "p90": 83.71199667453766, + "p95": 87.07199990749359, + "p99": 99.13600236177444 + }, + "roundtrip": { + "p50": 128.38399410247803, + "p90": 148.03199470043182, + "p95": 154.62400019168854, + "p99": 179.6479970216751 + }, + "isolatedSum": { + "p50": 149.1520032286644, + "p90": 209.9519968032837, + "p95": 221.53599560260773, + "p99": 255.77599555253983 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 8, + "recvTokensMax": 8, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 75.83999633789062, + "p90": 87.99999952316284, + "p95": 98.11200201511383, + "p99": 113.02399635314941 + }, + "combine": { + "p50": 71.84000313282013, + "p90": 79.6160027384758, + "p95": 85.56800335645676, + "p99": 95.87199985980988 + }, + "roundtrip": { + "p50": 126.81600451469421, + "p90": 139.67999815940857, + "p95": 149.63200688362122, + "p99": 170.20800709724426 + }, + "isolatedSum": { + "p50": 147.67999947071075, + "p90": 167.61600226163864, + "p95": 183.6800053715706, + "p99": 
208.8959962129593 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 7340032, + "combineLogicalBytes": 7340032, + "fanoutMean": 8, + "recvTokensMax": 64, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 92.73599833250046, + "p90": 105.50399869680405, + "p95": 112.41599917411804, + "p99": 132.60799646377563 + }, + "combine": { + "p50": 81.98399841785431, + "p90": 93.56799721717834, + "p95": 99.58399832248688, + "p99": 112.57600039243698 + }, + "roundtrip": { + "p50": 148.70400726795197, + "p90": 168.7999963760376, + "p95": 180.7679980993271, + "p99": 196.6720074415207 + }, + "isolatedSum": { + "p50": 174.71999675035477, + "p90": 199.0719959139824, + "p95": 211.99999749660492, + "p99": 245.18399685621262 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 8, + "recvTokensMax": 256, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 139.00800049304962, + "p90": 175.99999904632568, + "p95": 181.34400248527527, + "p99": 197.91999459266663 + }, + "combine": { + "p50": 127.20000743865967, + "p90": 150.68799257278442, + "p95": 153.6639928817749, + "p99": 160.5439931154251 + }, + "roundtrip": { + "p50": 232.92799293994904, + "p90": 266.04801416397095, + "p95": 271.5199887752533, + "p99": 294.20799016952515 + }, + "isolatedSum": { + "p50": 266.2080079317093, + "p90": 326.6879916191101, + "p95": 335.00799536705017, + "p99": 358.46398770809174 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 8, + "recvTokensMax": 1024, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-d65f5a76", + "identity": 
"h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|decode|normal|none|none|0|tuned||ffa946582edb500", + "colorKey": "h200_b5c683eb", + "comparisonKey": "d82096ba4baa0cd5", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:02:47.642624+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_6", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · balanced", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced", + "routingLabel": "balanced", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ffa946582edb500", + "workloadId": "set:8:7af12818400d6348", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28272028751", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272028751", + "createdAt": "2026-06-27T00:02:47.642624+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + 
}, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 75.99999755620956, + "p90": 106.175996363163, + "p95": 117.60000139474869, + "p99": 352.512001991272 + }, + "combine": { + "p50": 70.68800181150436, + "p90": 85.9839990735054, + "p95": 90.52799642086029, + "p99": 104.12800312042236 + }, + "roundtrip": { + "p50": 124.60800260305405, + "p90": 158.62399339675903, + "p95": 166.46400094032288, + "p99": 186.27199530601501 + }, + "isolatedSum": { + "p50": 146.68799936771393, + "p90": 192.1599954366684, + "p95": 208.12799781560898, + "p99": 456.64000511169434 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 8, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 74.65600222349167, + "p90": 98.91200065612793, + "p95": 104.09600287675858, + "p99": 114.84800279140472 + }, + "combine": { + "p50": 70.65600156784058, + "p90": 87.20000088214874, + "p95": 91.32800251245499, + "p99": 106.46399855613708 + }, + "roundtrip": { + "p50": 125.59999525547028, + "p90": 161.02400422096252, + "p95": 170.78399658203125, + "p99": 197.05599546432495 + }, + "isolatedSum": { + "p50": 145.31200379133224, + "p90": 186.11200153827667, + "p95": 195.42400538921356, + "p99": 221.3120013475418 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1835008, + "combineLogicalBytes": 1835008, + "fanoutMean": 8, + "recvTokensMax": 16, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 74.20799881219864, + "p90": 102.24000364542007, + "p95": 111.35999858379364, + "p99": 129.63199615478516 + }, + "combine": { + "p50": 71.87200337648392, + "p90": 88.22400122880936, + "p95": 94.52799707651138, + "p99": 115.26399850845337 + }, + "roundtrip": { + "p50": 125.08800625801086, + 
"p90": 153.53600680828094, + "p95": 163.87200355529785, + "p99": 176.86399817466736 + }, + "isolatedSum": { + "p50": 146.08000218868256, + "p90": 190.46400487422943, + "p95": 205.88799566030502, + "p99": 244.89599466323853 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 8, + "recvTokensMax": 32, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 74.30399954319, + "p90": 98.49599748849869, + "p95": 106.59199953079224, + "p99": 120.19199877977371 + }, + "combine": { + "p50": 71.16799801588058, + "p90": 86.36800199747086, + "p95": 90.52799642086029, + "p99": 109.40799862146378 + }, + "roundtrip": { + "p50": 124.64000284671783, + "p90": 156.73600137233734, + "p95": 164.48000073432922, + "p99": 189.15200233459473 + }, + "isolatedSum": { + "p50": 145.4719975590706, + "p90": 184.86399948596954, + "p95": 197.11999595165253, + "p99": 229.5999974012375 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 7340032, + "combineLogicalBytes": 7340032, + "fanoutMean": 8, + "recvTokensMax": 64, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 75.26399940252304, + "p90": 102.59199887514114, + "p95": 109.76000130176544, + "p99": 125.59999525547028 + }, + "combine": { + "p50": 76.92799717187881, + "p90": 91.23200178146362, + "p95": 94.94400024414062, + "p99": 105.82400113344193 + }, + "roundtrip": { + "p50": 128.7039965391159, + "p90": 160.51200032234192, + "p95": 171.07200622558594, + "p99": 223.13599288463593 + }, + "isolatedSum": { + "p50": 152.19199657440186, + "p90": 193.82400065660477, + "p95": 204.70400154590607, + "p99": 231.4239963889122 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 8, + "recvTokensMax": 128, + 
"stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 90.59199690818787, + "p90": 110.52799969911575, + "p95": 119.00799721479416, + "p99": 143.39199662208557 + }, + "combine": { + "p50": 81.53600245714188, + "p90": 98.11200201511383, + "p95": 105.79200088977814, + "p99": 123.4240010380745 + }, + "roundtrip": { + "p50": 145.4080045223236, + "p90": 173.0239987373352, + "p95": 180.4479956626892, + "p99": 203.45599949359894 + }, + "isolatedSum": { + "p50": 172.12799936532974, + "p90": 208.64000171422958, + "p95": 224.7999981045723, + "p99": 266.81599766016006 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 8, + "recvTokensMax": 256, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 110.84800213575363, + "p90": 130.17599284648895, + "p95": 137.56799697875977, + "p99": 212.12799847126007 + }, + "combine": { + "p50": 95.13600170612335, + "p90": 114.20799791812897, + "p95": 124.57600235939026, + "p99": 243.42399835586548 + }, + "roundtrip": { + "p50": 178.14399302005768, + "p90": 205.24799823760986, + "p95": 233.40800404548645, + "p99": 432.2560131549835 + }, + "isolatedSum": { + "p50": 205.98400384187698, + "p90": 244.38399076461792, + "p95": 262.14399933815, + "p99": 455.55199682712555 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 8, + "recvTokensMax": 512, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 133.44000279903412, + "p90": 152.48000621795654, + "p95": 157.95199573040009, + "p99": 172.2240000963211 + }, + "combine": { + "p50": 125.72799623012543, + "p90": 140.60799777507782, + "p95": 
145.31199634075165, + "p99": 176.7359972000122 + }, + "roundtrip": { + "p50": 237.2480034828186, + "p90": 255.51998615264893, + "p95": 262.65600323677063, + "p99": 295.9040105342865 + }, + "isolatedSum": { + "p50": 259.16799902915955, + "p90": 293.08800399303436, + "p95": 303.26399207115173, + "p99": 348.9599972963333 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 8, + "recvTokensMax": 1024, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-26bc6c27", + "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|8|decode|normal|none|none|0|tuned||d02a66236b524b8", + "colorKey": "h200_d0dfa19a", + "comparisonKey": "5d5c9be2dc9b5f1f", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:56:33.428125+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_10", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · balanced-rank-local", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced-rank-local", + "routingLabel": "balanced-rank-local", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 
1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "d02a66236b524b8", + "workloadId": "set:4:2eebbed158fe1320", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271837870", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271837870", + "createdAt": "2026-06-26T23:56:33.428125+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 65.95200300216675, + "p90": 102.7199998497963, + "p95": 115.55200070142746, + "p99": 166.6560024023056 + }, + "combine": { + "p50": 58.6559996008873, + "p90": 72.4480003118515, + "p95": 78.59200239181519, + "p99": 95.64799815416336 + }, + "roundtrip": { + "p50": 112.44799941778183, + "p90": 152.70400047302246, + "p95": 159.2320054769516, + "p99": 181.2479943037033 + }, + "isolatedSum": { + "p50": 124.60800260305405, + "p90": 175.1680001616478, + "p95": 194.14400309324265, + "p99": 262.30400055646896 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 114688, + "combineLogicalBytes": 114688, + "fanoutMean": 1, + "recvTokensMax": 4, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 67.9360032081604, + "p90": 85.66399663686752, + "p95": 95.0080007314682, + "p99": 111.00800335407257 + }, + "combine": { + "p50": 59.93599817156792, + "p90": 70.88000327348709, + "p95": 77.18399912118912, + "p99": 92.03200042247772 + }, + "roundtrip": { + "p50": 112.2559979557991, + "p90": 138.11199367046356, + "p95": 150.2400040626526, + "p99": 209.6319943666458 + }, + "isolatedSum": { + "p50": 127.87200137972832, + "p90": 156.54399991035461, + "p95": 
172.19199985265732, + "p99": 203.0400037765503 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 1, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 82.97599852085114, + "p90": 103.71199995279312, + "p95": 112.15999722480774, + "p99": 135.23200154304504 + }, + "combine": { + "p50": 69.76000219583511, + "p90": 85.05599945783615, + "p95": 93.88799965381622, + "p99": 128.60800325870514 + }, + "roundtrip": { + "p50": 125.56800246238708, + "p90": 148.70400726795197, + "p95": 165.92000424861908, + "p99": 200.3519982099533 + }, + "isolatedSum": { + "p50": 152.73600071668625, + "p90": 188.76799941062927, + "p95": 206.04799687862396, + "p99": 263.8400048017502 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 1, + "recvTokensMax": 32, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 83.39200168848038, + "p90": 109.27999764680862, + "p95": 120.28799951076508, + "p99": 153.18399667739868 + }, + "combine": { + "p50": 69.50400024652481, + "p90": 82.87999778985977, + "p95": 90.27200192213058, + "p99": 100.89600086212158 + }, + "roundtrip": { + "p50": 128.67200374603271, + "p90": 153.53600680828094, + "p95": 162.62400150299072, + "p99": 190.65600633621216 + }, + "isolatedSum": { + "p50": 152.8960019350052, + "p90": 192.1599954366684, + "p95": 210.56000143289566, + "p99": 254.07999753952026 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 1, + "recvTokensMax": 128, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-b2e52442", + "identity": 
"h200|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|balanced+eplb|8|decode|normal|none|none|0|tuned||f0e66a15078595b", + "colorKey": "h200_06544e53", + "comparisonKey": "57040e121807e028", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:02:47.649756+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_10", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · balanced+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "balanced", + "routingLabel": "balanced+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "f0e66a15078595b", + "workloadId": "set:8:7af12818400d6348", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": 1, + "eplbImbalanceAfter": 1, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28272031884", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272031884", + "createdAt": "2026-06-27T00:02:47.649756+00:00", + "sha": 
"45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 68.15999746322632, + "p90": 100.60799866914749, + "p95": 110.72000116109848, + "p99": 138.75199854373932 + }, + "combine": { + "p50": 60.70400029420853, + "p90": 72.86400347948074, + "p95": 79.3600007891655, + "p99": 86.11200004816055 + }, + "roundtrip": { + "p50": 116.92799627780914, + "p90": 150.2079963684082, + "p95": 158.6879938840866, + "p99": 184.83200669288635 + }, + "isolatedSum": { + "p50": 128.86399775743484, + "p90": 173.47200214862823, + "p95": 190.08000195026398, + "p99": 224.86399859189987 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 229376, + "combineLogicalBytes": 229376, + "fanoutMean": 2, + "recvTokensMax": 3, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 71.29599899053574, + "p90": 104.54399883747101, + "p95": 113.8560026884079, + "p99": 152.99199521541595 + }, + "combine": { + "p50": 61.983998864889145, + "p90": 78.97599786520004, + "p95": 83.5840031504631, + "p99": 98.49599748849869 + }, + "roundtrip": { + "p50": 116.5120005607605, + "p90": 149.88799393177032, + "p95": 163.71199488639832, + "p99": 195.45599818229675 + }, + "isolatedSum": { + "p50": 133.27999785542488, + "p90": 183.51999670267105, + "p95": 197.440005838871, + "p99": 251.48799270391464 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 458752, + "combineLogicalBytes": 458752, + "fanoutMean": 2, + "recvTokensMax": 6, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 72.4480003118515, + "p90": 95.04000097513199, + "p95": 104.63999956846237, + "p99": 125.40799379348755 + }, + "combine": { + "p50": 61.664000153541565, + "p90": 73.02399724721909, + "p95": 81.82399719953537, + "p99": 99.58399832248688 + }, + 
"roundtrip": { + "p50": 117.08799749612808, + "p90": 144.41600441932678, + "p95": 157.72800147533417, + "p99": 314.88001346588135 + }, + "isolatedSum": { + "p50": 134.11200046539307, + "p90": 168.06399822235107, + "p95": 186.46399676799774, + "p99": 224.99199211597443 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 2, + "recvTokensMax": 12, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 73.95199686288834, + "p90": 99.2640033364296, + "p95": 105.8880016207695, + "p99": 122.27199971675873 + }, + "combine": { + "p50": 62.6240000128746, + "p90": 84.25600081682205, + "p95": 90.11200070381165, + "p99": 102.78400033712387 + }, + "roundtrip": { + "p50": 116.15999788045883, + "p90": 150.36800503730774, + "p95": 161.69600188732147, + "p99": 189.08800184726715 + }, + "isolatedSum": { + "p50": 136.57599687576294, + "p90": 183.52000415325165, + "p95": 196.00000232458115, + "p99": 225.0560000538826 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1835008, + "combineLogicalBytes": 1835008, + "fanoutMean": 2, + "recvTokensMax": 24, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 75.87199658155441, + "p90": 96.70399874448776, + "p95": 106.01600259542465, + "p99": 122.94399738311768 + }, + "combine": { + "p50": 62.94400244951248, + "p90": 78.65600287914276, + "p95": 84.73599702119827, + "p99": 96.6079980134964 + }, + "roundtrip": { + "p50": 117.15199798345566, + "p90": 145.11999487876892, + "p95": 153.47200632095337, + "p99": 190.75199961662292 + }, + "isolatedSum": { + "p50": 138.8159990310669, + "p90": 175.36000162363052, + "p95": 190.75199961662292, + "p99": 219.55199539661407 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + 
"fanoutMean": 2, + "recvTokensMax": 48, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 77.82399654388428, + "p90": 99.32799637317657, + "p95": 108.22399705648422, + "p99": 131.52000308036804 + }, + "combine": { + "p50": 66.3359984755516, + "p90": 80.35200089216232, + "p95": 87.74399757385254, + "p99": 170.23999989032745 + }, + "roundtrip": { + "p50": 119.90399658679962, + "p90": 146.7519998550415, + "p95": 154.4959992170334, + "p99": 167.4879938364029 + }, + "isolatedSum": { + "p50": 144.15999501943588, + "p90": 179.6799972653389, + "p95": 195.96799463033676, + "p99": 301.7600029706955 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 7340032, + "combineLogicalBytes": 7340032, + "fanoutMean": 2, + "recvTokensMax": 96, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 83.39200168848038, + "p90": 107.93600231409073, + "p95": 117.47200042009354, + "p99": 157.82399475574493 + }, + "combine": { + "p50": 70.17599791288376, + "p90": 82.36800134181976, + "p95": 89.59999680519104, + "p99": 102.7199998497963 + }, + "roundtrip": { + "p50": 127.51999497413635, + "p90": 154.7199934720993, + "p95": 170.04799842834473, + "p99": 201.27999782562256 + }, + "isolatedSum": { + "p50": 153.56799960136414, + "p90": 190.3040036559105, + "p95": 207.07199722528458, + "p99": 260.54399460554123 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 2, + "recvTokensMax": 192, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 96.19200229644775, + "p90": 114.04799669981003, + "p95": 123.83999675512314, + "p99": 167.4560010433197 + }, + "combine": { + "p50": 84.48000252246857, + "p90": 
95.87199985980988, + "p95": 99.93600100278854, + "p99": 113.92000317573547 + }, + "roundtrip": { + "p50": 156.3200056552887, + "p90": 175.64800381660461, + "p95": 185.56800484657288, + "p99": 221.15199267864227 + }, + "isolatedSum": { + "p50": 180.67200481891632, + "p90": 209.9199965596199, + "p95": 223.77599775791168, + "p99": 281.3760042190552 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 2, + "recvTokensMax": 384, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-9febd1e2", + "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-moving@s1|8|decode|normal|none|none|1|tuned||6288a1aa76c20e7", + "colorKey": "h200_9779cb2d", + "comparisonKey": "65013819dd1ccf9e", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:12:58.540972+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_5", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · hotspot-moving@s1", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-moving", + "routingLabel": "hotspot-moving@s1", + "routingStep": 1, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + 
"kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "6288a1aa76c20e7", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28272345418", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272345418", + "createdAt": "2026-06-27T00:12:58.540972+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 74.97599720954895, + "p90": 90.91199934482574, + "p95": 99.32799637317657, + "p99": 128.83199751377106 + }, + "combine": { + "p50": 70.27199864387512, + "p90": 80.1599994301796, + "p95": 89.21600133180618, + "p99": 107.07200318574905 + }, + "roundtrip": { + "p50": 125.47199428081512, + "p90": 145.6959992647171, + "p95": 153.31199765205383, + "p99": 184.54399704933167 + }, + "isolatedSum": { + "p50": 145.24799585342407, + "p90": 171.07199877500534, + "p95": 188.54399770498276, + "p99": 235.9040006995201 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 64, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 83.26400071382523, + "p90": 95.93600034713745, + "p95": 101.05600208044052, + "p99": 118.65600198507309 + }, + "combine": { + "p50": 78.8159966468811, + "p90": 86.75199747085571, + "p95": 92.03200042247772, + "p99": 111.84000223875046 + }, + "roundtrip": { + "p50": 139.13600146770477, + "p90": 150.68799257278442, + "p95": 155.20000457763672, + "p99": 181.05599284172058 + }, + "isolatedSum": { + "p50": 162.07999736070633, + "p90": 
182.68799781799316, + "p95": 193.08800250291824, + "p99": 230.49600422382355 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19525632, + "combineLogicalBytes": 19525632, + "fanoutMean": 5.3203125, + "recvTokensMax": 256, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 124.95999783277512, + "p90": 135.51999628543854, + "p95": 140.54399728775024, + "p99": 153.3759981393814 + }, + "combine": { + "p50": 118.30399930477142, + "p90": 126.0479986667633, + "p95": 131.00799918174744, + "p99": 152.5759994983673 + }, + "roundtrip": { + "p50": 222.27199375629425, + "p90": 233.5679978132248, + "p95": 239.3600046634674, + "p99": 254.55999374389648 + }, + "isolatedSum": { + "p50": 243.26399713754654, + "p90": 261.56799495220184, + "p95": 271.5519964694977, + "p99": 305.9519976377487 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1024, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-f5a9f57f", + "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-moving@s2|8|decode|normal|none|none|2|tuned||675e15b52e37958", + "colorKey": "h200_9479c674", + "comparisonKey": "65013819dd1ccf9e", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:13:12.398873+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_13", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · hotspot-moving@s2", + "model": "DeepSeek-V3/V4", + "shape": { 
+ "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-moving", + "routingLabel": "hotspot-moving@s2", + "routingStep": 2, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "675e15b52e37958", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28272348704", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272348704", + "createdAt": "2026-06-27T00:13:12.398873+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 75.87199658155441, + "p90": 93.59999746084213, + "p95": 100.19200295209885, + "p99": 114.56000059843063 + }, + "combine": { + "p50": 71.35999947786331, + "p90": 79.64800298213959, + "p95": 85.63199639320374, + "p99": 97.79199957847595 + }, + "roundtrip": { + "p50": 129.2160004377365, + "p90": 148.5760062932968, + "p95": 158.84800255298615, + "p99": 188.22400271892548 + }, + "isolatedSum": { + "p50": 147.23199605941772, + "p90": 173.24800044298172, + "p95": 185.82399934530258, + "p99": 212.35200017690659 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 64, + "stragglerRank": 4, + 
"correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 88.0960002541542, + "p90": 110.78400164842606, + "p95": 121.72800302505493, + "p99": 175.61599612236023 + }, + "combine": { + "p50": 80.70400357246399, + "p90": 92.3520028591156, + "p95": 98.88000041246414, + "p99": 121.34400010108948 + }, + "roundtrip": { + "p50": 141.37600362300873, + "p90": 164.19200599193573, + "p95": 172.95999825000763, + "p99": 193.7599927186966 + }, + "isolatedSum": { + "p50": 168.8000038266182, + "p90": 203.13600450754166, + "p95": 220.60800343751907, + "p99": 296.9599962234497 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19525632, + "combineLogicalBytes": 19525632, + "fanoutMean": 5.3203125, + "recvTokensMax": 256, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 128.92800569534302, + "p90": 140.3840035200119, + "p95": 146.65600657463074, + "p99": 171.10399901866913 + }, + "combine": { + "p50": 120.28799951076508, + "p90": 132.38400220870972, + "p95": 136.76799833774567, + "p99": 159.36000645160675 + }, + "roundtrip": { + "p50": 224.2880016565323, + "p90": 240.1919960975647, + "p95": 248.1279969215393, + "p99": 276.8320143222809 + }, + "isolatedSum": { + "p50": 249.2160052061081, + "p90": 272.7680057287216, + "p95": 283.4240049123764, + "p99": 330.4640054702759 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1024, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-13ab64c2", + "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-moving@s3|8|decode|normal|none|none|3|tuned||82b2963fc322419", + "colorKey": "h200_9579c807", + "comparisonKey": "65013819dd1ccf9e", + "schemaVersion": 3, + "generatedAt": 
"2026-06-27T00:13:19.903361+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_6", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · hotspot-moving@s3", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-moving", + "routingLabel": "hotspot-moving@s3", + "routingStep": 3, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "82b2963fc322419", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28272352256", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272352256", + "createdAt": "2026-06-27T00:13:19.903361+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 74.40000027418137, + "p90": 94.7519987821579, + "p95": 101.9200012087822, + "p99": 123.36000055074692 + }, + "combine": { + 
"p50": 70.20799815654755, + "p90": 82.17599987983704, + "p95": 89.37600255012512, + "p99": 105.56799918413162 + }, + "roundtrip": { + "p50": 125.34399330615997, + "p90": 150.04800260066986, + "p95": 162.6559942960739, + "p99": 177.88800597190857 + }, + "isolatedSum": { + "p50": 144.6079984307289, + "p90": 176.92799866199493, + "p95": 191.29600375890732, + "p99": 228.92799973487854 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 64, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 86.46400272846222, + "p90": 103.00800204277039, + "p95": 111.7440015077591, + "p99": 129.95199859142303 + }, + "combine": { + "p50": 79.26400005817413, + "p90": 90.97599983215332, + "p95": 96.47999703884125, + "p99": 115.9679964184761 + }, + "roundtrip": { + "p50": 139.8400068283081, + "p90": 156.6080003976822, + "p95": 163.96799683570862, + "p99": 176.35199427604675 + }, + "isolatedSum": { + "p50": 165.72800278663635, + "p90": 193.9840018749237, + "p95": 208.22399854660034, + "p99": 245.91999500989914 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19525632, + "combineLogicalBytes": 19525632, + "fanoutMean": 5.3203125, + "recvTokensMax": 256, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 124.83199685811996, + "p90": 138.59200477600098, + "p95": 144.44799721240997, + "p99": 233.88800024986267 + }, + "combine": { + "p50": 119.07199770212173, + "p90": 130.8159977197647, + "p95": 139.71200585365295, + "p99": 152.5759994983673 + }, + "roundtrip": { + "p50": 222.24000096321106, + "p90": 239.84000086784363, + "p95": 250.65600872039795, + "p99": 283.4239900112152 + }, + "isolatedSum": { + "p50": 243.9039945602417, + "p90": 269.4080024957657, + "p95": 
284.1600030660629, + "p99": 386.46399974823 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1024, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-7c6f809c", + "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|decode|normal|none|none|0|tuned||2ad5ef98d328fa1", + "colorKey": "h200_189562cd", + "comparisonKey": "6b812f29e2dcdef6", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:57:16.217396+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_1", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · hotspot-single", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-single", + "routingLabel": "hotspot-single", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "2ad5ef98d328fa1", + "workloadId": "set:4:286be993cd819ed9", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", 
+ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271859196", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271859196", + "createdAt": "2026-06-26T23:57:16.217396+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 71.71200215816498, + "p90": 98.30400347709656, + "p95": 109.69600081443787, + "p99": 295.48799991607666 + }, + "combine": { + "p50": 67.6800012588501, + "p90": 82.07999914884567, + "p95": 88.16000074148178, + "p99": 110.04800349473953 + }, + "roundtrip": { + "p50": 121.95199728012085, + "p90": 153.24799716472626, + "p95": 161.53599321842194, + "p99": 211.16800606250763 + }, + "isolatedSum": { + "p50": 139.39200341701508, + "p90": 180.38400262594223, + "p95": 197.85600155591965, + "p99": 405.5360034108162 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 74.17599856853485, + "p90": 98.08000177145004, + "p95": 108.0000028014183, + "p99": 146.14400267601013 + }, + "combine": { + "p50": 69.63200122117996, + "p90": 83.13599973917007, + "p95": 89.02399986982346, + "p99": 103.20000350475311 + }, + "roundtrip": { + "p50": 125.40799379348755, + "p90": 153.50399911403656, + "p95": 165.12000560760498, + "p99": 192.83199310302734 + }, + "isolatedSum": { + "p50": 143.8079997897148, + "p90": 181.21600151062012, + "p95": 197.02400267124176, + "p99": 249.34400618076324 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 64, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + 
}, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 81.02399855852127, + "p90": 105.76000064611435, + "p95": 114.46399986743927, + "p99": 129.72800433635712 + }, + "combine": { + "p50": 77.2159993648529, + "p90": 89.34400230646133, + "p95": 95.8079993724823, + "p99": 114.97599631547928 + }, + "roundtrip": { + "p50": 137.472003698349, + "p90": 158.91200304031372, + "p95": 166.20799899101257, + "p99": 185.08799374103546 + }, + "isolatedSum": { + "p50": 158.23999792337418, + "p90": 195.10400295257568, + "p95": 210.27199923992157, + "p99": 244.7040006518364 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19525632, + "combineLogicalBytes": 19525632, + "fanoutMean": 5.3203125, + "recvTokensMax": 256, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 124.25599992275238, + "p90": 137.02400028705597, + "p95": 144.51199769973755, + "p99": 166.6879951953888 + }, + "combine": { + "p50": 118.30399930477142, + "p90": 130.14400005340576, + "p95": 135.71199774742126, + "p99": 157.6319932937622 + }, + "roundtrip": { + "p50": 220.06399929523468, + "p90": 239.42400515079498, + "p95": 246.17600440979004, + "p99": 313.6639893054962 + }, + "isolatedSum": { + "p50": 242.5599992275238, + "p90": 267.16800034046173, + "p95": 280.2239954471588, + "p99": 324.319988489151 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1024, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-13c27f2d", + "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|decode|normal|none|none|0|tuned||b6caf944f6bb621", + "colorKey": "h200_189562cd", + "comparisonKey": "6b812f29e2dcdef6", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:05:10.730241+00:00", + "status": "valid", + 
"publicationStatus": "official", + "runner": "h200-dgxc-slurm_9", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · hotspot-single", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-single", + "routingLabel": "hotspot-single", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b6caf944f6bb621", + "workloadId": "set:8:286be993cd819ed9", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28272100552", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272100552", + "createdAt": "2026-06-27T00:05:10.730241+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 74.14399832487106, + "p90": 98.84800016880035, + "p95": 106.36799782514572, + "p99": 130.46400249004364 + }, + "combine": { + "p50": 68.15999746322632, + "p90": 80.19199967384338, + 
"p95": 86.30400151014328, + "p99": 99.16800260543823 + }, + "roundtrip": { + "p50": 122.17599898576736, + "p90": 154.4319987297058, + "p95": 165.98400473594666, + "p99": 216.44799411296844 + }, + "isolatedSum": { + "p50": 142.30399578809738, + "p90": 179.03999984264374, + "p95": 192.671999335289, + "p99": 229.63200509548187 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 76.19199901819229, + "p90": 103.5199984908104, + "p95": 114.3679991364479, + "p99": 145.9520012140274 + }, + "combine": { + "p50": 69.2799985408783, + "p90": 83.96799862384796, + "p95": 90.11200070381165, + "p99": 99.7759997844696 + }, + "roundtrip": { + "p50": 125.02400577068329, + "p90": 152.3520052433014, + "p95": 163.58399391174316, + "p99": 191.16799533367157 + }, + "isolatedSum": { + "p50": 145.4719975590706, + "p90": 187.48799711465836, + "p95": 204.47999984025955, + "p99": 245.728000998497 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1247232, + "combineLogicalBytes": 1247232, + "fanoutMean": 5.4375, + "recvTokensMax": 16, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 79.16799932718277, + "p90": 122.56000190973282, + "p95": 143.8719928264618, + "p99": 228.03199291229248 + }, + "combine": { + "p50": 70.04799693822861, + "p90": 85.1840004324913, + "p95": 89.9519994854927, + "p99": 98.4639972448349 + }, + "roundtrip": { + "p50": 130.0159990787506, + "p90": 166.17600619792938, + "p95": 180.80000579357147, + "p99": 225.63199698925018 + }, + "isolatedSum": { + "p50": 149.21599626541138, + "p90": 207.74400234222412, + "p95": 233.8239923119545, + "p99": 326.4959901571274 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 2451456, + "combineLogicalBytes": 2451456, + "fanoutMean": 5.34375, + "recvTokensMax": 32, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 75.52000135183334, + "p90": 99.71199929714203, + "p95": 106.62399977445602, + "p99": 121.24799937009811 + }, + "combine": { + "p50": 70.592001080513, + "p90": 88.19200098514557, + "p95": 93.31200271844864, + "p99": 122.49600142240524 + }, + "roundtrip": { + "p50": 127.29600071907043, + "p90": 156.44800662994385, + "p95": 164.2879992723465, + "p99": 200.76799392700195 + }, + "isolatedSum": { + "p50": 146.11200243234634, + "p90": 187.9040002822876, + "p95": 199.93600249290466, + "p99": 243.74400079250336 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 64, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 75.42400062084198, + "p90": 99.32799637317657, + "p95": 107.16799646615982, + "p99": 116.44800007343292 + }, + "combine": { + "p50": 72.7040022611618, + "p90": 89.59999680519104, + "p95": 95.551997423172, + "p99": 149.1200029850006 + }, + "roundtrip": { + "p50": 129.5360028743744, + "p90": 163.42400014400482, + "p95": 173.18400740623474, + "p99": 210.36800742149353 + }, + "isolatedSum": { + "p50": 148.12800288200378, + "p90": 188.92799317836761, + "p95": 202.71999388933182, + "p99": 265.56800305843353 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9748480, + "combineLogicalBytes": 9748480, + "fanoutMean": 5.3125, + "recvTokensMax": 128, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 84.89599823951721, + "p90": 109.31199789047241, + "p95": 117.15199798345566, + "p99": 
152.92799472808838 + }, + "combine": { + "p50": 78.75200361013412, + "p90": 95.36000341176987, + "p95": 99.10400211811066, + "p99": 120.06399780511856 + }, + "roundtrip": { + "p50": 140.73599874973297, + "p90": 167.29600727558136, + "p95": 174.01599884033203, + "p99": 211.07199788093567 + }, + "isolatedSum": { + "p50": 163.64800184965134, + "p90": 204.67200130224228, + "p95": 216.25600010156631, + "p99": 272.99199253320694 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19525632, + "combineLogicalBytes": 19525632, + "fanoutMean": 5.3203125, + "recvTokensMax": 256, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 98.91200065612793, + "p90": 116.19199812412262, + "p95": 121.31199985742569, + "p99": 146.84799313545227 + }, + "combine": { + "p50": 91.36000275611877, + "p90": 105.50399869680405, + "p95": 109.92000252008438, + "p99": 130.65600395202637 + }, + "roundtrip": { + "p50": 168.7999963760376, + "p90": 190.8479928970337, + "p95": 195.23200392723083, + "p99": 233.69599878787994 + }, + "isolatedSum": { + "p50": 190.2720034122467, + "p90": 221.69599682092667, + "p95": 231.23200237751007, + "p99": 277.50399708747864 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38621184, + "combineLogicalBytes": 38621184, + "fanoutMean": 5.26171875, + "recvTokensMax": 512, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 125.72799623012543, + "p90": 143.16800236701965, + "p95": 147.90399372577667, + "p99": 170.71999609470367 + }, + "combine": { + "p50": 120.06399780511856, + "p90": 136.48000359535217, + "p95": 141.9840008020401, + "p99": 148.44800531864166 + }, + "roundtrip": { + "p50": 224.09600019454956, + "p90": 247.8400021791458, + "p95": 254.68799471855164, + "p99": 276.38399600982666 + }, + "isolatedSum": { + "p50": 245.791994035244, 
+ "p90": 279.6480059623718, + "p95": 289.8879945278168, + "p99": 319.16800141334534 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1024, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-c4fd916e", + "identity": "h200|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|hotspot-single+eplb|8|decode|normal|none|none|0|tuned||e41f5099a9733ac", + "colorKey": "h200_80a72891", + "comparisonKey": "abe9d0af26c5a0c0", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:05:13.797855+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_1", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · hotspot-single+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "hotspot-single", + "routingLabel": "hotspot-single+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "e41f5099a9733ac", + "workloadId": "set:8:286be993cd819ed9", + "workloadSource": "canonical-serialized", + 
"eplbImbalanceBefore": 1.830078125, + "eplbImbalanceAfter": 1.0007595486111112, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28272103776", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272103776", + "createdAt": "2026-06-27T00:05:13.797855+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 72.86400347948074, + "p90": 99.10400211811066, + "p95": 107.35999792814255, + "p99": 136.48000359535217 + }, + "combine": { + "p50": 67.87200272083282, + "p90": 82.30400085449219, + "p95": 87.55200356245041, + "p99": 92.12800115346909 + }, + "roundtrip": { + "p50": 121.31199985742569, + "p90": 150.62400698661804, + "p95": 160.76800227165222, + "p99": 204.8639953136444 + }, + "isolatedSum": { + "p50": 140.73600620031357, + "p90": 181.40800297260284, + "p95": 194.91200149059296, + "p99": 228.60800474882126 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 74.43200051784515, + "p90": 101.34399682283401, + "p95": 109.66400057077408, + "p99": 138.43199610710144 + }, + "combine": { + "p50": 67.90400296449661, + "p90": 80.76799660921097, + "p95": 85.37600189447403, + "p99": 95.13600170612335 + }, + "roundtrip": { + "p50": 121.56800180673599, + "p90": 151.67999267578125, + "p95": 162.23999857902527, + "p99": 191.64800643920898 + }, + "isolatedSum": { + "p50": 142.33600348234177, + "p90": 182.11199343204498, + "p95": 195.0400024652481, + "p99": 233.5679978132248 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1175552, + "combineLogicalBytes": 1175552, + "fanoutMean": 
5.125, + "recvTokensMax": 12, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 74.36800003051758, + "p90": 106.30399733781815, + "p95": 112.8000020980835, + "p99": 133.34399461746216 + }, + "combine": { + "p50": 69.31199878454208, + "p90": 85.75999736785889, + "p95": 93.05600076913834, + "p99": 108.41599851846695 + }, + "roundtrip": { + "p50": 123.16799908876419, + "p90": 152.16000378131866, + "p95": 162.33600676059723, + "p99": 187.80800700187683 + }, + "isolatedSum": { + "p50": 143.67999881505966, + "p90": 192.06399470567703, + "p95": 205.85600286722183, + "p99": 241.7599931359291 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2451456, + "combineLogicalBytes": 2451456, + "fanoutMean": 5.34375, + "recvTokensMax": 23, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 73.72800260782242, + "p90": 94.94400024414062, + "p95": 102.30399668216705, + "p99": 121.2799996137619 + }, + "combine": { + "p50": 68.44799965620041, + "p90": 81.91999793052673, + "p95": 88.03199976682663, + "p99": 102.52799838781357 + }, + "roundtrip": { + "p50": 124.22399967908859, + "p90": 154.14400398731232, + "p95": 164.60800170898438, + "p99": 177.44000256061554 + }, + "isolatedSum": { + "p50": 142.17600226402283, + "p90": 176.86399817466736, + "p95": 190.33599644899368, + "p99": 223.80799800157547 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4730880, + "combineLogicalBytes": 4730880, + "fanoutMean": 5.15625, + "recvTokensMax": 44, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 72.12799787521362, + "p90": 98.55999797582626, + "p95": 106.01600259542465, + "p99": 130.62399625778198 + }, + "combine": { + "p50": 69.92000341415405, + "p90": 
83.29600095748901, + "p95": 89.28000181913376, + "p99": 106.75200074911118 + }, + "roundtrip": { + "p50": 123.77600371837616, + "p90": 149.63200688362122, + "p95": 158.4639996290207, + "p99": 176.54399573802948 + }, + "isolatedSum": { + "p50": 142.04800128936768, + "p90": 181.85599893331528, + "p95": 195.2960044145584, + "p99": 237.37599700689316 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9691136, + "combineLogicalBytes": 9691136, + "fanoutMean": 5.28125, + "recvTokensMax": 88, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 84.48000252246857, + "p90": 110.75200140476227, + "p95": 119.61600184440613, + "p99": 152.41600573062897 + }, + "combine": { + "p50": 77.2479996085167, + "p90": 91.07200056314468, + "p95": 98.36799651384354, + "p99": 130.17599284648895 + }, + "roundtrip": { + "p50": 134.783998131752, + "p90": 159.04000401496887, + "p95": 166.97600483894348, + "p99": 194.36800479888916 + }, + "isolatedSum": { + "p50": 161.72800213098526, + "p90": 201.82400196790695, + "p95": 217.98399835824966, + "p99": 282.5919985771179 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19568640, + "combineLogicalBytes": 19568640, + "fanoutMean": 5.33203125, + "recvTokensMax": 179, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 97.02400118112564, + "p90": 121.2799996137619, + "p95": 137.95199990272522, + "p99": 238.87999355793 + }, + "combine": { + "p50": 90.94399958848953, + "p90": 106.97600245475769, + "p95": 113.98400366306305, + "p99": 139.3280029296875 + }, + "roundtrip": { + "p50": 161.05599701404572, + "p90": 182.17599391937256, + "p95": 191.23199582099915, + "p99": 230.27199506759644 + }, + "isolatedSum": { + "p50": 187.96800076961517, + "p90": 228.2560020685196, + "p95": 251.93600356578827, + "p99": 378.2079964876175 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 38750208, + "combineLogicalBytes": 38750208, + "fanoutMean": 5.279296875, + "recvTokensMax": 348, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 116.03199690580368, + "p90": 134.39999520778656, + "p95": 142.5279974937439, + "p99": 206.11199736595154 + }, + "combine": { + "p50": 103.04000228643417, + "p90": 118.23999881744385, + "p95": 122.079998254776, + "p99": 137.69599795341492 + }, + "roundtrip": { + "p50": 195.99999487400055, + "p90": 214.33599293231964, + "p95": 224.5440036058426, + "p99": 265.02400636672974 + }, + "isolatedSum": { + "p50": 219.07199919223785, + "p90": 252.6399940252304, + "p95": 264.6079957485199, + "p99": 343.80799531936646 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77342720, + "combineLogicalBytes": 77342720, + "fanoutMean": 5.2685546875, + "recvTokensMax": 687, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-34b2b051", + "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform·empty-rank|8|decode|normal|none|empty-rank|0|tuned||5621f0d4899ad7a", + "colorKey": "h200_2a7f12a0", + "comparisonKey": "4dde4e46080a91eb", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:14:22.620116+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_3", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · uniform·empty-rank", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": 
"uniform", + "routingLabel": "uniform·empty-rank", + "routingStep": 0, + "unevenTokens": "empty-rank", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "5621f0d4899ad7a", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28272386143", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272386143", + "createdAt": "2026-06-27T00:14:22.620116+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 63, + "dispatch": { + "p50": 73.15199822187424, + "p90": 92.76799857616425, + "p95": 100.28800368309021, + "p99": 131.58400356769562 + }, + "combine": { + "p50": 68.96000355482101, + "p90": 83.64800363779068, + "p95": 88.92799913883209, + "p99": 102.11200267076492 + }, + "roundtrip": { + "p50": 121.66400253772736, + "p90": 145.37599682807922, + "p95": 157.18400478363037, + "p99": 189.56799805164337 + }, + "isolatedSum": { + "p50": 142.11200177669525, + "p90": 176.41600221395493, + "p95": 189.2160028219223, + "p99": 233.69600623846054 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4888576, + "combineLogicalBytes": 4888576, + "fanoutMean": 5.412698268890381, + "recvTokensMax": 46, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, 
+ { + "tokensPerRank": 32, + "globalTokens": 252, + "dispatch": { + "p50": 82.43200182914734, + "p90": 96.28800302743912, + "p95": 103.84000092744827, + "p99": 123.07199835777283 + }, + "combine": { + "p50": 76.60800218582153, + "p90": 86.65599673986435, + "p95": 92.28800237178802, + "p99": 107.84000158309937 + }, + "roundtrip": { + "p50": 134.49600338935852, + "p90": 156.031996011734, + "p95": 167.4879938364029, + "p99": 228.12800109386444 + }, + "isolatedSum": { + "p50": 159.04000401496887, + "p90": 182.94399976730347, + "p95": 196.1280032992363, + "p99": 230.9119999408722 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19396608, + "combineLogicalBytes": 19396608, + "fanoutMean": 5.36904764175415, + "recvTokensMax": 180, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1022, + "dispatch": { + "p50": 115.90400338172913, + "p90": 130.49599528312683, + "p95": 136.86400651931763, + "p99": 152.319997549057 + }, + "combine": { + "p50": 108.92800241708755, + "p90": 121.31199985742569, + "p95": 126.8479973077774, + "p99": 144.06399428844452 + }, + "roundtrip": { + "p50": 201.08799636363983, + "p90": 216.5759950876236, + "p95": 222.33599424362183, + "p99": 238.5919988155365 + }, + "isolatedSum": { + "p50": 224.83200579881668, + "p90": 251.80799514055252, + "p95": 263.71200382709503, + "p99": 296.3839918375015 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77529088, + "combineLogicalBytes": 77529088, + "fanoutMean": 5.2915849685668945, + "recvTokensMax": 722, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-2de6a2af", + "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform·linear|8|decode|normal|none|linear|0|tuned||b029c1a6fded400", + "colorKey": "h200_58b5650b", + "comparisonKey": "4dde4e46080a91eb", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:14:22.294115+00:00", + 
"status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_9", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · uniform·linear", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform·linear", + "routingStep": 0, + "unevenTokens": "linear", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b029c1a6fded400", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28272382939", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272382939", + "createdAt": "2026-06-27T00:14:22.294115+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 75.19999891519547, + "p90": 97.18400239944458, + "p95": 107.84000158309937, + "p99": 136.1279934644699 + }, + "combine": { + "p50": 68.9919963479042, + "p90": 80.48000186681747, 
+ "p95": 86.62399649620056, + "p99": 96.47999703884125 + }, + "roundtrip": { + "p50": 122.27199971675873, + "p90": 154.6880006790161, + "p95": 166.97600483894348, + "p99": 202.78400182724 + }, + "isolatedSum": { + "p50": 144.19199526309967, + "p90": 177.66400426626205, + "p95": 194.46399807929993, + "p99": 232.60799050331116 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 85.69599688053131, + "p90": 105.8880016207695, + "p95": 113.63200098276138, + "p99": 147.2959965467453 + }, + "combine": { + "p50": 78.40000092983246, + "p90": 89.85599875450134, + "p95": 95.93600034713745, + "p99": 106.84800148010254 + }, + "roundtrip": { + "p50": 134.62400436401367, + "p90": 154.81600165367126, + "p95": 166.1120057106018, + "p99": 190.0160014629364 + }, + "isolatedSum": { + "p50": 164.09599781036377, + "p90": 195.74400037527084, + "p95": 209.56800132989883, + "p99": 254.14399802684784 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 117.76000261306763, + "p90": 139.13600146770477, + "p95": 149.9200016260147, + "p99": 190.94400107860565 + }, + "combine": { + "p50": 114.88000303506851, + "p90": 121.88799679279327, + "p95": 128.1599998474121, + "p99": 155.61600029468536 + }, + "roundtrip": { + "p50": 208.25600624084473, + "p90": 228.57600450515747, + "p95": 237.37600445747375, + "p99": 271.64798974990845 + }, + "isolatedSum": { + "p50": 232.64000564813614, + "p90": 261.02399826049805, + "p95": 278.0800014734268, + "p99": 346.560001373291 + }, + "roundtripMeasured": 
true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-6ff3844b", + "identity": "h200|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|uniform+eplb|8|decode|normal|none|none|0|tuned||73351bbcd4d02de", + "colorKey": "h200_580d7b05", + "comparisonKey": "46ecc7ff5ccb7c5d", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:02:26.011362+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_11", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · uniform+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "uniform", + "routingLabel": "uniform+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "73351bbcd4d02de", + "workloadId": "set:8:d8d49658059863f2", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": 1.078125, + "eplbImbalanceAfter": 1.00048828125, + "backendVersion": "1.2.1", + "imageDigest": 
"sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28272020269", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272020269", + "createdAt": "2026-06-27T00:02:26.011362+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 76.76800340414047, + "p90": 99.23200309276581, + "p95": 110.59200018644333, + "p99": 139.71200585365295 + }, + "combine": { + "p50": 68.1919977068901, + "p90": 80.09599894285202, + "p95": 84.06399935483932, + "p99": 98.65599870681763 + }, + "roundtrip": { + "p50": 123.16799908876419, + "p90": 143.90400052070618, + "p95": 155.8080017566681, + "p99": 181.5679967403412 + }, + "isolatedSum": { + "p50": 144.96000111103058, + "p90": 179.32800203561783, + "p95": 194.65599954128265, + "p99": 238.36800456047058 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 516096, + "combineLogicalBytes": 516096, + "fanoutMean": 4.5, + "recvTokensMax": 6, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 76.09599828720093, + "p90": 102.55999863147736, + "p95": 112.70400136709213, + "p99": 138.5599970817566 + }, + "combine": { + "p50": 69.95200365781784, + "p90": 79.83999699354172, + "p95": 83.39200168848038, + "p99": 91.93599969148636 + }, + "roundtrip": { + "p50": 125.791996717453, + "p90": 143.96800100803375, + "p95": 156.67200088500977, + "p99": 176.5120029449463 + }, + "isolatedSum": { + "p50": 146.04800194501877, + "p90": 182.39999562501907, + "p95": 196.0960030555725, + "p99": 230.49599677324295 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1089536, + "combineLogicalBytes": 1089536, + "fanoutMean": 4.75, + "recvTokensMax": 11, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 
4, + "globalTokens": 32, + "dispatch": { + "p50": 77.05599814653397, + "p90": 99.0080013871193, + "p95": 106.6880002617836, + "p99": 139.77600634098053 + }, + "combine": { + "p50": 70.04799693822861, + "p90": 82.49600231647491, + "p95": 85.56800335645676, + "p99": 100.09600222110748 + }, + "roundtrip": { + "p50": 130.17599284648895, + "p90": 161.6320013999939, + "p95": 169.24799978733063, + "p99": 194.43200528621674 + }, + "isolatedSum": { + "p50": 147.10399508476257, + "p90": 181.5040037035942, + "p95": 192.25600361824036, + "p99": 239.872008562088 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2207744, + "combineLogicalBytes": 2207744, + "fanoutMean": 4.8125, + "recvTokensMax": 23, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 77.34400033950806, + "p90": 90.94399958848953, + "p95": 97.9200005531311, + "p99": 113.18399757146835 + }, + "combine": { + "p50": 71.19999825954437, + "p90": 79.9039974808693, + "p95": 84.06399935483932, + "p99": 113.02399635314941 + }, + "roundtrip": { + "p50": 130.0159990787506, + "p90": 153.08800339698792, + "p95": 165.24800658226013, + "p99": 195.3279972076416 + }, + "isolatedSum": { + "p50": 148.54399859905243, + "p90": 170.84799706935883, + "p95": 181.98399990797043, + "p99": 226.20799392461777 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4558848, + "combineLogicalBytes": 4558848, + "fanoutMean": 4.96875, + "recvTokensMax": 46, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 83.48800241947174, + "p90": 106.20799660682678, + "p95": 114.78400230407715, + "p99": 256.0960054397583 + }, + "combine": { + "p50": 72.9919970035553, + "p90": 86.17600053548813, + "p95": 91.51999652385712, + "p99": 108.83200168609619 + }, + "roundtrip": { + "p50": 132.9919993877411, + "p90": 166.24000668525696, + 
"p95": 176.35199427604675, + "p99": 203.5519927740097 + }, + "isolatedSum": { + "p50": 156.47999942302704, + "p90": 192.3839971423149, + "p95": 206.30399882793427, + "p99": 364.9280071258545 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9347072, + "combineLogicalBytes": 9347072, + "fanoutMean": 5.09375, + "recvTokensMax": 86, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 89.1840010881424, + "p90": 104.51199859380722, + "p95": 112.44799941778183, + "p99": 135.5839967727661 + }, + "combine": { + "p50": 79.3600007891655, + "p90": 87.26400136947632, + "p95": 92.73599833250046, + "p99": 111.32799834012985 + }, + "roundtrip": { + "p50": 139.90400731563568, + "p90": 159.2639982700348, + "p95": 169.3439930677414, + "p99": 189.02400135993958 + }, + "isolatedSum": { + "p50": 168.5440018773079, + "p90": 191.77599996328354, + "p95": 205.1839977502823, + "p99": 246.91199511289597 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 18995200, + "combineLogicalBytes": 18995200, + "fanoutMean": 5.17578125, + "recvTokensMax": 178, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 98.2080027461052, + "p90": 113.40799927711487, + "p95": 119.99999731779099, + "p99": 140.19200205802917 + }, + "combine": { + "p50": 89.12000060081482, + "p90": 98.7199991941452, + "p95": 102.7199998497963, + "p99": 111.455999314785 + }, + "roundtrip": { + "p50": 162.7199947834015, + "p90": 182.0800006389618, + "p95": 189.60000574588776, + "p99": 210.4640007019043 + }, + "isolatedSum": { + "p50": 187.32800334692, + "p90": 212.12799847126007, + "p95": 222.71999716758728, + "p99": 251.64800137281418 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38291456, + "combineLogicalBytes": 38291456, + "fanoutMean": 5.216796875, + "recvTokensMax": 348, + 
"stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 122.46400117874146, + "p90": 136.51199638843536, + "p95": 143.64799857139587, + "p99": 156.41599893569946 + }, + "combine": { + "p50": 106.33599758148193, + "p90": 117.91999638080597, + "p95": 122.079998254776, + "p99": 132.09599256515503 + }, + "roundtrip": { + "p50": 200.15999674797058, + "p90": 217.72800385951996, + "p95": 223.29600155353546, + "p99": 246.87999486923218 + }, + "isolatedSum": { + "p50": 228.7999987602234, + "p90": 254.43199276924133, + "p95": 265.7279968261719, + "p99": 288.5119915008545 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77113344, + "combineLogicalBytes": 77113344, + "fanoutMean": 5.2529296875, + "recvTokensMax": 685, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-f68ea439", + "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|decode|normal|none|none|0|tuned||14ded8461f2636c", + "colorKey": "h200_b6aa6110", + "comparisonKey": "5971fba5c9d29fa7", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:03:10.278228+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_5", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · zipf", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf", + "routingLabel": "zipf", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + 
"resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "14ded8461f2636c", + "workloadId": "set:8:f5576e2b712d38c3", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28272042133", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272042133", + "createdAt": "2026-06-27T00:03:10.278228+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 71.68000191450119, + "p90": 93.44000369310379, + "p95": 102.68799960613251, + "p99": 140.1599943637848 + }, + "combine": { + "p50": 67.4239993095398, + "p90": 79.45600152015686, + "p95": 86.496002972126, + "p99": 106.01600259542465 + }, + "roundtrip": { + "p50": 119.4240003824234, + "p90": 146.59200608730316, + "p95": 155.07200360298157, + "p99": 181.34400248527527 + }, + "isolatedSum": { + "p50": 139.10400122404099, + "p90": 172.89600521326065, + "p95": 189.18400257825851, + "p99": 246.17599695920944 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 444416, + "combineLogicalBytes": 444416, + "fanoutMean": 3.875, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 73.69600236415863, + "p90": 100.92800110578537, + "p95": 109.66400057077408, + "p99": 146.04799449443817 + }, + "combine": { + "p50": 
68.28799843788147, + "p90": 80.76799660921097, + "p95": 85.69599688053131, + "p99": 152.8320014476776 + }, + "roundtrip": { + "p50": 121.15199863910675, + "p90": 147.77599275112152, + "p95": 155.71199357509613, + "p99": 193.7599927186966 + }, + "isolatedSum": { + "p50": 141.9840008020401, + "p90": 181.69599771499634, + "p95": 195.3599974513054, + "p99": 298.8799959421158 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 845824, + "combineLogicalBytes": 845824, + "fanoutMean": 3.6875, + "recvTokensMax": 16, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 72.73600250482559, + "p90": 96.12800180912018, + "p95": 103.90400141477585, + "p99": 168.06399822235107 + }, + "combine": { + "p50": 66.91200286149979, + "p90": 78.65600287914276, + "p95": 82.2720006108284, + "p99": 94.71999853849411 + }, + "roundtrip": { + "p50": 118.9119964838028, + "p90": 143.8080072402954, + "p95": 155.71199357509613, + "p99": 209.6959948539734 + }, + "isolatedSum": { + "p50": 139.64800536632538, + "p90": 174.78400468826294, + "p95": 186.17600202560425, + "p99": 262.7839967608452 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1691648, + "combineLogicalBytes": 1691648, + "fanoutMean": 3.6875, + "recvTokensMax": 32, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 73.56800138950348, + "p90": 93.82399916648865, + "p95": 101.47199779748917, + "p99": 132.7359974384308 + }, + "combine": { + "p50": 67.6800012588501, + "p90": 79.6160027384758, + "p95": 83.23200047016144, + "p99": 101.21600329875946 + }, + "roundtrip": { + "p50": 119.26399916410446, + "p90": 145.24799585342407, + "p95": 154.4959992170334, + "p99": 191.71200692653656 + }, + "isolatedSum": { + "p50": 141.24800264835358, + "p90": 173.44000190496445, + "p95": 184.7039982676506, + "p99": 233.95200073719025 
+ }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3354624, + "combineLogicalBytes": 3354624, + "fanoutMean": 3.65625, + "recvTokensMax": 64, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 77.66400277614594, + "p90": 104.12800312042236, + "p95": 114.30399864912033, + "p99": 140.6400054693222 + }, + "combine": { + "p50": 70.8480030298233, + "p90": 84.32000130414963, + "p95": 90.7519981265068, + "p99": 122.27199971675873 + }, + "roundtrip": { + "p50": 125.95200538635254, + "p90": 157.151997089386, + "p95": 166.81599617004395, + "p99": 207.23199844360352 + }, + "isolatedSum": { + "p50": 148.51200580596924, + "p90": 188.448004424572, + "p95": 205.05599677562714, + "p99": 262.91200518608093 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6537216, + "combineLogicalBytes": 6537216, + "fanoutMean": 3.5625, + "recvTokensMax": 127, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 82.94399827718735, + "p90": 101.56799852848053, + "p95": 110.88000237941742, + "p99": 162.11199760437012 + }, + "combine": { + "p50": 76.31999999284744, + "p90": 87.67999708652496, + "p95": 90.68799763917923, + "p99": 98.33600372076035 + }, + "roundtrip": { + "p50": 135.71199774742126, + "p90": 155.20000457763672, + "p95": 165.6000018119812, + "p99": 222.27199375629425 + }, + "isolatedSum": { + "p50": 159.2639982700348, + "p90": 189.2479956150055, + "p95": 201.56800001859665, + "p99": 260.44800132513046 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 12859392, + "combineLogicalBytes": 12859392, + "fanoutMean": 3.50390625, + "recvTokensMax": 255, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 96.76799923181534, + "p90": 109.11999642848969, 
+ "p95": 116.5120005607605, + "p99": 174.01599884033203 + }, + "combine": { + "p50": 86.17600053548813, + "p90": 97.31200337409973, + "p95": 103.07200253009796, + "p99": 120.64000219106674 + }, + "roundtrip": { + "p50": 160.67199409008026, + "p90": 175.61599612236023, + "p95": 181.40800297260284, + "p99": 218.9439982175827 + }, + "isolatedSum": { + "p50": 182.94399976730347, + "p90": 206.43199980258942, + "p95": 219.58400309085846, + "p99": 294.6560010313988 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 25145344, + "combineLogicalBytes": 25145344, + "fanoutMean": 3.42578125, + "recvTokensMax": 510, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 122.3360002040863, + "p90": 132.47999548912048, + "p95": 135.51999628543854, + "p99": 155.90399503707886 + }, + "combine": { + "p50": 112.86400258541107, + "p90": 121.8239963054657, + "p95": 126.62400305271149, + "p99": 136.76799833774567 + }, + "roundtrip": { + "p50": 214.52799439430237, + "p90": 232.92799293994904, + "p95": 243.42399835586548, + "p99": 306.97599053382874 + }, + "isolatedSum": { + "p50": 235.20000278949738, + "p90": 254.30399179458618, + "p95": 262.14399933815, + "p99": 292.6719933748245 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-9e42f709", + "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|decode|normal|none|none|0|tuned||1fa7fe74d0e30a3", + "colorKey": "h200_b6aa6110", + "comparisonKey": "5971fba5c9d29fa7", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:56:48.444120+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_5", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + 
"mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · zipf", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf", + "routingLabel": "zipf", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "1fa7fe74d0e30a3", + "workloadId": "set:4:f5576e2b712d38c3", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271844665", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271844665", + "createdAt": "2026-06-26T23:56:48.444120+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 71.3919997215271, + "p90": 101.1200025677681, + "p95": 115.1999980211258, + "p99": 144.44799721240997 + }, + "combine": { + "p50": 64.4799992442131, + "p90": 82.78399705886841, + "p95": 91.48799628019333, + "p99": 104.67199981212616 + }, + "roundtrip": { + "p50": 117.98399686813354, + "p90": 156.22399747371674, + "p95": 165.3120070695877, + 
"p99": 193.12000274658203 + }, + "isolatedSum": { + "p50": 135.8719989657402, + "p90": 183.9039996266365, + "p95": 206.68799430131912, + "p99": 249.11999702453613 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 444416, + "combineLogicalBytes": 444416, + "fanoutMean": 3.875, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 73.08799773454666, + "p90": 95.77599912881851, + "p95": 105.66399991512299, + "p99": 147.32800424098969 + }, + "combine": { + "p50": 67.6800012588501, + "p90": 82.59200304746628, + "p95": 89.02399986982346, + "p99": 108.64000022411346 + }, + "roundtrip": { + "p50": 121.2799996137619, + "p90": 152.63999998569489, + "p95": 167.4560010433197, + "p99": 201.7280012369156 + }, + "isolatedSum": { + "p50": 140.76799899339676, + "p90": 178.3680021762848, + "p95": 194.68799978494644, + "p99": 255.96800446510315 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3354624, + "combineLogicalBytes": 3354624, + "fanoutMean": 3.65625, + "recvTokensMax": 64, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 81.66400343179703, + "p90": 98.30400347709656, + "p95": 109.98400300741196, + "p99": 134.14399325847626 + }, + "combine": { + "p50": 76.31999999284744, + "p90": 89.21600133180618, + "p95": 95.90400010347366, + "p99": 118.6240017414093 + }, + "roundtrip": { + "p50": 136.00000739097595, + "p90": 157.53600001335144, + "p95": 172.7360039949417, + "p99": 212.25599944591522 + }, + "isolatedSum": { + "p50": 157.98400342464447, + "p90": 187.52000480890274, + "p95": 205.88800311088562, + "p99": 252.76799499988556 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 12859392, + "combineLogicalBytes": 12859392, + "fanoutMean": 3.50390625, + "recvTokensMax": 255, + "stragglerRank": 4, + "correct": true, + 
"samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 122.52800166606903, + "p90": 136.6720050573349, + "p95": 148.15999567508698, + "p99": 160.89600324630737 + }, + "combine": { + "p50": 112.03200370073318, + "p90": 125.21600723266602, + "p95": 132.4480026960373, + "p99": 149.02399480342865 + }, + "roundtrip": { + "p50": 211.58400177955627, + "p90": 233.2800030708313, + "p95": 244.159996509552, + "p99": 292.03200340270996 + }, + "isolatedSum": { + "p50": 234.56000536680222, + "p90": 261.8880122900009, + "p95": 280.60799837112427, + "p99": 309.919998049736 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-b1823392", + "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|decode|normal|none|none|0|tuned||22da8b58646609c", + "colorKey": "h200_c5b3365a", + "comparisonKey": "73e84f1c938d90c0", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:04:44.997855+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_13", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · zipf-heavy", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + 
"requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "22da8b58646609c", + "workloadId": "set:8:6b84350720aa8233", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28272086516", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272086516", + "createdAt": "2026-06-27T00:04:44.997855+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 64.92800265550613, + "p90": 98.62399846315384, + "p95": 109.27999764680862, + "p99": 182.23999440670013 + }, + "combine": { + "p50": 60.92799827456474, + "p90": 75.42400062084198, + "p95": 80.6720033288002, + "p99": 96.54399752616882 + }, + "roundtrip": { + "p50": 116.57600104808807, + "p90": 152.44799852371216, + "p95": 162.81600296497345, + "p99": 179.51999604701996 + }, + "isolatedSum": { + "p50": 125.85600093007088, + "p90": 174.04799908399582, + "p95": 189.95200097560883, + "p99": 278.78399193286896 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 172032, + "combineLogicalBytes": 172032, + "fanoutMean": 1.5, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 65.50399959087372, + "p90": 96.47999703884125, + "p95": 104.86400127410889, + "p99": 137.56799697875977 + }, + "combine": { + "p50": 59.808000922203064, + "p90": 
72.83200323581696, + "p95": 78.84799689054489, + "p99": 92.19200164079666 + }, + "roundtrip": { + "p50": 110.97600311040878, + "p90": 140.00000059604645, + "p95": 150.87999403476715, + "p99": 177.72799730300903 + }, + "isolatedSum": { + "p50": 125.31200051307678, + "p90": 169.3120002746582, + "p95": 183.71199816465378, + "p99": 229.75999861955643 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 315392, + "fanoutMean": 1.375, + "recvTokensMax": 16, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 73.18399846553802, + "p90": 93.63199770450592, + "p95": 108.67200046777725, + "p99": 126.01600587368011 + }, + "combine": { + "p50": 62.20800057053566, + "p90": 70.52800059318542, + "p95": 78.07999849319458, + "p99": 100.51199793815613 + }, + "roundtrip": { + "p50": 116.67200177907944, + "p90": 144.1279947757721, + "p95": 158.91200304031372, + "p99": 186.17600202560425 + }, + "isolatedSum": { + "p50": 135.39199903607368, + "p90": 164.15999829769135, + "p95": 186.75199896097183, + "p99": 226.52800381183624 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 1.34375, + "recvTokensMax": 32, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 72.64000177383423, + "p90": 98.39999675750732, + "p95": 103.93600165843964, + "p99": 132.28799402713776 + }, + "combine": { + "p50": 60.99199876189232, + "p90": 72.06399738788605, + "p95": 79.52000200748444, + "p99": 91.5519967675209 + }, + "roundtrip": { + "p50": 118.94399672746658, + "p90": 150.30400454998016, + "p95": 160.3199988603592, + "p99": 178.78399789333344 + }, + "isolatedSum": { + "p50": 133.63200053572655, + "p90": 170.46399414539337, + "p95": 183.45600366592407, + "p99": 223.83999079465866 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 1376256, + "combineLogicalBytes": 1376256, + "fanoutMean": 1.5, + "recvTokensMax": 64, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 72.4480003118515, + "p90": 85.9839990735054, + "p95": 96.99200093746185, + "p99": 122.17599898576736 + }, + "combine": { + "p50": 67.10399687290192, + "p90": 77.11999863386154, + "p95": 83.74399691820145, + "p99": 104.16000336408615 + }, + "roundtrip": { + "p50": 118.40000003576279, + "p90": 138.11199367046356, + "p95": 145.11999487876892, + "p99": 157.18400478363037 + }, + "isolatedSum": { + "p50": 139.55199718475342, + "p90": 163.10399770736694, + "p95": 180.7359978556633, + "p99": 226.33600234985352 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2781184, + "combineLogicalBytes": 2781184, + "fanoutMean": 1.515625, + "recvTokensMax": 128, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 76.86399668455124, + "p90": 97.31200337409973, + "p95": 106.36799782514572, + "p99": 120.25599926710129 + }, + "combine": { + "p50": 69.47200000286102, + "p90": 82.78399705886841, + "p95": 87.80799806118011, + "p99": 102.9760017991066 + }, + "roundtrip": { + "p50": 128.25599312782288, + "p90": 152.63999998569489, + "p95": 163.10399770736694, + "p99": 197.37599790096283 + }, + "isolatedSum": { + "p50": 146.33599668741226, + "p90": 180.09600043296814, + "p95": 194.17599588632584, + "p99": 223.23200106620789 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 5533696, + "combineLogicalBytes": 5533696, + "fanoutMean": 1.5078125, + "recvTokensMax": 256, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 95.8079993724823, + "p90": 111.68000102043152, + "p95": 
120.99199742078781, + "p99": 207.61600136756897 + }, + "combine": { + "p50": 81.53600245714188, + "p90": 93.75999867916107, + "p95": 102.24000364542007, + "p99": 131.1360001564026 + }, + "roundtrip": { + "p50": 155.96799552440643, + "p90": 171.23199999332428, + "p95": 179.9360066652298, + "p99": 195.93599438667297 + }, + "isolatedSum": { + "p50": 177.34400182962418, + "p90": 205.4399996995926, + "p95": 223.23200106620789, + "p99": 338.75200152397156 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 11210752, + "combineLogicalBytes": 11210752, + "fanoutMean": 1.52734375, + "recvTokensMax": 512, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 118.49600076675415, + "p90": 133.40799510478973, + "p95": 137.472003698349, + "p99": 168.09600591659546 + }, + "combine": { + "p50": 108.51199924945831, + "p90": 121.37600034475327, + "p95": 125.18399953842163, + "p99": 135.74400544166565 + }, + "roundtrip": { + "p50": 205.76000213623047, + "p90": 222.78399765491486, + "p95": 227.84000635147095, + "p99": 288.2879972457886 + }, + "isolatedSum": { + "p50": 227.00800001621246, + "p90": 254.783995449543, + "p95": 262.65600323677063, + "p99": 303.8400113582611 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22650880, + "combineLogicalBytes": 22650880, + "fanoutMean": 1.54296875, + "recvTokensMax": 1024, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-1cebdc77", + "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|decode|normal|none|none|0|tuned||47fddabb3277bec", + "colorKey": "h200_c5b3365a", + "comparisonKey": "73e84f1c938d90c0", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:57:04.169845+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_4", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + 
"mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · zipf-heavy", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "47fddabb3277bec", + "workloadId": "set:4:6b84350720aa8233", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271852422", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271852422", + "createdAt": "2026-06-26T23:57:04.169845+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 70.27199864387512, + "p90": 103.64799946546555, + "p95": 125.791996717453, + "p99": 208.15999805927277 + }, + "combine": { + "p50": 61.95199862122536, + "p90": 75.45600086450577, + "p95": 80.6720033288002, + "p99": 99.07200187444687 + }, + "roundtrip": { + "p50": 117.37599968910217, + "p90": 144.83200013637543, + "p95": 
152.73599326610565, + "p99": 179.58399653434753 + }, + "isolatedSum": { + "p50": 132.22399726510048, + "p90": 179.1040003299713, + "p95": 206.4640000462532, + "p99": 307.23199993371964 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 172032, + "combineLogicalBytes": 172032, + "fanoutMean": 1.5, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 75.26399940252304, + "p90": 98.11200201511383, + "p95": 106.175996363163, + "p99": 138.3039951324463 + }, + "combine": { + "p50": 63.90400230884552, + "p90": 78.43200117349625, + "p95": 83.99999886751175, + "p99": 94.11200135946274 + }, + "roundtrip": { + "p50": 119.48800086975098, + "p90": 151.16800367832184, + "p95": 161.53599321842194, + "p99": 214.4320011138916 + }, + "isolatedSum": { + "p50": 139.16800171136856, + "p90": 176.54400318861008, + "p95": 190.17599523067474, + "p99": 232.41599649190903 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1376256, + "combineLogicalBytes": 1376256, + "fanoutMean": 1.5, + "recvTokensMax": 64, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 82.17599987983704, + "p90": 105.98400235176086, + "p95": 113.11999708414078, + "p99": 133.18400084972382 + }, + "combine": { + "p50": 72.15999811887741, + "p90": 88.76799792051315, + "p95": 93.28000247478485, + "p99": 116.57600104808807 + }, + "roundtrip": { + "p50": 134.49600338935852, + "p90": 162.432000041008, + "p95": 173.47200214862823, + "p99": 268.8640058040619 + }, + "isolatedSum": { + "p50": 154.33599799871445, + "p90": 194.75200027227402, + "p95": 206.39999955892563, + "p99": 249.7600018978119 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 5533696, + "combineLogicalBytes": 5533696, + "fanoutMean": 1.5078125, + "recvTokensMax": 256, + "stragglerRank": 1, + "correct": 
true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 119.58400160074234, + "p90": 131.96800649166107, + "p95": 136.3839954137802, + "p99": 154.59200739860535 + }, + "combine": { + "p50": 109.31199789047241, + "p90": 120.67200243473053, + "p95": 125.69600343704224, + "p99": 135.3919953107834 + }, + "roundtrip": { + "p50": 207.58399367332458, + "p90": 222.91199862957, + "p95": 232.86400735378265, + "p99": 284.89598631858826 + }, + "isolatedSum": { + "p50": 228.89599949121475, + "p90": 252.6400089263916, + "p95": 262.07999885082245, + "p99": 289.98400270938873 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22650880, + "combineLogicalBytes": 22650880, + "fanoutMean": 1.54296875, + "recvTokensMax": 1024, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-78ae7872", + "identity": "h200|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-heavy+eplb|8|decode|normal|none|none|0|tuned||5a3054422534366", + "colorKey": "h200_06aa1194", + "comparisonKey": "85dbd46cb77d1362", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:04:54.232728+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_5", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · zipf-heavy+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + 
}, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "5a3054422534366", + "workloadId": "set:8:6b84350720aa8233", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": 7.40625, + "eplbImbalanceAfter": 1.0004417782738093, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28272090308", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272090308", + "createdAt": "2026-06-27T00:04:54.232728+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 69.21599805355072, + "p90": 83.55200290679932, + "p95": 92.83199906349182, + "p99": 110.75200140476227 + }, + "combine": { + "p50": 67.45599955320358, + "p90": 76.12799853086472, + "p95": 81.53600245714188, + "p99": 88.54400366544724 + }, + "roundtrip": { + "p50": 122.079998254776, + "p90": 140.4159963130951, + "p95": 148.25600385665894, + "p99": 178.3680021762848 + }, + "isolatedSum": { + "p50": 136.6719976067543, + "p90": 159.68000143766403, + "p95": 174.3680015206337, + "p99": 199.2960050702095 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 559104, + "combineLogicalBytes": 559104, + "fanoutMean": 4.875, + "recvTokensMax": 6, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 72.03199714422226, + "p90": 80.57600259780884, + "p95": 86.40000224113464, + "p99": 97.34400361776352 + }, + "combine": { + 
"p50": 67.61600077152252, + "p90": 75.13599842786789, + "p95": 79.0719985961914, + "p99": 86.40000224113464 + }, + "roundtrip": { + "p50": 120.7360029220581, + "p90": 138.49599659442902, + "p95": 162.01600432395935, + "p99": 265.21599292755127 + }, + "isolatedSum": { + "p50": 139.64799791574478, + "p90": 155.71200102567673, + "p95": 165.47200083732605, + "p99": 183.74400585889816 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1175552, + "combineLogicalBytes": 1175552, + "fanoutMean": 5.125, + "recvTokensMax": 12, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 74.30399954319, + "p90": 86.91199868917465, + "p95": 100.12800246477127, + "p99": 123.48800152540207 + }, + "combine": { + "p50": 68.51200014352798, + "p90": 77.85599678754807, + "p95": 84.70399677753448, + "p99": 112.15999722480774 + }, + "roundtrip": { + "p50": 121.31199985742569, + "p90": 140.25600254535675, + "p95": 151.64799988269806, + "p99": 177.66399681568146 + }, + "isolatedSum": { + "p50": 142.815999686718, + "p90": 164.76799547672272, + "p95": 184.83199924230576, + "p99": 235.6479987502098 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2465792, + "combineLogicalBytes": 2465792, + "fanoutMean": 5.375, + "recvTokensMax": 25, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 74.65600222349167, + "p90": 88.28800171613693, + "p95": 97.98400104045868, + "p99": 121.2799996137619 + }, + "combine": { + "p50": 69.56800073385239, + "p90": 78.87999713420868, + "p95": 83.16799998283386, + "p99": 94.84799951314926 + }, + "roundtrip": { + "p50": 126.36800110340118, + "p90": 164.57599401474, + "p95": 172.44799435138702, + "p99": 196.22400403022766 + }, + "isolatedSum": { + "p50": 144.22400295734406, + "p90": 167.1679988503456, + "p95": 181.15200102329254, + "p99": 
216.12799912691116 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4988928, + "combineLogicalBytes": 4988928, + "fanoutMean": 5.4375, + "recvTokensMax": 47, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 73.53600114583969, + "p90": 88.0960002541542, + "p95": 96.03200107812881, + "p99": 121.2799996137619 + }, + "combine": { + "p50": 70.39999961853027, + "p90": 78.91199737787247, + "p95": 86.36800199747086, + "p99": 98.9760011434555 + }, + "roundtrip": { + "p50": 125.47199428081512, + "p90": 143.96800100803375, + "p95": 153.6960005760193, + "p99": 172.8000044822693 + }, + "isolatedSum": { + "p50": 143.93600076436996, + "p90": 167.00799763202667, + "p95": 182.40000307559967, + "p99": 220.2560007572174 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9791488, + "combineLogicalBytes": 9791488, + "fanoutMean": 5.3359375, + "recvTokensMax": 94, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 81.02399855852127, + "p90": 94.71999853849411, + "p95": 106.11200332641602, + "p99": 144.6399986743927 + }, + "combine": { + "p50": 76.7040029168129, + "p90": 88.54400366544724, + "p95": 96.76799923181534, + "p99": 107.00800269842148 + }, + "roundtrip": { + "p50": 135.29600203037262, + "p90": 158.78400206565857, + "p95": 170.84799706935883, + "p99": 241.43999814987183 + }, + "isolatedSum": { + "p50": 157.72800147533417, + "p90": 183.26400220394135, + "p95": 202.88000255823135, + "p99": 251.64800137281418 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19410944, + "combineLogicalBytes": 19410944, + "fanoutMean": 5.2890625, + "recvTokensMax": 178, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 92.96000003814697, + "p90": 
105.47199845314026, + "p95": 115.80800265073776, + "p99": 153.56799960136414 + }, + "combine": { + "p50": 86.87999844551086, + "p90": 96.03200107812881, + "p95": 102.33599692583084, + "p99": 112.67200112342834 + }, + "roundtrip": { + "p50": 158.4320068359375, + "p90": 171.26399278640747, + "p95": 179.967999458313, + "p99": 206.43199980258942 + }, + "isolatedSum": { + "p50": 179.83999848365784, + "p90": 201.50399953126907, + "p95": 218.1439995765686, + "p99": 266.2400007247925 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38678528, + "combineLogicalBytes": 38678528, + "fanoutMean": 5.26953125, + "recvTokensMax": 360, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 117.18399822711945, + "p90": 127.68000364303589, + "p95": 131.3599944114685, + "p99": 140.44800400733948 + }, + "combine": { + "p50": 104.3199971318245, + "p90": 113.76000195741653, + "p95": 121.98399752378464, + "p99": 137.28000223636627 + }, + "roundtrip": { + "p50": 196.28800451755524, + "p90": 208.95999670028687, + "p95": 216.5759950876236, + "p99": 241.56799912452698 + }, + "isolatedSum": { + "p50": 221.50399535894394, + "p90": 241.44000560045242, + "p95": 253.34399193525314, + "p99": 277.72800624370575 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77285376, + "combineLogicalBytes": 77285376, + "fanoutMean": 5.2646484375, + "recvTokensMax": 704, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-4fa5aaad", + "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-mild|8|decode|normal|none|none|0|tuned||f3df51be7d5c32b", + "colorKey": "h200_6a794fcd", + "comparisonKey": "50f5858697d33730", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:03:36.902996+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_1", + "sku": "h200", + "backend": 
"deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · zipf-mild", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-mild", + "routingLabel": "zipf-mild", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "f3df51be7d5c32b", + "workloadId": "set:8:289b7f9c14292e96", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28272056705", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272056705", + "createdAt": "2026-06-27T00:03:36.902996+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 73.98399710655212, + "p90": 98.84800016880035, + "p95": 105.98400235176086, + "p99": 125.21600723266602 + }, + "combine": { + "p50": 68.96000355482101, + "p90": 81.66400343179703, + "p95": 86.496002972126, + "p99": 102.88000106811523 + }, + "roundtrip": { + "p50": 119.93599683046341, + "p90": 
147.93600142002106, + "p95": 157.53600001335144, + "p99": 168.09600591659546 + }, + "isolatedSum": { + "p50": 142.94400066137314, + "p90": 180.51200360059738, + "p95": 192.48000532388687, + "p99": 228.09600830078125 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 587776, + "combineLogicalBytes": 587776, + "fanoutMean": 5.125, + "recvTokensMax": 8, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 73.08799773454666, + "p90": 88.73599767684937, + "p95": 100.09600222110748, + "p99": 118.20799857378006 + }, + "combine": { + "p50": 68.35199892520905, + "p90": 77.08799839019775, + "p95": 82.84799754619598, + "p99": 91.61599725484848 + }, + "roundtrip": { + "p50": 123.3920007944107, + "p90": 151.296004652977, + "p95": 158.84800255298615, + "p99": 186.27199530601501 + }, + "isolatedSum": { + "p50": 141.4399966597557, + "p90": 165.82399606704712, + "p95": 182.94399976730347, + "p99": 209.82399582862854 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1103872, + "combineLogicalBytes": 1103872, + "fanoutMean": 4.8125, + "recvTokensMax": 16, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 76.51200145483017, + "p90": 102.88000106811523, + "p95": 109.11999642848969, + "p99": 128.31999361515045 + }, + "combine": { + "p50": 69.82400268316269, + "p90": 81.44000172615051, + "p95": 86.75199747085571, + "p99": 98.04800152778625 + }, + "roundtrip": { + "p50": 126.14400684833527, + "p90": 157.6640009880066, + "p95": 167.84000396728516, + "p99": 190.88000059127808 + }, + "isolatedSum": { + "p50": 146.33600413799286, + "p90": 184.32000279426575, + "p95": 195.8719938993454, + "p99": 226.3679951429367 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2250752, + "combineLogicalBytes": 2250752, + "fanoutMean": 4.90625, + "recvTokensMax": 31, + 
"stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 75.9039968252182, + "p90": 97.4079966545105, + "p95": 101.88800096511841, + "p99": 117.60000139474869 + }, + "combine": { + "p50": 70.62400132417679, + "p90": 84.73599702119827, + "p95": 90.11200070381165, + "p99": 107.42399841547012 + }, + "roundtrip": { + "p50": 125.69600343704224, + "p90": 150.751993060112, + "p95": 158.30400586128235, + "p99": 175.4239946603775 + }, + "isolatedSum": { + "p50": 146.527998149395, + "p90": 182.14399367570877, + "p95": 192.00000166893005, + "p99": 225.0239998102188 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4472832, + "combineLogicalBytes": 4472832, + "fanoutMean": 4.875, + "recvTokensMax": 62, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 76.60800218582153, + "p90": 93.47199648618698, + "p95": 101.9200012087822, + "p99": 109.82400178909302 + }, + "combine": { + "p50": 71.26399874687195, + "p90": 84.09599959850311, + "p95": 88.32000195980072, + "p99": 100.89600086212158 + }, + "roundtrip": { + "p50": 128.25599312782288, + "p90": 152.96000242233276, + "p95": 160.76800227165222, + "p99": 201.92000269889832 + }, + "isolatedSum": { + "p50": 147.87200093269348, + "p90": 177.5679960846901, + "p95": 190.24000316858292, + "p99": 210.7200026512146 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 8888320, + "combineLogicalBytes": 8888320, + "fanoutMean": 4.84375, + "recvTokensMax": 124, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 83.8719978928566, + "p90": 102.55999863147736, + "p95": 108.92800241708755, + "p99": 121.76000326871872 + }, + "combine": { + "p50": 78.43200117349625, + "p90": 91.839998960495, + "p95": 96.57599776983261, + 
"p99": 108.12799632549286 + }, + "roundtrip": { + "p50": 138.46400380134583, + "p90": 160.19199788570404, + "p95": 168.09600591659546, + "p99": 186.14399433135986 + }, + "isolatedSum": { + "p50": 162.30399906635284, + "p90": 194.39999759197235, + "p95": 205.50400018692017, + "p99": 229.88799959421158 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 17733632, + "combineLogicalBytes": 17733632, + "fanoutMean": 4.83203125, + "recvTokensMax": 248, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 99.29600358009338, + "p90": 117.79200285673141, + "p95": 125.44000148773193, + "p99": 154.01600301265717 + }, + "combine": { + "p50": 90.14400094747543, + "p90": 102.91200131177902, + "p95": 110.17599701881409, + "p99": 119.35999989509583 + }, + "roundtrip": { + "p50": 166.75199568271637, + "p90": 185.7600063085556, + "p95": 193.02399456501007, + "p99": 220.60799598693848 + }, + "isolatedSum": { + "p50": 189.44000452756882, + "p90": 220.70400416851044, + "p95": 235.61599850654602, + "p99": 273.376002907753 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 35424256, + "combineLogicalBytes": 35424256, + "fanoutMean": 4.826171875, + "recvTokensMax": 492, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 124.4800016283989, + "p90": 135.93600690364838, + "p95": 141.184002161026, + "p99": 167.23200678825378 + }, + "combine": { + "p50": 115.68000167608261, + "p90": 127.29600071907043, + "p95": 131.99999928474426, + "p99": 150.78400075435638 + }, + "roundtrip": { + "p50": 216.95999801158905, + "p90": 232.80000686645508, + "p95": 238.27199637889862, + "p99": 261.02399826049805 + }, + "isolatedSum": { + "p50": 240.1600033044815, + "p90": 263.2320076227188, + "p95": 273.18400144577026, + "p99": 318.01600754261017 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 70160384, + "combineLogicalBytes": 70160384, + "fanoutMean": 4.779296875, + "recvTokensMax": 987, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-ffad9f17", + "identity": "h200|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-mild+eplb|8|decode|normal|none|none|0|tuned||16babcaf4204243", + "colorKey": "h200_b2ffaf91", + "comparisonKey": "b3b8e5cc27948267", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:03:43.326778+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_7", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · zipf-mild+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-mild", + "routingLabel": "zipf-mild+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "16babcaf4204243", + "workloadId": "set:8:289b7f9c14292e96", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": 2.61328125, + "eplbImbalanceAfter": 1.0009114583333334, + "backendVersion": "1.2.1", + "imageDigest": 
"sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28272060649", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272060649", + "createdAt": "2026-06-27T00:03:43.326778+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 72.9919970035553, + "p90": 96.73599898815155, + "p95": 102.7199998497963, + "p99": 128.83199751377106 + }, + "combine": { + "p50": 68.15999746322632, + "p90": 81.05599880218506, + "p95": 86.40000224113464, + "p99": 94.91200000047684 + }, + "roundtrip": { + "p50": 122.30399996042252, + "p90": 153.85599434375763, + "p95": 167.23200678825378, + "p99": 196.03200256824493 + }, + "isolatedSum": { + "p50": 141.15199446678162, + "p90": 177.7919977903366, + "p95": 189.12000209093094, + "p99": 223.7439975142479 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 7, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 73.02399724721909, + "p90": 95.77599912881851, + "p95": 103.74400019645691, + "p99": 121.72800302505493 + }, + "combine": { + "p50": 67.80800223350525, + "p90": 80.73599636554718, + "p95": 87.39200234413147, + "p99": 99.45599734783173 + }, + "roundtrip": { + "p50": 121.34400010108948, + "p90": 149.1840034723282, + "p95": 156.76799416542053, + "p99": 182.36799538135529 + }, + "isolatedSum": { + "p50": 140.83199948072433, + "p90": 176.5119954943657, + "p95": 191.13600254058838, + "p99": 221.18400037288666 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1189888, + "combineLogicalBytes": 1189888, + "fanoutMean": 5.1875, + "recvTokensMax": 12, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 73.40800017118454, + "p90": 92.54399687051773, + "p95": 101.15200281143188, + "p99": 184.28799510002136 + }, + "combine": { + "p50": 68.28799843788147, + "p90": 82.40000158548355, + "p95": 88.03199976682663, + "p99": 100.44799745082855 + }, + "roundtrip": { + "p50": 124.38400089740753, + "p90": 158.59200060367584, + "p95": 172.2240000963211, + "p99": 259.42400097846985 + }, + "isolatedSum": { + "p50": 141.695998609066, + "p90": 174.94399845600128, + "p95": 189.18400257825851, + "p99": 284.7359925508499 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2408448, + "combineLogicalBytes": 2408448, + "fanoutMean": 5.25, + "recvTokensMax": 23, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 74.5600014925003, + "p90": 100.12800246477127, + "p95": 106.78400099277496, + "p99": 138.11199367046356 + }, + "combine": { + "p50": 69.08799707889557, + "p90": 81.28000050783157, + "p95": 86.81599795818329, + "p99": 96.67199850082397 + }, + "roundtrip": { + "p50": 123.23199957609177, + "p90": 151.58399939537048, + "p95": 159.87199544906616, + "p99": 174.6560037136078 + }, + "isolatedSum": { + "p50": 143.64799857139587, + "p90": 181.40800297260284, + "p95": 193.59999895095825, + "p99": 234.78399217128754 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 47, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 74.65600222349167, + "p90": 99.32799637317657, + "p95": 105.56799918413162, + "p99": 127.20000743865967 + }, + "combine": { + "p50": 69.88800317049026, + "p90": 83.10399949550629, + "p95": 88.639996945858, + "p99": 99.35999661684036 + }, + "roundtrip": { + "p50": 124.9919980764389, + "p90": 
151.48800611495972, + "p95": 159.5200002193451, + "p99": 197.88800179958344 + }, + "isolatedSum": { + "p50": 144.54400539398193, + "p90": 182.43199586868286, + "p95": 194.20799612998962, + "p99": 226.56000405550003 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9605120, + "combineLogicalBytes": 9605120, + "fanoutMean": 5.234375, + "recvTokensMax": 93, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 80.35200089216232, + "p90": 101.02400183677673, + "p95": 105.6319996714592, + "p99": 116.7680025100708 + }, + "combine": { + "p50": 76.80000364780426, + "p90": 88.86399865150452, + "p95": 94.17600184679031, + "p99": 101.56799852848053 + }, + "roundtrip": { + "p50": 135.04000008106232, + "p90": 155.29599785804749, + "p95": 165.50399363040924, + "p99": 190.43199717998505 + }, + "isolatedSum": { + "p50": 157.15200453996658, + "p90": 189.88800048828125, + "p95": 199.8080015182495, + "p99": 218.33600103855133 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19367936, + "combineLogicalBytes": 19367936, + "fanoutMean": 5.27734375, + "recvTokensMax": 182, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 97.120001912117, + "p90": 111.00800335407257, + "p95": 117.11999773979187, + "p99": 134.39999520778656 + }, + "combine": { + "p50": 87.39200234413147, + "p90": 99.32799637317657, + "p95": 105.6319996714592, + "p99": 121.18399888277054 + }, + "roundtrip": { + "p50": 159.2320054769516, + "p90": 177.2480010986328, + "p95": 184.28799510002136, + "p99": 207.71199464797974 + }, + "isolatedSum": { + "p50": 184.51200425624847, + "p90": 210.33599972724915, + "p95": 222.75199741125107, + "p99": 255.5839940905571 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38535168, + "combineLogicalBytes": 38535168, + "fanoutMean": 5.25, + 
"recvTokensMax": 358, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 115.23199826478958, + "p90": 132.4159950017929, + "p95": 140.47999680042267, + "p99": 171.64799571037292 + }, + "combine": { + "p50": 102.84800082445145, + "p90": 114.07999694347382, + "p95": 119.1679984331131, + "p99": 129.60000336170197 + }, + "roundtrip": { + "p50": 195.90400159358978, + "p90": 210.11200547218323, + "p95": 217.15199947357178, + "p99": 243.74400079250336 + }, + "isolatedSum": { + "p50": 218.07999908924103, + "p90": 246.49599194526672, + "p95": 259.64799523353577, + "p99": 301.2479990720749 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 76869632, + "combineLogicalBytes": 76869632, + "fanoutMean": 5.236328125, + "recvTokensMax": 688, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-49529f9d", + "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-moderate|8|decode|normal|none|none|0|tuned||14ded8461f2636c", + "colorKey": "h200_f2b19f62", + "comparisonKey": "cc27e02aea0a210a", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:04:04.313162+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_11", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · zipf-moderate", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + 
"activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "14ded8461f2636c", + "workloadId": "set:8:120a8dc1dba92ca9", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28272072315", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272072315", + "createdAt": "2026-06-27T00:04:04.313162+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 72.95999675989151, + "p90": 100.73599964380264, + "p95": 110.88000237941742, + "p99": 152.99199521541595 + }, + "combine": { + "p50": 65.2799978852272, + "p90": 80.9599980711937, + "p95": 85.28000116348267, + "p99": 102.1760031580925 + }, + "roundtrip": { + "p50": 121.08799815177917, + "p90": 155.20000457763672, + "p95": 166.27199947834015, + "p99": 225.11999309062958 + }, + "isolatedSum": { + "p50": 138.2399946451187, + "p90": 181.69599771499634, + "p95": 196.16000354290009, + "p99": 255.16799837350845 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 444416, + "combineLogicalBytes": 444416, + "fanoutMean": 3.875, + "recvTokensMax": 8, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 73.2479989528656, + "p90": 105.47199845314026, + "p95": 114.84800279140472, 
+ "p99": 135.74400544166565 + }, + "combine": { + "p50": 67.61600077152252, + "p90": 79.83999699354172, + "p95": 83.5840031504631, + "p99": 92.99200028181076 + }, + "roundtrip": { + "p50": 119.64800208806992, + "p90": 145.56799829006195, + "p95": 150.91200172901154, + "p99": 165.18400609493256 + }, + "isolatedSum": { + "p50": 140.86399972438812, + "p90": 185.31199544668198, + "p95": 198.43200594186783, + "p99": 228.7360057234764 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 845824, + "combineLogicalBytes": 845824, + "fanoutMean": 3.6875, + "recvTokensMax": 16, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 73.79200309515, + "p90": 102.88000106811523, + "p95": 112.0000034570694, + "p99": 131.8719983100891 + }, + "combine": { + "p50": 67.80800223350525, + "p90": 78.8159966468811, + "p95": 83.29600095748901, + "p99": 102.08000242710114 + }, + "roundtrip": { + "p50": 120.60800194740295, + "p90": 144.44799721240997, + "p95": 152.67199277877808, + "p99": 166.59200191497803 + }, + "isolatedSum": { + "p50": 141.60000532865524, + "p90": 181.69599771499634, + "p95": 195.2960044145584, + "p99": 233.95200073719025 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1691648, + "combineLogicalBytes": 1691648, + "fanoutMean": 3.6875, + "recvTokensMax": 32, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 73.79200309515, + "p90": 97.75999933481216, + "p95": 105.92000186443329, + "p99": 117.69600212574005 + }, + "combine": { + "p50": 68.06399673223495, + "p90": 81.56800270080566, + "p95": 87.39200234413147, + "p99": 104.44799810647964 + }, + "roundtrip": { + "p50": 121.31199985742569, + "p90": 153.98399531841278, + "p95": 162.78399527072906, + "p99": 199.5519995689392 + }, + "isolatedSum": { + "p50": 141.85599982738495, + "p90": 
179.32800203561783, + "p95": 193.31200420856476, + "p99": 222.1440002322197 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3354624, + "combineLogicalBytes": 3354624, + "fanoutMean": 3.65625, + "recvTokensMax": 64, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 73.44000041484833, + "p90": 97.69599884748459, + "p95": 103.74400019645691, + "p99": 117.15199798345566 + }, + "combine": { + "p50": 69.98399645090103, + "p90": 83.16799998283386, + "p95": 88.51200342178345, + "p99": 98.59199821949005 + }, + "roundtrip": { + "p50": 125.91999769210815, + "p90": 152.0320028066635, + "p95": 167.7439957857132, + "p99": 200.54399967193604 + }, + "isolatedSum": { + "p50": 143.42399686574936, + "p90": 180.86399883031845, + "p95": 192.25600361824036, + "p99": 215.7439962029457 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6537216, + "combineLogicalBytes": 6537216, + "fanoutMean": 3.5625, + "recvTokensMax": 127, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 85.4400023818016, + "p90": 105.05600273609161, + "p95": 111.93600296974182, + "p99": 135.48800349235535 + }, + "combine": { + "p50": 76.12799853086472, + "p90": 88.60799670219421, + "p95": 92.41600334644318, + "p99": 124.06399846076965 + }, + "roundtrip": { + "p50": 136.4479959011078, + "p90": 159.04000401496887, + "p95": 166.81599617004395, + "p99": 204.12799715995789 + }, + "isolatedSum": { + "p50": 161.56800091266632, + "p90": 193.66399943828583, + "p95": 204.352006316185, + "p99": 259.552001953125 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 12859392, + "combineLogicalBytes": 12859392, + "fanoutMean": 3.50390625, + "recvTokensMax": 255, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 
512, + "dispatch": { + "p50": 99.55199807882309, + "p90": 115.84000289440155, + "p95": 124.79999661445618, + "p99": 159.5200002193451 + }, + "combine": { + "p50": 86.65599673986435, + "p90": 98.68799895048141, + "p95": 104.032002389431, + "p99": 120.28799951076508 + }, + "roundtrip": { + "p50": 162.23999857902527, + "p90": 177.7919977903366, + "p95": 186.62400543689728, + "p99": 207.58399367332458 + }, + "isolatedSum": { + "p50": 186.20799481868744, + "p90": 214.52800184488297, + "p95": 228.83199900388718, + "p99": 279.80799973011017 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 25145344, + "combineLogicalBytes": 25145344, + "fanoutMean": 3.42578125, + "recvTokensMax": 510, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 123.16799908876419, + "p90": 138.08000087738037, + "p95": 149.59999918937683, + "p99": 160.35200655460358 + }, + "combine": { + "p50": 112.47999966144562, + "p90": 122.36800044775009, + "p95": 127.45599448680878, + "p99": 136.9280070066452 + }, + "roundtrip": { + "p50": 213.4079933166504, + "p90": 239.16800320148468, + "p95": 253.6959946155548, + "p99": 450.3040015697479 + }, + "isolatedSum": { + "p50": 235.6479987502098, + "p90": 260.44800132513046, + "p95": 277.0559936761856, + "p99": 297.2800135612488 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-904f847b", + "identity": "h200|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-moderate+eplb|8|decode|normal|none|none|0|tuned||a8f501af7004836", + "colorKey": "h200_bac4102c", + "comparisonKey": "6234055b9069f2f2", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:04:21.213602+00:00", + "status": "valid", + "publicationStatus": "official", + 
"runner": "h200-dgxc-slurm_0", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · zipf-moderate+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "a8f501af7004836", + "workloadId": "set:8:120a8dc1dba92ca9", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": 4.927734375, + "eplbImbalanceAfter": 1.0006103515625, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28272075655", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272075655", + "createdAt": "2026-06-27T00:04:21.213602+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 71.99999690055847, + "p90": 99.35999661684036, + "p95": 108.47999900579453, + "p99": 130.8480054140091 + }, + "combine": { + "p50": 67.1359971165657, + "p90": 80.64000308513641, + 
"p95": 84.44800227880478, + "p99": 108.12799632549286 + }, + "roundtrip": { + "p50": 121.08799815177917, + "p90": 149.4079977273941, + "p95": 161.24799847602844, + "p99": 199.8080015182495 + }, + "isolatedSum": { + "p50": 139.13599401712418, + "p90": 179.99999970197678, + "p95": 192.9280012845993, + "p99": 238.97600173950195 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 5.375, + "recvTokensMax": 7, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 73.98399710655212, + "p90": 101.79200023412704, + "p95": 111.7120012640953, + "p99": 146.33600413799286 + }, + "combine": { + "p50": 68.7360018491745, + "p90": 82.04799890518188, + "p95": 88.73599767684937, + "p99": 105.21599650382996 + }, + "roundtrip": { + "p50": 124.41600114107132, + "p90": 160.0320041179657, + "p95": 172.86400496959686, + "p99": 196.44799828529358 + }, + "isolatedSum": { + "p50": 142.71999895572662, + "p90": 183.83999913930893, + "p95": 200.44799894094467, + "p99": 251.55200064182281 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1204224, + "combineLogicalBytes": 1204224, + "fanoutMean": 5.25, + "recvTokensMax": 14, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 76.22399926185608, + "p90": 108.76800119876862, + "p95": 123.1359988451004, + "p99": 148.8640010356903 + }, + "combine": { + "p50": 68.7360018491745, + "p90": 82.14399963617325, + "p95": 88.54400366544724, + "p99": 105.02400249242783 + }, + "roundtrip": { + "p50": 124.25599992275238, + "p90": 160.0320041179657, + "p95": 170.01600563526154, + "p99": 244.89599466323853 + }, + "isolatedSum": { + "p50": 144.96000111103058, + "p90": 190.91200083494186, + "p95": 211.68000251054764, + "p99": 253.88800352811813 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 2394112, + "combineLogicalBytes": 2394112, + "fanoutMean": 5.21875, + "recvTokensMax": 24, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 74.62400197982788, + "p90": 101.21600329875946, + "p95": 111.10399663448334, + "p99": 145.47200500965118 + }, + "combine": { + "p50": 69.34399902820587, + "p90": 84.70399677753448, + "p95": 89.50400352478027, + "p99": 104.44799810647964 + }, + "roundtrip": { + "p50": 125.37600100040436, + "p90": 159.4880074262619, + "p95": 170.1119989156723, + "p99": 203.23200523853302 + }, + "isolatedSum": { + "p50": 143.96800100803375, + "p90": 185.92000007629395, + "p95": 200.6080001592636, + "p99": 249.92000311613083 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4630528, + "combineLogicalBytes": 4630528, + "fanoutMean": 5.046875, + "recvTokensMax": 45, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 72.73600250482559, + "p90": 97.75999933481216, + "p95": 108.03200304508209, + "p99": 141.9840008020401 + }, + "combine": { + "p50": 70.36799937486649, + "p90": 88.28800171613693, + "p95": 94.68799829483032, + "p99": 104.54399883747101 + }, + "roundtrip": { + "p50": 127.00800597667694, + "p90": 156.12800419330597, + "p95": 166.9439971446991, + "p99": 198.33600521087646 + }, + "isolatedSum": { + "p50": 143.10400187969208, + "p90": 186.0480010509491, + "p95": 202.72000133991241, + "p99": 246.5279996395111 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9447424, + "combineLogicalBytes": 9447424, + "fanoutMean": 5.1484375, + "recvTokensMax": 91, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 81.98399841785431, + "p90": 106.65600001811981, + "p95": 116.22399836778641, + "p99": 
165.69599509239197 + }, + "combine": { + "p50": 76.9599974155426, + "p90": 90.87999910116196, + "p95": 97.120001912117, + "p99": 118.23999881744385 + }, + "roundtrip": { + "p50": 135.74400544166565, + "p90": 164.48000073432922, + "p95": 176.70400440692902, + "p99": 220.22399306297302 + }, + "isolatedSum": { + "p50": 158.9439958333969, + "p90": 197.53599911928177, + "p95": 213.3440002799034, + "p99": 283.9359939098358 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19023872, + "combineLogicalBytes": 19023872, + "fanoutMean": 5.18359375, + "recvTokensMax": 178, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 97.21600264310837, + "p90": 125.50400197505951, + "p95": 140.99200069904327, + "p99": 185.85599958896637 + }, + "combine": { + "p50": 87.77599781751633, + "p90": 105.53599894046783, + "p95": 113.0559965968132, + "p99": 125.63200294971466 + }, + "roundtrip": { + "p50": 159.7760021686554, + "p90": 186.65599822998047, + "p95": 201.53599977493286, + "p99": 221.69600427150726 + }, + "isolatedSum": { + "p50": 184.9920004606247, + "p90": 231.04000091552734, + "p95": 254.04799729585648, + "p99": 311.48800253868103 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38148096, + "combineLogicalBytes": 38148096, + "fanoutMean": 5.197265625, + "recvTokensMax": 350, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 118.40000003576279, + "p90": 141.7279988527298, + "p95": 151.296004652977, + "p99": 174.84800517559052 + }, + "combine": { + "p50": 103.74400019645691, + "p90": 121.21599912643433, + "p95": 128.60800325870514, + "p99": 147.13600277900696 + }, + "roundtrip": { + "p50": 198.08000326156616, + "p90": 219.7760045528412, + "p95": 227.55199670791626, + "p99": 265.3760015964508 + }, + "isolatedSum": { + "p50": 222.1440002322197, + 
"p90": 262.9439979791641, + "p95": 279.90400791168213, + "p99": 321.9840079545975 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 76955648, + "combineLogicalBytes": 76955648, + "fanoutMean": 5.2421875, + "recvTokensMax": 687, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-06bd64b9", + "identity": "h200|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|decode|normal|none|none|0|tuned||a8f501af7004836", + "colorKey": "h200_1eda221e", + "comparisonKey": "00e2c45e1159b581", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:03:16.896756+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_3", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · zipf+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf", + "routingLabel": "zipf+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "a8f501af7004836", + "workloadId": "set:8:f5576e2b712d38c3", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": 4.927734375, + "eplbImbalanceAfter": 
1.0006103515625, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28272045914", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272045914", + "createdAt": "2026-06-27T00:03:16.896756+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 72.15999811887741, + "p90": 99.0080013871193, + "p95": 105.56799918413162, + "p99": 131.80799782276154 + }, + "combine": { + "p50": 68.70400160551071, + "p90": 83.23200047016144, + "p95": 88.8959988951683, + "p99": 117.40799993276596 + }, + "roundtrip": { + "p50": 121.60000205039978, + "p90": 151.8079936504364, + "p95": 162.88000345230103, + "p99": 197.63199985027313 + }, + "isolatedSum": { + "p50": 140.86399972438812, + "p90": 182.24000185728073, + "p95": 194.46399807929993, + "p99": 249.2159977555275 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 5.375, + "recvTokensMax": 7, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 73.7600028514862, + "p90": 99.96800124645233, + "p95": 106.97600245475769, + "p99": 125.63200294971466 + }, + "combine": { + "p50": 67.58400052785873, + "p90": 79.52000200748444, + "p95": 84.35200154781342, + "p99": 95.61599791049957 + }, + "roundtrip": { + "p50": 121.95199728012085, + "p90": 150.52799880504608, + "p95": 158.9760035276413, + "p99": 188.51199746131897 + }, + "isolatedSum": { + "p50": 141.34400337934494, + "p90": 179.48800325393677, + "p95": 191.3280040025711, + "p99": 221.24800086021423 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1204224, + "combineLogicalBytes": 1204224, + "fanoutMean": 5.25, + "recvTokensMax": 14, + "stragglerRank": 6, + "correct": true, 
+ "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 74.23999905586243, + "p90": 98.36799651384354, + "p95": 105.8880016207695, + "p99": 117.60000139474869 + }, + "combine": { + "p50": 68.57600063085556, + "p90": 81.82399719953537, + "p95": 86.496002972126, + "p99": 94.62399780750275 + }, + "roundtrip": { + "p50": 123.19999933242798, + "p90": 152.92799472808838, + "p95": 164.12800550460815, + "p99": 221.98399901390076 + }, + "isolatedSum": { + "p50": 142.815999686718, + "p90": 180.1919937133789, + "p95": 192.3840045928955, + "p99": 212.22399920225143 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2394112, + "combineLogicalBytes": 2394112, + "fanoutMean": 5.21875, + "recvTokensMax": 24, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 76.06399804353714, + "p90": 117.8240031003952, + "p95": 132.03200697898865, + "p99": 183.45600366592407 + }, + "combine": { + "p50": 69.37599927186966, + "p90": 85.02399921417236, + "p95": 89.66399729251862, + "p99": 100.3199964761734 + }, + "roundtrip": { + "p50": 123.16799908876419, + "p90": 152.8639942407608, + "p95": 160.96000373363495, + "p99": 184.1920018196106 + }, + "isolatedSum": { + "p50": 145.4399973154068, + "p90": 202.84800231456757, + "p95": 221.69600427150726, + "p99": 283.7760001420975 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4630528, + "combineLogicalBytes": 4630528, + "fanoutMean": 5.046875, + "recvTokensMax": 45, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 76.67200267314911, + "p90": 107.10400342941284, + "p95": 114.20799791812897, + "p99": 128.9599984884262 + }, + "combine": { + "p50": 72.25599884986877, + "p90": 88.76799792051315, + "p95": 96.00000083446503, + "p99": 114.75200206041336 + }, + 
"roundtrip": { + "p50": 128.31999361515045, + "p90": 158.6879938840866, + "p95": 168.89600455760956, + "p99": 192.89599359035492 + }, + "isolatedSum": { + "p50": 148.92800152301788, + "p90": 195.872001349926, + "p95": 210.207998752594, + "p99": 243.71200054883957 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9447424, + "combineLogicalBytes": 9447424, + "fanoutMean": 5.1484375, + "recvTokensMax": 91, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 83.39200168848038, + "p90": 103.80800068378448, + "p95": 109.43999886512756, + "p99": 126.71999633312225 + }, + "combine": { + "p50": 77.18399912118912, + "p90": 89.79199826717377, + "p95": 95.10400146245956, + "p99": 105.98400235176086 + }, + "roundtrip": { + "p50": 134.783998131752, + "p90": 157.79200196266174, + "p95": 167.13599860668182, + "p99": 210.94399690628052 + }, + "isolatedSum": { + "p50": 160.5760008096695, + "p90": 193.59999895095825, + "p95": 204.54400032758713, + "p99": 232.70399868488312 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19023872, + "combineLogicalBytes": 19023872, + "fanoutMean": 5.18359375, + "recvTokensMax": 178, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 93.56799721717834, + "p90": 113.63200098276138, + "p95": 120.2239990234375, + "p99": 133.4719955921173 + }, + "combine": { + "p50": 86.40000224113464, + "p90": 101.72799974679947, + "p95": 105.6319996714592, + "p99": 116.48000031709671 + }, + "roundtrip": { + "p50": 157.9200029373169, + "p90": 181.34400248527527, + "p95": 187.42400407791138, + "p99": 211.87199652194977 + }, + "isolatedSum": { + "p50": 179.967999458313, + "p90": 215.36000072956085, + "p95": 225.8559986948967, + "p99": 249.95199590921402 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38148096, + 
"combineLogicalBytes": 38148096, + "fanoutMean": 5.197265625, + "recvTokensMax": 350, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 117.15199798345566, + "p90": 137.1839940547943, + "p95": 142.33599603176117, + "p99": 165.79200327396393 + }, + "combine": { + "p50": 106.84800148010254, + "p90": 119.32799965143204, + "p95": 122.81599640846252, + "p99": 133.53599607944489 + }, + "roundtrip": { + "p50": 197.56799936294556, + "p90": 213.85599672794342, + "p95": 221.3120013475418, + "p99": 245.37600576877594 + }, + "isolatedSum": { + "p50": 223.9999994635582, + "p90": 256.51199370622635, + "p95": 265.1519924402237, + "p99": 299.3279993534088 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 76955648, + "combineLogicalBytes": 76955648, + "fanoutMean": 5.2421875, + "recvTokensMax": 687, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-0d6ef23b", + "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|ac583971f94b176", + "colorKey": "h200_c851a534", + "comparisonKey": "6b4f4d7f65293019", + "schemaVersion": 3, + "generatedAt": "2026-06-26T17:29:45.312905+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_2", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "normalized", + "suite": "resource-constrained", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 (norm)", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": 
false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": 0.18, + "achievedFraction": 0.1818, + "configuredUnits": 24, + "deviceUnits": 132, + "resourceClass": "resource-constrained", + "conformanceClass": "resource-conforming", + "fixedKernel": false, + "paretoEligible": true + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": "set:8:d8d49658059863f2", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28254392935", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254392935", + "createdAt": "2026-06-26T17:29:45.312905+00:00", + "sha": "60dec7d70f554e252fec87709e2be52752947db1" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 74.11199808120728, + "p90": 94.11200135946274, + "p95": 104.35199737548828, + "p99": 138.0160003900528 + }, + "combine": { + "p50": 68.41599941253662, + "p90": 78.72000336647034, + "p95": 83.48800241947174, + "p99": 105.72800040245056 + }, + "roundtrip": { + "p50": 124.4800016283989, + "p90": 144.31999623775482, + "p95": 156.3200056552887, + "p99": 193.53599846363068 + }, + "isolatedSum": { + "p50": 142.5279974937439, + "p90": 172.83200472593307, + "p95": 187.83999979496002, + "p99": 243.74400079250336 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 74.33599978685379, + "p90": 
99.42399710416794, + "p95": 109.66400057077408, + "p99": 131.71200454235077 + }, + "combine": { + "p50": 69.85600292682648, + "p90": 83.00799876451492, + "p95": 90.40000289678574, + "p99": 114.33599889278412 + }, + "roundtrip": { + "p50": 122.43200093507767, + "p90": 144.6080058813095, + "p95": 154.62400019168854, + "p99": 173.69599640369415 + }, + "isolatedSum": { + "p50": 144.19200271368027, + "p90": 182.43199586868286, + "p95": 200.06400346755981, + "p99": 246.0480034351349 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 74.97599720954895, + "p90": 95.29600292444229, + "p95": 104.12800312042236, + "p99": 139.74399864673615 + }, + "combine": { + "p50": 69.40799951553345, + "p90": 81.63200318813324, + "p95": 88.22400122880936, + "p99": 119.4240003824234 + }, + "roundtrip": { + "p50": 123.74400347471237, + "p90": 150.36800503730774, + "p95": 160.3199988603592, + "p99": 204.8960030078888 + }, + "isolatedSum": { + "p50": 144.3839967250824, + "p90": 176.92800611257553, + "p95": 192.35200434923172, + "p99": 259.16799902915955 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 74.78400319814682, + "p90": 92.25600212812424, + "p95": 102.91200131177902, + "p99": 123.16799908876419 + }, + "combine": { + "p50": 70.52800059318542, + "p90": 81.95199817419052, + "p95": 87.48800307512283, + "p99": 100.51199793815613 + }, + "roundtrip": { + "p50": 124.03199821710587, + "p90": 147.20000326633453, + "p95": 153.9199948310852, + "p99": 180.00000715255737 + }, + "isolatedSum": { 
+ "p50": 145.31200379133224, + "p90": 174.20800030231476, + "p95": 190.40000438690186, + "p99": 223.67999702692032 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 73.18399846553802, + "p90": 92.83199906349182, + "p95": 103.61599922180176, + "p99": 195.93599438667297 + }, + "combine": { + "p50": 71.32799923419952, + "p90": 86.33600175380707, + "p95": 92.03200042247772, + "p99": 120.80000340938568 + }, + "roundtrip": { + "p50": 129.72800433635712, + "p90": 161.31199896335602, + "p95": 172.86400496959686, + "p99": 215.10399878025055 + }, + "isolatedSum": { + "p50": 144.51199769973755, + "p90": 179.1680008172989, + "p95": 195.64799964427948, + "p99": 316.73599779605865 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 82.2720006108284, + "p90": 100.80000013113022, + "p95": 108.92800241708755, + "p99": 134.88000631332397 + }, + "combine": { + "p50": 76.03199779987335, + "p90": 89.40800279378891, + "p95": 94.97600048780441, + "p99": 117.95199662446976 + }, + "roundtrip": { + "p50": 130.8480054140091, + "p90": 154.33600544929504, + "p95": 164.73600268363953, + "p99": 204.0639966726303 + }, + "isolatedSum": { + "p50": 158.30399841070175, + "p90": 190.20800292491913, + "p95": 203.90400290489197, + "p99": 252.83200293779373 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 91.32800251245499, + "p90": 110.04800349473953, + "p95": 116.86400324106216, + "p99": 146.84799313545227 + }, + "combine": { + "p50": 87.2960016131401, + "p90": 98.36799651384354, + "p95": 104.70400005578995, + "p99": 124.92799758911133 + }, + "roundtrip": { + "p50": 156.031996011734, + "p90": 173.24799299240112, + "p95": 180.38399517536163, + "p99": 215.39199352264404 + }, + "isolatedSum": { + "p50": 178.6240041255951, + "p90": 208.41600000858307, + "p95": 221.5680032968521, + "p99": 271.7759907245636 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 116.03199690580368, + "p90": 129.7599971294403, + "p95": 136.57599687576294, + "p99": 149.24800395965576 + }, + "combine": { + "p50": 103.42399775981903, + "p90": 116.54400080442429, + "p95": 123.3920007944107, + "p99": 141.95199310779572 + }, + "roundtrip": { + "p50": 192.54399836063385, + "p90": 208.8959962129593, + "p95": 215.64799547195435, + "p99": 228.7359982728958 + }, + "isolatedSum": { + "p50": 219.4559946656227, + "p90": 246.3039979338646, + "p95": 259.96799767017365, + "p99": 291.1999970674515 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-0f126172", + "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|decode|normal|none|none|0|normalized|0.18|ffa946582edb500", + "colorKey": "h200_a1e795ec", + "comparisonKey": "467cf4a4daff1cff", + "schemaVersion": 3, + "generatedAt": "2026-06-26T17:30:47.472039+00:00", + "status": "valid", + 
"publicationStatus": "official", + "runner": "h200-dgxc-slurm_12", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "normalized", + "suite": "resource-constrained", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 (norm) · balanced", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced", + "routingLabel": "balanced", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": 0.18, + "achievedFraction": 0.1818, + "configuredUnits": 24, + "deviceUnits": 132, + "resourceClass": "resource-constrained", + "conformanceClass": "resource-conforming", + "fixedKernel": false, + "paretoEligible": true + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ffa946582edb500", + "workloadId": "set:8:7af12818400d6348", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28254443915", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254443915", + "createdAt": "2026-06-26T17:30:47.472039+00:00", + "sha": "60dec7d70f554e252fec87709e2be52752947db1" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 73.95199686288834, + "p90": 88.0960002541542, + "p95": 97.24800288677216, + "p99": 108.25599730014801 + }, + "combine": { + "p50": 70.91200351715088, + "p90": 
81.60000294446945, + "p95": 87.26400136947632, + "p99": 97.28000313043594 + }, + "roundtrip": { + "p50": 125.2480000257492, + "p90": 149.63200688362122, + "p95": 157.85600244998932, + "p99": 175.04000663757324 + }, + "isolatedSum": { + "p50": 144.86400038003922, + "p90": 169.69600319862366, + "p95": 184.51200425624847, + "p99": 205.53600043058395 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 8, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 74.23999905586243, + "p90": 91.00800007581711, + "p95": 98.88000041246414, + "p99": 130.23999333381653 + }, + "combine": { + "p50": 70.52800059318542, + "p90": 79.71200346946716, + "p95": 85.50400286912918, + "p99": 106.46399855613708 + }, + "roundtrip": { + "p50": 123.6800029873848, + "p90": 142.07999408245087, + "p95": 152.99199521541595, + "p99": 184.35199558734894 + }, + "isolatedSum": { + "p50": 144.76799964904785, + "p90": 170.72000354528427, + "p95": 184.38400328159332, + "p99": 236.7039918899536 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1835008, + "combineLogicalBytes": 1835008, + "fanoutMean": 8, + "recvTokensMax": 16, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 75.03999769687653, + "p90": 97.9200005531311, + "p95": 108.47999900579453, + "p99": 140.09599387645721 + }, + "combine": { + "p50": 70.11199742555618, + "p90": 81.34400099515915, + "p95": 86.496002972126, + "p99": 99.29600358009338 + }, + "roundtrip": { + "p50": 125.69600343704224, + "p90": 151.36000514030457, + "p95": 159.55199301242828, + "p99": 178.3359944820404 + }, + "isolatedSum": { + "p50": 145.1519951224327, + "p90": 179.26400154829025, + "p95": 194.97600197792053, + "p99": 239.3919974565506 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 8, + "recvTokensMax": 32, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 73.56800138950348, + "p90": 94.17600184679031, + "p95": 102.62399911880493, + "p99": 126.14400684833527 + }, + "combine": { + "p50": 70.72000205516815, + "p90": 82.04799890518188, + "p95": 86.43200248479843, + "p99": 96.47999703884125 + }, + "roundtrip": { + "p50": 125.69600343704224, + "p90": 148.0640023946762, + "p95": 156.76799416542053, + "p99": 182.72000551223755 + }, + "isolatedSum": { + "p50": 144.28800344467163, + "p90": 176.2240007519722, + "p95": 189.05600160360336, + "p99": 222.6240038871765 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 7340032, + "combineLogicalBytes": 7340032, + "fanoutMean": 8, + "recvTokensMax": 64, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 74.46400076150894, + "p90": 90.71999788284302, + "p95": 96.44799679517746, + "p99": 112.19199746847153 + }, + "combine": { + "p50": 76.03199779987335, + "p90": 84.70399677753448, + "p95": 91.16800129413605, + "p99": 104.54399883747101 + }, + "roundtrip": { + "p50": 129.60000336170197, + "p90": 153.6960005760193, + "p95": 161.3440066576004, + "p99": 196.28800451755524 + }, + "isolatedSum": { + "p50": 150.4959985613823, + "p90": 175.4239946603775, + "p95": 187.6159980893135, + "p99": 216.73599630594254 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 8, + "recvTokensMax": 128, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 83.20000022649765, + "p90": 100.12800246477127, + "p95": 107.45599865913391, + "p99": 122.3360002040863 + }, 
+ "combine": { + "p50": 80.79999685287476, + "p90": 89.88799899816513, + "p95": 95.36000341176987, + "p99": 100.54399818181992 + }, + "roundtrip": { + "p50": 142.17600226402283, + "p90": 155.45600652694702, + "p95": 165.3439998626709, + "p99": 182.0800006389618 + }, + "isolatedSum": { + "p50": 163.9999970793724, + "p90": 190.0160014629364, + "p95": 202.81600207090378, + "p99": 222.87999838590622 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 8, + "recvTokensMax": 256, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 107.61599987745285, + "p90": 121.0239976644516, + "p95": 127.07200646400452, + "p99": 148.73600006103516 + }, + "combine": { + "p50": 95.87199985980988, + "p90": 105.3759977221489, + "p95": 112.60800063610077, + "p99": 123.29600006341934 + }, + "roundtrip": { + "p50": 176.67199671268463, + "p90": 191.80800020694733, + "p95": 203.5840004682541, + "p99": 225.98400712013245 + }, + "isolatedSum": { + "p50": 203.48799973726273, + "p90": 226.3999953866005, + "p95": 239.68000710010529, + "p99": 272.0320001244545 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 8, + "recvTokensMax": 512, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 133.66399705410004, + "p90": 146.04799449443817, + "p95": 152.41600573062897, + "p99": 162.56000101566315 + }, + "combine": { + "p50": 118.52800101041794, + "p90": 127.68000364303589, + "p95": 130.91200590133667, + "p99": 144.67200636863708 + }, + "roundtrip": { + "p50": 225.92000663280487, + "p90": 240.48000574111938, + "p95": 251.3279914855957, + "p99": 700.223982334137 + }, + "isolatedSum": { + "p50": 252.19199806451797, + "p90": 273.72799813747406, + "p95": 
283.32801163196564, + "p99": 307.23200738430023 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 8, + "recvTokensMax": 1024, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-8e3ecfeb", + "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|decode|normal|none|none|0|normalized|0.18|14ded8461f2636c", + "colorKey": "h200_0a93a01f", + "comparisonKey": "c7e35a057338b2fa", + "schemaVersion": 3, + "generatedAt": "2026-06-26T17:31:04.173894+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_6", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "normalized", + "suite": "resource-constrained", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 (norm) · zipf", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf", + "routingLabel": "zipf", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": 0.18, + "achievedFraction": 0.1818, + "configuredUnits": 24, + "deviceUnits": 132, + "resourceClass": "resource-constrained", + "conformanceClass": "resource-conforming", + "fixedKernel": false, + "paretoEligible": true + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "14ded8461f2636c", + "workloadId": "set:8:f5576e2b712d38c3", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + 
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28254452252", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254452252", + "createdAt": "2026-06-26T17:31:04.173894+00:00", + "sha": "60dec7d70f554e252fec87709e2be52752947db1" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 74.27199929952621, + "p90": 108.83200168609619, + "p95": 118.49600076675415, + "p99": 155.5200070142746 + }, + "combine": { + "p50": 68.38399916887283, + "p90": 84.03199911117554, + "p95": 90.20800143480301, + "p99": 114.88000303506851 + }, + "roundtrip": { + "p50": 123.07199835777283, + "p90": 153.08800339698792, + "p95": 165.8560037612915, + "p99": 205.9199959039688 + }, + "isolatedSum": { + "p50": 142.65599846839905, + "p90": 192.86400079727173, + "p95": 208.70400220155716, + "p99": 270.4000100493431 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 444416, + "combineLogicalBytes": 444416, + "fanoutMean": 3.875, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 73.95199686288834, + "p90": 97.82399982213974, + "p95": 106.6880002617836, + "p99": 132.9919993877411 + }, + "combine": { + "p50": 68.64000111818314, + "p90": 80.51200211048126, + "p95": 85.37600189447403, + "p99": 98.49599748849869 + }, + "roundtrip": { + "p50": 123.36000055074692, + "p90": 150.176003575325, + "p95": 158.4639996290207, + "p99": 181.63199722766876 + }, + "isolatedSum": { + "p50": 142.59199798107147, + "p90": 178.336001932621, + "p95": 192.06400215625763, + "p99": 231.48799687623978 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 845824, + "combineLogicalBytes": 845824, + "fanoutMean": 3.6875, + "recvTokensMax": 16, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 73.5040009021759, + "p90": 95.551997423172, + "p95": 104.86400127410889, + "p99": 123.4240010380745 + }, + "combine": { + "p50": 67.80800223350525, + "p90": 78.46400141716003, + "p95": 84.95999872684479, + "p99": 125.2799928188324 + }, + "roundtrip": { + "p50": 122.78400361537933, + "p90": 150.65599977970123, + "p95": 159.07199680805206, + "p99": 200.51200687885284 + }, + "isolatedSum": { + "p50": 141.31200313568115, + "p90": 174.01599884033203, + "p95": 189.82400000095367, + "p99": 248.7039938569069 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1691648, + "combineLogicalBytes": 1691648, + "fanoutMean": 3.6875, + "recvTokensMax": 32, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 75.23199915885925, + "p90": 103.32799702882767, + "p95": 111.87200248241425, + "p99": 143.26399564743042 + }, + "combine": { + "p50": 69.60000097751617, + "p90": 85.79199761152267, + "p95": 91.71199798583984, + "p99": 124.12799894809723 + }, + "roundtrip": { + "p50": 126.36800110340118, + "p90": 160.12799739837646, + "p95": 167.64800250530243, + "p99": 193.2159960269928 + }, + "isolatedSum": { + "p50": 144.83200013637543, + "p90": 189.11999464035034, + "p95": 203.5840004682541, + "p99": 267.39199459552765 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3354624, + "combineLogicalBytes": 3354624, + "fanoutMean": 3.65625, + "recvTokensMax": 64, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 77.40800082683563, + "p90": 104.63999956846237, + "p95": 113.43999952077866, + "p99": 144.0960019826889 + }, + "combine": { + "p50": 70.52800059318542, + "p90": 87.23200112581253, + "p95": 90.94399958848953, + "p99": 101.1200025677681 + }, + "roundtrip": { + "p50": 127.6479959487915, + "p90": 
161.85599565505981, + "p95": 175.7120043039322, + "p99": 230.27199506759644 + }, + "isolatedSum": { + "p50": 147.93600142002106, + "p90": 191.8720006942749, + "p95": 204.3839991092682, + "p99": 245.216004550457 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6537216, + "combineLogicalBytes": 6537216, + "fanoutMean": 3.5625, + "recvTokensMax": 127, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 83.0719992518425, + "p90": 109.50399935245514, + "p95": 115.61600118875504, + "p99": 128.1599998474121 + }, + "combine": { + "p50": 77.34400033950806, + "p90": 91.64799749851227, + "p95": 95.61599791049957, + "p99": 112.73600161075592 + }, + "roundtrip": { + "p50": 132.60799646377563, + "p90": 157.0879966020584, + "p95": 165.0560051202774, + "p99": 194.20799612998962 + }, + "isolatedSum": { + "p50": 160.41599959135056, + "p90": 201.1519968509674, + "p95": 211.2319990992546, + "p99": 240.89600145816803 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 12859392, + "combineLogicalBytes": 12859392, + "fanoutMean": 3.50390625, + "recvTokensMax": 255, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 97.15200215578079, + "p90": 111.35999858379364, + "p95": 121.31199985742569, + "p99": 134.8479986190796 + }, + "combine": { + "p50": 87.5839963555336, + "p90": 99.80800002813339, + "p95": 104.06400263309479, + "p99": 116.95999652147293 + }, + "roundtrip": { + "p50": 161.9199961423874, + "p90": 177.72799730300903, + "p95": 184.67199802398682, + "p99": 235.61599850654602 + }, + "isolatedSum": { + "p50": 184.7359985113144, + "p90": 211.16799861192703, + "p95": 225.37600249052048, + "p99": 251.80799514055252 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 25145344, + "combineLogicalBytes": 25145344, + "fanoutMean": 3.42578125, + 
"recvTokensMax": 510, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 122.97599762678146, + "p90": 147.10399508476257, + "p95": 156.25600516796112, + "p99": 183.07200074195862 + }, + "combine": { + "p50": 110.49599945545197, + "p90": 123.87199699878693, + "p95": 129.40800189971924, + "p99": 150.751993060112 + }, + "roundtrip": { + "p50": 208.73600244522095, + "p90": 225.43999552726746, + "p95": 233.024001121521, + "p99": 256.415992975235 + }, + "isolatedSum": { + "p50": 233.47199708223343, + "p90": 270.9759920835495, + "p95": 285.66400706768036, + "p99": 333.8239938020706 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-9efea369", + "identity": "h200|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|decode|normal|none|none|0|normalized|0.18|a8f501af7004836", + "colorKey": "h200_993777bf", + "comparisonKey": "cdec001c60a84b85", + "schemaVersion": 3, + "generatedAt": "2026-06-26T17:46:59.245966+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_6", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "normalized", + "suite": "resource-constrained", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 (norm) · zipf+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf", + "routingLabel": "zipf+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + 
"activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": 0.18, + "achievedFraction": 0.1818, + "configuredUnits": 24, + "deviceUnits": 132, + "resourceClass": "resource-constrained", + "conformanceClass": "resource-conforming", + "fixedKernel": false, + "paretoEligible": true + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "a8f501af7004836", + "workloadId": "set:8:f5576e2b712d38c3", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": 4.927734375, + "eplbImbalanceAfter": 1.0006103515625, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28255303840", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28255303840", + "createdAt": "2026-06-26T17:46:59.245966+00:00", + "sha": "36d3eb6c3c7386d3220c873d305410219c5c0f17" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 72.89600372314453, + "p90": 99.45599734783173, + "p95": 108.73600095510483, + "p99": 128.86400520801544 + }, + "combine": { + "p50": 67.19999760389328, + "p90": 78.3040001988411, + "p95": 82.46400207281113, + "p99": 102.65599936246872 + }, + "roundtrip": { + "p50": 119.32799965143204, + "p90": 147.77599275112152, + "p95": 155.07200360298157, + "p99": 171.03999853134155 + }, + "isolatedSum": { + "p50": 140.0960013270378, + "p90": 177.75999754667282, + "p95": 191.20000302791595, + "p99": 231.52000457048416 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 5.375, + "recvTokensMax": 7, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 73.18399846553802, + "p90": 94.27200257778168, 
+ "p95": 104.5759990811348, + "p99": 122.68800288438797 + }, + "combine": { + "p50": 68.09599697589874, + "p90": 81.15199953317642, + "p95": 86.17600053548813, + "p99": 113.3119985461235 + }, + "roundtrip": { + "p50": 120.31999975442886, + "p90": 147.45600521564484, + "p95": 157.82399475574493, + "p99": 190.08000195026398 + }, + "isolatedSum": { + "p50": 141.27999544143677, + "p90": 175.4240021109581, + "p95": 190.75199961662292, + "p99": 236.00000143051147 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1204224, + "combineLogicalBytes": 1204224, + "fanoutMean": 5.25, + "recvTokensMax": 14, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 78.62400263547897, + "p90": 130.5920034646988, + "p95": 144.54400539398193, + "p99": 178.847998380661 + }, + "combine": { + "p50": 69.08799707889557, + "p90": 80.51200211048126, + "p95": 87.87199854850769, + "p99": 104.19200360774994 + }, + "roundtrip": { + "p50": 124.70400333404541, + "p90": 154.14400398731232, + "p95": 165.15199840068817, + "p99": 194.68800723552704 + }, + "isolatedSum": { + "p50": 147.71199971437454, + "p90": 211.10400557518005, + "p95": 232.41600394248962, + "p99": 283.04000198841095 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2394112, + "combineLogicalBytes": 2394112, + "fanoutMean": 5.21875, + "recvTokensMax": 24, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 74.46400076150894, + "p90": 99.39199686050415, + "p95": 109.76000130176544, + "p99": 140.6400054693222 + }, + "combine": { + "p50": 68.76800209283829, + "p90": 83.64800363779068, + "p95": 90.14400094747543, + "p99": 115.35999923944473 + }, + "roundtrip": { + "p50": 124.54400211572647, + "p90": 155.7759940624237, + "p95": 170.56000232696533, + "p99": 186.91200017929077 + }, + "isolatedSum": { + "p50": 
143.23200285434723, + "p90": 183.04000049829483, + "p95": 199.90400224924088, + "p99": 256.00000470876694 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4630528, + "combineLogicalBytes": 4630528, + "fanoutMean": 5.046875, + "recvTokensMax": 45, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 76.25599950551987, + "p90": 106.9440022110939, + "p95": 120.7360029220581, + "p99": 149.24800395965576 + }, + "combine": { + "p50": 70.52800059318542, + "p90": 85.24800091981888, + "p95": 90.04800021648407, + "p99": 104.5759990811348 + }, + "roundtrip": { + "p50": 129.98400628566742, + "p90": 161.05599701404572, + "p95": 173.8560050725937, + "p99": 205.21600544452667 + }, + "isolatedSum": { + "p50": 146.7840000987053, + "p90": 192.19200313091278, + "p95": 210.78400313854218, + "p99": 253.82400304079056 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9447424, + "combineLogicalBytes": 9447424, + "fanoutMean": 5.1484375, + "recvTokensMax": 91, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 81.91999793052673, + "p90": 99.07200187444687, + "p95": 107.04000294208527, + "p99": 128.57599556446075 + }, + "combine": { + "p50": 76.03199779987335, + "p90": 89.63199704885483, + "p95": 96.54399752616882, + "p99": 106.08000308275223 + }, + "roundtrip": { + "p50": 129.08799946308136, + "p90": 156.76799416542053, + "p95": 167.29600727558136, + "p99": 217.3440009355545 + }, + "isolatedSum": { + "p50": 157.95199573040009, + "p90": 188.7039989233017, + "p95": 203.5840004682541, + "p99": 234.65599864721298 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19023872, + "combineLogicalBytes": 19023872, + "fanoutMean": 5.18359375, + "recvTokensMax": 178, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 101.79200023412704, + "p90": 136.6720050573349, + "p95": 146.36799693107605, + "p99": 175.10400712490082 + }, + "combine": { + "p50": 93.44000369310379, + "p90": 112.76800185441971, + "p95": 117.15199798345566, + "p99": 131.71200454235077 + }, + "roundtrip": { + "p50": 165.43999314308167, + "p90": 204.44799959659576, + "p95": 212.38400042057037, + "p99": 240.03200232982635 + }, + "isolatedSum": { + "p50": 195.23200392723083, + "p90": 249.4400069117546, + "p95": 263.5199949145317, + "p99": 306.8160116672516 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38148096, + "combineLogicalBytes": 38148096, + "fanoutMean": 5.197265625, + "recvTokensMax": 350, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 115.68000167608261, + "p90": 135.29600203037262, + "p95": 142.17600226402283, + "p99": 160.64000129699707 + }, + "combine": { + "p50": 104.96000200510025, + "p90": 118.04799735546112, + "p95": 122.68800288438797, + "p99": 147.64800667762756 + }, + "roundtrip": { + "p50": 194.97600197792053, + "p90": 212.64000236988068, + "p95": 220.19200026988983, + "p99": 234.78400707244873 + }, + "isolatedSum": { + "p50": 220.64000368118286, + "p90": 253.34399938583374, + "p95": 264.8640051484108, + "p99": 308.28800797462463 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 76955648, + "combineLogicalBytes": 76955648, + "fanoutMean": 5.2421875, + "recvTokensMax": 687, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-cee2e19b", + "identity": "h200|deepep|7168|8|256|bf16|normal|cached-layout-comm-only-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|ac583971f94b176", + "colorKey": "h200_edd92e38", + "comparisonKey": "4a9eb2a61bfd9462", + "schemaVersion": 3, + "generatedAt": "2026-06-26T17:30:08.901856+00:00", + "status": 
"valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_7", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "normalized", + "suite": "resource-constrained", + "comparisonClass": "standardized", + "measurementContract": "cached-layout-comm-only-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 (norm) [cl]", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": 0.18, + "achievedFraction": 0.1818, + "configuredUnits": 24, + "deviceUnits": 132, + "resourceClass": "resource-constrained", + "conformanceClass": "resource-conforming", + "fixedKernel": false, + "paretoEligible": true + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": "set:8:d8d49658059863f2", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28254409438", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254409438", + "createdAt": "2026-06-26T17:30:08.901856+00:00", + "sha": "60dec7d70f554e252fec87709e2be52752947db1" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 62.97600269317627, + "p90": 86.40000224113464, + "p95": 94.14400160312653, + "p99": 136.9599997997284 + }, + "combine": { + "p50": 69.21599805355072, + "p90": 
82.04799890518188, + "p95": 87.20000088214874, + "p99": 98.49599748849869 + }, + "roundtrip": { + "p50": 109.98400300741196, + "p90": 133.08799266815186, + "p95": 140.8960074186325, + "p99": 178.27199399471283 + }, + "isolatedSum": { + "p50": 132.192000746727, + "p90": 168.44800114631653, + "p95": 181.34400248527527, + "p99": 235.45599728822708 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 64.38399851322174, + "p90": 88.73599767684937, + "p95": 94.87999975681305, + "p99": 119.48800086975098 + }, + "combine": { + "p50": 69.2799985408783, + "p90": 83.52000266313553, + "p95": 88.95999938249588, + "p99": 107.10400342941284 + }, + "roundtrip": { + "p50": 110.20799726247787, + "p90": 138.2720023393631, + "p95": 145.37599682807922, + "p99": 175.55199563503265 + }, + "isolatedSum": { + "p50": 133.66399705410004, + "p90": 172.2560003399849, + "p95": 183.83999913930893, + "p99": 226.59200429916382 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 64.41599875688553, + "p90": 90.52799642086029, + "p95": 101.88800096511841, + "p99": 132.28799402713776 + }, + "combine": { + "p50": 70.62400132417679, + "p90": 85.34400165081024, + "p95": 90.71999788284302, + "p99": 102.27199643850327 + }, + "roundtrip": { + "p50": 113.43999952077866, + "p90": 141.79199934005737, + "p95": 148.22399616241455, + "p99": 183.58400464057922 + }, + "isolatedSum": { + "p50": 135.04000008106232, + "p90": 175.87199807167053, + "p95": 192.60799884796143, + "p99": 234.55999046564102 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 63.1679967045784, + "p90": 82.75199681520462, + "p95": 87.96799927949905, + "p99": 107.744000852108 + }, + "combine": { + "p50": 69.85600292682648, + "p90": 85.1840004324913, + "p95": 90.46400338411331, + "p99": 100.99200159311295 + }, + "roundtrip": { + "p50": 112.44799941778183, + "p90": 139.20000195503235, + "p95": 152.38399803638458, + "p99": 206.7520022392273 + }, + "isolatedSum": { + "p50": 133.02399963140488, + "p90": 167.93599724769592, + "p95": 178.43200266361237, + "p99": 208.73600244522095 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 60.35200133919716, + "p90": 85.02399921417236, + "p95": 91.67999774217606, + "p99": 111.13599687814713 + }, + "combine": { + "p50": 70.3359991312027, + "p90": 86.87999844551086, + "p95": 89.82399851083755, + "p99": 99.35999661684036 + }, + "roundtrip": { + "p50": 116.03199690580368, + "p90": 141.34399592876434, + "p95": 148.3519971370697, + "p99": 184.9920004606247 + }, + "isolatedSum": { + "p50": 130.68800047039986, + "p90": 171.90399765968323, + "p95": 181.5039962530136, + "p99": 210.4959934949875 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 71.74400240182877, + "p90": 90.71999788284302, + "p95": 
96.73599898815155, + "p99": 118.23999881744385 + }, + "combine": { + "p50": 77.66400277614594, + "p90": 93.05600076913834, + "p95": 97.69599884748459, + "p99": 108.92800241708755 + }, + "roundtrip": { + "p50": 122.36800044775009, + "p90": 149.05600249767303, + "p95": 159.61599349975586, + "p99": 184.12800133228302 + }, + "isolatedSum": { + "p50": 149.4080051779747, + "p90": 183.77599865198135, + "p95": 194.43199783563614, + "p99": 227.1680012345314 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 82.65600353479385, + "p90": 100.3199964761734, + "p95": 109.15199667215347, + "p99": 139.39200341701508 + }, + "combine": { + "p50": 91.45600348711014, + "p90": 106.52799904346466, + "p95": 114.30399864912033, + "p99": 132.22399353981018 + }, + "roundtrip": { + "p50": 147.42399752140045, + "p90": 165.3439998626709, + "p95": 174.20800030231476, + "p99": 198.65599274635315 + }, + "isolatedSum": { + "p50": 174.112007021904, + "p90": 206.84799551963806, + "p95": 223.4559953212738, + "p99": 271.61599695682526 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 105.12000322341919, + "p90": 118.40000003576279, + "p95": 122.81599640846252, + "p99": 147.32800424098969 + }, + "combine": { + "p50": 104.73600029945374, + "p90": 122.11199849843979, + "p95": 126.75200402736664, + "p99": 138.84800672531128 + }, + "roundtrip": { + "p50": 184.38400328159332, + "p90": 200.41599869728088, + "p95": 207.96799659729004, + "p99": 272.44800329208374 + }, + "isolatedSum": { + "p50": 
209.85600352287292, + "p90": 240.51199853420258, + "p95": 249.56800043582916, + "p99": 286.17601096630096 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-8a74732f", + "identity": "h200|deepep|7168|8|256|bf16|normal|cached-layout-comm-only-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h200_76bb7d5d", + "comparisonKey": "b4a52819ec3c25b8", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:49:31.596673+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_11", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "cached-layout-comm-only-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 [cl]", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": "set:8:d8d49658059863f2", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + 
"eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271608834", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271608834", + "createdAt": "2026-06-26T23:49:31.596673+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 62.144000083208084, + "p90": 86.62399649620056, + "p95": 98.49599748849869, + "p99": 125.5359947681427 + }, + "combine": { + "p50": 68.54400038719177, + "p90": 84.41600203514099, + "p95": 92.83199906349182, + "p99": 123.07199835777283 + }, + "roundtrip": { + "p50": 109.31199789047241, + "p90": 135.29600203037262, + "p95": 143.77599954605103, + "p99": 159.84000265598297 + }, + "isolatedSum": { + "p50": 130.68800047039986, + "p90": 171.03999853134155, + "p95": 191.3279965519905, + "p99": 248.60799312591553 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 63.74400109052658, + "p90": 91.26400202512741, + "p95": 99.87200051546097, + "p99": 171.9679981470108 + }, + "combine": { + "p50": 70.81600278615952, + "p90": 194.75199282169342, + "p95": 206.94400370121002, + "p99": 256.9279968738556 + }, + "roundtrip": { + "p50": 110.04800349473953, + "p90": 140.1599943637848, + "p95": 147.13600277900696, + "p99": 161.50400042533875 + }, + "isolatedSum": { + "p50": 134.5600038766861, + "p90": 286.01599484682083, + "p95": 306.816004216671, + "p99": 428.8959950208664 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 1, + 
"correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 62.94400244951248, + "p90": 80.51200211048126, + "p95": 89.02399986982346, + "p99": 111.39199882745743 + }, + "combine": { + "p50": 68.38399916887283, + "p90": 79.8719972372055, + "p95": 88.54400366544724, + "p99": 100.54399818181992 + }, + "roundtrip": { + "p50": 111.16799712181091, + "p90": 139.80799913406372, + "p95": 148.41599762439728, + "p99": 167.07199811935425 + }, + "isolatedSum": { + "p50": 131.32800161838531, + "p90": 160.38399934768677, + "p95": 177.5680035352707, + "p99": 211.93599700927734 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 61.88800185918808, + "p90": 83.16799998283386, + "p95": 92.51199662685394, + "p99": 104.06400263309479 + }, + "combine": { + "p50": 68.67200136184692, + "p90": 82.84799754619598, + "p95": 88.639996945858, + "p99": 105.05600273609161 + }, + "roundtrip": { + "p50": 110.84800213575363, + "p90": 140.79999923706055, + "p95": 148.0640023946762, + "p99": 159.2639982700348 + }, + "isolatedSum": { + "p50": 130.560003221035, + "p90": 166.01599752902985, + "p95": 181.15199357271194, + "p99": 209.1200053691864 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 59.84000116586685, + "p90": 82.17599987983704, + "p95": 92.32000261545181, + "p99": 105.92000186443329 + }, + "combine": { + "p50": 69.72800195217133, + "p90": 84.19200032949448, + "p95": 90.68799763917923, + "p99": 106.91200196743011 
+ }, + "roundtrip": { + "p50": 112.12799698114395, + "p90": 134.62400436401367, + "p95": 145.9839940071106, + "p99": 164.09599781036377 + }, + "isolatedSum": { + "p50": 129.56800311803818, + "p90": 166.3680002093315, + "p95": 183.00800025463104, + "p99": 212.8320038318634 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 70.20799815654755, + "p90": 94.08000111579895, + "p95": 101.15200281143188, + "p99": 118.17599833011627 + }, + "combine": { + "p50": 76.64000242948532, + "p90": 91.2960022687912, + "p95": 97.43999689817429, + "p99": 105.27999699115753 + }, + "roundtrip": { + "p50": 123.77600371837616, + "p90": 148.3519971370697, + "p95": 155.29599785804749, + "p99": 175.135999917984 + }, + "isolatedSum": { + "p50": 146.84800058603287, + "p90": 185.37600338459015, + "p95": 198.59199970960617, + "p99": 223.4559953212738 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 86.43200248479843, + "p90": 99.48799759149551, + "p95": 106.84800148010254, + "p99": 127.42400169372559 + }, + "combine": { + "p50": 85.82399785518646, + "p90": 96.63999825716019, + "p95": 104.76800054311752, + "p99": 113.21599781513214 + }, + "roundtrip": { + "p50": 147.8399932384491, + "p90": 164.5440012216568, + "p95": 169.95200514793396, + "p99": 197.53600656986237 + }, + "isolatedSum": { + "p50": 172.2560003399849, + "p90": 196.1279958486557, + "p95": 211.61600202322006, + "p99": 240.63999950885773 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + 
"combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 103.7760004401207, + "p90": 118.9119964838028, + "p95": 127.16799974441528, + "p99": 134.97599959373474 + }, + "combine": { + "p50": 105.15200346708298, + "p90": 119.00799721479416, + "p95": 124.35200065374374, + "p99": 139.55199718475342 + }, + "roundtrip": { + "p50": 185.2799952030182, + "p90": 201.7280012369156, + "p95": 207.39200711250305, + "p99": 224.95999932289124 + }, + "isolatedSum": { + "p50": 208.92800390720367, + "p90": 237.91999369859695, + "p95": 251.52000039815903, + "p99": 274.52799677848816 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-274a06b0", + "identity": "h200|deepep|7168|8|256|bf16|ll|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h200_c9aeae24", + "comparisonKey": "0abd2163f516521c", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:50:44.931546+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_8", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "ll", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 LL", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", 
+ "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": "set:8:d8d49658059863f2", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271645585", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271645585", + "createdAt": "2026-06-26T23:50:44.931546+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 37.567999213933945, + "p90": 48.0320006608963, + "p95": 52.41600051522255, + "p99": 62.33600154519081 + }, + "combine": { + "p50": 33.663999289274216, + "p90": 44.38399896025658, + "p95": 46.879999339580536, + "p99": 61.85600161552429 + }, + "roundtrip": { + "p50": 51.231998950242996, + "p90": 70.14399766921997, + "p95": 77.31200009584427, + "p99": 100.0640019774437 + }, + "isolatedSum": { + "p50": 71.23199850320816, + "p90": 92.41599962115288, + "p95": 99.29599985480309, + "p99": 124.1920031607151 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 14, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 37.88800165057182, + "p90": 49.15200173854828, + "p95": 55.87200075387955, + 
"p99": 76.89599692821503 + }, + "combine": { + "p50": 32.896000891923904, + "p90": 43.83999854326248, + "p95": 47.07200080156326, + "p99": 67.74400174617767 + }, + "roundtrip": { + "p50": 51.00800096988678, + "p90": 67.9360032081604, + "p95": 74.20799881219864, + "p99": 96.83199971914291 + }, + "isolatedSum": { + "p50": 70.78400254249573, + "p90": 92.99200028181076, + "p95": 102.94400155544281, + "p99": 144.6399986743927 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 21, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 37.53599897027016, + "p90": 44.95999962091446, + "p95": 51.61599814891815, + "p99": 66.30399823188782 + }, + "combine": { + "p50": 29.791999608278275, + "p90": 39.16800022125244, + "p95": 44.064000248909, + "p99": 53.63199859857559 + }, + "roundtrip": { + "p50": 51.13599821925163, + "p90": 63.519999384880066, + "p95": 71.77600264549255, + "p99": 81.34400099515915 + }, + "isolatedSum": { + "p50": 67.32799857854843, + "p90": 84.1279998421669, + "p95": 95.67999839782715, + "p99": 119.93599683046341 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 39, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 38.27200084924698, + "p90": 51.32799968123436, + "p95": 57.08799883723259, + "p99": 66.97600334882736 + }, + "combine": { + "p50": 34.623999148607254, + "p90": 44.03200000524521, + "p95": 46.62400111556053, + "p99": 54.55999821424484 + }, + "roundtrip": { + "p50": 55.39200082421303, + "p90": 67.58400052785873, + "p95": 75.42400062084198, + "p99": 95.0080007314682 + }, + "isolatedSum": { + "p50": 72.89599999785423, + "p90": 95.35999968647957, + "p95": 
103.71199995279312, + "p99": 121.5360015630722 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 74, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 38.816001266241074, + "p90": 54.17599901556969, + "p95": 57.72799998521805, + "p99": 75.00799745321274 + }, + "combine": { + "p50": 36.288000643253326, + "p90": 46.01600021123886, + "p95": 48.00000041723251, + "p99": 69.47200000286102 + }, + "roundtrip": { + "p50": 59.967998415231705, + "p90": 73.05599749088287, + "p95": 77.2159993648529, + "p99": 92.12800115346909 + }, + "isolatedSum": { + "p50": 75.1040019094944, + "p90": 100.19199922680855, + "p95": 105.72800040245056, + "p99": 144.47999745607376 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 145, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 45.40799930691719, + "p90": 55.32800033688545, + "p95": 60.15999987721443, + "p99": 70.88000327348709 + }, + "combine": { + "p50": 43.87199878692627, + "p90": 53.53600159287453, + "p95": 55.32800033688545, + "p99": 67.9360032081604 + }, + "roundtrip": { + "p50": 72.35199958086014, + "p90": 82.8159973025322, + "p95": 86.01599931716919, + "p99": 98.88000041246414 + }, + "isolatedSum": { + "p50": 89.27999809384346, + "p90": 108.86400192975998, + "p95": 115.48800021409988, + "p99": 138.8160064816475 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 287, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 
55.296000093221664, + "p90": 66.6240006685257, + "p95": 70.36799937486649, + "p99": 88.16000074148178 + }, + "combine": { + "p50": 59.07199904322624, + "p90": 67.71200150251389, + "p95": 70.43199986219406, + "p99": 79.3600007891655 + }, + "roundtrip": { + "p50": 97.34400361776352, + "p90": 109.3439981341362, + "p95": 115.32799899578094, + "p99": 128.12800705432892 + }, + "isolatedSum": { + "p50": 114.3679991364479, + "p90": 134.33600217103958, + "p95": 140.79999923706055, + "p99": 167.52000153064728 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 564, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 81.05599880218506, + "p90": 91.26400202512741, + "p95": 95.77599912881851, + "p99": 104.38399761915207 + }, + "combine": { + "p50": 86.40000224113464, + "p90": 98.36799651384354, + "p95": 102.84800082445145, + "p99": 111.96800321340561 + }, + "roundtrip": { + "p50": 148.44800531864166, + "p90": 162.88000345230103, + "p95": 168.16000640392303, + "p99": 178.24000120162964 + }, + "isolatedSum": { + "p50": 167.4560010433197, + "p90": 189.63199853897095, + "p95": 198.62399995326996, + "p99": 216.35200083255768 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 1104, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-81e223f4", + "identity": "h200|deepep|7168|8|256|bf16|ll|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h200_7cfa04c4", + "comparisonKey": "72cd529af4968fe8", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:50:48.529187+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_6", + "sku": "h200", + "backend": 
"deepep", + "phase": "decode", + "mode": "ll", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 LL", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": "set:8:d8d49658059863f2", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271650161", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271650161", + "createdAt": "2026-06-26T23:50:48.529187+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 38.55999931693077, + "p90": 52.25599929690361, + "p95": 57.69599974155426, + "p99": 68.70400160551071 + }, + "combine": { + "p50": 33.440001308918, + "p90": 46.23999819159508, + "p95": 50.36799982190132, + "p99": 62.912002205848694 + }, + "roundtrip": { + "p50": 52.70399898290634, + "p90": 70.43199986219406, + "p95": 
77.85599678754807, + "p99": 90.27200192213058 + }, + "isolatedSum": { + "p50": 72.00000062584877, + "p90": 98.49599748849869, + "p95": 108.06399956345558, + "p99": 131.6160038113594 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 14, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 37.76000067591667, + "p90": 48.25599864125252, + "p95": 55.93600124120712, + "p99": 79.68000322580338 + }, + "combine": { + "p50": 32.80000016093254, + "p90": 41.120000183582306, + "p95": 44.863998889923096, + "p99": 49.8879998922348 + }, + "roundtrip": { + "p50": 52.83199995756149, + "p90": 65.88800251483917, + "p95": 71.80800288915634, + "p99": 80.60800284147263 + }, + "isolatedSum": { + "p50": 70.56000083684921, + "p90": 89.37599882483482, + "p95": 100.80000013113022, + "p99": 129.56800311803818 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 21, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 38.495998829603195, + "p90": 52.44800075888634, + "p95": 56.543998420238495, + "p99": 76.4480009675026 + }, + "combine": { + "p50": 33.055998384952545, + "p90": 44.16000097990036, + "p95": 45.951999723911285, + "p99": 53.568001836538315 + }, + "roundtrip": { + "p50": 52.70399898290634, + "p90": 64.2239972949028, + "p95": 71.96799665689468, + "p99": 81.53600245714188 + }, + "isolatedSum": { + "p50": 71.55199721455574, + "p90": 96.6080017387867, + "p95": 102.49599814414978, + "p99": 130.0160028040409 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 39, + "stragglerRank": 6, + "correct": true, + 
"samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 39.07199949026108, + "p90": 52.960000932216644, + "p95": 56.992001831531525, + "p99": 65.43999910354614 + }, + "combine": { + "p50": 34.04799848794937, + "p90": 44.19200122356415, + "p95": 46.1760014295578, + "p99": 57.472001761198044 + }, + "roundtrip": { + "p50": 54.11199852824211, + "p90": 68.60800087451935, + "p95": 74.78400319814682, + "p99": 85.28000116348267 + }, + "isolatedSum": { + "p50": 73.11999797821045, + "p90": 97.15200215578079, + "p95": 103.16800326108932, + "p99": 122.91200086474419 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 74, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 40.12800008058548, + "p90": 55.00800162553787, + "p95": 59.29600074887276, + "p99": 66.81600213050842 + }, + "combine": { + "p50": 38.047999143600464, + "p90": 49.82399940490723, + "p95": 52.799999713897705, + "p99": 63.19999694824219 + }, + "roundtrip": { + "p50": 61.5679994225502, + "p90": 75.48800110816956, + "p95": 82.36800134181976, + "p99": 96.89600020647049 + }, + "isolatedSum": { + "p50": 78.17599922418594, + "p90": 104.8320010304451, + "p95": 112.09600046277046, + "p99": 130.0159990787506 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 145, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 46.23999819159508, + "p90": 56.57599866390228, + "p95": 62.30400130152702, + "p99": 70.8480030298233 + }, + "combine": { + "p50": 43.96799951791763, + "p90": 53.75999957323074, + "p95": 58.33600088953972, + "p99": 61.216000467538834 + }, + "roundtrip": { 
+ "p50": 71.19999825954437, + "p90": 80.86399734020233, + "p95": 85.28000116348267, + "p99": 93.21600198745728 + }, + "isolatedSum": { + "p50": 90.20799770951271, + "p90": 110.33599823713303, + "p95": 120.64000219106674, + "p99": 132.06400349736214 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 287, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 56.60799890756607, + "p90": 77.53600180149078, + "p95": 85.31200140714645, + "p99": 192.03199446201324 + }, + "combine": { + "p50": 58.240000158548355, + "p90": 67.29599833488464, + "p95": 69.56800073385239, + "p99": 77.82399654388428 + }, + "roundtrip": { + "p50": 96.28800302743912, + "p90": 107.39199817180634, + "p95": 111.58400028944016, + "p99": 126.52799487113953 + }, + "isolatedSum": { + "p50": 114.84799906611443, + "p90": 144.83200013637543, + "p95": 154.88000214099884, + "p99": 269.8559910058975 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 564, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 81.7599967122078, + "p90": 92.67199784517288, + "p95": 95.13600170612335, + "p99": 128.38399410247803 + }, + "combine": { + "p50": 86.27200126647949, + "p90": 94.91200000047684, + "p95": 97.120001912117, + "p99": 105.27999699115753 + }, + "roundtrip": { + "p50": 147.2959965467453, + "p90": 157.56799280643463, + "p95": 162.36799955368042, + "p99": 174.9120056629181 + }, + "isolatedSum": { + "p50": 168.0319979786873, + "p90": 187.58399784564972, + "p95": 192.25600361824036, + "p99": 233.66399109363556 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + 
"fanoutMean": 5.291015625, + "recvTokensMax": 1104, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-43b4144e", + "identity": "h200|deepep|7168|8|256|bf16|ll|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|ac583971f94b176", + "colorKey": "h200_0a1a73b3", + "comparisonKey": "14196b9d68f90910", + "schemaVersion": 3, + "generatedAt": "2026-06-26T17:30:32.638567+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_5", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "ll", + "resourceMode": "normalized", + "suite": "resource-constrained", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 LL (norm)", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": 0.18, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": "set:8:d8d49658059863f2", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": 
"28254426529", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254426529", + "createdAt": "2026-06-26T17:30:32.638567+00:00", + "sha": "60dec7d70f554e252fec87709e2be52752947db1" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 36.86400130391121, + "p90": 47.13600128889084, + "p95": 51.52000114321709, + "p99": 63.32799792289734 + }, + "combine": { + "p50": 33.440001308918, + "p90": 42.527999728918076, + "p95": 46.81599885225296, + "p99": 52.22399905323982 + }, + "roundtrip": { + "p50": 50.52800104022026, + "p90": 65.15199691057205, + "p95": 71.03999704122543, + "p99": 78.68800312280655 + }, + "isolatedSum": { + "p50": 70.30400261282921, + "p90": 89.66400101780891, + "p95": 98.33599999547005, + "p99": 115.55199697613716 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 14, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 37.408001720905304, + "p90": 48.608001321554184, + "p95": 54.687999188899994, + "p99": 65.2799978852272 + }, + "combine": { + "p50": 32.735999673604965, + "p90": 42.59200021624565, + "p95": 45.05600035190582, + "p99": 51.35999992489815 + }, + "roundtrip": { + "p50": 51.4880008995533, + "p90": 66.72000139951706, + "p95": 72.54400104284286, + "p99": 85.08799970149994 + }, + "isolatedSum": { + "p50": 70.14400139451027, + "p90": 91.20000153779984, + "p95": 99.74399954080582, + "p99": 116.63999781012535 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 21, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 37.79200091958046, + "p90": 49.27999898791313, + "p95": 54.91200089454651, + "p99": 
61.08799949288368 + }, + "combine": { + "p50": 31.231999397277832, + "p90": 43.487999588251114, + "p95": 47.26399853825569, + "p99": 65.31199812889099 + }, + "roundtrip": { + "p50": 51.58400163054466, + "p90": 68.89600306749344, + "p95": 73.95199686288834, + "p99": 91.61599725484848 + }, + "isolatedSum": { + "p50": 69.02400031685829, + "p90": 92.76799857616425, + "p95": 102.1759994328022, + "p99": 126.39999762177467 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 39, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 37.53599897027016, + "p90": 48.128001391887665, + "p95": 54.75199967622757, + "p99": 62.111999839544296 + }, + "combine": { + "p50": 34.46400165557861, + "p90": 44.544000178575516, + "p95": 47.231998294591904, + "p99": 57.37600103020668 + }, + "roundtrip": { + "p50": 54.687999188899994, + "p90": 67.4239993095398, + "p95": 73.44000041484833, + "p99": 91.96799993515015 + }, + "isolatedSum": { + "p50": 72.00000062584877, + "p90": 92.67200157046318, + "p95": 101.98399797081947, + "p99": 119.48800086975098 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 74, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 39.29600119590759, + "p90": 51.35999992489815, + "p95": 55.64799904823303, + "p99": 64.96000289916992 + }, + "combine": { + "p50": 36.67199984192848, + "p90": 46.62400111556053, + "p95": 50.56000128388405, + "p99": 60.38400158286095 + }, + "roundtrip": { + "p50": 60.47999858856201, + "p90": 74.5920017361641, + "p95": 79.3600007891655, + "p99": 87.87199854850769 + }, + "isolatedSum": { + "p50": 75.96800103783607, + "p90": 97.98400104045868, + "p95": 
106.20800033211708, + "p99": 125.34400448203087 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 145, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 45.05600035190582, + "p90": 55.00800162553787, + "p95": 57.95200169086456, + "p99": 66.01600348949432 + }, + "combine": { + "p50": 44.28799822926521, + "p90": 53.05600166320801, + "p95": 55.904000997543335, + "p99": 61.3120011985302 + }, + "roundtrip": { + "p50": 72.64000177383423, + "p90": 84.16000008583069, + "p95": 88.03199976682663, + "p99": 106.30399733781815 + }, + "isolatedSum": { + "p50": 89.34399858117104, + "p90": 108.06400328874588, + "p95": 113.8560026884079, + "p99": 127.32800468802452 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 287, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 55.23199960589409, + "p90": 65.63200056552887, + "p95": 71.48800045251846, + "p99": 79.55200225114822 + }, + "combine": { + "p50": 58.43200162053108, + "p90": 69.37599927186966, + "p95": 71.07199728488922, + "p99": 79.42400127649307 + }, + "roundtrip": { + "p50": 96.8639999628067, + "p90": 108.44799876213074, + "p95": 113.72800171375275, + "p99": 121.72800302505493 + }, + "isolatedSum": { + "p50": 113.66400122642517, + "p90": 135.00799983739853, + "p95": 142.55999773740768, + "p99": 158.9760035276413 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 564, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 
79.26400005817413, + "p90": 88.44800293445587, + "p95": 92.6399976015091, + "p99": 101.69599950313568 + }, + "combine": { + "p50": 86.01599931716919, + "p90": 95.0080007314682, + "p95": 97.02400118112564, + "p99": 103.32799702882767 + }, + "roundtrip": { + "p50": 147.32800424098969, + "p90": 157.53600001335144, + "p95": 161.47199273109436, + "p99": 169.0240055322647 + }, + "isolatedSum": { + "p50": 165.27999937534332, + "p90": 183.45600366592407, + "p95": 189.66399878263474, + "p99": 205.02399653196335 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 1104, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-878f6103", + "identity": "h200|deepep|4096|8|128|fp8|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||75530960a30b452", + "colorKey": "h200_9979edfc", + "comparisonKey": "539cbdfe3675c8d8", + "schemaVersion": 3, + "generatedAt": "2026-06-27T11:14:31.220360+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_1", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · fp8", + "model": "Qwen3.5", + "shape": { + "hidden": 4096, + "topk": 8, + "experts": 128, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + 
"conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "75530960a30b452", + "workloadId": "set:8:d1b92539bddfb570", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28287507619", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28287507619", + "createdAt": "2026-06-27T11:14:31.220360+00:00", + "sha": "df7fddee0a275156d4c72fa006cd2b73bce72613" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 66.6240006685257, + "p90": 96.6079980134964, + "p95": 105.40799796581268, + "p99": 139.8400068283081 + }, + "combine": { + "p50": 48.928000032901764, + "p90": 62.144000083208084, + "p95": 69.98399645090103, + "p99": 94.71999853849411 + }, + "roundtrip": { + "p50": 150.39999783039093, + "p90": 202.27199792861938, + "p95": 209.88799631595612, + "p99": 232.35200345516205 + }, + "isolatedSum": { + "p50": 115.55200070142746, + "p90": 158.75199809670448, + "p95": 175.39199441671371, + "p99": 234.56000536680222 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 172032, + "combineLogicalBytes": 344064, + "fanoutMean": 5.25, + "recvTokensMax": 6, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 63.13599646091461, + "p90": 90.81599861383438, + "p95": 99.84000027179718, + "p99": 112.86400258541107 + }, + "combine": { + "p50": 48.86399954557419, + "p90": 59.487998485565186, + "p95": 66.880002617836, + "p99": 72.67200201749802 + }, + "roundtrip": { + "p50": 141.82400703430176, + "p90": 
184.1599941253662, + "p95": 192.1280026435852, + "p99": 211.64800226688385 + }, + "isolatedSum": { + "p50": 111.9999960064888, + "p90": 150.30399709939957, + "p95": 166.72000288963318, + "p99": 185.5360046029091 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 352256, + "combineLogicalBytes": 704512, + "fanoutMean": 5.375, + "recvTokensMax": 12, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 64.60800021886826, + "p90": 93.28000247478485, + "p95": 99.07200187444687, + "p99": 110.46399921178818 + }, + "combine": { + "p50": 49.375999718904495, + "p90": 60.447998344898224, + "p95": 67.61600077152252, + "p99": 73.27999919652939 + }, + "roundtrip": { + "p50": 142.752006649971, + "p90": 189.69599902629852, + "p95": 199.13600385189056, + "p99": 217.3440009355545 + }, + "isolatedSum": { + "p50": 113.98399993777275, + "p90": 153.72800081968307, + "p95": 166.6880026459694, + "p99": 183.74399840831757 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 692224, + "combineLogicalBytes": 1384448, + "fanoutMean": 5.28125, + "recvTokensMax": 26, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 61.85600161552429, + "p90": 86.496002972126, + "p95": 95.551997423172, + "p99": 104.00000214576721 + }, + "combine": { + "p50": 50.08000135421753, + "p90": 60.28800085186958, + "p95": 66.91200286149979, + "p99": 77.40800082683563 + }, + "roundtrip": { + "p50": 143.51999759674072, + "p90": 185.5040043592453, + "p95": 194.17600333690643, + "p99": 225.63199698925018 + }, + "isolatedSum": { + "p50": 111.93600296974182, + "p90": 146.7840038239956, + "p95": 162.46400028467178, + "p99": 181.40800297260284 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1372160, + "combineLogicalBytes": 2744320, + "fanoutMean": 5.234375, + "recvTokensMax": 49, + 
"stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 67.87200272083282, + "p90": 97.72799909114838, + "p95": 107.10400342941284, + "p99": 163.64799439907074 + }, + "combine": { + "p50": 51.29599943757057, + "p90": 61.824001371860504, + "p95": 69.88800317049026, + "p99": 75.6480023264885 + }, + "roundtrip": { + "p50": 146.33600413799286, + "p90": 189.31199610233307, + "p95": 197.4399983882904, + "p99": 221.18400037288666 + }, + "isolatedSum": { + "p50": 119.1680021584034, + "p90": 159.55200046300888, + "p95": 176.9920065999031, + "p99": 239.29599672555923 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2732032, + "combineLogicalBytes": 5464064, + "fanoutMean": 5.2109375, + "recvTokensMax": 94, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 67.45599955320358, + "p90": 91.61599725484848, + "p95": 100.54399818181992, + "p99": 115.48800021409988 + }, + "combine": { + "p50": 54.17599901556969, + "p90": 66.01600348949432, + "p95": 71.74400240182877, + "p99": 80.22399991750717 + }, + "roundtrip": { + "p50": 148.80000054836273, + "p90": 190.11199474334717, + "p95": 201.79200172424316, + "p99": 216.44799411296844 + }, + "isolatedSum": { + "p50": 121.63199856877327, + "p90": 157.6320007443428, + "p95": 172.28800058364868, + "p99": 195.71200013160706 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 5562368, + "combineLogicalBytes": 11124736, + "fanoutMean": 5.3046875, + "recvTokensMax": 186, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 71.80800288915634, + "p90": 112.67200112342834, + "p95": 120.15999853610992, + "p99": 136.28800213336945 + }, + "combine": { + "p50": 62.3680017888546, + "p90": 75.32799988985062, + "p95": 
80.25600016117096, + "p99": 88.03199976682663 + }, + "roundtrip": { + "p50": 162.52799332141876, + "p90": 212.89600431919098, + "p95": 224.41600263118744, + "p99": 245.40799856185913 + }, + "isolatedSum": { + "p50": 134.17600467801094, + "p90": 188.00000101327896, + "p95": 200.41599869728088, + "p99": 224.32000190019608 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 11096064, + "combineLogicalBytes": 22192128, + "fanoutMean": 5.291015625, + "recvTokensMax": 358, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 76.48000121116638, + "p90": 95.36000341176987, + "p95": 103.64799946546555, + "p99": 109.15199667215347 + }, + "combine": { + "p50": 72.28799909353256, + "p90": 84.57600325345993, + "p95": 91.07200056314468, + "p99": 94.2080020904541 + }, + "roundtrip": { + "p50": 167.58400201797485, + "p90": 208.8959962129593, + "p95": 216.5759950876236, + "p99": 233.08800160884857 + }, + "isolatedSum": { + "p50": 148.76800030469894, + "p90": 179.9360066652298, + "p95": 194.72000002861023, + "p99": 203.35999876260757 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22282240, + "combineLogicalBytes": 44564480, + "fanoutMean": 5.3125, + "recvTokensMax": 699, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-b5299c0b", + "identity": "h200|deepep|4096|8|128|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||75530960a30b452", + "colorKey": "h200_87683f6c", + "comparisonKey": "0d3b5b81799f76d5", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:53:33.916655+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_2", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": 
"runtime-visible-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · fp8", + "model": "Qwen3.5", + "shape": { + "hidden": 4096, + "topk": 8, + "experts": 128, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "75530960a30b452", + "workloadId": "set:8:d1b92539bddfb570", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271736220", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271736220", + "createdAt": "2026-06-26T23:53:33.916655+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 221.15199267864227, + "p90": 287.26398944854736, + "p95": 315.39198756217957, + "p99": 401.98400616645813 + }, + "combine": { + "p50": 47.87199944257736, + "p90": 66.27199798822403, + "p95": 73.91999661922455, + "p99": 92.51199662685394 + }, + "roundtrip": { + "p50": 246.75199389457703, + "p90": 302.2400140762329, + "p95": 335.61599254608154, + "p99": 400.160014629364 + }, + "isolatedSum": { + "p50": 269.02399212121964, + "p90": 353.5359874367714, + "p95": 389.3119841814041, + "p99": 
494.4960027933121 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 172032, + "combineLogicalBytes": 344064, + "fanoutMean": 5.25, + "recvTokensMax": 6, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 264.6079957485199, + "p90": 342.3680067062378, + "p95": 371.0399866104126, + "p99": 447.00801372528076 + }, + "combine": { + "p50": 54.46400120854378, + "p90": 68.03199648857117, + "p95": 74.8480036854744, + "p99": 88.83199840784073 + }, + "roundtrip": { + "p50": 257.2160065174103, + "p90": 336.4480137825012, + "p95": 375.10401010513306, + "p99": 443.93599033355713 + }, + "isolatedSum": { + "p50": 319.0719969570637, + "p90": 410.40000319480896, + "p95": 445.887990295887, + "p99": 535.8400121331215 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 352256, + "combineLogicalBytes": 704512, + "fanoutMean": 5.375, + "recvTokensMax": 12, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 210.14399826526642, + "p90": 260.0319981575012, + "p95": 276.99199318885803, + "p99": 401.856005191803 + }, + "combine": { + "p50": 49.02400076389313, + "p90": 61.983998864889145, + "p95": 68.57600063085556, + "p99": 82.43200182914734 + }, + "roundtrip": { + "p50": 252.73600220680237, + "p90": 308.51200222969055, + "p95": 325.76000690460205, + "p99": 404.2240083217621 + }, + "isolatedSum": { + "p50": 259.16799902915955, + "p90": 322.01599702239037, + "p95": 345.5679938197136, + "p99": 484.2880070209503 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 692224, + "combineLogicalBytes": 1384448, + "fanoutMean": 5.28125, + "recvTokensMax": 26, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 229.40799593925476, + "p90": 285.91999411582947, + "p95": 
302.97601222991943, + "p99": 384.799987077713 + }, + "combine": { + "p50": 50.6879985332489, + "p90": 65.95200300216675, + "p95": 71.48800045251846, + "p99": 85.56800335645676 + }, + "roundtrip": { + "p50": 262.7840042114258, + "p90": 331.9680094718933, + "p95": 359.6160113811493, + "p99": 441.0560131072998 + }, + "isolatedSum": { + "p50": 280.09599447250366, + "p90": 351.8719971179962, + "p95": 374.4640126824379, + "p99": 470.36799043416977 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1372160, + "combineLogicalBytes": 2744320, + "fanoutMean": 5.234375, + "recvTokensMax": 49, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 211.67999505996704, + "p90": 262.0159983634949, + "p95": 281.5360128879547, + "p99": 434.4319999217987 + }, + "combine": { + "p50": 50.87999999523163, + "p90": 67.74400174617767, + "p95": 72.76800274848938, + "p99": 100.47999769449234 + }, + "roundtrip": { + "p50": 261.1199915409088, + "p90": 332.5119912624359, + "p95": 354.8800051212311, + "p99": 414.2720103263855 + }, + "isolatedSum": { + "p50": 262.55999505519867, + "p90": 329.76000010967255, + "p95": 354.3040156364441, + "p99": 534.911997616291 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2732032, + "combineLogicalBytes": 5464064, + "fanoutMean": 5.2109375, + "recvTokensMax": 94, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 210.68799495697021, + "p90": 258.91199707984924, + "p95": 279.87200021743774, + "p99": 326.1440098285675 + }, + "combine": { + "p50": 53.85600030422211, + "p90": 68.67200136184692, + "p95": 72.51200079917908, + "p99": 91.90399944782257 + }, + "roundtrip": { + "p50": 265.6959891319275, + "p90": 326.2079954147339, + "p95": 351.52000188827515, + "p99": 446.3360011577606 + }, + "isolatedSum": { + "p50": 264.5439952611923, + 
"p90": 327.58399844169617, + "p95": 352.3840010166168, + "p99": 418.0480092763901 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 5562368, + "combineLogicalBytes": 11124736, + "fanoutMean": 5.3046875, + "recvTokensMax": 186, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 209.6640020608902, + "p90": 265.21599292755127, + "p95": 291.0720109939575, + "p99": 366.14400148391724 + }, + "combine": { + "p50": 61.43999844789505, + "p90": 73.91999661922455, + "p95": 79.42400127649307, + "p99": 92.06400066614151 + }, + "roundtrip": { + "p50": 262.2399926185608, + "p90": 317.7280128002167, + "p95": 350.7840037345886, + "p99": 447.9680061340332 + }, + "isolatedSum": { + "p50": 271.10400050878525, + "p90": 339.1359895467758, + "p95": 370.4960122704506, + "p99": 458.20800215005875 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 11096064, + "combineLogicalBytes": 22192128, + "fanoutMean": 5.291015625, + "recvTokensMax": 358, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 217.8879976272583, + "p90": 276.99199318885803, + "p95": 317.05600023269653, + "p99": 742.6559925079346 + }, + "combine": { + "p50": 72.67200201749802, + "p90": 88.54400366544724, + "p95": 92.47999638319016, + "p99": 113.02399635314941 + }, + "roundtrip": { + "p50": 273.44000339508057, + "p90": 323.5520124435425, + "p95": 345.0239896774292, + "p99": 420.3520119190216 + }, + "isolatedSum": { + "p50": 290.5599996447563, + "p90": 365.53599685430527, + "p95": 409.5359966158867, + "p99": 855.679988861084 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22282240, + "combineLogicalBytes": 44564480, + "fanoutMean": 5.3125, + "recvTokensMax": 699, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-a3751d3c", + 
"identity": "h200|deepep|5120|8|160|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||be1b44a963bd4ef", + "colorKey": "h200_87683f6c", + "comparisonKey": "972ab14012f6276a", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:53:56.538326+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_11", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · fp8", + "model": "shape 5120/8/160", + "shape": { + "hidden": 5120, + "topk": 8, + "experts": 160, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "be1b44a963bd4ef", + "workloadId": "set:8:34e5874082f8ea8f", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271751941", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271751941", + "createdAt": "2026-06-26T23:53:56.538326+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + 
"rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 217.3759937286377, + "p90": 269.1839933395386, + "p95": 295.1360046863556, + "p99": 345.69600224494934 + }, + "combine": { + "p50": 50.592001527547836, + "p90": 66.46399945020676, + "p95": 71.74400240182877, + "p99": 89.34400230646133 + }, + "roundtrip": { + "p50": 245.60000002384186, + "p90": 292.64000058174133, + "p95": 306.0480058193207, + "p99": 346.8160033226013 + }, + "isolatedSum": { + "p50": 267.96799525618553, + "p90": 335.64799278974533, + "p95": 366.88000708818436, + "p99": 435.0400045514107 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 215040, + "combineLogicalBytes": 430080, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 206.2399983406067, + "p90": 254.94399666786194, + "p95": 270.4960107803345, + "p99": 337.21598982810974 + }, + "combine": { + "p50": 51.263999193906784, + "p90": 65.72800129652023, + "p95": 70.52800059318542, + "p99": 75.58400183916092 + }, + "roundtrip": { + "p50": 245.15199661254883, + "p90": 296.31999135017395, + "p95": 316.1279857158661, + "p99": 367.3279881477356 + }, + "isolatedSum": { + "p50": 257.5039975345135, + "p90": 320.6719979643822, + "p95": 341.0240113735199, + "p99": 412.79999166727066 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 440320, + "combineLogicalBytes": 880640, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 220.38400173187256, + "p90": 289.15199637413025, + "p95": 331.5519988536835, + "p99": 1036.1599922180176 + }, + "combine": { + "p50": 52.191998809576035, + "p90": 65.21599739789963, + "p95": 68.96000355482101, + "p99": 77.88799703121185 + }, + "roundtrip": { + "p50": 248.79999458789825, + "p90": 
299.71200227737427, + "p95": 314.5279884338379, + "p99": 352.09599137306213 + }, + "isolatedSum": { + "p50": 272.5760005414486, + "p90": 354.3679937720299, + "p95": 400.5120024085045, + "p99": 1114.0479892492294 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 870400, + "combineLogicalBytes": 1740800, + "fanoutMean": 5.3125, + "recvTokensMax": 25, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 221.91999852657318, + "p90": 292.4480140209198, + "p95": 316.3520097732544, + "p99": 412.76800632476807 + }, + "combine": { + "p50": 54.84800040721893, + "p90": 71.61600142717361, + "p95": 80.64000308513641, + "p99": 102.1760031580925 + }, + "roundtrip": { + "p50": 249.24799799919128, + "p90": 305.5360019207001, + "p95": 325.1520097255707, + "p99": 406.9119989871979 + }, + "isolatedSum": { + "p50": 276.7679989337921, + "p90": 364.0640154480934, + "p95": 396.9920128583908, + "p99": 514.9440094828606 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1735680, + "combineLogicalBytes": 3471360, + "fanoutMean": 5.296875, + "recvTokensMax": 50, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 209.75999534130096, + "p90": 260.73598861694336, + "p95": 279.7119915485382, + "p99": 349.98399019241333 + }, + "combine": { + "p50": 54.88000065088272, + "p90": 69.34399902820587, + "p95": 73.91999661922455, + "p99": 101.08800232410431 + }, + "roundtrip": { + "p50": 254.36800718307495, + "p90": 305.2160143852234, + "p95": 330.55999875068665, + "p99": 445.72800397872925 + }, + "isolatedSum": { + "p50": 264.6399959921837, + "p90": 330.07998764514923, + "p95": 353.63198816776276, + "p99": 451.07199251651764 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3456000, + "combineLogicalBytes": 6912000, + "fanoutMean": 5.2734375, + "recvTokensMax": 93, + 
"stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 212.16000616550446, + "p90": 261.34398579597473, + "p95": 274.4959890842438, + "p99": 355.9679985046387 + }, + "combine": { + "p50": 59.487998485565186, + "p90": 75.9039968252182, + "p95": 79.29600030183792, + "p99": 111.13599687814713 + }, + "roundtrip": { + "p50": 262.4320089817047, + "p90": 318.33600997924805, + "p95": 339.4559919834137, + "p99": 384.0320110321045 + }, + "isolatedSum": { + "p50": 271.64800465106964, + "p90": 337.24798262119293, + "p95": 353.7919893860817, + "p99": 467.1039953827858 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6988800, + "combineLogicalBytes": 13977600, + "fanoutMean": 5.33203125, + "recvTokensMax": 179, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 213.44000101089478, + "p90": 259.99999046325684, + "p95": 280.2880108356476, + "p99": 418.08000206947327 + }, + "combine": { + "p50": 67.26399809122086, + "p90": 79.1039988398552, + "p95": 86.94399893283844, + "p99": 97.59999811649323 + }, + "roundtrip": { + "p50": 273.98398518562317, + "p90": 361.2799942493439, + "p95": 384.0959966182709, + "p99": 485.24799942970276 + }, + "isolatedSum": { + "p50": 280.70399910211563, + "p90": 339.10398930311203, + "p95": 367.232009768486, + "p99": 515.6800001859665 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13987840, + "combineLogicalBytes": 27975680, + "fanoutMean": 5.3359375, + "recvTokensMax": 355, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 217.53600239753723, + "p90": 271.9680070877075, + "p95": 288.8000011444092, + "p99": 367.71199107170105 + }, + "combine": { + "p50": 80.73599636554718, + "p90": 95.90400010347366, + "p95": 
99.16800260543823, + "p99": 122.56000190973282 + }, + "roundtrip": { + "p50": 289.6000146865845, + "p90": 337.69598603248596, + "p95": 350.847989320755, + "p99": 431.4559996128082 + }, + "isolatedSum": { + "p50": 298.2719987630844, + "p90": 367.8720071911812, + "p95": 387.9680037498474, + "p99": 490.27199298143387 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 27837440, + "combineLogicalBytes": 55674880, + "fanoutMean": 5.3095703125, + "recvTokensMax": 699, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-73819dd3", + "identity": "h200|deepep|6144|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h200_9979edfc", + "comparisonKey": "3ee03cee0282c011", + "schemaVersion": 3, + "generatedAt": "2026-06-27T11:13:48.278988+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_6", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · fp8", + "model": "MiniMax-M3", + "shape": { + "hidden": 6144, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + 
"routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": "set:8:2e0df6a62cd0143e", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28287496212", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28287496212", + "createdAt": "2026-06-27T11:13:48.278988+00:00", + "sha": "df7fddee0a275156d4c72fa006cd2b73bce72613" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 70.14399766921997, + "p90": 101.59999877214432, + "p95": 114.72000181674957, + "p99": 231.32799565792084 + }, + "combine": { + "p50": 56.8000003695488, + "p90": 69.18399780988693, + "p95": 76.64000242948532, + "p99": 99.64799880981445 + }, + "roundtrip": { + "p50": 154.30399775505066, + "p90": 196.383997797966, + "p95": 217.47200191020966, + "p99": 263.90400528907776 + }, + "isolatedSum": { + "p50": 126.94399803876877, + "p90": 170.78399658203125, + "p95": 191.3600042462349, + "p99": 330.9759944677353 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 270336, + "combineLogicalBytes": 540672, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 68.60800087451935, + "p90": 98.33600372076035, + "p95": 105.66399991512299, + "p99": 159.45599973201752 + }, + "combine": { + "p50": 56.60799890756607, + "p90": 71.55200093984604, + "p95": 75.48800110816956, + "p99": 96.22400254011154 + }, + "roundtrip": { + "p50": 158.07999670505524, + "p90": 207.39200711250305, + "p95": 222.3680019378662, + "p99": 268.15998554229736 + }, + "isolatedSum": { + "p50": 125.21599978208542, + "p90": 169.88800466060638, + "p95": 181.15200102329254, + "p99": 
255.68000227212906 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 528384, + "combineLogicalBytes": 1056768, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 66.3679987192154, + "p90": 87.23200112581253, + "p95": 100.76799988746643, + "p99": 133.5040032863617 + }, + "combine": { + "p50": 56.41600117087364, + "p90": 65.72800129652023, + "p95": 73.27999919652939, + "p99": 81.98399841785431 + }, + "roundtrip": { + "p50": 150.4960060119629, + "p90": 190.36799669265747, + "p95": 200.32000541687012, + "p99": 249.37599897384644 + }, + "isolatedSum": { + "p50": 122.78399989008904, + "p90": 152.96000242233276, + "p95": 174.04799908399582, + "p99": 215.488001704216 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1062912, + "combineLogicalBytes": 2125824, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 67.80800223350525, + "p90": 100.80000013113022, + "p95": 111.00800335407257, + "p99": 136.99199259281158 + }, + "combine": { + "p50": 57.08799883723259, + "p90": 72.22399860620499, + "p95": 76.60800218582153, + "p99": 86.33600175380707 + }, + "roundtrip": { + "p50": 153.56799960136414, + "p90": 201.50400698184967, + "p95": 210.24000644683838, + "p99": 257.3759853839874 + }, + "isolatedSum": { + "p50": 124.89600107073784, + "p90": 173.0239987373352, + "p95": 187.6160055398941, + "p99": 223.32799434661865 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2131968, + "combineLogicalBytes": 4263936, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 65.76000154018402, + "p90": 
92.6079973578453, + "p95": 103.13600301742554, + "p99": 142.81600713729858 + }, + "combine": { + "p50": 58.400001376867294, + "p90": 68.70400160551071, + "p95": 77.40800082683563, + "p99": 84.22400057315826 + }, + "roundtrip": { + "p50": 162.75200247764587, + "p90": 214.30400013923645, + "p95": 235.167995095253, + "p99": 264.70398902893066 + }, + "isolatedSum": { + "p50": 124.16000291705132, + "p90": 161.31199896335602, + "p95": 180.54400384426117, + "p99": 227.04000771045685 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4251648, + "combineLogicalBytes": 8503296, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 69.31199878454208, + "p90": 97.59999811649323, + "p95": 107.77600109577179, + "p99": 145.37599682807922 + }, + "combine": { + "p50": 64.67200070619583, + "p90": 80.35200089216232, + "p95": 85.4720026254654, + "p99": 101.88800096511841 + }, + "roundtrip": { + "p50": 164.32000696659088, + "p90": 208.48000049591064, + "p95": 222.88000583648682, + "p99": 259.71201062202454 + }, + "isolatedSum": { + "p50": 133.98399949073792, + "p90": 177.95199900865555, + "p95": 193.24800372123718, + "p99": 247.26399779319763 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 8454144, + "combineLogicalBytes": 16908288, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 77.31200009584427, + "p90": 101.69599950313568, + "p95": 117.76000261306763, + "p99": 361.5039885044098 + }, + "combine": { + "p50": 72.35199958086014, + "p90": 85.63199639320374, + "p95": 92.51199662685394, + "p99": 103.64799946546555 + }, + "roundtrip": { + "p50": 168.2880073785782, + "p90": 209.9200040102005, + "p95": 219.67999637126923, + "p99": 266.84799790382385 + }, + 
"isolatedSum": { + "p50": 149.6639996767044, + "p90": 187.32799589633942, + "p95": 210.27199923992157, + "p99": 465.15198796987534 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 16711680, + "combineLogicalBytes": 33423360, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 84.70399677753448, + "p90": 109.21599715948105, + "p95": 114.78400230407715, + "p99": 161.47199273109436 + }, + "combine": { + "p50": 88.25600147247314, + "p90": 105.0880029797554, + "p95": 113.11999708414078, + "p99": 147.42399752140045 + }, + "roundtrip": { + "p50": 195.5839991569519, + "p90": 248.28800559043884, + "p95": 262.4959945678711, + "p99": 325.56799054145813 + }, + "isolatedSum": { + "p50": 172.95999825000763, + "p90": 214.30400013923645, + "p95": 227.90399938821793, + "p99": 308.8959902524948 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 33288192, + "combineLogicalBytes": 66576384, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-1bedbd87", + "identity": "h200|deepep|6144|8|256|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h200_87683f6c", + "comparisonKey": "73242cc56a07dc73", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:54:22.337969+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_8", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · fp8", + "model": "MiniMax-M3", + "shape": { + 
"hidden": 6144, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": "set:8:2e0df6a62cd0143e", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271767522", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271767522", + "createdAt": "2026-06-26T23:54:22.337969+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 217.43999421596527, + "p90": 302.7519881725311, + "p95": 334.4320058822632, + "p99": 396.06401324272156 + }, + "combine": { + "p50": 55.1999993622303, + "p90": 72.03199714422226, + "p95": 78.23999971151352, + "p99": 108.09600353240967 + }, + "roundtrip": { + "p50": 251.71199440956116, + "p90": 317.27999448776245, + "p95": 335.10398864746094, + "p99": 397.92001247406006 + }, + "isolatedSum": { + "p50": 272.6399935781956, + "p90": 374.7839853167534, + "p95": 412.6720055937767, + "p99": 504.1600167751312 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 270336, + "combineLogicalBytes": 540672, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 4, + "correct": 
true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 206.81600272655487, + "p90": 269.6639895439148, + "p95": 289.6000146865845, + "p99": 343.23200583457947 + }, + "combine": { + "p50": 55.135998874902725, + "p90": 71.77600264549255, + "p95": 77.47200131416321, + "p99": 96.09600156545639 + }, + "roundtrip": { + "p50": 247.93599545955658, + "p90": 305.63199520111084, + "p95": 323.168009519577, + "p99": 380.12799620628357 + }, + "isolatedSum": { + "p50": 261.9520016014576, + "p90": 341.43999218940735, + "p95": 367.0720160007477, + "p99": 439.32800740003586 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 528384, + "combineLogicalBytes": 1056768, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 211.04000508785248, + "p90": 283.32799673080444, + "p95": 302.65599489212036, + "p99": 377.6639997959137 + }, + "combine": { + "p50": 56.89600110054016, + "p90": 70.68800181150436, + "p95": 78.3040001988411, + "p99": 85.4400023818016 + }, + "roundtrip": { + "p50": 251.52000784873962, + "p90": 306.4959943294525, + "p95": 319.64799761772156, + "p99": 344.1599905490875 + }, + "isolatedSum": { + "p50": 267.93600618839264, + "p90": 354.0159985423088, + "p95": 380.95999509096146, + "p99": 463.1040021777153 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1062912, + "combineLogicalBytes": 2125824, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 204.92799580097198, + "p90": 272.09600806236267, + "p95": 291.29600524902344, + "p99": 364.3519878387451 + }, + "combine": { + "p50": 56.96000158786774, + "p90": 71.96799665689468, + "p95": 77.79199630022049, + "p99": 86.91199868917465 + }, + 
"roundtrip": { + "p50": 245.69599330425262, + "p90": 303.16799879074097, + "p95": 321.9519853591919, + "p99": 421.1199879646301 + }, + "isolatedSum": { + "p50": 261.8879973888397, + "p90": 344.06400471925735, + "p95": 369.0880015492439, + "p99": 451.26398652791977 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2131968, + "combineLogicalBytes": 4263936, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 211.61599457263947, + "p90": 274.3679881095886, + "p95": 311.2959861755371, + "p99": 390.8799886703491 + }, + "combine": { + "p50": 58.720000088214874, + "p90": 74.68800246715546, + "p95": 80.09599894285202, + "p99": 87.5839963555336 + }, + "roundtrip": { + "p50": 250.65600872039795, + "p90": 313.24800848960876, + "p95": 336.1920118331909, + "p99": 386.59200072288513 + }, + "isolatedSum": { + "p50": 270.33599466085434, + "p90": 349.0559905767441, + "p95": 391.39198511838913, + "p99": 478.4639850258827 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4251648, + "combineLogicalBytes": 8503296, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 204.92799580097198, + "p90": 262.62399554252625, + "p95": 280.5440127849579, + "p99": 327.4880051612854 + }, + "combine": { + "p50": 64.54399973154068, + "p90": 81.85599744319916, + "p95": 87.8399983048439, + "p99": 104.41599786281586 + }, + "roundtrip": { + "p50": 262.59198784828186, + "p90": 327.7440071105957, + "p95": 351.6159951686859, + "p99": 406.0800075531006 + }, + "isolatedSum": { + "p50": 269.47199553251266, + "p90": 344.4799929857254, + "p95": 368.3840110898018, + "p99": 431.90400302410126 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 8454144, + "combineLogicalBytes": 
16908288, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 247.5840002298355, + "p90": 392.5119936466217, + "p95": 406.14399313926697, + "p99": 443.5200095176697 + }, + "combine": { + "p50": 71.84000313282013, + "p90": 89.85599875450134, + "p95": 94.68799829483032, + "p99": 119.32799965143204 + }, + "roundtrip": { + "p50": 261.85598969459534, + "p90": 329.24801111221313, + "p95": 345.15199065208435, + "p99": 426.1760115623474 + }, + "isolatedSum": { + "p50": 319.42400336265564, + "p90": 482.36799240112305, + "p95": 500.8319914340973, + "p99": 562.8480091691017 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 16711680, + "combineLogicalBytes": 33423360, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 218.33600103855133, + "p90": 282.6240062713623, + "p95": 299.1040050983429, + "p99": 340.831995010376 + }, + "combine": { + "p50": 87.16800063848495, + "p90": 104.67199981212616, + "p95": 109.18399691581726, + "p99": 127.32799351215363 + }, + "roundtrip": { + "p50": 291.83998703956604, + "p90": 343.6479866504669, + "p95": 355.48800230026245, + "p99": 407.1680009365082 + }, + "isolatedSum": { + "p50": 305.5040016770363, + "p90": 387.29600608348846, + "p95": 408.28800201416016, + "p99": 468.1599885225296 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 33288192, + "combineLogicalBytes": 66576384, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-1d12a6ce", + "identity": "h200|deepep|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h200_9979edfc", + 
"comparisonKey": "057f864d1542d54f", + "schemaVersion": 3, + "generatedAt": "2026-06-27T10:26:28.109691+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_6", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · fp8", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": "set:8:d8d49658059863f2", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28286433802", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28286433802", + "createdAt": "2026-06-27T10:26:28.109691+00:00", + "sha": "91c7acf59a5e524f37742922ec67721d86a03f6b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 66.94400310516357, + "p90": 102.65599936246872, + "p95": 
111.55200004577637, + "p99": 136.06399297714233 + }, + "combine": { + "p50": 61.503998935222626, + "p90": 75.99999755620956, + "p95": 80.64000308513641, + "p99": 118.33599954843521 + }, + "roundtrip": { + "p50": 168.7999963760376, + "p90": 279.00800108909607, + "p95": 304.03199791908264, + "p99": 436.41600012779236 + }, + "isolatedSum": { + "p50": 128.4480020403862, + "p90": 178.65599691867828, + "p95": 192.19200313091278, + "p99": 254.39999252557755 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 68.51200014352798, + "p90": 98.43199700117111, + "p95": 108.67200046777725, + "p99": 120.41600048542023 + }, + "combine": { + "p50": 61.69600039720535, + "p90": 78.14399898052216, + "p95": 82.0159986615181, + "p99": 97.9200005531311 + }, + "roundtrip": { + "p50": 167.04000532627106, + "p90": 214.88000452518463, + "p95": 225.63199698925018, + "p99": 264.8319900035858 + }, + "isolatedSum": { + "p50": 130.20800054073334, + "p90": 176.57599598169327, + "p95": 190.68799912929535, + "p99": 218.33600103855133 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 67.77600198984146, + "p90": 99.5199978351593, + "p95": 105.27999699115753, + "p99": 120.7680031657219 + }, + "combine": { + "p50": 60.99199876189232, + "p90": 76.9599974155426, + "p95": 81.37600123882294, + "p99": 85.28000116348267 + }, + "roundtrip": { + "p50": 158.36800634860992, + "p90": 202.4639993906021, + "p95": 213.34399282932281, + "p99": 470.46399116516113 + }, + "isolatedSum": { + "p50": 128.76800075173378, + "p90": 
176.4799952507019, + "p95": 186.65599822998047, + "p99": 206.04800432920456 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 68.44799965620041, + "p90": 100.3199964761734, + "p95": 105.21599650382996, + "p99": 126.43200159072876 + }, + "combine": { + "p50": 63.45599889755249, + "p90": 79.0719985961914, + "p95": 84.99199897050858, + "p99": 93.02400052547455 + }, + "roundtrip": { + "p50": 166.78400337696075, + "p90": 212.0639979839325, + "p95": 220.09600698947906, + "p99": 258.8160037994385 + }, + "isolatedSum": { + "p50": 131.9039985537529, + "p90": 179.3919950723648, + "p95": 190.20799547433853, + "p99": 219.4560021162033 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487296, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 68.31999868154526, + "p90": 94.97600048780441, + "p95": 101.88800096511841, + "p99": 112.73600161075592 + }, + "combine": { + "p50": 63.80800157785416, + "p90": 75.58400183916092, + "p95": 82.97599852085114, + "p99": 96.70399874448776 + }, + "roundtrip": { + "p50": 165.47200083732605, + "p90": 223.29600155353546, + "p95": 241.98399484157562, + "p99": 347.9999899864197 + }, + "isolatedSum": { + "p50": 132.1280002593994, + "p90": 170.56000232696533, + "p95": 184.86399948596954, + "p99": 209.44000035524368 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + 
"dispatch": { + "p50": 72.60800153017044, + "p90": 105.12000322341919, + "p95": 111.10399663448334, + "p99": 125.08800625801086 + }, + "combine": { + "p50": 70.23999840021133, + "p90": 83.52000266313553, + "p95": 88.35200220346451, + "p99": 93.1520015001297 + }, + "roundtrip": { + "p50": 169.37600076198578, + "p90": 216.25599265098572, + "p95": 225.15200078487396, + "p99": 254.59200143814087 + }, + "isolatedSum": { + "p50": 142.84799993038177, + "p90": 188.64000588655472, + "p95": 199.45599883794785, + "p99": 218.24000775814056 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 79.0719985961914, + "p90": 104.54399883747101, + "p95": 109.76000130176544, + "p99": 320.99199295043945 + }, + "combine": { + "p50": 80.51200211048126, + "p90": 98.39999675750732, + "p95": 101.85600072145462, + "p99": 225.53600370883942 + }, + "roundtrip": { + "p50": 180.2240014076233, + "p90": 218.4319943189621, + "p95": 229.312002658844, + "p99": 268.70399713516235 + }, + "isolatedSum": { + "p50": 159.58400070667267, + "p90": 202.94399559497833, + "p95": 211.61600202322006, + "p99": 546.5279966592789 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 86.62399649620056, + "p90": 106.52799904346466, + "p95": 113.21599781513214, + "p99": 120.89599668979645 + }, + "combine": { + "p50": 98.1760025024414, + "p90": 115.13599753379822, + "p95": 118.6240017414093, + "p99": 130.5920034646988 + }, + "roundtrip": { + "p50": 210.207998752594, + "p90": 238.3359968662262, + "p95": 
245.15199661254883, + "p99": 258.87998938560486 + }, + "isolatedSum": { + "p50": 184.79999899864197, + "p90": 221.66399657726288, + "p95": 231.83999955654144, + "p99": 251.48800015449524 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-9a6e69f6", + "identity": "h200|deepep|7168|8|256|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h200_87683f6c", + "comparisonKey": "c387c5e642249761", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:50:29.289162+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_5", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · fp8", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": "set:8:d8d49658059863f2", + "workloadSource": 
"canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271636896", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271636896", + "createdAt": "2026-06-26T23:50:29.289162+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 228.70400547981262, + "p90": 269.6959972381592, + "p95": 279.5200049877167, + "p99": 338.1119966506958 + }, + "combine": { + "p50": 61.08799949288368, + "p90": 73.5040009021759, + "p95": 82.20800012350082, + "p99": 98.33600372076035 + }, + "roundtrip": { + "p50": 271.232008934021, + "p90": 306.94401264190674, + "p95": 324.2560029029846, + "p99": 374.65599179267883 + }, + "isolatedSum": { + "p50": 289.7920049726963, + "p90": 343.1999981403351, + "p95": 361.7280051112175, + "p99": 436.44800037145615 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 216.06400609016418, + "p90": 246.33599817752838, + "p95": 261.3759934902191, + "p99": 341.40801429748535 + }, + "combine": { + "p50": 59.7120001912117, + "p90": 68.09599697589874, + "p95": 74.46400076150894, + "p99": 89.53599631786346 + }, + "roundtrip": { + "p50": 268.99200677871704, + "p90": 305.08801341056824, + "p95": 324.41601157188416, + "p99": 433.0880045890808 + }, + "isolatedSum": { + "p50": 275.7760062813759, + "p90": 314.4319951534271, + "p95": 335.83999425172806, + "p99": 430.9440106153488 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + 
"recvTokensMax": 13, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 229.98400032520294, + "p90": 283.07199478149414, + "p95": 300.00001192092896, + "p99": 371.2959885597229 + }, + "combine": { + "p50": 61.055999249219894, + "p90": 78.68800312280655, + "p95": 83.55200290679932, + "p99": 112.47999966144562 + }, + "roundtrip": { + "p50": 274.1119861602783, + "p90": 337.0879888534546, + "p95": 358.7520122528076, + "p99": 398.75200390815735 + }, + "isolatedSum": { + "p50": 291.03999957442284, + "p90": 361.7599979043007, + "p95": 383.55201482772827, + "p99": 483.7759882211685 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 218.87999773025513, + "p90": 251.55198574066162, + "p95": 265.855997800827, + "p99": 311.39200925827026 + }, + "combine": { + "p50": 62.111999839544296, + "p90": 71.6480016708374, + "p95": 77.11999863386154, + "p99": 90.40000289678574 + }, + "roundtrip": { + "p50": 266.9120132923126, + "p90": 300.57600140571594, + "p95": 317.8560137748718, + "p99": 357.02401399612427 + }, + "isolatedSum": { + "p50": 280.9919975697994, + "p90": 323.199987411499, + "p95": 342.97599643468857, + "p99": 401.792012155056 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487296, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 219.29599344730377, + "p90": 267.61600375175476, + "p95": 287.00798749923706, + "p99": 346.8160033226013 + }, + "combine": { + "p50": 63.840001821517944, + "p90": 79.77599650621414, + 
"p95": 84.95999872684479, + "p99": 98.49599748849869 + }, + "roundtrip": { + "p50": 265.4719948768616, + "p90": 309.9519908428192, + "p95": 323.8399922847748, + "p99": 397.8559970855713 + }, + "isolatedSum": { + "p50": 283.1359952688217, + "p90": 347.3920002579689, + "p95": 371.96798622608185, + "p99": 445.3120008111 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 219.10400688648224, + "p90": 245.5040067434311, + "p95": 260.3200078010559, + "p99": 308.0959916114807 + }, + "combine": { + "p50": 69.50400024652481, + "p90": 78.33600044250488, + "p95": 83.96799862384796, + "p99": 95.8079993724823 + }, + "roundtrip": { + "p50": 275.2319872379303, + "p90": 308.9599907398224, + "p95": 331.07200264930725, + "p99": 425.6319999694824 + }, + "isolatedSum": { + "p50": 288.60800713300705, + "p90": 323.840007185936, + "p95": 344.28800642490387, + "p99": 403.903990983963 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 221.27999365329742, + "p90": 263.90400528907776, + "p95": 282.20799565315247, + "p99": 368.51200461387634 + }, + "combine": { + "p50": 79.77599650621414, + "p90": 91.32800251245499, + "p95": 96.6079980134964, + "p99": 106.52799904346466 + }, + "roundtrip": { + "p50": 288.4159982204437, + "p90": 336.41600608825684, + "p95": 353.7920117378235, + "p99": 471.1360037326813 + }, + "isolatedSum": { + "p50": 301.05599015951157, + "p90": 355.23200780153275, + "p95": 378.81599366664886, + "p99": 475.040003657341 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 233.024001121521, + "p90": 284.4479978084564, + "p95": 301.63198709487915, + "p99": 392.5760090351105 + }, + "combine": { + "p50": 97.50399738550186, + "p90": 109.76000130176544, + "p95": 115.99999666213989, + "p99": 127.93600559234619 + }, + "roundtrip": { + "p50": 316.6399896144867, + "p90": 356.06399178504944, + "p95": 368.5759902000427, + "p99": 464.352011680603 + }, + "isolatedSum": { + "p50": 330.52799850702286, + "p90": 394.20799911022186, + "p95": 417.63198375701904, + "p99": 520.5120146274567 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-a04f9063", + "identity": "h200|deepep|7168|8|384|fp8|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||d6c49ae98878760", + "colorKey": "h200_9979edfc", + "comparisonKey": "7a8492db4d26e76b", + "schemaVersion": 3, + "generatedAt": "2026-06-27T11:14:07.695062+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_2", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · fp8", + "model": "Kimi-K2", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 384, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, 
+ "dispatchDtype": "fp8", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "d6c49ae98878760", + "workloadId": "set:8:9a27d0df4b17fa09", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28287502149", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28287502149", + "createdAt": "2026-06-27T11:14:07.695062+00:00", + "sha": "df7fddee0a275156d4c72fa006cd2b73bce72613" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 73.02399724721909, + "p90": 93.91999989748001, + "p95": 107.42399841547012, + "p99": 139.20000195503235 + }, + "combine": { + "p50": 59.93599817156792, + "p90": 70.36799937486649, + "p95": 75.93599706888199, + "p99": 93.44000369310379 + }, + "roundtrip": { + "p50": 157.69599378108978, + "p90": 192.09599494934082, + "p95": 211.32799983024597, + "p99": 397.7600038051605 + }, + "isolatedSum": { + "p50": 132.959995418787, + "p90": 164.2879992723465, + "p95": 183.3599954843521, + "p99": 232.64000564813614 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 301056, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 71.3919997215271, + "p90": 89.91999924182892, + 
"p95": 99.71199929714203, + "p99": 113.79200220108032 + }, + "combine": { + "p50": 58.720000088214874, + "p90": 72.09599763154984, + "p95": 77.60000228881836, + "p99": 88.70399743318558 + }, + "roundtrip": { + "p50": 158.62399339675903, + "p90": 189.5039975643158, + "p95": 197.82400131225586, + "p99": 229.34399545192719 + }, + "isolatedSum": { + "p50": 130.11199980974197, + "p90": 162.01599687337875, + "p95": 177.3120015859604, + "p99": 202.4959996342659 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 609280, + "combineLogicalBytes": 1218560, + "fanoutMean": 5.3125, + "recvTokensMax": 14, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 72.7040022611618, + "p90": 106.04800283908844, + "p95": 114.81600254774094, + "p99": 146.84799313545227 + }, + "combine": { + "p50": 60.5119988322258, + "p90": 75.58400183916092, + "p95": 79.42400127649307, + "p99": 93.9520001411438 + }, + "roundtrip": { + "p50": 158.75199437141418, + "p90": 193.15199553966522, + "p95": 202.04800367355347, + "p99": 231.51999711990356 + }, + "isolatedSum": { + "p50": 133.2160010933876, + "p90": 181.63200467824936, + "p95": 194.240003824234, + "p99": 240.79999327659607 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1204224, + "combineLogicalBytes": 2408448, + "fanoutMean": 5.25, + "recvTokensMax": 26, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 70.0799971818924, + "p90": 88.22400122880936, + "p95": 97.59999811649323, + "p99": 165.82399606704712 + }, + "combine": { + "p50": 60.54399907588959, + "p90": 70.68800181150436, + "p95": 78.07999849319458, + "p99": 89.05600011348724 + }, + "roundtrip": { + "p50": 159.32799875736237, + "p90": 187.6160055398941, + "p95": 201.24800503253937, + "p99": 239.58399891853333 + }, + "isolatedSum": { + "p50": 130.62399625778198, + 
"p90": 158.91200304031372, + "p95": 175.6799966096878, + "p99": 254.87999618053436 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2415616, + "combineLogicalBytes": 4831232, + "fanoutMean": 5.265625, + "recvTokensMax": 48, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 73.69600236415863, + "p90": 100.38399696350098, + "p95": 108.19199681282043, + "p99": 146.14400267601013 + }, + "combine": { + "p50": 63.840001821517944, + "p90": 74.87999647855759, + "p95": 80.38400113582611, + "p99": 123.23199957609177 + }, + "roundtrip": { + "p50": 161.43999993801117, + "p90": 194.97600197792053, + "p95": 208.67200195789337, + "p99": 259.68000292778015 + }, + "isolatedSum": { + "p50": 137.53600418567657, + "p90": 175.26399344205856, + "p95": 188.57599794864655, + "p99": 269.3760022521019 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4924416, + "combineLogicalBytes": 9848832, + "fanoutMean": 5.3671875, + "recvTokensMax": 91, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 71.6480016708374, + "p90": 94.14400160312653, + "p95": 102.65599936246872, + "p99": 167.32800006866455 + }, + "combine": { + "p50": 69.37599927186966, + "p90": 80.32000064849854, + "p95": 88.19200098514557, + "p99": 124.22399967908859 + }, + "roundtrip": { + "p50": 167.55199432373047, + "p90": 192.32000410556793, + "p95": 208.54400098323822, + "p99": 261.50399446487427 + }, + "isolatedSum": { + "p50": 141.02400094270706, + "p90": 174.46400225162506, + "p95": 190.8480003476143, + "p99": 291.55199974775314 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9748480, + "combineLogicalBytes": 19496960, + "fanoutMean": 5.3125, + "recvTokensMax": 178, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + 
"globalTokens": 512, + "dispatch": { + "p50": 79.80799674987793, + "p90": 91.2960022687912, + "p95": 100.51199793815613, + "p99": 125.72799623012543 + }, + "combine": { + "p50": 78.11199873685837, + "p90": 84.70399677753448, + "p95": 90.7839983701706, + "p99": 105.56799918413162 + }, + "roundtrip": { + "p50": 183.80799889564514, + "p90": 212.96000480651855, + "p95": 232.7679991722107, + "p99": 262.688010931015 + }, + "isolatedSum": { + "p50": 157.9199954867363, + "p90": 175.99999904632568, + "p95": 191.29599630832672, + "p99": 231.29599541425705 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19418112, + "combineLogicalBytes": 38836224, + "fanoutMean": 5.291015625, + "recvTokensMax": 372, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 88.3840024471283, + "p90": 100.3199964761734, + "p95": 105.56799918413162, + "p99": 115.77600240707397 + }, + "combine": { + "p50": 97.24800288677216, + "p90": 107.10400342941284, + "p95": 112.28799819946289, + "p99": 124.38400089740753 + }, + "roundtrip": { + "p50": 212.16000616550446, + "p90": 234.3360036611557, + "p95": 243.93600225448608, + "p99": 303.5520017147064 + }, + "isolatedSum": { + "p50": 185.63200533390045, + "p90": 207.42399990558624, + "p95": 217.8559973835945, + "p99": 240.1600033044815 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38757376, + "combineLogicalBytes": 77514752, + "fanoutMean": 5.2802734375, + "recvTokensMax": 707, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-180681db", + "identity": "h200|deepep|7168|8|384|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||d6c49ae98878760", + "colorKey": "h200_87683f6c", + "comparisonKey": "3006922c66758d92", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:53:15.049258+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": 
"h200-dgxc-slurm_9", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · fp8", + "model": "Kimi-K2", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 384, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "d6c49ae98878760", + "workloadId": "set:8:9a27d0df4b17fa09", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271721386", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271721386", + "createdAt": "2026-06-26T23:53:15.049258+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 212.44800090789795, + "p90": 272.8320062160492, + "p95": 292.32001304626465, + "p99": 382.752001285553 + }, + "combine": { + "p50": 58.75200033187866, + "p90": 73.40800017118454, + "p95": 78.5600021481514, + "p99": 96.12800180912018 + }, + "roundtrip": { + "p50": 
247.26399779319763, + "p90": 306.36799335479736, + "p95": 325.1200020313263, + "p99": 389.8560106754303 + }, + "isolatedSum": { + "p50": 271.2000012397766, + "p90": 346.24000638723373, + "p95": 370.88001519441605, + "p99": 478.88000309467316 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 301056, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 212.09600567817688, + "p90": 273.69600534439087, + "p95": 297.791987657547, + "p99": 586.5920186042786 + }, + "combine": { + "p50": 58.17599967122078, + "p90": 74.81600344181061, + "p95": 79.71200346946716, + "p99": 97.120001912117 + }, + "roundtrip": { + "p50": 265.3760015964508, + "p90": 339.6799862384796, + "p95": 375.5840063095093, + "p99": 458.8159918785095 + }, + "isolatedSum": { + "p50": 270.27200534939766, + "p90": 348.5120087862015, + "p95": 377.50399112701416, + "p99": 683.7120205163956 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 609280, + "combineLogicalBytes": 1218560, + "fanoutMean": 5.3125, + "recvTokensMax": 14, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 197.6960003376007, + "p90": 252.8960108757019, + "p95": 267.64801144599915, + "p99": 318.59201192855835 + }, + "combine": { + "p50": 57.920001447200775, + "p90": 70.49600034952164, + "p95": 76.4160007238388, + "p99": 87.36000210046768 + }, + "roundtrip": { + "p50": 246.91200256347656, + "p90": 306.2080144882202, + "p95": 339.1680121421814, + "p99": 585.1519703865051 + }, + "isolatedSum": { + "p50": 255.61600178480148, + "p90": 323.39201122522354, + "p95": 344.06401216983795, + "p99": 405.95201402902603 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1204224, + "combineLogicalBytes": 2408448, + "fanoutMean": 5.25, + 
"recvTokensMax": 26, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 211.93599700927734, + "p90": 265.1520073413849, + "p95": 276.6079902648926, + "p99": 336.5760147571564 + }, + "combine": { + "p50": 59.647999703884125, + "p90": 77.02399790287018, + "p95": 82.94399827718735, + "p99": 96.54399752616882 + }, + "roundtrip": { + "p50": 259.5840096473694, + "p90": 317.6639974117279, + "p95": 331.9680094718933, + "p99": 400.06399154663086 + }, + "isolatedSum": { + "p50": 271.58399671316147, + "p90": 342.17600524425507, + "p95": 359.5519885420799, + "p99": 433.1200122833252 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2415616, + "combineLogicalBytes": 4831232, + "fanoutMean": 5.265625, + "recvTokensMax": 48, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 214.01600539684296, + "p90": 275.90399980545044, + "p95": 303.9039969444275, + "p99": 374.30399656295776 + }, + "combine": { + "p50": 61.76000088453293, + "p90": 80.4160013794899, + "p95": 84.79999750852585, + "p99": 99.16800260543823 + }, + "roundtrip": { + "p50": 258.59200954437256, + "p90": 322.9120075702667, + "p95": 347.104012966156, + "p99": 422.39999771118164 + }, + "isolatedSum": { + "p50": 275.7760062813759, + "p90": 356.32000118494034, + "p95": 388.70399445295334, + "p99": 473.471999168396 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4924416, + "combineLogicalBytes": 9848832, + "fanoutMean": 5.3671875, + "recvTokensMax": 91, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 209.9200040102005, + "p90": 263.7439966201782, + "p95": 275.2639949321747, + "p99": 311.13600730895996 + }, + "combine": { + "p50": 67.58400052785873, + "p90": 84.09599959850311, + "p95": 
87.42400258779526, + "p99": 103.90400141477585 + }, + "roundtrip": { + "p50": 263.5520100593567, + "p90": 318.30400228500366, + "p95": 334.5920145511627, + "p99": 403.80799770355225 + }, + "isolatedSum": { + "p50": 277.50400453805923, + "p90": 347.83999621868134, + "p95": 362.68799751996994, + "p99": 415.0400087237358 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9748480, + "combineLogicalBytes": 19496960, + "fanoutMean": 5.3125, + "recvTokensMax": 178, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 205.82400262355804, + "p90": 253.02401185035706, + "p95": 266.36800169944763, + "p99": 311.5200102329254 + }, + "combine": { + "p50": 78.40000092983246, + "p90": 92.76799857616425, + "p95": 98.04800152778625, + "p99": 111.07199639081955 + }, + "roundtrip": { + "p50": 272.7360129356384, + "p90": 325.50400495529175, + "p95": 342.6879942417145, + "p99": 378.6559998989105 + }, + "isolatedSum": { + "p50": 284.2240035533905, + "p90": 345.7920104265213, + "p95": 364.4160032272339, + "p99": 422.59200662374496 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19418112, + "combineLogicalBytes": 38836224, + "fanoutMean": 5.291015625, + "recvTokensMax": 372, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 223.23200106620789, + "p90": 271.61601185798645, + "p95": 281.98400139808655, + "p99": 319.96798515319824 + }, + "combine": { + "p50": 96.25600278377533, + "p90": 112.44799941778183, + "p95": 115.61600118875504, + "p99": 127.36000120639801 + }, + "roundtrip": { + "p50": 324.864000082016, + "p90": 388.63998651504517, + "p95": 415.3279960155487, + "p99": 494.3999946117401 + }, + "isolatedSum": { + "p50": 319.4880038499832, + "p90": 384.0640112757683, + "p95": 397.6000025868416, + "p99": 447.32798635959625 + }, + "roundtripMeasured": true, 
+ "dispatchLogicalBytes": 38757376, + "combineLogicalBytes": 77514752, + "fanoutMean": 5.2802734375, + "recvTokensMax": 707, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-b1b077c8", + "identity": "h200|deepep|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|ac583971f94b176", + "colorKey": "h200_3a17d46b", + "comparisonKey": "f29f35383c05d38b", + "schemaVersion": 3, + "generatedAt": "2026-06-26T17:30:04.228393+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_9", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "normalized", + "suite": "resource-constrained", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · fp8 (norm)", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": 0.18, + "achievedFraction": 0.1818, + "configuredUnits": 24, + "deviceUnits": 132, + "resourceClass": "resource-constrained", + "conformanceClass": "resource-conforming", + "fixedKernel": false, + "paretoEligible": true + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": "set:8:d8d49658059863f2", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": 
"sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28254401482", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254401482", + "createdAt": "2026-06-26T17:30:04.228393+00:00", + "sha": "60dec7d70f554e252fec87709e2be52752947db1" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 70.23999840021133, + "p90": 92.38400310277939, + "p95": 101.88800096511841, + "p99": 121.15199863910675 + }, + "combine": { + "p50": 58.88000130653381, + "p90": 70.3359991312027, + "p95": 78.65600287914276, + "p99": 101.43999755382538 + }, + "roundtrip": { + "p50": 159.32799875736237, + "p90": 200.3840059041977, + "p95": 213.69600296020508, + "p99": 243.58400702476501 + }, + "isolatedSum": { + "p50": 129.11999970674515, + "p90": 162.7200022339821, + "p95": 180.54400384426117, + "p99": 222.59199619293213 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 67.55200028419495, + "p90": 91.45600348711014, + "p95": 102.33599692583084, + "p99": 144.57599818706512 + }, + "combine": { + "p50": 59.42400172352791, + "p90": 71.6480016708374, + "p95": 81.24800026416779, + "p99": 105.43999820947647 + }, + "roundtrip": { + "p50": 156.12800419330597, + "p90": 199.13600385189056, + "p95": 215.32799303531647, + "p99": 382.4000060558319 + }, + "isolatedSum": { + "p50": 126.97600200772285, + "p90": 163.10400515794754, + "p95": 183.58399718999863, + "p99": 250.0159963965416 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 69.72800195217133, + "p90": 88.54400366544724, + "p95": 98.24000298976898, + "p99": 228.60799729824066 + }, + "combine": { + "p50": 60.92799827456474, + "p90": 72.92799651622772, + "p95": 77.7600035071373, + "p99": 90.91199934482574 + }, + "roundtrip": { + "p50": 160.67199409008026, + "p90": 186.20799481868744, + "p95": 196.44799828529358, + "p99": 242.14400351047516 + }, + "isolatedSum": { + "p50": 130.65600022673607, + "p90": 161.47200018167496, + "p95": 176.00000649690628, + "p99": 319.5199966430664 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 70.49600034952164, + "p90": 97.47199714183807, + "p95": 107.84000158309937, + "p99": 151.90400183200836 + }, + "combine": { + "p50": 61.47199869155884, + "p90": 76.89599692821503, + "p95": 85.28000116348267, + "p99": 107.64800012111664 + }, + "roundtrip": { + "p50": 155.8080017566681, + "p90": 187.45599687099457, + "p95": 205.24799823760986, + "p99": 242.88000166416168 + }, + "isolatedSum": { + "p50": 131.96799904108047, + "p90": 174.3679940700531, + "p95": 193.12000274658203, + "p99": 259.552001953125 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487296, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 68.4799998998642, + "p90": 86.94399893283844, + "p95": 95.58399766683578, + "p99": 126.08000636100769 + }, + "combine": { + "p50": 63.391998410224915, + "p90": 77.34400033950806, + "p95": 86.62399649620056, + "p99": 119.55200135707855 + }, + "roundtrip": { + "p50": 164.2879992723465, + "p90": 
188.09600174427032, + "p95": 203.64800095558167, + "p99": 272.7999985218048 + }, + "isolatedSum": { + "p50": 131.8719983100891, + "p90": 164.2879992723465, + "p95": 182.20799416303635, + "p99": 245.63200771808624 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 68.25599819421768, + "p90": 91.13600105047226, + "p95": 98.91200065612793, + "p99": 114.78400230407715 + }, + "combine": { + "p50": 66.27199798822403, + "p90": 78.84799689054489, + "p95": 85.40800213813782, + "p99": 92.73599833250046 + }, + "roundtrip": { + "p50": 165.0879979133606, + "p90": 203.45599949359894, + "p95": 221.15199267864227, + "p99": 462.911993265152 + }, + "isolatedSum": { + "p50": 134.5279961824417, + "p90": 169.98399794101715, + "p95": 184.32000279426575, + "p99": 207.5200006365776 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 74.46400076150894, + "p90": 89.21600133180618, + "p95": 99.32799637317657, + "p99": 120.57600170373917 + }, + "combine": { + "p50": 80.44800162315369, + "p90": 89.75999802350998, + "p95": 94.65599805116653, + "p99": 122.30399996042252 + }, + "roundtrip": { + "p50": 183.45600366592407, + "p90": 210.78400313854218, + "p95": 228.5439968109131, + "p99": 287.4239981174469 + }, + "isolatedSum": { + "p50": 154.91200238466263, + "p90": 178.97599935531616, + "p95": 193.9839944243431, + "p99": 242.88000166416168 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, 
+ "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 88.67199718952179, + "p90": 102.7199998497963, + "p95": 111.93600296974182, + "p99": 128.9920061826706 + }, + "combine": { + "p50": 96.83199971914291, + "p90": 108.86400192975998, + "p95": 114.43199962377548, + "p99": 124.1919994354248 + }, + "roundtrip": { + "p50": 208.99200439453125, + "p90": 229.34399545192719, + "p95": 239.9040013551712, + "p99": 260.22401452064514 + }, + "isolatedSum": { + "p50": 185.5039969086647, + "p90": 211.58400177955627, + "p95": 226.3680025935173, + "p99": 253.1840056180954 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-a2649fd4", + "identity": "h200|deepep|7168|8|256|fp8|normal|cached-layout-comm-only-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|ac583971f94b176", + "colorKey": "h200_50a9ee63", + "comparisonKey": "aae31d5755e4ce66", + "schemaVersion": 3, + "generatedAt": "2026-06-26T17:30:20.768220+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_1", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "normalized", + "suite": "resource-constrained", + "comparisonClass": "standardized", + "measurementContract": "cached-layout-comm-only-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · fp8 (norm) [cl]", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "activationProfile": "normal", + 
"combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": 0.18, + "achievedFraction": 0.1818, + "configuredUnits": 24, + "deviceUnits": 132, + "resourceClass": "resource-constrained", + "conformanceClass": "resource-conforming", + "fixedKernel": false, + "paretoEligible": true + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": "set:8:d8d49658059863f2", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28254418007", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254418007", + "createdAt": "2026-06-26T17:30:20.768220+00:00", + "sha": "60dec7d70f554e252fec87709e2be52752947db1" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 56.09599873423576, + "p90": 86.7839977145195, + "p95": 94.97600048780441, + "p99": 109.98400300741196 + }, + "combine": { + "p50": 60.864001512527466, + "p90": 79.64800298213959, + "p95": 85.7279971241951, + "p99": 109.24799740314484 + }, + "roundtrip": { + "p50": 148.60799908638, + "p90": 199.42399859428406, + "p95": 207.45599269866943, + "p99": 260.5440020561218 + }, + "isolatedSum": { + "p50": 116.96000024676323, + "p90": 166.4320006966591, + "p95": 180.7039976119995, + "p99": 219.2320004105568 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 52.06400156021118, + "p90": 83.42400193214417, + "p95": 88.99199962615967, + "p99": 123.80799651145935 + 
}, + "combine": { + "p50": 59.808000922203064, + "p90": 77.91999727487564, + "p95": 84.48000252246857, + "p99": 130.78400492668152 + }, + "roundtrip": { + "p50": 145.82400023937225, + "p90": 194.91200149059296, + "p95": 215.10399878025055, + "p99": 273.79199862480164 + }, + "isolatedSum": { + "p50": 111.87200248241425, + "p90": 161.3439992070198, + "p95": 173.47200214862823, + "p99": 254.59200143814087 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 56.60799890756607, + "p90": 89.08800035715103, + "p95": 98.91200065612793, + "p99": 111.7440015077591 + }, + "combine": { + "p50": 60.7680007815361, + "p90": 78.52800190448761, + "p95": 84.22400057315826, + "p99": 97.95200079679489 + }, + "roundtrip": { + "p50": 143.74400675296783, + "p90": 192.7040070295334, + "p95": 212.0320051908493, + "p99": 294.46399211883545 + }, + "isolatedSum": { + "p50": 117.37599968910217, + "p90": 167.61600226163864, + "p95": 183.1360012292862, + "p99": 209.69600230455399 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 52.25599929690361, + "p90": 80.09599894285202, + "p95": 88.35200220346451, + "p99": 109.37599837779999 + }, + "combine": { + "p50": 60.736000537872314, + "p90": 79.48800176382065, + "p95": 85.60000360012054, + "p99": 108.64000022411346 + }, + "roundtrip": { + "p50": 141.12000167369843, + "p90": 183.87199938297272, + "p95": 195.23200392723083, + "p99": 286.24001145362854 + }, + "isolatedSum": { + "p50": 112.99199983477592, + "p90": 159.58400070667267, + "p95": 
173.95200580358505, + "p99": 218.01599860191345 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487296, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 53.47200110554695, + "p90": 77.60000228881836, + "p95": 85.05599945783615, + "p99": 93.9520001411438 + }, + "combine": { + "p50": 62.49599903821945, + "p90": 77.34400033950806, + "p95": 82.11199939250946, + "p99": 95.77599912881851 + }, + "roundtrip": { + "p50": 142.17600226402283, + "p90": 183.77600610256195, + "p95": 197.79199361801147, + "p99": 241.5360063314438 + }, + "isolatedSum": { + "p50": 115.9680001437664, + "p90": 154.94400262832642, + "p95": 167.1679988503456, + "p99": 189.7279992699623 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 55.39200082421303, + "p90": 81.05599880218506, + "p95": 89.15200084447861, + "p99": 109.6000000834465 + }, + "combine": { + "p50": 66.39999896287918, + "p90": 84.927998483181, + "p95": 88.3840024471283, + "p99": 101.3759970664978 + }, + "roundtrip": { + "p50": 148.15999567508698, + "p90": 191.23199582099915, + "p95": 200.57600736618042, + "p99": 228.4799963235855 + }, + "isolatedSum": { + "p50": 121.79199978709221, + "p90": 165.98399728536606, + "p95": 177.5360032916069, + "p99": 210.9759971499443 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 
65.08799642324448, + "p90": 90.97599983215332, + "p95": 100.63999891281128, + "p99": 148.28799664974213 + }, + "combine": { + "p50": 81.05599880218506, + "p90": 96.54399752616882, + "p95": 99.23200309276581, + "p99": 106.52799904346466 + }, + "roundtrip": { + "p50": 171.424001455307, + "p90": 216.8000042438507, + "p95": 232.1919947862625, + "p99": 288.38399052619934 + }, + "isolatedSum": { + "p50": 146.14399522542953, + "p90": 187.51999735832214, + "p95": 199.8720020055771, + "p99": 254.8159956932068 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 75.00799745321274, + "p90": 94.14400160312653, + "p95": 99.04000163078308, + "p99": 115.23199826478958 + }, + "combine": { + "p50": 97.34400361776352, + "p90": 115.84000289440155, + "p95": 119.03999745845795, + "p99": 133.56800377368927 + }, + "roundtrip": { + "p50": 197.79199361801147, + "p90": 227.80799865722656, + "p95": 237.8239929676056, + "p99": 276.8320143222809 + }, + "isolatedSum": { + "p50": 172.35200107097626, + "p90": 209.98400449752808, + "p95": 218.07999908924103, + "p99": 248.80000203847885 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-fdd09e42", + "identity": "h200|deepep|7168|8|256|fp8|normal|cached-layout-comm-only-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h200_4f483b60", + "comparisonKey": "95dcff383339100e", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:50:13.723754+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_10", + "sku": "h200", + 
"backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "cached-layout-comm-only-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · fp8 [cl]", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": "set:8:d8d49658059863f2", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271629782", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271629782", + "createdAt": "2026-06-26T23:50:13.723754+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 51.04000121355057, + "p90": 76.64000242948532, + "p95": 84.48000252246857, + "p99": 115.32799899578094 + }, + "combine": { + "p50": 59.20000001788139, + "p90": 77.47200131416321, + "p95": 87.13600039482117, + "p99": 133.85599851608276 + }, + "roundtrip": { + "p50": 140.73599874973297, + 
"p90": 177.18400061130524, + "p95": 189.60000574588776, + "p99": 239.3919974565506 + }, + "isolatedSum": { + "p50": 110.24000123143196, + "p90": 154.11200374364853, + "p95": 171.61600291728973, + "p99": 249.1839975118637 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 54.71999943256378, + "p90": 82.40000158548355, + "p95": 88.16000074148178, + "p99": 115.10399729013443 + }, + "combine": { + "p50": 60.19200012087822, + "p90": 74.78400319814682, + "p95": 81.44000172615051, + "p99": 106.84800148010254 + }, + "roundtrip": { + "p50": 147.13600277900696, + "p90": 190.75199961662292, + "p95": 217.79200434684753, + "p99": 253.79198789596558 + }, + "isolatedSum": { + "p50": 114.911999553442, + "p90": 157.18400478363037, + "p95": 169.6000024676323, + "p99": 221.95199877023697 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 54.048001766204834, + "p90": 77.53600180149078, + "p95": 84.99199897050858, + "p99": 106.4319983124733 + }, + "combine": { + "p50": 60.70400029420853, + "p90": 75.83999633789062, + "p95": 82.36800134181976, + "p99": 106.84800148010254 + }, + "roundtrip": { + "p50": 144.31999623775482, + "p90": 184.4799965620041, + "p95": 193.9840018749237, + "p99": 240.83200097084045 + }, + "isolatedSum": { + "p50": 114.75200206041336, + "p90": 153.3759981393814, + "p95": 167.36000031232834, + "p99": 213.27999979257584 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + 
"stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 54.687999188899994, + "p90": 88.25600147247314, + "p95": 94.46399658918381, + "p99": 120.19199877977371 + }, + "combine": { + "p50": 61.824001371860504, + "p90": 77.02399790287018, + "p95": 83.26400071382523, + "p99": 101.88800096511841 + }, + "roundtrip": { + "p50": 140.35199582576752, + "p90": 180.09600043296814, + "p95": 193.53599846363068, + "p99": 230.5919975042343 + }, + "isolatedSum": { + "p50": 116.5120005607605, + "p90": 165.27999937534332, + "p95": 177.72799730300903, + "p99": 222.07999974489212 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487296, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 54.17599901556969, + "p90": 81.11999928951263, + "p95": 88.8959988951683, + "p99": 129.4720023870468 + }, + "combine": { + "p50": 62.3680017888546, + "p90": 78.36800068616867, + "p95": 82.56000280380249, + "p99": 101.21600329875946 + }, + "roundtrip": { + "p50": 140.47999680042267, + "p90": 177.66399681568146, + "p95": 196.99199497699738, + "p99": 237.7600073814392 + }, + "isolatedSum": { + "p50": 116.54400080442429, + "p90": 159.4879999756813, + "p95": 171.4560016989708, + "p99": 230.68800568580627 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 57.24800005555153, + "p90": 79.64800298213959, + "p95": 85.91999858617783, + "p99": 104.67199981212616 + }, + "combine": { + "p50": 68.41599941253662, + "p90": 82.33600109815598, + "p95": 85.7279971241951, + 
"p99": 99.10400211811066 + }, + "roundtrip": { + "p50": 145.1520025730133, + "p90": 178.1120002269745, + "p95": 187.6479983329773, + "p99": 228.7359982728958 + }, + "isolatedSum": { + "p50": 125.66399946808815, + "p90": 161.98400408029556, + "p95": 171.64799571037292, + "p99": 203.77600193023682 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 66.30399823188782, + "p90": 83.90399813652039, + "p95": 90.17600119113922, + "p99": 149.1840034723282 + }, + "combine": { + "p50": 78.72000336647034, + "p90": 93.79199892282486, + "p95": 98.88000041246414, + "p99": 114.01599645614624 + }, + "roundtrip": { + "p50": 164.8319959640503, + "p90": 199.48799908161163, + "p95": 211.2639993429184, + "p99": 271.93599939346313 + }, + "isolatedSum": { + "p50": 145.02400159835815, + "p90": 177.69599705934525, + "p95": 189.05600160360336, + "p99": 263.1999999284744 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 73.82400333881378, + "p90": 90.62399715185165, + "p95": 95.39200365543365, + "p99": 114.52800035476685 + }, + "combine": { + "p50": 97.24800288677216, + "p90": 112.31999844312668, + "p95": 115.77600240707397, + "p99": 130.49599528312683 + }, + "roundtrip": { + "p50": 199.77599382400513, + "p90": 228.32000255584717, + "p95": 247.29600548744202, + "p99": 297.88801074028015 + }, + "isolatedSum": { + "p50": 171.07200622558594, + "p90": 202.94399559497833, + "p95": 211.16800606250763, + "p99": 245.02399563789368 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-39796825", + "identity": "h200|deepep|7168|8|256|fp8|ll|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h200_ff232ea5", + "comparisonKey": "643e1b15925a53af", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:51:34.222899+00:00", + "status": "valid", + "publicationStatus": "diagnostic", + "runner": "h200-dgxc-slurm_4", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "ll", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · fp8 LL", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": "set:8:d8d49658059863f2", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": 
"SemiAnalysisAI/InferenceX", + "run": { + "id": "28271653486", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271653486", + "createdAt": "2026-06-26T23:51:34.222899+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 29.08799983561039, + "p90": 36.41600161790848, + "p95": 44.28799822926521, + "p99": 63.551999628543854 + }, + "combine": { + "p50": 40.95999896526337, + "p90": 64.70400094985962, + "p95": 74.8480036854744, + "p99": 125.69600343704224 + }, + "roundtrip": { + "p50": 1856.8320274353027, + "p90": 1879.7760009765625, + "p95": 1894.495964050293, + "p99": 2116.607904434204 + }, + "isolatedSum": { + "p50": 70.04799880087376, + "p90": 101.1200025677681, + "p95": 119.13600191473961, + "p99": 189.2480030655861 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 14, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 28.76799926161766, + "p90": 36.22400015592575, + "p95": 42.11200028657913, + "p99": 48.767998814582825 + }, + "combine": { + "p50": 36.06399893760681, + "p90": 45.75999826192856, + "p95": 52.2879995405674, + "p99": 84.1279998421669 + }, + "roundtrip": { + "p50": 1847.4880456924438, + "p90": 1861.0880374908447, + "p95": 1871.3279962539673, + "p99": 2004.607915878296 + }, + "isolatedSum": { + "p50": 64.83199819922447, + "p90": 81.98399841785431, + "p95": 94.39999982714653, + "p99": 132.89599865674973 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 21, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 28.575999662280083, + "p90": 
38.816001266241074, + "p95": 45.632001012563705, + "p99": 57.95200169086456 + }, + "combine": { + "p50": 41.69600084424019, + "p90": 59.93599817156792, + "p95": 68.06399673223495, + "p99": 170.30400037765503 + }, + "roundtrip": { + "p50": 1848.3840227127075, + "p90": 1869.920015335083, + "p95": 1881.9199800491333, + "p99": 1995.0400590896606 + }, + "isolatedSum": { + "p50": 70.27200050652027, + "p90": 98.75199943780899, + "p95": 113.69599774479866, + "p99": 228.2560020685196 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 39, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 29.37600016593933, + "p90": 37.21600025892258, + "p95": 50.65599828958511, + "p99": 62.65600025653839 + }, + "combine": { + "p50": 47.520000487565994, + "p90": 61.664000153541565, + "p95": 68.57600063085556, + "p99": 103.2319962978363 + }, + "roundtrip": { + "p50": 1859.2000007629395, + "p90": 1878.6879777908325, + "p95": 1886.1440420150757, + "p99": 1924.1600036621094 + }, + "isolatedSum": { + "p50": 76.89600065350533, + "p90": 98.88000041246414, + "p95": 119.23199892044067, + "p99": 165.8879965543747 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487296, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 74, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 31.039999797940254, + "p90": 43.83999854326248, + "p95": 53.63199859857559, + "p99": 66.01600348949432 + }, + "combine": { + "p50": 52.25599929690361, + "p90": 69.43999975919724, + "p95": 82.40000158548355, + "p99": 131.99999928474426 + }, + "roundtrip": { + "p50": 1864.0960454940796, + "p90": 1884.160041809082, + "p95": 1898.1759548187256, + "p99": 1969.1519737243652 + }, + "isolatedSum": 
{ + "p50": 83.29599909484386, + "p90": 113.27999830245972, + "p95": 136.03200018405914, + "p99": 198.0160027742386 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 145, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 33.79200026392937, + "p90": 45.27999833226204, + "p95": 49.31199923157692, + "p99": 58.14399942755699 + }, + "combine": { + "p50": 47.839999198913574, + "p90": 64.25599753856659, + "p95": 70.36799937486649, + "p99": 101.53599828481674 + }, + "roundtrip": { + "p50": 1865.056037902832, + "p90": 1881.5360069274902, + "p95": 1888.8959884643555, + "p99": 1917.7600145339966 + }, + "isolatedSum": { + "p50": 81.63199946284294, + "p90": 109.53599587082863, + "p95": 119.6799986064434, + "p99": 159.67999771237373 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 287, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 40.44799879193306, + "p90": 49.6320016682148, + "p95": 52.799999713897705, + "p99": 64.96000289916992 + }, + "combine": { + "p50": 63.58399987220764, + "p90": 81.31200075149536, + "p95": 98.7199991941452, + "p99": 231.1680018901825 + }, + "roundtrip": { + "p50": 1885.632038116455, + "p90": 1903.3279418945312, + "p95": 1914.080023765564, + "p99": 2039.776086807251 + }, + "isolatedSum": { + "p50": 104.0319986641407, + "p90": 130.94400241971016, + "p95": 151.5199989080429, + "p99": 296.1280047893524 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 564, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 51.552001386880875, + "p90": 60.60799956321716, + "p95": 62.65600025653839, + "p99": 73.82400333881378 + }, + "combine": { + "p50": 86.81599795818329, + "p90": 96.19200229644775, + "p95": 108.47999900579453, + "p99": 146.7839926481247 + }, + "roundtrip": { + "p50": 1922.6560592651367, + "p90": 1938.4959936141968, + "p95": 1957.0879936218262, + "p99": 2130.3679943084717 + }, + "isolatedSum": { + "p50": 138.36799934506416, + "p90": 156.80000185966492, + "p95": 171.13599926233292, + "p99": 220.60799598693848 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 1104, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-dbb437b5", + "identity": "h200|deepep|7168|8|256|fp8|ll|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h200_7ec76e6d", + "comparisonKey": "9a87b27b98bf2d7a", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:51:35.330044+00:00", + "status": "valid", + "publicationStatus": "diagnostic", + "runner": "h200-dgxc-slurm_13", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "ll", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · fp8 LL", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + 
"deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": "set:8:d8d49658059863f2", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271656517", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271656517", + "createdAt": "2026-06-26T23:51:35.330044+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 28.76799926161766, + "p90": 38.88000175356865, + "p95": 44.73600164055824, + "p99": 61.15199998021126 + }, + "combine": { + "p50": 36.768000572919846, + "p90": 48.287998884916306, + "p95": 57.53599852323532, + "p99": 90.81599861383438 + }, + "roundtrip": { + "p50": 1847.7439880371094, + "p90": 1855.6159734725952, + "p95": 1860.543966293335, + "p99": 1893.2160139083862 + }, + "isolatedSum": { + "p50": 65.5359998345375, + "p90": 87.16800063848495, + "p95": 102.27200016379356, + "p99": 151.96799859404564 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 14, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 28.991999104619026, + "p90": 33.376000821590424, + "p95": 37.02399879693985, + "p99": 41.05599969625473 + }, + "combine": { + "p50": 37.59999945759773, + "p90": 49.375999718904495, + "p95": 58.62399935722351, + "p99": 235.83999276161194 + }, + 
"roundtrip": { + "p50": 1847.6799726486206, + "p90": 1855.936050415039, + "p95": 1861.4720106124878, + "p99": 1959.007978439331 + }, + "isolatedSum": { + "p50": 66.59199856221676, + "p90": 82.75200054049492, + "p95": 95.64799815416336, + "p99": 276.89599245786667 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 21, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 28.16000021994114, + "p90": 52.41600051522255, + "p95": 58.59199911355972, + "p99": 83.23200047016144 + }, + "combine": { + "p50": 36.959998309612274, + "p90": 48.06400090456009, + "p95": 54.59199845790863, + "p99": 94.59199756383896 + }, + "roundtrip": { + "p50": 1848.3200073242188, + "p90": 1858.62398147583, + "p95": 1864.5440340042114, + "p99": 1925.9519577026367 + }, + "isolatedSum": { + "p50": 65.11999852955341, + "p90": 100.48000141978264, + "p95": 113.18399757146835, + "p99": 177.8239980340004 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 39, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 29.7279991209507, + "p90": 36.3520011305809, + "p95": 41.08799993991852, + "p99": 52.191998809576035 + }, + "combine": { + "p50": 37.88800165057182, + "p90": 50.52800104022026, + "p95": 61.24800071120262, + "p99": 175.7120043039322 + }, + "roundtrip": { + "p50": 1849.4080305099487, + "p90": 1862.7519607543945, + "p95": 1875.4240274429321, + "p99": 1930.5599927902222 + }, + "isolatedSum": { + "p50": 67.61600077152252, + "p90": 86.88000217080116, + "p95": 102.33600065112114, + "p99": 227.90400311350822 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487296, + "combineLogicalBytes": 4974592, + 
"fanoutMean": 5.421875, + "recvTokensMax": 74, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 31.775999814271927, + "p90": 37.856001406908035, + "p95": 43.007999658584595, + "p99": 52.2879995405674 + }, + "combine": { + "p50": 41.280001401901245, + "p90": 52.319999784231186, + "p95": 64.41599875688553, + "p99": 140.28799533843994 + }, + "roundtrip": { + "p50": 1854.848027229309, + "p90": 1876.3200044631958, + "p95": 1915.3599739074707, + "p99": 1982.6879501342773 + }, + "isolatedSum": { + "p50": 73.05600121617317, + "p90": 90.17600119113922, + "p95": 107.42399841547012, + "p99": 192.57599487900734 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 145, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 33.344000577926636, + "p90": 36.159999668598175, + "p95": 38.30400109291077, + "p99": 46.14400118589401 + }, + "combine": { + "p50": 46.30399867892265, + "p90": 56.223999708890915, + "p95": 66.49599969387054, + "p99": 109.24799740314484 + }, + "roundtrip": { + "p50": 1862.8159761428833, + "p90": 1875.2959966659546, + "p95": 1890.6559944152832, + "p99": 1946.6559886932373 + }, + "isolatedSum": { + "p50": 79.64799925684929, + "p90": 92.38399937748909, + "p95": 104.80000078678131, + "p99": 155.39199858903885 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 287, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 39.68000039458275, + "p90": 51.58400163054466, + "p95": 57.72799998521805, + "p99": 97.63199836015701 + }, + "combine": { + "p50": 60.70400029420853, 
+ "p90": 75.29599964618683, + "p95": 94.2080020904541, + "p99": 319.7759985923767 + }, + "roundtrip": { + "p50": 1882.3360204696655, + "p90": 1892.0639753341675, + "p95": 1907.5520038604736, + "p99": 1997.3440170288086 + }, + "isolatedSum": { + "p50": 100.38400068879128, + "p90": 126.88000127673149, + "p95": 151.93600207567215, + "p99": 417.4079969525337 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 564, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 51.552001386880875, + "p90": 55.07199838757515, + "p95": 59.007998555898666, + "p99": 66.11199676990509 + }, + "combine": { + "p50": 86.43200248479843, + "p90": 93.08800101280212, + "p95": 100.89600086212158, + "p99": 167.10400581359863 + }, + "roundtrip": { + "p50": 1921.3759899139404, + "p90": 1930.4640293121338, + "p95": 1935.968041419983, + "p99": 1968.6399698257446 + }, + "isolatedSum": { + "p50": 137.9840038716793, + "p90": 148.15999940037727, + "p95": 159.90399941802025, + "p99": 233.21600258350372 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 1104, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-1caa7ff5", + "identity": "h200|deepep|7168|8|256|fp8|ll|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|ac583971f94b176", + "colorKey": "h200_df102230", + "comparisonKey": "2ce1d8f2e79d5005", + "schemaVersion": 3, + "generatedAt": "2026-06-26T17:31:08.227503+00:00", + "status": "valid", + "publicationStatus": "diagnostic", + "runner": "h200-dgxc-slurm_3", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "ll", + "resourceMode": "normalized", + "suite": "resource-constrained", + "comparisonClass": 
"standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · fp8 LL (norm)", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": 0.18, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": "set:8:d8d49658059863f2", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28254435010", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254435010", + "createdAt": "2026-06-26T17:31:08.227503+00:00", + "sha": "60dec7d70f554e252fec87709e2be52752947db1" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 28.736000880599022, + "p90": 42.24000126123428, + "p95": 44.76799815893173, + "p99": 50.97600072622299 + }, + "combine": { + "p50": 37.087999284267426, + "p90": 44.256001710891724, + "p95": 49.6320016682148, + "p99": 65.60000032186508 + }, + "roundtrip": { + "p50": 1824.4800567626953, + "p90": 1831.7760229110718, + "p95": 1838.3680582046509, + "p99": 1884.1919898986816 + }, + "isolatedSum": { + "p50": 65.82400016486645, + "p90": 
86.496002972126, + "p95": 94.39999982714653, + "p99": 116.57600104808807 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 14, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 28.00000086426735, + "p90": 33.31200033426285, + "p95": 37.66399994492531, + "p99": 50.36799982190132 + }, + "combine": { + "p50": 36.86400130391121, + "p90": 45.27999833226204, + "p95": 51.29599943757057, + "p99": 124.1919994354248 + }, + "roundtrip": { + "p50": 1824.9599933624268, + "p90": 1835.4239463806152, + "p95": 1843.8400030136108, + "p99": 1961.7279767990112 + }, + "isolatedSum": { + "p50": 64.86400216817856, + "p90": 78.59199866652489, + "p95": 88.95999938249588, + "p99": 174.55999925732613 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 21, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 28.48000079393387, + "p90": 33.59999880194664, + "p95": 36.41600161790848, + "p99": 42.33599826693535 + }, + "combine": { + "p50": 37.53599897027016, + "p90": 47.839999198913574, + "p95": 62.144000083208084, + "p99": 136.4479959011078 + }, + "roundtrip": { + "p50": 1825.8240222930908, + "p90": 1833.9519500732422, + "p95": 1842.0480489730835, + "p99": 1925.0880479812622 + }, + "isolatedSum": { + "p50": 66.01599976420403, + "p90": 81.43999800086021, + "p95": 98.56000170111656, + "p99": 178.78399416804314 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 39, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + 
"p50": 28.28799933195114, + "p90": 32.00000151991844, + "p95": 34.55999866127968, + "p99": 39.744000881910324 + }, + "combine": { + "p50": 37.43999823927879, + "p90": 46.78399860858917, + "p95": 53.69599908590317, + "p99": 124.64000284671783 + }, + "roundtrip": { + "p50": 1826.3360261917114, + "p90": 1834.1439962387085, + "p95": 1840.1600122451782, + "p99": 1865.6320571899414 + }, + "isolatedSum": { + "p50": 65.72799757122993, + "p90": 78.78400012850761, + "p95": 88.25599774718285, + "p99": 164.38400372862816 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487296, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 74, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 31.136000528931618, + "p90": 34.94400158524513, + "p95": 37.856001406908035, + "p99": 46.39999940991402 + }, + "combine": { + "p50": 39.264000952243805, + "p90": 44.28799822926521, + "p95": 46.46399989724159, + "p99": 77.85599678754807 + }, + "roundtrip": { + "p50": 1830.4959535598755, + "p90": 1838.304042816162, + "p95": 1842.78404712677, + "p99": 1957.919955253601 + }, + "isolatedSum": { + "p50": 70.40000148117542, + "p90": 79.23199981451035, + "p95": 84.32000130414963, + "p99": 124.25599619746208 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 145, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 32.896000891923904, + "p90": 35.96799820661545, + "p95": 39.135999977588654, + "p99": 45.56800052523613 + }, + "combine": { + "p50": 45.791998505592346, + "p90": 54.016001522541046, + "p95": 83.0719992518425, + "p99": 153.56799960136414 + }, + "roundtrip": { + "p50": 1840.1600122451782, + "p90": 1847.5840091705322, + "p95": 1853.9199829101562, + "p99": 
1896.1600065231323 + }, + "isolatedSum": { + "p50": 78.68799939751625, + "p90": 89.9839997291565, + "p95": 122.20799922943115, + "p99": 199.13600012660027 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 287, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 38.84800150990486, + "p90": 42.43199899792671, + "p95": 47.16800153255463, + "p99": 62.144000083208084 + }, + "combine": { + "p50": 59.67999994754791, + "p90": 66.14399701356888, + "p95": 83.16799998283386, + "p99": 121.21599912643433 + }, + "roundtrip": { + "p50": 1859.5199584960938, + "p90": 1866.495966911316, + "p95": 1875.264048576355, + "p99": 1916.1280393600464 + }, + "isolatedSum": { + "p50": 98.52800145745277, + "p90": 108.57599601149559, + "p95": 130.3360015153885, + "p99": 183.3599992096424 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 564, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 51.80799961090088, + "p90": 55.135998874902725, + "p95": 59.776000678539276, + "p99": 68.83200258016586 + }, + "combine": { + "p50": 86.40000224113464, + "p90": 92.03200042247772, + "p95": 95.74399888515472, + "p99": 156.41599893569946 + }, + "roundtrip": { + "p50": 1899.392008781433, + "p90": 1905.2480459213257, + "p95": 1909.440040588379, + "p99": 1973.3760356903076 + }, + "isolatedSum": { + "p50": 138.20800185203552, + "p90": 147.16799929738045, + "p95": 155.519999563694, + "p99": 225.24800151586533 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 1104, + "stragglerRank": 4, + "correct": true, + 
"samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-858b05cb", + "identity": "h200|deepep|7168|8|256|fp8-directcast|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h200_2b594dfd", + "comparisonKey": "a4b473bf0791db70", + "schemaVersion": 3, + "generatedAt": "2026-06-27T15:56:11.323618+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_8", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · fp8-directcast", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8-directcast", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": "set:8:d8d49658059863f2", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28294159741", + "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28294159741", + "createdAt": "2026-06-27T15:56:11.323618+00:00", + "sha": "42eddb48c3eed35214c5ad50da1aa6527363ff70" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 66.75200164318085, + "p90": 95.23200243711472, + "p95": 102.75200009346008, + "p99": 119.13599818944931 + }, + "combine": { + "p50": 59.007998555898666, + "p90": 76.1599987745285, + "p95": 82.0159986615181, + "p99": 103.00800204277039 + }, + "roundtrip": { + "p50": 152.54400670528412, + "p90": 193.12000274658203, + "p95": 204.8960030078888, + "p99": 230.68800568580627 + }, + "isolatedSum": { + "p50": 125.76000019907951, + "p90": 171.39200121164322, + "p95": 184.76799875497818, + "p99": 222.1440002322197 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 63.58399987220764, + "p90": 91.00800007581711, + "p95": 99.80800002813339, + "p99": 118.52800101041794 + }, + "combine": { + "p50": 58.94400179386139, + "p90": 70.592001080513, + "p95": 77.82399654388428, + "p99": 87.77599781751633 + }, + "roundtrip": { + "p50": 151.32799744606018, + "p90": 191.96799397468567, + "p95": 202.4639993906021, + "p99": 234.17599499225616 + }, + "isolatedSum": { + "p50": 122.52800166606903, + "p90": 161.6000011563301, + "p95": 177.63199657201767, + "p99": 206.30399882793427 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 66.01600348949432, + "p90": 93.9520001411438, + "p95": 105.59999942779541, + "p99": 121.8239963054657 + }, + 
"combine": { + "p50": 60.35200133919716, + "p90": 74.72000271081924, + "p95": 78.5600021481514, + "p99": 88.73599767684937 + }, + "roundtrip": { + "p50": 154.84799444675446, + "p90": 194.5600062608719, + "p95": 203.19999754428864, + "p99": 230.335995554924 + }, + "isolatedSum": { + "p50": 126.36800482869148, + "p90": 168.67200285196304, + "p95": 184.1600015759468, + "p99": 210.55999398231506 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 67.10399687290192, + "p90": 94.4959968328476, + "p95": 104.76800054311752, + "p99": 123.00799787044525 + }, + "combine": { + "p50": 61.08799949288368, + "p90": 78.04799824953079, + "p95": 82.17599987983704, + "p99": 98.75199943780899 + }, + "roundtrip": { + "p50": 155.93600273132324, + "p90": 198.2399970293045, + "p95": 208.03199708461761, + "p99": 242.8479939699173 + }, + "isolatedSum": { + "p50": 128.1919963657856, + "p90": 172.5439950823784, + "p95": 186.94400042295456, + "p99": 221.75999730825424 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 67.16799736022949, + "p90": 97.31200337409973, + "p95": 106.9440022110939, + "p99": 129.37599420547485 + }, + "combine": { + "p50": 61.5679994225502, + "p90": 77.44000107049942, + "p95": 81.66400343179703, + "p99": 91.64799749851227 + }, + "roundtrip": { + "p50": 154.4319987297058, + "p90": 195.3279972076416, + "p95": 206.68800175189972, + "p99": 227.7120053768158 + }, + "isolatedSum": { + "p50": 128.7359967827797, + "p90": 174.75200444459915, + "p95": 188.60800564289093, + 
"p99": 221.02399170398712 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 67.80800223350525, + "p90": 92.19200164079666, + "p95": 102.4319976568222, + "p99": 133.7279975414276 + }, + "combine": { + "p50": 67.74400174617767, + "p90": 82.84799754619598, + "p95": 87.61599659919739, + "p99": 97.120001912117 + }, + "roundtrip": { + "p50": 159.13599729537964, + "p90": 200.06400346755981, + "p95": 211.84000372886658, + "p99": 244.6720004081726 + }, + "isolatedSum": { + "p50": 135.55200397968292, + "p90": 175.03999918699265, + "p95": 190.0479942560196, + "p99": 230.84799945354462 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 77.60000228881836, + "p90": 101.59999877214432, + "p95": 109.11999642848969, + "p99": 125.91999769210815 + }, + "combine": { + "p50": 78.68800312280655, + "p90": 94.08000111579895, + "p95": 100.47999769449234, + "p99": 115.52000045776367 + }, + "roundtrip": { + "p50": 180.16000092029572, + "p90": 224.95999932289124, + "p95": 240.79999327659607, + "p99": 329.75998520851135 + }, + "isolatedSum": { + "p50": 156.2880054116249, + "p90": 195.67999988794327, + "p95": 209.59999412298203, + "p99": 241.43999814987183 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 86.7839977145195, + 
"p90": 109.79200154542923, + "p95": 122.78400361537933, + "p99": 158.11200439929962 + }, + "combine": { + "p50": 96.3520035147667, + "p90": 111.84000223875046, + "p95": 115.77600240707397, + "p99": 128.22400033473969 + }, + "roundtrip": { + "p50": 209.88799631595612, + "p90": 239.1359955072403, + "p95": 253.9840042591095, + "p99": 331.84000849723816 + }, + "isolatedSum": { + "p50": 183.1360012292862, + "p90": 221.6320037841797, + "p95": 238.5600060224533, + "p99": 286.3360047340393 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-339f09b5", + "identity": "h200|deepep|7168|8|256|fp8-pertoken|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h200_7351c157", + "comparisonKey": "156f1708b9a7b98d", + "schemaVersion": 3, + "generatedAt": "2026-06-27T15:56:14.997520+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_10", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · fp8-pertoken", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8-pertoken", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + 
"conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": "set:8:d8d49658059863f2", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28294163450", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28294163450", + "createdAt": "2026-06-27T15:56:14.997520+00:00", + "sha": "42eddb48c3eed35214c5ad50da1aa6527363ff70" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 66.81600213050842, + "p90": 88.67199718952179, + "p95": 102.9760017991066, + "p99": 120.60800194740295 + }, + "combine": { + "p50": 59.29600074887276, + "p90": 72.86400347948074, + "p95": 78.75200361013412, + "p99": 86.84799820184708 + }, + "roundtrip": { + "p50": 154.6880006790161, + "p90": 198.2720047235489, + "p95": 219.55199539661407, + "p99": 281.69599175453186 + }, + "isolatedSum": { + "p50": 126.11200287938118, + "p90": 161.53600066900253, + "p95": 181.72800540924072, + "p99": 207.45600014925003 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 70.43199986219406, + "p90": 104.73600029945374, + "p95": 119.32799965143204, + "p99": 193.7279999256134 + }, + "combine": { + "p50": 59.10399928689003, + "p90": 71.32799923419952, + "p95": 80.28800040483475, + "p99": 100.16000270843506 + }, + "roundtrip": { + "p50": 155.03999590873718, + "p90": 
205.53599298000336, + "p95": 231.58399760723114, + "p99": 357.08799958229065 + }, + "isolatedSum": { + "p50": 129.5359991490841, + "p90": 176.06399953365326, + "p95": 199.61600005626678, + "p99": 293.88800263404846 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 68.92800331115723, + "p90": 96.38399630784988, + "p95": 109.66400057077408, + "p99": 130.97600638866425 + }, + "combine": { + "p50": 61.02399900555611, + "p90": 75.68000257015228, + "p95": 83.61600339412689, + "p99": 102.78400033712387 + }, + "roundtrip": { + "p50": 158.04800391197205, + "p90": 202.94399559497833, + "p95": 213.53599429130554, + "p99": 251.19999051094055 + }, + "isolatedSum": { + "p50": 129.95200231671333, + "p90": 172.06399887800217, + "p95": 193.28000396490097, + "p99": 233.76000672578812 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 66.01600348949432, + "p90": 96.25600278377533, + "p95": 106.72000050544739, + "p99": 128.86400520801544 + }, + "combine": { + "p50": 60.19200012087822, + "p90": 72.92799651622772, + "p95": 79.03999835252762, + "p99": 88.19200098514557 + }, + "roundtrip": { + "p50": 153.85599434375763, + "p90": 197.56799936294556, + "p95": 215.64799547195435, + "p99": 285.2480113506317 + }, + "isolatedSum": { + "p50": 126.20800361037254, + "p90": 169.18399930000305, + "p95": 185.759998857975, + "p99": 217.056006193161 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, 
+ "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 66.23999774456024, + "p90": 92.99200028181076, + "p95": 102.81600058078766, + "p99": 128.9599984884262 + }, + "combine": { + "p50": 63.1679967045784, + "p90": 78.36800068616867, + "p95": 84.35200154781342, + "p99": 111.00800335407257 + }, + "roundtrip": { + "p50": 161.79199516773224, + "p90": 204.48000729084015, + "p95": 219.26400065422058, + "p99": 282.4319899082184 + }, + "isolatedSum": { + "p50": 129.40799444913864, + "p90": 171.36000096797943, + "p95": 187.16800212860107, + "p99": 239.96800184249878 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 71.68000191450119, + "p90": 102.75200009346008, + "p95": 115.68000167608261, + "p99": 132.89600610733032 + }, + "combine": { + "p50": 68.7360018491745, + "p90": 83.42400193214417, + "p95": 88.25600147247314, + "p99": 106.72000050544739 + }, + "roundtrip": { + "p50": 166.04800522327423, + "p90": 211.64800226688385, + "p95": 225.79200565814972, + "p99": 305.7920038700104 + }, + "isolatedSum": { + "p50": 140.4160037636757, + "p90": 186.17600202560425, + "p95": 203.93600314855576, + "p99": 239.6160066127777 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 77.95199751853943, + "p90": 101.69599950313568, + "p95": 111.32799834012985, + "p99": 139.0720009803772 + }, + "combine": { + "p50": 79.26400005817413, + "p90": 92.57599711418152, + "p95": 
98.91200065612793, + "p99": 126.36800110340118 + }, + "roundtrip": { + "p50": 175.48799514770508, + "p90": 220.32000124454498, + "p95": 231.64799809455872, + "p99": 279.4559895992279 + }, + "isolatedSum": { + "p50": 157.21599757671356, + "p90": 194.2719966173172, + "p95": 210.23999899625778, + "p99": 265.4400020837784 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 87.07199990749359, + "p90": 109.56799983978271, + "p95": 121.21599912643433, + "p99": 166.20799899101257 + }, + "combine": { + "p50": 96.6079980134964, + "p90": 113.66400122642517, + "p95": 119.64800208806992, + "p99": 157.1200042963028 + }, + "roundtrip": { + "p50": 212.44800090789795, + "p90": 258.36798548698425, + "p95": 284.41599011421204, + "p99": 348.9600121974945 + }, + "isolatedSum": { + "p50": 183.67999792099, + "p90": 223.23200106620789, + "p95": 240.86400121450424, + "p99": 323.32800328731537 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-16f8b2e1", + "identity": "h200|deepep|4096|8|128|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||dc27c5e0894e569", + "colorKey": "h200_d982b749", + "comparisonKey": "465ef3841664f1ea", + "schemaVersion": 3, + "generatedAt": "2026-06-27T11:14:26.678836+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_13", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": 
"layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16", + "model": "Qwen3.5", + "shape": { + "hidden": 4096, + "topk": 8, + "experts": 128, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "dc27c5e0894e569", + "workloadId": "set:6:76d8142d69406335", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28287506806", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28287506806", + "createdAt": "2026-06-27T11:14:26.678836+00:00", + "sha": "df7fddee0a275156d4c72fa006cd2b73bce72613" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 89.79199826717377, + "p90": 110.84800213575363, + "p95": 119.00799721479416, + "p99": 141.92000031471252 + }, + "combine": { + "p50": 83.3280012011528, + "p90": 95.96800059080124, + "p95": 100.38399696350098, + "p99": 112.35199868679047 + }, + "roundtrip": { + "p50": 150.81599354743958, + "p90": 175.64800381660461, + "p95": 183.96799266338348, + "p99": 206.59199357032776 + }, + "isolatedSum": { + "p50": 173.11999946832657, + "p90": 206.81600272655487, + "p95": 
219.39199417829514, + "p99": 254.271999001503 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 44564480, + "combineLogicalBytes": 44564480, + "fanoutMean": 5.3125, + "recvTokensMax": 699, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 110.23999750614166, + "p90": 128.48000228405, + "p95": 134.88000631332397, + "p99": 166.143998503685 + }, + "combine": { + "p50": 104.86400127410889, + "p90": 116.95999652147293, + "p95": 122.52800166606903, + "p99": 139.3280029296875 + }, + "roundtrip": { + "p50": 193.95199418067932, + "p90": 219.32800114154816, + "p95": 232.16000199317932, + "p99": 261.79200410842896 + }, + "isolatedSum": { + "p50": 215.10399878025055, + "p90": 245.43999880552292, + "p95": 257.408007979393, + "p99": 305.4720014333725 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 89726976, + "combineLogicalBytes": 89726976, + "fanoutMean": 5.34814453125, + "recvTokensMax": 1385, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 146.2080031633377, + "p90": 167.64800250530243, + "p95": 177.37600207328796, + "p99": 212.38400042057037 + }, + "combine": { + "p50": 152.63999998569489, + "p90": 164.48000073432922, + "p95": 170.68800330162048, + "p99": 188.960000872612 + }, + "roundtrip": { + "p50": 272.99201488494873, + "p90": 291.1359965801239, + "p95": 302.0159900188446, + "p99": 328.575998544693 + }, + "isolatedSum": { + "p50": 298.8480031490326, + "p90": 332.12800323963165, + "p95": 348.06400537490845, + "p99": 401.3440012931824 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 179503104, + "combineLogicalBytes": 179503104, + "fanoutMean": 5.349609375, + "recvTokensMax": 2772, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + 
"dispatch": { + "p50": 215.488001704216, + "p90": 237.34399676322937, + "p95": 246.94399535655975, + "p99": 288.03199529647827 + }, + "combine": { + "p50": 248.35200607776642, + "p90": 259.71201062202454, + "p95": 266.4639949798584, + "p99": 279.00800108909607 + }, + "roundtrip": { + "p50": 438.4959936141968, + "p90": 459.80799198150635, + "p95": 470.71999311447144, + "p99": 498.4000027179718 + }, + "isolatedSum": { + "p50": 463.8400077819824, + "p90": 497.0560073852539, + "p95": 513.4079903364182, + "p99": 567.0399963855743 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 359022592, + "combineLogicalBytes": 359022592, + "fanoutMean": 5.349853515625, + "recvTokensMax": 5558, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 349.5680093765259, + "p90": 368.4160113334656, + "p95": 377.82400846481323, + "p99": 437.824010848999 + }, + "combine": { + "p50": 416.703999042511, + "p90": 430.9439957141876, + "p95": 437.18400597572327, + "p99": 455.1680088043213 + }, + "roundtrip": { + "p50": 740.2560114860535, + "p90": 760.7359886169434, + "p95": 771.3599801063538, + "p99": 818.4639811515808 + }, + "isolatedSum": { + "p50": 766.2720084190369, + "p90": 799.3600070476532, + "p95": 815.0080144405365, + "p99": 892.9920196533203 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 716111872, + "combineLogicalBytes": 716111872, + "fanoutMean": 5.33544921875, + "recvTokensMax": 10982, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 630.8159828186035, + "p90": 655.0719738006592, + "p95": 665.440022945404, + "p99": 703.3920288085938 + }, + "combine": { + "p50": 754.7199726104736, + "p90": 771.1359858512878, + "p95": 779.6480059623718, + "p99": 856.9279909133911 + }, + "roundtrip": { + "p50": 1357.0560216903687, + "p90": 1393.8560485839844, + 
"p95": 1428.4160137176514, + "p99": 1616.320013999939 + }, + "isolatedSum": { + "p50": 1385.5359554290771, + "p90": 1426.207959651947, + "p95": 1445.0880289077759, + "p99": 1560.3200197219849 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1432395776, + "combineLogicalBytes": 1432395776, + "fanoutMean": 5.336090087890625, + "recvTokensMax": 21939, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-5888aff1", + "identity": "h200|deepep|4096|8|128|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||dc27c5e0894e569", + "colorKey": "h200_3a47b6c9", + "comparisonKey": "a14fc35e02b01662", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:53:49.842184+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_12", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16", + "model": "Qwen3.5", + "shape": { + "hidden": 4096, + "topk": 8, + "experts": 128, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "dc27c5e0894e569", + "workloadId": "set:6:76d8142d69406335", + "workloadSource": 
"canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271748233", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271748233", + "createdAt": "2026-06-26T23:53:49.842184+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 91.96799993515015, + "p90": 112.70400136709213, + "p95": 120.70400267839432, + "p99": 143.8400000333786 + }, + "combine": { + "p50": 83.29600095748901, + "p90": 93.40800344944, + "p95": 99.29600358009338, + "p99": 117.44000017642975 + }, + "roundtrip": { + "p50": 151.2639969587326, + "p90": 170.78399658203125, + "p95": 179.32799458503723, + "p99": 211.93599700927734 + }, + "isolatedSum": { + "p50": 175.26400089263916, + "p90": 206.11200481653214, + "p95": 220.0000062584877, + "p99": 261.28000020980835 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 44564480, + "combineLogicalBytes": 44564480, + "fanoutMean": 5.3125, + "recvTokensMax": 699, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 111.7120012640953, + "p90": 129.82399761676788, + "p95": 141.59999787807465, + "p99": 159.58400070667267 + }, + "combine": { + "p50": 104.35199737548828, + "p90": 119.93599683046341, + "p95": 123.83999675512314, + "p99": 136.22400164604187 + }, + "roundtrip": { + "p50": 195.42400538921356, + "p90": 218.4000015258789, + "p95": 231.51999711990356, + "p99": 307.16800689697266 + }, + "isolatedSum": { + "p50": 216.0639986395836, + "p90": 249.7599944472313, + "p95": 265.4399946331978, + "p99": 295.80800235271454 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 89726976, + "combineLogicalBytes": 
89726976, + "fanoutMean": 5.34814453125, + "recvTokensMax": 1385, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 147.23199605941772, + "p90": 165.12000560760498, + "p95": 172.992005944252, + "p99": 204.6079933643341 + }, + "combine": { + "p50": 153.53600680828094, + "p90": 168.2240068912506, + "p95": 175.90400576591492, + "p99": 192.09599494934082 + }, + "roundtrip": { + "p50": 270.8800137042999, + "p90": 295.1680123806, + "p95": 303.77599596977234, + "p99": 446.8800127506256 + }, + "isolatedSum": { + "p50": 300.76800286769867, + "p90": 333.3440124988556, + "p95": 348.89601171016693, + "p99": 396.7039883136749 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 179503104, + "combineLogicalBytes": 179503104, + "fanoutMean": 5.349609375, + "recvTokensMax": 2772, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 214.52799439430237, + "p90": 237.63200640678406, + "p95": 244.9920028448105, + "p99": 282.5919985771179 + }, + "combine": { + "p50": 249.08800423145294, + "p90": 261.0880136489868, + "p95": 267.8079903125763, + "p99": 287.7439856529236 + }, + "roundtrip": { + "p50": 438.27199935913086, + "p90": 458.24000239372253, + "p95": 469.88800168037415, + "p99": 508.1599950790405 + }, + "isolatedSum": { + "p50": 463.6159986257553, + "p90": 498.7200200557709, + "p95": 512.7999931573868, + "p99": 570.3359842300415 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 359022592, + "combineLogicalBytes": 359022592, + "fanoutMean": 5.349853515625, + "recvTokensMax": 5558, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 352.1279990673065, + "p90": 375.0720024108887, + "p95": 387.1999979019165, + "p99": 523.360013961792 + }, + 
"combine": { + "p50": 419.9039936065674, + "p90": 433.8560104370117, + "p95": 441.536009311676, + "p99": 501.6319751739502 + }, + "roundtrip": { + "p50": 744.5759773254395, + "p90": 766.4960026741028, + "p95": 777.3119807243347, + "p99": 837.7919793128967 + }, + "isolatedSum": { + "p50": 772.0319926738739, + "p90": 808.9280128479004, + "p95": 828.7360072135925, + "p99": 1024.9919891357422 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 716111872, + "combineLogicalBytes": 716111872, + "fanoutMean": 5.33544921875, + "recvTokensMax": 10982, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 629.6319961547852, + "p90": 648.9279866218567, + "p95": 656.2560200691223, + "p99": 715.1039838790894 + }, + "combine": { + "p50": 754.368007183075, + "p90": 767.1359777450562, + "p95": 774.5919823646545, + "p99": 917.5040125846863 + }, + "roundtrip": { + "p50": 1354.0480136871338, + "p90": 1376.4159679412842, + "p95": 1387.8079652786255, + "p99": 1428.8320541381836 + }, + "isolatedSum": { + "p50": 1384.00000333786, + "p90": 1416.0639643669128, + "p95": 1430.8480024337769, + "p99": 1632.6079964637756 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1432395776, + "combineLogicalBytes": 1432395776, + "fanoutMean": 5.336090087890625, + "recvTokensMax": 21939, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-b183f57f", + "identity": "h200|deepep|5120|8|160|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||0c022a63bbcbf42", + "colorKey": "h200_3a47b6c9", + "comparisonKey": "6953183723230449", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:54:18.715974+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_0", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": 
"backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16", + "model": "shape 5120/8/160", + "shape": { + "hidden": 5120, + "topk": 8, + "experts": 160, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "0c022a63bbcbf42", + "workloadId": "set:6:28c0c09b13ff0acf", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271763623", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271763623", + "createdAt": "2026-06-26T23:54:18.715974+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 97.69599884748459, + "p90": 105.95200210809708, + "p95": 110.68800091743469, + "p99": 117.37599968910217 + }, + "combine": { + "p50": 90.33600240945816, + "p90": 95.64799815416336, + "p95": 98.65599870681763, + "p99": 108.03200304508209 + }, + "roundtrip": { + "p50": 164.32000696659088, + "p90": 174.01599884033203, + "p95": 181.0240000486374, + "p99": 201.56799256801605 + }, + "isolatedSum": { 
+ "p50": 188.03200125694275, + "p90": 201.60000026226044, + "p95": 209.34399962425232, + "p99": 225.40800273418427 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 55674880, + "combineLogicalBytes": 55674880, + "fanoutMean": 5.3095703125, + "recvTokensMax": 699, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 124.79999661445618, + "p90": 143.96800100803375, + "p95": 151.96800231933594, + "p99": 176.57600343227386 + }, + "combine": { + "p50": 119.71200257539749, + "p90": 133.56800377368927, + "p95": 140.09599387645721, + "p99": 156.70399367809296 + }, + "roundtrip": { + "p50": 216.48000180721283, + "p90": 235.35999655723572, + "p95": 243.00800263881683, + "p99": 263.71198892593384 + }, + "isolatedSum": { + "p50": 244.51199918985367, + "p90": 277.536004781723, + "p95": 292.06399619579315, + "p99": 333.2799971103668 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 111104000, + "combineLogicalBytes": 111104000, + "fanoutMean": 5.2978515625, + "recvTokensMax": 1387, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 161.6320013999939, + "p90": 176.54399573802948, + "p95": 185.47199666500092, + "p99": 204.96000349521637 + }, + "combine": { + "p50": 177.47199535369873, + "p90": 187.74400651454926, + "p95": 193.88799369335175, + "p99": 218.27200055122375 + }, + "roundtrip": { + "p50": 309.2159926891327, + "p90": 327.2320032119751, + "p95": 333.1199884414673, + "p99": 373.1519877910614 + }, + "isolatedSum": { + "p50": 339.1039967536926, + "p90": 364.28800225257874, + "p95": 379.35999035835266, + "p99": 423.2320040464401 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 223098880, + "combineLogicalBytes": 223098880, + "fanoutMean": 5.319091796875, + "recvTokensMax": 2762, + "stragglerRank": 3, + "correct": true, + 
"samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 242.97599494457245, + "p90": 263.264000415802, + "p95": 271.10400795936584, + "p99": 296.54398560523987 + }, + "combine": { + "p50": 279.6800136566162, + "p90": 291.55200719833374, + "p95": 296.7039942741394, + "p99": 321.82401418685913 + }, + "roundtrip": { + "p50": 498.30400943756104, + "p90": 516.0959959030151, + "p95": 529.4719934463501, + "p99": 696.6400146484375 + }, + "isolatedSum": { + "p50": 522.6560086011887, + "p90": 554.8160076141357, + "p95": 567.8080022335052, + "p99": 618.367999792099 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 446730240, + "combineLogicalBytes": 446730240, + "fanoutMean": 5.325439453125, + "recvTokensMax": 5518, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 402.52798795700073, + "p90": 414.46399688720703, + "p95": 420.28799653053284, + "p99": 450.72001218795776 + }, + "combine": { + "p50": 478.7839949131012, + "p90": 488.22399973869324, + "p95": 490.4960095882416, + "p99": 499.07198548316956 + }, + "roundtrip": { + "p50": 857.6639890670776, + "p90": 869.3439960479736, + "p95": 882.3680281639099, + "p99": 1592.25594997406 + }, + "isolatedSum": { + "p50": 881.3119828701019, + "p90": 902.6879966259003, + "p95": 910.7840061187744, + "p99": 949.7919976711273 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 893634560, + "combineLogicalBytes": 893634560, + "fanoutMean": 5.32647705078125, + "recvTokensMax": 11032, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 750.4640221595764, + "p90": 770.9119915962219, + "p95": 780.6079983711243, + "p99": 812.3199939727783 + }, + "combine": { + "p50": 873.1840252876282, + "p90": 885.6319785118103, + "p95": 893.4080004692078, + 
"p99": 941.9839978218079 + }, + "roundtrip": { + "p50": 1586.143970489502, + "p90": 1606.112003326416, + "p95": 1623.5840320587158, + "p99": 1662.7839803695679 + }, + "isolatedSum": { + "p50": 1623.6480474472046, + "p90": 1656.5439701080322, + "p95": 1674.015998840332, + "p99": 1754.3039917945862 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1786265600, + "combineLogicalBytes": 1786265600, + "fanoutMean": 5.323486328125, + "recvTokensMax": 21895, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-dfdf595d", + "identity": "h200|deepep|6144|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "h200_d982b749", + "comparisonKey": "089552474e5d15cf", + "schemaVersion": 3, + "generatedAt": "2026-06-27T11:13:50.694218+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_7", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16", + "model": "MiniMax-M3", + "shape": { + "hidden": 6144, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + 
"routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": "set:6:9f5e1e005a35e937", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28287495061", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28287495061", + "createdAt": "2026-06-27T11:13:50.694218+00:00", + "sha": "df7fddee0a275156d4c72fa006cd2b73bce72613" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 107.13600367307663, + "p90": 125.66399574279785, + "p95": 131.23199343681335, + "p99": 139.29599523544312 + }, + "combine": { + "p50": 95.51999717950821, + "p90": 110.81600189208984, + "p95": 115.39199948310852, + "p99": 158.07999670505524 + }, + "roundtrip": { + "p50": 180.83199858665466, + "p90": 198.04799556732178, + "p95": 205.59999346733093, + "p99": 217.1200066804886 + }, + "isolatedSum": { + "p50": 202.65600085258484, + "p90": 236.4799976348877, + "p95": 246.62399291992188, + "p99": 297.37599194049835 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 66576384, + "combineLogicalBytes": 66576384, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 132.89600610733032, + "p90": 149.05600249767303, + "p95": 155.13600409030914, + "p99": 168.64000260829926 + }, + "combine": { + "p50": 128.03199887275696, + "p90": 142.91200041770935, + "p95": 147.71200716495514, + "p99": 169.27999258041382 + }, + "roundtrip": { + "p50": 236.89599335193634, + "p90": 251.23199820518494, + "p95": 261.6640031337738, + "p99": 302.68800258636475 + }, + "isolatedSum": { + "p50": 260.9280049800873, + "p90": 291.9680029153824, + "p95": 
302.8480112552643, + "p99": 337.9199951887131 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 133619712, + "combineLogicalBytes": 133619712, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 182.68799781799316, + "p90": 202.81599462032318, + "p95": 213.1199985742569, + "p99": 237.98400163650513 + }, + "combine": { + "p50": 200.57600736618042, + "p90": 216.09599888324738, + "p95": 226.623997092247, + "p99": 267.36000180244446 + }, + "roundtrip": { + "p50": 357.31199383735657, + "p90": 381.3439905643463, + "p95": 394.8479890823364, + "p99": 424.127995967865 + }, + "isolatedSum": { + "p50": 383.2640051841736, + "p90": 418.91199350357056, + "p95": 439.7439956665039, + "p99": 505.3440034389496 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 267657216, + "combineLogicalBytes": 267657216, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 275.6800055503845, + "p90": 293.0560111999512, + "p95": 299.19999837875366, + "p99": 318.04800033569336 + }, + "combine": { + "p50": 319.8719918727875, + "p90": 332.41599798202515, + "p95": 340.2239978313446, + "p99": 369.4719970226288 + }, + "roundtrip": { + "p50": 570.2400207519531, + "p90": 585.919976234436, + "p95": 596.8000292778015, + "p99": 636.7040276527405 + }, + "isolatedSum": { + "p50": 595.551997423172, + "p90": 625.4720091819763, + "p95": 639.4239962100983, + "p99": 687.5199973583221 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 534380544, + "combineLogicalBytes": 534380544, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 
16384, + "dispatch": { + "p50": 462.2719883918762, + "p90": 478.87998819351196, + "p95": 489.79198932647705, + "p99": 548.7679839134216 + }, + "combine": { + "p50": 548.5119819641113, + "p90": 561.5040063858032, + "p95": 568.3199763298035, + "p99": 726.7199754714966 + }, + "roundtrip": { + "p50": 983.0080270767212, + "p90": 996.6400265693665, + "p95": 1016.3520574569702, + "p99": 1202.5279998779297 + }, + "isolatedSum": { + "p50": 1010.7839703559875, + "p90": 1040.3839945793152, + "p95": 1058.1119656562805, + "p99": 1275.4879593849182 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1066119168, + "combineLogicalBytes": 1066119168, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 868.2559728622437, + "p90": 894.5599794387817, + "p95": 901.9839763641357, + "p99": 927.3279905319214 + }, + "combine": { + "p50": 1004.7680139541626, + "p90": 1020.8319425582886, + "p95": 1037.503957748413, + "p99": 1106.7520380020142 + }, + "roundtrip": { + "p50": 1834.112048149109, + "p90": 1855.2639484405518, + "p95": 1866.6880130767822, + "p99": 2027.26411819458 + }, + "isolatedSum": { + "p50": 1873.0239868164062, + "p90": 1915.3919219970703, + "p95": 1939.4879341125488, + "p99": 2034.0800285339355 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2131722240, + "combineLogicalBytes": 2131722240, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-96267e21", + "identity": "h200|deepep|6144|8|256|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "h200_3a47b6c9", + "comparisonKey": "27afbf0ad63e86ca", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:55:01.688428+00:00", + "status": "valid", + "publicationStatus": "official", 
+ "runner": "h200-dgxc-slurm_9", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16", + "model": "MiniMax-M3", + "shape": { + "hidden": 6144, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": "set:6:9f5e1e005a35e937", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271778692", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271778692", + "createdAt": "2026-06-26T23:55:01.688428+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 108.64000022411346, + "p90": 120.70400267839432, + "p95": 131.58400356769562, + "p99": 146.2399959564209 + }, + "combine": { + "p50": 95.71199864149094, + "p90": 103.67999970912933, + "p95": 112.73600161075592, + "p99": 121.50400131940842 + }, + 
"roundtrip": { + "p50": 181.0240000486374, + "p90": 199.2959976196289, + "p95": 207.16799795627594, + "p99": 244.9280023574829 + }, + "isolatedSum": { + "p50": 204.3519988656044, + "p90": 224.38400238752365, + "p95": 244.32000517845154, + "p99": 267.7439972758293 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 66576384, + "combineLogicalBytes": 66576384, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 135.29600203037262, + "p90": 148.00000190734863, + "p95": 157.72800147533417, + "p99": 182.20800161361694 + }, + "combine": { + "p50": 128.31999361515045, + "p90": 139.74399864673615, + "p95": 145.7280069589615, + "p99": 158.75199437141418 + }, + "roundtrip": { + "p50": 235.6480062007904, + "p90": 248.6400008201599, + "p95": 259.16799902915955, + "p99": 301.60000920295715 + }, + "isolatedSum": { + "p50": 263.61599564552307, + "p90": 287.7440005540848, + "p95": 303.45600843429565, + "p99": 340.9599959850311 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 133619712, + "combineLogicalBytes": 133619712, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 184.1920018196106, + "p90": 199.8080015182495, + "p95": 208.48000049591064, + "p99": 231.90400004386902 + }, + "combine": { + "p50": 198.62399995326996, + "p90": 212.0320051908493, + "p95": 221.18400037288666, + "p99": 289.7599935531616 + }, + "roundtrip": { + "p50": 349.4719862937927, + "p90": 366.3040101528168, + "p95": 376.8320083618164, + "p99": 431.2959909439087 + }, + "isolatedSum": { + "p50": 382.81600177288055, + "p90": 411.8400067090988, + "p95": 429.6640008687973, + "p99": 521.6639935970306 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 267657216, + 
"combineLogicalBytes": 267657216, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 278.0799865722656, + "p90": 296.671986579895, + "p95": 305.759996175766, + "p99": 346.8799889087677 + }, + "combine": { + "p50": 313.1519854068756, + "p90": 324.6079981327057, + "p95": 331.9680094718933, + "p99": 350.5600094795227 + }, + "roundtrip": { + "p50": 563.1999969482422, + "p90": 577.9839754104614, + "p95": 589.5040035247803, + "p99": 688.9920234680176 + }, + "isolatedSum": { + "p50": 591.2319719791412, + "p90": 621.2799847126007, + "p95": 637.7280056476593, + "p99": 697.4399983882904 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 534380544, + "combineLogicalBytes": 534380544, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 465.8240079879761, + "p90": 484.3200147151947, + "p95": 496.2559938430786, + "p99": 558.8799715042114 + }, + "combine": { + "p50": 544.3519949913025, + "p90": 560.1599812507629, + "p95": 564.9600028991699, + "p99": 624.0959763526917 + }, + "roundtrip": { + "p50": 981.0879826545715, + "p90": 996.3520169258118, + "p95": 1007.7439546585083, + "p99": 1077.1839618682861 + }, + "isolatedSum": { + "p50": 1010.1760029792786, + "p90": 1044.4799959659576, + "p95": 1061.2159967422485, + "p99": 1182.975947856903 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1066119168, + "combineLogicalBytes": 1066119168, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 858.2080006599426, + "p90": 877.9839873313904, + "p95": 884.0000033378601, + "p99": 
925.6640076637268 + }, + "combine": { + "p50": 981.98401927948, + "p90": 994.4959878921509, + "p95": 1000.9280443191528, + "p99": 1111.9040250778198 + }, + "roundtrip": { + "p50": 1810.1119995117188, + "p90": 1826.0159492492676, + "p95": 1833.7279558181763, + "p99": 1947.551965713501 + }, + "isolatedSum": { + "p50": 1840.1920199394226, + "p90": 1872.4799752235413, + "p95": 1884.928047657013, + "p99": 2037.5680327415466 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2131722240, + "combineLogicalBytes": 2131722240, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-bc48bfe5", + "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||157ca81687ddb63", + "colorKey": "h200_d982b749", + "comparisonKey": "6da1f9e2ab025dbe", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:56:28.417730+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_0", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + 
"paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "157ca81687ddb63", + "workloadId": "set:3:a426d66e479dc893", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271827040", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271827040", + "createdAt": "2026-06-26T23:56:28.417730+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 116.80000275373459, + "p90": 135.80800592899323, + "p95": 142.14399456977844, + "p99": 172.7679967880249 + }, + "combine": { + "p50": 104.35199737548828, + "p90": 121.56800180673599, + "p95": 125.72799623012543, + "p99": 150.65599977970123 + }, + "roundtrip": { + "p50": 195.77600061893463, + "p90": 216.22399985790253, + "p95": 222.9440063238144, + "p99": 267.67998933792114 + }, + "isolatedSum": { + "p50": 221.15200012922287, + "p90": 257.3760077357292, + "p95": 267.87199079990387, + "p99": 323.42399656772614 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 202.36800611019135, + "p90": 223.1999933719635, + "p95": 231.58399760723114, + "p99": 263.7439966201782 + }, + "combine": { + "p50": 223.93600642681122, + "p90": 236.32000386714935, + "p95": 241.88800156116486, + "p99": 258.7839961051941 + }, + "roundtrip": { + "p50": 399.58399534225464, + "p90": 417.279988527298, + "p95": 424.4160056114197, + 
"p99": 459.77601408958435 + }, + "isolatedSum": { + "p50": 426.30401253700256, + "p90": 459.51999723911285, + "p95": 473.471999168396, + "p99": 522.5279927253723 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 312266752, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 542.5919890403748, + "p90": 557.5039982795715, + "p95": 563.3280277252197, + "p99": 587.8080129623413 + }, + "combine": { + "p50": 619.1999912261963, + "p90": 634.5599889755249, + "p95": 646.3040113449097, + "p99": 683.8080286979675 + }, + "roundtrip": { + "p50": 1131.1999559402466, + "p90": 1146.720051765442, + "p95": 1155.743956565857, + "p99": 1289.952039718628 + }, + "isolatedSum": { + "p50": 1161.791980266571, + "p90": 1192.0639872550964, + "p95": 1209.6320390701294, + "p99": 1271.6160416603088 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243805696, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-5553e87c", + "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "h200_d982b749", + "comparisonKey": "6d1b97a966875452", + "schemaVersion": 3, + "generatedAt": "2026-06-27T10:26:28.382976+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_4", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · 
deepep · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": "set:6:a426d66e479dc893", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28286432534", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28286432534", + "createdAt": "2026-06-27T10:26:28.382976+00:00", + "sha": "91c7acf59a5e524f37742922ec67721d86a03f6b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 116.89600348472595, + "p90": 136.22400164604187, + "p95": 142.2719955444336, + "p99": 153.1199961900711 + }, + "combine": { + "p50": 106.11200332641602, + "p90": 122.17599898576736, + "p95": 125.85599720478058, + "p99": 131.77600502967834 + }, + "roundtrip": { + "p50": 195.77600061893463, + "p90": 213.95200490951538, + "p95": 220.15999257564545, + "p99": 227.77600586414337 + }, + "isolatedSum": { + "p50": 223.00800681114197, + "p90": 258.40000063180923, + "p95": 268.1279927492142, + "p99": 284.89600121974945 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 
77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 146.88000082969666, + "p90": 168.06399822235107, + "p95": 172.54400253295898, + "p99": 178.17600071430206 + }, + "combine": { + "p50": 145.53600549697876, + "p90": 157.0879966020584, + "p95": 163.90399634838104, + "p99": 171.7119961977005 + }, + "roundtrip": { + "p50": 264.92801308631897, + "p90": 279.4879972934723, + "p95": 288.4800136089325, + "p99": 297.60000109672546 + }, + "isolatedSum": { + "p50": 292.4160063266754, + "p90": 325.1519948244095, + "p95": 336.44799888134, + "p99": 349.88799691200256 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155889664, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 202.04800367355347, + "p90": 215.5199944972992, + "p95": 223.68000447750092, + "p99": 234.52800512313843 + }, + "combine": { + "p50": 224.35200214385986, + "p90": 235.23199558258057, + "p95": 237.15199530124664, + "p99": 253.91998887062073 + }, + "roundtrip": { + "p50": 403.23200821876526, + "p90": 414.88000750541687, + "p95": 423.2639968395233, + "p99": 433.79199504852295 + }, + "isolatedSum": { + "p50": 426.40000581741333, + "p90": 450.75199007987976, + "p95": 460.83199977874756, + "p99": 488.44799399375916 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 312266752, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 314.62401151657104, + "p90": 331.13598823547363, + "p95": 335.80800890922546, + "p99": 
343.80799531936646 + }, + "combine": { + "p50": 356.03201389312744, + "p90": 364.1600012779236, + "p95": 366.5600121021271, + "p99": 376.22401118278503 + }, + "roundtrip": { + "p50": 647.8400230407715, + "p90": 659.1359972953796, + "p95": 664.9600267410278, + "p99": 687.2320175170898 + }, + "isolatedSum": { + "p50": 670.6560254096985, + "p90": 695.2959895133972, + "p95": 702.3680210113525, + "p99": 720.0320065021515 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 623443968, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 531.0080051422119, + "p90": 542.527973651886, + "p95": 549.4719743728638, + "p99": 571.0399746894836 + }, + "combine": { + "p50": 619.871973991394, + "p90": 628.9920210838318, + "p95": 632.9280138015747, + "p99": 642.4639821052551 + }, + "roundtrip": { + "p50": 1122.8159666061401, + "p90": 1134.7839832305908, + "p95": 1140.8319473266602, + "p99": 1158.4320068359375 + }, + "isolatedSum": { + "p50": 1150.879979133606, + "p90": 1171.5199947357178, + "p95": 1182.3999881744385, + "p99": 1213.5039567947388 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243805696, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1004.5440196990967, + "p90": 1023.7760543823242, + "p95": 1030.2400588989258, + "p99": 1042.464017868042 + }, + "combine": { + "p50": 1125.6959438323975, + "p90": 1136.1279487609863, + "p95": 1140.544056892395, + "p99": 1155.4239988327026 + }, + "roundtrip": { + "p50": 2086.143970489502, + "p90": 2106.048107147217, + "p95": 2112.096071243286, + "p99": 2332.0000171661377 + }, + "isolatedSum": { + "p50": 
2130.239963531494, + "p90": 2159.9040031433105, + "p95": 2170.784115791321, + "p99": 2197.8880167007446 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487009280, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-71f62108", + "identity": "h200|deepep|7168|8|256|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "h200_3a47b6c9", + "comparisonKey": "c80c3e7446de9680", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:50:05.486154+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_12", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": "set:6:a426d66e479dc893", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + 
"backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271618490", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271618490", + "createdAt": "2026-06-26T23:50:05.486154+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 118.68800222873688, + "p90": 127.3919939994812, + "p95": 133.31200182437897, + "p99": 144.57599818706512 + }, + "combine": { + "p50": 105.8880016207695, + "p90": 112.76800185441971, + "p95": 117.79200285673141, + "p99": 129.72800433635712 + }, + "roundtrip": { + "p50": 199.35999810695648, + "p90": 209.4399929046631, + "p95": 215.7440036535263, + "p99": 257.82400369644165 + }, + "isolatedSum": { + "p50": 224.57600384950638, + "p90": 240.1599958539009, + "p95": 251.10400468111038, + "p99": 274.30400252342224 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 145.4080045223236, + "p90": 154.91199493408203, + "p95": 159.2639982700348, + "p99": 170.6559956073761 + }, + "combine": { + "p50": 144.3520039319992, + "p90": 150.59199929237366, + "p95": 153.05599570274353, + "p99": 167.4879938364029 + }, + "roundtrip": { + "p50": 263.5200023651123, + "p90": 270.3680098056793, + "p95": 274.7200131416321, + "p99": 291.1039888858795 + }, + "isolatedSum": { + "p50": 289.7600084543228, + "p90": 305.5039942264557, + "p95": 312.3199939727783, + "p99": 338.143989443779 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155889664, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 5, + 
"correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 205.1839977502823, + "p90": 219.04000639915466, + "p95": 227.743998169899, + "p99": 242.5920069217682 + }, + "combine": { + "p50": 221.50400280952454, + "p90": 232.96000063419342, + "p95": 239.58399891853333, + "p99": 263.0400061607361 + }, + "roundtrip": { + "p50": 397.8239893913269, + "p90": 412.03200817108154, + "p95": 421.08801007270813, + "p99": 463.8400077819824 + }, + "isolatedSum": { + "p50": 426.6880005598068, + "p90": 452.0000070333481, + "p95": 467.3279970884323, + "p99": 505.6320130825043 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 312266752, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 316.0000145435333, + "p90": 324.0959942340851, + "p95": 328.99200916290283, + "p99": 351.6159951686859 + }, + "combine": { + "p50": 350.17600655555725, + "p90": 358.5599958896637, + "p95": 363.2960021495819, + "p99": 392.8639888763428 + }, + "roundtrip": { + "p50": 639.4559741020203, + "p90": 655.1039814949036, + "p95": 665.3760075569153, + "p99": 768.8000202178955 + }, + "isolatedSum": { + "p50": 666.1760210990906, + "p90": 682.6559901237488, + "p95": 692.2880113124847, + "p99": 744.4799840450287 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 623443968, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 527.4559855461121, + "p90": 543.936014175415, + "p95": 551.3280034065247, + "p99": 568.5439705848694 + }, + "combine": { + "p50": 612.384021282196, + "p90": 627.3279786109924, + "p95": 639.519989490509, + 
"p99": 984.5119714736938 + }, + "roundtrip": { + "p50": 1111.6160154342651, + "p90": 1130.6240558624268, + "p95": 1139.7759914398193, + "p99": 1297.5679636001587 + }, + "isolatedSum": { + "p50": 1139.840006828308, + "p90": 1171.2639927864075, + "p95": 1190.8479928970337, + "p99": 1553.0559420585632 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243805696, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 994.4959878921509, + "p90": 1017.6960229873657, + "p95": 1024.7360467910767, + "p99": 1044.8640584945679 + }, + "combine": { + "p50": 1103.9680242538452, + "p90": 1115.7439947128296, + "p95": 1122.3039627075195, + "p99": 1306.1439990997314 + }, + "roundtrip": { + "p50": 2064.448118209839, + "p90": 2089.344024658203, + "p95": 2106.0800552368164, + "p99": 2285.504102706909 + }, + "isolatedSum": { + "p50": 2098.464012145996, + "p90": 2133.4400177001953, + "p95": 2147.040009498596, + "p99": 2351.0080575942993 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487009280, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-2e712f4f", + "identity": "h200|deepep|7168|8|384|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||cd50548525dafdf", + "colorKey": "h200_d982b749", + "comparisonKey": "cbe784eff055b137", + "schemaVersion": 3, + "generatedAt": "2026-06-27T11:14:16.208325+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_0", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": 
"layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16", + "model": "Kimi-K2", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 384, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "cd50548525dafdf", + "workloadId": "set:6:b23bc0c4b6402c69", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28287501303", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28287501303", + "createdAt": "2026-06-27T11:14:16.208325+00:00", + "sha": "df7fddee0a275156d4c72fa006cd2b73bce72613" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 122.079998254776, + "p90": 145.60000598430634, + "p95": 166.4000004529953, + "p99": 212.38400042057037 + }, + "combine": { + "p50": 105.98400235176086, + "p90": 121.40800058841705, + "p95": 127.23200023174286, + "p99": 153.28000485897064 + }, + "roundtrip": { + "p50": 196.44799828529358, + "p90": 221.15199267864227, + "p95": 232.80000686645508, + "p99": 247.1040040254593 + }, + "isolatedSum": { + "p50": 228.06400060653687, + "p90": 267.0080065727234, + "p95": 
293.63200068473816, + "p99": 365.664005279541 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77514752, + "combineLogicalBytes": 77514752, + "fanoutMean": 5.2802734375, + "recvTokensMax": 707, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 147.07200229167938, + "p90": 169.11999881267548, + "p95": 174.72000420093536, + "p99": 191.5840059518814 + }, + "combine": { + "p50": 142.5279974937439, + "p90": 154.59200739860535, + "p95": 160.19199788570404, + "p99": 169.95200514793396 + }, + "roundtrip": { + "p50": 261.75999641418457, + "p90": 279.4879972934723, + "p95": 287.07200288772583, + "p99": 312.99200654029846 + }, + "isolatedSum": { + "p50": 289.5999997854233, + "p90": 323.7120062112808, + "p95": 334.9120020866394, + "p99": 361.53601109981537 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 154570752, + "combineLogicalBytes": 154570752, + "fanoutMean": 5.2646484375, + "recvTokensMax": 1391, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 208.92800390720367, + "p90": 239.71199989318848, + "p95": 256.6080093383789, + "p99": 289.8240089416504 + }, + "combine": { + "p50": 226.84800624847412, + "p90": 248.9600032567978, + "p95": 259.8400115966797, + "p99": 303.6159873008728 + }, + "roundtrip": { + "p50": 399.9040126800537, + "p90": 421.7599928379059, + "p95": 431.2640130519867, + "p99": 470.91200947761536 + }, + "isolatedSum": { + "p50": 435.7760101556778, + "p90": 488.67200314998627, + "p95": 516.4480209350586, + "p99": 593.4399962425232 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 309772288, + "combineLogicalBytes": 309772288, + "fanoutMean": 5.275390625, + "recvTokensMax": 2754, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 
8192, + "dispatch": { + "p50": 312.54398822784424, + "p90": 332.41599798202515, + "p95": 337.8239870071411, + "p99": 356.3520014286041 + }, + "combine": { + "p50": 352.03200578689575, + "p90": 364.03200030326843, + "p95": 370.88000774383545, + "p99": 386.49600744247437 + }, + "roundtrip": { + "p50": 642.1759724617004, + "p90": 663.8720035552979, + "p95": 672.4159717559814, + "p99": 710.2400064468384 + }, + "isolatedSum": { + "p50": 664.57599401474, + "p90": 696.4479982852936, + "p95": 708.7039947509766, + "p99": 742.8480088710785 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 619501568, + "combineLogicalBytes": 619501568, + "fanoutMean": 5.2750244140625, + "recvTokensMax": 5469, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 524.2239832878113, + "p90": 540.3519868850708, + "p95": 545.7919836044312, + "p99": 580.672025680542 + }, + "combine": { + "p50": 604.1600108146667, + "p90": 617.6319718360901, + "p95": 624.8639822006226, + "p99": 663.2959842681885 + }, + "roundtrip": { + "p50": 1101.6960144042969, + "p90": 1123.4560012817383, + "p95": 1130.944013595581, + "p99": 1197.759985923767 + }, + "isolatedSum": { + "p50": 1128.383994102478, + "p90": 1157.983958721161, + "p95": 1170.6559658050537, + "p99": 1243.9680099487305 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1239375872, + "combineLogicalBytes": 1239375872, + "fanoutMean": 5.276611328125, + "recvTokensMax": 10883, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 980.1279902458191, + "p90": 1001.3760328292847, + "p95": 1012.0639801025391, + "p99": 1055.3920269012451 + }, + "combine": { + "p50": 1095.0080156326294, + "p90": 1110.5279922485352, + "p95": 1121.8559741973877, + "p99": 1219.1040515899658 + }, + "roundtrip": { + "p50": 2037.1840000152588, + "p90": 
2063.6160373687744, + "p95": 2101.50408744812, + "p99": 2307.6798915863037 + }, + "isolatedSum": { + "p50": 2075.1360058784485, + "p90": 2111.90402507782, + "p95": 2133.9199542999268, + "p99": 2274.496078491211 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2479669248, + "combineLogicalBytes": 2479669248, + "fanoutMean": 5.278564453125, + "recvTokensMax": 21730, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-a8fb4d9b", + "identity": "h200|deepep|7168|8|384|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||cd50548525dafdf", + "colorKey": "h200_3a47b6c9", + "comparisonKey": "f6581a3621ac6cd2", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:53:25.459367+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_13", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16", + "model": "Kimi-K2", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 384, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "cd50548525dafdf", + "workloadId": "set:6:b23bc0c4b6402c69", + 
"workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271732597", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271732597", + "createdAt": "2026-06-26T23:53:25.459367+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 115.90400338172913, + "p90": 134.14399325847626, + "p95": 140.35199582576752, + "p99": 160.38399934768677 + }, + "combine": { + "p50": 104.09600287675858, + "p90": 119.71200257539749, + "p95": 124.64000284671783, + "p99": 145.31199634075165 + }, + "roundtrip": { + "p50": 195.64799964427948, + "p90": 212.8639966249466, + "p95": 219.9999988079071, + "p99": 230.3680032491684 + }, + "isolatedSum": { + "p50": 220.0000062584877, + "p90": 253.85599583387375, + "p95": 264.99199867248535, + "p99": 305.6959956884384 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77514752, + "combineLogicalBytes": 77514752, + "fanoutMean": 5.2802734375, + "recvTokensMax": 707, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 145.53600549697876, + "p90": 162.30399906635284, + "p95": 170.3999936580658, + "p99": 184.64000523090363 + }, + "combine": { + "p50": 143.77599954605103, + "p90": 157.21599757671356, + "p95": 162.27200627326965, + "p99": 175.64800381660461 + }, + "roundtrip": { + "p50": 265.1199996471405, + "p90": 283.90398621559143, + "p95": 289.0239953994751, + "p99": 302.0159900188446 + }, + "isolatedSum": { + "p50": 289.3120050430298, + "p90": 319.5199966430664, + "p95": 332.67199993133545, + "p99": 360.28800904750824 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 154570752, + 
"combineLogicalBytes": 154570752, + "fanoutMean": 5.2646484375, + "recvTokensMax": 1391, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 205.4399996995926, + "p90": 224.0000069141388, + "p95": 232.06399381160736, + "p99": 262.719988822937 + }, + "combine": { + "p50": 225.0880002975464, + "p90": 243.96799504756927, + "p95": 250.0160038471222, + "p99": 335.55200695991516 + }, + "roundtrip": { + "p50": 403.55199575424194, + "p90": 432.8959882259369, + "p95": 447.1360146999359, + "p99": 589.6000266075134 + }, + "isolatedSum": { + "p50": 430.527999997139, + "p90": 467.96800196170807, + "p95": 482.07999765872955, + "p99": 598.2719957828522 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 309772288, + "combineLogicalBytes": 309772288, + "fanoutMean": 5.275390625, + "recvTokensMax": 2754, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 312.79999017715454, + "p90": 334.7199857234955, + "p95": 340.1919901371002, + "p99": 368.73599886894226 + }, + "combine": { + "p50": 356.1280071735382, + "p90": 367.45598912239075, + "p95": 372.6719915866852, + "p99": 395.77600359916687 + }, + "roundtrip": { + "p50": 643.1999802589417, + "p90": 657.3759913444519, + "p95": 663.7439727783203, + "p99": 708.1599831581116 + }, + "isolatedSum": { + "p50": 668.9279973506927, + "p90": 702.1759748458862, + "p95": 712.8639817237854, + "p99": 764.5120024681091 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 619501568, + "combineLogicalBytes": 619501568, + "fanoutMean": 5.2750244140625, + "recvTokensMax": 5469, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 523.2639908790588, + "p90": 541.9520139694214, + "p95": 552.2559881210327, + "p99": 
611.3280057907104 + }, + "combine": { + "p50": 611.0079884529114, + "p90": 623.0080127716064, + "p95": 630.3359866142273, + "p99": 657.2480201721191 + }, + "roundtrip": { + "p50": 1108.7679862976074, + "p90": 1123.9999532699585, + "p95": 1132.3200464248657, + "p99": 1233.63196849823 + }, + "isolatedSum": { + "p50": 1134.2719793319702, + "p90": 1164.9600267410278, + "p95": 1182.59197473526, + "p99": 1268.5760259628296 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1239375872, + "combineLogicalBytes": 1239375872, + "fanoutMean": 5.276611328125, + "recvTokensMax": 10883, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 984.5119714736938, + "p90": 1019.4560289382935, + "p95": 1036.128044128418, + "p99": 1103.0399799346924 + }, + "combine": { + "p50": 1114.6559715270996, + "p90": 1129.472017288208, + "p95": 1136.896014213562, + "p99": 1180.3200244903564 + }, + "roundtrip": { + "p50": 2057.408094406128, + "p90": 2091.423988342285, + "p95": 2103.264093399048, + "p99": 2406.8479537963867 + }, + "isolatedSum": { + "p50": 2099.1679430007935, + "p90": 2148.9280462265015, + "p95": 2173.02405834198, + "p99": 2283.360004425049 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2479669248, + "combineLogicalBytes": 2479669248, + "fanoutMean": 5.278564453125, + "recvTokensMax": 21730, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-ad612267", + "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|prefill|normal|none|none|0|tuned||0a3064a2af0dd39", + "colorKey": "h200_b5c683eb", + "comparisonKey": "b18bebc70bf6167d", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:03:03.036669+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_13", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + 
"resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · balanced", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced", + "routingLabel": "balanced", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "0a3064a2af0dd39", + "workloadId": "set:6:2dad1a73ff872905", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28272035224", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272035224", + "createdAt": "2026-06-27T00:03:03.036669+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 132.60799646377563, + "p90": 151.8400013446808, + "p95": 157.3760062456131, + "p99": 181.47200345993042 + }, + "combine": { + "p50": 125.40799379348755, + "p90": 146.59200608730316, + "p95": 152.73599326610565, + "p99": 228.5439968109131 + }, + "roundtrip": { + "p50": 230.20799458026886, + "p90": 244.51200664043427, + "p95": 253.4080147743225, 
+ "p99": 302.2719919681549 + }, + "isolatedSum": { + "p50": 258.0159902572632, + "p90": 298.43200743198395, + "p95": 310.11199951171875, + "p99": 410.0160002708435 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 8, + "recvTokensMax": 1024, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 175.99999904632568, + "p90": 196.83200120925903, + "p95": 202.2400051355362, + "p99": 229.5680046081543 + }, + "combine": { + "p50": 175.58400332927704, + "p90": 189.82400000095367, + "p95": 193.79200041294098, + "p99": 265.5999958515167 + }, + "roundtrip": { + "p50": 323.0719864368439, + "p90": 339.29601311683655, + "p95": 345.3119993209839, + "p99": 369.4399893283844 + }, + "isolatedSum": { + "p50": 351.5840023756027, + "p90": 386.6560012102127, + "p95": 396.0320055484772, + "p99": 495.168000459671 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 8, + "recvTokensMax": 2048, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 275.35998821258545, + "p90": 301.472008228302, + "p95": 311.19999289512634, + "p99": 359.0080142021179 + }, + "combine": { + "p50": 268.5120105743408, + "p90": 284.38401222229004, + "p95": 289.3120050430298, + "p99": 321.6319978237152 + }, + "roundtrip": { + "p50": 519.9040174484253, + "p90": 549.2479801177979, + "p95": 559.6160292625427, + "p99": 602.4960279464722 + }, + "isolatedSum": { + "p50": 543.8719987869263, + "p90": 585.856020450592, + "p95": 600.5119979381561, + "p99": 680.6400120258331 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 469762048, + "combineLogicalBytes": 469762048, + "fanoutMean": 8, + "recvTokensMax": 4096, + "stragglerRank": 4, + "correct": true, + 
"samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 458.75200629234314, + "p90": 610.4320287704468, + "p95": 643.1999802589417, + "p99": 663.7120246887207 + }, + "combine": { + "p50": 451.3919949531555, + "p90": 462.911993265152, + "p95": 471.23199701309204, + "p99": 480.8639883995056 + }, + "roundtrip": { + "p50": 882.0160031318665, + "p90": 899.4879722595215, + "p95": 906.6879749298096, + "p99": 926.688015460968 + }, + "isolatedSum": { + "p50": 910.1440012454987, + "p90": 1073.3440220355988, + "p95": 1114.4319772720337, + "p99": 1144.5760130882263 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 939524096, + "combineLogicalBytes": 939524096, + "fanoutMean": 8, + "recvTokensMax": 8192, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 819.5520043373108, + "p90": 837.664008140564, + "p95": 856.3200235366821, + "p99": 920.5440282821655 + }, + "combine": { + "p50": 816.6080117225647, + "p90": 834.879994392395, + "p95": 846.9439744949341, + "p99": 919.264018535614 + }, + "roundtrip": { + "p50": 1605.247974395752, + "p90": 1634.1760158538818, + "p95": 1654.9760103225708, + "p99": 1745.8560466766357 + }, + "isolatedSum": { + "p50": 1636.1600160598755, + "p90": 1672.544002532959, + "p95": 1703.2639980316162, + "p99": 1839.8080468177795 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1879048192, + "combineLogicalBytes": 1879048192, + "fanoutMean": 8, + "recvTokensMax": 16384, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1564.7679567337036, + "p90": 1586.0799551010132, + "p95": 1601.8879413604736, + "p99": 1723.0720520019531 + }, + "combine": { + "p50": 1521.9199657440186, + "p90": 1538.7840270996094, + "p95": 1547.104001045227, + "p99": 1626.911997795105 + 
}, + "roundtrip": { + "p50": 3057.663917541504, + "p90": 3078.3679485321045, + "p95": 3098.1760025024414, + "p99": 3246.783971786499 + }, + "isolatedSum": { + "p50": 3086.687922477722, + "p90": 3124.8639822006226, + "p95": 3148.9919424057007, + "p99": 3349.984049797058 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3758096384, + "combineLogicalBytes": 3758096384, + "fanoutMean": 8, + "recvTokensMax": 32768, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-30f874f3", + "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|prefill|normal|none|none|0|tuned||9e6ac678a09f7f8", + "colorKey": "h200_b5c683eb", + "comparisonKey": "b18bebc70bf6167d", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:56:38.753854+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_11", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · balanced", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced", + "routingLabel": "balanced", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + 
"traceSignature": "9e6ac678a09f7f8", + "workloadId": "set:3:2dad1a73ff872905", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271834221", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271834221", + "createdAt": "2026-06-26T23:56:38.753854+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 131.99999928474426, + "p90": 147.74399995803833, + "p95": 155.68000078201294, + "p99": 168.7680035829544 + }, + "combine": { + "p50": 126.01600587368011, + "p90": 139.74399864673615, + "p95": 146.08000218868256, + "p99": 156.73600137233734 + }, + "roundtrip": { + "p50": 229.8559993505478, + "p90": 251.583993434906, + "p95": 260.0319981575012, + "p99": 275.07200837135315 + }, + "isolatedSum": { + "p50": 258.0160051584244, + "p90": 287.4879986047745, + "p95": 301.7600029706955, + "p99": 325.50400495529175 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 8, + "recvTokensMax": 1024, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 270.7520127296448, + "p90": 292.89600253105164, + "p95": 304.9600124359131, + "p99": 352.6400029659271 + }, + "combine": { + "p50": 268.5759961605072, + "p90": 281.76000714302063, + "p95": 287.200003862381, + "p99": 301.31199955940247 + }, + "roundtrip": { + "p50": 514.4960284233093, + "p90": 532.7360033988953, + "p95": 542.1119928359985, + "p99": 571.615993976593 + }, + "isolatedSum": { + "p50": 539.328008890152, + "p90": 574.6560096740723, + "p95": 592.1600162982941, + "p99": 653.9520025253296 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 469762048, + "combineLogicalBytes": 469762048, + "fanoutMean": 8, + "recvTokensMax": 4096, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 822.0800161361694, + "p90": 845.1840281486511, + "p95": 862.5919818878174, + "p99": 1313.3759498596191 + }, + "combine": { + "p50": 820.032000541687, + "p90": 837.7919793128967, + "p95": 846.3680148124695, + "p99": 873.3440041542053 + }, + "roundtrip": { + "p50": 1605.9520244598389, + "p90": 1629.3439865112305, + "p95": 1645.1200246810913, + "p99": 1737.1840476989746 + }, + "isolatedSum": { + "p50": 1642.1120166778564, + "p90": 1682.9760074615479, + "p95": 1708.9599967002869, + "p99": 2186.7199540138245 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1879048192, + "combineLogicalBytes": 1879048192, + "fanoutMean": 8, + "recvTokensMax": 16384, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-a2c76343", + "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|8|prefill|normal|none|none|0|tuned||7aa44c7b86748b9", + "colorKey": "h200_d0dfa19a", + "comparisonKey": "4ade4ca52869383d", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:56:42.077253+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_7", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · balanced-rank-local", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced-rank-local", + 
"routingLabel": "balanced-rank-local", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "7aa44c7b86748b9", + "workloadId": "set:3:388ff74baef05c72", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271841288", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271841288", + "createdAt": "2026-06-26T23:56:42.077253+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 85.08799970149994, + "p90": 109.40799862146378, + "p95": 117.47200042009354, + "p99": 164.38399255275726 + }, + "combine": { + "p50": 71.45600020885468, + "p90": 86.496002972126, + "p95": 91.26400202512741, + "p99": 106.20799660682678 + }, + "roundtrip": { + "p50": 132.51200318336487, + "p90": 162.7199947834015, + "p95": 173.8560050725937, + "p99": 221.5680032968521 + }, + "isolatedSum": { + "p50": 156.54399991035461, + "p90": 195.90400159358978, + "p95": 208.73600244522095, + "p99": 270.59198915958405 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 1, + "recvTokensMax": 128, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { 
+ "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 111.00800335407257, + "p90": 130.43199479579926, + "p95": 136.80000603199005, + "p99": 165.75999557971954 + }, + "combine": { + "p50": 118.1119978427887, + "p90": 134.62400436401367, + "p95": 139.67999815940857, + "p99": 149.6639996767044 + }, + "roundtrip": { + "p50": 202.30400562286377, + "p90": 223.83999824523926, + "p95": 241.85599386692047, + "p99": 296.25600576400757 + }, + "isolatedSum": { + "p50": 229.12000119686127, + "p90": 265.0559991598129, + "p95": 276.4800041913986, + "p99": 315.42399525642395 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 1, + "recvTokensMax": 512, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 189.63199853897095, + "p90": 209.72800254821777, + "p95": 218.4319943189621, + "p99": 254.14401292800903 + }, + "combine": { + "p50": 284.960001707077, + "p90": 298.7520098686218, + "p95": 303.2959997653961, + "p99": 331.9999873638153 + }, + "roundtrip": { + "p50": 447.3919868469238, + "p90": 475.42399168014526, + "p95": 484.8639965057373, + "p99": 529.9519896507263 + }, + "isolatedSum": { + "p50": 474.592000246048, + "p90": 508.4800124168396, + "p95": 521.7279940843582, + "p99": 586.1440002918243 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 1, + "recvTokensMax": 2048, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-79209ba6", + "identity": "h200|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|balanced+eplb|8|prefill|normal|none|none|0|tuned||df54a9510825f71", + "colorKey": "h200_06544e53", + "comparisonKey": "822fd37c7222ef9b", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:03:05.638717+00:00", + "status": "valid", + "publicationStatus": 
"official", + "runner": "h200-dgxc-slurm_8", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · balanced+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "balanced", + "routingLabel": "balanced+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "df54a9510825f71", + "workloadId": "set:6:2dad1a73ff872905", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": 1, + "eplbImbalanceAfter": 1, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28272038593", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272038593", + "createdAt": "2026-06-27T00:03:05.638717+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 95.83999961614609, + "p90": 112.73600161075592, + "p95": 121.15199863910675, + "p99": 153.4080058336258 + }, + "combine": { + "p50": 83.99999886751175, + "p90": 97.6639986038208, + "p95": 
104.22399640083313, + "p99": 116.89600348472595 + }, + "roundtrip": { + "p50": 154.1759967803955, + "p90": 176.32000148296356, + "p95": 183.45600366592407, + "p99": 211.29600703716278 + }, + "isolatedSum": { + "p50": 179.83999848365784, + "p90": 210.40000021457672, + "p95": 225.37599503993988, + "p99": 270.30400931835175 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 2, + "recvTokensMax": 384, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 115.55200070142746, + "p90": 135.68000495433807, + "p95": 143.39199662208557, + "p99": 163.26400637626648 + }, + "combine": { + "p50": 103.35999727249146, + "p90": 120.03199756145477, + "p95": 127.32799351215363, + "p99": 154.4319987297058 + }, + "roundtrip": { + "p50": 197.2160041332245, + "p90": 215.58399498462677, + "p95": 223.26399385929108, + "p99": 242.5599992275238 + }, + "isolatedSum": { + "p50": 218.91199797391891, + "p90": 255.71200251579285, + "p95": 270.7199901342392, + "p99": 317.6960051059723 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 2, + "recvTokensMax": 768, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 144.6080058813095, + "p90": 169.0559983253479, + "p95": 176.7680048942566, + "p99": 208.064004778862 + }, + "combine": { + "p50": 140.47999680042267, + "p90": 155.74400126934052, + "p95": 161.6639941930771, + "p99": 184.1920018196106 + }, + "roundtrip": { + "p50": 262.9440128803253, + "p90": 282.24000334739685, + "p95": 290.6560003757477, + "p99": 320.0640082359314 + }, + "isolatedSum": { + "p50": 285.0880026817322, + "p90": 324.7999995946884, + "p95": 338.4319990873337, + "p99": 392.2560065984726 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 2, + "recvTokensMax": 1536, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 205.4080069065094, + "p90": 229.21599447727203, + "p95": 238.49600553512573, + "p99": 255.67999482154846 + }, + "combine": { + "p50": 214.7199958562851, + "p90": 231.90400004386902, + "p95": 236.86400055885315, + "p99": 255.64798712730408 + }, + "roundtrip": { + "p50": 399.4239866733551, + "p90": 420.48001289367676, + "p95": 429.6000003814697, + "p99": 593.7280058860779 + }, + "isolatedSum": { + "p50": 420.1280027627945, + "p90": 461.11999452114105, + "p95": 475.3600060939789, + "p99": 511.32798194885254 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 2, + "recvTokensMax": 3072, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 331.743985414505, + "p90": 350.23999214172363, + "p95": 361.08800768852234, + "p99": 406.0479998588562 + }, + "combine": { + "p50": 360.54399609565735, + "p90": 375.90399384498596, + "p95": 382.78400897979736, + "p99": 404.4159948825836 + }, + "roundtrip": { + "p50": 664.0639901161194, + "p90": 679.9039840698242, + "p95": 693.5679912567139, + "p99": 743.1359887123108 + }, + "isolatedSum": { + "p50": 692.2879815101624, + "p90": 726.1439859867096, + "p95": 743.8720166683197, + "p99": 810.4639947414398 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 469762048, + "combineLogicalBytes": 469762048, + "fanoutMean": 2, + "recvTokensMax": 6144, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 580.5439949035645, + "p90": 593.3759808540344, + "p95": 602.3679971694946, + 
"p99": 637.6640200614929 + }, + "combine": { + "p50": 628.3519864082336, + "p90": 641.1839723587036, + "p95": 648.3839750289917, + "p99": 680.9279918670654 + }, + "roundtrip": { + "p50": 1181.7920207977295, + "p90": 1199.295997619629, + "p95": 1210.3359699249268, + "p99": 1255.4240226745605 + }, + "isolatedSum": { + "p50": 1208.895981311798, + "p90": 1234.559953212738, + "p95": 1250.7519721984863, + "p99": 1318.5920119285583 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 939524096, + "combineLogicalBytes": 939524096, + "fanoutMean": 2, + "recvTokensMax": 12288, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-c14326f0", + "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|prefill|normal|none|none|0|tuned||38fd0bcf7109c32", + "colorKey": "h200_189562cd", + "comparisonKey": "b9475bb176588857", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:57:32.803411+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_13", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · hotspot-single", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-single", + "routingLabel": "hotspot-single", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + 
"fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "38fd0bcf7109c32", + "workloadId": "set:3:b952d4a43d688b50", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271862413", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271862413", + "createdAt": "2026-06-26T23:57:32.803411+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 125.11999905109406, + "p90": 140.00000059604645, + "p95": 149.88799393177032, + "p99": 171.23199999332428 + }, + "combine": { + "p50": 118.65600198507309, + "p90": 132.64000415802002, + "p95": 137.60000467300415, + "p99": 164.95999693870544 + }, + "roundtrip": { + "p50": 221.5680032968521, + "p90": 238.14399540424347, + "p95": 251.71199440956116, + "p99": 291.6480004787445 + }, + "isolatedSum": { + "p50": 243.77600103616714, + "p90": 272.64000475406647, + "p95": 287.4879986047745, + "p99": 336.1919969320297 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1024, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 245.02399563789368, + "p90": 257.31199979782104, + "p95": 265.8880054950714, + "p99": 298.72000217437744 + }, + "combine": { + "p50": 263.68001103401184, + "p90": 275.32801032066345, + "p95": 281.9199860095978, + "p99": 299.1679906845093 + }, + "roundtrip": { + "p50": 481.9839894771576, + "p90": 495.6800043582916, + "p95": 
506.1759948730469, + "p99": 808.3199858665466 + }, + "isolatedSum": { + "p50": 508.7040066719055, + "p90": 532.6400101184845, + "p95": 547.8079915046692, + "p99": 597.8879928588867 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311091200, + "combineLogicalBytes": 311091200, + "fanoutMean": 5.2978515625, + "recvTokensMax": 4096, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 725.1200079917908, + "p90": 748.0959892272949, + "p95": 763.0079984664917, + "p99": 812.0959997177124 + }, + "combine": { + "p50": 799.3280291557312, + "p90": 813.9839768409729, + "p95": 823.5200047492981, + "p99": 875.6160140037537 + }, + "roundtrip": { + "p50": 1494.3679571151733, + "p90": 1516.1919593811035, + "p95": 1528.2560586929321, + "p99": 1709.8560333251953 + }, + "isolatedSum": { + "p50": 1524.448037147522, + "p90": 1562.0799660682678, + "p95": 1586.5280032157898, + "p99": 1687.712013721466 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1241511936, + "combineLogicalBytes": 1241511936, + "fanoutMean": 5.28570556640625, + "recvTokensMax": 16384, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-17171887", + "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|prefill|normal|none|none|0|tuned||bfbb64a166e9f1c", + "colorKey": "h200_189562cd", + "comparisonKey": "b9475bb176588857", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:05:28.346517+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_4", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 
8, + "label": "H200 EP8 · deepep · bf16 · hotspot-single", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-single", + "routingLabel": "hotspot-single", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "bfbb64a166e9f1c", + "workloadId": "set:6:b952d4a43d688b50", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28272106904", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272106904", + "createdAt": "2026-06-27T00:05:28.346517+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 124.41600114107132, + "p90": 159.9999964237213, + "p95": 167.39200055599213, + "p99": 211.42399311065674 + }, + "combine": { + "p50": 118.01599711179733, + "p90": 146.5280055999756, + "p95": 150.27199685573578, + "p99": 162.9759967327118 + }, + "roundtrip": { + "p50": 220.2560007572174, + "p90": 253.91998887062073, + "p95": 258.432000875473, + "p99": 271.42399549484253 + }, + "isolatedSum": { + "p50": 242.43199825286865, + "p90": 306.5280020236969, + "p95": 317.6639974117279, + "p99": 374.39998984336853 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1024, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 162.432000041008, + "p90": 170.6240028142929, + "p95": 175.04000663757324, + "p99": 188.38399648666382 + }, + "combine": { + "p50": 165.0879979133606, + "p90": 175.7120043039322, + "p95": 179.83999848365784, + "p99": 191.77600741386414 + }, + "roundtrip": { + "p50": 301.66399478912354, + "p90": 317.3759877681732, + "p95": 322.6880133152008, + "p99": 333.69600772857666 + }, + "isolatedSum": { + "p50": 327.5199979543686, + "p90": 346.3360071182251, + "p95": 354.8800051212311, + "p99": 380.16000390052795 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156090368, + "combineLogicalBytes": 156090368, + "fanoutMean": 5.31640625, + "recvTokensMax": 2048, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 238.97600173950195, + "p90": 247.0400035381317, + "p95": 250.0160038471222, + "p99": 259.39199328422546 + }, + "combine": { + "p50": 261.9200050830841, + "p90": 275.2000093460083, + "p95": 279.58399057388306, + "p99": 300.4480004310608 + }, + "roundtrip": { + "p50": 482.33601450920105, + "p90": 499.1999864578247, + "p95": 507.3919892311096, + "p99": 570.527970790863 + }, + "isolatedSum": { + "p50": 500.89600682258606, + "p90": 522.24001288414, + "p95": 529.5999944210052, + "p99": 559.8399937152863 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311091200, + "combineLogicalBytes": 311091200, + "fanoutMean": 5.2978515625, + "recvTokensMax": 4096, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 392.8639888763428, + "p90": 402.72000432014465, + "p95": 
406.23998641967773, + "p99": 445.3760087490082 + }, + "combine": { + "p50": 443.1680142879486, + "p90": 455.80801367759705, + "p95": 461.5040123462677, + "p99": 481.53600096702576 + }, + "roundtrip": { + "p50": 817.5680041313171, + "p90": 835.2320194244385, + "p95": 845.3760147094727, + "p99": 893.887996673584 + }, + "isolatedSum": { + "p50": 836.0320031642914, + "p90": 858.5280179977417, + "p95": 867.7439987659454, + "p99": 926.9120097160339 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 620648448, + "combineLogicalBytes": 620648448, + "fanoutMean": 5.2847900390625, + "recvTokensMax": 8192, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 718.783974647522, + "p90": 730.3680181503296, + "p95": 737.280011177063, + "p99": 808.1920146942139 + }, + "combine": { + "p50": 797.4399924278259, + "p90": 810.8800053596497, + "p95": 820.032000541687, + "p99": 849.3760228157043 + }, + "roundtrip": { + "p50": 1490.3680086135864, + "p90": 1507.5839757919312, + "p95": 1519.2960500717163, + "p99": 1630.944013595581 + }, + "isolatedSum": { + "p50": 1516.223967075348, + "p90": 1541.2480235099792, + "p95": 1557.31201171875, + "p99": 1657.5680375099182 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1241511936, + "combineLogicalBytes": 1241511936, + "fanoutMean": 5.28570556640625, + "recvTokensMax": 16384, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1386.0160112380981, + "p90": 1401.0560512542725, + "p95": 1406.1440229415894, + "p99": 1621.7279434204102 + }, + "combine": { + "p50": 1483.199954032898, + "p90": 1497.5999593734741, + "p95": 1502.17604637146, + "p99": 1538.0480289459229 + }, + "roundtrip": { + "p50": 2845.855951309204, + "p90": 2863.840103149414, + "p95": 2879.647970199585, + "p99": 3068.063974380493 + }, + 
"isolatedSum": { + "p50": 2869.215965270996, + "p90": 2898.6560106277466, + "p95": 2908.3200693130493, + "p99": 3159.775972366333 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2484242432, + "combineLogicalBytes": 2484242432, + "fanoutMean": 5.288299560546875, + "recvTokensMax": 32768, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-f354b9c6", + "identity": "h200|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|hotspot-single+eplb|8|prefill|normal|none|none|0|tuned||29ae5ace13636f8", + "colorKey": "h200_80a72891", + "comparisonKey": "52b3ac7f405659bf", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:05:25.966329+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_7", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · hotspot-single+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "hotspot-single", + "routingLabel": "hotspot-single+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "29ae5ace13636f8", + "workloadId": "set:6:b952d4a43d688b50", + 
"workloadSource": "canonical-serialized", + "eplbImbalanceBefore": 1.8466796875, + "eplbImbalanceAfter": 1.0002700343276514, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28272110404", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272110404", + "createdAt": "2026-06-27T00:05:25.966329+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 117.24799871444702, + "p90": 134.17600095272064, + "p95": 140.25600254535675, + "p99": 158.84800255298615 + }, + "combine": { + "p50": 107.68000036478043, + "p90": 119.39200013875961, + "p95": 123.99999797344208, + "p99": 129.82399761676788 + }, + "roundtrip": { + "p50": 196.60800695419312, + "p90": 215.16799926757812, + "p95": 223.07200729846954, + "p99": 271.232008934021 + }, + "isolatedSum": { + "p50": 224.92799907922745, + "p90": 253.56800109148026, + "p95": 264.2560005187988, + "p99": 288.672000169754 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77701120, + "combineLogicalBytes": 77701120, + "fanoutMean": 5.29296875, + "recvTokensMax": 697, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 145.9520012140274, + "p90": 165.69599509239197, + "p95": 176.35199427604675, + "p99": 214.49600160121918 + }, + "combine": { + "p50": 143.61600577831268, + "p90": 153.28000485897064, + "p95": 157.3439985513687, + "p99": 169.91999745368958 + }, + "roundtrip": { + "p50": 263.7439966201782, + "p90": 279.1680097579956, + "p95": 287.07200288772583, + "p99": 316.0960078239441 + }, + "isolatedSum": { + "p50": 289.5680069923401, + "p90": 318.9759999513626, + "p95": 333.69599282741547, + "p99": 384.41599905490875 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 155187200, + "combineLogicalBytes": 155187200, + "fanoutMean": 5.28564453125, + "recvTokensMax": 1372, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 204.96000349521637, + "p90": 222.81600534915924, + "p95": 232.1919947862625, + "p99": 259.552001953125 + }, + "combine": { + "p50": 222.4320024251938, + "p90": 239.51999843120575, + "p95": 245.2480047941208, + "p99": 269.3760097026825 + }, + "roundtrip": { + "p50": 400.83199739456177, + "p90": 421.7279851436615, + "p95": 431.3279986381531, + "p99": 482.14399814605713 + }, + "isolatedSum": { + "p50": 427.39200592041016, + "p90": 462.336003780365, + "p95": 477.4399995803833, + "p99": 528.9280116558075 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311162880, + "combineLogicalBytes": 311162880, + "fanoutMean": 5.299072265625, + "recvTokensMax": 2761, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 313.76001238822937, + "p90": 342.24000573158264, + "p95": 361.11998558044434, + "p99": 480.3520143032074 + }, + "combine": { + "p50": 359.20000076293945, + "p90": 373.79199266433716, + "p95": 381.9519877433777, + "p99": 407.77599811553955 + }, + "roundtrip": { + "p50": 644.2880034446716, + "p90": 664.1600131988525, + "p95": 676.4799952507019, + "p99": 748.8639950752258 + }, + "isolatedSum": { + "p50": 672.9600131511688, + "p90": 716.0319983959198, + "p95": 743.071973323822, + "p99": 888.128012418747 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 619974656, + "combineLogicalBytes": 619974656, + "fanoutMean": 5.279052734375, + "recvTokensMax": 5481, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 538.3679866790771, + "p90": 557.1839809417725, + 
"p95": 566.0160183906555, + "p99": 608.0639958381653 + }, + "combine": { + "p50": 618.9759969711304, + "p90": 630.3359866142273, + "p95": 636.2559795379639, + "p99": 653.5680294036865 + }, + "roundtrip": { + "p50": 1131.2960386276245, + "p90": 1151.263952255249, + "p95": 1159.0080261230469, + "p99": 1297.9520559310913 + }, + "isolatedSum": { + "p50": 1157.3439836502075, + "p90": 1187.5199675559998, + "p95": 1202.2719979286194, + "p99": 1261.6320252418518 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240020992, + "combineLogicalBytes": 1240020992, + "fanoutMean": 5.27935791015625, + "recvTokensMax": 10883, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 984.2240214347839, + "p90": 1003.5840272903442, + "p95": 1015.2319669723511, + "p99": 1056.480050086975 + }, + "combine": { + "p50": 1093.9840078353882, + "p90": 1107.9679727554321, + "p95": 1119.9040412902832, + "p99": 1297.055959701538 + }, + "roundtrip": { + "p50": 2046.5600490570068, + "p90": 2070.3680515289307, + "p95": 2092.5118923187256, + "p99": 2573.024034500122 + }, + "isolatedSum": { + "p50": 2078.208029270172, + "p90": 2111.5520000457764, + "p95": 2135.1360082626343, + "p99": 2353.536009788513 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480414720, + "combineLogicalBytes": 2480414720, + "fanoutMean": 5.2801513671875, + "recvTokensMax": 21702, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-db979d37", + "identity": "h200|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|uniform+eplb|8|prefill|normal|none|none|0|tuned||2225dbbdab9bf2d", + "colorKey": "h200_580d7b05", + "comparisonKey": "b1de1efab41abbdf", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:02:37.856020+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_0", + "sku": "h200", + "backend": 
"deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · uniform+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "uniform", + "routingLabel": "uniform+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "2225dbbdab9bf2d", + "workloadId": "set:6:a426d66e479dc893", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": 1.006072998046875, + "eplbImbalanceAfter": 1.0000152587890625, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28272024348", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272024348", + "createdAt": "2026-06-27T00:02:37.856020+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 115.87200313806534, + "p90": 125.88800489902496, + "p95": 131.71200454235077, + "p99": 142.46399700641632 + }, + "combine": { + "p50": 103.96800190210342, + "p90": 115.48800021409988, + "p95": 122.68800288438797, + "p99": 204.3199986219406 + }, 
+ "roundtrip": { + "p50": 195.5839991569519, + "p90": 206.65599405765533, + "p95": 212.25599944591522, + "p99": 236.03199422359467 + }, + "isolatedSum": { + "p50": 219.84000504016876, + "p90": 241.37600511312485, + "p95": 254.40000742673874, + "p99": 346.78399562835693 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77041664, + "combineLogicalBytes": 77041664, + "fanoutMean": 5.248046875, + "recvTokensMax": 686, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 147.39200472831726, + "p90": 165.50399363040924, + "p95": 174.20800030231476, + "p99": 197.11999595165253 + }, + "combine": { + "p50": 146.7839926481247, + "p90": 158.55999290943146, + "p95": 162.9440039396286, + "p99": 175.20000040531158 + }, + "roundtrip": { + "p50": 266.7520046234131, + "p90": 286.24001145362854, + "p95": 293.1840121746063, + "p99": 322.33598828315735 + }, + "isolatedSum": { + "p50": 294.17599737644196, + "p90": 324.0639865398407, + "p95": 337.15200424194336, + "p99": 372.3199963569641 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 154542080, + "combineLogicalBytes": 154542080, + "fanoutMean": 5.263671875, + "recvTokensMax": 1365, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 203.74399423599243, + "p90": 221.11999988555908, + "p95": 229.95199263095856, + "p99": 253.08799743652344 + }, + "combine": { + "p50": 222.52799570560455, + "p90": 234.72000658512115, + "p95": 238.24000358581543, + "p99": 259.3280076980591 + }, + "roundtrip": { + "p50": 398.17601442337036, + "p90": 415.74400663375854, + "p95": 422.04800248146057, + "p99": 459.26401019096375 + }, + "isolatedSum": { + "p50": 426.271989941597, + "p90": 455.84000647068024, + "p95": 468.191996216774, + "p99": 512.4160051345825 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 
310589440, + "combineLogicalBytes": 310589440, + "fanoutMean": 5.289306640625, + "recvTokensMax": 2746, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 310.36800146102905, + "p90": 327.93599367141724, + "p95": 333.72798562049866, + "p99": 371.8079924583435 + }, + "combine": { + "p50": 355.9679985046387, + "p90": 369.4719970226288, + "p95": 383.07198882102966, + "p99": 431.4880073070526 + }, + "roundtrip": { + "p50": 641.9199705123901, + "p90": 660.9920263290405, + "p95": 668.9280271530151, + "p99": 718.9760208129883 + }, + "isolatedSum": { + "p50": 666.3359999656677, + "p90": 697.407990694046, + "p95": 716.7999744415283, + "p99": 803.2959997653961 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 619171840, + "combineLogicalBytes": 619171840, + "fanoutMean": 5.272216796875, + "recvTokensMax": 5467, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 537.1519923210144, + "p90": 553.5680055618286, + "p95": 562.6559853553772, + "p99": 586.9759917259216 + }, + "combine": { + "p50": 612.1600270271301, + "p90": 625.0240206718445, + "p95": 633.8880062103271, + "p99": 660.863995552063 + }, + "roundtrip": { + "p50": 1119.968056678772, + "p90": 1136.064052581787, + "p95": 1145.2480554580688, + "p99": 1263.4880542755127 + }, + "isolatedSum": { + "p50": 1149.3120193481445, + "p90": 1178.592026233673, + "p95": 1196.5439915657043, + "p99": 1247.8399872779846 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1238945792, + "combineLogicalBytes": 1238945792, + "fanoutMean": 5.2747802734375, + "recvTokensMax": 10913, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1003.8080215454102, + "p90": 1027.008056640625, + "p95": 
1034.432053565979, + "p99": 1060.1919889450073 + }, + "combine": { + "p50": 1111.0399961471558, + "p90": 1125.8879899978638, + "p95": 1135.3280544281006, + "p99": 1165.727972984314 + }, + "roundtrip": { + "p50": 2077.5039196014404, + "p90": 2101.6640663146973, + "p95": 2114.016056060791, + "p99": 2324.8000144958496 + }, + "isolatedSum": { + "p50": 2114.848017692566, + "p90": 2152.8960466384888, + "p95": 2169.7601079940796, + "p99": 2225.9199619293213 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2481747968, + "combineLogicalBytes": 2481747968, + "fanoutMean": 5.282989501953125, + "recvTokensMax": 21789, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-59b7e35e", + "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|prefill|normal|none|none|0|tuned||4caecd33bedf786", + "colorKey": "h200_b6aa6110", + "comparisonKey": "b89b8b0279afe699", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:56:59.891356+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_2", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · zipf", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf", + "routingLabel": "zipf", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + 
"fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "4caecd33bedf786", + "workloadId": "set:3:830e36e88869e222", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271848591", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271848591", + "createdAt": "2026-06-26T23:56:59.891356+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 122.46400117874146, + "p90": 133.85599851608276, + "p95": 141.15199446678162, + "p99": 168.12799870967865 + }, + "combine": { + "p50": 112.5440001487732, + "p90": 125.791996717453, + "p95": 132.1599930524826, + "p99": 143.327996134758 + }, + "roundtrip": { + "p50": 215.7440036535263, + "p90": 240.03200232982635, + "p95": 247.13599681854248, + "p99": 281.5360128879547 + }, + "isolatedSum": { + "p50": 235.00800132751465, + "p90": 259.64799523353577, + "p95": 273.3119875192642, + "p99": 311.45599484443665 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 238.0480021238327, + "p90": 249.439999461174, + "p95": 253.34399938583374, + "p99": 271.39198780059814 + }, + "combine": { + "p50": 259.3280076980591, + "p90": 273.6639976501465, + "p95": 278.1440019607544, + "p99": 748.5759854316711 + }, + "roundtrip": { + "p50": 472.7039933204651, + "p90": 492.76798963546753, + "p95": 
497.5360035896301, + "p99": 524.8640179634094 + }, + "isolatedSum": { + "p50": 497.3760098218918, + "p90": 523.1039971113205, + "p95": 531.4880013465881, + "p99": 1019.9679732322693 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 201678848, + "combineLogicalBytes": 201678848, + "fanoutMean": 3.4345703125, + "recvTokensMax": 4094, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 671.0079908370972, + "p90": 682.7840209007263, + "p95": 691.6159987449646, + "p99": 782.4000120162964 + }, + "combine": { + "p50": 788.0319952964783, + "p90": 803.0400276184082, + "p95": 810.4000091552734, + "p99": 879.2639970779419 + }, + "roundtrip": { + "p50": 1432.5439929962158, + "p90": 1457.2800397872925, + "p95": 1470.2719449996948, + "p99": 1641.3120031356812 + }, + "isolatedSum": { + "p50": 1459.0399861335754, + "p90": 1485.8240485191345, + "p95": 1502.016007900238, + "p99": 1661.6640090942383 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 808822784, + "combineLogicalBytes": 808822784, + "fanoutMean": 3.44354248046875, + "recvTokensMax": 16380, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-520b6c38", + "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|prefill|normal|none|none|0|tuned||b5217e990b95f86", + "colorKey": "h200_b6aa6110", + "comparisonKey": "b89b8b0279afe699", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:03:30.997265+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_9", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + 
"label": "H200 EP8 · deepep · bf16 · zipf", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf", + "routingLabel": "zipf", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b5217e990b95f86", + "workloadId": "set:6:830e36e88869e222", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28272049186", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272049186", + "createdAt": "2026-06-27T00:03:30.997265+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 122.14399874210358, + "p90": 134.14399325847626, + "p95": 146.01600170135498, + "p99": 162.62400150299072 + }, + "combine": { + "p50": 112.92800307273865, + "p90": 121.11999839544296, + "p95": 126.68800354003906, + "p99": 141.50400459766388 + }, + "roundtrip": { + "p50": 214.30400013923645, + "p90": 228.28799486160278, + "p95": 232.57599771022797, + "p99": 247.48800694942474 + }, + "isolatedSum": { + "p50": 235.07200181484222, + "p90": 255.26399165391922, + "p95": 272.70400524139404, + "p99": 304.1280061006546 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + 
"combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 170.43200135231018, + "p90": 187.71199882030487, + "p95": 196.0960030555725, + "p99": 223.00800681114197 + }, + "combine": { + "p50": 163.87200355529785, + "p90": 181.60000443458557, + "p95": 186.36800348758698, + "p99": 197.02400267124176 + }, + "roundtrip": { + "p50": 303.8400113582611, + "p90": 328.000009059906, + "p95": 333.0560028553009, + "p99": 366.2079870700836 + }, + "isolatedSum": { + "p50": 334.30400490760803, + "p90": 369.31200325489044, + "p95": 382.4640065431595, + "p99": 420.0320094823837 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 100509696, + "combineLogicalBytes": 100509696, + "fanoutMean": 3.42333984375, + "recvTokensMax": 2046, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 235.83999276161194, + "p90": 244.6720004081726, + "p95": 248.86399507522583, + "p99": 265.4080092906952 + }, + "combine": { + "p50": 259.90399718284607, + "p90": 269.6639895439148, + "p95": 276.06400847435, + "p99": 299.0399897098541 + }, + "roundtrip": { + "p50": 473.471999168396, + "p90": 492.12801456451416, + "p95": 498.3679950237274, + "p99": 528.544008731842 + }, + "isolatedSum": { + "p50": 495.743989944458, + "p90": 514.3359899520874, + "p95": 524.9280035495758, + "p99": 564.4479990005493 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 201678848, + "combineLogicalBytes": 201678848, + "fanoutMean": 3.4345703125, + "recvTokensMax": 4094, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 381.087988615036, + "p90": 397.47199416160583, + "p95": 404.35200929641724, + "p99": 
493.4079945087433 + }, + "combine": { + "p50": 437.27999925613403, + "p90": 450.8799910545349, + "p95": 458.3039879798889, + "p99": 476.25601291656494 + }, + "roundtrip": { + "p50": 790.5600070953369, + "p90": 804.9920201301575, + "p95": 813.9200210571289, + "p99": 841.5359854698181 + }, + "isolatedSum": { + "p50": 818.36798787117, + "p90": 848.3519852161407, + "p95": 862.6559972763062, + "p99": 969.6640074253082 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 405035008, + "combineLogicalBytes": 405035008, + "fanoutMean": 3.4488525390625, + "recvTokensMax": 8189, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 672.1280217170715, + "p90": 685.0879788398743, + "p95": 689.9200081825256, + "p99": 743.4560060501099 + }, + "combine": { + "p50": 783.1360101699829, + "p90": 793.0560111999512, + "p95": 796.6399788856506, + "p99": 806.5599799156189 + }, + "roundtrip": { + "p50": 1425.7919788360596, + "p90": 1442.0160055160522, + "p95": 1455.4879665374756, + "p99": 1550.75204372406 + }, + "isolatedSum": { + "p50": 1455.2640318870544, + "p90": 1478.1439900398254, + "p95": 1486.5599870681763, + "p99": 1550.0159859657288 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 808822784, + "combineLogicalBytes": 808822784, + "fanoutMean": 3.44354248046875, + "recvTokensMax": 16380, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1269.1839933395386, + "p90": 1284.1919660568237, + "p95": 1291.8720245361328, + "p99": 1339.2640352249146 + }, + "combine": { + "p50": 1472.8000164031982, + "p90": 1489.8879528045654, + "p95": 1502.17604637146, + "p99": 1692.639946937561 + }, + "roundtrip": { + "p50": 2711.7760181427, + "p90": 2730.015993118286, + "p95": 2753.5040378570557, + "p99": 2926.464080810547 + }, + "isolatedSum": { + "p50": 
2741.984009742737, + "p90": 2774.079918861389, + "p95": 2794.048070907593, + "p99": 3031.9039821624756 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1619795968, + "combineLogicalBytes": 1619795968, + "fanoutMean": 3.4481201171875, + "recvTokensMax": 32761, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-f5907eae", + "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|prefill|normal|none|none|0|tuned||3dd868cb33839a3", + "colorKey": "h200_c5b3365a", + "comparisonKey": "d19848fb38a35ed8", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:57:20.998823+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_3", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · zipf-heavy", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "3dd868cb33839a3", + "workloadId": "set:3:1ca614e23cc66be1", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + 
"eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271855852", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271855852", + "createdAt": "2026-06-26T23:57:20.998823+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 117.72800236940384, + "p90": 127.48800218105316, + "p95": 133.91999900341034, + "p99": 146.11199498176575 + }, + "combine": { + "p50": 107.29599744081497, + "p90": 117.3119992017746, + "p95": 122.43200093507767, + "p99": 134.11200046539307 + }, + "roundtrip": { + "p50": 205.85599541664124, + "p90": 220.09600698947906, + "p95": 228.5120040178299, + "p99": 244.09599602222443 + }, + "isolatedSum": { + "p50": 225.0239998102188, + "p90": 244.80000138282776, + "p95": 256.351999938488, + "p99": 280.2239954471588 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22650880, + "combineLogicalBytes": 22650880, + "fanoutMean": 1.54296875, + "recvTokensMax": 1024, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 232.89600014686584, + "p90": 245.12000381946564, + "p95": 253.4399926662445, + "p99": 292.03200340270996 + }, + "combine": { + "p50": 245.34399807453156, + "p90": 260.25599241256714, + "p95": 269.27998661994934, + "p99": 297.37600684165955 + }, + "roundtrip": { + "p50": 454.68801259994507, + "p90": 472.6080000400543, + "p95": 486.6560101509094, + "p99": 522.4639773368835 + }, + "isolatedSum": { + "p50": 478.2399982213974, + "p90": 505.3759962320328, + "p95": 522.7199792861938, + "p99": 589.4080102443695 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 91521024, + "combineLogicalBytes": 91521024, + "fanoutMean": 1.55859375, + "recvTokensMax": 
4096, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 662.335991859436, + "p90": 673.632025718689, + "p95": 681.2160015106201, + "p99": 744.5759773254395 + }, + "combine": { + "p50": 772.5759744644165, + "p90": 791.8720245361328, + "p95": 806.6239953041077, + "p99": 855.2640080451965 + }, + "roundtrip": { + "p50": 1405.9840440750122, + "p90": 1435.2960586547852, + "p95": 1455.7119607925415, + "p99": 1716.3519859313965 + }, + "isolatedSum": { + "p50": 1434.9119663238525, + "p90": 1465.5040502548218, + "p95": 1487.8399968147278, + "p99": 1599.839985370636 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 368062464, + "combineLogicalBytes": 368062464, + "fanoutMean": 1.5670166015625, + "recvTokensMax": 16384, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-75dcaec2", + "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|prefill|normal|none|none|0|tuned||bbcd1d9d8d1e4fe", + "colorKey": "h200_c5b3365a", + "comparisonKey": "d19848fb38a35ed8", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:04:55.820445+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_8", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · zipf-heavy", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + 
"combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "bbcd1d9d8d1e4fe", + "workloadId": "set:6:1ca614e23cc66be1", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28272093905", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272093905", + "createdAt": "2026-06-27T00:04:55.820445+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 118.14399808645248, + "p90": 130.17599284648895, + "p95": 135.5839967727661, + "p99": 147.07200229167938 + }, + "combine": { + "p50": 108.83200168609619, + "p90": 120.57600170373917, + "p95": 127.55200266838074, + "p99": 140.73599874973297 + }, + "roundtrip": { + "p50": 206.65599405765533, + "p90": 219.04000639915466, + "p95": 224.48000311851501, + "p99": 242.0479953289032 + }, + "isolatedSum": { + "p50": 226.97599977254868, + "p90": 250.75199455022812, + "p95": 263.13599944114685, + "p99": 287.80800104141235 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22650880, + "combineLogicalBytes": 22650880, + "fanoutMean": 1.54296875, + "recvTokensMax": 1024, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 155.45600652694702, + "p90": 165.95199704170227, + "p95": 173.92000555992126, + 
"p99": 202.39999890327454 + }, + "combine": { + "p50": 150.94399452209473, + "p90": 162.59199380874634, + "p95": 170.3680008649826, + "p99": 186.24000251293182 + }, + "roundtrip": { + "p50": 287.6800000667572, + "p90": 302.94400453567505, + "p95": 309.7279965877533, + "p99": 357.7919900417328 + }, + "isolatedSum": { + "p50": 306.40000104904175, + "p90": 328.5439908504486, + "p95": 344.28800642490387, + "p99": 388.64000141620636 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 45688832, + "combineLogicalBytes": 45688832, + "fanoutMean": 1.55615234375, + "recvTokensMax": 2048, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 230.9119999408722, + "p90": 242.11199581623077, + "p95": 249.66399371623993, + "p99": 269.8880136013031 + }, + "combine": { + "p50": 247.16800451278687, + "p90": 260.5760097503662, + "p95": 264.6400034427643, + "p99": 289.66400027275085 + }, + "roundtrip": { + "p50": 456.86399936676025, + "p90": 473.28001260757446, + "p95": 481.1519980430603, + "p99": 534.8799824714661 + }, + "isolatedSum": { + "p50": 478.08000445365906, + "p90": 502.688005566597, + "p95": 514.3039971590042, + "p99": 559.552013874054 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 91521024, + "combineLogicalBytes": 91521024, + "fanoutMean": 1.55859375, + "recvTokensMax": 4096, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 374.2719888687134, + "p90": 386.6559863090515, + "p95": 397.5679874420166, + "p99": 506.0480237007141 + }, + "combine": { + "p50": 423.1039881706238, + "p90": 436.0319972038269, + "p95": 440.8319890499115, + "p99": 470.97599506378174 + }, + "roundtrip": { + "p50": 771.232008934021, + "p90": 783.9679718017578, + "p95": 795.5520153045654, + "p99": 828.4800052642822 + }, + "isolatedSum": { + "p50": 797.3759770393372, + 
"p90": 822.6879835128784, + "p95": 838.3999764919281, + "p99": 977.0240187644958 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 183916544, + "combineLogicalBytes": 183916544, + "fanoutMean": 1.5660400390625, + "recvTokensMax": 8192, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 661.8559956550598, + "p90": 673.9199757575989, + "p95": 679.6159744262695, + "p99": 697.5039839744568 + }, + "combine": { + "p50": 770.6559896469116, + "p90": 781.1520099639893, + "p95": 786.7839932441711, + "p99": 830.560028553009 + }, + "roundtrip": { + "p50": 1405.791997909546, + "p90": 1421.280026435852, + "p95": 1432.2559833526611, + "p99": 1481.6319942474365 + }, + "isolatedSum": { + "p50": 1432.5119853019714, + "p90": 1455.0719857215881, + "p95": 1466.3999676704407, + "p99": 1528.0640125274658 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 368062464, + "combineLogicalBytes": 368062464, + "fanoutMean": 1.5670166015625, + "recvTokensMax": 16384, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1246.2400197982788, + "p90": 1261.631965637207, + "p95": 1269.5679664611816, + "p99": 1482.5600385665894 + }, + "combine": { + "p50": 1440.384030342102, + "p90": 1459.455966949463, + "p95": 1471.519947052002, + "p99": 1634.0479850769043 + }, + "roundtrip": { + "p50": 2662.400007247925, + "p90": 2688.096046447754, + "p95": 2712.4478816986084, + "p99": 2846.719980239868 + }, + "isolatedSum": { + "p50": 2686.624050140381, + "p90": 2721.08793258667, + "p95": 2741.0879135131836, + "p99": 3116.6080236434937 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 734720000, + "combineLogicalBytes": 734720000, + "fanoutMean": 1.56402587890625, + "recvTokensMax": 32768, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + 
} + ] + }, + { + "id": "cx-9bcc6cfd", + "identity": "h200|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-heavy+eplb|8|prefill|normal|none|none|0|tuned||46855e7fa6754eb", + "colorKey": "h200_06aa1194", + "comparisonKey": "fe01776775c5fb5e", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:05:23.968491+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_3", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · zipf-heavy+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "46855e7fa6754eb", + "workloadId": "set:6:1ca614e23cc66be1", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": 7.38995361328125, + "eplbImbalanceAfter": 1.0000210716610862, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28272097307", + "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272097307", + "createdAt": "2026-06-27T00:05:23.968491+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 117.79200285673141, + "p90": 122.94399738311768, + "p95": 127.96799838542938, + "p99": 138.33600282669067 + }, + "combine": { + "p50": 104.38399761915207, + "p90": 111.35999858379364, + "p95": 117.79200285673141, + "p99": 128.63999605178833 + }, + "roundtrip": { + "p50": 197.82400131225586, + "p90": 205.85599541664124, + "p95": 212.351992726326, + "p99": 252.86400318145752 + }, + "isolatedSum": { + "p50": 222.17600047588348, + "p90": 234.30399596691132, + "p95": 245.7600012421608, + "p99": 266.975998878479 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 79206400, + "combineLogicalBytes": 79206400, + "fanoutMean": 5.3955078125, + "recvTokensMax": 713, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 145.60000598430634, + "p90": 154.14400398731232, + "p95": 158.39999914169312, + "p99": 173.63199591636658 + }, + "combine": { + "p50": 145.6959992647171, + "p90": 150.56000649929047, + "p95": 155.2640050649643, + "p99": 165.56799411773682 + }, + "roundtrip": { + "p50": 267.520010471344, + "p90": 276.99199318885803, + "p95": 283.03998708724976, + "p99": 307.3599934577942 + }, + "isolatedSum": { + "p50": 291.29600524902344, + "p90": 304.7040104866028, + "p95": 313.6640042066574, + "p99": 339.1999900341034 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 159330304, + "combineLogicalBytes": 159330304, + "fanoutMean": 5.4267578125, + "recvTokensMax": 1436, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 204.79999482631683, + "p90": 213.85599672794342, + "p95": 
218.27200055122375, + "p99": 238.52799832820892 + }, + "combine": { + "p50": 219.4879949092865, + "p90": 226.9439995288849, + "p95": 233.66400599479675, + "p99": 274.944007396698 + }, + "roundtrip": { + "p50": 400.160014629364, + "p90": 409.7279906272888, + "p95": 419.16799545288086, + "p99": 445.6320106983185 + }, + "isolatedSum": { + "p50": 424.28798973560333, + "p90": 440.7999962568283, + "p95": 451.9360065460205, + "p99": 513.4720057249069 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 319535104, + "combineLogicalBytes": 319535104, + "fanoutMean": 5.441650390625, + "recvTokensMax": 2897, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 317.7599906921387, + "p90": 327.87200808525085, + "p95": 340.06398916244507, + "p99": 393.3440148830414 + }, + "combine": { + "p50": 356.1600148677826, + "p90": 364.6079897880554, + "p95": 369.82399225234985, + "p99": 396.8319892883301 + }, + "roundtrip": { + "p50": 649.6959924697876, + "p90": 660.3519916534424, + "p95": 664.7040247917175, + "p99": 683.4239959716797 + }, + "isolatedSum": { + "p50": 673.9200055599213, + "p90": 692.4799978733063, + "p95": 709.8879814147949, + "p99": 790.1760041713715 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 638410752, + "combineLogicalBytes": 638410752, + "fanoutMean": 5.43603515625, + "recvTokensMax": 5815, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 541.1199927330017, + "p90": 549.8560070991516, + "p95": 555.4239749908447, + "p99": 643.6160206794739 + }, + "combine": { + "p50": 614.8800253868103, + "p90": 626.3039708137512, + "p95": 632.2240233421326, + "p99": 680.8639764785767 + }, + "roundtrip": { + "p50": 1131.7440271377563, + "p90": 1142.7839994430542, + "p95": 1148.192048072815, + "p99": 1196.768045425415 + }, + "isolatedSum": { + 
"p50": 1156.000018119812, + "p90": 1176.1599779129028, + "p95": 1187.6479983329773, + "p99": 1324.4799971580505 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1275144192, + "combineLogicalBytes": 1275144192, + "fanoutMean": 5.42889404296875, + "recvTokensMax": 11606, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1027.9680490493774, + "p90": 1046.720027923584, + "p95": 1055.4239749908447, + "p99": 1100.000023841858 + }, + "combine": { + "p50": 1124.384045600891, + "p90": 1135.9679698944092, + "p95": 1140.8640146255493, + "p99": 1170.9760427474976 + }, + "roundtrip": { + "p50": 2114.5920753479004, + "p90": 2138.495922088623, + "p95": 2152.127981185913, + "p99": 2480.2560806274414 + }, + "isolatedSum": { + "p50": 2152.3520946502686, + "p90": 2182.687997817993, + "p95": 2196.287989616394, + "p99": 2270.9760665893555 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2546374656, + "combineLogicalBytes": 2546374656, + "fanoutMean": 5.420562744140625, + "recvTokensMax": 23170, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-e075077e", + "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-mild|8|prefill|normal|none|none|0|tuned||cf93f8f6b52e428", + "colorKey": "h200_6a794fcd", + "comparisonKey": "b6c24dab2941895d", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:04:10.125267+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_2", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · zipf-mild", + 
"model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-mild", + "routingLabel": "zipf-mild", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "cf93f8f6b52e428", + "workloadId": "set:6:a224603e5a1640b8", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28272065129", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272065129", + "createdAt": "2026-06-27T00:04:10.125267+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 126.52799487113953, + "p90": 140.3840035200119, + "p95": 146.17599546909332, + "p99": 177.08800733089447 + }, + "combine": { + "p50": 116.73600226640701, + "p90": 128.86400520801544, + "p95": 133.63200426101685, + "p99": 143.8719928264618 + }, + "roundtrip": { + "p50": 216.35200083255768, + "p90": 234.3360036611557, + "p95": 240.25599658489227, + "p99": 277.3120105266571 + }, + "isolatedSum": { + "p50": 243.26399713754654, + "p90": 269.24800872802734, + "p95": 279.80799973011017, + "p99": 320.96000015735626 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 70160384, + "combineLogicalBytes": 70160384, + "fanoutMean": 
4.779296875, + "recvTokensMax": 987, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 163.96799683570862, + "p90": 176.256000995636, + "p95": 180.4479956626892, + "p99": 201.50400698184967 + }, + "combine": { + "p50": 160.41600704193115, + "p90": 173.0560064315796, + "p95": 178.3680021762848, + "p99": 186.75200641155243 + }, + "roundtrip": { + "p50": 298.94399642944336, + "p90": 319.487988948822, + "p95": 328.0960023403168, + "p99": 354.65601086616516 + }, + "isolatedSum": { + "p50": 324.38400387763977, + "p90": 349.3120074272156, + "p95": 358.815997838974, + "p99": 388.2560133934021 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 140879872, + "combineLogicalBytes": 140879872, + "fanoutMean": 4.79833984375, + "recvTokensMax": 1972, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 237.72799968719482, + "p90": 252.48000025749207, + "p95": 263.8719975948334, + "p99": 307.16800689697266 + }, + "combine": { + "p50": 262.1760070323944, + "p90": 279.1999876499176, + "p95": 284.7999930381775, + "p99": 311.8399977684021 + }, + "roundtrip": { + "p50": 477.82400250434875, + "p90": 500.70399045944214, + "p95": 516.5759921073914, + "p99": 701.632022857666 + }, + "isolatedSum": { + "p50": 499.90400671958923, + "p90": 531.6799879074097, + "p95": 548.6719906330109, + "p99": 619.0080046653748 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 282333184, + "combineLogicalBytes": 282333184, + "fanoutMean": 4.80810546875, + "recvTokensMax": 3936, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 378.495991230011, + "p90": 390.04799723625183, + "p95": 399.58399534225464, + "p99": 429.6320080757141 + }, + "combine": { + "p50": 
439.9360120296478, + "p90": 452.2880017757416, + "p95": 457.15200901031494, + "p99": 474.047988653183 + }, + "roundtrip": { + "p50": 797.4079847335815, + "p90": 816.32000207901, + "p95": 828.6399841308594, + "p99": 955.839991569519 + }, + "isolatedSum": { + "p50": 818.4320032596588, + "p90": 842.3359990119934, + "p95": 856.7360043525696, + "p99": 903.6799967288971 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 566716416, + "combineLogicalBytes": 566716416, + "fanoutMean": 4.8255615234375, + "recvTokensMax": 7855, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 679.4559955596924, + "p90": 694.208025932312, + "p95": 704.255998134613, + "p99": 742.8159713745117 + }, + "combine": { + "p50": 780.7040214538574, + "p90": 795.1679825782776, + "p95": 804.7360181808472, + "p99": 879.7439932823181 + }, + "roundtrip": { + "p50": 1432.0640563964844, + "p90": 1453.279972076416, + "p95": 1465.8559560775757, + "p99": 1602.3039817810059 + }, + "isolatedSum": { + "p50": 1460.1600170135498, + "p90": 1489.3760085105896, + "p95": 1508.9920163154602, + "p99": 1622.5599646568298 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1132285952, + "combineLogicalBytes": 1132285952, + "fanoutMean": 4.8206787109375, + "recvTokensMax": 15694, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1287.071943283081, + "p90": 1304.8959970474243, + "p95": 1310.7839822769165, + "p99": 1432.2240352630615 + }, + "combine": { + "p50": 1463.6160135269165, + "p90": 1483.8080406188965, + "p95": 1511.7119550704956, + "p99": 1699.0400552749634 + }, + "roundtrip": { + "p50": 2723.9038944244385, + "p90": 2744.607925415039, + "p95": 2758.2719326019287, + "p99": 2967.616081237793 + }, + "isolatedSum": { + "p50": 2750.6879568099976, + "p90": 2788.704037666321, + "p95": 
2822.495937347412, + "p99": 3131.264090538025 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2267840512, + "combineLogicalBytes": 2267840512, + "fanoutMean": 4.82763671875, + "recvTokensMax": 31357, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-f4768a96", + "identity": "h200|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-mild+eplb|8|prefill|normal|none|none|0|tuned||27ddc85ded0add9", + "colorKey": "h200_b2ffaf91", + "comparisonKey": "d826aaa5f1321f31", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:04:16.163335+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_12", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · zipf-mild+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-mild", + "routingLabel": "zipf-mild+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "27ddc85ded0add9", + "workloadId": "set:6:a224603e5a1640b8", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": 2.545684814453125, + "eplbImbalanceAfter": 
1.0001495361328125, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28272068834", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272068834", + "createdAt": "2026-06-27T00:04:16.163335+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 118.07999759912491, + "p90": 128.63999605178833, + "p95": 134.62400436401367, + "p99": 156.2879979610443 + }, + "combine": { + "p50": 105.47199845314026, + "p90": 114.43199962377548, + "p95": 119.19999867677689, + "p99": 136.09600067138672 + }, + "roundtrip": { + "p50": 197.24799692630768, + "p90": 206.01600408554077, + "p95": 211.0079973936081, + "p99": 226.01599991321564 + }, + "isolatedSum": { + "p50": 223.55199605226517, + "p90": 243.0719956755638, + "p95": 253.82400304079056, + "p99": 292.38399863243103 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78159872, + "combineLogicalBytes": 78159872, + "fanoutMean": 5.32421875, + "recvTokensMax": 702, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 149.08799529075623, + "p90": 157.27999806404114, + "p95": 161.56800091266632, + "p99": 172.83199727535248 + }, + "combine": { + "p50": 143.77599954605103, + "p90": 148.99200201034546, + "p95": 152.12799608707428, + "p99": 163.68000209331512 + }, + "roundtrip": { + "p50": 265.28000831604004, + "p90": 273.50398898124695, + "p95": 279.35999631881714, + "p99": 293.37599873542786 + }, + "isolatedSum": { + "p50": 292.86399483680725, + "p90": 306.2720000743866, + "p95": 313.6959969997406, + "p99": 336.5119993686676 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156563456, + "combineLogicalBytes": 156563456, + "fanoutMean": 5.33251953125, + 
"recvTokensMax": 1393, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 206.2080055475235, + "p90": 216.99200570583344, + "p95": 223.03999960422516, + "p99": 264.44798707962036 + }, + "combine": { + "p50": 225.40800273418427, + "p90": 233.37599635124207, + "p95": 238.65599930286407, + "p99": 253.56799364089966 + }, + "roundtrip": { + "p50": 404.4800102710724, + "p90": 415.2959883213043, + "p95": 423.552006483078, + "p99": 451.9039988517761 + }, + "isolatedSum": { + "p50": 431.61600828170776, + "p90": 450.3680020570755, + "p95": 461.69599890708923, + "p99": 518.01598072052 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 312410112, + "combineLogicalBytes": 312410112, + "fanoutMean": 5.3203125, + "recvTokensMax": 2773, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 313.27998638153076, + "p90": 324.8960077762604, + "p95": 334.7199857234955, + "p99": 349.2160141468048 + }, + "combine": { + "p50": 357.05599188804626, + "p90": 370.59199810028076, + "p95": 381.4080059528351, + "p99": 418.43199729919434 + }, + "roundtrip": { + "p50": 643.7439918518066, + "p90": 656.0959815979004, + "p95": 666.2399768829346, + "p99": 702.9759883880615 + }, + "isolatedSum": { + "p50": 670.335978269577, + "p90": 695.4880058765411, + "p95": 716.1279916763306, + "p99": 767.6480114459991 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 622712832, + "combineLogicalBytes": 622712832, + "fanoutMean": 5.3023681640625, + "recvTokensMax": 5498, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 530.1439762115479, + "p90": 539.5519733428955, + "p95": 543.008029460907, + "p99": 568.9600110054016 + }, + "combine": { + "p50": 611.5840077400208, + 
"p90": 622.048020362854, + "p95": 629.2799711227417, + "p99": 677.5040030479431 + }, + "roundtrip": { + "p50": 1115.488052368164, + "p90": 1129.248023033142, + "p95": 1135.583996772766, + "p99": 1275.6479978561401 + }, + "isolatedSum": { + "p50": 1141.7279839515686, + "p90": 1161.5999937057495, + "p95": 1172.2880005836487, + "p99": 1246.4640140533447 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1245038592, + "combineLogicalBytes": 1245038592, + "fanoutMean": 5.30072021484375, + "recvTokensMax": 10955, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 986.1119985580444, + "p90": 1002.2720098495483, + "p95": 1011.0080242156982, + "p99": 1069.0239667892456 + }, + "combine": { + "p50": 1125.3440380096436, + "p90": 1136.6080045700073, + "p95": 1142.3360109329224, + "p99": 1163.8400554656982 + }, + "roundtrip": { + "p50": 2081.088066101074, + "p90": 2097.9840755462646, + "p95": 2111.0079288482666, + "p99": 2311.743974685669 + }, + "isolatedSum": { + "p50": 2111.456036567688, + "p90": 2138.8800144195557, + "p95": 2153.3440351486206, + "p99": 2232.864022254944 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2489460736, + "combineLogicalBytes": 2489460736, + "fanoutMean": 5.299407958984375, + "recvTokensMax": 21864, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-e1ecd1d4", + "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-moderate|8|prefill|normal|none|none|0|tuned||b5217e990b95f86", + "colorKey": "h200_f2b19f62", + "comparisonKey": "a7c9c0202574b9d0", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:04:45.749249+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_10", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": 
"backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · zipf-moderate", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b5217e990b95f86", + "workloadId": "set:6:6709a02c31933a9f", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28272079152", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272079152", + "createdAt": "2026-06-27T00:04:45.749249+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 122.94399738311768, + "p90": 134.20799374580383, + "p95": 138.87999951839447, + "p99": 150.87999403476715 + }, + "combine": { + "p50": 111.90400272607803, + "p90": 122.43200093507767, + "p95": 128.38399410247803, + "p99": 136.4479959011078 + }, + "roundtrip": { + "p50": 213.8880044221878, + "p90": 230.43200373649597, + "p95": 236.735999584198, + "p99": 
261.4080011844635 + }, + "isolatedSum": { + "p50": 234.8480001091957, + "p90": 256.6399946808815, + "p95": 267.2639936208725, + "p99": 287.32798993587494 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 161.72799468040466, + "p90": 174.20800030231476, + "p95": 182.49599635601044, + "p99": 194.72000002861023 + }, + "combine": { + "p50": 158.27199816703796, + "p90": 174.8799979686737, + "p95": 179.58399653434753, + "p99": 191.26400351524353 + }, + "roundtrip": { + "p50": 296.9920039176941, + "p90": 319.0079927444458, + "p95": 327.2320032119751, + "p99": 340.03201127052307 + }, + "isolatedSum": { + "p50": 319.9999928474426, + "p90": 349.08799827098846, + "p95": 362.07999289035797, + "p99": 385.98400354385376 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 100509696, + "combineLogicalBytes": 100509696, + "fanoutMean": 3.42333984375, + "recvTokensMax": 2046, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 237.34399676322937, + "p90": 252.19199061393738, + "p95": 259.48798656463623, + "p99": 274.0800082683563 + }, + "combine": { + "p50": 260.44800877571106, + "p90": 278.2079875469208, + "p95": 284.7999930381775, + "p99": 298.880010843277 + }, + "roundtrip": { + "p50": 475.1040041446686, + "p90": 495.2319860458374, + "p95": 509.3119740486145, + "p99": 531.8080186843872 + }, + "isolatedSum": { + "p50": 497.79200553894043, + "p90": 530.3999781608582, + "p95": 544.2879796028137, + "p99": 572.9600191116333 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 201678848, + "combineLogicalBytes": 201678848, + "fanoutMean": 3.4345703125, + "recvTokensMax": 4094, + "stragglerRank": 5, + 
"correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 379.4879913330078, + "p90": 389.60000872612, + "p95": 395.6800103187561, + "p99": 409.92000699043274 + }, + "combine": { + "p50": 438.1760060787201, + "p90": 452.06400752067566, + "p95": 457.69599080085754, + "p99": 494.59201097488403 + }, + "roundtrip": { + "p50": 794.2079901695251, + "p90": 809.7919821739197, + "p95": 823.6799836158752, + "p99": 875.6160140037537 + }, + "isolatedSum": { + "p50": 817.6639974117279, + "p90": 841.6640162467957, + "p95": 853.3760011196136, + "p99": 904.5120179653168 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 405035008, + "combineLogicalBytes": 405035008, + "fanoutMean": 3.4488525390625, + "recvTokensMax": 8189, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 671.2319850921631, + "p90": 682.6879978179932, + "p95": 689.2480254173279, + "p99": 929.0879964828491 + }, + "combine": { + "p50": 786.7839932441711, + "p90": 799.1999983787537, + "p95": 804.2880296707153, + "p99": 833.6960077285767 + }, + "roundtrip": { + "p50": 1430.0800561904907, + "p90": 1449.9200582504272, + "p95": 1461.3120555877686, + "p99": 1667.8080558776855 + }, + "isolatedSum": { + "p50": 1458.0159783363342, + "p90": 1481.8879961967468, + "p95": 1493.5360550880432, + "p99": 1762.7840042114258 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 808822784, + "combineLogicalBytes": 808822784, + "fanoutMean": 3.44354248046875, + "recvTokensMax": 16380, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1269.2480087280273, + "p90": 1284.5439910888672, + "p95": 1292.9919958114624, + "p99": 1424.064040184021 + }, + "combine": { + "p50": 1480.6400537490845, + "p90": 1504.7039985656738, + "p95": 
1519.10400390625, + "p99": 1724.0320444107056 + }, + "roundtrip": { + "p50": 2719.4879055023193, + "p90": 2740.70405960083, + "p95": 2764.8000717163086, + "p99": 3076.0960578918457 + }, + "isolatedSum": { + "p50": 2749.888062477112, + "p90": 2789.247989654541, + "p95": 2812.0959997177124, + "p99": 3148.0960845947266 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1619795968, + "combineLogicalBytes": 1619795968, + "fanoutMean": 3.4481201171875, + "recvTokensMax": 32761, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-f58892d6", + "identity": "h200|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-moderate+eplb|8|prefill|normal|none|none|0|tuned||2b57a75d27f5b39", + "colorKey": "h200_bac4102c", + "comparisonKey": "402825358de599a6", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:04:49.601548+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_6", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · zipf-moderate+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": 
"packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "2b57a75d27f5b39", + "workloadId": "set:6:6709a02c31933a9f", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": 4.895263671875, + "eplbImbalanceAfter": 1.0000902811686199, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28272082600", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272082600", + "createdAt": "2026-06-27T00:04:49.601548+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 117.72800236940384, + "p90": 136.25599443912506, + "p95": 140.8960074186325, + "p99": 185.34399569034576 + }, + "combine": { + "p50": 103.61599922180176, + "p90": 115.9679964184761, + "p95": 122.49600142240524, + "p99": 137.7599984407425 + }, + "roundtrip": { + "p50": 197.02400267124176, + "p90": 215.13600647449493, + "p95": 222.6240038871765, + "p99": 233.43999683856964 + }, + "isolatedSum": { + "p50": 221.3440015912056, + "p90": 252.22399085760117, + "p95": 263.39200884103775, + "p99": 323.10399413108826 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77385728, + "combineLogicalBytes": 77385728, + "fanoutMean": 5.271484375, + "recvTokensMax": 691, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 145.21600306034088, + "p90": 166.1120057106018, + "p95": 175.1679927110672, + "p99": 194.91200149059296 + }, + "combine": { + "p50": 144.22400295734406, + "p90": 156.2879979610443, + "p95": 161.18399798870087, + "p99": 171.90399765968323 + }, + "roundtrip": { + "p50": 262.87999749183655, + "p90": 277.5999903678894, + "p95": 286.3999903202057, + "p99": 298.97600412368774 + }, + 
"isolatedSum": { + "p50": 289.44000601768494, + "p90": 322.4000036716461, + "p95": 336.35199069976807, + "p99": 366.8159991502762 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155172864, + "combineLogicalBytes": 155172864, + "fanoutMean": 5.28515625, + "recvTokensMax": 1378, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 203.3279985189438, + "p90": 218.36799383163452, + "p95": 226.1440008878708, + "p99": 242.8479939699173 + }, + "combine": { + "p50": 223.00800681114197, + "p90": 237.5359982252121, + "p95": 245.7599937915802, + "p99": 267.2959864139557 + }, + "roundtrip": { + "p50": 399.77601170539856, + "p90": 420.415997505188, + "p95": 433.1839978694916, + "p99": 505.40798902511597 + }, + "isolatedSum": { + "p50": 426.33600533008575, + "p90": 455.9039920568466, + "p95": 471.903994679451, + "p99": 510.143980383873 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 310546432, + "combineLogicalBytes": 310546432, + "fanoutMean": 5.28857421875, + "recvTokensMax": 2745, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 312.1280074119568, + "p90": 327.7119994163513, + "p95": 334.879994392395, + "p99": 400.4479944705963 + }, + "combine": { + "p50": 352.7680039405823, + "p90": 362.527996301651, + "p95": 367.6159977912903, + "p99": 386.0799968242645 + }, + "roundtrip": { + "p50": 641.1839723587036, + "p90": 658.1119894981384, + "p95": 666.0159826278687, + "p99": 719.5199728012085 + }, + "isolatedSum": { + "p50": 664.8960113525391, + "p90": 690.2399957180023, + "p95": 702.4959921836853, + "p99": 786.5279912948608 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 620619776, + "combineLogicalBytes": 620619776, + "fanoutMean": 5.2845458984375, + "recvTokensMax": 5526, + "stragglerRank": 6, + "correct": true, + 
"samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 527.5200009346008, + "p90": 542.4320101737976, + "p95": 550.4639744758606, + "p99": 575.2959847450256 + }, + "combine": { + "p50": 620.3839778900146, + "p90": 633.5999965667725, + "p95": 639.2639875411987, + "p99": 673.8560199737549 + }, + "roundtrip": { + "p50": 1121.1520433425903, + "p90": 1137.0879411697388, + "p95": 1147.3599672317505, + "p99": 1174.7519969940186 + }, + "isolatedSum": { + "p50": 1147.9039788246155, + "p90": 1176.03200674057, + "p95": 1189.7279620170593, + "p99": 1249.1520047187805 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1239175168, + "combineLogicalBytes": 1239175168, + "fanoutMean": 5.2757568359375, + "recvTokensMax": 11165, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1003.3919811248779, + "p90": 1031.5200090408325, + "p95": 1040.4160022735596, + "p99": 1070.2400207519531 + }, + "combine": { + "p50": 1121.9840049743652, + "p90": 1135.7760429382324, + "p95": 1145.0239419937134, + "p99": 1167.8400039672852 + }, + "roundtrip": { + "p50": 2083.0399990081787, + "p90": 2113.568067550659, + "p95": 2122.431993484497, + "p99": 2277.791976928711 + }, + "isolatedSum": { + "p50": 2125.375986099243, + "p90": 2167.296051979065, + "p95": 2185.439944267273, + "p99": 2238.0800247192383 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2481604608, + "combineLogicalBytes": 2481604608, + "fanoutMean": 5.282684326171875, + "recvTokensMax": 22165, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-8c2088d8", + "identity": "h200|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|prefill|normal|none|none|0|tuned||2b57a75d27f5b39", + "colorKey": "h200_1eda221e", + "comparisonKey": "6ee0b18a3e276ae1", + "schemaVersion": 3, + 
"generatedAt": "2026-06-27T00:03:37.741116+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_4", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · zipf+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf", + "routingLabel": "zipf+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "2b57a75d27f5b39", + "workloadId": "set:6:830e36e88869e222", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": 4.895263671875, + "eplbImbalanceAfter": 1.0000902811686199, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28272052634", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272052634", + "createdAt": "2026-06-27T00:03:37.741116+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 118.65600198507309, + "p90": 133.56800377368927, + "p95": 139.1039937734604, + "p99": 
146.97599411010742 + }, + "combine": { + "p50": 104.3199971318245, + "p90": 118.01599711179733, + "p95": 121.76000326871872, + "p99": 131.77600502967834 + }, + "roundtrip": { + "p50": 197.02400267124176, + "p90": 214.75200355052948, + "p95": 219.67999637126923, + "p99": 230.97600042819977 + }, + "isolatedSum": { + "p50": 222.97599911689758, + "p90": 251.5840008854866, + "p95": 260.8639970421791, + "p99": 278.75199913978577 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77385728, + "combineLogicalBytes": 77385728, + "fanoutMean": 5.271484375, + "recvTokensMax": 691, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 144.41600441932678, + "p90": 161.8880033493042, + "p95": 168.96000504493713, + "p99": 186.43200397491455 + }, + "combine": { + "p50": 143.19999516010284, + "p90": 153.08800339698792, + "p95": 157.4079990386963, + "p99": 164.60800170898438 + }, + "roundtrip": { + "p50": 262.87999749183655, + "p90": 275.32801032066345, + "p95": 282.4000120162964, + "p99": 291.00799560546875 + }, + "isolatedSum": { + "p50": 287.6159995794296, + "p90": 314.9760067462921, + "p95": 326.3680040836334, + "p99": 351.0400056838989 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155172864, + "combineLogicalBytes": 155172864, + "fanoutMean": 5.28515625, + "recvTokensMax": 1378, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 203.0079960823059, + "p90": 220.768004655838, + "p95": 227.55199670791626, + "p99": 253.63200902938843 + }, + "combine": { + "p50": 219.4879949092865, + "p90": 227.52000391483307, + "p95": 231.23200237751007, + "p99": 248.79999458789825 + }, + "roundtrip": { + "p50": 397.0560133457184, + "p90": 409.5039963722229, + "p95": 413.4719967842102, + "p99": 425.82398653030396 + }, + "isolatedSum": { + "p50": 422.4959909915924, 
+ "p90": 448.2880085706711, + "p95": 458.78399908542633, + "p99": 502.4320036172867 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 310546432, + "combineLogicalBytes": 310546432, + "fanoutMean": 5.28857421875, + "recvTokensMax": 2745, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 311.5839958190918, + "p90": 334.52799916267395, + "p95": 339.2319977283478, + "p99": 353.88800501823425 + }, + "combine": { + "p50": 350.20801424980164, + "p90": 362.0480000972748, + "p95": 365.9839928150177, + "p99": 423.71198534965515 + }, + "roundtrip": { + "p50": 636.7999911308289, + "p90": 650.1439809799194, + "p95": 654.2080044746399, + "p99": 711.4560008049011 + }, + "isolatedSum": { + "p50": 661.7920100688934, + "p90": 696.5759992599487, + "p95": 705.2159905433655, + "p99": 777.5999903678894 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 620619776, + "combineLogicalBytes": 620619776, + "fanoutMean": 5.2845458984375, + "recvTokensMax": 5526, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 528.3839702606201, + "p90": 545.7599759101868, + "p95": 551.9999861717224, + "p99": 572.2879767417908 + }, + "combine": { + "p50": 608.959972858429, + "p90": 620.9920048713684, + "p95": 626.1119842529297, + "p99": 657.0559740066528 + }, + "roundtrip": { + "p50": 1110.2720499038696, + "p90": 1125.0239610671997, + "p95": 1132.032036781311, + "p99": 1183.0079555511475 + }, + "isolatedSum": { + "p50": 1137.343943119049, + "p90": 1166.7519807815552, + "p95": 1178.111970424652, + "p99": 1229.3439507484436 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1239175168, + "combineLogicalBytes": 1239175168, + "fanoutMean": 5.2757568359375, + "recvTokensMax": 11165, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, 
+ { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 994.4639801979065, + "p90": 1016.1600112915039, + "p95": 1023.9039659500122, + "p99": 1042.0479774475098 + }, + "combine": { + "p50": 1103.2960414886475, + "p90": 1116.2559986114502, + "p95": 1121.7600107192993, + "p99": 1139.4879817962646 + }, + "roundtrip": { + "p50": 2056.544065475464, + "p90": 2077.9199600219727, + "p95": 2088.671922683716, + "p99": 2251.3279914855957 + }, + "isolatedSum": { + "p50": 2097.760021686554, + "p90": 2132.416009902954, + "p95": 2145.6639766693115, + "p99": 2181.5359592437744 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2481604608, + "combineLogicalBytes": 2481604608, + "fanoutMean": 5.282684326171875, + "recvTokensMax": 22165, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-8e568434", + "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|normalized|0.18|64d989e2e2a6b31", + "colorKey": "h200_c851a534", + "comparisonKey": "1f9e00010b0d6e5b", + "schemaVersion": 3, + "generatedAt": "2026-06-26T17:29:59.726916+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_11", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "normalized", + "suite": "resource-constrained", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 (norm)", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": 
0.18, + "achievedFraction": 0.1818, + "configuredUnits": 24, + "deviceUnits": 132, + "resourceClass": "resource-constrained", + "conformanceClass": "resource-conforming", + "fixedKernel": false, + "paretoEligible": true + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": "set:6:a426d66e479dc893", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28254392935", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254392935", + "createdAt": "2026-06-26T17:29:59.726916+00:00", + "sha": "60dec7d70f554e252fec87709e2be52752947db1" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 116.44800007343292, + "p90": 126.97599828243256, + "p95": 137.92000710964203, + "p99": 159.96800363063812 + }, + "combine": { + "p50": 103.55199873447418, + "p90": 113.11999708414078, + "p95": 120.80000340938568, + "p99": 147.10399508476257 + }, + "roundtrip": { + "p50": 194.62400674819946, + "p90": 208.19200575351715, + "p95": 215.39199352264404, + "p99": 238.75199258327484 + }, + "isolatedSum": { + "p50": 219.9999988079071, + "p90": 240.09599536657333, + "p95": 258.7200105190277, + "p99": 307.0719987154007 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 146.97599411010742, + "p90": 163.07200491428375, + "p95": 171.77599668502808, + "p99": 191.42399728298187 + }, + "combine": { + "p50": 
142.84799993038177, + "p90": 154.78399395942688, + "p95": 165.12000560760498, + "p99": 172.28800058364868 + }, + "roundtrip": { + "p50": 267.0080065727234, + "p90": 288.9600098133087, + "p95": 295.77600955963135, + "p99": 315.71200489997864 + }, + "isolatedSum": { + "p50": 289.8239940404892, + "p90": 317.85599887371063, + "p95": 336.89600229263306, + "p99": 363.71199786663055 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155889664, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 207.68000185489655, + "p90": 228.64000499248505, + "p95": 236.92800104618073, + "p99": 267.90401339530945 + }, + "combine": { + "p50": 210.36800742149353, + "p90": 225.0239998102188, + "p95": 234.68799889087677, + "p99": 271.58400416374207 + }, + "roundtrip": { + "p50": 390.49598574638367, + "p90": 413.37600350379944, + "p95": 420.28799653053284, + "p99": 449.8240053653717 + }, + "isolatedSum": { + "p50": 418.0480092763901, + "p90": 453.66400480270386, + "p95": 471.6159999370575, + "p99": 539.4880175590515 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 312266752, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 324.8960077762604, + "p90": 341.5679931640625, + "p95": 351.4559864997864, + "p99": 364.73599076271057 + }, + "combine": { + "p50": 328.0960023403168, + "p90": 339.6480083465576, + "p95": 345.95200419425964, + "p99": 362.8480136394501 + }, + "roundtrip": { + "p50": 628.9600133895874, + "p90": 643.231987953186, + "p95": 649.3120193481445, + "p99": 664.3199920654297 + }, + "isolatedSum": { + "p50": 652.9920101165771, + "p90": 681.2160015106201, + "p95": 
697.407990694046, + "p99": 727.5840044021606 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 623443968, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 566.3679838180542, + "p90": 581.0880064964294, + "p95": 587.2960090637207, + "p99": 609.1520190238953 + }, + "combine": { + "p50": 560.9920024871826, + "p90": 573.0559825897217, + "p95": 578.2399773597717, + "p99": 609.7279787063599 + }, + "roundtrip": { + "p50": 1097.3440408706665, + "p90": 1114.400029182434, + "p95": 1121.791958808899, + "p99": 1286.6239547729492 + }, + "isolatedSum": { + "p50": 1127.3599863052368, + "p90": 1154.1439890861511, + "p95": 1165.5359864234924, + "p99": 1218.8799977302551 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243805696, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1051.8079996109009, + "p90": 1067.8720474243164, + "p95": 1078.271985054016, + "p99": 1161.4079475402832 + }, + "combine": { + "p50": 1028.9920568466187, + "p90": 1044.0959930419922, + "p95": 1054.4320344924927, + "p99": 1218.783974647522 + }, + "roundtrip": { + "p50": 2049.3760108947754, + "p90": 2068.4800148010254, + "p95": 2079.200029373169, + "p99": 2593.600034713745 + }, + "isolatedSum": { + "p50": 2080.8000564575195, + "p90": 2111.9680404663086, + "p95": 2132.704019546509, + "p99": 2380.191922187805 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487009280, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": 
"cx-6764a75f", + "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|prefill|normal|none|none|0|normalized|0.18|0a3064a2af0dd39", + "colorKey": "h200_a1e795ec", + "comparisonKey": "5a22622d9db14749", + "schemaVersion": 3, + "generatedAt": "2026-06-26T17:30:54.944678+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_8", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "normalized", + "suite": "resource-constrained", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 (norm) · balanced", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced", + "routingLabel": "balanced", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": 0.18, + "achievedFraction": 0.1818, + "configuredUnits": 24, + "deviceUnits": 132, + "resourceClass": "resource-constrained", + "conformanceClass": "resource-conforming", + "fixedKernel": false, + "paretoEligible": true + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "0a3064a2af0dd39", + "workloadId": "set:6:2dad1a73ff872905", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28254443915", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254443915", + "createdAt": 
"2026-06-26T17:30:54.944678+00:00", + "sha": "60dec7d70f554e252fec87709e2be52752947db1" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 133.88800621032715, + "p90": 147.16799557209015, + "p95": 159.5200002193451, + "p99": 177.76000499725342 + }, + "combine": { + "p50": 119.39200013875961, + "p90": 131.80799782276154, + "p95": 139.74399864673615, + "p99": 152.48000621795654 + }, + "roundtrip": { + "p50": 227.64800488948822, + "p90": 249.05599653720856, + "p95": 255.74401021003723, + "p99": 274.3679881095886 + }, + "isolatedSum": { + "p50": 253.28000634908676, + "p90": 278.9759933948517, + "p95": 299.26399886608124, + "p99": 330.24001121520996 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 8, + "recvTokensMax": 1024, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 176.54399573802948, + "p90": 188.4160041809082, + "p95": 203.07199656963348, + "p99": 299.8400032520294 + }, + "combine": { + "p50": 169.91999745368958, + "p90": 175.48799514770508, + "p95": 180.16000092029572, + "p99": 187.51999735832214 + }, + "roundtrip": { + "p50": 319.4560110569, + "p90": 328.7679851055145, + "p95": 336.32001280784607, + "p99": 355.0400137901306 + }, + "isolatedSum": { + "p50": 346.46399319171906, + "p90": 363.9039993286133, + "p95": 383.2319974899292, + "p99": 487.36000061035156 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 8, + "recvTokensMax": 2048, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 269.567996263504, + "p90": 288.12798857688904, + "p95": 294.048011302948, + "p99": 315.3280019760132 + }, + "combine": { + "p50": 262.0159983634949, + "p90": 
282.1120023727417, + "p95": 286.5920066833496, + "p99": 306.11199140548706 + }, + "roundtrip": { + "p50": 505.7920217514038, + "p90": 531.9039821624756, + "p95": 535.7760190963745, + "p99": 544.6720123291016 + }, + "isolatedSum": { + "p50": 531.5839946269989, + "p90": 570.2399909496307, + "p95": 580.6400179862976, + "p99": 621.4399933815002 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 469762048, + "combineLogicalBytes": 469762048, + "fanoutMean": 8, + "recvTokensMax": 4096, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 443.87200474739075, + "p90": 459.55199003219604, + "p95": 467.74399280548096, + "p99": 487.199991941452 + }, + "combine": { + "p50": 427.64800786972046, + "p90": 442.81598925590515, + "p95": 451.58401131629944, + "p99": 483.13599824905396 + }, + "roundtrip": { + "p50": 844.7999954223633, + "p90": 860.0640296936035, + "p95": 867.0719861984253, + "p99": 924.67200756073 + }, + "isolatedSum": { + "p50": 871.5200126171112, + "p90": 902.3679792881012, + "p95": 919.3280041217804, + "p99": 970.335990190506 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 939524096, + "combineLogicalBytes": 939524096, + "fanoutMean": 8, + "recvTokensMax": 8192, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 806.1119914054871, + "p90": 823.7119913101196, + "p95": 832.4480056762695, + "p99": 892.3199772834778 + }, + "combine": { + "p50": 758.9120268821716, + "p90": 777.1199941635132, + "p95": 790.3040051460266, + "p99": 827.3919820785522 + }, + "roundtrip": { + "p50": 1534.5920324325562, + "p90": 1550.75204372406, + "p95": 1561.3759756088257, + "p99": 1597.9520082473755 + }, + "isolatedSum": { + "p50": 1565.0240182876587, + "p90": 1600.8319854736328, + "p95": 1622.7520108222961, + "p99": 1719.71195936203 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 1879048192, + "combineLogicalBytes": 1879048192, + "fanoutMean": 8, + "recvTokensMax": 16384, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1534.7520112991333, + "p90": 1552.4159669876099, + "p95": 1570.9120035171509, + "p99": 1686.7519617080688 + }, + "combine": { + "p50": 1415.2640104293823, + "p90": 1439.2000436782837, + "p95": 1449.120044708252, + "p99": 1643.1679725646973 + }, + "roundtrip": { + "p50": 2922.528028488159, + "p90": 2943.743944168091, + "p95": 2957.535982131958, + "p99": 3040.5759811401367 + }, + "isolatedSum": { + "p50": 2950.0160217285156, + "p90": 2991.6160106658936, + "p95": 3020.032048225403, + "p99": 3329.919934272766 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3758096384, + "combineLogicalBytes": 3758096384, + "fanoutMean": 8, + "recvTokensMax": 32768, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-e63750d6", + "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|prefill|normal|none|none|0|normalized|0.18|b5217e990b95f86", + "colorKey": "h200_0a93a01f", + "comparisonKey": "f4911d0a95d49c62", + "schemaVersion": 3, + "generatedAt": "2026-06-26T17:31:03.582434+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_0", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "normalized", + "suite": "resource-constrained", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 (norm) · zipf", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf", + "routingLabel": "zipf", + 
"routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": 0.18, + "achievedFraction": 0.1818, + "configuredUnits": 24, + "deviceUnits": 132, + "resourceClass": "resource-constrained", + "conformanceClass": "resource-conforming", + "fixedKernel": false, + "paretoEligible": true + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b5217e990b95f86", + "workloadId": "set:6:830e36e88869e222", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28254452252", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254452252", + "createdAt": "2026-06-26T17:31:03.582434+00:00", + "sha": "60dec7d70f554e252fec87709e2be52752947db1" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 120.86399644613266, + "p90": 133.53599607944489, + "p95": 138.5280042886734, + "p99": 154.01600301265717 + }, + "combine": { + "p50": 112.64000087976456, + "p90": 124.86399710178375, + "p95": 130.5599957704544, + "p99": 142.7839994430542 + }, + "roundtrip": { + "p50": 213.47199380397797, + "p90": 229.72799837589264, + "p95": 238.68800699710846, + "p99": 280.8000147342682 + }, + "isolatedSum": { + "p50": 233.50399732589722, + "p90": 258.39999318122864, + "p95": 269.0880000591278, + "p99": 296.80000245571136 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 163.55200111865997, + "p90": 173.7920045852661, + "p95": 181.66400492191315, + "p99": 202.87999510765076 + }, + "combine": { + "p50": 156.54399991035461, + "p90": 170.9119975566864, + "p95": 178.20799350738525, + "p99": 194.62400674819946 + }, + "roundtrip": { + "p50": 297.1839904785156, + "p90": 314.65598940849304, + "p95": 321.02400064468384, + "p99": 352.28800773620605 + }, + "isolatedSum": { + "p50": 320.0960010290146, + "p90": 344.7040021419525, + "p95": 359.8719984292984, + "p99": 397.5040018558502 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 100509696, + "combineLogicalBytes": 100509696, + "fanoutMean": 3.42333984375, + "recvTokensMax": 2046, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 237.56800591945648, + "p90": 246.72000110149384, + "p95": 252.44799256324768, + "p99": 262.2720003128052 + }, + "combine": { + "p50": 242.3039972782135, + "p90": 256.99201226234436, + "p95": 264.5759880542755, + "p99": 294.17601227760315 + }, + "roundtrip": { + "p50": 457.5679898262024, + "p90": 477.27999091148376, + "p95": 485.6959879398346, + "p99": 519.9679732322693 + }, + "isolatedSum": { + "p50": 479.87200319767, + "p90": 503.7120133638382, + "p95": 517.0239806175232, + "p99": 556.4480125904083 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 201678848, + "combineLogicalBytes": 201678848, + "fanoutMean": 3.4345703125, + "recvTokensMax": 4094, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 384.0320110321045, + "p90": 394.9120044708252, + "p95": 400.2879858016968, + "p99": 411.77600622177124 + }, + "combine": { + "p50": 408.2239866256714, + "p90": 420.22401094436646, + "p95": 427.39200592041016, + "p99": 457.5679898262024 + }, + "roundtrip": { + 
"p50": 765.9199833869934, + "p90": 785.9519720077515, + "p95": 798.2079982757568, + "p99": 844.543993473053 + }, + "isolatedSum": { + "p50": 792.2559976577759, + "p90": 815.1360154151917, + "p95": 827.6799917221069, + "p99": 869.3439960479736 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 405035008, + "combineLogicalBytes": 405035008, + "fanoutMean": 3.4488525390625, + "recvTokensMax": 8189, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 669.6959733963013, + "p90": 682.3359727859497, + "p95": 689.0559792518616, + "p99": 731.8080067634583 + }, + "combine": { + "p50": 727.1360158920288, + "p90": 740.4800057411194, + "p95": 746.783971786499, + "p99": 762.8480195999146 + }, + "roundtrip": { + "p50": 1366.0800457000732, + "p90": 1389.631986618042, + "p95": 1405.6639671325684, + "p99": 1561.8239641189575 + }, + "isolatedSum": { + "p50": 1396.83198928833, + "p90": 1422.815978527069, + "p95": 1435.8399510383606, + "p99": 1494.6560263633728 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 808822784, + "combineLogicalBytes": 808822784, + "fanoutMean": 3.44354248046875, + "recvTokensMax": 16380, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1259.1999769210815, + "p90": 1273.1839418411255, + "p95": 1278.5600423812866, + "p99": 1390.463948249817 + }, + "combine": { + "p50": 1366.8160438537598, + "p90": 1383.2319974899292, + "p95": 1391.2960290908813, + "p99": 1428.5119771957397 + }, + "roundtrip": { + "p50": 2598.0799198150635, + "p90": 2617.0880794525146, + "p95": 2628.2238960266113, + "p99": 2879.9679279327393 + }, + "isolatedSum": { + "p50": 2626.0160207748413, + "p90": 2656.4159393310547, + "p95": 2669.856071472168, + "p99": 2818.9759254455566 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1619795968, + 
"combineLogicalBytes": 1619795968, + "fanoutMean": 3.4481201171875, + "recvTokensMax": 32761, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-353049ec", + "identity": "h200|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|prefill|normal|none|none|0|normalized|0.18|2b57a75d27f5b39", + "colorKey": "h200_993777bf", + "comparisonKey": "cb74cc9ee6130bb2", + "schemaVersion": 3, + "generatedAt": "2026-06-26T17:47:04.200207+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_1", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "normalized", + "suite": "resource-constrained", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 (norm) · zipf+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf", + "routingLabel": "zipf+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": 0.18, + "achievedFraction": 0.1818, + "configuredUnits": 24, + "deviceUnits": 132, + "resourceClass": "resource-constrained", + "conformanceClass": "resource-conforming", + "fixedKernel": false, + "paretoEligible": true + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "2b57a75d27f5b39", + "workloadId": "set:6:830e36e88869e222", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": 4.895263671875, + "eplbImbalanceAfter": 1.0000902811686199, + "backendVersion": "1.2.1", + "imageDigest": 
"sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28255303840", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28255303840", + "createdAt": "2026-06-26T17:47:04.200207+00:00", + "sha": "36d3eb6c3c7386d3220c873d305410219c5c0f17" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 117.40799993276596, + "p90": 132.54399597644806, + "p95": 140.06400108337402, + "p99": 154.27200496196747 + }, + "combine": { + "p50": 104.3199971318245, + "p90": 118.04799735546112, + "p95": 123.99999797344208, + "p99": 158.75199437141418 + }, + "roundtrip": { + "p50": 193.9840018749237, + "p90": 207.68000185489655, + "p95": 215.61600267887115, + "p99": 244.6720004081726 + }, + "isolatedSum": { + "p50": 221.72799706459045, + "p90": 250.59199333190918, + "p95": 264.0639990568161, + "p99": 313.02399933338165 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77385728, + "combineLogicalBytes": 77385728, + "fanoutMean": 5.271484375, + "recvTokensMax": 691, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 146.68799936771393, + "p90": 160.64000129699707, + "p95": 169.855996966362, + "p99": 192.06400215625763 + }, + "combine": { + "p50": 142.91200041770935, + "p90": 152.0320028066635, + "p95": 157.98400342464447, + "p99": 178.0479997396469 + }, + "roundtrip": { + "p50": 266.1440074443817, + "p90": 278.7199914455414, + "p95": 285.6000065803528, + "p99": 310.43198704719543 + }, + "isolatedSum": { + "p50": 289.5999997854233, + "p90": 312.6720041036606, + "p95": 327.84000039100647, + "p99": 370.11200189590454 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155172864, + "combineLogicalBytes": 155172864, + "fanoutMean": 5.28515625, + "recvTokensMax": 1378, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + 
"trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 208.12800526618958, + "p90": 229.8559993505478, + "p95": 237.34399676322937, + "p99": 272.5760042667389 + }, + "combine": { + "p50": 210.62399446964264, + "p90": 222.75200486183167, + "p95": 228.99200022220612, + "p99": 251.45599246025085 + }, + "roundtrip": { + "p50": 391.4879858493805, + "p90": 413.05598616600037, + "p95": 424.54400658607483, + "p99": 474.047988653183 + }, + "isolatedSum": { + "p50": 418.7519997358322, + "p90": 452.60800421237946, + "p95": 466.3359969854355, + "p99": 524.0319967269897 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 310546432, + "combineLogicalBytes": 310546432, + "fanoutMean": 5.28857421875, + "recvTokensMax": 2745, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 322.7840065956116, + "p90": 342.78398752212524, + "p95": 351.6800105571747, + "p99": 378.2399892807007 + }, + "combine": { + "p50": 330.1439881324768, + "p90": 345.0239896774292, + "p95": 349.8559892177582, + "p99": 379.13599610328674 + }, + "roundtrip": { + "p50": 626.2080073356628, + "p90": 646.8480229377747, + "p95": 661.1520051956177, + "p99": 823.4559893608093 + }, + "isolatedSum": { + "p50": 652.9279947280884, + "p90": 687.8079771995544, + "p95": 701.5359997749329, + "p99": 757.3759853839874 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 620619776, + "combineLogicalBytes": 620619776, + "fanoutMean": 5.2845458984375, + "recvTokensMax": 5526, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 569.7280168533325, + "p90": 585.7920050621033, + "p95": 596.2240099906921, + "p99": 690.7520294189453 + }, + "combine": { + "p50": 569.1199898719788, + "p90": 583.1040143966675, + "p95": 591.0400152206421, + "p99": 609.503984451294 + }, + 
"roundtrip": { + "p50": 1109.8560094833374, + "p90": 1127.8719902038574, + "p95": 1138.335943222046, + "p99": 1191.648006439209 + }, + "isolatedSum": { + "p50": 1138.8480067253113, + "p90": 1168.8960194587708, + "p95": 1187.2640252113342, + "p99": 1300.2560138702393 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1239175168, + "combineLogicalBytes": 1239175168, + "fanoutMean": 5.2757568359375, + "recvTokensMax": 11165, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1082.5920104980469, + "p90": 1103.16801071167, + "p95": 1116.927981376648, + "p99": 1311.8400573730469 + }, + "combine": { + "p50": 1018.3039903640747, + "p90": 1032.4480533599854, + "p95": 1047.5200414657593, + "p99": 1417.472004890442 + }, + "roundtrip": { + "p50": 2072.60799407959, + "p90": 2096.7679023742676, + "p95": 2112.7359867095947, + "p99": 2388.000011444092 + }, + "isolatedSum": { + "p50": 2100.8960008621216, + "p90": 2135.6160640716553, + "p95": 2164.448022842407, + "p99": 2729.3120622634888 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2481604608, + "combineLogicalBytes": 2481604608, + "fanoutMean": 5.282684326171875, + "recvTokensMax": 22165, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-5c3f9114", + "identity": "h200|deepep|7168|8|256|bf16|normal|cached-layout-comm-only-v1|uniform|8|prefill|normal|none|none|0|normalized|0.18|64d989e2e2a6b31", + "colorKey": "h200_edd92e38", + "comparisonKey": "696a49bd5b0de953", + "schemaVersion": 3, + "generatedAt": "2026-06-26T17:30:13.181201+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_4", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "normalized", + "suite": "resource-constrained", + "comparisonClass": "standardized", + "measurementContract": 
"cached-layout-comm-only-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 (norm) [cl]", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": 0.18, + "achievedFraction": 0.1818, + "configuredUnits": 24, + "deviceUnits": 132, + "resourceClass": "resource-constrained", + "conformanceClass": "resource-conforming", + "fixedKernel": false, + "paretoEligible": true + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": "set:6:a426d66e479dc893", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28254409438", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254409438", + "createdAt": "2026-06-26T17:30:13.181201+00:00", + "sha": "60dec7d70f554e252fec87709e2be52752947db1" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 104.032002389431, + "p90": 116.12799763679504, + "p95": 120.83200365304947, + "p99": 131.00799918174744 + }, + "combine": { + "p50": 103.07200253009796, + "p90": 115.167997777462, + "p95": 120.95999717712402, + "p99": 125.76000392436981 + }, + "roundtrip": { + "p50": 182.23999440670013, + "p90": 196.48000597953796, + "p95": 200.095996260643, + "p99": 249.7600018978119 + }, + "isolatedSum": { + "p50": 207.10400491952896, + "p90": 
231.29599541425705, + "p95": 241.7920008301735, + "p99": 256.76800310611725 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 136.03200018405914, + "p90": 151.96800231933594, + "p95": 158.4639996290207, + "p99": 170.68800330162048 + }, + "combine": { + "p50": 142.59199798107147, + "p90": 157.53600001335144, + "p95": 161.18399798870087, + "p99": 179.6800047159195 + }, + "roundtrip": { + "p50": 252.8960108757019, + "p90": 265.28000831604004, + "p95": 271.232008934021, + "p99": 293.4400141239166 + }, + "isolatedSum": { + "p50": 278.6239981651306, + "p90": 309.5040023326874, + "p95": 319.64799761772156, + "p99": 350.36800801754 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155889664, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 194.87999379634857, + "p90": 210.33599972724915, + "p95": 215.87200462818146, + "p99": 243.9039945602417 + }, + "combine": { + "p50": 208.064004778862, + "p90": 222.04799950122833, + "p95": 230.14399409294128, + "p99": 255.42399287223816 + }, + "roundtrip": { + "p50": 378.84798645973206, + "p90": 394.9120044708252, + "p95": 405.5039882659912, + "p99": 434.27199125289917 + }, + "isolatedSum": { + "p50": 402.94399857521057, + "p90": 432.3839992284775, + "p95": 446.01599872112274, + "p99": 499.32798743247986 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 312266752, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 312.99200654029846, + "p90": 334.1119885444641, + "p95": 342.9119884967804, + "p99": 389.15199041366577 + }, + "combine": { + "p50": 326.1120021343231, + "p90": 339.35999870300293, + "p95": 347.3280072212219, + "p99": 393.0560052394867 + }, + "roundtrip": { + "p50": 614.0159964561462, + "p90": 628.4800171852112, + "p95": 635.7759833335876, + "p99": 708.4479928016663 + }, + "isolatedSum": { + "p50": 639.1040086746216, + "p90": 673.471987247467, + "p95": 690.2399957180023, + "p99": 782.2079956531525 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 623443968, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 549.3760108947754, + "p90": 563.264012336731, + "p95": 569.2480206489563, + "p99": 593.1519865989685 + }, + "combine": { + "p50": 560.8000159263611, + "p90": 573.2799768447876, + "p95": 579.8400044441223, + "p99": 591.871976852417 + }, + "roundtrip": { + "p50": 1080.9600353240967, + "p90": 1097.5359678268433, + "p95": 1106.0800552368164, + "p99": 1136.512041091919 + }, + "isolatedSum": { + "p50": 1110.1760268211365, + "p90": 1136.5439891815186, + "p95": 1149.0880250930786, + "p99": 1185.0239634513855 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243805696, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1017.7919864654541, + "p90": 1032.1600437164307, + "p95": 1039.6480560302734, + "p99": 1061.1519813537598 + }, + "combine": { + "p50": 1013.0879878997803, + "p90": 1025.823950767517, + "p95": 1031.775951385498, + "p99": 1097.7599620819092 + }, + "roundtrip": { + 
"p50": 2001.5358924865723, + "p90": 2015.7439708709717, + "p95": 2029.7598838806152, + "p99": 2119.1039085388184 + }, + "isolatedSum": { + "p50": 2030.8799743652344, + "p90": 2057.9839944839478, + "p95": 2071.4240074157715, + "p99": 2158.911943435669 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487009280, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-e1047fdc", + "identity": "h200|deepep|7168|8|256|bf16|normal|cached-layout-comm-only-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "h200_76bb7d5d", + "comparisonKey": "174936235ac15d2c", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:49:44.261568+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_2", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "cached-layout-comm-only-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 [cl]", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": 
"64d989e2e2a6b31", + "workloadId": "set:6:a426d66e479dc893", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271611947", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271611947", + "createdAt": "2026-06-26T23:49:44.261568+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 104.3199971318245, + "p90": 121.50400131940842, + "p95": 125.50400197505951, + "p99": 141.76000654697418 + }, + "combine": { + "p50": 104.032002389431, + "p90": 119.71200257539749, + "p95": 123.96799772977829, + "p99": 145.4080045223236 + }, + "roundtrip": { + "p50": 184.4799965620041, + "p90": 197.24799692630768, + "p95": 202.11200416088104, + "p99": 221.91999852657318 + }, + "isolatedSum": { + "p50": 208.3519995212555, + "p90": 241.2160038948059, + "p95": 249.4719997048378, + "p99": 287.1680110692978 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 134.49600338935852, + "p90": 149.59999918937683, + "p95": 156.63999319076538, + "p99": 199.0080028772354 + }, + "combine": { + "p50": 143.71199905872345, + "p90": 156.51200711727142, + "p95": 161.6639941930771, + "p99": 174.14399981498718 + }, + "roundtrip": { + "p50": 254.88001108169556, + "p90": 277.50399708747864, + "p95": 284.09600257873535, + "p99": 315.20000100135803 + }, + "isolatedSum": { + "p50": 278.20800244808197, + "p90": 306.11200630664825, + "p95": 318.30398738384247, + "p99": 373.1520026922226 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 155889664, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 192.89599359035492, + "p90": 207.39200711250305, + "p95": 213.53599429130554, + "p99": 229.8240065574646 + }, + "combine": { + "p50": 222.88000583648682, + "p90": 239.77600038051605, + "p95": 244.06400322914124, + "p99": 276.16000175476074 + }, + "roundtrip": { + "p50": 388.51198554039, + "p90": 405.08800745010376, + "p95": 412.6400053501129, + "p99": 470.43201327323914 + }, + "isolatedSum": { + "p50": 415.77599942684174, + "p90": 447.1680074930191, + "p95": 457.5999975204468, + "p99": 505.98400831222534 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 312266752, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 304.32000756263733, + "p90": 328.2560110092163, + "p95": 334.6239924430847, + "p99": 354.8159897327423 + }, + "combine": { + "p50": 352.35199332237244, + "p90": 364.1279935836792, + "p95": 372.44799733161926, + "p99": 391.80800318717957 + }, + "roundtrip": { + "p50": 630.1760077476501, + "p90": 646.7840075492859, + "p95": 655.135989189148, + "p99": 679.5520186424255 + }, + "isolatedSum": { + "p50": 656.6720008850098, + "p90": 692.3840045928955, + "p95": 707.071989774704, + "p99": 746.6239929199219 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 623443968, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 515.2000188827515, + 
"p90": 530.0800204277039, + "p95": 538.9119982719421, + "p99": 611.7119789123535 + }, + "combine": { + "p50": 611.2319827079773, + "p90": 623.5520243644714, + "p95": 633.2160234451294, + "p99": 764.1919851303101 + }, + "roundtrip": { + "p50": 1099.4880199432373, + "p90": 1118.4959411621094, + "p95": 1131.1999559402466, + "p99": 1154.2079448699951 + }, + "isolatedSum": { + "p50": 1126.4320015907288, + "p90": 1153.6320447921753, + "p95": 1172.1280217170715, + "p99": 1375.9039640426636 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243805696, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 964.959979057312, + "p90": 992.2879934310913, + "p95": 1000.3199577331543, + "p99": 1034.4959497451782 + }, + "combine": { + "p50": 1105.7920455932617, + "p90": 1125.1840591430664, + "p95": 1137.5679969787598, + "p99": 1247.26402759552 + }, + "roundtrip": { + "p50": 2036.895990371704, + "p90": 2068.3839321136475, + "p95": 2084.383964538574, + "p99": 2168.4799194335938 + }, + "isolatedSum": { + "p50": 2070.7520246505737, + "p90": 2117.4720525741577, + "p95": 2137.887954711914, + "p99": 2281.7599773406982 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487009280, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-e384c8f8", + "identity": "h200|deepep|4096|8|128|fp8|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||dc27c5e0894e569", + "colorKey": "h200_9979edfc", + "comparisonKey": "ca4b77cbfe002bae", + "schemaVersion": 3, + "generatedAt": "2026-06-27T11:14:27.799131+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_11", + "sku": 
"h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · fp8", + "model": "Qwen3.5", + "shape": { + "hidden": 4096, + "topk": 8, + "experts": 128, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "dc27c5e0894e569", + "workloadId": "set:6:76d8142d69406335", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28287507619", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28287507619", + "createdAt": "2026-06-27T11:14:27.799131+00:00", + "sha": "df7fddee0a275156d4c72fa006cd2b73bce72613" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 77.2159993648529, + "p90": 103.67999970912933, + "p95": 112.0000034570694, + "p99": 133.63200426101685 + }, + "combine": { + "p50": 73.72800260782242, + "p90": 91.71199798583984, + "p95": 96.99200093746185, + "p99": 107.45599865913391 + }, + "roundtrip": { + "p50": 171.1679995059967, + 
"p90": 215.87200462818146, + "p95": 231.36000335216522, + "p99": 281.3119888305664 + }, + "isolatedSum": { + "p50": 150.94400197267532, + "p90": 195.39199769496918, + "p95": 208.99200439453125, + "p99": 241.08800292015076 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22282240, + "combineLogicalBytes": 44564480, + "fanoutMean": 5.3125, + "recvTokensMax": 699, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 92.32000261545181, + "p90": 114.88000303506851, + "p95": 124.54400211572647, + "p99": 145.6959992647171 + }, + "combine": { + "p50": 98.78399968147278, + "p90": 115.99999666213989, + "p95": 121.76000326871872, + "p99": 152.92799472808838 + }, + "roundtrip": { + "p50": 223.29600155353546, + "p90": 252.16001272201538, + "p95": 263.90400528907776, + "p99": 281.72799944877625 + }, + "isolatedSum": { + "p50": 191.1040022969246, + "p90": 230.8799996972084, + "p95": 246.3040053844452, + "p99": 298.6239939928055 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 44863488, + "combineLogicalBytes": 89726976, + "fanoutMean": 5.34814453125, + "recvTokensMax": 1385, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 120.38400024175644, + "p90": 147.77599275112152, + "p95": 158.78400206565857, + "p99": 194.87999379634857 + }, + "combine": { + "p50": 148.44800531864166, + "p90": 163.71199488639832, + "p95": 171.6800034046173, + "p99": 186.8479996919632 + }, + "roundtrip": { + "p50": 343.9359962940216, + "p90": 367.64800548553467, + "p95": 382.9120099544525, + "p99": 435.84001064300537 + }, + "isolatedSum": { + "p50": 268.8320055603981, + "p90": 311.48798763751984, + "p95": 330.4640054702759, + "p99": 381.72799348831177 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 89751552, + "combineLogicalBytes": 179503104, + 
"fanoutMean": 5.349609375, + "recvTokensMax": 2772, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 174.97600615024567, + "p90": 203.07199656963348, + "p95": 215.71199595928192, + "p99": 236.76800727844238 + }, + "combine": { + "p50": 243.68000030517578, + "p90": 257.6960027217865, + "p95": 264.16000723838806, + "p99": 295.26400566101074 + }, + "roundtrip": { + "p50": 581.7599892616272, + "p90": 607.3920130729675, + "p95": 614.687979221344, + "p99": 658.847987651825 + }, + "isolatedSum": { + "p50": 418.65600645542145, + "p90": 460.76799929142, + "p95": 479.87200319767, + "p99": 532.0320129394531 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 179511296, + "combineLogicalBytes": 359022592, + "fanoutMean": 5.349853515625, + "recvTokensMax": 5558, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 281.1200022697449, + "p90": 304.1599988937378, + "p95": 312.032014131546, + "p99": 346.3360071182251 + }, + "combine": { + "p50": 413.12000155448914, + "p90": 429.3760061264038, + "p95": 438.87999653816223, + "p99": 470.8159863948822 + }, + "roundtrip": { + "p50": 1013.4719610214233, + "p90": 1037.824034690857, + "p95": 1052.0960092544556, + "p99": 1194.1440105438232 + }, + "isolatedSum": { + "p50": 694.240003824234, + "p90": 733.5360050201416, + "p95": 750.9120106697083, + "p99": 817.1519935131073 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 358055936, + "combineLogicalBytes": 716111872, + "fanoutMean": 5.33544921875, + "recvTokensMax": 10982, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 500.7359981536865, + "p90": 529.9519896507263, + "p95": 540.992021560669, + "p99": 584.3200087547302 + }, + "combine": { + 
"p50": 754.8159956932068, + "p90": 771.6479897499084, + "p95": 786.4639759063721, + "p99": 983.8079810142517 + }, + "roundtrip": { + "p50": 1906.6879749298096, + "p90": 1934.656023979187, + "p95": 1949.887990951538, + "p99": 2083.967924118042 + }, + "isolatedSum": { + "p50": 1255.5519938468933, + "p90": 1301.5999794006348, + "p95": 1327.455997467041, + "p99": 1568.127989768982 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 716197888, + "combineLogicalBytes": 1432395776, + "fanoutMean": 5.336090087890625, + "recvTokensMax": 21939, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-26de8d70", + "identity": "h200|deepep|4096|8|128|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||dc27c5e0894e569", + "colorKey": "h200_87683f6c", + "comparisonKey": "b7adcc489d58bf89", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:53:37.273038+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_5", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · fp8", + "model": "Qwen3.5", + "shape": { + "hidden": 4096, + "topk": 8, + "experts": 128, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + 
"nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "dc27c5e0894e569", + "workloadId": "set:6:76d8142d69406335", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271739849", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271739849", + "createdAt": "2026-06-26T23:53:37.273038+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 233.2800030708313, + "p90": 296.25600576400757, + "p95": 315.45600295066833, + "p99": 387.84000277519226 + }, + "combine": { + "p50": 74.72000271081924, + "p90": 92.96000003814697, + "p95": 97.98400104045868, + "p99": 124.86399710178375 + }, + "roundtrip": { + "p50": 278.9759933948517, + "p90": 337.44001388549805, + "p95": 363.5840117931366, + "p99": 408.9600145816803 + }, + "isolatedSum": { + "p50": 308.00000578165054, + "p90": 389.21600580215454, + "p95": 413.440003991127, + "p99": 512.703999876976 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22282240, + "combineLogicalBytes": 44564480, + "fanoutMean": 5.3125, + "recvTokensMax": 699, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 240.28800427913666, + "p90": 292.03200340270996, + "p95": 306.97599053382874, + "p99": 329.5679986476898 + }, + "combine": { + "p50": 98.30400347709656, + "p90": 115.07199704647064, + "p95": 119.00799721479416, + "p99": 131.9359987974167 + }, + "roundtrip": { + "p50": 325.408011674881, + "p90": 376.67199969291687, + "p95": 392.8639888763428, + "p99": 439.520001411438 + }, + "isolatedSum": { + "p50": 338.5920077562332, + 
"p90": 407.1040004491806, + "p95": 425.9839877486229, + "p99": 461.5039974451065 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 44863488, + "combineLogicalBytes": 89726976, + "fanoutMean": 5.34814453125, + "recvTokensMax": 1385, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 315.45600295066833, + "p90": 357.08799958229065, + "p95": 369.9199855327606, + "p99": 407.039999961853 + }, + "combine": { + "p50": 147.45600521564484, + "p90": 164.67200219631195, + "p95": 168.16000640392303, + "p99": 182.52800405025482 + }, + "roundtrip": { + "p50": 460.4479968547821, + "p90": 508.575975894928, + "p95": 523.360013961792, + "p99": 576.0959982872009 + }, + "isolatedSum": { + "p50": 462.91200816631317, + "p90": 521.7600017786026, + "p95": 538.0799919366837, + "p99": 589.5680040121078 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 89751552, + "combineLogicalBytes": 179503104, + "fanoutMean": 5.349609375, + "recvTokensMax": 2772, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 458.2720100879669, + "p90": 501.5680193901062, + "p95": 517.632007598877, + "p99": 562.1119737625122 + }, + "combine": { + "p50": 241.2160038948059, + "p90": 252.06398963928223, + "p95": 257.34400749206543, + "p99": 279.83999252319336 + }, + "roundtrip": { + "p50": 681.9199919700623, + "p90": 713.4079933166504, + "p95": 728.8320064544678, + "p99": 805.8239817619324 + }, + "isolatedSum": { + "p50": 699.4880139827728, + "p90": 753.6320090293884, + "p95": 774.9760150909424, + "p99": 841.9519662857056 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 179511296, + "combineLogicalBytes": 359022592, + "fanoutMean": 5.349853515625, + "recvTokensMax": 5558, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 734.112024307251, + "p90": 769.8879837989807, + "p95": 783.7439775466919, + "p99": 899.9680280685425 + }, + "combine": { + "p50": 410.17600893974304, + "p90": 422.4640130996704, + "p95": 427.64800786972046, + "p99": 457.72799849510193 + }, + "roundtrip": { + "p50": 1137.4399662017822, + "p90": 1176.416039466858, + "p95": 1203.328013420105, + "p99": 1318.8159465789795 + }, + "isolatedSum": { + "p50": 1144.288033246994, + "p90": 1192.3519968986511, + "p95": 1211.3919854164124, + "p99": 1357.6960265636444 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 358055936, + "combineLogicalBytes": 716111872, + "fanoutMean": 5.33544921875, + "recvTokensMax": 10982, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1373.792052268982, + "p90": 1396.7679738998413, + "p95": 1406.9440364837646, + "p99": 1577.5359869003296 + }, + "combine": { + "p50": 750.3679990768433, + "p90": 762.6879811286926, + "p95": 770.3359723091125, + "p99": 788.0319952964783 + }, + "roundtrip": { + "p50": 2134.335994720459, + "p90": 2161.439895629883, + "p95": 2178.2400608062744, + "p99": 2561.3439083099365 + }, + "isolatedSum": { + "p50": 2124.160051345825, + "p90": 2159.455955028534, + "p95": 2177.280008792877, + "p99": 2365.567982196808 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 716197888, + "combineLogicalBytes": 1432395776, + "fanoutMean": 5.336090087890625, + "recvTokensMax": 21939, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-2e0e49b4", + "identity": "h200|deepep|5120|8|160|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||0c022a63bbcbf42", + "colorKey": "h200_87683f6c", + "comparisonKey": "dcdf4b262ed1d48f", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:54:08.323229+00:00", + "status": "valid", + 
"publicationStatus": "official", + "runner": "h200-dgxc-slurm_10", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · fp8", + "model": "shape 5120/8/160", + "shape": { + "hidden": 5120, + "topk": 8, + "experts": 160, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "0c022a63bbcbf42", + "workloadId": "set:6:28c0c09b13ff0acf", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271755854", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271755854", + "createdAt": "2026-06-26T23:54:08.323229+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 219.84000504016876, + "p90": 274.01599287986755, + "p95": 289.5039916038513, + "p99": 343.77598762512207 + }, + "combine": { + "p50": 81.08799904584885, + "p90": 91.90399944782257, + "p95": 99.55199807882309, + 
"p99": 105.79200088977814 + }, + "roundtrip": { + "p50": 288.57600688934326, + "p90": 340.2239978313446, + "p95": 353.95199060440063, + "p99": 388.0319893360138 + }, + "isolatedSum": { + "p50": 300.9280040860176, + "p90": 365.9199923276901, + "p95": 389.0559896826744, + "p99": 449.5679885149002 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 27837440, + "combineLogicalBytes": 55674880, + "fanoutMean": 5.3095703125, + "recvTokensMax": 699, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 254.62400913238525, + "p90": 299.74400997161865, + "p95": 313.2160007953644, + "p99": 335.6480002403259 + }, + "combine": { + "p50": 112.60800063610077, + "p90": 124.57600235939026, + "p95": 128.31999361515045, + "p99": 137.472003698349 + }, + "roundtrip": { + "p50": 357.88801312446594, + "p90": 402.78398990631104, + "p95": 418.7839925289154, + "p99": 468.3839976787567 + }, + "isolatedSum": { + "p50": 367.232009768486, + "p90": 424.3200123310089, + "p95": 441.53599441051483, + "p99": 473.1200039386749 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 55552000, + "combineLogicalBytes": 111104000, + "fanoutMean": 5.2978515625, + "recvTokensMax": 1387, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 338.17601203918457, + "p90": 376.48001313209534, + "p95": 391.80800318717957, + "p99": 431.71200156211853 + }, + "combine": { + "p50": 170.43200135231018, + "p90": 182.8480064868927, + "p95": 187.77599930763245, + "p99": 198.46400618553162 + }, + "roundtrip": { + "p50": 509.5679759979248, + "p90": 558.2079887390137, + "p95": 577.6960253715515, + "p99": 617.7600026130676 + }, + "isolatedSum": { + "p50": 508.60801339149475, + "p90": 559.328019618988, + "p95": 579.584002494812, + "p99": 630.1760077476501 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 111549440, + "combineLogicalBytes": 223098880, + "fanoutMean": 5.319091796875, + "recvTokensMax": 2762, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 505.0879716873169, + "p90": 540.7040119171143, + "p95": 552.6720285415649, + "p99": 595.1679944992065 + }, + "combine": { + "p50": 273.75999093055725, + "p90": 285.66399216651917, + "p95": 291.4240062236786, + "p99": 313.05599212646484 + }, + "roundtrip": { + "p50": 780.2879810333252, + "p90": 834.7839713096619, + "p95": 867.3920035362244, + "p99": 1058.9760541915894 + }, + "isolatedSum": { + "p50": 778.8479626178741, + "p90": 826.3680040836334, + "p95": 844.0960347652435, + "p99": 908.2239866256714 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 223365120, + "combineLogicalBytes": 446730240, + "fanoutMean": 5.325439453125, + "recvTokensMax": 5518, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 859.1039776802063, + "p90": 874.3680119514465, + "p95": 884.447991847992, + "p99": 1000.8000135421753 + }, + "combine": { + "p50": 476.0960042476654, + "p90": 487.5839948654175, + "p95": 495.9680140018463, + "p99": 551.2639880180359 + }, + "roundtrip": { + "p50": 1315.2320384979248, + "p90": 1342.4960374832153, + "p95": 1364.9920225143433, + "p99": 1437.1839761734009 + }, + "isolatedSum": { + "p50": 1335.1999819278717, + "p90": 1361.952006816864, + "p95": 1380.4160058498383, + "p99": 1552.0640015602112 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 446817280, + "combineLogicalBytes": 893634560, + "fanoutMean": 5.32647705078125, + "recvTokensMax": 11032, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1618.3040142059326, + "p90": 
1638.8479471206665, + "p95": 1650.3679752349854, + "p99": 1797.8880405426025 + }, + "combine": { + "p50": 871.5839982032776, + "p90": 885.4719996452332, + "p95": 893.7280178070068, + "p99": 936.1280202865601 + }, + "roundtrip": { + "p50": 2472.0640182495117, + "p90": 2496.8960285186768, + "p95": 2517.6639556884766, + "p99": 2775.1998901367188 + }, + "isolatedSum": { + "p50": 2489.88801240921, + "p90": 2524.3199467658997, + "p95": 2544.095993041992, + "p99": 2734.0160608291626 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 893132800, + "combineLogicalBytes": 1786265600, + "fanoutMean": 5.323486328125, + "recvTokensMax": 21895, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-cd909950", + "identity": "h200|deepep|6144|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "h200_9979edfc", + "comparisonKey": "eb524229a3f58a63", + "schemaVersion": 3, + "generatedAt": "2026-06-27T11:14:00.891802+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_9", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · fp8", + "model": "MiniMax-M3", + "shape": { + "hidden": 6144, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": 
"backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": "set:6:9f5e1e005a35e937", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28287496212", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28287496212", + "createdAt": "2026-06-27T11:14:00.891802+00:00", + "sha": "df7fddee0a275156d4c72fa006cd2b73bce72613" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 84.54400300979614, + "p90": 112.44799941778183, + "p95": 123.03999811410904, + "p99": 166.81599617004395 + }, + "combine": { + "p50": 87.99999952316284, + "p90": 105.56799918413162, + "p95": 112.35199868679047, + "p99": 141.34399592876434 + }, + "roundtrip": { + "p50": 196.16000354290009, + "p90": 240.22400379180908, + "p95": 254.91198897361755, + "p99": 326.30398869514465 + }, + "isolatedSum": { + "p50": 172.54400253295898, + "p90": 218.01599860191345, + "p95": 235.3919968008995, + "p99": 308.1599920988083 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 33288192, + "combineLogicalBytes": 66576384, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 101.75999999046326, + "p90": 124.09599870443344, + "p95": 130.11200726032257, + "p99": 143.61600577831268 + }, + "combine": { + "p50": 120.41600048542023, + "p90": 135.13599336147308, + "p95": 138.5280042886734, + "p99": 143.8719928264618 + }, + "roundtrip": { + "p50": 278.0480086803436, + "p90": 
296.9599962234497, + "p95": 302.91199684143066, + "p99": 346.3360071182251 + }, + "isolatedSum": { + "p50": 222.17600047588348, + "p90": 259.2319920659065, + "p95": 268.640011548996, + "p99": 287.4879986047745 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 66809856, + "combineLogicalBytes": 133619712, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 131.6159963607788, + "p90": 153.18399667739868, + "p95": 161.82400286197662, + "p99": 184.79999899864197 + }, + "combine": { + "p50": 191.00800156593323, + "p90": 205.1199972629547, + "p95": 210.62399446964264, + "p99": 231.87200725078583 + }, + "roundtrip": { + "p50": 444.19199228286743, + "p90": 466.5600061416626, + "p95": 479.13599014282227, + "p99": 664.7359728813171 + }, + "isolatedSum": { + "p50": 322.62399792671204, + "p90": 358.3039939403534, + "p95": 372.44799733161926, + "p99": 416.6720062494278 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 133828608, + "combineLogicalBytes": 267657216, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 195.64799964427948, + "p90": 217.21599996089935, + "p95": 222.78399765491486, + "p99": 280.0320088863373 + }, + "combine": { + "p50": 306.304007768631, + "p90": 318.39999556541443, + "p95": 326.6240060329437, + "p99": 356.9279909133911 + }, + "roundtrip": { + "p50": 739.0080094337463, + "p90": 759.2960000038147, + "p95": 774.0479707717896, + "p99": 813.5039806365967 + }, + "isolatedSum": { + "p50": 501.95200741291046, + "p90": 535.6159955263138, + "p95": 549.4080036878586, + "p99": 636.9599997997284 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 267190272, + "combineLogicalBytes": 534380544, + "fanoutMean": 
5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 324.319988489151, + "p90": 343.77598762512207, + "p95": 355.9359908103943, + "p99": 389.2799913883209 + }, + "combine": { + "p50": 538.1439924240112, + "p90": 549.2799878120422, + "p95": 556.9919943809509, + "p99": 602.7839779853821 + }, + "roundtrip": { + "p50": 1345.0239896774292, + "p90": 1376.4480352401733, + "p95": 1404.3519496917725, + "p99": 1568.6399936676025 + }, + "isolatedSum": { + "p50": 862.4639809131622, + "p90": 893.0559754371643, + "p95": 912.9279851913452, + "p99": 992.063969373703 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 533059584, + "combineLogicalBytes": 1066119168, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 583.4559798240662, + "p90": 596.5120196342468, + "p95": 603.9360165596008, + "p99": 710.3360295295715 + }, + "combine": { + "p50": 978.2400131225586, + "p90": 994.8480129241943, + "p95": 1007.7120065689087, + "p99": 1139.9359703063965 + }, + "roundtrip": { + "p50": 2591.327905654907, + "p90": 2624.3200302124023, + "p95": 2637.3119354248047, + "p99": 2756.351947784424 + }, + "isolatedSum": { + "p50": 1561.6959929466248, + "p90": 1591.3600325584412, + "p95": 1611.6480231285095, + "p99": 1850.271999835968 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1065861120, + "combineLogicalBytes": 2131722240, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-92d6dac4", + "identity": "h200|deepep|6144|8|256|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "h200_87683f6c", + 
"comparisonKey": "5878390fb0ef3ac0", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:54:33.209811+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_1", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · fp8", + "model": "MiniMax-M3", + "shape": { + "hidden": 6144, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": "set:6:9f5e1e005a35e937", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271771597", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271771597", + "createdAt": "2026-06-26T23:54:33.209811+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 237.12000250816345, + "p90": 447.00801372528076, + "p95": 466.2080109119415, + 
"p99": 509.2800259590149 + }, + "combine": { + "p50": 89.59999680519104, + "p90": 118.20799857378006, + "p95": 120.38400024175644, + "p99": 131.55199587345123 + }, + "roundtrip": { + "p50": 299.51998591423035, + "p90": 465.9839868545532, + "p95": 490.01601338386536, + "p99": 533.9199900627136 + }, + "isolatedSum": { + "p50": 326.7199993133545, + "p90": 565.2160122990608, + "p95": 586.592011153698, + "p99": 640.8320218324661 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 33288192, + "combineLogicalBytes": 66576384, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 272.96000719070435, + "p90": 312.19199299812317, + "p95": 322.7840065956116, + "p99": 376.6080141067505 + }, + "combine": { + "p50": 121.91999703645706, + "p90": 133.34399461746216, + "p95": 139.1039937734604, + "p99": 144.48000490665436 + }, + "roundtrip": { + "p50": 388.5760009288788, + "p90": 429.28001284599304, + "p95": 448.5439956188202, + "p99": 507.87198543548584 + }, + "isolatedSum": { + "p50": 394.8800042271614, + "p90": 445.5359876155853, + "p95": 461.88800036907196, + "p99": 521.0880190134048 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 66809856, + "combineLogicalBytes": 133619712, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 375.61601400375366, + "p90": 427.4879992008209, + "p95": 443.77601146698, + "p99": 500.4799962043762 + }, + "combine": { + "p50": 192.9599940776825, + "p90": 205.08800446987152, + "p95": 213.47199380397797, + "p99": 237.92000114917755 + }, + "roundtrip": { + "p50": 553.5680055618286, + "p90": 599.2000102996826, + "p95": 623.583972454071, + "p99": 716.1920070648193 + }, + "isolatedSum": { + "p50": 568.5760080814362, + 
"p90": 632.5760036706924, + "p95": 657.248005270958, + "p99": 738.3999973535538 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 133828608, + "combineLogicalBytes": 267657216, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 557.6000213623047, + "p90": 596.7360138893127, + "p95": 607.3920130729675, + "p99": 644.9599862098694 + }, + "combine": { + "p50": 306.335985660553, + "p90": 316.3520097732544, + "p95": 320.51199674606323, + "p99": 334.52799916267395 + }, + "roundtrip": { + "p50": 853.1839847564697, + "p90": 880.8959722518921, + "p95": 895.3920006752014, + "p99": 966.7840003967285 + }, + "isolatedSum": { + "p50": 863.9360070228577, + "p90": 913.0880236625671, + "p95": 927.9040098190308, + "p99": 979.4879853725433 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 267190272, + "combineLogicalBytes": 534380544, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 987.8720045089722, + "p90": 1001.9840002059937, + "p95": 1013.2479667663574, + "p99": 1395.5520391464233 + }, + "combine": { + "p50": 540.9280061721802, + "p90": 573.7280249595642, + "p95": 584.6400260925293, + "p99": 626.0480284690857 + }, + "roundtrip": { + "p50": 1523.6799716949463, + "p90": 1545.408010482788, + "p95": 1558.1120252609253, + "p99": 1704.2880058288574 + }, + "isolatedSum": { + "p50": 1528.8000106811523, + "p90": 1575.7120251655579, + "p95": 1597.8879928588867, + "p99": 2021.600067615509 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 533059584, + "combineLogicalBytes": 1066119168, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1865.3759956359863, + "p90": 1883.2000494003296, + "p95": 1893.02396774292, + "p99": 1925.7279634475708 + }, + "combine": { + "p50": 981.823980808258, + "p90": 994.0800070762634, + "p95": 1002.7199983596802, + "p99": 1096.3200330734253 + }, + "roundtrip": { + "p50": 2907.2320461273193, + "p90": 2933.151960372925, + "p95": 2943.104028701782, + "p99": 3191.3599967956543 + }, + "isolatedSum": { + "p50": 2847.1999764442444, + "p90": 2877.280056476593, + "p95": 2895.7439661026, + "p99": 3022.047996520996 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1065861120, + "combineLogicalBytes": 2131722240, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-e6cb64c3", + "identity": "h200|deepep|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "h200_9979edfc", + "comparisonKey": "73a640c71287a1ce", + "schemaVersion": 3, + "generatedAt": "2026-06-27T10:26:33.521456+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_5", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · fp8", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + 
"configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": "set:6:a426d66e479dc893", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28286433802", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28286433802", + "createdAt": "2026-06-27T10:26:33.521456+00:00", + "sha": "91c7acf59a5e524f37742922ec67721d86a03f6b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 86.17600053548813, + "p90": 102.20800340175629, + "p95": 112.15999722480774, + "p99": 126.68800354003906 + }, + "combine": { + "p50": 96.44799679517746, + "p90": 110.97600311040878, + "p95": 116.83200299739838, + "p99": 120.44800072908401 + }, + "roundtrip": { + "p50": 209.98400449752808, + "p90": 236.95999383926392, + "p95": 250.40000677108765, + "p99": 302.11201310157776 + }, + "isolatedSum": { + "p50": 182.6239973306656, + "p90": 213.18400651216507, + "p95": 228.99200022220612, + "p99": 247.13600426912308 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 103.39199751615524, + "p90": 121.05599790811539, + "p95": 127.96799838542938, + "p99": 135.6479972600937 + }, + "combine": { + "p50": 137.79200613498688, + "p90": 151.07199549674988, + 
"p95": 155.13600409030914, + "p99": 164.89599645137787 + }, + "roundtrip": { + "p50": 314.2400085926056, + "p90": 329.50401306152344, + "p95": 339.26400542259216, + "p99": 374.36801195144653 + }, + "isolatedSum": { + "p50": 241.18400365114212, + "p90": 272.12799340486526, + "p95": 283.1040024757385, + "p99": 300.54399371147156 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77944832, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 135.96799969673157, + "p90": 154.91199493408203, + "p95": 162.75200247764587, + "p99": 174.5920032262802 + }, + "combine": { + "p50": 218.62399578094482, + "p90": 232.80000686645508, + "p95": 239.99999463558197, + "p99": 370.59199810028076 + }, + "roundtrip": { + "p50": 495.2639937400818, + "p90": 509.2160105705261, + "p95": 516.9280171394348, + "p99": 547.6800203323364 + }, + "isolatedSum": { + "p50": 354.5919954776764, + "p90": 387.7120018005371, + "p95": 402.75199711322784, + "p99": 545.184001326561 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156133376, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 203.07199656963348, + "p90": 224.95999932289124, + "p95": 230.04800081253052, + "p99": 242.5920069217682 + }, + "combine": { + "p50": 351.967990398407, + "p90": 361.5039885044098, + "p95": 367.2640025615692, + "p99": 383.2319974899292 + }, + "roundtrip": { + "p50": 836.3519906997681, + "p90": 849.6959805488586, + "p95": 854.1439771652222, + "p99": 861.3759875297546 + }, + "isolatedSum": { + "p50": 555.0399869680405, + "p90": 586.463987827301, + "p95": 597.3120033740997, + "p99": 625.8240044116974 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 311721984, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 333.2799971103668, + "p90": 350.14399886131287, + "p95": 359.96800661087036, + "p99": 417.4720048904419 + }, + "combine": { + "p50": 617.3120141029358, + "p90": 628.0959844589233, + "p95": 631.6159963607788, + "p99": 644.8959708213806 + }, + "roundtrip": { + "p50": 1508.4160566329956, + "p90": 1521.9520330429077, + "p95": 1531.7440032958984, + "p99": 1626.688003540039 + }, + "isolatedSum": { + "p50": 950.5920112133026, + "p90": 978.2399833202362, + "p95": 991.5840029716492, + "p99": 1062.3679757118225 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 621902848, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 604.9280166625977, + "p90": 614.6559715270996, + "p95": 619.488000869751, + "p99": 634.335994720459 + }, + "combine": { + "p50": 1122.1439838409424, + "p90": 1135.9360218048096, + "p95": 1145.7600593566895, + "p99": 1211.1680507659912 + }, + "roundtrip": { + "p50": 2860.6081008911133, + "p90": 2879.5840740203857, + "p95": 2889.3120288848877, + "p99": 3131.5200328826904 + }, + "isolatedSum": { + "p50": 1727.07200050354, + "p90": 1750.5919933319092, + "p95": 1765.2480602264404, + "p99": 1845.5040454864502 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243504640, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-4da6f6db", + "identity": 
"h200|deepep|7168|8|256|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "h200_87683f6c", + "comparisonKey": "90a8a7fc3b314f23", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:50:44.259181+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_3", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · fp8", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": "set:6:a426d66e479dc893", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271640687", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271640687", + "createdAt": "2026-06-26T23:50:44.259181+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + 
"tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 239.3600046634674, + "p90": 286.52799129486084, + "p95": 313.79199028015137, + "p99": 391.2000060081482 + }, + "combine": { + "p50": 97.21600264310837, + "p90": 110.59200018644333, + "p95": 116.67200177907944, + "p99": 134.783998131752 + }, + "roundtrip": { + "p50": 309.9519908428192, + "p90": 360.48001050949097, + "p95": 381.5680146217346, + "p99": 466.94400906562805 + }, + "isolatedSum": { + "p50": 336.5760073065758, + "p90": 397.11999148130417, + "p95": 430.4639920592308, + "p99": 525.9840041399002 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 291.0720109939575, + "p90": 340.5759930610657, + "p95": 355.19999265670776, + "p99": 430.30399084091187 + }, + "combine": { + "p50": 137.7599984407425, + "p90": 154.30399775505066, + "p95": 160.41600704193115, + "p99": 182.3360025882721 + }, + "roundtrip": { + "p50": 415.8079922199249, + "p90": 464.0960097312927, + "p95": 484.5759868621826, + "p99": 556.8320155143738 + }, + "isolatedSum": { + "p50": 428.8320094347, + "p90": 494.87999081611633, + "p95": 515.6159996986389, + "p99": 612.639993429184 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77944832, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 408.28800201416016, + "p90": 486.4000082015991, + "p95": 495.7759976387024, + "p99": 554.3680191040039 + }, + "combine": { + "p50": 219.10400688648224, + "p90": 233.37599635124207, + "p95": 239.48800563812256, + "p99": 266.07999205589294 + }, + "roundtrip": { + "p50": 
607.4560284614563, + "p90": 650.2400040626526, + "p95": 670.5920100212097, + "p99": 729.3760180473328 + }, + "isolatedSum": { + "p50": 627.3920089006424, + "p90": 719.7760045528412, + "p95": 735.264003276825, + "p99": 820.4480111598969 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156133376, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 621.9840049743652, + "p90": 667.8720116615295, + "p95": 696.0639953613281, + "p99": 765.0880217552185 + }, + "combine": { + "p50": 346.8480110168457, + "p90": 362.08000779151917, + "p95": 368.47999691963196, + "p99": 384.89601016044617 + }, + "roundtrip": { + "p50": 955.2639722824097, + "p90": 1010.1120471954346, + "p95": 1039.4879579544067, + "p99": 1108.6399555206299 + }, + "isolatedSum": { + "p50": 968.8320159912109, + "p90": 1029.9520194530487, + "p95": 1064.54399228096, + "p99": 1149.9840319156647 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311721984, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 1107.7439785003662, + "p90": 1126.9439458847046, + "p95": 1137.887954711914, + "p99": 1176.8319606781006 + }, + "combine": { + "p50": 609.9200248718262, + "p90": 624.4159936904907, + "p95": 631.8399906158447, + "p99": 652.1919965744019 + }, + "roundtrip": { + "p50": 1692.2240257263184, + "p90": 1713.1520509719849, + "p95": 1732.5439453125, + "p99": 1810.7199668884277 + }, + "isolatedSum": { + "p50": 1717.6640033721924, + "p90": 1751.3599395751953, + "p95": 1769.7279453277588, + "p99": 1829.0239572525024 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 621902848, + "combineLogicalBytes": 
1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 2100.4478931427, + "p90": 2129.312038421631, + "p95": 2148.47993850708, + "p99": 2358.464002609253 + }, + "combine": { + "p50": 1102.6560068130493, + "p90": 1120.0640201568604, + "p95": 1132.8959465026855, + "p99": 1158.560037612915 + }, + "roundtrip": { + "p50": 3193.376064300537, + "p90": 3219.615936279297, + "p95": 3229.9840450286865, + "p99": 3288.5758876800537 + }, + "isolatedSum": { + "p50": 3203.1038999557495, + "p90": 3249.376058578491, + "p95": 3281.3758850097656, + "p99": 3517.024040222168 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243504640, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-15326a90", + "identity": "h200|deepep|7168|8|384|fp8|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||cd50548525dafdf", + "colorKey": "h200_9979edfc", + "comparisonKey": "0bd4a1be28b155b0", + "schemaVersion": 3, + "generatedAt": "2026-06-27T11:14:15.177243+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_4", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · fp8", + "model": "Kimi-K2", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 384, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + 
"activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "cd50548525dafdf", + "workloadId": "set:6:b23bc0c4b6402c69", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28287502149", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28287502149", + "createdAt": "2026-06-27T11:14:15.177243+00:00", + "sha": "df7fddee0a275156d4c72fa006cd2b73bce72613" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 86.40000224113464, + "p90": 109.18399691581726, + "p95": 114.3999993801117, + "p99": 152.0960032939911 + }, + "combine": { + "p50": 96.99200093746185, + "p90": 110.55999994277954, + "p95": 116.83200299739838, + "p99": 123.64800274372101 + }, + "roundtrip": { + "p50": 211.42399311065674, + "p90": 238.11200261116028, + "p95": 247.8400021791458, + "p99": 270.81599831581116 + }, + "isolatedSum": { + "p50": 183.3920031785965, + "p90": 219.7439968585968, + "p95": 231.23200237751007, + "p99": 275.7440060377121 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38757376, + "combineLogicalBytes": 77514752, + "fanoutMean": 5.2802734375, + "recvTokensMax": 707, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 104.38399761915207, + "p90": 124.89599734544754, + 
"p95": 131.71200454235077, + "p99": 141.66399836540222 + }, + "combine": { + "p50": 137.05599308013916, + "p90": 149.82399344444275, + "p95": 154.14400398731232, + "p99": 171.87200486660004 + }, + "roundtrip": { + "p50": 308.8639974594116, + "p90": 326.7520070075989, + "p95": 331.2320113182068, + "p99": 342.52798557281494 + }, + "isolatedSum": { + "p50": 241.43999069929123, + "p90": 274.7199907898903, + "p95": 285.8560085296631, + "p99": 313.53600323200226 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77285376, + "combineLogicalBytes": 154570752, + "fanoutMean": 5.2646484375, + "recvTokensMax": 1391, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 141.12000167369843, + "p90": 168.47999393939972, + "p95": 177.47199535369873, + "p99": 233.43999683856964 + }, + "combine": { + "p50": 215.83999693393707, + "p90": 233.60000550746918, + "p95": 237.7600073814392, + "p99": 313.08799982070923 + }, + "roundtrip": { + "p50": 488.5759949684143, + "p90": 503.32802534103394, + "p95": 508.67199897766113, + "p99": 524.0640044212341 + }, + "isolatedSum": { + "p50": 356.9599986076355, + "p90": 402.0799994468689, + "p95": 415.23200273513794, + "p99": 546.5279966592789 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 154886144, + "combineLogicalBytes": 309772288, + "fanoutMean": 5.275390625, + "recvTokensMax": 2754, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 205.79199492931366, + "p90": 228.89600694179535, + "p95": 234.46400463581085, + "p99": 248.89600276947021 + }, + "combine": { + "p50": 347.3599851131439, + "p90": 359.0080142021179, + "p95": 364.73599076271057, + "p99": 389.3119990825653 + }, + "roundtrip": { + "p50": 830.016016960144, + "p90": 851.2319922447205, + "p95": 861.8239760398865, + "p99": 894.0479755401611 + }, + 
"isolatedSum": { + "p50": 553.1519800424576, + "p90": 587.9040211439133, + "p95": 599.1999953985214, + "p99": 638.2080018520355 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 309750784, + "combineLogicalBytes": 619501568, + "fanoutMean": 5.2750244140625, + "recvTokensMax": 5469, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 340.1600122451782, + "p90": 360.1279854774475, + "p95": 373.7280070781708, + "p99": 421.4720129966736 + }, + "combine": { + "p50": 600.1920104026794, + "p90": 613.1839752197266, + "p95": 621.2480068206787, + "p99": 657.696008682251 + }, + "roundtrip": { + "p50": 1490.880012512207, + "p90": 1514.016032218933, + "p95": 1529.2479991912842, + "p99": 1652.6720523834229 + }, + "isolatedSum": { + "p50": 940.3520226478577, + "p90": 973.3119606971741, + "p95": 994.9760138988495, + "p99": 1079.1680216789246 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 619687936, + "combineLogicalBytes": 1239375872, + "fanoutMean": 5.276611328125, + "recvTokensMax": 10883, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 612.2879981994629, + "p90": 627.1359920501709, + "p95": 634.656012058258, + "p99": 680.351972579956 + }, + "combine": { + "p50": 1088.5440111160278, + "p90": 1107.0400476455688, + "p95": 1131.872057914734, + "p99": 1238.976001739502 + }, + "roundtrip": { + "p50": 2821.4080333709717, + "p90": 2847.007989883423, + "p95": 2862.6561164855957, + "p99": 3033.9200496673584 + }, + "isolatedSum": { + "p50": 1700.8320093154907, + "p90": 1734.1760396957397, + "p95": 1766.528069972992, + "p99": 1919.327974319458 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1239834624, + "combineLogicalBytes": 2479669248, + "fanoutMean": 5.278564453125, + "recvTokensMax": 21730, + "stragglerRank": 4, + "correct": 
true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-d2673258", + "identity": "h200|deepep|7168|8|384|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||cd50548525dafdf", + "colorKey": "h200_87683f6c", + "comparisonKey": "ae4528707b5ffd7f", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:53:16.316846+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_3", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · fp8", + "model": "Kimi-K2", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 384, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "cd50548525dafdf", + "workloadId": "set:6:b23bc0c4b6402c69", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271725115", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271725115", + "createdAt": 
"2026-06-26T23:53:16.316846+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 221.27999365329742, + "p90": 242.20800399780273, + "p95": 255.3279995918274, + "p99": 294.94398832321167 + }, + "combine": { + "p50": 96.67199850082397, + "p90": 103.20000350475311, + "p95": 107.32799768447876, + "p99": 117.85600334405899 + }, + "roundtrip": { + "p50": 306.8479895591736, + "p90": 331.07200264930725, + "p95": 352.31998562812805, + "p99": 409.05600786209106 + }, + "isolatedSum": { + "p50": 317.9519921541214, + "p90": 345.40800750255585, + "p95": 362.65599727630615, + "p99": 412.79999166727066 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38757376, + "combineLogicalBytes": 77514752, + "fanoutMean": 5.2802734375, + "recvTokensMax": 707, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 282.04798698425293, + "p90": 307.3279857635498, + "p95": 327.2320032119751, + "p99": 442.68798828125 + }, + "combine": { + "p50": 138.87999951839447, + "p90": 145.05599439144135, + "p95": 152.73599326610565, + "p99": 170.01600563526154 + }, + "roundtrip": { + "p50": 410.46398878097534, + "p90": 435.39199233055115, + "p95": 465.6960070133209, + "p99": 525.2479910850525 + }, + "isolatedSum": { + "p50": 420.9279865026474, + "p90": 452.38398015499115, + "p95": 479.96799647808075, + "p99": 612.7039939165115 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77285376, + "combineLogicalBytes": 154570752, + "fanoutMean": 5.2646484375, + "recvTokensMax": 1391, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 390.9760117530823, + "p90": 407.8719913959503, + "p95": 414.3039882183075, + "p99": 448.2240080833435 + }, + "combine": { + "p50": 212.3199999332428, + 
"p90": 220.2560007572174, + "p95": 229.08799350261688, + "p99": 299.71200227737427 + }, + "roundtrip": { + "p50": 589.3120169639587, + "p90": 609.9839806556702, + "p95": 625.5040168762207, + "p99": 686.6880059242249 + }, + "isolatedSum": { + "p50": 603.2960116863251, + "p90": 628.1279921531677, + "p95": 643.3919817209244, + "p99": 747.9360103607178 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 154886144, + "combineLogicalBytes": 309772288, + "fanoutMean": 5.275390625, + "recvTokensMax": 2754, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 601.7919778823853, + "p90": 624.064028263092, + "p95": 640.0960087776184, + "p99": 705.2800059318542 + }, + "combine": { + "p50": 343.29599142074585, + "p90": 351.39200091362, + "p95": 357.02401399612427, + "p99": 386.01601123809814 + }, + "roundtrip": { + "p50": 930.400013923645, + "p90": 953.1520009040833, + "p95": 967.1040177345276, + "p99": 1069.5680379867554 + }, + "isolatedSum": { + "p50": 945.0879693031311, + "p90": 975.456029176712, + "p95": 997.1200227737427, + "p99": 1091.2960171699524 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 309750784, + "combineLogicalBytes": 619501568, + "fanoutMean": 5.2750244140625, + "recvTokensMax": 5469, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 1100.0959873199463, + "p90": 1113.9520406723022, + "p95": 1130.784034729004, + "p99": 1221.2159633636475 + }, + "combine": { + "p50": 596.3199734687805, + "p90": 606.9440245628357, + "p95": 612.6400232315063, + "p99": 648.5120058059692 + }, + "roundtrip": { + "p50": 1675.5199432373047, + "p90": 1687.999963760376, + "p95": 1695.3599452972412, + "p99": 2014.2719745635986 + }, + "isolatedSum": { + "p50": 1696.4159607887268, + "p90": 1720.896065235138, + "p95": 1743.4240579605103, + "p99": 
1869.7279691696167 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 619687936, + "combineLogicalBytes": 1239375872, + "fanoutMean": 5.276611328125, + "recvTokensMax": 10883, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 2087.3920917510986, + "p90": 2099.519968032837, + "p95": 2110.6879711151123, + "p99": 2213.7598991394043 + }, + "combine": { + "p50": 1087.4559879302979, + "p90": 1099.4240045547485, + "p95": 1103.5200357437134, + "p99": 1151.8080234527588 + }, + "roundtrip": { + "p50": 3166.016101837158, + "p90": 3187.0079040527344, + "p95": 3196.5761184692383, + "p99": 3422.0480918884277 + }, + "isolatedSum": { + "p50": 3174.8480796813965, + "p90": 3198.9439725875854, + "p95": 3214.2080068588257, + "p99": 3365.567922592163 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1239834624, + "combineLogicalBytes": 2479669248, + "fanoutMean": 5.278564453125, + "recvTokensMax": 21730, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-5a82a4d9", + "identity": "h200|deepep|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|normalized|0.18|64d989e2e2a6b31", + "colorKey": "h200_3a17d46b", + "comparisonKey": "680e15fb3428bab0", + "schemaVersion": 3, + "generatedAt": "2026-06-26T17:30:05.917629+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_10", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "normalized", + "suite": "resource-constrained", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · fp8 (norm)", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + 
"routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": 0.18, + "achievedFraction": 0.1818, + "configuredUnits": 24, + "deviceUnits": 132, + "resourceClass": "resource-constrained", + "conformanceClass": "resource-conforming", + "fixedKernel": false, + "paretoEligible": true + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": "set:6:a426d66e479dc893", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28254401482", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254401482", + "createdAt": "2026-06-26T17:30:05.917629+00:00", + "sha": "60dec7d70f554e252fec87709e2be52752947db1" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 86.81599795818329, + "p90": 108.2879975438118, + "p95": 115.26399850845337, + "p99": 141.79199934005737 + }, + "combine": { + "p50": 96.38399630784988, + "p90": 114.68800157308578, + "p95": 119.55200135707855, + "p99": 138.72000575065613 + }, + "roundtrip": { + "p50": 210.59200167655945, + "p90": 242.94400215148926, + "p95": 254.17599081993103, + "p99": 313.27998638153076 + }, + "isolatedSum": { + "p50": 183.19999426603317, + "p90": 222.97599911689758, + "p95": 234.81599986553192, + "p99": 280.5120050907135 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, + "correct": true, + 
"samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 103.2319962978363, + "p90": 128.28800082206726, + "p95": 134.8160058259964, + "p99": 155.07200360298157 + }, + "combine": { + "p50": 133.66399705410004, + "p90": 149.79200065135956, + "p95": 157.21599757671356, + "p99": 173.37599396705627 + }, + "roundtrip": { + "p50": 304.22401428222656, + "p90": 332.41599798202515, + "p95": 337.92001008987427, + "p99": 353.2800078392029 + }, + "isolatedSum": { + "p50": 236.89599335193634, + "p90": 278.0800014734268, + "p95": 292.03200340270996, + "p99": 328.44799757003784 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77944832, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 135.77599823474884, + "p90": 162.30399906635284, + "p95": 169.95200514793396, + "p99": 237.98400163650513 + }, + "combine": { + "p50": 203.2960057258606, + "p90": 220.41599452495575, + "p95": 226.55999660491943, + "p99": 257.31199979782104 + }, + "roundtrip": { + "p50": 476.9600033760071, + "p90": 496.63999676704407, + "p95": 511.55197620391846, + "p99": 544.7999835014343 + }, + "isolatedSum": { + "p50": 339.07200396060944, + "p90": 382.7199935913086, + "p95": 396.5120017528534, + "p99": 495.2960014343262 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156133376, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 196.57599925994873, + "p90": 218.87999773025513, + "p95": 225.3119945526123, + "p99": 253.7280023097992 + }, + "combine": { + "p50": 320.607990026474, + "p90": 335.2319896221161, + "p95": 344.4800078868866, + 
"p99": 365.9519851207733 + }, + "roundtrip": { + "p50": 794.7199940681458, + "p90": 817.6959753036499, + "p95": 837.0879888534546, + "p99": 910.5280041694641 + }, + "isolatedSum": { + "p50": 517.1839892864227, + "p90": 554.1119873523712, + "p95": 569.7920024394989, + "p99": 619.6799874305725 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311721984, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 320.16000151634216, + "p90": 343.55199337005615, + "p95": 363.45601081848145, + "p99": 439.9999976158142 + }, + "combine": { + "p50": 554.8160076141357, + "p90": 569.7919726371765, + "p95": 577.6000022888184, + "p99": 639.3280029296875 + }, + "roundtrip": { + "p50": 1425.7279634475708, + "p90": 1448.3519792556763, + "p95": 1468.4480428695679, + "p99": 1752.8959512710571 + }, + "isolatedSum": { + "p50": 874.9760091304779, + "p90": 913.3439660072327, + "p95": 941.0560131072998, + "p99": 1079.3280005455017 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 621902848, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 572.4160075187683, + "p90": 584.447979927063, + "p95": 591.6479825973511, + "p99": 629.6640038490295 + }, + "combine": { + "p50": 1012.6080513000488, + "p90": 1025.696039199829, + "p95": 1030.2400588989258, + "p99": 1060.1279735565186 + }, + "roundtrip": { + "p50": 2698.7199783325195, + "p90": 2725.055932998657, + "p95": 2745.215892791748, + "p99": 2952.064037322998 + }, + "isolatedSum": { + "p50": 1585.0240588188171, + "p90": 1610.144019126892, + "p95": 1621.8880414962769, + "p99": 1689.791977405548 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 1243504640, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-da3555d5", + "identity": "h200|deepep|7168|8|256|fp8|normal|cached-layout-comm-only-v1|uniform|8|prefill|normal|none|none|0|normalized|0.18|64d989e2e2a6b31", + "colorKey": "h200_50a9ee63", + "comparisonKey": "ee1a607167629f55", + "schemaVersion": 3, + "generatedAt": "2026-06-26T17:30:23.809590+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_13", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "normalized", + "suite": "resource-constrained", + "comparisonClass": "standardized", + "measurementContract": "cached-layout-comm-only-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · fp8 (norm) [cl]", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": 0.18, + "achievedFraction": 0.1818, + "configuredUnits": 24, + "deviceUnits": 132, + "resourceClass": "resource-constrained", + "conformanceClass": "resource-conforming", + "fixedKernel": false, + "paretoEligible": true + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": "set:6:a426d66e479dc893", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": 
"sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28254418007", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254418007", + "createdAt": "2026-06-26T17:30:23.809590+00:00", + "sha": "60dec7d70f554e252fec87709e2be52752947db1" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 73.69600236415863, + "p90": 84.63999629020691, + "p95": 90.08000046014786, + "p99": 106.6880002617836 + }, + "combine": { + "p50": 95.20000219345093, + "p90": 106.97600245475769, + "p95": 112.28799819946289, + "p99": 135.77599823474884 + }, + "roundtrip": { + "p50": 196.70400023460388, + "p90": 213.79199624061584, + "p95": 224.16000068187714, + "p99": 281.0240089893341 + }, + "isolatedSum": { + "p50": 168.89600455760956, + "p90": 191.6159987449646, + "p95": 202.36799865961075, + "p99": 242.46399849653244 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 91.71199798583984, + "p90": 108.0000028014183, + "p95": 111.87200248241425, + "p99": 124.57600235939026 + }, + "combine": { + "p50": 132.7359974384308, + "p90": 146.2399959564209, + "p95": 151.8400013446808, + "p99": 165.56799411773682 + }, + "roundtrip": { + "p50": 291.456013917923, + "p90": 308.57598781585693, + "p95": 313.34400177001953, + "p99": 330.78399300575256 + }, + "isolatedSum": { + "p50": 224.44799542427063, + "p90": 254.2399987578392, + "p95": 263.71200382709503, + "p99": 290.1439964771271 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77944832, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + 
"trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 125.50400197505951, + "p90": 144.3520039319992, + "p95": 149.85600113868713, + "p99": 213.6639952659607 + }, + "combine": { + "p50": 203.10400426387787, + "p90": 215.64799547195435, + "p95": 220.47999501228333, + "p99": 236.92800104618073 + }, + "roundtrip": { + "p50": 464.7040069103241, + "p90": 485.5999946594238, + "p95": 495.64799666404724, + "p99": 524.3520140647888 + }, + "isolatedSum": { + "p50": 328.6080062389374, + "p90": 359.99999940395355, + "p95": 370.33599615097046, + "p99": 450.5919963121414 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156133376, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 184.1599941253662, + "p90": 198.94400238990784, + "p95": 204.352006316185, + "p99": 232.12799429893494 + }, + "combine": { + "p50": 318.39999556541443, + "p90": 328.96000146865845, + "p95": 333.15199613571167, + "p99": 352.7359962463379 + }, + "roundtrip": { + "p50": 782.4640274047852, + "p90": 796.064019203186, + "p95": 802.4960160255432, + "p99": 826.4960050582886 + }, + "isolatedSum": { + "p50": 502.55998969078064, + "p90": 527.9040038585663, + "p95": 537.5040024518967, + "p99": 584.8639905452728 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311721984, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 304.3519854545593, + "p90": 320.8320140838623, + "p95": 336.2559974193573, + "p99": 371.42398953437805 + }, + "combine": { + "p50": 550.4000186920166, + "p90": 560.2880120277405, + "p95": 567.7760243415833, + "p99": 656.8959951400757 + }, + 
"roundtrip": { + "p50": 1410.4959964752197, + "p90": 1427.456021308899, + "p95": 1436.4160299301147, + "p99": 1585.2479934692383 + }, + "isolatedSum": { + "p50": 854.7520041465759, + "p90": 881.1200261116028, + "p95": 904.0320217609406, + "p99": 1028.3199846744537 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 621902848, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 542.8479909896851, + "p90": 557.5680136680603, + "p95": 565.5360221862793, + "p99": 587.7760052680969 + }, + "combine": { + "p50": 1013.5680437088013, + "p90": 1026.4320373535156, + "p95": 1031.999945640564, + "p99": 1048.192024230957 + }, + "roundtrip": { + "p50": 2668.4160232543945, + "p90": 2694.3039894104004, + "p95": 2716.320037841797, + "p99": 3019.615888595581 + }, + "isolatedSum": { + "p50": 1556.4160346984863, + "p90": 1584.000051021576, + "p95": 1597.5359678268433, + "p99": 1635.968029499054 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243504640, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-4a1bc537", + "identity": "h200|deepep|7168|8|256|fp8|normal|cached-layout-comm-only-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "h200_4f483b60", + "comparisonKey": "ac62097ce902c24f", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:50:33.490755+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_1", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "cached-layout-comm-only-v1", + 
"topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · fp8 [cl]", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": "set:6:a426d66e479dc893", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271633476", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271633476", + "createdAt": "2026-06-26T23:50:33.490755+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 75.71200281381607, + "p90": 95.29600292444229, + "p95": 102.11200267076492, + "p99": 128.83199751377106 + }, + "combine": { + "p50": 97.31200337409973, + "p90": 115.93600362539291, + "p95": 120.80000340938568, + "p99": 140.44800400733948 + }, + "roundtrip": { + "p50": 200.8959949016571, + "p90": 248.28800559043884, + "p95": 261.24799251556396, + "p99": 302.5600016117096 + }, + "isolatedSum": { + "p50": 173.0240061879158, + "p90": 211.2320065498352, + "p95": 222.9120060801506, + "p99": 
269.28000152111053 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 91.61599725484848, + "p90": 110.33599823713303, + "p95": 116.35199934244156, + "p99": 134.17600095272064 + }, + "combine": { + "p50": 136.76799833774567, + "p90": 151.5199989080429, + "p95": 159.04000401496887, + "p99": 170.6240028142929 + }, + "roundtrip": { + "p50": 299.45600032806396, + "p90": 324.38400387763977, + "p95": 331.07200264930725, + "p99": 365.7279908657074 + }, + "isolatedSum": { + "p50": 228.38399559259415, + "p90": 261.85599714517593, + "p95": 275.39200335741043, + "p99": 304.80000376701355 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77944832, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 127.83999741077423, + "p90": 142.94399321079254, + "p95": 150.4960060119629, + "p99": 162.7199947834015 + }, + "combine": { + "p50": 214.62400257587433, + "p90": 226.78400576114655, + "p95": 231.51999711990356, + "p99": 242.14400351047516 + }, + "roundtrip": { + "p50": 483.5200011730194, + "p90": 497.2800016403198, + "p95": 504.5120120048523, + "p99": 540.831983089447 + }, + "isolatedSum": { + "p50": 342.46399998664856, + "p90": 369.7279989719391, + "p95": 382.01600313186646, + "p99": 404.86399829387665 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156133376, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + 
"p50": 194.75199282169342, + "p90": 214.88000452518463, + "p95": 220.2879935503006, + "p99": 243.74400079250336 + }, + "combine": { + "p50": 346.3360071182251, + "p90": 362.8160059452057, + "p95": 374.4960129261017, + "p99": 426.56001448631287 + }, + "roundtrip": { + "p50": 824.5440125465393, + "p90": 852.5760173797607, + "p95": 862.2400164604187, + "p99": 896.6720104217529 + }, + "isolatedSum": { + "p50": 541.0879999399185, + "p90": 577.6960104703903, + "p95": 594.7840064764023, + "p99": 670.3040152788162 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311721984, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 325.0879943370819, + "p90": 342.52798557281494, + "p95": 348.9919900894165, + "p99": 374.9440014362335 + }, + "combine": { + "p50": 603.8720011711121, + "p90": 613.6959791183472, + "p95": 618.1120276451111, + "p99": 640.3520107269287 + }, + "roundtrip": { + "p50": 1486.36794090271, + "p90": 1510.7519626617432, + "p95": 1524.1600275039673, + "p99": 1566.3679838180542 + }, + "isolatedSum": { + "p50": 928.959995508194, + "p90": 956.2239646911621, + "p95": 967.1040177345276, + "p99": 1015.2960121631622 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 621902848, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 586.624026298523, + "p90": 618.9759969711304, + "p95": 627.6800036430359, + "p99": 654.7200083732605 + }, + "combine": { + "p50": 1108.8639497756958, + "p90": 1126.1119842529297, + "p95": 1134.2079639434814, + "p99": 1169.376015663147 + }, + "roundtrip": { + "p50": 2817.1839714050293, + "p90": 2849.3120670318604, + "p95": 
2871.0079193115234, + "p99": 3254.4960975646973 + }, + "isolatedSum": { + "p50": 1695.4879760742188, + "p90": 1745.08798122406, + "p95": 1761.8879675865173, + "p99": 1824.0960240364075 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243504640, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-8ae4b608", + "identity": "h200|nccl-ep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|16|decode|normal|none|none|0|normalized|0.18|22edb632bb1b9d9", + "colorKey": "h200_45246fb2", + "comparisonKey": "bd3ee598fb548c4d", + "schemaVersion": 3, + "generatedAt": "2026-06-28T15:33:05.143900+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_6", + "sku": "h200", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "normalized", + "suite": "resource-constrained", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-multinode-ib", + "transport": "rdma", + "worldSize": 16, + "epSize": 16, + "label": "H200 EP16 · nccl-ep · bf16 (norm)", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": 0.18, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "resource-constrained", + "conformanceClass": "resource-conforming", + "fixedKernel": false, + "paretoEligible": true + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 16, + "scaleUpDomain": 16 + }, + "routingConsistent": true, + "traceSignature": "22edb632bb1b9d9", + 
"workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28327088942", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28327088942", + "createdAt": "2026-06-28T15:33:05.143900+00:00", + "sha": "127785d43b1ea119c05a2b798bf0be56e5c9baa7" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 16, + "dispatch": { + "p50": 578.4000158309937, + "p90": 2543.3599948883057, + "p95": 2675.1999855041504, + "p99": 2675.1999855041504 + }, + "combine": { + "p50": 233.43999683856964, + "p90": 532.7680110931396, + "p95": 914.2079949378967, + "p99": 914.2079949378967 + }, + "roundtrip": { + "p50": 794.975996017456, + "p90": 861.2800240516663, + "p95": 1168.6400175094604, + "p99": 1168.6400175094604 + }, + "isolatedSum": { + "p50": 811.8400126695633, + "p90": 3076.1280059814453, + "p95": 3589.407980442047, + "p99": 3589.407980442047 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1505280, + "combineLogicalBytes": 1505280, + "fanoutMean": 6.5625, + "recvTokensMax": 12, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 8, + "trials": 1 + }, + { + "tokensPerRank": 2, + "globalTokens": 32, + "dispatch": { + "p50": 547.2319722175598, + "p90": 880.2559971809387, + "p95": 977.3759841918945, + "p99": 977.3759841918945 + }, + "combine": { + "p50": 212.25599944591522, + "p90": 238.3359968662262, + "p95": 239.32799696922302, + "p99": 239.32799696922302 + }, + "roundtrip": { + "p50": 960.6080055236816, + "p90": 2553.6320209503174, + "p95": 2696.3839530944824, + "p99": 2696.3839530944824 + }, + "isolatedSum": { + "p50": 759.487971663475, + "p90": 1118.591994047165, + "p95": 1216.7039811611176, + "p99": 1216.7039811611176 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3067904, + 
"combineLogicalBytes": 3067904, + "fanoutMean": 6.6875, + "recvTokensMax": 24, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 8, + "trials": 1 + }, + { + "tokensPerRank": 4, + "globalTokens": 64, + "dispatch": { + "p50": 621.504008769989, + "p90": 645.1839804649353, + "p95": 711.0400199890137, + "p99": 711.0400199890137 + }, + "combine": { + "p50": 249.08800423145294, + "p90": 263.64800333976746, + "p95": 269.53598856925964, + "p99": 269.53598856925964 + }, + "roundtrip": { + "p50": 1369.53604221344, + "p90": 1802.5599718093872, + "p95": 1879.744052886963, + "p99": 1879.744052886963 + }, + "isolatedSum": { + "p50": 870.592013001442, + "p90": 908.8319838047028, + "p95": 980.5760085582733, + "p99": 980.5760085582733 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 5992448, + "combineLogicalBytes": 5992448, + "fanoutMean": 6.53125, + "recvTokensMax": 43, + "stragglerRank": 10, + "correct": true, + "samplesPooled": 8, + "trials": 1 + }, + { + "tokensPerRank": 8, + "globalTokens": 128, + "dispatch": { + "p50": 611.8080019950867, + "p90": 2058.079957962036, + "p95": 2190.5601024627686, + "p99": 2190.5601024627686 + }, + "combine": { + "p50": 238.46399784088135, + "p90": 636.1280083656311, + "p95": 679.2960166931152, + "p99": 679.2960166931152 + }, + "roundtrip": { + "p50": 799.5200157165527, + "p90": 1625.3759860992432, + "p95": 2821.2480545043945, + "p99": 2821.2480545043945 + }, + "isolatedSum": { + "p50": 850.271999835968, + "p90": 2694.2079663276672, + "p95": 2869.856119155884, + "p99": 2869.856119155884 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 12214272, + "combineLogicalBytes": 12214272, + "fanoutMean": 6.65625, + "recvTokensMax": 84, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 8, + "trials": 1 + }, + { + "tokensPerRank": 16, + "globalTokens": 256, + "dispatch": { + "p50": 631.8399906158447, + "p90": 645.6639766693115, + "p95": 672.3840236663818, + "p99": 672.3840236663818 + }, + "combine": { + "p50": 
256.9279968738556, + "p90": 264.1279995441437, + "p95": 272.41599559783936, + "p99": 272.41599559783936 + }, + "roundtrip": { + "p50": 827.135980129242, + "p90": 967.136025428772, + "p95": 1139.7440433502197, + "p99": 1139.7440433502197 + }, + "isolatedSum": { + "p50": 888.7679874897003, + "p90": 909.7919762134552, + "p95": 944.8000192642212, + "p99": 944.8000192642212 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 24127488, + "combineLogicalBytes": 24127488, + "fanoutMean": 6.57421875, + "recvTokensMax": 154, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 8, + "trials": 1 + }, + { + "tokensPerRank": 32, + "globalTokens": 512, + "dispatch": { + "p50": 782.8800082206726, + "p90": 2639.967918395996, + "p95": 2675.584077835083, + "p99": 2675.584077835083 + }, + "combine": { + "p50": 265.855997800827, + "p90": 287.200003862381, + "p95": 290.43200612068176, + "p99": 290.43200612068176 + }, + "roundtrip": { + "p50": 890.496015548706, + "p90": 1573.8240480422974, + "p95": 2191.551923751831, + "p99": 2191.551923751831 + }, + "isolatedSum": { + "p50": 1048.7360060214996, + "p90": 2927.167922258377, + "p95": 2966.0160839557648, + "p99": 2966.0160839557648 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 48140288, + "combineLogicalBytes": 48140288, + "fanoutMean": 6.55859375, + "recvTokensMax": 295, + "stragglerRank": 15, + "correct": true, + "samplesPooled": 8, + "trials": 1 + }, + { + "tokensPerRank": 64, + "globalTokens": 1024, + "dispatch": { + "p50": 679.3280243873596, + "p90": 740.6079769134521, + "p95": 822.9439854621887, + "p99": 822.9439854621887 + }, + "combine": { + "p50": 339.9040102958679, + "p90": 763.9359831809998, + "p95": 791.6160225868225, + "p99": 791.6160225868225 + }, + "roundtrip": { + "p50": 922.2720265388489, + "p90": 1468.127965927124, + "p95": 1530.8159589767456, + "p99": 1530.8159589767456 + }, + "isolatedSum": { + "p50": 1019.2320346832275, + "p90": 1504.543960094452, + "p95": 1614.5600080490112, + "p99": 
1614.5600080490112 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 96165888, + "combineLogicalBytes": 96165888, + "fanoutMean": 6.55078125, + "recvTokensMax": 573, + "stragglerRank": 14, + "correct": true, + "samplesPooled": 8, + "trials": 1 + }, + { + "tokensPerRank": 128, + "globalTokens": 2048, + "dispatch": { + "p50": 808.1279993057251, + "p90": 833.5999846458435, + "p95": 1317.952036857605, + "p99": 1317.952036857605 + }, + "combine": { + "p50": 518.9120173454285, + "p90": 535.0080132484436, + "p95": 549.5679974555969, + "p99": 549.5679974555969 + }, + "roundtrip": { + "p50": 1294.9440479278564, + "p90": 1688.86399269104, + "p95": 2760.256052017212, + "p99": 2760.256052017212 + }, + "isolatedSum": { + "p50": 1327.0400166511536, + "p90": 1368.607997894287, + "p95": 1867.520034313202, + "p99": 1867.520034313202 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 191758336, + "combineLogicalBytes": 191758336, + "fanoutMean": 6.53125, + "recvTokensMax": 1126, + "stragglerRank": 15, + "correct": true, + "samplesPooled": 8, + "trials": 1 + } + ] + }, + { + "id": "cx-d2620b3b", + "identity": "h200|uccl|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h200_c317e88d", + "comparisonKey": "8bbd7f30d0bdbd11", + "schemaVersion": 3, + "generatedAt": "2026-06-27T17:36:22.388714+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_9", + "sku": "h200", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · uccl · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 
0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": "set:8:d8d49658059863f2", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28296668644", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28296668644", + "createdAt": "2026-06-27T17:36:22.388714+00:00", + "sha": "cfa1ec56258b94b4a173844810a163a832bcb07e" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 123.61600250005722, + "p90": 165.53600132465363, + "p95": 184.38400328159332, + "p99": 203.13599705696106 + }, + "combine": { + "p50": 83.93599838018417, + "p90": 102.33599692583084, + "p95": 113.76000195741653, + "p99": 124.89599734544754 + }, + "roundtrip": { + "p50": 184.32000279426575, + "p90": 227.52000391483307, + "p95": 243.3920055627823, + "p99": 272.38398790359497 + }, + "isolatedSum": { + "p50": 207.5520008802414, + "p90": 267.87199825048447, + "p95": 298.14400523900986, + "p99": 328.0319944024086 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + 
"p50": 114.84800279140472, + "p90": 152.6080071926117, + "p95": 160.0639969110489, + "p99": 180.9920072555542 + }, + "combine": { + "p50": 82.40000158548355, + "p90": 91.80799871683121, + "p95": 102.94400155544281, + "p99": 110.75200140476227 + }, + "roundtrip": { + "p50": 183.74399840831757, + "p90": 219.7120040655136, + "p95": 225.69599747657776, + "p99": 255.71200251579285 + }, + "isolatedSum": { + "p50": 197.24800437688828, + "p90": 244.4160059094429, + "p95": 263.0079984664917, + "p99": 291.74400866031647 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 111.68000102043152, + "p90": 147.8080004453659, + "p95": 156.19200468063354, + "p99": 167.35999286174774 + }, + "combine": { + "p50": 84.1279998421669, + "p90": 91.96799993515015, + "p95": 107.55199939012527, + "p99": 117.85600334405899 + }, + "roundtrip": { + "p50": 196.44799828529358, + "p90": 245.2480047941208, + "p95": 256.3199996948242, + "p99": 278.0480086803436 + }, + "isolatedSum": { + "p50": 195.80800086259842, + "p90": 239.77600038051605, + "p95": 263.7440040707588, + "p99": 285.21599620580673 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 112.83200234174728, + "p90": 151.19999647140503, + "p95": 155.87200224399567, + "p99": 166.33599996566772 + }, + "combine": { + "p50": 84.25600081682205, + "p90": 98.39999675750732, + "p95": 109.56799983978271, + "p99": 117.8240031003952 + }, + "roundtrip": { + "p50": 184.9920004606247, + "p90": 221.82400524616241, + "p95": 229.98400032520294, + "p99": 
244.35199797153473 + }, + "isolatedSum": { + "p50": 197.08800315856934, + "p90": 249.59999322891235, + "p95": 265.4400020837784, + "p99": 284.1600030660629 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 120.28799951076508, + "p90": 159.93599593639374, + "p95": 176.64000391960144, + "p99": 217.02399849891663 + }, + "combine": { + "p50": 85.9839990735054, + "p90": 95.42399644851685, + "p95": 103.64799946546555, + "p99": 113.63200098276138 + }, + "roundtrip": { + "p50": 203.0400037765503, + "p90": 253.91998887062073, + "p95": 280.5759906768799, + "p99": 364.51199650764465 + }, + "isolatedSum": { + "p50": 206.27199858427048, + "p90": 255.35999238491058, + "p95": 280.288003385067, + "p99": 330.655999481678 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 123.58400225639343, + "p90": 147.96799421310425, + "p95": 156.38400614261627, + "p99": 169.5680022239685 + }, + "combine": { + "p50": 91.67999774217606, + "p90": 106.20799660682678, + "p95": 115.99999666213989, + "p99": 126.97599828243256 + }, + "roundtrip": { + "p50": 195.96800208091736, + "p90": 235.07200181484222, + "p95": 244.35199797153473, + "p99": 258.87998938560486 + }, + "isolatedSum": { + "p50": 215.2639999985695, + "p90": 254.17599081993103, + "p95": 272.38400280475616, + "p99": 296.54400050640106 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 4, + "correct": true, + 
"samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 136.00000739097595, + "p90": 157.24800527095795, + "p95": 164.89599645137787, + "p99": 197.37599790096283 + }, + "combine": { + "p50": 100.54399818181992, + "p90": 108.22399705648422, + "p95": 118.40000003576279, + "p99": 127.07200646400452 + }, + "roundtrip": { + "p50": 203.96800339221954, + "p90": 239.96800184249878, + "p95": 250.46399235725403, + "p99": 268.38400959968567 + }, + "isolatedSum": { + "p50": 236.54400557279587, + "p90": 265.47200232744217, + "p95": 283.29599648714066, + "p99": 324.44800436496735 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 156.15999698638916, + "p90": 172.38399386405945, + "p95": 181.66400492191315, + "p99": 197.4720060825348 + }, + "combine": { + "p50": 119.1679984331131, + "p90": 133.18400084972382, + "p95": 142.84799993038177, + "p99": 152.96000242233276 + }, + "roundtrip": { + "p50": 237.69600689411163, + "p90": 256.0639977455139, + "p95": 266.01600646972656, + "p99": 278.2079875469208 + }, + "isolatedSum": { + "p50": 275.32799541950226, + "p90": 305.56799471378326, + "p95": 324.5120048522949, + "p99": 350.43200850486755 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-ec807828", + "identity": "h200|uccl|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "h200_c317e88d", + "comparisonKey": "4f6cbb2ad4892beb", + "schemaVersion": 3, + "generatedAt": 
"2026-06-27T17:36:28.990296+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_13", + "sku": "h200", + "backend": "uccl", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · uccl · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": "set:6:a426d66e479dc893", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28296668644", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28296668644", + "createdAt": "2026-06-27T17:36:28.990296+00:00", + "sha": "cfa1ec56258b94b4a173844810a163a832bcb07e" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 159.71200168132782, + "p90": 192.60799884796143, + "p95": 207.8399956226349, + "p99": 266.6560113430023 + }, + "combine": { + "p50": 120.92799693346024, + 
"p90": 134.20799374580383, + "p95": 145.9839940071106, + "p99": 155.7759940624237 + }, + "roundtrip": { + "p50": 235.00800132751465, + "p90": 250.94398856163025, + "p95": 275.55200457572937, + "p99": 301.66399478912354 + }, + "isolatedSum": { + "p50": 280.63999861478806, + "p90": 326.81599259376526, + "p95": 353.8239896297455, + "p99": 422.432005405426 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 185.85599958896637, + "p90": 202.78400182724, + "p95": 209.82399582862854, + "p99": 239.71199989318848 + }, + "combine": { + "p50": 160.89600324630737, + "p90": 168.86399686336517, + "p95": 174.27200078964233, + "p99": 189.88800048828125 + }, + "roundtrip": { + "p50": 307.20001459121704, + "p90": 324.5759904384613, + "p95": 329.3440043926239, + "p99": 353.0240058898926 + }, + "isolatedSum": { + "p50": 346.75200283527374, + "p90": 371.64799869060516, + "p95": 384.0959966182709, + "p99": 429.6000003814697 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155889664, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 239.29600417613983, + "p90": 249.24799799919128, + "p95": 254.97600436210632, + "p99": 267.2320008277893 + }, + "combine": { + "p50": 236.80000007152557, + "p90": 243.93600225448608, + "p95": 246.72000110149384, + "p99": 257.1200132369995 + }, + "roundtrip": { + "p50": 436.2879991531372, + "p90": 448.3200013637543, + "p95": 454.52800393104553, + "p99": 473.2159972190857 + }, + "isolatedSum": { + "p50": 476.0960042476654, + "p90": 493.18400025367737, + "p95": 501.69600546360016, + "p99": 
524.3520140647888 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 312266752, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 351.74399614334106, + "p90": 368.99200081825256, + "p95": 383.35999846458435, + "p99": 419.23201084136963 + }, + "combine": { + "p50": 371.7440068721771, + "p90": 381.72799348831177, + "p95": 388.3199989795685, + "p99": 399.26400780677795 + }, + "roundtrip": { + "p50": 682.9439997673035, + "p90": 696.7359781265259, + "p95": 707.647979259491, + "p99": 768.2560086250305 + }, + "isolatedSum": { + "p50": 723.4880030155182, + "p90": 750.7199943065643, + "p95": 771.6799974441528, + "p99": 818.4960186481476 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 623443968, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 584.7359895706177, + "p90": 603.0399799346924, + "p95": 611.1680269241333, + "p99": 635.0719928741455 + }, + "combine": { + "p50": 632.9600214958191, + "p90": 644.3520188331604, + "p95": 648.0640172958374, + "p99": 671.2639927864075 + }, + "roundtrip": { + "p50": 1173.792004585266, + "p90": 1189.3759965896606, + "p95": 1196.7999935150146, + "p99": 1212.448000907898 + }, + "isolatedSum": { + "p50": 1217.6960110664368, + "p90": 1247.3919987678528, + "p95": 1259.2320442199707, + "p99": 1306.335985660553 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243805696, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": 
{ + "p50": 1028.6400318145752, + "p90": 1050.5599975585938, + "p95": 1060.0320100784302, + "p99": 1135.2959871292114 + }, + "combine": { + "p50": 1139.7119760513306, + "p90": 1153.1200408935547, + "p95": 1158.5919857025146, + "p99": 1179.0399551391602 + }, + "roundtrip": { + "p50": 2122.623920440674, + "p90": 2145.440101623535, + "p95": 2151.3919830322266, + "p99": 2202.49605178833 + }, + "isolatedSum": { + "p50": 2168.3520078659058, + "p90": 2203.6800384521484, + "p95": 2218.623995780945, + "p99": 2314.3359422683716 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487009280, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-279043f8", + "identity": "mi355x|mori|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|fp8-saturation|none|none|0|tuned||c774c8e4abb34da", + "colorKey": "mi355x_4ec24046", + "comparisonKey": "5776ea979804ef91", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:08:32.534640+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "mi355x-amds_05", + "sku": "mi355x", + "backend": "mori", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "mi355x-xgmi", + "transport": "xgmi", + "worldSize": 8, + "epSize": 8, + "label": "MI355X EP8 · mori · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "fp8-saturation", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.3125, + "configuredUnits": 80, + "deviceUnits": 256, + 
"resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "c774c8e4abb34da", + "workloadId": "set:5:d8d49658059863f2", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "image:rocm/sgl-dev:sglang-0.5.9-rocm720-mi35x-mori-0227-2", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28272169530", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272169530", + "createdAt": "2026-06-27T00:08:32.534640+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 40.19999876618385, + "p90": 43.000999838113785, + "p95": 44.56000030040741, + "p99": 47.880999743938446 + }, + "combine": { + "p50": 17.760999500751495, + "p90": 19.360000267624855, + "p95": 20.959999412298203, + "p99": 23.080000653862953 + }, + "roundtrip": { + "p50": 56.04099854826927, + "p90": 59.00000035762787, + "p95": 60.201000422239304, + "p99": 62.24000081419945 + }, + "isolatedSum": { + "p50": 57.96099826693535, + "p90": 62.36100010573864, + "p95": 65.51999971270561, + "p99": 70.9610003978014 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 42.64099895954132, + "p90": 45.52000015974045, + "p95": 47.07999899983406, + "p99": 49.76100102066994 + }, + "combine": { + "p50": 16.599999740719795, + "p90": 18.60000006854534, + "p95": 19.79999989271164, + "p99": 
23.080000653862953 + }, + "roundtrip": { + "p50": 58.96100029349327, + "p90": 62.39999830722809, + "p95": 64.32099640369415, + "p99": 102.64100134372711 + }, + "isolatedSum": { + "p50": 59.240998700261116, + "p90": 64.12000022828579, + "p95": 66.8799988925457, + "p99": 72.84100167453289 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 42.160000652074814, + "p90": 44.76099833846092, + "p95": 46.20100185275078, + "p99": 48.5600009560585 + }, + "combine": { + "p50": 19.759999588131905, + "p90": 21.27999998629093, + "p95": 22.5210003554821, + "p99": 25.200000032782555 + }, + "roundtrip": { + "p50": 62.001001089811325, + "p90": 65.32099843025208, + "p95": 66.16000086069107, + "p99": 69.15999948978424 + }, + "isolatedSum": { + "p50": 61.92000024020672, + "p90": 66.04099832475185, + "p95": 68.72200220823288, + "p99": 73.76000098884106 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 42.399998754262924, + "p90": 45.35999894142151, + "p95": 47.15999960899353, + "p99": 49.52000081539154 + }, + "combine": { + "p50": 20.880000665783882, + "p90": 23.08100089430809, + "p95": 24.04000051319599, + "p99": 26.441000401973724 + }, + "roundtrip": { + "p50": 62.52100318670273, + "p90": 65.64100086688995, + "p95": 66.56000018119812, + "p99": 68.84100288152695 + }, + "isolatedSum": { + "p50": 63.279999420046806, + "p90": 68.4409998357296, + "p95": 71.20000012218952, + "p99": 75.96100121736526 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + 
"combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 42.52000153064728, + "p90": 45.32000049948692, + "p95": 46.640001237392426, + "p99": 49.04000088572502 + }, + "combine": { + "p50": 25.599999353289604, + "p90": 27.799999341368675, + "p95": 29.239999130368233, + "p99": 31.520001590251923 + }, + "roundtrip": { + "p50": 67.63999909162521, + "p90": 70.60100138187408, + "p95": 71.68100029230118, + "p99": 74.36099648475647 + }, + "isolatedSum": { + "p50": 68.12000088393688, + "p90": 73.1199998408556, + "p95": 75.88000036776066, + "p99": 80.56000247597694 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-60c60832", + "identity": "mi355x|mori|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||c774c8e4abb34da", + "colorKey": "mi355x_4ec24046", + "comparisonKey": "3677ee6ace04ac65", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:53:59.155172+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "mi355x-amds_05", + "sku": "mi355x", + "backend": "mori", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "mi355x-xgmi", + "transport": "xgmi", + "worldSize": 8, + "epSize": 8, + "label": "MI355X EP8 · mori · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + 
"activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.3125, + "configuredUnits": 80, + "deviceUnits": 256, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "c774c8e4abb34da", + "workloadId": "set:5:d8d49658059863f2", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "image:rocm/sgl-dev:sglang-0.5.9-rocm720-mi35x-mori-0227-2", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28273516714", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28273516714", + "createdAt": "2026-06-27T00:53:59.155172+00:00", + "sha": "2c15d9415503e9ccb84cd49cf446a122796efc1e" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 40.6000018119812, + "p90": 43.76000165939331, + "p95": 45.239999890327454, + "p99": 54.71999943256378 + }, + "combine": { + "p50": 17.920000478625298, + "p90": 19.039999693632126, + "p95": 20.999999716877937, + "p99": 22.87999913096428 + }, + "roundtrip": { + "p50": 56.32000043988228, + "p90": 59.4400018453598, + "p95": 60.64099818468094, + "p99": 63.19999694824219 + }, + "isolatedSum": { + "p50": 58.5200022906065, + "p90": 62.800001353025436, + "p95": 66.23999960720539, + "p99": 77.59999856352806 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 42.64000058174133, + "p90": 
45.35999894142151, + "p95": 46.76000028848648, + "p99": 50.23999884724617 + }, + "combine": { + "p50": 16.759999096393585, + "p90": 18.68000067770481, + "p95": 19.801000133156776, + "p99": 22.08000048995018 + }, + "roundtrip": { + "p50": 58.9199997484684, + "p90": 61.799999326467514, + "p95": 62.95999884605408, + "p99": 65.20000100135803 + }, + "isolatedSum": { + "p50": 59.39999967813492, + "p90": 64.03999961912632, + "p95": 66.56100042164326, + "p99": 72.31999933719635 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 42.44000092148781, + "p90": 45.281000435352325, + "p95": 46.4400015771389, + "p99": 47.919999808073044 + }, + "combine": { + "p50": 19.999999552965164, + "p90": 21.99999988079071, + "p95": 23.360000923275948, + "p99": 25.72000026702881 + }, + "roundtrip": { + "p50": 61.91999837756157, + "p90": 65.20099937915802, + "p95": 66.3599967956543, + "p99": 67.84100085496902 + }, + "isolatedSum": { + "p50": 62.44000047445297, + "p90": 67.28100031614304, + "p95": 69.80000250041485, + "p99": 73.64000007510185 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 42.44000092148781, + "p90": 45.00100016593933, + "p95": 46.88100144267082, + "p99": 49.27999898791313 + }, + "combine": { + "p50": 20.880000665783882, + "p90": 22.840000689029694, + "p95": 24.240000173449516, + "p99": 26.399999856948853 + }, + "roundtrip": { + "p50": 62.401000410318375, + "p90": 65.48000127077103, + "p95": 66.28099828958511, + "p99": 68.00000369548798 + }, + "isolatedSum": { + "p50": 
63.32000158727169, + "p90": 67.84100085496902, + "p95": 71.12100161612034, + "p99": 75.67999884486198 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 42.520999908447266, + "p90": 45.1200008392334, + "p95": 46.59999907016754, + "p99": 49.04000088572502 + }, + "combine": { + "p50": 25.8799996227026, + "p90": 27.879999950528145, + "p95": 29.239999130368233, + "p99": 31.800001859664917 + }, + "roundtrip": { + "p50": 67.80099868774414, + "p90": 71.16000354290009, + "p95": 72.2000002861023, + "p99": 74.47999715805054 + }, + "isolatedSum": { + "p50": 68.40099953114986, + "p90": 73.00000078976154, + "p95": 75.83999820053577, + "p99": 80.84000274538994 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-f513e0f0", + "identity": "mi355x|mori|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|small-amplitude|none|none|0|tuned||c774c8e4abb34da", + "colorKey": "mi355x_4ec24046", + "comparisonKey": "43eedfb9c3cc2b53", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:07:01.734617+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "mi355x-amds_01", + "sku": "mi355x", + "backend": "mori", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "mi355x-xgmi", + "transport": "xgmi", + "worldSize": 8, + "epSize": 8, + "label": "MI355X EP8 · mori · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + 
"experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "small-amplitude", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.3125, + "configuredUnits": 80, + "deviceUnits": 256, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "c774c8e4abb34da", + "workloadId": "set:5:d8d49658059863f2", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "image:rocm/sgl-dev:sglang-0.5.9-rocm720-mi35x-mori-0227-2", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28272162006", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272162006", + "createdAt": "2026-06-27T00:07:01.734617+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 40.44099897146225, + "p90": 43.72100159525871, + "p95": 45.1200008392334, + "p99": 51.600001752376556 + }, + "combine": { + "p50": 15.960000455379486, + "p90": 18.160000443458557, + "p95": 19.279999658465385, + "p99": 21.159999072551727 + }, + "roundtrip": { + "p50": 55.56099861860275, + "p90": 58.75999853014946, + "p95": 60.120001435279846, + "p99": 63.63999843597412 + }, + "isolatedSum": { + "p50": 56.400999426841736, + "p90": 61.88100203871727, + "p95": 64.40000049769878, + "p99": 72.76000082492828 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + 
"stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 42.08099842071533, + "p90": 45.0810007750988, + "p95": 46.39999940991402, + "p99": 49.76100102066994 + }, + "combine": { + "p50": 16.00000075995922, + "p90": 18.60000006854534, + "p95": 19.55999992787838, + "p99": 21.920999512076378 + }, + "roundtrip": { + "p50": 58.32099914550781, + "p90": 61.64000183343887, + "p95": 63.600003719329834, + "p99": 67.59999692440033 + }, + "isolatedSum": { + "p50": 58.08099918067455, + "p90": 63.68100084364414, + "p95": 65.9599993377924, + "p99": 71.68200053274632 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 41.839998215436935, + "p90": 44.920001178979874, + "p95": 46.28000035881996, + "p99": 49.40100014209747 + }, + "combine": { + "p50": 19.31999996304512, + "p90": 21.75999991595745, + "p95": 22.5600004196167, + "p99": 24.43999983370304 + }, + "roundtrip": { + "p50": 60.80099940299988, + "p90": 64.03999775648117, + "p95": 65.56099653244019, + "p99": 69.92000341415405 + }, + "isolatedSum": { + "p50": 61.159998178482056, + "p90": 66.68000109493732, + "p95": 68.84000077843666, + "p99": 73.84099997580051 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 42.080000042915344, + "p90": 45.20000144839287, + "p95": 46.64099961519241, + "p99": 48.43999817967415 + }, + "combine": { + "p50": 20.16099914908409, + "p90": 22.280000150203705, + "p95": 23.04000034928322, + "p99": 
24.960000067949295 + }, + "roundtrip": { + "p50": 62.199998646974564, + "p90": 65.36100059747696, + "p95": 66.72099977731705, + "p99": 68.71999800205231 + }, + "isolatedSum": { + "p50": 62.240999191999435, + "p90": 67.48000159859657, + "p95": 69.68099996447563, + "p99": 73.39999824762344 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 42.24099963903427, + "p90": 45.239999890327454, + "p95": 46.36099934577942, + "p99": 48.40100184082985 + }, + "combine": { + "p50": 24.639999493956566, + "p90": 26.88100002706051, + "p95": 27.881000190973282, + "p99": 30.079999938607216 + }, + "roundtrip": { + "p50": 67.47999787330627, + "p90": 70.60100138187408, + "p95": 72.28100299835205, + "p99": 75.20099729299545 + }, + "isolatedSum": { + "p50": 66.88099913299084, + "p90": 72.12099991738796, + "p95": 74.2419995367527, + "p99": 78.48100177943707 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-67074ab6", + "identity": "mi355x|mori|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|wide-dynamic-range|none|none|0|tuned||c774c8e4abb34da", + "colorKey": "mi355x_4ec24046", + "comparisonKey": "2ccb7553c969aafc", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:07:48.076161+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "mi355x-amds_06", + "sku": "mi355x", + "backend": "mori", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": 
"mi355x-xgmi", + "transport": "xgmi", + "worldSize": 8, + "epSize": 8, + "label": "MI355X EP8 · mori · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "wide-dynamic-range", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.3125, + "configuredUnits": 80, + "deviceUnits": 256, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "c774c8e4abb34da", + "workloadId": "set:5:d8d49658059863f2", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "image:rocm/sgl-dev:sglang-0.5.9-rocm720-mi35x-mori-0227-2", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28272165928", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272165928", + "createdAt": "2026-06-27T00:07:48.076161+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 40.240999311208725, + "p90": 43.43999922275543, + "p95": 44.76099833846092, + "p99": 48.11999946832657 + }, + "combine": { + "p50": 16.839999705553055, + "p90": 18.319999799132347, + "p95": 19.600000232458115, + "p99": 23.399999365210533 + }, + "roundtrip": { + "p50": 56.120000779628754, + "p90": 59.48000028729439, + "p95": 60.76100096106529, + "p99": 65.24000316858292 + }, + "isolatedSum": { + "p50": 57.08099901676178, + "p90": 61.75999902188778, + "p95": 
64.36099857091904, + "p99": 71.5199988335371 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 42.44000092148781, + "p90": 45.48000171780586, + "p95": 46.51999846100807, + "p99": 49.19999837875366 + }, + "combine": { + "p50": 16.201000660657883, + "p90": 18.479999154806137, + "p95": 19.55999992787838, + "p99": 21.800000220537186 + }, + "roundtrip": { + "p50": 58.80099907517433, + "p90": 61.96000054478645, + "p95": 62.76000291109085, + "p99": 64.19999897480011 + }, + "isolatedSum": { + "p50": 58.64100158214569, + "p90": 63.960000872612, + "p95": 66.07999838888645, + "p99": 70.99999859929085 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 42.04000160098076, + "p90": 44.280000030994415, + "p95": 45.921001583337784, + "p99": 49.28100109100342 + }, + "combine": { + "p50": 19.039999693632126, + "p90": 21.51999995112419, + "p95": 22.801000624895096, + "p99": 24.560000747442245 + }, + "roundtrip": { + "p50": 61.601001769304276, + "p90": 64.92000073194504, + "p95": 66.00099802017212, + "p99": 67.72000342607498 + }, + "isolatedSum": { + "p50": 61.080001294612885, + "p90": 65.7999999821186, + "p95": 68.72200220823288, + "p99": 73.84100183844566 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 42.27999970316887, + "p90": 
45.00000178813934, + "p95": 46.23999819159508, + "p99": 48.16000163555145 + }, + "combine": { + "p50": 20.320000126957893, + "p90": 23.32100085914135, + "p95": 25.439999997615814, + "p99": 57.88100138306618 + }, + "roundtrip": { + "p50": 62.3599998652935, + "p90": 65.0399997830391, + "p95": 66.0799965262413, + "p99": 68.00100207328796 + }, + "isolatedSum": { + "p50": 62.59999983012676, + "p90": 68.3210026472807, + "p95": 71.67999818921089, + "p99": 106.04100301861763 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 42.399998754262924, + "p90": 45.0810007750988, + "p95": 46.23999819159508, + "p99": 48.8400012254715 + }, + "combine": { + "p50": 25.120999664068222, + "p90": 27.2000003606081, + "p95": 28.161000460386276, + "p99": 30.319999903440475 + }, + "roundtrip": { + "p50": 67.63999909162521, + "p90": 70.79999893903732, + "p95": 71.68000191450119, + "p99": 73.72000068426132 + }, + "isolatedSum": { + "p50": 67.52099841833115, + "p90": 72.2810011357069, + "p95": 74.40099865198135, + "p99": 79.16000112891197 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-23f1ecd4", + "identity": "mi355x|mori|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|zeros|none|none|0|tuned||c774c8e4abb34da", + "colorKey": "mi355x_4ec24046", + "comparisonKey": "1ab1f06166250146", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:06:16.763261+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "mi355x-amds_02", + "sku": "mi355x", + "backend": "mori", + "phase": "decode", + "mode": 
"normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "mi355x-xgmi", + "transport": "xgmi", + "worldSize": 8, + "epSize": 8, + "label": "MI355X EP8 · mori · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "zeros", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.3125, + "configuredUnits": 80, + "deviceUnits": 256, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "c774c8e4abb34da", + "workloadId": "set:5:d8d49658059863f2", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "image:rocm/sgl-dev:sglang-0.5.9-rocm720-mi35x-mori-0227-2", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28272158268", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272158268", + "createdAt": "2026-06-27T00:06:16.763261+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 40.240999311208725, + "p90": 43.5199998319149, + "p95": 44.920001178979874, + "p99": 54.32000011205673 + }, + "combine": { + "p50": 17.680000513792038, + "p90": 19.401000812649727, + "p95": 20.759999752044678, + "p99": 23.80100078880787 + }, + "roundtrip": { + "p50": 56.040000170469284, + "p90": 
59.12100151181221, + "p95": 60.47999858856201, + "p99": 63.040003180503845 + }, + "isolatedSum": { + "p50": 57.92099982500076, + "p90": 62.92100064456463, + "p95": 65.68000093102455, + "p99": 78.1210009008646 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 42.319998145103455, + "p90": 44.87999901175499, + "p95": 46.480998396873474, + "p99": 49.320999532938 + }, + "combine": { + "p50": 16.720000654459, + "p90": 18.240999430418015, + "p95": 19.401000812649727, + "p99": 23.240000009536743 + }, + "roundtrip": { + "p50": 58.479998260736465, + "p90": 61.879999935626984, + "p95": 62.880001962184906, + "p99": 65.99999964237213 + }, + "isolatedSum": { + "p50": 59.039998799562454, + "p90": 63.120998442173004, + "p95": 65.8819992095232, + "p99": 72.56099954247475 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 41.919998824596405, + "p90": 45.120999217033386, + "p95": 46.59999907016754, + "p99": 50.84000155329704 + }, + "combine": { + "p50": 19.79999989271164, + "p90": 21.27999998629093, + "p95": 23.16099964082241, + "p99": 25.400999933481216 + }, + "roundtrip": { + "p50": 61.51999905705452, + "p90": 64.40100073814392, + "p95": 65.80100208520889, + "p99": 68.24000179767609 + }, + "isolatedSum": { + "p50": 61.719998717308044, + "p90": 66.40099920332432, + "p95": 69.76099871098995, + "p99": 76.24100148677826 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 1, 
+ "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 42.121000587940216, + "p90": 45.04000023007393, + "p95": 46.31999880075455, + "p99": 50.641000270843506 + }, + "combine": { + "p50": 21.04100026190281, + "p90": 22.95999974012375, + "p95": 24.6799997985363, + "p99": 26.920000091195107 + }, + "roundtrip": { + "p50": 62.20100075006485, + "p90": 66.39999896287918, + "p95": 68.59999895095825, + "p99": 95.88100016117096 + }, + "isolatedSum": { + "p50": 63.162000849843025, + "p90": 67.99999997019768, + "p95": 70.99999859929085, + "p99": 77.56100036203861 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 42.281001806259155, + "p90": 45.27999833226204, + "p95": 46.51999846100807, + "p99": 49.320001155138016 + }, + "combine": { + "p50": 25.919999927282333, + "p90": 28.080999851226807, + "p95": 29.559999704360962, + "p99": 32.35999867320061 + }, + "roundtrip": { + "p50": 67.31999665498734, + "p90": 70.2809989452362, + "p95": 71.40100002288818, + "p99": 74.16000217199326 + }, + "isolatedSum": { + "p50": 68.20100173354149, + "p90": 73.36099818348885, + "p95": 76.07999816536903, + "p99": 81.67999982833862 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-83a44089", + "identity": "mi355x|mori|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|decode|normal|none|none|0|tuned||2c22646e864c27e", + "colorKey": "mi355x_eb5b377e", + "comparisonKey": "5bbe7a250a72d8b4", + "schemaVersion": 3, + "generatedAt": 
"2026-06-26T23:58:24.839410+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "mi355x-amds_01", + "sku": "mi355x", + "backend": "mori", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "mi355x-xgmi", + "transport": "xgmi", + "worldSize": 8, + "epSize": 8, + "label": "MI355X EP8 · mori · bf16 · balanced", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced", + "routingLabel": "balanced", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.3125, + "configuredUnits": 80, + "deviceUnits": 256, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "2c22646e864c27e", + "workloadId": "set:5:7af12818400d6348", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "image:rocm/sgl-dev:sglang-0.5.9-rocm720-mi35x-mori-0227-2", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271906612", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271906612", + "createdAt": "2026-06-26T23:58:24.839410+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 40.36099836230278, + "p90": 43.44100132584572, + "p95": 44.60100084543228, + "p99": 48.920001834630966 
+ }, + "combine": { + "p50": 16.3199994713068, + "p90": 18.880000337958336, + "p95": 19.88000050187111, + "p99": 21.880999207496643 + }, + "roundtrip": { + "p50": 57.20100179314613, + "p90": 60.63999980688095, + "p95": 61.72100082039833, + "p99": 64.56000357866287 + }, + "isolatedSum": { + "p50": 56.68099783360958, + "p90": 62.321001663804054, + "p95": 64.48100134730339, + "p99": 70.80100104212761 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 8, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 42.64099895954132, + "p90": 45.680999755859375, + "p95": 47.2010001540184, + "p99": 49.47999864816666 + }, + "combine": { + "p50": 16.519999131560326, + "p90": 18.92000064253807, + "p95": 20.080000162124634, + "p99": 21.801000460982323 + }, + "roundtrip": { + "p50": 59.52100083231926, + "p90": 62.67999857664108, + "p95": 63.84100019931793, + "p99": 66.96099787950516 + }, + "isolatedSum": { + "p50": 59.160998091101646, + "p90": 64.60100039839745, + "p95": 67.28100031614304, + "p99": 71.28099910914898 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1835008, + "combineLogicalBytes": 1835008, + "fanoutMean": 8, + "recvTokensMax": 16, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 42.64000058174133, + "p90": 45.8809994161129, + "p95": 47.00100049376488, + "p99": 49.959998577833176 + }, + "combine": { + "p50": 20.759999752044678, + "p90": 23.600000888109207, + "p95": 24.480000138282776, + "p99": 26.760000735521317 + }, + "roundtrip": { + "p50": 64.12000209093094, + "p90": 67.08099693059921, + "p95": 67.88100302219391, + "p99": 70.36100327968597 + }, + "isolatedSum": { + "p50": 63.40000033378601, + "p90": 69.4810003042221, + "p95": 71.48100063204765, + "p99": 
76.71999931335449 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 8, + "recvTokensMax": 32, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 42.7200011909008, + "p90": 45.88000103831291, + "p95": 47.36100137233734, + "p99": 49.60000142455101 + }, + "combine": { + "p50": 22.679999470710754, + "p90": 25.280000641942024, + "p95": 26.159999892115593, + "p99": 27.240000665187836 + }, + "roundtrip": { + "p50": 65.72099775075912, + "p90": 68.64099949598312, + "p95": 69.64000314474106, + "p99": 72.2000002861023 + }, + "isolatedSum": { + "p50": 65.40000066161156, + "p90": 71.16000168025494, + "p95": 73.52100126445293, + "p99": 76.84000208973885 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 7340032, + "combineLogicalBytes": 7340032, + "fanoutMean": 8, + "recvTokensMax": 64, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 42.87999868392944, + "p90": 45.88000103831291, + "p95": 46.959999948740005, + "p99": 48.79999905824661 + }, + "combine": { + "p50": 28.119999915361404, + "p90": 30.44000081717968, + "p95": 31.401000916957855, + "p99": 33.640000969171524 + }, + "roundtrip": { + "p50": 71.80000096559525, + "p90": 75.15999674797058, + "p95": 76.39999687671661, + "p99": 78.31999659538269 + }, + "isolatedSum": { + "p50": 70.99999859929085, + "p90": 76.32000185549259, + "p95": 78.36100086569786, + "p99": 82.44000002741814 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 8, + "recvTokensMax": 128, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-c1291ad7", + "identity": 
"mi355x|mori|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|8|decode|normal|none|none|0|tuned||15d7289bb70ed17", + "colorKey": "mi355x_ae729691", + "comparisonKey": "730c294e090417f2", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:59:10.167624+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "mi355x-amds_06", + "sku": "mi355x", + "backend": "mori", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "mi355x-xgmi", + "transport": "xgmi", + "worldSize": 8, + "epSize": 8, + "label": "MI355X EP8 · mori · bf16 · balanced-rank-local", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced-rank-local", + "routingLabel": "balanced-rank-local", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.3125, + "configuredUnits": 80, + "deviceUnits": 256, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "15d7289bb70ed17", + "workloadId": "set:5:2eebbed158fe1320", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "image:rocm/sgl-dev:sglang-0.5.9-rocm720-mi35x-mori-0227-2", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271910050", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271910050", + "createdAt": 
"2026-06-26T23:59:10.167624+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 36.80099919438362, + "p90": 39.80100154876709, + "p95": 40.76100140810013, + "p99": 43.63999888300896 + }, + "combine": { + "p50": 15.320000238716602, + "p90": 17.480000853538513, + "p95": 18.68000067770481, + "p99": 20.999999716877937 + }, + "roundtrip": { + "p50": 49.07999932765961, + "p90": 51.80000141263008, + "p95": 52.76099964976311, + "p99": 53.76100167632103 + }, + "isolatedSum": { + "p50": 52.12099943310022, + "p90": 57.2810024023056, + "p95": 59.44100208580494, + "p99": 64.6399985998869 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 114688, + "combineLogicalBytes": 114688, + "fanoutMean": 1, + "recvTokensMax": 4, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 38.07999938726425, + "p90": 40.39999842643738, + "p95": 41.20099917054176, + "p99": 42.80000180006027 + }, + "combine": { + "p50": 15.799999237060547, + "p90": 17.999999225139618, + "p95": 19.279999658465385, + "p99": 21.040000021457672 + }, + "roundtrip": { + "p50": 51.600001752376556, + "p90": 53.92000079154968, + "p95": 55.24099990725517, + "p99": 57.32100084424019 + }, + "isolatedSum": { + "p50": 53.8799986243248, + "p90": 58.399997651576996, + "p95": 60.48099882900715, + "p99": 63.840001821517944 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 229376, + "combineLogicalBytes": 229376, + "fanoutMean": 1, + "recvTokensMax": 4, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 32.71999955177307, + "p90": 35.5600006878376, + "p95": 36.559998989105225, + "p99": 39.000000804662704 + }, + "combine": { + "p50": 13.72000016272068, + "p90": 15.799999237060547, + "p95": 16.599999740719795, + 
"p99": 18.120000138878822 + }, + "roundtrip": { + "p50": 45.71999981999397, + "p90": 49.04000088572502, + "p95": 49.96100068092346, + "p99": 51.44000053405762 + }, + "isolatedSum": { + "p50": 46.43999971449375, + "p90": 51.35999992489815, + "p95": 53.15999872982502, + "p99": 57.12000094354153 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 458752, + "combineLogicalBytes": 458752, + "fanoutMean": 1, + "recvTokensMax": 4, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 37.84099966287613, + "p90": 40.92000052332878, + "p95": 41.999999433755875, + "p99": 43.880000710487366 + }, + "combine": { + "p50": 14.919999986886978, + "p90": 17.27999933063984, + "p95": 18.039999529719353, + "p99": 19.55999992787838 + }, + "roundtrip": { + "p50": 52.241001278162, + "p90": 55.75999990105629, + "p95": 56.68000131845474, + "p99": 58.35999920964241 + }, + "isolatedSum": { + "p50": 52.76099964976311, + "p90": 58.19999985396862, + "p95": 60.03999896347523, + "p99": 63.440000638365746 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 1, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 38.24099898338318, + "p90": 40.92000052332878, + "p95": 41.839998215436935, + "p99": 44.16000097990036 + }, + "combine": { + "p50": 16.24000072479248, + "p90": 18.841000273823738, + "p95": 19.88000050187111, + "p99": 22.280000150203705 + }, + "roundtrip": { + "p50": 54.28000167012215, + "p90": 57.840000838041306, + "p95": 58.800000697374344, + "p99": 60.96100062131882 + }, + "isolatedSum": { + "p50": 54.48099970817566, + "p90": 59.76100079715252, + "p95": 61.719998717308044, + "p99": 66.44000113010406 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1835008, + "combineLogicalBytes": 
1835008, + "fanoutMean": 1, + "recvTokensMax": 16, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-ace78f17", + "identity": "mi355x|mori|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|decode|normal|none|none|0|tuned||c8b7839b4895c1a", + "colorKey": "mi355x_62dc5cd4", + "comparisonKey": "316ae2638347880f", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:01:29.418642+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "mi355x-amds_00", + "sku": "mi355x", + "backend": "mori", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "mi355x-xgmi", + "transport": "xgmi", + "worldSize": 8, + "epSize": 8, + "label": "MI355X EP8 · mori · bf16 · hotspot-single", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-single", + "routingLabel": "hotspot-single", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.3125, + "configuredUnits": 80, + "deviceUnits": 256, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "c8b7839b4895c1a", + "workloadId": "set:5:286be993cd819ed9", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "image:rocm/sgl-dev:sglang-0.5.9-rocm720-mi35x-mori-0227-2", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": 
"SemiAnalysisAI/InferenceX", + "run": { + "id": "28271920340", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271920340", + "createdAt": "2026-06-27T00:01:29.418642+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 39.84000161290169, + "p90": 42.55999997258186, + "p95": 44.08000037074089, + "p99": 48.601001501083374 + }, + "combine": { + "p50": 16.200000420212746, + "p90": 17.960000783205032, + "p95": 19.07999999821186, + "p99": 21.640000864863396 + }, + "roundtrip": { + "p50": 55.44000118970871, + "p90": 58.27999860048294, + "p95": 59.20099839568138, + "p99": 60.920000076293945 + }, + "isolatedSum": { + "p50": 56.04000203311443, + "p90": 60.520000755786896, + "p95": 63.16000036895275, + "p99": 70.24100236594677 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 42.1609990298748, + "p90": 44.920001178979874, + "p95": 45.80099880695343, + "p99": 47.800999134778976 + }, + "combine": { + "p50": 16.07999950647354, + "p90": 18.401000648736954, + "p95": 19.279999658465385, + "p99": 20.880000665783882 + }, + "roundtrip": { + "p50": 58.35999920964241, + "p90": 61.56099960207939, + "p95": 62.60000169277191, + "p99": 64.7599995136261 + }, + "isolatedSum": { + "p50": 58.24099853634834, + "p90": 63.32100182771683, + "p95": 65.08099846541882, + "p99": 68.68099980056286 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1247232, + "combineLogicalBytes": 1247232, + "fanoutMean": 5.4375, + "recvTokensMax": 16, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 41.96000099182129, + "p90": 
44.599998742341995, + "p95": 45.96000164747238, + "p99": 48.16100001335144 + }, + "combine": { + "p50": 19.401000812649727, + "p90": 21.880000829696655, + "p95": 23.080000653862953, + "p99": 24.12099950015545 + }, + "roundtrip": { + "p50": 61.68099865317345, + "p90": 65.20099937915802, + "p95": 65.99999964237213, + "p99": 67.4000009894371 + }, + "isolatedSum": { + "p50": 61.361001804471016, + "p90": 66.47999957203865, + "p95": 69.04000230133533, + "p99": 72.28199951350689 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2451456, + "combineLogicalBytes": 2451456, + "fanoutMean": 5.34375, + "recvTokensMax": 32, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 41.80099815130234, + "p90": 44.2809984087944, + "p95": 45.559998601675034, + "p99": 48.39999973773956 + }, + "combine": { + "p50": 21.239999681711197, + "p90": 23.19999970495701, + "p95": 24.080000817775726, + "p99": 26.040000841021538 + }, + "roundtrip": { + "p50": 62.960997223854065, + "p90": 66.041000187397, + "p95": 66.91999733448029, + "p99": 68.71999800205231 + }, + "isolatedSum": { + "p50": 63.040997833013535, + "p90": 67.48099811375141, + "p95": 69.63999941945076, + "p99": 74.4400005787611 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 64, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 42.1609990298748, + "p90": 45.00000178813934, + "p95": 45.96000164747238, + "p99": 50.40000006556511 + }, + "combine": { + "p50": 26.599999517202377, + "p90": 28.68100069463253, + "p95": 29.96000088751316, + "p99": 31.720001250505447 + }, + "roundtrip": { + "p50": 69.20100003480911, + "p90": 71.76099717617035, + "p95": 72.7199986577034, + "p99": 74.16000217199326 + }, + "isolatedSum": { + "p50": 
68.76099854707718, + "p90": 73.68100248277187, + "p95": 75.92000253498554, + "p99": 82.12000131607056 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9748480, + "combineLogicalBytes": 9748480, + "fanoutMean": 5.3125, + "recvTokensMax": 128, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-2129d47b", + "identity": "mi355x|mori|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|decode|normal|none|none|0|tuned||4d5546b3fb85130", + "colorKey": "mi355x_570d6605", + "comparisonKey": "1ea3da47c00f36f8", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:59:55.992554+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "mi355x-amds_07", + "sku": "mi355x", + "backend": "mori", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "mi355x-xgmi", + "transport": "xgmi", + "worldSize": 8, + "epSize": 8, + "label": "MI355X EP8 · mori · bf16 · zipf", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf", + "routingLabel": "zipf", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.3125, + "configuredUnits": 80, + "deviceUnits": 256, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "4d5546b3fb85130", + "workloadId": "set:5:f5576e2b712d38c3", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": 
"image:rocm/sgl-dev:sglang-0.5.9-rocm720-mi35x-mori-0227-2", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271913592", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271913592", + "createdAt": "2026-06-26T23:59:55.992554+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 39.03999924659729, + "p90": 41.76099970936775, + "p95": 43.40000078082085, + "p99": 47.15999960899353 + }, + "combine": { + "p50": 16.359999775886536, + "p90": 18.519999459385872, + "p95": 20.12000046670437, + "p99": 23.40099960565567 + }, + "roundtrip": { + "p50": 53.95999923348427, + "p90": 57.20100179314613, + "p95": 58.75999853014946, + "p99": 61.20099872350693 + }, + "isolatedSum": { + "p50": 55.399999022483826, + "p90": 60.280999168753624, + "p95": 63.520001247525215, + "p99": 70.5609992146492 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 444416, + "combineLogicalBytes": 444416, + "fanoutMean": 3.875, + "recvTokensMax": 8, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 40.640998631715775, + "p90": 43.99999976158142, + "p95": 44.840000569820404, + "p99": 48.0009987950325 + }, + "combine": { + "p50": 16.519999131560326, + "p90": 18.561000004410744, + "p95": 20.24099975824356, + "p99": 23.520000278949738 + }, + "roundtrip": { + "p50": 55.52000179886818, + "p90": 59.321001172065735, + "p95": 60.72099879384041, + "p99": 68.88099759817123 + }, + "isolatedSum": { + "p50": 57.1609977632761, + "p90": 62.560999765992165, + "p95": 65.08100032806396, + "p99": 71.52099907398224 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 845824, + "combineLogicalBytes": 845824, + "fanoutMean": 3.6875, + "recvTokensMax": 16, + "stragglerRank": 2, + 
"correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 41.28099977970123, + "p90": 44.16000097990036, + "p95": 45.00000178813934, + "p99": 47.68000170588493 + }, + "combine": { + "p50": 17.640000209212303, + "p90": 20.160000771284103, + "p95": 21.479999646544456, + "p99": 24.6799997985363 + }, + "roundtrip": { + "p50": 59.04100090265274, + "p90": 63.07999789714813, + "p95": 64.87999856472015, + "p99": 68.83999705314636 + }, + "isolatedSum": { + "p50": 58.920999988913536, + "p90": 64.32000175118446, + "p95": 66.4800014346838, + "p99": 72.36000150442123 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1691648, + "combineLogicalBytes": 1691648, + "fanoutMean": 3.6875, + "recvTokensMax": 32, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 41.919998824596405, + "p90": 44.801000505685806, + "p95": 46.84000089764595, + "p99": 50.880998373031616 + }, + "combine": { + "p50": 19.600000232458115, + "p90": 22.120000794529915, + "p95": 23.520000278949738, + "p99": 26.799999177455902 + }, + "roundtrip": { + "p50": 61.000000685453415, + "p90": 64.56000357866287, + "p95": 65.88099896907806, + "p99": 69.52100247144699 + }, + "isolatedSum": { + "p50": 61.51999905705452, + "p90": 66.92100130021572, + "p95": 70.36000117659569, + "p99": 77.68099755048752 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3354624, + "combineLogicalBytes": 3354624, + "fanoutMean": 3.65625, + "recvTokensMax": 64, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 41.839998215436935, + "p90": 44.920001178979874, + "p95": 46.92000150680542, + "p99": 50.1599982380867 + }, + "combine": { + "p50": 24.481000378727913, + "p90": 27.720000594854355, + "p95": 30.561000108718872, + "p99": 59.321001172065735 
+ }, + "roundtrip": { + "p50": 66.23999774456024, + "p90": 69.36100125312805, + "p95": 70.47999650239944, + "p99": 73.36000353097916 + }, + "isolatedSum": { + "p50": 66.32099859416485, + "p90": 72.64000177383423, + "p95": 77.48100161552429, + "p99": 109.48099941015244 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6537216, + "combineLogicalBytes": 6537216, + "fanoutMean": 3.5625, + "recvTokensMax": 127, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-47886ba2", + "identity": "mi355x|mori|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|decode|normal|none|none|0|tuned||5c00b1a0c13aa3e", + "colorKey": "mi355x_6fd30e97", + "comparisonKey": "41d88b5d4da0110a", + "schemaVersion": 3, + "generatedAt": "2026-06-27T00:00:43.491121+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "mi355x-amds_03", + "sku": "mi355x", + "backend": "mori", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "mi355x-xgmi", + "transport": "xgmi", + "worldSize": 8, + "epSize": 8, + "label": "MI355X EP8 · mori · bf16 · zipf-heavy", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.3125, + "configuredUnits": 80, + "deviceUnits": 256, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": 
"5c00b1a0c13aa3e", + "workloadId": "set:5:6b84350720aa8233", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "image:rocm/sgl-dev:sglang-0.5.9-rocm720-mi35x-mori-0227-2", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271916622", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271916622", + "createdAt": "2026-06-27T00:00:43.491121+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 38.32000121474266, + "p90": 40.28100147843361, + "p95": 41.600000113248825, + "p99": 46.31999880075455 + }, + "combine": { + "p50": 15.720000490546227, + "p90": 17.03999936580658, + "p95": 18.640000373125076, + "p99": 20.800000056624413 + }, + "roundtrip": { + "p50": 51.16099864244461, + "p90": 53.55999991297722, + "p95": 54.96000126004219, + "p99": 57.760998606681824 + }, + "isolatedSum": { + "p50": 54.04000170528889, + "p90": 57.32100084424019, + "p95": 60.2400004863739, + "p99": 67.11999885737896 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 172032, + "combineLogicalBytes": 172032, + "fanoutMean": 1.5, + "recvTokensMax": 8, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 40.47999903559685, + "p90": 42.64099895954132, + "p95": 44.47999969124794, + "p99": 48.760998994112015 + }, + "combine": { + "p50": 16.00099913775921, + "p90": 17.160000279545784, + "p95": 18.039999529719353, + "p99": 20.800000056624413 + }, + "roundtrip": { + "p50": 53.16000059247017, + "p90": 56.07999861240387, + "p95": 57.64099955558777, + "p99": 60.08100137114525 + }, + "isolatedSum": { + "p50": 56.480998173356056, + "p90": 59.800999239087105, + "p95": 62.51999922096729, + "p99": 
69.56099905073643 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 315392, + "fanoutMean": 1.375, + "recvTokensMax": 16, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 41.200000792741776, + "p90": 43.241001665592194, + "p95": 44.52100023627281, + "p99": 48.280999064445496 + }, + "combine": { + "p50": 17.240000888705254, + "p90": 18.519999459385872, + "p95": 20.19999921321869, + "p99": 22.5210003554821 + }, + "roundtrip": { + "p50": 56.561000645160675, + "p90": 59.241000562906265, + "p95": 60.440998524427414, + "p99": 64.4410029053688 + }, + "isolatedSum": { + "p50": 58.44000168144703, + "p90": 61.761001124978065, + "p95": 64.7209994494915, + "p99": 70.8019994199276 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 1.34375, + "recvTokensMax": 32, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 41.31999984383583, + "p90": 43.28100010752678, + "p95": 44.679999351501465, + "p99": 46.480000019073486 + }, + "combine": { + "p50": 18.8400000333786, + "p90": 20.041000097990036, + "p95": 21.240999922156334, + "p99": 24.441000074148178 + }, + "roundtrip": { + "p50": 58.761000633239746, + "p90": 61.43999844789505, + "p95": 63.1600022315979, + "p99": 65.52000343799591 + }, + "isolatedSum": { + "p50": 60.15999987721443, + "p90": 63.322000205516815, + "p95": 65.9209992736578, + "p99": 70.92100009322166 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1376256, + "combineLogicalBytes": 1376256, + "fanoutMean": 1.5, + "recvTokensMax": 64, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 41.40099883079529, + "p90": 43.480001389980316, + 
"p95": 44.440001249313354, + "p99": 46.00000008940697 + }, + "combine": { + "p50": 22.87999913096428, + "p90": 24.6799997985363, + "p95": 26.559999212622643, + "p99": 29.40100058913231 + }, + "roundtrip": { + "p50": 63.19999694824219, + "p90": 65.76000154018402, + "p95": 67.28000193834305, + "p99": 69.64100152254105 + }, + "isolatedSum": { + "p50": 64.28099796175957, + "p90": 68.16000118851662, + "p95": 71.000000461936, + "p99": 75.40100067853928 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2781184, + "combineLogicalBytes": 2781184, + "fanoutMean": 1.515625, + "recvTokensMax": 128, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-8d163d45", + "identity": "mi355x|mori|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|decode|normal|none|none|0|tuned||d42040086b5de07", + "colorKey": "mi355x_65e339f9", + "comparisonKey": "2ba4cba3af48c2b3", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:39:01.384245+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "mi355x-amds_07", + "sku": "mi355x", + "backend": "mori", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "mi355x-xgmi", + "transport": "xgmi", + "worldSize": 8, + "epSize": 8, + "label": "MI355X EP8 · mori · bf16 · zipf+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf", + "routingLabel": "zipf+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.3125, + "configuredUnits": 80, + "deviceUnits": 256, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + 
"fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "d42040086b5de07", + "workloadId": "set:5:f5576e2b712d38c3", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": 4.875, + "eplbImbalanceAfter": 1.0033482142857144, + "backendVersion": "image:rocm/sgl-dev:sglang-0.5.9-rocm720-mi35x-mori-0227-2", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271245352", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271245352", + "createdAt": "2026-06-26T23:39:01.384245+00:00", + "sha": "ee4ffe77871d0200cb4a78c96d3ae9f692e9af02" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 39.319999516010284, + "p90": 42.11999848484993, + "p95": 43.15999895334244, + "p99": 46.52100056409836 + }, + "combine": { + "p50": 15.399999916553497, + "p90": 17.601000145077705, + "p95": 18.75999942421913, + "p99": 21.320000290870667 + }, + "roundtrip": { + "p50": 54.23999950289726, + "p90": 57.440001517534256, + "p95": 58.921001851558685, + "p99": 60.95999851822853 + }, + "isolatedSum": { + "p50": 54.71999943256378, + "p90": 59.720998629927635, + "p95": 61.91999837756157, + "p99": 67.84100085496902 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 587776, + "combineLogicalBytes": 587776, + "fanoutMean": 5.125, + "recvTokensMax": 7, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 41.71999916434288, + "p90": 44.84099894762039, + "p95": 46.4400015771389, + "p99": 49.15999993681908 + }, + "combine": { + "p50": 15.599999576807022, + "p90": 17.839999869465828, + "p95": 19.88000050187111, + "p99": 22.5600004196167 + }, + "roundtrip": { + "p50": 57.08099901676178, 
+ "p90": 60.67999824881554, + "p95": 61.59999966621399, + "p99": 63.48100304603577 + }, + "isolatedSum": { + "p50": 57.3199987411499, + "p90": 62.68099881708622, + "p95": 66.32000207901001, + "p99": 71.72000035643578 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1161216, + "combineLogicalBytes": 1161216, + "fanoutMean": 5.0625, + "recvTokensMax": 13, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 41.88000038266182, + "p90": 44.08099874854088, + "p95": 45.120999217033386, + "p99": 48.239998519420624 + }, + "combine": { + "p50": 18.719999119639397, + "p90": 21.04100026190281, + "p95": 22.760000079870224, + "p99": 26.760000735521317 + }, + "roundtrip": { + "p50": 61.43999844789505, + "p90": 64.43999707698822, + "p95": 65.68100303411484, + "p99": 67.87999719381332 + }, + "isolatedSum": { + "p50": 60.599999502301216, + "p90": 65.12199901044369, + "p95": 67.88099929690361, + "p99": 74.99999925494194 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2394112, + "combineLogicalBytes": 2394112, + "fanoutMean": 5.21875, + "recvTokensMax": 23, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 41.839998215436935, + "p90": 44.79999840259552, + "p95": 46.23999819159508, + "p99": 48.36000129580498 + }, + "combine": { + "p50": 21.199999377131462, + "p90": 22.95999974012375, + "p95": 24.19999986886978, + "p99": 26.040000841021538 + }, + "roundtrip": { + "p50": 61.51999905705452, + "p90": 64.92000073194504, + "p95": 65.92000275850296, + "p99": 68.08000057935715 + }, + "isolatedSum": { + "p50": 63.0399975925684, + "p90": 67.75999814271927, + "p95": 70.43999806046486, + "p99": 74.40000213682652 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4845568, + "combineLogicalBytes": 4845568, + "fanoutMean": 5.28125, + "recvTokensMax": 45, + 
"stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 42.319998145103455, + "p90": 44.759999960660934, + "p95": 46.28000035881996, + "p99": 49.240998923778534 + }, + "combine": { + "p50": 24.879999458789825, + "p90": 27.079999446868896, + "p95": 28.440000489354134, + "p99": 56.88000097870827 + }, + "roundtrip": { + "p50": 66.3599967956543, + "p90": 69.95999813079834, + "p95": 70.91999799013138, + "p99": 73.00099730491638 + }, + "isolatedSum": { + "p50": 67.19999760389328, + "p90": 71.83999940752983, + "p95": 74.7200008481741, + "p99": 106.1209999024868 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9676800, + "combineLogicalBytes": 9676800, + "fanoutMean": 5.2734375, + "recvTokensMax": 88, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-2d0599c0", + "identity": "mi355x|mori|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|c774c8e4abb34da", + "colorKey": "mi355x_2fa43515", + "comparisonKey": "2796ed88af4b14b0", + "schemaVersion": 3, + "generatedAt": "2026-06-26T15:40:45.756534+00:00", + "status": "valid", + "publicationStatus": "diagnostic", + "runner": "mi355x-amds_04", + "sku": "mi355x", + "backend": "mori", + "phase": "decode", + "mode": "normal", + "resourceMode": "normalized", + "suite": "resource-constrained", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "mi355x-xgmi", + "transport": "xgmi", + "worldSize": 8, + "epSize": 8, + "label": "MI355X EP8 · mori · bf16 (norm)", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + 
"resourceProfile": { + "requestedFraction": 0.18, + "achievedFraction": 0.3125, + "configuredUnits": 80, + "deviceUnits": 256, + "resourceClass": "unknown", + "conformanceClass": "minimum-functional", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "c774c8e4abb34da", + "workloadId": "set:5:d8d49658059863f2", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "image:rocm/sgl-dev:sglang-0.5.9-rocm720-mi35x-mori-0227-2", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28247575150", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28247575150", + "createdAt": "2026-06-26T15:40:45.756534+00:00", + "sha": "fd23d02b65dba6f1ed963342b188022fc27263d1" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 40.55999964475632, + "p90": 43.15999895334244, + "p95": 44.881001114845276, + "p99": 47.55999892950058 + }, + "combine": { + "p50": 16.119999811053276, + "p90": 18.719999119639397, + "p95": 19.840000197291374, + "p99": 22.520000115036964 + }, + "roundtrip": { + "p50": 56.040000170469284, + "p90": 59.20000001788139, + "p95": 60.80099940299988, + "p99": 63.120998442173004 + }, + "isolatedSum": { + "p50": 56.67999945580959, + "p90": 61.879998072981834, + "p95": 64.72100131213665, + "p99": 70.07999904453754 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 42.55999997258186, + "p90": 45.441001653671265, + "p95": 47.040000557899475, + "p99": 
49.959998577833176 + }, + "combine": { + "p50": 16.16000011563301, + "p90": 18.360000103712082, + "p95": 19.600000232458115, + "p99": 22.63999916613102 + }, + "roundtrip": { + "p50": 58.83999913930893, + "p90": 61.88099831342697, + "p95": 63.48100304603577, + "p99": 65.40100276470184 + }, + "isolatedSum": { + "p50": 58.720000088214874, + "p90": 63.80100175738335, + "p95": 66.64000079035759, + "p99": 72.5999977439642 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 42.160000652074814, + "p90": 44.840000569820404, + "p95": 46.28000035881996, + "p99": 49.84100162982941 + }, + "combine": { + "p50": 19.039999693632126, + "p90": 22.1599992364645, + "p95": 23.48100021481514, + "p99": 54.63999882340431 + }, + "roundtrip": { + "p50": 61.59999966621399, + "p90": 64.71999734640121, + "p95": 65.76000154018402, + "p99": 68.36000084877014 + }, + "isolatedSum": { + "p50": 61.20000034570694, + "p90": 66.9999998062849, + "p95": 69.7610005736351, + "p99": 104.48100045323372 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 42.52000153064728, + "p90": 45.1200008392334, + "p95": 46.080999076366425, + "p99": 48.8400012254715 + }, + "combine": { + "p50": 20.479999482631683, + "p90": 22.520000115036964, + "p95": 23.479999974370003, + "p99": 25.800000876188278 + }, + "roundtrip": { + "p50": 62.67999857664108, + "p90": 65.5599981546402, + "p95": 66.880002617836, + "p99": 68.56100261211395 + }, + "isolatedSum": { + "p50": 63.00000101327896, + "p90": 67.64000095427036, + "p95": 
69.56099905073643, + "p99": 74.64000210165977 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 42.67999902367592, + "p90": 45.27999833226204, + "p95": 46.799998730421066, + "p99": 49.720000475645065 + }, + "combine": { + "p50": 24.921000003814697, + "p90": 27.240000665187836, + "p95": 28.07999961078167, + "p99": 30.27999959886074 + }, + "roundtrip": { + "p50": 67.9209977388382, + "p90": 71.04100286960602, + "p95": 72.12000340223312, + "p99": 74.08100366592407 + }, + "isolatedSum": { + "p50": 67.60099902749062, + "p90": 72.51999899744987, + "p95": 74.87999834120274, + "p99": 80.0000000745058 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-cd519ebd", + "identity": "mi355x|nccl-ep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|16|decode|normal|none|none|0|normalized|0.18|1a4734625a379e3", + "colorKey": "mi355x_1180f01d", + "comparisonKey": "919b62d5ead26bb1", + "schemaVersion": 3, + "generatedAt": "2026-06-28T17:32:29.450290+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "mi355x-amds_04", + "sku": "mi355x", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "normalized", + "suite": "resource-constrained", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "mi355x-multinode-rdma", + "transport": "rdma", + "worldSize": 16, + "epSize": 16, + "label": "MI355X EP16 · nccl-ep · bf16 (norm)", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + 
"experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": 0.18, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 256, + "resourceClass": "resource-constrained", + "conformanceClass": "resource-conforming", + "fixedKernel": false, + "paretoEligible": true + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 16, + "scaleUpDomain": 16 + }, + "routingConsistent": true, + "traceSignature": "1a4734625a379e3", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28328718973", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28328718973", + "createdAt": "2026-06-28T17:32:29.450290+00:00", + "sha": "41135333c6788fca7a4051185dfbb3a850649ed5" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 16, + "dispatch": { + "p50": 360.44201254844666, + "p90": 381.12300634384155, + "p95": 1227.2510528564453, + "p99": 1227.2510528564453 + }, + "combine": { + "p50": 120.64100056886673, + "p90": 125.08100271224976, + "p95": 169.5210039615631, + "p99": 169.5210039615631 + }, + "roundtrip": { + "p50": 445.8029866218567, + "p90": 475.7640063762665, + "p95": 482.00398683547974, + "p99": 482.00398683547974 + }, + "isolatedSum": { + "p50": 481.0830131173134, + "p90": 506.2040090560913, + "p95": 1396.7720568180084, + "p99": 1396.7720568180084 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1505280, + "combineLogicalBytes": 1505280, + "fanoutMean": 6.5625, + "recvTokensMax": 12, + "stragglerRank": 15, + "correct": true, + "samplesPooled": 8, + 
"trials": 1 + }, + { + "tokensPerRank": 2, + "globalTokens": 32, + "dispatch": { + "p50": 345.1229929924011, + "p90": 452.7229964733124, + "p95": 493.32401156425476, + "p99": 493.32401156425476 + }, + "combine": { + "p50": 124.20099973678589, + "p90": 149.48099851608276, + "p95": 168.08100044727325, + "p99": 168.08100044727325 + }, + "roundtrip": { + "p50": 448.28298687934875, + "p90": 470.24399042129517, + "p95": 487.1650040149689, + "p99": 487.1650040149689 + }, + "isolatedSum": { + "p50": 469.323992729187, + "p90": 602.2039949893951, + "p95": 661.405012011528, + "p99": 661.405012011528 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3067904, + "combineLogicalBytes": 3067904, + "fanoutMean": 6.6875, + "recvTokensMax": 24, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 8, + "trials": 1 + }, + { + "tokensPerRank": 4, + "globalTokens": 64, + "dispatch": { + "p50": 352.80299186706543, + "p90": 358.8019907474518, + "p95": 375.04300475120544, + "p99": 375.04300475120544 + }, + "combine": { + "p50": 128.24100255966187, + "p90": 134.04099643230438, + "p95": 137.12100684642792, + "p99": 137.12100684642792 + }, + "roundtrip": { + "p50": 448.76399636268616, + "p90": 456.76299929618835, + "p95": 464.20300006866455, + "p99": 464.20300006866455 + }, + "isolatedSum": { + "p50": 481.0439944267273, + "p90": 492.84298717975616, + "p95": 512.1640115976334, + "p99": 512.1640115976334 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 5992448, + "combineLogicalBytes": 5992448, + "fanoutMean": 6.53125, + "recvTokensMax": 43, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 8, + "trials": 1 + }, + { + "tokensPerRank": 8, + "globalTokens": 128, + "dispatch": { + "p50": 430.6829869747162, + "p90": 1308.8120222091675, + "p95": 1478.0919551849365, + "p99": 1478.0919551849365 + }, + "combine": { + "p50": 140.1209980249405, + "p90": 159.64199602603912, + "p95": 194.28199529647827, + "p99": 194.28199529647827 + }, + "roundtrip": { + "p50": 
471.68299555778503, + "p90": 499.44400787353516, + "p95": 1358.8520288467407, + "p99": 1358.8520288467407 + }, + "isolatedSum": { + "p50": 570.8039849996567, + "p90": 1468.4540182352066, + "p95": 1672.3739504814148, + "p99": 1672.3739504814148 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 12214272, + "combineLogicalBytes": 12214272, + "fanoutMean": 6.65625, + "recvTokensMax": 84, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 8, + "trials": 1 + } + ] + } + ], + "failures": [ + { + "id": "cxf-6e691abd", + "identity": "h100|deepep|7168|8|256|fp8|ll|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|ac583971f94b176", + "generatedAt": "2026-06-26T17:32:59.549027+00:00", + "publicationStatus": "diagnostic", + "status": "valid", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "config": "fp8/ll/layout-and-dispatch", + "reason": "anomaly:roundtrip_gt_isolated_sum", + "returnCode": null, + "run": { + "id": "28254359089", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254359089", + "createdAt": "2026-06-26T17:32:59.549027+00:00", + "sha": "60dec7d70f554e252fec87709e2be52752947db1" + } + }, + { + "id": "cxf-25e7e895", + "identity": "h100|deepep|7168|8|256|fp8|ll|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "generatedAt": "2026-06-26T23:49:09.827299+00:00", + "publicationStatus": "diagnostic", + "status": "valid", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "config": "fp8/ll/layout-and-dispatch", + "reason": "anomaly:roundtrip_gt_isolated_sum", + "returnCode": null, + "run": { + "id": "28271594334", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271594334", + "createdAt": "2026-06-26T23:49:09.827299+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + } + }, + { + "id": "cxf-433580a5", + "identity": 
"h100|deepep|7168|8|256|fp8|ll|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "generatedAt": "2026-06-26T23:49:16.484836+00:00", + "publicationStatus": "diagnostic", + "status": "valid", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "config": "fp8/ll/runtime-visible", + "reason": "anomaly:roundtrip_gt_isolated_sum", + "returnCode": null, + "run": { + "id": "28271598000", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271598000", + "createdAt": "2026-06-26T23:49:16.484836+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + } + }, + { + "id": "cxf-bf8e2b86", + "identity": "h100|uccl||||unknown|normal|unknown|unknown||decode|normal|none|none|0|tuned||", + "generatedAt": "1970-01-01T00:00:00.000Z", + "publicationStatus": "failed", + "status": "failed", + "sku": "h100", + "backend": "uccl", + "phase": "decode", + "config": "unknown/normal/unknown", + "reason": "unknown", + "returnCode": 1, + "run": { + "id": null, + "url": null, + "createdAt": "1970-01-01T00:00:00.000Z", + "sha": null + } + }, + { + "id": "cxf-70961aef", + "identity": "h200|deepep|7168|8|256|fp8|ll|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|ac583971f94b176", + "generatedAt": "2026-06-26T17:31:08.227503+00:00", + "publicationStatus": "diagnostic", + "status": "valid", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "config": "fp8/ll/layout-and-dispatch", + "reason": "anomaly:roundtrip_gt_isolated_sum", + "returnCode": null, + "run": { + "id": "28254435010", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254435010", + "createdAt": "2026-06-26T17:31:08.227503+00:00", + "sha": "60dec7d70f554e252fec87709e2be52752947db1" + } + }, + { + "id": "cxf-e15f2b54", + "identity": "h200|deepep|7168|8|256|fp8|ll|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "generatedAt": "2026-06-26T23:51:34.222899+00:00", + 
"publicationStatus": "diagnostic", + "status": "valid", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "config": "fp8/ll/layout-and-dispatch", + "reason": "anomaly:roundtrip_gt_isolated_sum", + "returnCode": null, + "run": { + "id": "28271653486", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271653486", + "createdAt": "2026-06-26T23:51:34.222899+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + } + }, + { + "id": "cxf-33a53f33", + "identity": "h200|deepep|7168|8|256|fp8|ll|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "generatedAt": "2026-06-26T23:51:35.330044+00:00", + "publicationStatus": "diagnostic", + "status": "valid", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "config": "fp8/ll/runtime-visible", + "reason": "anomaly:roundtrip_gt_isolated_sum", + "returnCode": null, + "run": { + "id": "28271656517", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271656517", + "createdAt": "2026-06-26T23:51:35.330044+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + } + }, + { + "id": "cxf-26d1baf4", + "identity": "mi355x|mori|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|c774c8e4abb34da", + "generatedAt": "2026-06-26T15:40:45.756534+00:00", + "publicationStatus": "diagnostic", + "status": "valid", + "sku": "mi355x", + "backend": "mori", + "phase": "decode", + "config": "bf16/normal/layout-and-dispatch", + "reason": "resource-nonconforming", + "returnCode": null, + "run": { + "id": "28247575150", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28247575150", + "createdAt": "2026-06-26T15:40:45.756534+00:00", + "sha": "fd23d02b65dba6f1ed963342b188022fc27263d1" + } + } + ], + "summaryCards": [ + { + "title": "Best backend · decode EP8", + "value": "flashinfer · B300", + "sub": "71 us RT p99 · mxfp8 · T=64" + }, + { + "title": "Best backend · prefill EP8", + "value": 
"flashinfer · B300", + "sub": "85 us RT p99 · nvfp4 · T=256" + }, + { + "title": "LL -> normal crossover", + "value": "T~128 tok/rank", + "sub": "H100 EP8 fp8 · normal RT p50 wins above this" + }, + { + "title": "Resource-normalized winner", + "value": "deepep · H100", + "sub": "113 us RT p99 · bf16 · T=64" + }, + { + "title": "Backend-default winner", + "value": "flashinfer · B300", + "sub": "71 us RT p99 · mxfp8 · T=64" + }, + { + "title": "Most unstable config", + "value": "H100 · deepep decode", + "sub": "3.27x p99 under zipf-heavy vs uniform", + "warning": true + }, + { + "title": "Invalid / diagnostic cases", + "value": "8", + "sub": "see Evidence failed table", + "warning": true, + "href": "#tab-evidence" + } + ], + "decision": { + "budgetsUs": [100, 250, 500], + "maxTokensUnderBudget": [ + { + "id": "cxb-3f6620d0", + "sku": "b300", + "backend": "deepep", + "phase": "decode", + "dispatchDtype": "bf16", + "epSize": 8, + "mode": "normal", + "budgets": { + "100": null, + "250": 128, + "500": 128 + } + }, + { + "id": "cxb-c27e2cad", + "sku": "b300", + "backend": "deepep", + "phase": "decode", + "dispatchDtype": "fp8", + "epSize": 8, + "mode": "normal", + "budgets": { + "100": null, + "250": 128, + "500": 128 + } + }, + { + "id": "cxb-567c4192", + "sku": "b300", + "backend": "deepep", + "phase": "decode", + "dispatchDtype": "fp8-directcast", + "epSize": 8, + "mode": "normal", + "budgets": { + "100": null, + "250": 128, + "500": 128 + } + }, + { + "id": "cxb-10314900", + "sku": "b300", + "backend": "deepep", + "phase": "decode", + "dispatchDtype": "fp8-pertoken", + "epSize": 8, + "mode": "normal", + "budgets": { + "100": null, + "250": 128, + "500": 128 + } + }, + { + "id": "cxb-238797ce", + "sku": "b300", + "backend": "deepep", + "phase": "prefill", + "dispatchDtype": "bf16", + "epSize": 8, + "mode": "normal", + "budgets": { + "100": null, + "250": 128, + "500": 512 + } + }, + { + "id": "cxb-67e5feea", + "sku": "b300", + "backend": "deepep", + "phase": "prefill", 
+ "dispatchDtype": "fp8", + "epSize": 8, + "mode": "normal", + "budgets": { + "100": null, + "250": 128, + "500": 256 + } + }, + { + "id": "cxb-7cddf11f", + "sku": "b300", + "backend": "deepep-hybrid", + "phase": "decode", + "dispatchDtype": "bf16", + "epSize": 8, + "mode": "normal", + "budgets": { + "100": null, + "250": 32, + "500": 128 + } + }, + { + "id": "cxb-4a0e300c", + "sku": "b300", + "backend": "deepep-hybrid", + "phase": "prefill", + "dispatchDtype": "bf16", + "epSize": 8, + "mode": "normal", + "budgets": { + "100": null, + "250": null, + "500": 512 + } + }, + { + "id": "cxb-6136a9d3", + "sku": "b300", + "backend": "flashinfer", + "phase": "decode", + "dispatchDtype": "bf16", + "epSize": 8, + "mode": "normal", + "budgets": { + "100": 128, + "250": 128, + "500": 128 + } + }, + { + "id": "cxb-30070070", + "sku": "b300", + "backend": "flashinfer", + "phase": "decode", + "dispatchDtype": "fp8", + "epSize": 8, + "mode": "normal", + "budgets": { + "100": 128, + "250": 128, + "500": 128 + } + }, + { + "id": "cxb-9a73b5f5", + "sku": "b300", + "backend": "flashinfer", + "phase": "decode", + "dispatchDtype": "mxfp8", + "epSize": 8, + "mode": "normal", + "budgets": { + "100": 128, + "250": 128, + "500": 128 + } + }, + { + "id": "cxb-207d8ef2", + "sku": "b300", + "backend": "flashinfer", + "phase": "prefill", + "dispatchDtype": "bf16", + "epSize": 8, + "mode": "normal", + "budgets": { + "100": 128, + "250": 512, + "500": 1024 + } + }, + { + "id": "cxb-ae942e6d", + "sku": "b300", + "backend": "flashinfer", + "phase": "prefill", + "dispatchDtype": "fp8", + "epSize": 8, + "mode": "normal", + "budgets": { + "100": 256, + "250": 1024, + "500": 2048 + } + }, + { + "id": "cxb-dede56e2", + "sku": "b300", + "backend": "flashinfer", + "phase": "prefill", + "dispatchDtype": "mxfp8", + "epSize": 8, + "mode": "normal", + "budgets": { + "100": 256, + "250": 1024, + "500": 2048 + } + }, + { + "id": "cxb-85dec801", + "sku": "b300", + "backend": "flashinfer", + "phase": "prefill", + 
"dispatchDtype": "nvfp4", + "epSize": 8, + "mode": "normal", + "budgets": { + "100": 256, + "250": 1024, + "500": 2048 + } + }, + { + "id": "cxb-2fdde1de", + "sku": "b300", + "backend": "uccl", + "phase": "decode", + "dispatchDtype": "bf16", + "epSize": 8, + "mode": "normal", + "budgets": { + "100": null, + "250": 128, + "500": 128 + } + }, + { + "id": "cxb-8d828593", + "sku": "b300", + "backend": "uccl", + "phase": "prefill", + "dispatchDtype": "bf16", + "epSize": 8, + "mode": "normal", + "budgets": { + "100": null, + "250": 128, + "500": 512 + } + }, + { + "id": "cxb-7171c240", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "dispatchDtype": "bf16", + "epSize": 8, + "mode": "normal", + "budgets": { + "100": null, + "250": 128, + "500": 128 + } + }, + { + "id": "cxb-6f4d88a5", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "dispatchDtype": "bf16", + "epSize": 8, + "mode": "ll", + "budgets": { + "100": 32, + "250": 128, + "500": 128 + } + }, + { + "id": "cxb-416fcf7d", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "dispatchDtype": "fp8", + "epSize": 8, + "mode": "normal", + "budgets": { + "100": null, + "250": 128, + "500": 128 + } + }, + { + "id": "cxb-d35502c2", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "dispatchDtype": "fp8-directcast", + "epSize": 8, + "mode": "normal", + "budgets": { + "100": null, + "250": 128, + "500": 128 + } + }, + { + "id": "cxb-779ba710", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "dispatchDtype": "fp8-pertoken", + "epSize": 8, + "mode": "normal", + "budgets": { + "100": null, + "250": 128, + "500": 128 + } + }, + { + "id": "cxb-d524fd7e", + "sku": "h100", + "backend": "deepep", + "phase": "prefill", + "dispatchDtype": "bf16", + "epSize": 8, + "mode": "normal", + "budgets": { + "100": null, + "250": 128, + "500": 512 + } + }, + { + "id": "cxb-bf310e7a", + "sku": "h100", + "backend": "deepep", + "phase": "prefill", + "dispatchDtype": "fp8", + 
"epSize": 8, + "mode": "normal", + "budgets": { + "100": null, + "250": 128, + "500": 256 + } + }, + { + "id": "cxb-0f748c2f", + "sku": "h100", + "backend": "deepep-hybrid", + "phase": "decode", + "dispatchDtype": "bf16", + "epSize": 8, + "mode": "normal", + "budgets": { + "100": null, + "250": 2, + "500": 128 + } + }, + { + "id": "cxb-402bdadc", + "sku": "h100", + "backend": "deepep-hybrid", + "phase": "prefill", + "dispatchDtype": "bf16", + "epSize": 8, + "mode": "normal", + "budgets": { + "100": null, + "250": null, + "500": 512 + } + }, + { + "id": "cxb-f1858975", + "sku": "h100", + "backend": "flashinfer", + "phase": "decode", + "dispatchDtype": "bf16", + "epSize": 8, + "mode": "normal", + "budgets": { + "100": null, + "250": 128, + "500": 128 + } + }, + { + "id": "cxb-236b5900", + "sku": "h100", + "backend": "flashinfer", + "phase": "decode", + "dispatchDtype": "fp8", + "epSize": 8, + "mode": "normal", + "budgets": { + "100": null, + "250": 128, + "500": 128 + } + }, + { + "id": "cxb-0d201725", + "sku": "h100", + "backend": "flashinfer", + "phase": "decode", + "dispatchDtype": "mxfp8", + "epSize": 8, + "mode": "normal", + "budgets": { + "100": null, + "250": 128, + "500": 128 + } + }, + { + "id": "cxb-6fee4962", + "sku": "h100", + "backend": "flashinfer", + "phase": "prefill", + "dispatchDtype": "bf16", + "epSize": 8, + "mode": "normal", + "budgets": { + "100": 128, + "250": 256, + "500": 512 + } + }, + { + "id": "cxb-6d37a6fd", + "sku": "h100", + "backend": "flashinfer", + "phase": "prefill", + "dispatchDtype": "fp8", + "epSize": 8, + "mode": "normal", + "budgets": { + "100": null, + "250": 512, + "500": 1024 + } + }, + { + "id": "cxb-00728192", + "sku": "h100", + "backend": "flashinfer", + "phase": "prefill", + "dispatchDtype": "mxfp8", + "epSize": 8, + "mode": "normal", + "budgets": { + "100": null, + "250": 512, + "500": 1024 + } + }, + { + "id": "cxb-5657eb6e", + "sku": "h100", + "backend": "uccl", + "phase": "decode", + "dispatchDtype": "bf16", + 
"epSize": 8, + "mode": "normal", + "budgets": { + "100": null, + "250": 1, + "500": 128 + } + }, + { + "id": "cxb-8af55e63", + "sku": "h100", + "backend": "uccl", + "phase": "prefill", + "dispatchDtype": "bf16", + "epSize": 8, + "mode": "normal", + "budgets": { + "100": null, + "250": null, + "500": 512 + } + }, + { + "id": "cxb-a3bb3bd5", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "dispatchDtype": "bf16", + "epSize": 8, + "mode": "normal", + "budgets": { + "100": null, + "250": 128, + "500": 128 + } + }, + { + "id": "cxb-274a06b0", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "dispatchDtype": "bf16", + "epSize": 8, + "mode": "ll", + "budgets": { + "100": 32, + "250": 128, + "500": 128 + } + }, + { + "id": "cxb-1d12a6ce", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "dispatchDtype": "fp8", + "epSize": 8, + "mode": "normal", + "budgets": { + "100": null, + "250": 32, + "500": 128 + } + }, + { + "id": "cxb-858b05cb", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "dispatchDtype": "fp8-directcast", + "epSize": 8, + "mode": "normal", + "budgets": { + "100": null, + "250": 32, + "500": 128 + } + }, + { + "id": "cxb-339f09b5", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "dispatchDtype": "fp8-pertoken", + "epSize": 8, + "mode": "normal", + "budgets": { + "100": null, + "250": null, + "500": 128 + } + }, + { + "id": "cxb-bc48bfe5", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "dispatchDtype": "bf16", + "epSize": 8, + "mode": "normal", + "budgets": { + "100": null, + "250": 128, + "500": 512 + } + }, + { + "id": "cxb-e6cb64c3", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "dispatchDtype": "fp8", + "epSize": 8, + "mode": "normal", + "budgets": { + "100": null, + "250": null, + "500": 256 + } + }, + { + "id": "cxb-d2620b3b", + "sku": "h200", + "backend": "uccl", + "phase": "decode", + "dispatchDtype": "bf16", + "epSize": 8, + "mode": "normal", + 
"budgets": { + "100": null, + "250": 8, + "500": 128 + } + }, + { + "id": "cxb-ec807828", + "sku": "h200", + "backend": "uccl", + "phase": "prefill", + "dispatchDtype": "bf16", + "epSize": 8, + "mode": "normal", + "budgets": { + "100": null, + "250": null, + "500": 512 + } + }, + { + "id": "cxb-279043f8", + "sku": "mi355x", + "backend": "mori", + "phase": "decode", + "dispatchDtype": "bf16", + "epSize": 8, + "mode": "normal", + "budgets": { + "100": 16, + "250": 16, + "500": 16 + } + } + ], + "recommendations": [ + { + "id": "cxr-d2992d7c", + "sku": "b300", + "phase": "decode", + "atTokensPerRank": 64, + "lowestP99DispatchUs": 71.4, + "config": "mxfp8/normal/layout-and-dispatch-v1/uniform/tuned", + "epSize": 8 + }, + { + "id": "cxr-1c3060b2", + "sku": "b300", + "phase": "prefill", + "atTokensPerRank": 256, + "lowestP99DispatchUs": 85, + "config": "nvfp4/normal/layout-and-dispatch-v1/uniform/tuned", + "epSize": 8 + }, + { + "id": "cxr-8fcf986c", + "sku": "h100", + "phase": "decode", + "atTokensPerRank": 64, + "lowestP99DispatchUs": 53.1, + "config": "fp8/ll/layout-and-dispatch-v1/uniform/normalized", + "epSize": 8 + }, + { + "id": "cxr-466c0bc2", + "sku": "h100", + "phase": "prefill", + "atTokensPerRank": 256, + "lowestP99DispatchUs": 104.6, + "config": "fp8/normal/cached-layout-comm-only-v1/uniform/tuned", + "epSize": 8 + }, + { + "id": "cxr-c2fe14a3", + "sku": "h200", + "phase": "decode", + "atTokensPerRank": 64, + "lowestP99DispatchUs": 62.1, + "config": "fp8/ll/layout-and-dispatch-v1/uniform/normalized", + "epSize": 8 + }, + { + "id": "cxr-7e4f951f", + "sku": "h200", + "phase": "prefill", + "atTokensPerRank": 256, + "lowestP99DispatchUs": 124.6, + "config": "fp8/normal/cached-layout-comm-only-v1/uniform/normalized", + "epSize": 8 + } + ], + "llCrossover": [ + { + "sku": "h100", + "ep": 8, + "dtype": "bf16", + "stat": "p50", + "basis": "measured-roundtrip", + "normal_faster_at_T": "never-in-range" + }, + { + "sku": "h100", + "ep": 8, + "dtype": "bf16", + "stat": 
"p99", + "basis": "measured-roundtrip", + "normal_faster_at_T": "never-in-range" + }, + { + "sku": "h100", + "ep": 8, + "dtype": "bf16", + "stat": "p50", + "basis": "measured-roundtrip", + "normal_faster_at_T": "never-in-range" + }, + { + "sku": "h100", + "ep": 8, + "dtype": "bf16", + "stat": "p99", + "basis": "measured-roundtrip", + "normal_faster_at_T": "never-in-range" + }, + { + "sku": "h100", + "ep": 8, + "dtype": "bf16", + "stat": "p50", + "basis": "measured-roundtrip", + "normal_faster_at_T": "never-in-range" + }, + { + "sku": "h100", + "ep": 8, + "dtype": "bf16", + "stat": "p99", + "basis": "measured-roundtrip", + "normal_faster_at_T": "never-in-range" + }, + { + "sku": "h100", + "ep": 8, + "dtype": "fp8", + "stat": "p50", + "basis": "measured-roundtrip", + "normal_faster_at_T": 128 + }, + { + "sku": "h100", + "ep": 8, + "dtype": "fp8", + "stat": "p99", + "basis": "measured-roundtrip", + "normal_faster_at_T": 128 + }, + { + "sku": "h100", + "ep": 8, + "dtype": "fp8", + "stat": "p50", + "basis": "measured-roundtrip", + "normal_faster_at_T": 128 + }, + { + "sku": "h100", + "ep": 8, + "dtype": "fp8", + "stat": "p99", + "basis": "measured-roundtrip", + "normal_faster_at_T": 128 + }, + { + "sku": "h100", + "ep": 8, + "dtype": "fp8", + "stat": "p50", + "basis": "measured-roundtrip", + "normal_faster_at_T": 128 + }, + { + "sku": "h100", + "ep": 8, + "dtype": "fp8", + "stat": "p99", + "basis": "measured-roundtrip", + "normal_faster_at_T": 128 + }, + { + "sku": "h200", + "ep": 8, + "dtype": "bf16", + "stat": "p50", + "basis": "measured-roundtrip", + "normal_faster_at_T": "never-in-range" + }, + { + "sku": "h200", + "ep": 8, + "dtype": "bf16", + "stat": "p99", + "basis": "measured-roundtrip", + "normal_faster_at_T": "never-in-range" + }, + { + "sku": "h200", + "ep": 8, + "dtype": "bf16", + "stat": "p50", + "basis": "measured-roundtrip", + "normal_faster_at_T": "never-in-range" + }, + { + "sku": "h200", + "ep": 8, + "dtype": "bf16", + "stat": "p99", + "basis": 
"measured-roundtrip", + "normal_faster_at_T": "never-in-range" + }, + { + "sku": "h200", + "ep": 8, + "dtype": "bf16", + "stat": "p50", + "basis": "measured-roundtrip", + "normal_faster_at_T": "never-in-range" + }, + { + "sku": "h200", + "ep": 8, + "dtype": "bf16", + "stat": "p99", + "basis": "measured-roundtrip", + "normal_faster_at_T": "never-in-range" + }, + { + "sku": "h200", + "ep": 8, + "dtype": "fp8", + "stat": "p50", + "basis": "measured-roundtrip", + "normal_faster_at_T": 128 + }, + { + "sku": "h200", + "ep": 8, + "dtype": "fp8", + "stat": "p99", + "basis": "measured-roundtrip", + "normal_faster_at_T": 128 + }, + { + "sku": "h200", + "ep": 8, + "dtype": "fp8", + "stat": "p50", + "basis": "measured-roundtrip", + "normal_faster_at_T": 128 + }, + { + "sku": "h200", + "ep": 8, + "dtype": "fp8", + "stat": "p99", + "basis": "measured-roundtrip", + "normal_faster_at_T": 128 + }, + { + "sku": "h200", + "ep": 8, + "dtype": "fp8", + "stat": "p50", + "basis": "measured-roundtrip", + "normal_faster_at_T": 128 + }, + { + "sku": "h200", + "ep": 8, + "dtype": "fp8", + "stat": "p99", + "basis": "measured-roundtrip", + "normal_faster_at_T": 128 + } + ], + "resourcePareto": [ + { + "sku": "b300", + "phase": "decode", + "dtype": "bf16", + "T": 1, + "n_points": 2, + "curve": [ + { + "achieved_fraction": 0.1351, + "dispatch_p50": 81.2, + "dispatch_p99": 93, + "resource_class": "backend-tuned" + }, + { + "achieved_fraction": 0.1824, + "dispatch_p50": 57, + "dispatch_p99": 73.4, + "resource_class": "resource-constrained" + } + ] + }, + { + "sku": "b300", + "phase": "decode", + "dtype": "bf16", + "T": 2, + "n_points": 2, + "curve": [ + { + "achieved_fraction": 0.1351, + "dispatch_p50": 80.9, + "dispatch_p99": 89.8, + "resource_class": "backend-tuned" + }, + { + "achieved_fraction": 0.1824, + "dispatch_p50": 57, + "dispatch_p99": 73.2, + "resource_class": "resource-constrained" + } + ] + }, + { + "sku": "b300", + "phase": "decode", + "dtype": "bf16", + "T": 4, + "n_points": 2, + 
"curve": [ + { + "achieved_fraction": 0.1351, + "dispatch_p50": 81.4, + "dispatch_p99": 107, + "resource_class": "backend-tuned" + }, + { + "achieved_fraction": 0.1824, + "dispatch_p50": 57.8, + "dispatch_p99": 68.1, + "resource_class": "resource-constrained" + } + ] + }, + { + "sku": "b300", + "phase": "decode", + "dtype": "bf16", + "T": 8, + "n_points": 2, + "curve": [ + { + "achieved_fraction": 0.1351, + "dispatch_p50": 81.4, + "dispatch_p99": 93.3, + "resource_class": "backend-tuned" + }, + { + "achieved_fraction": 0.1824, + "dispatch_p50": 59.3, + "dispatch_p99": 68.2, + "resource_class": "resource-constrained" + } + ] + }, + { + "sku": "b300", + "phase": "decode", + "dtype": "bf16", + "T": 16, + "n_points": 2, + "curve": [ + { + "achieved_fraction": 0.1351, + "dispatch_p50": 82.8, + "dispatch_p99": 97.4, + "resource_class": "backend-tuned" + }, + { + "achieved_fraction": 0.1824, + "dispatch_p50": 62.8, + "dispatch_p99": 76.7, + "resource_class": "resource-constrained" + } + ] + }, + { + "sku": "b300", + "phase": "decode", + "dtype": "bf16", + "T": 32, + "n_points": 2, + "curve": [ + { + "achieved_fraction": 0.1351, + "dispatch_p50": 99.8, + "dispatch_p99": 106.8, + "resource_class": "backend-tuned" + }, + { + "achieved_fraction": 0.1824, + "dispatch_p50": 69.2, + "dispatch_p99": 81.7, + "resource_class": "resource-constrained" + } + ] + }, + { + "sku": "b300", + "phase": "decode", + "dtype": "bf16", + "T": 64, + "n_points": 2, + "curve": [ + { + "achieved_fraction": 0.1351, + "dispatch_p50": 111.1, + "dispatch_p99": 119, + "resource_class": "backend-tuned" + }, + { + "achieved_fraction": 0.1824, + "dispatch_p50": 82.5, + "dispatch_p99": 99.5, + "resource_class": "resource-constrained" + } + ] + }, + { + "sku": "b300", + "phase": "decode", + "dtype": "bf16", + "T": 128, + "n_points": 2, + "curve": [ + { + "achieved_fraction": 0.1351, + "dispatch_p50": 124.5, + "dispatch_p99": 138.8, + "resource_class": "backend-tuned" + }, + { + "achieved_fraction": 0.1824, + 
"dispatch_p50": 93.9, + "dispatch_p99": 105, + "resource_class": "resource-constrained" + } + ] + }, + { + "sku": "b300", + "phase": "decode", + "dtype": "fp8", + "T": 1, + "n_points": 2, + "curve": [ + { + "achieved_fraction": 0.1351, + "dispatch_p50": 56.6, + "dispatch_p99": 67.4, + "resource_class": "backend-tuned" + }, + { + "achieved_fraction": 0.1824, + "dispatch_p50": 56, + "dispatch_p99": 69.6, + "resource_class": "resource-constrained" + } + ] + }, + { + "sku": "b300", + "phase": "decode", + "dtype": "fp8", + "T": 2, + "n_points": 2, + "curve": [ + { + "achieved_fraction": 0.1351, + "dispatch_p50": 57.2, + "dispatch_p99": 67.6, + "resource_class": "backend-tuned" + }, + { + "achieved_fraction": 0.1824, + "dispatch_p50": 56.9, + "dispatch_p99": 68.5, + "resource_class": "resource-constrained" + } + ] + }, + { + "sku": "b300", + "phase": "decode", + "dtype": "fp8", + "T": 4, + "n_points": 2, + "curve": [ + { + "achieved_fraction": 0.1351, + "dispatch_p50": 59.2, + "dispatch_p99": 68, + "resource_class": "backend-tuned" + }, + { + "achieved_fraction": 0.1824, + "dispatch_p50": 57.6, + "dispatch_p99": 67.2, + "resource_class": "resource-constrained" + } + ] + }, + { + "sku": "b300", + "phase": "decode", + "dtype": "fp8", + "T": 8, + "n_points": 2, + "curve": [ + { + "achieved_fraction": 0.1351, + "dispatch_p50": 59.8, + "dispatch_p99": 69.4, + "resource_class": "backend-tuned" + }, + { + "achieved_fraction": 0.1824, + "dispatch_p50": 59.5, + "dispatch_p99": 73.2, + "resource_class": "resource-constrained" + } + ] + }, + { + "sku": "b300", + "phase": "decode", + "dtype": "fp8", + "T": 16, + "n_points": 2, + "curve": [ + { + "achieved_fraction": 0.1351, + "dispatch_p50": 61.2, + "dispatch_p99": 85.9, + "resource_class": "backend-tuned" + }, + { + "achieved_fraction": 0.1824, + "dispatch_p50": 60.7, + "dispatch_p99": 69.8, + "resource_class": "resource-constrained" + } + ] + }, + { + "sku": "b300", + "phase": "decode", + "dtype": "fp8", + "T": 32, + "n_points": 
2, + "curve": [ + { + "achieved_fraction": 0.1351, + "dispatch_p50": 64, + "dispatch_p99": 75.7, + "resource_class": "backend-tuned" + }, + { + "achieved_fraction": 0.1824, + "dispatch_p50": 63.6, + "dispatch_p99": 72.5, + "resource_class": "resource-constrained" + } + ] + }, + { + "sku": "b300", + "phase": "decode", + "dtype": "fp8", + "T": 64, + "n_points": 2, + "curve": [ + { + "achieved_fraction": 0.1351, + "dispatch_p50": 72.4, + "dispatch_p99": 84, + "resource_class": "backend-tuned" + }, + { + "achieved_fraction": 0.1824, + "dispatch_p50": 72.6, + "dispatch_p99": 82.7, + "resource_class": "resource-constrained" + } + ] + }, + { + "sku": "b300", + "phase": "decode", + "dtype": "fp8", + "T": 128, + "n_points": 2, + "curve": [ + { + "achieved_fraction": 0.1351, + "dispatch_p50": 86.4, + "dispatch_p99": 98.1, + "resource_class": "backend-tuned" + }, + { + "achieved_fraction": 0.1824, + "dispatch_p50": 83.2, + "dispatch_p99": 90.2, + "resource_class": "resource-constrained" + } + ] + }, + { + "sku": "b300", + "phase": "prefill", + "dtype": "bf16", + "T": 128, + "n_points": 2, + "curve": [ + { + "achieved_fraction": 0.1351, + "dispatch_p50": 125.3, + "dispatch_p99": 135.5, + "resource_class": "backend-tuned" + }, + { + "achieved_fraction": 0.1824, + "dispatch_p50": 94.1, + "dispatch_p99": 116.4, + "resource_class": "resource-constrained" + } + ] + }, + { + "sku": "b300", + "phase": "prefill", + "dtype": "bf16", + "T": 256, + "n_points": 2, + "curve": [ + { + "achieved_fraction": 0.1351, + "dispatch_p50": 157.3, + "dispatch_p99": 174.4, + "resource_class": "backend-tuned" + }, + { + "achieved_fraction": 0.1824, + "dispatch_p50": 135.4, + "dispatch_p99": 151, + "resource_class": "resource-constrained" + } + ] + }, + { + "sku": "b300", + "phase": "prefill", + "dtype": "bf16", + "T": 512, + "n_points": 2, + "curve": [ + { + "achieved_fraction": 0.1351, + "dispatch_p50": 222.4, + "dispatch_p99": 234.7, + "resource_class": "backend-tuned" + }, + { + "achieved_fraction": 
0.1824, + "dispatch_p50": 193.2, + "dispatch_p99": 206.7, + "resource_class": "resource-constrained" + } + ] + }, + { + "sku": "b300", + "phase": "prefill", + "dtype": "bf16", + "T": 1024, + "n_points": 2, + "curve": [ + { + "achieved_fraction": 0.1351, + "dispatch_p50": 346, + "dispatch_p99": 360.8, + "resource_class": "backend-tuned" + }, + { + "achieved_fraction": 0.1824, + "dispatch_p50": 326.2, + "dispatch_p99": 341.6, + "resource_class": "resource-constrained" + } + ] + }, + { + "sku": "b300", + "phase": "prefill", + "dtype": "bf16", + "T": 2048, + "n_points": 2, + "curve": [ + { + "achieved_fraction": 0.1351, + "dispatch_p50": 592, + "dispatch_p99": 609.6, + "resource_class": "backend-tuned" + }, + { + "achieved_fraction": 0.1824, + "dispatch_p50": 577.1, + "dispatch_p99": 591.3, + "resource_class": "resource-constrained" + } + ] + }, + { + "sku": "b300", + "phase": "prefill", + "dtype": "bf16", + "T": 4096, + "n_points": 2, + "curve": [ + { + "achieved_fraction": 0.1351, + "dispatch_p50": 1092.6, + "dispatch_p99": 1123.6, + "resource_class": "backend-tuned" + }, + { + "achieved_fraction": 0.1824, + "dispatch_p50": 1069.5, + "dispatch_p99": 1090.9, + "resource_class": "resource-constrained" + } + ] + }, + { + "sku": "b300", + "phase": "prefill", + "dtype": "fp8", + "T": 128, + "n_points": 2, + "curve": [ + { + "achieved_fraction": 0.1351, + "dispatch_p50": 86.3, + "dispatch_p99": 98.3, + "resource_class": "backend-tuned" + }, + { + "achieved_fraction": 0.1824, + "dispatch_p50": 83.5, + "dispatch_p99": 102.1, + "resource_class": "resource-constrained" + } + ] + }, + { + "sku": "b300", + "phase": "prefill", + "dtype": "fp8", + "T": 256, + "n_points": 2, + "curve": [ + { + "achieved_fraction": 0.1351, + "dispatch_p50": 118, + "dispatch_p99": 129.9, + "resource_class": "backend-tuned" + }, + { + "achieved_fraction": 0.1824, + "dispatch_p50": 116.7, + "dispatch_p99": 135.4, + "resource_class": "resource-constrained" + } + ] + }, + { + "sku": "b300", + "phase": 
"prefill", + "dtype": "fp8", + "T": 512, + "n_points": 2, + "curve": [ + { + "achieved_fraction": 0.1351, + "dispatch_p50": 176.9, + "dispatch_p99": 189.6, + "resource_class": "backend-tuned" + }, + { + "achieved_fraction": 0.1824, + "dispatch_p50": 177.8, + "dispatch_p99": 191.1, + "resource_class": "resource-constrained" + } + ] + }, + { + "sku": "b300", + "phase": "prefill", + "dtype": "fp8", + "T": 1024, + "n_points": 2, + "curve": [ + { + "achieved_fraction": 0.1351, + "dispatch_p50": 299.3, + "dispatch_p99": 312.9, + "resource_class": "backend-tuned" + }, + { + "achieved_fraction": 0.1824, + "dispatch_p50": 298, + "dispatch_p99": 319.1, + "resource_class": "resource-constrained" + } + ] + }, + { + "sku": "b300", + "phase": "prefill", + "dtype": "fp8", + "T": 2048, + "n_points": 2, + "curve": [ + { + "achieved_fraction": 0.1351, + "dispatch_p50": 535.1, + "dispatch_p99": 553.6, + "resource_class": "backend-tuned" + }, + { + "achieved_fraction": 0.1824, + "dispatch_p50": 541.5, + "dispatch_p99": 557.8, + "resource_class": "resource-constrained" + } + ] + }, + { + "sku": "b300", + "phase": "prefill", + "dtype": "fp8", + "T": 4096, + "n_points": 2, + "curve": [ + { + "achieved_fraction": 0.1351, + "dispatch_p50": 1012, + "dispatch_p99": 1036.7, + "resource_class": "backend-tuned" + }, + { + "achieved_fraction": 0.1824, + "dispatch_p50": 1019.6, + "dispatch_p99": 1045.3, + "resource_class": "resource-constrained" + } + ] + }, + { + "sku": "h100", + "phase": "decode", + "dtype": "bf16", + "T": 1, + "n_points": 4, + "curve": [ + { + "achieved_fraction": 0.0985, + "dispatch_p50": 97.2, + "dispatch_p99": 111.1, + "resource_class": "resource-constrained" + }, + { + "achieved_fraction": 0.1515, + "dispatch_p50": 127.7, + "dispatch_p99": 143.9, + "resource_class": "backend-tuned" + }, + { + "achieved_fraction": 0.1818, + "dispatch_p50": 95.7, + "dispatch_p99": 109.4, + "resource_class": "resource-constrained" + }, + { + "achieved_fraction": 0.5985, + "dispatch_p50": 
96.3, + "dispatch_p99": 108.5, + "resource_class": "resource-constrained" + } + ] + }, + { + "sku": "h100", + "phase": "decode", + "dtype": "bf16", + "T": 2, + "n_points": 2, + "curve": [ + { + "achieved_fraction": 0.1515, + "dispatch_p50": 179.5, + "dispatch_p99": 194.9, + "resource_class": "backend-tuned" + }, + { + "achieved_fraction": 0.1818, + "dispatch_p50": 71.2, + "dispatch_p99": 107.9, + "resource_class": "resource-constrained" + } + ] + }, + { + "sku": "h100", + "phase": "decode", + "dtype": "bf16", + "T": 4, + "n_points": 2, + "curve": [ + { + "achieved_fraction": 0.1515, + "dispatch_p50": 130.9, + "dispatch_p99": 201, + "resource_class": "backend-tuned" + }, + { + "achieved_fraction": 0.1818, + "dispatch_p50": 95.2, + "dispatch_p99": 439.6, + "resource_class": "resource-constrained" + } + ] + }, + { + "sku": "h100", + "phase": "decode", + "dtype": "bf16", + "T": 8, + "n_points": 4, + "curve": [ + { + "achieved_fraction": 0.0985, + "dispatch_p50": 99.3, + "dispatch_p99": 113.5, + "resource_class": "resource-constrained" + }, + { + "achieved_fraction": 0.1515, + "dispatch_p50": 133.1, + "dispatch_p99": 479, + "resource_class": "backend-tuned" + }, + { + "achieved_fraction": 0.1818, + "dispatch_p50": 95.4, + "dispatch_p99": 113.5, + "resource_class": "resource-constrained" + }, + { + "achieved_fraction": 0.5985, + "dispatch_p50": 96.7, + "dispatch_p99": 112.2, + "resource_class": "resource-constrained" + } + ] + }, + { + "sku": "h100", + "phase": "decode", + "dtype": "bf16", + "T": 16, + "n_points": 2, + "curve": [ + { + "achieved_fraction": 0.1515, + "dispatch_p50": 129.6, + "dispatch_p99": 203.3, + "resource_class": "backend-tuned" + }, + { + "achieved_fraction": 0.1818, + "dispatch_p50": 95.7, + "dispatch_p99": 106.5, + "resource_class": "resource-constrained" + } + ] + }, + { + "sku": "h100", + "phase": "decode", + "dtype": "bf16", + "T": 32, + "n_points": 4, + "curve": [ + { + "achieved_fraction": 0.0985, + "dispatch_p50": 103.3, + "dispatch_p99": 
121.4, + "resource_class": "resource-constrained" + }, + { + "achieved_fraction": 0.1515, + "dispatch_p50": 181.8, + "dispatch_p99": 324.5, + "resource_class": "backend-tuned" + }, + { + "achieved_fraction": 0.1818, + "dispatch_p50": 80.9, + "dispatch_p99": 113.2, + "resource_class": "resource-constrained" + }, + { + "achieved_fraction": 0.5985, + "dispatch_p50": 102.9, + "dispatch_p99": 114.4, + "resource_class": "resource-constrained" + } + ] + }, + { + "sku": "h100", + "phase": "decode", + "dtype": "bf16", + "T": 64, + "n_points": 2, + "curve": [ + { + "achieved_fraction": 0.1515, + "dispatch_p50": 147.7, + "dispatch_p99": 211.7, + "resource_class": "backend-tuned" + }, + { + "achieved_fraction": 0.1818, + "dispatch_p50": 103.5, + "dispatch_p99": 125.7, + "resource_class": "resource-constrained" + } + ] + }, + { + "sku": "h100", + "phase": "decode", + "dtype": "bf16", + "T": 128, + "n_points": 4, + "curve": [ + { + "achieved_fraction": 0.0985, + "dispatch_p50": 129.7, + "dispatch_p99": 143.9, + "resource_class": "resource-constrained" + }, + { + "achieved_fraction": 0.1515, + "dispatch_p50": 186.2, + "dispatch_p99": 208, + "resource_class": "backend-tuned" + }, + { + "achieved_fraction": 0.1818, + "dispatch_p50": 111.3, + "dispatch_p99": 139.9, + "resource_class": "resource-constrained" + }, + { + "achieved_fraction": 0.5985, + "dispatch_p50": 129.1, + "dispatch_p99": 142.1, + "resource_class": "resource-constrained" + } + ] + }, + { + "sku": "h100", + "phase": "decode", + "dtype": "fp8", + "T": 1, + "n_points": 2, + "curve": [ + { + "achieved_fraction": 0.1515, + "dispatch_p50": 98.8, + "dispatch_p99": 114.9, + "resource_class": "backend-tuned" + }, + { + "achieved_fraction": 0.1818, + "dispatch_p50": 98, + "dispatch_p99": 110.2, + "resource_class": "resource-constrained" + } + ] + }, + { + "sku": "h100", + "phase": "decode", + "dtype": "fp8", + "T": 2, + "n_points": 2, + "curve": [ + { + "achieved_fraction": 0.1515, + "dispatch_p50": 99.5, + "dispatch_p99": 
111.6, + "resource_class": "backend-tuned" + }, + { + "achieved_fraction": 0.1818, + "dispatch_p50": 72.2, + "dispatch_p99": 105.1, + "resource_class": "resource-constrained" + } + ] + }, + { + "sku": "h100", + "phase": "decode", + "dtype": "fp8", + "T": 4, + "n_points": 2, + "curve": [ + { + "achieved_fraction": 0.1515, + "dispatch_p50": 99.3, + "dispatch_p99": 110.5, + "resource_class": "backend-tuned" + }, + { + "achieved_fraction": 0.1818, + "dispatch_p50": 72.3, + "dispatch_p99": 115.5, + "resource_class": "resource-constrained" + } + ] + }, + { + "sku": "h100", + "phase": "decode", + "dtype": "fp8", + "T": 8, + "n_points": 2, + "curve": [ + { + "achieved_fraction": 0.1515, + "dispatch_p50": 100.2, + "dispatch_p99": 111.9, + "resource_class": "backend-tuned" + }, + { + "achieved_fraction": 0.1818, + "dispatch_p50": 97.5, + "dispatch_p99": 113.4, + "resource_class": "resource-constrained" + } + ] + }, + { + "sku": "h100", + "phase": "decode", + "dtype": "fp8", + "T": 16, + "n_points": 2, + "curve": [ + { + "achieved_fraction": 0.1515, + "dispatch_p50": 98.9, + "dispatch_p99": 112.2, + "resource_class": "backend-tuned" + }, + { + "achieved_fraction": 0.1818, + "dispatch_p50": 97.1, + "dispatch_p99": 113.4, + "resource_class": "resource-constrained" + } + ] + }, + { + "sku": "h100", + "phase": "decode", + "dtype": "fp8", + "T": 32, + "n_points": 2, + "curve": [ + { + "achieved_fraction": 0.1515, + "dispatch_p50": 99.9, + "dispatch_p99": 181.1, + "resource_class": "backend-tuned" + }, + { + "achieved_fraction": 0.1818, + "dispatch_p50": 73.1, + "dispatch_p99": 112.1, + "resource_class": "resource-constrained" + } + ] + }, + { + "sku": "h100", + "phase": "decode", + "dtype": "fp8", + "T": 64, + "n_points": 2, + "curve": [ + { + "achieved_fraction": 0.1515, + "dispatch_p50": 101.4, + "dispatch_p99": 370.9, + "resource_class": "backend-tuned" + }, + { + "achieved_fraction": 0.1818, + "dispatch_p50": 78.7, + "dispatch_p99": 125.4, + "resource_class": 
"resource-constrained" + } + ] + }, + { + "sku": "h100", + "phase": "decode", + "dtype": "fp8", + "T": 128, + "n_points": 2, + "curve": [ + { + "achieved_fraction": 0.1515, + "dispatch_p50": 106.2, + "dispatch_p99": 117.6, + "resource_class": "backend-tuned" + }, + { + "achieved_fraction": 0.1818, + "dispatch_p50": 90.3, + "dispatch_p99": 117.1, + "resource_class": "resource-constrained" + } + ] + }, + { + "sku": "h100", + "phase": "prefill", + "dtype": "bf16", + "T": 128, + "n_points": 2, + "curve": [ + { + "achieved_fraction": 0.1515, + "dispatch_p50": 161.1, + "dispatch_p99": 170.8, + "resource_class": "backend-tuned" + }, + { + "achieved_fraction": 0.1818, + "dispatch_p50": 110.5, + "dispatch_p99": 166, + "resource_class": "resource-constrained" + } + ] + }, + { + "sku": "h100", + "phase": "prefill", + "dtype": "bf16", + "T": 256, + "n_points": 2, + "curve": [ + { + "achieved_fraction": 0.1515, + "dispatch_p50": 186.6, + "dispatch_p99": 197.3, + "resource_class": "backend-tuned" + }, + { + "achieved_fraction": 0.1818, + "dispatch_p50": 147.4, + "dispatch_p99": 154.3, + "resource_class": "resource-constrained" + } + ] + }, + { + "sku": "h100", + "phase": "prefill", + "dtype": "bf16", + "T": 512, + "n_points": 2, + "curve": [ + { + "achieved_fraction": 0.1515, + "dispatch_p50": 239.6, + "dispatch_p99": 250.6, + "resource_class": "backend-tuned" + }, + { + "achieved_fraction": 0.1818, + "dispatch_p50": 204.9, + "dispatch_p99": 226.4, + "resource_class": "resource-constrained" + } + ] + }, + { + "sku": "h100", + "phase": "prefill", + "dtype": "bf16", + "T": 1024, + "n_points": 2, + "curve": [ + { + "achieved_fraction": 0.1515, + "dispatch_p50": 346.9, + "dispatch_p99": 358.1, + "resource_class": "backend-tuned" + }, + { + "achieved_fraction": 0.1818, + "dispatch_p50": 319.9, + "dispatch_p99": 330.6, + "resource_class": "resource-constrained" + } + ] + }, + { + "sku": "h100", + "phase": "prefill", + "dtype": "bf16", + "T": 2048, + "n_points": 2, + "curve": [ + { + 
"achieved_fraction": 0.1515, + "dispatch_p50": 571.1, + "dispatch_p99": 621.2, + "resource_class": "backend-tuned" + }, + { + "achieved_fraction": 0.1818, + "dispatch_p50": 570.9, + "dispatch_p99": 593.9, + "resource_class": "resource-constrained" + } + ] + }, + { + "sku": "h100", + "phase": "prefill", + "dtype": "bf16", + "T": 4096, + "n_points": 2, + "curve": [ + { + "achieved_fraction": 0.1515, + "dispatch_p50": 1035.6, + "dispatch_p99": 1074.5, + "resource_class": "backend-tuned" + }, + { + "achieved_fraction": 0.1818, + "dispatch_p50": 1075.9, + "dispatch_p99": 1102.5, + "resource_class": "resource-constrained" + } + ] + }, + { + "sku": "h100", + "phase": "prefill", + "dtype": "fp8", + "T": 128, + "n_points": 2, + "curve": [ + { + "achieved_fraction": 0.1515, + "dispatch_p50": 90, + "dispatch_p99": 158.9, + "resource_class": "backend-tuned" + }, + { + "achieved_fraction": 0.1818, + "dispatch_p50": 89.6, + "dispatch_p99": 100.8, + "resource_class": "resource-constrained" + } + ] + }, + { + "sku": "h100", + "phase": "prefill", + "dtype": "fp8", + "T": 256, + "n_points": 2, + "curve": [ + { + "achieved_fraction": 0.1515, + "dispatch_p50": 107.5, + "dispatch_p99": 170.3, + "resource_class": "backend-tuned" + }, + { + "achieved_fraction": 0.1818, + "dispatch_p50": 106.2, + "dispatch_p99": 125.9, + "resource_class": "resource-constrained" + } + ] + }, + { + "sku": "h100", + "phase": "prefill", + "dtype": "fp8", + "T": 512, + "n_points": 2, + "curve": [ + { + "achieved_fraction": 0.1515, + "dispatch_p50": 147, + "dispatch_p99": 460.9, + "resource_class": "backend-tuned" + }, + { + "achieved_fraction": 0.1818, + "dispatch_p50": 138.5, + "dispatch_p99": 197.3, + "resource_class": "resource-constrained" + } + ] + }, + { + "sku": "h100", + "phase": "prefill", + "dtype": "fp8", + "T": 1024, + "n_points": 2, + "curve": [ + { + "achieved_fraction": 0.1515, + "dispatch_p50": 215.3, + "dispatch_p99": 223.5, + "resource_class": "backend-tuned" + }, + { + "achieved_fraction": 
0.1818, + "dispatch_p50": 197.4, + "dispatch_p99": 216.3, + "resource_class": "resource-constrained" + } + ] + }, + { + "sku": "h100", + "phase": "prefill", + "dtype": "fp8", + "T": 2048, + "n_points": 2, + "curve": [ + { + "achieved_fraction": 0.1515, + "dispatch_p50": 354.8, + "dispatch_p99": 380.3, + "resource_class": "backend-tuned" + }, + { + "achieved_fraction": 0.1818, + "dispatch_p50": 318.7, + "dispatch_p99": 347.3, + "resource_class": "resource-constrained" + } + ] + }, + { + "sku": "h100", + "phase": "prefill", + "dtype": "fp8", + "T": 4096, + "n_points": 2, + "curve": [ + { + "achieved_fraction": 0.1515, + "dispatch_p50": 641.4, + "dispatch_p99": 655.5, + "resource_class": "backend-tuned" + }, + { + "achieved_fraction": 0.1818, + "dispatch_p50": 574.8, + "dispatch_p99": 604.8, + "resource_class": "resource-constrained" + } + ] + }, + { + "sku": "h200", + "phase": "decode", + "dtype": "bf16", + "T": 1, + "n_points": 2, + "curve": [ + { + "achieved_fraction": 0.1515, + "dispatch_p50": 123.6, + "dispatch_p99": 203.1, + "resource_class": "backend-tuned" + }, + { + "achieved_fraction": 0.1818, + "dispatch_p50": 74.1, + "dispatch_p99": 138, + "resource_class": "resource-constrained" + } + ] + }, + { + "sku": "h200", + "phase": "decode", + "dtype": "bf16", + "T": 2, + "n_points": 2, + "curve": [ + { + "achieved_fraction": 0.1515, + "dispatch_p50": 114.8, + "dispatch_p99": 181, + "resource_class": "backend-tuned" + }, + { + "achieved_fraction": 0.1818, + "dispatch_p50": 74.3, + "dispatch_p99": 131.7, + "resource_class": "resource-constrained" + } + ] + }, + { + "sku": "h200", + "phase": "decode", + "dtype": "bf16", + "T": 4, + "n_points": 2, + "curve": [ + { + "achieved_fraction": 0.1515, + "dispatch_p50": 111.7, + "dispatch_p99": 167.4, + "resource_class": "backend-tuned" + }, + { + "achieved_fraction": 0.1818, + "dispatch_p50": 75, + "dispatch_p99": 139.7, + "resource_class": "resource-constrained" + } + ] + }, + { + "sku": "h200", + "phase": "decode", + 
"dtype": "bf16", + "T": 8, + "n_points": 2, + "curve": [ + { + "achieved_fraction": 0.1515, + "dispatch_p50": 112.8, + "dispatch_p99": 166.3, + "resource_class": "backend-tuned" + }, + { + "achieved_fraction": 0.1818, + "dispatch_p50": 74.8, + "dispatch_p99": 123.2, + "resource_class": "resource-constrained" + } + ] + }, + { + "sku": "h200", + "phase": "decode", + "dtype": "bf16", + "T": 16, + "n_points": 2, + "curve": [ + { + "achieved_fraction": 0.1515, + "dispatch_p50": 120.3, + "dispatch_p99": 217, + "resource_class": "backend-tuned" + }, + { + "achieved_fraction": 0.1818, + "dispatch_p50": 73.2, + "dispatch_p99": 195.9, + "resource_class": "resource-constrained" + } + ] + }, + { + "sku": "h200", + "phase": "decode", + "dtype": "bf16", + "T": 32, + "n_points": 2, + "curve": [ + { + "achieved_fraction": 0.1515, + "dispatch_p50": 123.6, + "dispatch_p99": 169.6, + "resource_class": "backend-tuned" + }, + { + "achieved_fraction": 0.1818, + "dispatch_p50": 82.3, + "dispatch_p99": 134.9, + "resource_class": "resource-constrained" + } + ] + }, + { + "sku": "h200", + "phase": "decode", + "dtype": "bf16", + "T": 64, + "n_points": 2, + "curve": [ + { + "achieved_fraction": 0.1515, + "dispatch_p50": 136, + "dispatch_p99": 197.4, + "resource_class": "backend-tuned" + }, + { + "achieved_fraction": 0.1818, + "dispatch_p50": 91.3, + "dispatch_p99": 146.8, + "resource_class": "resource-constrained" + } + ] + }, + { + "sku": "h200", + "phase": "decode", + "dtype": "bf16", + "T": 128, + "n_points": 2, + "curve": [ + { + "achieved_fraction": 0.1515, + "dispatch_p50": 156.2, + "dispatch_p99": 197.5, + "resource_class": "backend-tuned" + }, + { + "achieved_fraction": 0.1818, + "dispatch_p50": 116, + "dispatch_p99": 149.2, + "resource_class": "resource-constrained" + } + ] + }, + { + "sku": "h200", + "phase": "decode", + "dtype": "fp8", + "T": 1, + "n_points": 2, + "curve": [ + { + "achieved_fraction": 0.1515, + "dispatch_p50": 73, + "dispatch_p99": 139.2, + "resource_class": 
"backend-tuned" + }, + { + "achieved_fraction": 0.1818, + "dispatch_p50": 70.2, + "dispatch_p99": 121.2, + "resource_class": "resource-constrained" + } + ] + }, + { + "sku": "h200", + "phase": "decode", + "dtype": "fp8", + "T": 2, + "n_points": 2, + "curve": [ + { + "achieved_fraction": 0.1515, + "dispatch_p50": 71.4, + "dispatch_p99": 113.8, + "resource_class": "backend-tuned" + }, + { + "achieved_fraction": 0.1818, + "dispatch_p50": 67.6, + "dispatch_p99": 144.6, + "resource_class": "resource-constrained" + } + ] + }, + { + "sku": "h200", + "phase": "decode", + "dtype": "fp8", + "T": 4, + "n_points": 2, + "curve": [ + { + "achieved_fraction": 0.1515, + "dispatch_p50": 72.7, + "dispatch_p99": 146.8, + "resource_class": "backend-tuned" + }, + { + "achieved_fraction": 0.1818, + "dispatch_p50": 69.7, + "dispatch_p99": 228.6, + "resource_class": "resource-constrained" + } + ] + }, + { + "sku": "h200", + "phase": "decode", + "dtype": "fp8", + "T": 8, + "n_points": 2, + "curve": [ + { + "achieved_fraction": 0.1515, + "dispatch_p50": 70.1, + "dispatch_p99": 165.8, + "resource_class": "backend-tuned" + }, + { + "achieved_fraction": 0.1818, + "dispatch_p50": 70.5, + "dispatch_p99": 151.9, + "resource_class": "resource-constrained" + } + ] + }, + { + "sku": "h200", + "phase": "decode", + "dtype": "fp8", + "T": 16, + "n_points": 2, + "curve": [ + { + "achieved_fraction": 0.1515, + "dispatch_p50": 73.7, + "dispatch_p99": 146.1, + "resource_class": "backend-tuned" + }, + { + "achieved_fraction": 0.1818, + "dispatch_p50": 68.5, + "dispatch_p99": 126.1, + "resource_class": "resource-constrained" + } + ] + }, + { + "sku": "h200", + "phase": "decode", + "dtype": "fp8", + "T": 32, + "n_points": 2, + "curve": [ + { + "achieved_fraction": 0.1515, + "dispatch_p50": 71.6, + "dispatch_p99": 167.3, + "resource_class": "backend-tuned" + }, + { + "achieved_fraction": 0.1818, + "dispatch_p50": 68.3, + "dispatch_p99": 114.8, + "resource_class": "resource-constrained" + } + ] + }, + { + 
"sku": "h200", + "phase": "decode", + "dtype": "fp8", + "T": 64, + "n_points": 2, + "curve": [ + { + "achieved_fraction": 0.1515, + "dispatch_p50": 79.8, + "dispatch_p99": 125.7, + "resource_class": "backend-tuned" + }, + { + "achieved_fraction": 0.1818, + "dispatch_p50": 74.5, + "dispatch_p99": 120.6, + "resource_class": "resource-constrained" + } + ] + }, + { + "sku": "h200", + "phase": "decode", + "dtype": "fp8", + "T": 128, + "n_points": 2, + "curve": [ + { + "achieved_fraction": 0.1515, + "dispatch_p50": 88.4, + "dispatch_p99": 115.8, + "resource_class": "backend-tuned" + }, + { + "achieved_fraction": 0.1818, + "dispatch_p50": 88.7, + "dispatch_p99": 129, + "resource_class": "resource-constrained" + } + ] + }, + { + "sku": "h200", + "phase": "prefill", + "dtype": "bf16", + "T": 128, + "n_points": 2, + "curve": [ + { + "achieved_fraction": 0.1515, + "dispatch_p50": 159.7, + "dispatch_p99": 266.7, + "resource_class": "backend-tuned" + }, + { + "achieved_fraction": 0.1818, + "dispatch_p50": 116.4, + "dispatch_p99": 160, + "resource_class": "resource-constrained" + } + ] + }, + { + "sku": "h200", + "phase": "prefill", + "dtype": "bf16", + "T": 256, + "n_points": 2, + "curve": [ + { + "achieved_fraction": 0.1515, + "dispatch_p50": 185.9, + "dispatch_p99": 239.7, + "resource_class": "backend-tuned" + }, + { + "achieved_fraction": 0.1818, + "dispatch_p50": 147, + "dispatch_p99": 191.4, + "resource_class": "resource-constrained" + } + ] + }, + { + "sku": "h200", + "phase": "prefill", + "dtype": "bf16", + "T": 512, + "n_points": 2, + "curve": [ + { + "achieved_fraction": 0.1515, + "dispatch_p50": 239.3, + "dispatch_p99": 267.2, + "resource_class": "backend-tuned" + }, + { + "achieved_fraction": 0.1818, + "dispatch_p50": 207.7, + "dispatch_p99": 267.9, + "resource_class": "resource-constrained" + } + ] + }, + { + "sku": "h200", + "phase": "prefill", + "dtype": "bf16", + "T": 1024, + "n_points": 2, + "curve": [ + { + "achieved_fraction": 0.1515, + "dispatch_p50": 351.7, 
+ "dispatch_p99": 419.2, + "resource_class": "backend-tuned" + }, + { + "achieved_fraction": 0.1818, + "dispatch_p50": 324.9, + "dispatch_p99": 364.7, + "resource_class": "resource-constrained" + } + ] + }, + { + "sku": "h200", + "phase": "prefill", + "dtype": "bf16", + "T": 2048, + "n_points": 2, + "curve": [ + { + "achieved_fraction": 0.1515, + "dispatch_p50": 584.7, + "dispatch_p99": 635.1, + "resource_class": "backend-tuned" + }, + { + "achieved_fraction": 0.1818, + "dispatch_p50": 566.4, + "dispatch_p99": 609.2, + "resource_class": "resource-constrained" + } + ] + }, + { + "sku": "h200", + "phase": "prefill", + "dtype": "bf16", + "T": 4096, + "n_points": 2, + "curve": [ + { + "achieved_fraction": 0.1515, + "dispatch_p50": 1028.6, + "dispatch_p99": 1135.3, + "resource_class": "backend-tuned" + }, + { + "achieved_fraction": 0.1818, + "dispatch_p50": 1051.8, + "dispatch_p99": 1161.4, + "resource_class": "resource-constrained" + } + ] + }, + { + "sku": "h200", + "phase": "prefill", + "dtype": "fp8", + "T": 128, + "n_points": 2, + "curve": [ + { + "achieved_fraction": 0.1515, + "dispatch_p50": 86.4, + "dispatch_p99": 152.1, + "resource_class": "backend-tuned" + }, + { + "achieved_fraction": 0.1818, + "dispatch_p50": 86.8, + "dispatch_p99": 141.8, + "resource_class": "resource-constrained" + } + ] + }, + { + "sku": "h200", + "phase": "prefill", + "dtype": "fp8", + "T": 256, + "n_points": 2, + "curve": [ + { + "achieved_fraction": 0.1515, + "dispatch_p50": 104.4, + "dispatch_p99": 141.7, + "resource_class": "backend-tuned" + }, + { + "achieved_fraction": 0.1818, + "dispatch_p50": 103.2, + "dispatch_p99": 155.1, + "resource_class": "resource-constrained" + } + ] + }, + { + "sku": "h200", + "phase": "prefill", + "dtype": "fp8", + "T": 512, + "n_points": 2, + "curve": [ + { + "achieved_fraction": 0.1515, + "dispatch_p50": 141.1, + "dispatch_p99": 233.4, + "resource_class": "backend-tuned" + }, + { + "achieved_fraction": 0.1818, + "dispatch_p50": 135.8, + "dispatch_p99": 
238, + "resource_class": "resource-constrained" + } + ] + }, + { + "sku": "h200", + "phase": "prefill", + "dtype": "fp8", + "T": 1024, + "n_points": 2, + "curve": [ + { + "achieved_fraction": 0.1515, + "dispatch_p50": 205.8, + "dispatch_p99": 248.9, + "resource_class": "backend-tuned" + }, + { + "achieved_fraction": 0.1818, + "dispatch_p50": 196.6, + "dispatch_p99": 253.7, + "resource_class": "resource-constrained" + } + ] + }, + { + "sku": "h200", + "phase": "prefill", + "dtype": "fp8", + "T": 2048, + "n_points": 2, + "curve": [ + { + "achieved_fraction": 0.1515, + "dispatch_p50": 340.2, + "dispatch_p99": 421.5, + "resource_class": "backend-tuned" + }, + { + "achieved_fraction": 0.1818, + "dispatch_p50": 320.2, + "dispatch_p99": 440, + "resource_class": "resource-constrained" + } + ] + }, + { + "sku": "h200", + "phase": "prefill", + "dtype": "fp8", + "T": 4096, + "n_points": 2, + "curve": [ + { + "achieved_fraction": 0.1515, + "dispatch_p50": 612.3, + "dispatch_p99": 680.4, + "resource_class": "backend-tuned" + }, + { + "achieved_fraction": 0.1818, + "dispatch_p50": 572.4, + "dispatch_p99": 629.7, + "resource_class": "resource-constrained" + } + ] + } + ], + "topologyPenalty": [ + { + "sku": "h200", + "phase": "decode", + "dtype": "bf16", + "T": 1, + "ep8_p50": 123.6, + "ep16_p50": 578.4, + "penalty_pct": 367.9 + }, + { + "sku": "h200", + "phase": "decode", + "dtype": "bf16", + "T": 2, + "ep8_p50": 114.8, + "ep16_p50": 547.2, + "penalty_pct": 376.5 + }, + { + "sku": "h200", + "phase": "decode", + "dtype": "bf16", + "T": 4, + "ep8_p50": 111.7, + "ep16_p50": 621.5, + "penalty_pct": 456.5 + }, + { + "sku": "h200", + "phase": "decode", + "dtype": "bf16", + "T": 8, + "ep8_p50": 112.8, + "ep16_p50": 611.8, + "penalty_pct": 442.2 + }, + { + "sku": "h200", + "phase": "decode", + "dtype": "bf16", + "T": 16, + "ep8_p50": 120.3, + "ep16_p50": 631.8, + "penalty_pct": 425.3 + }, + { + "sku": "h200", + "phase": "decode", + "dtype": "bf16", + "T": 32, + "ep8_p50": 123.6, + 
"ep16_p50": 782.9, + "penalty_pct": 533.5 + }, + { + "sku": "h200", + "phase": "decode", + "dtype": "bf16", + "T": 64, + "ep8_p50": 136, + "ep16_p50": 679.3, + "penalty_pct": 399.5 + }, + { + "sku": "h200", + "phase": "decode", + "dtype": "bf16", + "T": 128, + "ep8_p50": 156.2, + "ep16_p50": 808.1, + "penalty_pct": 417.5 + }, + { + "sku": "mi355x", + "phase": "decode", + "dtype": "bf16", + "T": 1, + "ep8_p50": 40.6, + "ep16_p50": 360.4, + "penalty_pct": 788.7 + }, + { + "sku": "mi355x", + "phase": "decode", + "dtype": "bf16", + "T": 2, + "ep8_p50": 42.6, + "ep16_p50": 345.1, + "penalty_pct": 710.9 + }, + { + "sku": "mi355x", + "phase": "decode", + "dtype": "bf16", + "T": 4, + "ep8_p50": 42.2, + "ep16_p50": 352.8, + "penalty_pct": 736.8 + }, + { + "sku": "mi355x", + "phase": "decode", + "dtype": "bf16", + "T": 8, + "ep8_p50": 42.5, + "ep16_p50": 430.7, + "penalty_pct": 912.9 + } + ], + "skewPenalty": [ + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 1, + "p50_amplification": 0.694, + "p99_amplification": 0.867 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 2, + "p50_amplification": 0.695, + "p99_amplification": 0.811 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 4, + "p50_amplification": 0.697, + "p99_amplification": 0.683 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 8, + "p50_amplification": 0.716, + "p99_amplification": 0.76 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 16, + "p50_amplification": 0.716, + "p99_amplification": 0.881 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 32, + "p50_amplification": 0.743, + "p99_amplification": 0.837 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 64, + "p50_amplification": 0.718, + "p99_amplification": 0.756 + }, + { + "sku": "b300", + "ep": 8, + "phase": 
"decode", + "routing": "zipf", + "T": 128, + "p50_amplification": 0.813, + "p99_amplification": 0.898 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 1, + "p50_amplification": 0.698, + "p99_amplification": 0.753 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 8, + "p50_amplification": 0.723, + "p99_amplification": 0.798 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 32, + "p50_amplification": 0.76, + "p99_amplification": 0.82 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 128, + "p50_amplification": 0.828, + "p99_amplification": 0.816 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 1, + "p50_amplification": 0.722, + "p99_amplification": 0.819 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 2, + "p50_amplification": 0.707, + "p99_amplification": 0.777 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 4, + "p50_amplification": 0.705, + "p99_amplification": 0.634 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 8, + "p50_amplification": 0.713, + "p99_amplification": 0.806 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 16, + "p50_amplification": 0.709, + "p99_amplification": 0.817 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 32, + "p50_amplification": 0.705, + "p99_amplification": 0.887 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 64, + "p50_amplification": 0.707, + "p99_amplification": 0.736 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 128, + "p50_amplification": 0.736, + "p99_amplification": 0.729 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + 
"routing": "zipf-heavy", + "T": 1, + "p50_amplification": 0.699, + "p99_amplification": 0.752 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 8, + "p50_amplification": 0.699, + "p99_amplification": 0.708 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 32, + "p50_amplification": 0.68, + "p99_amplification": 0.77 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 128, + "p50_amplification": 0.737, + "p99_amplification": 0.823 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 1, + "p50_amplification": 21.723, + "p99_amplification": 36.695 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 2, + "p50_amplification": 21.728, + "p99_amplification": 38.053 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 4, + "p50_amplification": 21.606, + "p99_amplification": 31.919 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 8, + "p50_amplification": 21.676, + "p99_amplification": 57.264 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 16, + "p50_amplification": 21.27, + "p99_amplification": 35.187 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 32, + "p50_amplification": 17.906, + "p99_amplification": 53.04 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 64, + "p50_amplification": 16.017, + "p99_amplification": 28.424 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 128, + "p50_amplification": 14.456, + "p99_amplification": 24.57 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 1, + "p50_amplification": 0.707, + "p99_amplification": 0.797 
+ }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 2, + "p50_amplification": 0.712, + "p99_amplification": 0.752 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 4, + "p50_amplification": 0.722, + "p99_amplification": 0.662 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 8, + "p50_amplification": 0.739, + "p99_amplification": 0.785 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 16, + "p50_amplification": 0.767, + "p99_amplification": 0.905 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 32, + "p50_amplification": 0.708, + "p99_amplification": 0.772 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 64, + "p50_amplification": 0.788, + "p99_amplification": 0.832 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 128, + "p50_amplification": 0.833, + "p99_amplification": 0.85 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 1, + "p50_amplification": 0.685, + "p99_amplification": 0.747 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 2, + "p50_amplification": 0.69, + "p99_amplification": 0.712 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 4, + "p50_amplification": 0.694, + "p99_amplification": 0.609 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 8, + "p50_amplification": 0.715, + "p99_amplification": 0.804 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 16, + "p50_amplification": 0.722, + "p99_amplification": 0.739 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 32, + "p50_amplification": 0.681, + 
"p99_amplification": 0.713 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 64, + "p50_amplification": 0.777, + "p99_amplification": 0.867 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 128, + "p50_amplification": 0.744, + "p99_amplification": 0.791 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 1, + "p50_amplification": 0.697, + "p99_amplification": 0.741 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 2, + "p50_amplification": 0.703, + "p99_amplification": 0.718 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 4, + "p50_amplification": 0.717, + "p99_amplification": 0.623 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 8, + "p50_amplification": 0.721, + "p99_amplification": 0.745 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 16, + "p50_amplification": 0.723, + "p99_amplification": 0.868 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 32, + "p50_amplification": 0.746, + "p99_amplification": 0.763 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 64, + "p50_amplification": 0.716, + "p99_amplification": 0.866 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 128, + "p50_amplification": 0.823, + "p99_amplification": 0.912 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 1, + "p50_amplification": 0.712, + "p99_amplification": 0.809 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 2, + "p50_amplification": 0.716, + "p99_amplification": 0.706 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + 
"routing": "zipf-moderate+eplb", + "T": 4, + "p50_amplification": 0.734, + "p99_amplification": 0.686 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 8, + "p50_amplification": 0.74, + "p99_amplification": 0.87 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 16, + "p50_amplification": 0.796, + "p99_amplification": 0.781 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 32, + "p50_amplification": 0.702, + "p99_amplification": 0.751 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 64, + "p50_amplification": 0.796, + "p99_amplification": 0.801 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 128, + "p50_amplification": 0.762, + "p99_amplification": 0.77 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 1, + "p50_amplification": 0.714, + "p99_amplification": 0.778 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 2, + "p50_amplification": 0.72, + "p99_amplification": 0.825 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 4, + "p50_amplification": 0.757, + "p99_amplification": 0.868 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 8, + "p50_amplification": 0.741, + "p99_amplification": 0.849 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 16, + "p50_amplification": 0.806, + "p99_amplification": 0.813 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 32, + "p50_amplification": 0.703, + "p99_amplification": 0.776 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 64, + "p50_amplification": 0.807, + "p99_amplification": 0.86 + }, + { + 
"sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 128, + "p50_amplification": 0.763, + "p99_amplification": 0.785 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 1, + "p50_amplification": 0.688, + "p99_amplification": 0.915 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 2, + "p50_amplification": 0.69, + "p99_amplification": 0.807 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 4, + "p50_amplification": 0.705, + "p99_amplification": 0.895 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 8, + "p50_amplification": 0.721, + "p99_amplification": 0.745 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 16, + "p50_amplification": 0.718, + "p99_amplification": 0.756 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 32, + "p50_amplification": 0.746, + "p99_amplification": 0.765 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 64, + "p50_amplification": 0.722, + "p99_amplification": 0.759 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 128, + "p50_amplification": 0.817, + "p99_amplification": 0.83 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 1, + "p50_amplification": 0.7, + "p99_amplification": 0.781 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 2, + "p50_amplification": 0.712, + "p99_amplification": 0.796 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 4, + "p50_amplification": 0.707, + "p99_amplification": 0.634 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 8, + "p50_amplification": 0.723, + "p99_amplification": 0.838 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", 
+ "routing": "zipf+eplb", + "T": 16, + "p50_amplification": 0.731, + "p99_amplification": 0.855 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 32, + "p50_amplification": 0.694, + "p99_amplification": 0.779 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 64, + "p50_amplification": 0.791, + "p99_amplification": 0.841 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 128, + "p50_amplification": 0.755, + "p99_amplification": 0.76 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 128, + "p50_amplification": 0.829, + "p99_amplification": 0.934 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 512, + "p50_amplification": 0.921, + "p99_amplification": 0.942 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 2048, + "p50_amplification": 1.095, + "p99_amplification": 1.103 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 128, + "p50_amplification": 0.8, + "p99_amplification": 0.839 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 256, + "p50_amplification": 0.829, + "p99_amplification": 0.848 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 512, + "p50_amplification": 0.931, + "p99_amplification": 0.942 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 1024, + "p50_amplification": 1.005, + "p99_amplification": 1.01 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 2048, + "p50_amplification": 1.083, + "p99_amplification": 1.227 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 4096, + "p50_amplification": 1.146, + "p99_amplification": 1.14 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": 
"zipf-heavy", + "T": 128, + "p50_amplification": 0.737, + "p99_amplification": 0.836 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 512, + "p50_amplification": 0.81, + "p99_amplification": 0.873 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 2048, + "p50_amplification": 0.963, + "p99_amplification": 1.019 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 128, + "p50_amplification": 0.746, + "p99_amplification": 0.975 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 256, + "p50_amplification": 0.811, + "p99_amplification": 0.829 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 512, + "p50_amplification": 0.839, + "p99_amplification": 0.905 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 1024, + "p50_amplification": 0.9, + "p99_amplification": 0.935 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 2048, + "p50_amplification": 0.96, + "p99_amplification": 1.02 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 4096, + "p50_amplification": 1.018, + "p99_amplification": 1.098 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 128, + "p50_amplification": 0.753, + "p99_amplification": 0.786 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 256, + "p50_amplification": 0.844, + "p99_amplification": 0.875 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 512, + "p50_amplification": 0.866, + "p99_amplification": 0.913 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 1024, + "p50_amplification": 0.945, + "p99_amplification": 1.093 + }, + { + 
"sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 2048, + "p50_amplification": 0.983, + "p99_amplification": 1.102 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 4096, + "p50_amplification": 0.993, + "p99_amplification": 0.991 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 128, + "p50_amplification": 0.82, + "p99_amplification": 0.813 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 256, + "p50_amplification": 0.889, + "p99_amplification": 0.876 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 512, + "p50_amplification": 0.961, + "p99_amplification": 0.957 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 1024, + "p50_amplification": 1.028, + "p99_amplification": 1.021 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 2048, + "p50_amplification": 1.113, + "p99_amplification": 1.115 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 4096, + "p50_amplification": 1.176, + "p99_amplification": 1.271 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 128, + "p50_amplification": 0.758, + "p99_amplification": 0.804 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 256, + "p50_amplification": 0.841, + "p99_amplification": 0.85 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 512, + "p50_amplification": 0.871, + "p99_amplification": 1.015 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 1024, + "p50_amplification": 0.943, + "p99_amplification": 0.968 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 2048, 
+ "p50_amplification": 0.956, + "p99_amplification": 1.001 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 4096, + "p50_amplification": 0.979, + "p99_amplification": 0.987 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 128, + "p50_amplification": 0.804, + "p99_amplification": 0.808 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 256, + "p50_amplification": 0.864, + "p99_amplification": 0.87 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 512, + "p50_amplification": 0.91, + "p99_amplification": 0.924 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 1024, + "p50_amplification": 1.004, + "p99_amplification": 1.015 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 2048, + "p50_amplification": 1.082, + "p99_amplification": 1.117 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 4096, + "p50_amplification": 1.146, + "p99_amplification": 1.182 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 128, + "p50_amplification": 0.758, + "p99_amplification": 0.82 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 256, + "p50_amplification": 0.874, + "p99_amplification": 0.915 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 512, + "p50_amplification": 0.87, + "p99_amplification": 0.884 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 1024, + "p50_amplification": 0.939, + "p99_amplification": 0.971 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 2048, + "p50_amplification": 0.971, + 
"p99_amplification": 1.051 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 4096, + "p50_amplification": 0.975, + "p99_amplification": 0.981 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 128, + "p50_amplification": 0.754, + "p99_amplification": 0.83 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 256, + "p50_amplification": 0.873, + "p99_amplification": 0.861 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 512, + "p50_amplification": 0.87, + "p99_amplification": 0.915 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 1024, + "p50_amplification": 0.941, + "p99_amplification": 0.95 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 2048, + "p50_amplification": 0.97, + "p99_amplification": 0.978 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 4096, + "p50_amplification": 0.974, + "p99_amplification": 0.962 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 128, + "p50_amplification": 0.809, + "p99_amplification": 0.826 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 256, + "p50_amplification": 0.866, + "p99_amplification": 0.862 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 512, + "p50_amplification": 0.917, + "p99_amplification": 0.952 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 1024, + "p50_amplification": 1.008, + "p99_amplification": 1.01 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 2048, + "p50_amplification": 1.084, + "p99_amplification": 1.083 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 4096, + "p50_amplification": 
1.146, + "p99_amplification": 1.136 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 128, + "p50_amplification": 0.745, + "p99_amplification": 0.954 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 256, + "p50_amplification": 0.866, + "p99_amplification": 0.889 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 512, + "p50_amplification": 0.866, + "p99_amplification": 0.903 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 1024, + "p50_amplification": 0.925, + "p99_amplification": 0.924 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 2048, + "p50_amplification": 0.968, + "p99_amplification": 1.102 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 4096, + "p50_amplification": 0.972, + "p99_amplification": 0.974 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 1, + "p50_amplification": 0.731, + "p99_amplification": 0.751 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 2, + "p50_amplification": 0.535, + "p99_amplification": 0.549 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 4, + "p50_amplification": 0.733, + "p99_amplification": 0.547 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 8, + "p50_amplification": 0.736, + "p99_amplification": 0.233 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 16, + "p50_amplification": 0.749, + "p99_amplification": 0.544 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 32, + "p50_amplification": 0.531, + "p99_amplification": 0.341 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 64, + "p50_amplification": 0.724, + 
"p99_amplification": 1.182 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 128, + "p50_amplification": 0.69, + "p99_amplification": 0.731 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 1, + "p50_amplification": 0.762, + "p99_amplification": 0.821 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 8, + "p50_amplification": 0.733, + "p99_amplification": 0.287 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 32, + "p50_amplification": 0.563, + "p99_amplification": 0.381 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 128, + "p50_amplification": 0.748, + "p99_amplification": 0.787 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 1, + "p50_amplification": 0.737, + "p99_amplification": 0.929 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 2, + "p50_amplification": 0.526, + "p99_amplification": 0.543 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 4, + "p50_amplification": 0.727, + "p99_amplification": 0.538 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 8, + "p50_amplification": 0.723, + "p99_amplification": 0.351 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 16, + "p50_amplification": 0.743, + "p99_amplification": 0.542 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 32, + "p50_amplification": 0.532, + "p99_amplification": 0.338 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 64, + "p50_amplification": 0.709, + "p99_amplification": 0.565 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 128, + "p50_amplification": 0.648, + 
"p99_amplification": 0.714 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 1, + "p50_amplification": 0.751, + "p99_amplification": 0.775 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 8, + "p50_amplification": 0.738, + "p99_amplification": 0.235 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 32, + "p50_amplification": 0.544, + "p99_amplification": 0.357 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 128, + "p50_amplification": 0.654, + "p99_amplification": 0.654 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 1, + "p50_amplification": 0.736, + "p99_amplification": 0.75 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 2, + "p50_amplification": 0.389, + "p99_amplification": 0.548 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 4, + "p50_amplification": 0.548, + "p99_amplification": 0.537 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 8, + "p50_amplification": 0.542, + "p99_amplification": 0.216 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 16, + "p50_amplification": 0.633, + "p99_amplification": 0.523 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 32, + "p50_amplification": 0.494, + "p99_amplification": 0.331 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 64, + "p50_amplification": 0.651, + "p99_amplification": 0.604 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 128, + "p50_amplification": 0.615, + "p99_amplification": 0.673 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": 
"zipf-mild", + "T": 1, + "p50_amplification": 0.762, + "p99_amplification": 0.783 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 2, + "p50_amplification": 0.406, + "p99_amplification": 0.563 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 4, + "p50_amplification": 0.583, + "p99_amplification": 0.545 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 8, + "p50_amplification": 0.577, + "p99_amplification": 0.228 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 16, + "p50_amplification": 0.741, + "p99_amplification": 0.569 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 32, + "p50_amplification": 0.499, + "p99_amplification": 0.337 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 64, + "p50_amplification": 0.687, + "p99_amplification": 0.656 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 128, + "p50_amplification": 0.658, + "p99_amplification": 0.691 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 1, + "p50_amplification": 0.772, + "p99_amplification": 0.787 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 2, + "p50_amplification": 0.41, + "p99_amplification": 0.591 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 4, + "p50_amplification": 0.742, + "p99_amplification": 0.551 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 8, + "p50_amplification": 0.728, + "p99_amplification": 0.233 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 16, + "p50_amplification": 0.748, + "p99_amplification": 0.556 + }, + { + "sku": "h100", + "ep": 8, + "phase": 
"decode", + "routing": "zipf-mild+eplb", + "T": 32, + "p50_amplification": 0.504, + "p99_amplification": 0.341 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 64, + "p50_amplification": 0.709, + "p99_amplification": 0.581 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 128, + "p50_amplification": 0.61, + "p99_amplification": 0.673 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 1, + "p50_amplification": 0.75, + "p99_amplification": 0.764 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 2, + "p50_amplification": 0.395, + "p99_amplification": 0.544 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 4, + "p50_amplification": 0.559, + "p99_amplification": 0.522 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 8, + "p50_amplification": 0.57, + "p99_amplification": 0.224 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 16, + "p50_amplification": 0.612, + "p99_amplification": 1.13 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 32, + "p50_amplification": 0.487, + "p99_amplification": 0.339 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 64, + "p50_amplification": 0.693, + "p99_amplification": 0.585 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 128, + "p50_amplification": 0.676, + "p99_amplification": 0.711 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 1, + "p50_amplification": 0.546, + "p99_amplification": 0.695 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 2, + "p50_amplification": 0.389, + 
"p99_amplification": 0.494 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 4, + "p50_amplification": 0.566, + "p99_amplification": 0.554 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 8, + "p50_amplification": 0.569, + "p99_amplification": 0.221 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 16, + "p50_amplification": 0.637, + "p99_amplification": 0.525 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 32, + "p50_amplification": 0.497, + "p99_amplification": 0.338 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 64, + "p50_amplification": 0.656, + "p99_amplification": 0.587 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 128, + "p50_amplification": 0.634, + "p99_amplification": 0.676 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 1, + "p50_amplification": 0.551, + "p99_amplification": 1.339 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 2, + "p50_amplification": 0.381, + "p99_amplification": 0.491 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 4, + "p50_amplification": 0.554, + "p99_amplification": 0.534 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 8, + "p50_amplification": 0.723, + "p99_amplification": 0.325 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 16, + "p50_amplification": 0.64, + "p99_amplification": 0.525 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 32, + "p50_amplification": 0.497, + "p99_amplification": 0.342 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": 
"zipf+eplb", + "T": 64, + "p50_amplification": 0.648, + "p99_amplification": 1.883 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 128, + "p50_amplification": 0.614, + "p99_amplification": 0.686 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 1, + "p50_amplification": 0.759, + "p99_amplification": 0.771 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 2, + "p50_amplification": 0.394, + "p99_amplification": 0.583 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 4, + "p50_amplification": 0.536, + "p99_amplification": 0.655 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 8, + "p50_amplification": 0.714, + "p99_amplification": 0.239 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 16, + "p50_amplification": 0.737, + "p99_amplification": 0.552 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 32, + "p50_amplification": 0.457, + "p99_amplification": 0.336 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 64, + "p50_amplification": 0.683, + "p99_amplification": 0.637 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 128, + "p50_amplification": 0.651, + "p99_amplification": 0.725 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 1, + "p50_amplification": 0.546, + "p99_amplification": 0.579 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 2, + "p50_amplification": 0.392, + "p99_amplification": 0.434 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 4, + "p50_amplification": 0.559, + "p99_amplification": 0.54 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 8, + 
"p50_amplification": 0.528, + "p99_amplification": 0.187 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 16, + "p50_amplification": 0.64, + "p99_amplification": 0.539 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 32, + "p50_amplification": 0.448, + "p99_amplification": 0.333 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 64, + "p50_amplification": 0.675, + "p99_amplification": 0.592 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 128, + "p50_amplification": 0.61, + "p99_amplification": 0.671 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 128, + "p50_amplification": 0.778, + "p99_amplification": 0.802 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 512, + "p50_amplification": 0.999, + "p99_amplification": 1.045 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 2048, + "p50_amplification": 1.186, + "p99_amplification": 1.129 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 128, + "p50_amplification": 0.774, + "p99_amplification": 0.778 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 256, + "p50_amplification": 0.882, + "p99_amplification": 1.161 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 512, + "p50_amplification": 0.993, + "p99_amplification": 0.998 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 1024, + "p50_amplification": 1.089, + "p99_amplification": 1.091 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 2048, + "p50_amplification": 1.183, + "p99_amplification": 1.128 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 4096, + "p50_amplification": 
1.23, + "p99_amplification": 1.21 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 128, + "p50_amplification": 0.73, + "p99_amplification": 0.741 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 512, + "p50_amplification": 0.987, + "p99_amplification": 1.026 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 2048, + "p50_amplification": 1.154, + "p99_amplification": 1.092 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 128, + "p50_amplification": 0.719, + "p99_amplification": 0.744 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 256, + "p50_amplification": 0.854, + "p99_amplification": 0.858 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 512, + "p50_amplification": 0.965, + "p99_amplification": 0.96 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 1024, + "p50_amplification": 1.079, + "p99_amplification": 1.082 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 2048, + "p50_amplification": 1.158, + "p99_amplification": 1.104 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 4096, + "p50_amplification": 1.199, + "p99_amplification": 1.176 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 128, + "p50_amplification": 0.687, + "p99_amplification": 0.714 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 256, + "p50_amplification": 0.787, + "p99_amplification": 0.795 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 512, + "p50_amplification": 0.839, + "p99_amplification": 0.834 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + 
"routing": "zipf-heavy+eplb", + "T": 1024, + "p50_amplification": 0.891, + "p99_amplification": 0.887 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 2048, + "p50_amplification": 0.933, + "p99_amplification": 0.898 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 4096, + "p50_amplification": 0.989, + "p99_amplification": 0.995 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 128, + "p50_amplification": 0.768, + "p99_amplification": 0.783 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 256, + "p50_amplification": 0.859, + "p99_amplification": 0.864 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 512, + "p50_amplification": 0.973, + "p99_amplification": 0.969 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 1024, + "p50_amplification": 1.088, + "p99_amplification": 1.083 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 2048, + "p50_amplification": 1.184, + "p99_amplification": 1.275 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 4096, + "p50_amplification": 1.241, + "p99_amplification": 1.216 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 128, + "p50_amplification": 0.698, + "p99_amplification": 0.719 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 256, + "p50_amplification": 0.796, + "p99_amplification": 0.792 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 512, + "p50_amplification": 0.84, + "p99_amplification": 0.847 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 1024, + "p50_amplification": 0.877, + 
"p99_amplification": 1.339 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 2048, + "p50_amplification": 0.913, + "p99_amplification": 0.87 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 4096, + "p50_amplification": 0.956, + "p99_amplification": 0.956 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 128, + "p50_amplification": 0.774, + "p99_amplification": 0.799 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 256, + "p50_amplification": 0.878, + "p99_amplification": 0.885 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 512, + "p50_amplification": 0.991, + "p99_amplification": 0.989 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 1024, + "p50_amplification": 1.093, + "p99_amplification": 1.157 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 2048, + "p50_amplification": 1.183, + "p99_amplification": 1.466 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 4096, + "p50_amplification": 1.231, + "p99_amplification": 1.253 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 128, + "p50_amplification": 0.709, + "p99_amplification": 0.732 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 256, + "p50_amplification": 0.777, + "p99_amplification": 0.79 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 512, + "p50_amplification": 0.836, + "p99_amplification": 0.831 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 1024, + "p50_amplification": 0.874, + "p99_amplification": 0.875 + }, + { + "sku": "h100", 
+ "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 2048, + "p50_amplification": 0.912, + "p99_amplification": 0.871 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 4096, + "p50_amplification": 0.971, + "p99_amplification": 0.978 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 128, + "p50_amplification": 0.689, + "p99_amplification": 0.709 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 256, + "p50_amplification": 0.774, + "p99_amplification": 0.778 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 512, + "p50_amplification": 0.834, + "p99_amplification": 0.888 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 1024, + "p50_amplification": 0.875, + "p99_amplification": 0.876 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 2048, + "p50_amplification": 0.918, + "p99_amplification": 0.873 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 4096, + "p50_amplification": 0.985, + "p99_amplification": 0.997 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 128, + "p50_amplification": 0.739, + "p99_amplification": 0.765 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 256, + "p50_amplification": 0.854, + "p99_amplification": 0.86 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 512, + "p50_amplification": 0.98, + "p99_amplification": 0.983 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 1024, + "p50_amplification": 1.095, + "p99_amplification": 1.097 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 2048, + "p50_amplification": 1.162, + "p99_amplification": 1.1 + 
}, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 4096, + "p50_amplification": 1.208, + "p99_amplification": 1.19 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 128, + "p50_amplification": 0.687, + "p99_amplification": 0.708 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 256, + "p50_amplification": 0.773, + "p99_amplification": 0.771 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 512, + "p50_amplification": 0.858, + "p99_amplification": 0.857 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 1024, + "p50_amplification": 0.943, + "p99_amplification": 0.944 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 2048, + "p50_amplification": 1.023, + "p99_amplification": 0.968 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 4096, + "p50_amplification": 1.069, + "p99_amplification": 1.055 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 1, + "p50_amplification": 0.58, + "p99_amplification": 0.69 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 2, + "p50_amplification": 0.642, + "p99_amplification": 0.807 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 4, + "p50_amplification": 0.651, + "p99_amplification": 1.004 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 8, + "p50_amplification": 0.652, + "p99_amplification": 0.798 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 16, + "p50_amplification": 0.646, + "p99_amplification": 0.648 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 32, + "p50_amplification": 0.671, + "p99_amplification": 0.956 + }, + { + "sku": 
"h200", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 64, + "p50_amplification": 0.712, + "p99_amplification": 0.882 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 128, + "p50_amplification": 0.783, + "p99_amplification": 0.789 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 1, + "p50_amplification": 0.578, + "p99_amplification": 0.711 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 8, + "p50_amplification": 0.648, + "p99_amplification": 0.886 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 32, + "p50_amplification": 0.661, + "p99_amplification": 0.791 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 128, + "p50_amplification": 0.785, + "p99_amplification": 0.815 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 1, + "p50_amplification": 0.525, + "p99_amplification": 0.897 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 2, + "p50_amplification": 0.57, + "p99_amplification": 0.76 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 4, + "p50_amplification": 0.655, + "p99_amplification": 0.753 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 8, + "p50_amplification": 0.644, + "p99_amplification": 0.795 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 16, + "p50_amplification": 0.602, + "p99_amplification": 0.563 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 32, + "p50_amplification": 0.622, + "p99_amplification": 0.709 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 64, + "p50_amplification": 0.704, + "p99_amplification": 1.052 + }, + { + "sku": "h200", + "ep": 8, + 
"phase": "decode", + "routing": "zipf-heavy", + "T": 128, + "p50_amplification": 0.759, + "p99_amplification": 0.851 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 1, + "p50_amplification": 0.568, + "p99_amplification": 1.025 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 8, + "p50_amplification": 0.667, + "p99_amplification": 0.831 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 32, + "p50_amplification": 0.665, + "p99_amplification": 0.785 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 128, + "p50_amplification": 0.766, + "p99_amplification": 0.783 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 1, + "p50_amplification": 0.56, + "p99_amplification": 0.545 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 2, + "p50_amplification": 0.627, + "p99_amplification": 0.538 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 4, + "p50_amplification": 0.665, + "p99_amplification": 0.738 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 8, + "p50_amplification": 0.662, + "p99_amplification": 0.729 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 16, + "p50_amplification": 0.611, + "p99_amplification": 0.559 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 32, + "p50_amplification": 0.656, + "p99_amplification": 0.853 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 64, + "p50_amplification": 0.684, + "p99_amplification": 0.778 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 128, + "p50_amplification": 0.75, + 
"p99_amplification": 0.711 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 1, + "p50_amplification": 0.598, + "p99_amplification": 0.616 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 2, + "p50_amplification": 0.636, + "p99_amplification": 0.653 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 4, + "p50_amplification": 0.685, + "p99_amplification": 0.767 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 8, + "p50_amplification": 0.673, + "p99_amplification": 0.707 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 16, + "p50_amplification": 0.637, + "p99_amplification": 0.506 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 32, + "p50_amplification": 0.679, + "p99_amplification": 0.718 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 64, + "p50_amplification": 0.73, + "p99_amplification": 0.78 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 128, + "p50_amplification": 0.797, + "p99_amplification": 0.847 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 1, + "p50_amplification": 0.59, + "p99_amplification": 0.634 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 2, + "p50_amplification": 0.636, + "p99_amplification": 0.673 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 4, + "p50_amplification": 0.657, + "p99_amplification": 1.101 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 8, + "p50_amplification": 0.661, + "p99_amplification": 0.83 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 16, + 
"p50_amplification": 0.621, + "p99_amplification": 0.586 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 32, + "p50_amplification": 0.65, + "p99_amplification": 0.689 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 64, + "p50_amplification": 0.714, + "p99_amplification": 0.681 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 128, + "p50_amplification": 0.738, + "p99_amplification": 0.869 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 1, + "p50_amplification": 0.59, + "p99_amplification": 0.753 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 2, + "p50_amplification": 0.638, + "p99_amplification": 0.75 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 4, + "p50_amplification": 0.661, + "p99_amplification": 0.788 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 8, + "p50_amplification": 0.654, + "p99_amplification": 0.708 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 16, + "p50_amplification": 0.611, + "p99_amplification": 0.54 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 32, + "p50_amplification": 0.691, + "p99_amplification": 0.799 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 64, + "p50_amplification": 0.732, + "p99_amplification": 0.808 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 128, + "p50_amplification": 0.789, + "p99_amplification": 0.812 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 1, + "p50_amplification": 0.582, + "p99_amplification": 0.644 + }, + { + "sku": "h200", + "ep": 8, + 
"phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 2, + "p50_amplification": 0.644, + "p99_amplification": 0.809 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 4, + "p50_amplification": 0.683, + "p99_amplification": 0.889 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 8, + "p50_amplification": 0.661, + "p99_amplification": 0.875 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 16, + "p50_amplification": 0.605, + "p99_amplification": 0.654 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 32, + "p50_amplification": 0.663, + "p99_amplification": 0.977 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 64, + "p50_amplification": 0.715, + "p99_amplification": 0.942 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 128, + "p50_amplification": 0.758, + "p99_amplification": 0.885 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 1, + "p50_amplification": 0.584, + "p99_amplification": 0.649 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 2, + "p50_amplification": 0.642, + "p99_amplification": 0.694 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 4, + "p50_amplification": 0.665, + "p99_amplification": 0.703 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 8, + "p50_amplification": 0.674, + "p99_amplification": 1.103 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 16, + "p50_amplification": 0.637, + "p99_amplification": 0.594 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 32, + "p50_amplification": 0.675, + 
"p99_amplification": 0.747 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 64, + "p50_amplification": 0.688, + "p99_amplification": 0.676 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 128, + "p50_amplification": 0.75, + "p99_amplification": 0.84 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 1, + "p50_amplification": 0.601, + "p99_amplification": 0.766 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 2, + "p50_amplification": 0.644, + "p99_amplification": 0.735 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 4, + "p50_amplification": 0.658, + "p99_amplification": 0.737 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 8, + "p50_amplification": 0.667, + "p99_amplification": 0.861 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 16, + "p50_amplification": 0.644, + "p99_amplification": 0.664 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 32, + "p50_amplification": 0.672, + "p99_amplification": 0.756 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 64, + "p50_amplification": 0.714, + "p99_amplification": 0.683 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 128, + "p50_amplification": 0.788, + "p99_amplification": 0.927 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 1, + "p50_amplification": 0.59, + "p99_amplification": 0.634 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 2, + "p50_amplification": 0.637, + "p99_amplification": 0.678 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 4, + "p50_amplification": 0.704, + "p99_amplification": 1.069 + }, + { + "sku": 
"h200", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 8, + "p50_amplification": 0.66, + "p99_amplification": 0.846 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 16, + "p50_amplification": 0.634, + "p99_amplification": 0.688 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 32, + "p50_amplification": 0.663, + "p99_amplification": 0.758 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 64, + "p50_amplification": 0.748, + "p99_amplification": 0.887 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 128, + "p50_amplification": 0.741, + "p99_amplification": 0.813 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 128, + "p50_amplification": 0.767, + "p99_amplification": 0.631 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 512, + "p50_amplification": 0.995, + "p99_amplification": 1.016 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 2048, + "p50_amplification": 1.148, + "p99_amplification": 1.232 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 128, + "p50_amplification": 0.765, + "p99_amplification": 0.61 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 256, + "p50_amplification": 0.917, + "p99_amplification": 0.93 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 512, + "p50_amplification": 0.986, + "p99_amplification": 0.993 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 1024, + "p50_amplification": 1.083, + "p99_amplification": 1.177 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 2048, + "p50_amplification": 1.149, + "p99_amplification": 1.171 + }, + { + "sku": "h200", + "ep": 8, + 
"phase": "prefill", + "routing": "zipf", + "T": 4096, + "p50_amplification": 1.234, + "p99_amplification": 1.18 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 128, + "p50_amplification": 0.737, + "p99_amplification": 0.548 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 512, + "p50_amplification": 0.973, + "p99_amplification": 1.093 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 2048, + "p50_amplification": 1.133, + "p99_amplification": 1.172 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 128, + "p50_amplification": 0.74, + "p99_amplification": 0.552 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 256, + "p50_amplification": 0.836, + "p99_amplification": 0.844 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 512, + "p50_amplification": 0.965, + "p99_amplification": 1.01 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 1024, + "p50_amplification": 1.064, + "p99_amplification": 1.207 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 2048, + "p50_amplification": 1.132, + "p99_amplification": 1.098 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 4096, + "p50_amplification": 1.212, + "p99_amplification": 1.306 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 128, + "p50_amplification": 0.738, + "p99_amplification": 0.519 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 256, + "p50_amplification": 0.783, + "p99_amplification": 0.724 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 512, + "p50_amplification": 0.856, + 
"p99_amplification": 0.893 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 1024, + "p50_amplification": 0.903, + "p99_amplification": 0.938 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 2048, + "p50_amplification": 0.925, + "p99_amplification": 1.013 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 4096, + "p50_amplification": 0.999, + "p99_amplification": 0.969 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 128, + "p50_amplification": 0.792, + "p99_amplification": 0.664 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 256, + "p50_amplification": 0.882, + "p99_amplification": 0.841 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 512, + "p50_amplification": 0.993, + "p99_amplification": 1.149 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 1024, + "p50_amplification": 1.076, + "p99_amplification": 1.025 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 2048, + "p50_amplification": 1.162, + "p99_amplification": 1.17 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 4096, + "p50_amplification": 1.251, + "p99_amplification": 1.262 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 128, + "p50_amplification": 0.739, + "p99_amplification": 0.586 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 256, + "p50_amplification": 0.802, + "p99_amplification": 0.721 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 512, + "p50_amplification": 0.862, + "p99_amplification": 0.99 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + 
"routing": "zipf-mild+eplb", + "T": 1024, + "p50_amplification": 0.891, + "p99_amplification": 0.833 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 2048, + "p50_amplification": 0.907, + "p99_amplification": 0.896 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 4096, + "p50_amplification": 0.959, + "p99_amplification": 0.942 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 128, + "p50_amplification": 0.77, + "p99_amplification": 0.566 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 256, + "p50_amplification": 0.87, + "p99_amplification": 0.812 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 512, + "p50_amplification": 0.992, + "p99_amplification": 1.026 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 1024, + "p50_amplification": 1.079, + "p99_amplification": 0.978 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 2048, + "p50_amplification": 1.148, + "p99_amplification": 1.463 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 4096, + "p50_amplification": 1.234, + "p99_amplification": 1.254 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 128, + "p50_amplification": 0.737, + "p99_amplification": 0.695 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 256, + "p50_amplification": 0.781, + "p99_amplification": 0.813 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 512, + "p50_amplification": 0.85, + "p99_amplification": 0.909 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 1024, + 
"p50_amplification": 0.887, + "p99_amplification": 0.955 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 2048, + "p50_amplification": 0.902, + "p99_amplification": 0.906 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 4096, + "p50_amplification": 0.975, + "p99_amplification": 0.943 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 128, + "p50_amplification": 0.743, + "p99_amplification": 0.551 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 256, + "p50_amplification": 0.777, + "p99_amplification": 0.778 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 512, + "p50_amplification": 0.848, + "p99_amplification": 0.949 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 1024, + "p50_amplification": 0.886, + "p99_amplification": 0.844 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 2048, + "p50_amplification": 0.904, + "p99_amplification": 0.901 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 4096, + "p50_amplification": 0.967, + "p99_amplification": 0.918 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 128, + "p50_amplification": 0.757, + "p99_amplification": 0.578 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 256, + "p50_amplification": 0.88, + "p99_amplification": 0.846 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 512, + "p50_amplification": 0.993, + "p99_amplification": 0.981 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 1024, + "p50_amplification": 1.092, + "p99_amplification": 0.982 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + 
"routing": "zipf", + "T": 2048, + "p50_amplification": 1.145, + "p99_amplification": 1.152 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 4096, + "p50_amplification": 1.224, + "p99_amplification": 1.225 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 128, + "p50_amplification": 0.735, + "p99_amplification": 0.579 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 256, + "p50_amplification": 0.789, + "p99_amplification": 0.801 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 512, + "p50_amplification": 0.87, + "p99_amplification": 1.02 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 1024, + "p50_amplification": 0.918, + "p99_amplification": 0.902 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 2048, + "p50_amplification": 0.974, + "p99_amplification": 1.088 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 4096, + "p50_amplification": 1.052, + "p99_amplification": 1.156 + }, + { + "sku": "mi355x", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 1, + "p50_amplification": 0.963, + "p99_amplification": 0.992 + }, + { + "sku": "mi355x", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 2, + "p50_amplification": 0.955, + "p99_amplification": 0.961 + }, + { + "sku": "mi355x", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 4, + "p50_amplification": 0.979, + "p99_amplification": 0.957 + }, + { + "sku": "mi355x", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 8, + "p50_amplification": 0.986, + "p99_amplification": 1.042 + }, + { + "sku": "mi355x", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 16, + "p50_amplification": 0.98, + "p99_amplification": 1.009 + }, + { + "sku": "mi355x", + "ep": 8, + "phase": "decode", + 
"routing": "zipf-heavy", + "T": 1, + "p50_amplification": 0.945, + "p99_amplification": 0.974 + }, + { + "sku": "mi355x", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 2, + "p50_amplification": 0.951, + "p99_amplification": 0.976 + }, + { + "sku": "mi355x", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 4, + "p50_amplification": 0.977, + "p99_amplification": 0.969 + }, + { + "sku": "mi355x", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 8, + "p50_amplification": 0.972, + "p99_amplification": 0.952 + }, + { + "sku": "mi355x", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 16, + "p50_amplification": 0.97, + "p99_amplification": 0.925 + }, + { + "sku": "mi355x", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 1, + "p50_amplification": 0.969, + "p99_amplification": 0.978 + }, + { + "sku": "mi355x", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 2, + "p50_amplification": 0.98, + "p99_amplification": 0.984 + }, + { + "sku": "mi355x", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 4, + "p50_amplification": 0.993, + "p99_amplification": 0.968 + }, + { + "sku": "mi355x", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 8, + "p50_amplification": 0.984, + "p99_amplification": 0.99 + }, + { + "sku": "mi355x", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 16, + "p50_amplification": 0.992, + "p99_amplification": 0.99 + } + ] + }, + "nccl": [ + { + "id": "cxn-a8203ce9", + "identity": "nccl|b300|all_gather|b300-nvlink-island|nvlink|8|nccl-tests-v1", + "op": "all_gather", + "sku": "b300", + "runner": "b300-nv_03", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "nodes": 1, + "dtype": "float", + "comparisonClass": "standardized", + "comparisonKey": "e6eafb7204b78dd3", + "measurementContract": "nccl-tests-v1", + "avgBusBandwidthGbps": 186.922, + "status": "valid", + "valid": true, + 
"colorKey": "b300_a8203ce9", + "label": "B300 · b300-nvlink-island · nvlink (ws8)", + "generatedAt": "2026-06-27T11:18:41.342024+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-27T11:18:41.342024+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 128, + "dtype": "float", + "latencyUs": 27.36, + "algBandwidthGbps": 0, + "busBandwidthGbps": 0, + "outOfPlaceUs": 27.36, + "inPlaceUs": 27.26, + "correct": true + }, + { + "sizeBytes": 256, + "dtype": "float", + "latencyUs": 26.88, + "algBandwidthGbps": 0.01, + "busBandwidthGbps": 0.01, + "outOfPlaceUs": 26.88, + "inPlaceUs": 26.89, + "correct": true + }, + { + "sizeBytes": 512, + "dtype": "float", + "latencyUs": 27.11, + "algBandwidthGbps": 0.02, + "busBandwidthGbps": 0.02, + "outOfPlaceUs": 27.11, + "inPlaceUs": 27.07, + "correct": true + }, + { + "sizeBytes": 1024, + "dtype": "float", + "latencyUs": 26.64, + "algBandwidthGbps": 0.04, + "busBandwidthGbps": 0.03, + "outOfPlaceUs": 26.64, + "inPlaceUs": 26.87, + "correct": true + }, + { + "sizeBytes": 2048, + "dtype": "float", + "latencyUs": 27.03, + "algBandwidthGbps": 0.08, + "busBandwidthGbps": 0.07, + "outOfPlaceUs": 27.03, + "inPlaceUs": 26.8, + "correct": true + }, + { + "sizeBytes": 4096, + "dtype": "float", + "latencyUs": 26.95, + "algBandwidthGbps": 0.15, + "busBandwidthGbps": 0.13, + "outOfPlaceUs": 26.95, + "inPlaceUs": 27.51, + "correct": true + }, + { + "sizeBytes": 8192, + "dtype": "float", + "latencyUs": 26.84, + "algBandwidthGbps": 0.31, + "busBandwidthGbps": 0.27, + "outOfPlaceUs": 27.05, + "inPlaceUs": 26.84, + "correct": true + }, + { + "sizeBytes": 16384, + "dtype": "float", + "latencyUs": 27.2, + "algBandwidthGbps": 0.6, + "busBandwidthGbps": 0.53, + "outOfPlaceUs": 27.2, + "inPlaceUs": 26.86, + "correct": true + }, + { + "sizeBytes": 32768, + "dtype": "float", + "latencyUs": 26.68, + "algBandwidthGbps": 1.23, + "busBandwidthGbps": 1.07, + "outOfPlaceUs": 26.98, + "inPlaceUs": 26.68, + "correct": true + }, + { + 
"sizeBytes": 65536, + "dtype": "float", + "latencyUs": 26.75, + "algBandwidthGbps": 2.45, + "busBandwidthGbps": 2.14, + "outOfPlaceUs": 26.89, + "inPlaceUs": 26.75, + "correct": true + }, + { + "sizeBytes": 131072, + "dtype": "float", + "latencyUs": 27.63, + "algBandwidthGbps": 4.74, + "busBandwidthGbps": 4.15, + "outOfPlaceUs": 27.63, + "inPlaceUs": 27.81, + "correct": true + }, + { + "sizeBytes": 262144, + "dtype": "float", + "latencyUs": 28.34, + "algBandwidthGbps": 9.25, + "busBandwidthGbps": 8.09, + "outOfPlaceUs": 28.34, + "inPlaceUs": 28.46, + "correct": true + }, + { + "sizeBytes": 524288, + "dtype": "float", + "latencyUs": 29.45, + "algBandwidthGbps": 17.8, + "busBandwidthGbps": 15.58, + "outOfPlaceUs": 29.49, + "inPlaceUs": 29.45, + "correct": true + }, + { + "sizeBytes": 1048576, + "dtype": "float", + "latencyUs": 31.36, + "algBandwidthGbps": 33.43, + "busBandwidthGbps": 29.25, + "outOfPlaceUs": 31.51, + "inPlaceUs": 31.36, + "correct": true + }, + { + "sizeBytes": 2097152, + "dtype": "float", + "latencyUs": 35.8, + "algBandwidthGbps": 58.58, + "busBandwidthGbps": 51.26, + "outOfPlaceUs": 35.94, + "inPlaceUs": 35.8, + "correct": true + }, + { + "sizeBytes": 4194304, + "dtype": "float", + "latencyUs": 36.17, + "algBandwidthGbps": 115.95, + "busBandwidthGbps": 101.45, + "outOfPlaceUs": 36.29, + "inPlaceUs": 36.17, + "correct": true + }, + { + "sizeBytes": 8388608, + "dtype": "float", + "latencyUs": 36.99, + "algBandwidthGbps": 226.76, + "busBandwidthGbps": 198.42, + "outOfPlaceUs": 37.02, + "inPlaceUs": 36.99, + "correct": true + }, + { + "sizeBytes": 16777216, + "dtype": "float", + "latencyUs": 47.07, + "algBandwidthGbps": 356.41, + "busBandwidthGbps": 311.86, + "outOfPlaceUs": 47.08, + "inPlaceUs": 47.07, + "correct": true + }, + { + "sizeBytes": 33554432, + "dtype": "float", + "latencyUs": 74.95, + "algBandwidthGbps": 447.68, + "busBandwidthGbps": 391.72, + "outOfPlaceUs": 75.78, + "inPlaceUs": 74.95, + "correct": true + }, + { + "sizeBytes": 67108864, 
+ "dtype": "float", + "latencyUs": 138.64, + "algBandwidthGbps": 484.06, + "busBandwidthGbps": 423.55, + "outOfPlaceUs": 139.26, + "inPlaceUs": 138.64, + "correct": true + }, + { + "sizeBytes": 134217728, + "dtype": "float", + "latencyUs": 211.47, + "algBandwidthGbps": 634.68, + "busBandwidthGbps": 555.34, + "outOfPlaceUs": 211.47, + "inPlaceUs": 211.53, + "correct": true + }, + { + "sizeBytes": 268435456, + "dtype": "float", + "latencyUs": 399.32, + "algBandwidthGbps": 672.24, + "busBandwidthGbps": 588.21, + "outOfPlaceUs": 399.32, + "inPlaceUs": 399.95, + "correct": true + }, + { + "sizeBytes": 536870912, + "dtype": "float", + "latencyUs": 779.11, + "algBandwidthGbps": 689.08, + "busBandwidthGbps": 602.95, + "outOfPlaceUs": 779.96, + "inPlaceUs": 779.11, + "correct": true + }, + { + "sizeBytes": 1073741824, + "dtype": "float", + "latencyUs": 1532.87, + "algBandwidthGbps": 700.48, + "busBandwidthGbps": 612.92, + "outOfPlaceUs": 1533.45, + "inPlaceUs": 1532.87, + "correct": true + }, + { + "sizeBytes": 2147483648, + "dtype": "float", + "latencyUs": 3010.48, + "algBandwidthGbps": 713.34, + "busBandwidthGbps": 624.17, + "outOfPlaceUs": 3010.48, + "inPlaceUs": 3011.29, + "correct": true + }, + { + "sizeBytes": 4294967296, + "dtype": "float", + "latencyUs": 5911.41, + "algBandwidthGbps": 726.55, + "busBandwidthGbps": 635.74, + "outOfPlaceUs": 5949.57, + "inPlaceUs": 5911.41, + "correct": true + }, + { + "sizeBytes": 8589934592, + "dtype": "float", + "latencyUs": 11675.3, + "algBandwidthGbps": 735.74, + "busBandwidthGbps": 643.77, + "outOfPlaceUs": 11728.1, + "inPlaceUs": 11675.3, + "correct": true + } + ] + }, + { + "id": "cxn-17454439", + "identity": "nccl|h100|all_gather|h100-nvlink-island|nvlink|8|nccl-tests-v1", + "op": "all_gather", + "sku": "h100", + "runner": "h100-dgxc-slurm_09", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "nodes": 1, + "dtype": "float", + "comparisonClass": "standardized", + "comparisonKey": 
"dacea770825df094", + "measurementContract": "nccl-tests-v1", + "avgBusBandwidthGbps": 110.587, + "status": "valid", + "valid": true, + "colorKey": "h100_17454439", + "label": "H100 · h100-nvlink-island · nvlink (ws8)", + "generatedAt": "2026-06-27T11:18:57.699787+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-27T11:18:57.699787+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 128, + "dtype": "float", + "latencyUs": 40.4, + "algBandwidthGbps": 0, + "busBandwidthGbps": 0, + "outOfPlaceUs": 40.4, + "inPlaceUs": 39.34, + "correct": true + }, + { + "sizeBytes": 256, + "dtype": "float", + "latencyUs": 38.62, + "algBandwidthGbps": 0.01, + "busBandwidthGbps": 0.01, + "outOfPlaceUs": 38.62, + "inPlaceUs": 38.09, + "correct": true + }, + { + "sizeBytes": 512, + "dtype": "float", + "latencyUs": 38.41, + "algBandwidthGbps": 0.01, + "busBandwidthGbps": 0.01, + "outOfPlaceUs": 38.41, + "inPlaceUs": 38.32, + "correct": true + }, + { + "sizeBytes": 1024, + "dtype": "float", + "latencyUs": 38.68, + "algBandwidthGbps": 0.03, + "busBandwidthGbps": 0.02, + "outOfPlaceUs": 38.68, + "inPlaceUs": 37.58, + "correct": true + }, + { + "sizeBytes": 2048, + "dtype": "float", + "latencyUs": 37.29, + "algBandwidthGbps": 0.05, + "busBandwidthGbps": 0.05, + "outOfPlaceUs": 37.29, + "inPlaceUs": 37.12, + "correct": true + }, + { + "sizeBytes": 4096, + "dtype": "float", + "latencyUs": 37.53, + "algBandwidthGbps": 0.11, + "busBandwidthGbps": 0.1, + "outOfPlaceUs": 37.53, + "inPlaceUs": 37.17, + "correct": true + }, + { + "sizeBytes": 8192, + "dtype": "float", + "latencyUs": 37.52, + "algBandwidthGbps": 0.22, + "busBandwidthGbps": 0.19, + "outOfPlaceUs": 37.52, + "inPlaceUs": 37.53, + "correct": true + }, + { + "sizeBytes": 16384, + "dtype": "float", + "latencyUs": 37.13, + "algBandwidthGbps": 0.44, + "busBandwidthGbps": 0.39, + "outOfPlaceUs": 37.13, + "inPlaceUs": 37.09, + "correct": true + }, + { + "sizeBytes": 32768, + "dtype": "float", + "latencyUs": 37.43, + 
"algBandwidthGbps": 0.88, + "busBandwidthGbps": 0.77, + "outOfPlaceUs": 37.43, + "inPlaceUs": 37.42, + "correct": true + }, + { + "sizeBytes": 65536, + "dtype": "float", + "latencyUs": 37.64, + "algBandwidthGbps": 1.74, + "busBandwidthGbps": 1.52, + "outOfPlaceUs": 37.64, + "inPlaceUs": 37.63, + "correct": true + }, + { + "sizeBytes": 131072, + "dtype": "float", + "latencyUs": 38.19, + "algBandwidthGbps": 3.43, + "busBandwidthGbps": 3, + "outOfPlaceUs": 38.48, + "inPlaceUs": 38.19, + "correct": true + }, + { + "sizeBytes": 262144, + "dtype": "float", + "latencyUs": 39.66, + "algBandwidthGbps": 6.61, + "busBandwidthGbps": 5.78, + "outOfPlaceUs": 39.66, + "inPlaceUs": 40.15, + "correct": true + }, + { + "sizeBytes": 524288, + "dtype": "float", + "latencyUs": 41.79, + "algBandwidthGbps": 12.55, + "busBandwidthGbps": 10.98, + "outOfPlaceUs": 42.17, + "inPlaceUs": 41.79, + "correct": true + }, + { + "sizeBytes": 1048576, + "dtype": "float", + "latencyUs": 43.89, + "algBandwidthGbps": 23.89, + "busBandwidthGbps": 20.9, + "outOfPlaceUs": 45.09, + "inPlaceUs": 43.89, + "correct": true + }, + { + "sizeBytes": 2097152, + "dtype": "float", + "latencyUs": 44.32, + "algBandwidthGbps": 47.31, + "busBandwidthGbps": 41.4, + "outOfPlaceUs": 44.55, + "inPlaceUs": 44.32, + "correct": true + }, + { + "sizeBytes": 4194304, + "dtype": "float", + "latencyUs": 44.97, + "algBandwidthGbps": 93.27, + "busBandwidthGbps": 81.61, + "outOfPlaceUs": 44.97, + "inPlaceUs": 45, + "correct": true + }, + { + "sizeBytes": 8388608, + "dtype": "float", + "latencyUs": 45.6, + "algBandwidthGbps": 183.98, + "busBandwidthGbps": 160.98, + "outOfPlaceUs": 46.08, + "inPlaceUs": 45.6, + "correct": true + }, + { + "sizeBytes": 16777216, + "dtype": "float", + "latencyUs": 67.94, + "algBandwidthGbps": 246.95, + "busBandwidthGbps": 216.08, + "outOfPlaceUs": 70.1, + "inPlaceUs": 67.94, + "correct": true + }, + { + "sizeBytes": 33554432, + "dtype": "float", + "latencyUs": 122.71, + "algBandwidthGbps": 273.44, + 
"busBandwidthGbps": 239.26, + "outOfPlaceUs": 125.34, + "inPlaceUs": 122.71, + "correct": true + }, + { + "sizeBytes": 67108864, + "dtype": "float", + "latencyUs": 206.56, + "algBandwidthGbps": 324.88, + "busBandwidthGbps": 284.27, + "outOfPlaceUs": 210.98, + "inPlaceUs": 206.56, + "correct": true + }, + { + "sizeBytes": 134217728, + "dtype": "float", + "latencyUs": 390.25, + "algBandwidthGbps": 343.93, + "busBandwidthGbps": 300.94, + "outOfPlaceUs": 396.19, + "inPlaceUs": 390.25, + "correct": true + }, + { + "sizeBytes": 268435456, + "dtype": "float", + "latencyUs": 728.52, + "algBandwidthGbps": 368.47, + "busBandwidthGbps": 322.41, + "outOfPlaceUs": 733.59, + "inPlaceUs": 728.52, + "correct": true + }, + { + "sizeBytes": 536870912, + "dtype": "float", + "latencyUs": 1394.3, + "algBandwidthGbps": 385.05, + "busBandwidthGbps": 336.92, + "outOfPlaceUs": 1397.39, + "inPlaceUs": 1394.3, + "correct": true + }, + { + "sizeBytes": 1073741824, + "dtype": "float", + "latencyUs": 2705.03, + "algBandwidthGbps": 396.94, + "busBandwidthGbps": 347.33, + "outOfPlaceUs": 2729.3, + "inPlaceUs": 2705.03, + "correct": true + }, + { + "sizeBytes": 2147483648, + "dtype": "float", + "latencyUs": 5306.37, + "algBandwidthGbps": 404.7, + "busBandwidthGbps": 354.11, + "outOfPlaceUs": 5374.68, + "inPlaceUs": 5306.37, + "correct": true + }, + { + "sizeBytes": 4294967296, + "dtype": "float", + "latencyUs": 10451.7, + "algBandwidthGbps": 410.93, + "busBandwidthGbps": 359.57, + "outOfPlaceUs": 10616.4, + "inPlaceUs": 10451.7, + "correct": true + }, + { + "sizeBytes": 8589934592, + "dtype": "float", + "latencyUs": 20734.1, + "algBandwidthGbps": 414.29, + "busBandwidthGbps": 362.5, + "outOfPlaceUs": 21013.2, + "inPlaceUs": 20734.1, + "correct": true + } + ] + }, + { + "id": "cxn-cc1fe619", + "identity": "nccl|h200|all_gather|h200-nvlink-island|nvlink|8|nccl-tests-v1", + "op": "all_gather", + "sku": "h200", + "runner": "h200-dgxc-slurm_2", + "topologyClass": "h200-nvlink-island", + "transport": 
"nvlink", + "worldSize": 8, + "nodes": 1, + "dtype": "float", + "comparisonClass": "standardized", + "comparisonKey": "e2f081a269356db7", + "measurementContract": "nccl-tests-v1", + "avgBusBandwidthGbps": 111.028, + "status": "valid", + "valid": true, + "colorKey": "h200_cc1fe619", + "label": "H200 · h200-nvlink-island · nvlink (ws8)", + "generatedAt": "2026-06-27T11:19:06.426368+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-27T11:19:06.426368+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 128, + "dtype": "float", + "latencyUs": 40.82, + "algBandwidthGbps": 0, + "busBandwidthGbps": 0, + "outOfPlaceUs": 40.82, + "inPlaceUs": 37.72, + "correct": true + }, + { + "sizeBytes": 256, + "dtype": "float", + "latencyUs": 39.21, + "algBandwidthGbps": 0.01, + "busBandwidthGbps": 0.01, + "outOfPlaceUs": 39.21, + "inPlaceUs": 38.7, + "correct": true + }, + { + "sizeBytes": 512, + "dtype": "float", + "latencyUs": 39.32, + "algBandwidthGbps": 0.01, + "busBandwidthGbps": 0.01, + "outOfPlaceUs": 39.32, + "inPlaceUs": 38.61, + "correct": true + }, + { + "sizeBytes": 1024, + "dtype": "float", + "latencyUs": 38.35, + "algBandwidthGbps": 0.03, + "busBandwidthGbps": 0.02, + "outOfPlaceUs": 38.35, + "inPlaceUs": 37.52, + "correct": true + }, + { + "sizeBytes": 2048, + "dtype": "float", + "latencyUs": 38.87, + "algBandwidthGbps": 0.05, + "busBandwidthGbps": 0.05, + "outOfPlaceUs": 38.87, + "inPlaceUs": 37.95, + "correct": true + }, + { + "sizeBytes": 4096, + "dtype": "float", + "latencyUs": 39.96, + "algBandwidthGbps": 0.1, + "busBandwidthGbps": 0.09, + "outOfPlaceUs": 39.96, + "inPlaceUs": 38.92, + "correct": true + }, + { + "sizeBytes": 8192, + "dtype": "float", + "latencyUs": 38.44, + "algBandwidthGbps": 0.21, + "busBandwidthGbps": 0.19, + "outOfPlaceUs": 38.44, + "inPlaceUs": 38.97, + "correct": true + }, + { + "sizeBytes": 16384, + "dtype": "float", + "latencyUs": 39.02, + "algBandwidthGbps": 0.42, + "busBandwidthGbps": 0.37, + "outOfPlaceUs": 
39.02, + "inPlaceUs": 38.61, + "correct": true + }, + { + "sizeBytes": 32768, + "dtype": "float", + "latencyUs": 37.96, + "algBandwidthGbps": 0.86, + "busBandwidthGbps": 0.76, + "outOfPlaceUs": 37.96, + "inPlaceUs": 39.63, + "correct": true + }, + { + "sizeBytes": 65536, + "dtype": "float", + "latencyUs": 38.78, + "algBandwidthGbps": 1.69, + "busBandwidthGbps": 1.48, + "outOfPlaceUs": 38.78, + "inPlaceUs": 38.91, + "correct": true + }, + { + "sizeBytes": 131072, + "dtype": "float", + "latencyUs": 39.77, + "algBandwidthGbps": 3.3, + "busBandwidthGbps": 2.88, + "outOfPlaceUs": 39.77, + "inPlaceUs": 40.11, + "correct": true + }, + { + "sizeBytes": 262144, + "dtype": "float", + "latencyUs": 40.4, + "algBandwidthGbps": 6.49, + "busBandwidthGbps": 5.68, + "outOfPlaceUs": 40.56, + "inPlaceUs": 40.4, + "correct": true + }, + { + "sizeBytes": 524288, + "dtype": "float", + "latencyUs": 42.21, + "algBandwidthGbps": 12.42, + "busBandwidthGbps": 10.87, + "outOfPlaceUs": 42.21, + "inPlaceUs": 48.64, + "correct": true + }, + { + "sizeBytes": 1048576, + "dtype": "float", + "latencyUs": 44.25, + "algBandwidthGbps": 23.7, + "busBandwidthGbps": 20.73, + "outOfPlaceUs": 46.55, + "inPlaceUs": 44.25, + "correct": true + }, + { + "sizeBytes": 2097152, + "dtype": "float", + "latencyUs": 44.67, + "algBandwidthGbps": 46.95, + "busBandwidthGbps": 41.08, + "outOfPlaceUs": 45.93, + "inPlaceUs": 44.67, + "correct": true + }, + { + "sizeBytes": 4194304, + "dtype": "float", + "latencyUs": 45.73, + "algBandwidthGbps": 91.71, + "busBandwidthGbps": 80.25, + "outOfPlaceUs": 45.73, + "inPlaceUs": 50.3, + "correct": true + }, + { + "sizeBytes": 8388608, + "dtype": "float", + "latencyUs": 49.87, + "algBandwidthGbps": 168.19, + "busBandwidthGbps": 147.17, + "outOfPlaceUs": 49.87, + "inPlaceUs": 49.89, + "correct": true + }, + { + "sizeBytes": 16777216, + "dtype": "float", + "latencyUs": 66.01, + "algBandwidthGbps": 254.16, + "busBandwidthGbps": 222.39, + "outOfPlaceUs": 66.91, + "inPlaceUs": 66.01, + 
"correct": true + }, + { + "sizeBytes": 33554432, + "dtype": "float", + "latencyUs": 119.98, + "algBandwidthGbps": 279.66, + "busBandwidthGbps": 244.7, + "outOfPlaceUs": 123.43, + "inPlaceUs": 119.98, + "correct": true + }, + { + "sizeBytes": 67108864, + "dtype": "float", + "latencyUs": 203.19, + "algBandwidthGbps": 330.27, + "busBandwidthGbps": 288.99, + "outOfPlaceUs": 207.29, + "inPlaceUs": 203.19, + "correct": true + }, + { + "sizeBytes": 134217728, + "dtype": "float", + "latencyUs": 376.8, + "algBandwidthGbps": 356.2, + "busBandwidthGbps": 311.68, + "outOfPlaceUs": 380.65, + "inPlaceUs": 376.8, + "correct": true + }, + { + "sizeBytes": 268435456, + "dtype": "float", + "latencyUs": 719.69, + "algBandwidthGbps": 372.99, + "busBandwidthGbps": 326.36, + "outOfPlaceUs": 725.33, + "inPlaceUs": 719.69, + "correct": true + }, + { + "sizeBytes": 536870912, + "dtype": "float", + "latencyUs": 1381.87, + "algBandwidthGbps": 388.51, + "busBandwidthGbps": 339.95, + "outOfPlaceUs": 1395.46, + "inPlaceUs": 1381.87, + "correct": true + }, + { + "sizeBytes": 1073741824, + "dtype": "float", + "latencyUs": 2707.03, + "algBandwidthGbps": 396.65, + "busBandwidthGbps": 347.07, + "outOfPlaceUs": 2726.86, + "inPlaceUs": 2707.03, + "correct": true + }, + { + "sizeBytes": 2147483648, + "dtype": "float", + "latencyUs": 5309.69, + "algBandwidthGbps": 404.45, + "busBandwidthGbps": 353.89, + "outOfPlaceUs": 5364.37, + "inPlaceUs": 5309.69, + "correct": true + }, + { + "sizeBytes": 4294967296, + "dtype": "float", + "latencyUs": 10464.7, + "algBandwidthGbps": 410.42, + "busBandwidthGbps": 359.12, + "outOfPlaceUs": 10637.1, + "inPlaceUs": 10464.7, + "correct": true + }, + { + "sizeBytes": 8589934592, + "dtype": "float", + "latencyUs": 20742.5, + "algBandwidthGbps": 414.12, + "busBandwidthGbps": 362.36, + "outOfPlaceUs": 21038.3, + "inPlaceUs": 20742.5, + "correct": true + } + ] + }, + { + "id": "cxn-e1de3b53", + "identity": "nccl|mi355x|all_gather|mi355x-xgmi|xgmi|8|nccl-tests-v1", + "op": 
"all_gather", + "sku": "mi355x", + "runner": "mi355x-amds_01", + "topologyClass": "mi355x-xgmi", + "transport": "xgmi", + "worldSize": 8, + "nodes": 1, + "dtype": "float", + "comparisonClass": "standardized", + "comparisonKey": "8f8417874bf37410", + "measurementContract": "nccl-tests-v1", + "avgBusBandwidthGbps": 114.277, + "status": "valid", + "valid": true, + "colorKey": "mi355x_e1de3b53", + "label": "MI355X · mi355x-xgmi · xgmi (ws8)", + "generatedAt": "2026-06-29T02:39:13.078018+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-29T02:39:13.078018+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 128, + "dtype": "float", + "latencyUs": 187.5, + "algBandwidthGbps": 0, + "busBandwidthGbps": 0, + "outOfPlaceUs": 187.5, + "inPlaceUs": 204.9, + "correct": true + }, + { + "sizeBytes": 256, + "dtype": "float", + "latencyUs": 176.3, + "algBandwidthGbps": 0, + "busBandwidthGbps": 0, + "outOfPlaceUs": 176.3, + "inPlaceUs": 202.7, + "correct": true + }, + { + "sizeBytes": 512, + "dtype": "float", + "latencyUs": 176.5, + "algBandwidthGbps": 0, + "busBandwidthGbps": 0, + "outOfPlaceUs": 176.5, + "inPlaceUs": 204, + "correct": true + }, + { + "sizeBytes": 1024, + "dtype": "float", + "latencyUs": 173.8, + "algBandwidthGbps": 0.01, + "busBandwidthGbps": 0.01, + "outOfPlaceUs": 173.8, + "inPlaceUs": 170.9, + "correct": true + }, + { + "sizeBytes": 2048, + "dtype": "float", + "latencyUs": 177.9, + "algBandwidthGbps": 0.01, + "busBandwidthGbps": 0.01, + "outOfPlaceUs": 177.9, + "inPlaceUs": 171.6, + "correct": true + }, + { + "sizeBytes": 4096, + "dtype": "float", + "latencyUs": 175.4, + "algBandwidthGbps": 0.02, + "busBandwidthGbps": 0.02, + "outOfPlaceUs": 175.4, + "inPlaceUs": 171.3, + "correct": true + }, + { + "sizeBytes": 8192, + "dtype": "float", + "latencyUs": 173.1, + "algBandwidthGbps": 0.05, + "busBandwidthGbps": 0.04, + "outOfPlaceUs": 210, + "inPlaceUs": 173.1, + "correct": true + }, + { + "sizeBytes": 16384, + "dtype": "float", + 
"latencyUs": 172.7, + "algBandwidthGbps": 0.09, + "busBandwidthGbps": 0.08, + "outOfPlaceUs": 210.5, + "inPlaceUs": 172.7, + "correct": true + }, + { + "sizeBytes": 32768, + "dtype": "float", + "latencyUs": 173.2, + "algBandwidthGbps": 0.19, + "busBandwidthGbps": 0.17, + "outOfPlaceUs": 210.5, + "inPlaceUs": 173.2, + "correct": true + }, + { + "sizeBytes": 65536, + "dtype": "float", + "latencyUs": 177.8, + "algBandwidthGbps": 0.37, + "busBandwidthGbps": 0.32, + "outOfPlaceUs": 215.9, + "inPlaceUs": 177.8, + "correct": true + }, + { + "sizeBytes": 131072, + "dtype": "float", + "latencyUs": 126.3, + "algBandwidthGbps": 1.04, + "busBandwidthGbps": 0.91, + "outOfPlaceUs": 223.8, + "inPlaceUs": 126.3, + "correct": true + }, + { + "sizeBytes": 262144, + "dtype": "float", + "latencyUs": 136.7, + "algBandwidthGbps": 1.92, + "busBandwidthGbps": 1.68, + "outOfPlaceUs": 139.9, + "inPlaceUs": 136.7, + "correct": true + }, + { + "sizeBytes": 524288, + "dtype": "float", + "latencyUs": 137.9, + "algBandwidthGbps": 3.8, + "busBandwidthGbps": 3.33, + "outOfPlaceUs": 140.8, + "inPlaceUs": 137.9, + "correct": true + }, + { + "sizeBytes": 1048576, + "dtype": "float", + "latencyUs": 124.5, + "algBandwidthGbps": 8.42, + "busBandwidthGbps": 7.37, + "outOfPlaceUs": 124.5, + "inPlaceUs": 142, + "correct": true + }, + { + "sizeBytes": 2097152, + "dtype": "float", + "latencyUs": 129.4, + "algBandwidthGbps": 16.21, + "busBandwidthGbps": 14.18, + "outOfPlaceUs": 129.4, + "inPlaceUs": 148.6, + "correct": true + }, + { + "sizeBytes": 4194304, + "dtype": "float", + "latencyUs": 140.7, + "algBandwidthGbps": 29.82, + "busBandwidthGbps": 26.09, + "outOfPlaceUs": 140.7, + "inPlaceUs": 158.2, + "correct": true + }, + { + "sizeBytes": 8388608, + "dtype": "float", + "latencyUs": 141.3, + "algBandwidthGbps": 59.35, + "busBandwidthGbps": 51.93, + "outOfPlaceUs": 141.3, + "inPlaceUs": 158.8, + "correct": true + }, + { + "sizeBytes": 16777216, + "dtype": "float", + "latencyUs": 88.34, + "algBandwidthGbps": 
189.91, + "busBandwidthGbps": 166.17, + "outOfPlaceUs": 128.5, + "inPlaceUs": 88.34, + "correct": true + }, + { + "sizeBytes": 33554432, + "dtype": "float", + "latencyUs": 104.3, + "algBandwidthGbps": 321.59, + "busBandwidthGbps": 281.39, + "outOfPlaceUs": 142.4, + "inPlaceUs": 104.3, + "correct": true + }, + { + "sizeBytes": 67108864, + "dtype": "float", + "latencyUs": 174.1, + "algBandwidthGbps": 385.36, + "busBandwidthGbps": 337.19, + "outOfPlaceUs": 174.1, + "inPlaceUs": 174.4, + "correct": true + }, + { + "sizeBytes": 134217728, + "dtype": "float", + "latencyUs": 320.4, + "algBandwidthGbps": 418.92, + "busBandwidthGbps": 366.55, + "outOfPlaceUs": 320.9, + "inPlaceUs": 320.4, + "correct": true + }, + { + "sizeBytes": 268435456, + "dtype": "float", + "latencyUs": 616.6, + "algBandwidthGbps": 435.32, + "busBandwidthGbps": 380.9, + "outOfPlaceUs": 625.8, + "inPlaceUs": 616.6, + "correct": true + }, + { + "sizeBytes": 536870912, + "dtype": "float", + "latencyUs": 1206.3, + "algBandwidthGbps": 445.05, + "busBandwidthGbps": 389.42, + "outOfPlaceUs": 1207.7, + "inPlaceUs": 1206.3, + "correct": true + }, + { + "sizeBytes": 1073741824, + "dtype": "float", + "latencyUs": 2396.4, + "algBandwidthGbps": 448.06, + "busBandwidthGbps": 392.06, + "outOfPlaceUs": 2396.4, + "inPlaceUs": 2399, + "correct": true + }, + { + "sizeBytes": 2147483648, + "dtype": "float", + "latencyUs": 4736.7, + "algBandwidthGbps": 453.37, + "busBandwidthGbps": 396.7, + "outOfPlaceUs": 4750.2, + "inPlaceUs": 4736.7, + "correct": true + }, + { + "sizeBytes": 4294967296, + "dtype": "float", + "latencyUs": 9395.7, + "algBandwidthGbps": 457.12, + "busBandwidthGbps": 399.98, + "outOfPlaceUs": 9395.7, + "inPlaceUs": 9416.8, + "correct": true + }, + { + "sizeBytes": 8589934592, + "dtype": "float", + "latencyUs": 18643, + "algBandwidthGbps": 460.77, + "busBandwidthGbps": 403.17, + "outOfPlaceUs": 18643, + "inPlaceUs": 18899, + "correct": true + } + ] + }, + { + "id": "cxn-940e3e1c", + "identity": 
"nccl|b300|all_reduce|b300-nvlink-island|nvlink|8|nccl-tests-v1", + "op": "all_reduce", + "sku": "b300", + "runner": "b300-nv_03", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "nodes": 1, + "dtype": "float", + "comparisonClass": "standardized", + "comparisonKey": "139076c9959b0653", + "measurementContract": "nccl-tests-v1", + "avgBusBandwidthGbps": 218.816, + "status": "valid", + "valid": true, + "colorKey": "b300_940e3e1c", + "label": "B300 · b300-nvlink-island · nvlink (ws8)", + "generatedAt": "2026-06-27T11:18:24.142157+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-27T11:18:24.142157+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 8, + "dtype": "float", + "latencyUs": 28.3, + "algBandwidthGbps": 0, + "busBandwidthGbps": 0, + "outOfPlaceUs": 28.3, + "inPlaceUs": 27.59, + "correct": true + }, + { + "sizeBytes": 16, + "dtype": "float", + "latencyUs": 27.27, + "algBandwidthGbps": 0, + "busBandwidthGbps": 0, + "outOfPlaceUs": 27.27, + "inPlaceUs": 27.06, + "correct": true + }, + { + "sizeBytes": 32, + "dtype": "float", + "latencyUs": 27.25, + "algBandwidthGbps": 0, + "busBandwidthGbps": 0, + "outOfPlaceUs": 27.25, + "inPlaceUs": 27.3, + "correct": true + }, + { + "sizeBytes": 64, + "dtype": "float", + "latencyUs": 27.32, + "algBandwidthGbps": 0, + "busBandwidthGbps": 0, + "outOfPlaceUs": 27.32, + "inPlaceUs": 27.28, + "correct": true + }, + { + "sizeBytes": 128, + "dtype": "float", + "latencyUs": 27.42, + "algBandwidthGbps": 0, + "busBandwidthGbps": 0.01, + "outOfPlaceUs": 27.42, + "inPlaceUs": 27.59, + "correct": true + }, + { + "sizeBytes": 256, + "dtype": "float", + "latencyUs": 27.26, + "algBandwidthGbps": 0.01, + "busBandwidthGbps": 0.02, + "outOfPlaceUs": 27.26, + "inPlaceUs": 27.32, + "correct": true + }, + { + "sizeBytes": 512, + "dtype": "float", + "latencyUs": 27.16, + "algBandwidthGbps": 0.02, + "busBandwidthGbps": 0.03, + "outOfPlaceUs": 27.16, + "inPlaceUs": 27.38, + "correct": 
true + }, + { + "sizeBytes": 1024, + "dtype": "float", + "latencyUs": 27.33, + "algBandwidthGbps": 0.04, + "busBandwidthGbps": 0.07, + "outOfPlaceUs": 27.33, + "inPlaceUs": 27.14, + "correct": true + }, + { + "sizeBytes": 2048, + "dtype": "float", + "latencyUs": 27.36, + "algBandwidthGbps": 0.07, + "busBandwidthGbps": 0.13, + "outOfPlaceUs": 27.36, + "inPlaceUs": 27.33, + "correct": true + }, + { + "sizeBytes": 4096, + "dtype": "float", + "latencyUs": 27.3, + "algBandwidthGbps": 0.15, + "busBandwidthGbps": 0.26, + "outOfPlaceUs": 27.3, + "inPlaceUs": 27.35, + "correct": true + }, + { + "sizeBytes": 8192, + "dtype": "float", + "latencyUs": 27.52, + "algBandwidthGbps": 0.3, + "busBandwidthGbps": 0.52, + "outOfPlaceUs": 27.52, + "inPlaceUs": 27.59, + "correct": true + }, + { + "sizeBytes": 16384, + "dtype": "float", + "latencyUs": 27.64, + "algBandwidthGbps": 0.59, + "busBandwidthGbps": 1.04, + "outOfPlaceUs": 27.64, + "inPlaceUs": 27.61, + "correct": true + }, + { + "sizeBytes": 32768, + "dtype": "float", + "latencyUs": 27.76, + "algBandwidthGbps": 1.18, + "busBandwidthGbps": 2.07, + "outOfPlaceUs": 27.76, + "inPlaceUs": 27.85, + "correct": true + }, + { + "sizeBytes": 65536, + "dtype": "float", + "latencyUs": 27.7, + "algBandwidthGbps": 2.37, + "busBandwidthGbps": 4.14, + "outOfPlaceUs": 28.19, + "inPlaceUs": 27.7, + "correct": true + }, + { + "sizeBytes": 131072, + "dtype": "float", + "latencyUs": 28.21, + "algBandwidthGbps": 4.65, + "busBandwidthGbps": 8.13, + "outOfPlaceUs": 28.59, + "inPlaceUs": 28.21, + "correct": true + }, + { + "sizeBytes": 262144, + "dtype": "float", + "latencyUs": 28.56, + "algBandwidthGbps": 9.18, + "busBandwidthGbps": 16.06, + "outOfPlaceUs": 29.16, + "inPlaceUs": 28.56, + "correct": true + }, + { + "sizeBytes": 524288, + "dtype": "float", + "latencyUs": 29.89, + "algBandwidthGbps": 17.54, + "busBandwidthGbps": 30.7, + "outOfPlaceUs": 29.89, + "inPlaceUs": 29.93, + "correct": true + }, + { + "sizeBytes": 1048576, + "dtype": "float", + 
"latencyUs": 32.16, + "algBandwidthGbps": 32.61, + "busBandwidthGbps": 57.06, + "outOfPlaceUs": 32.16, + "inPlaceUs": 32.67, + "correct": true + }, + { + "sizeBytes": 2097152, + "dtype": "float", + "latencyUs": 37.47, + "algBandwidthGbps": 55.97, + "busBandwidthGbps": 97.94, + "outOfPlaceUs": 37.47, + "inPlaceUs": 38.07, + "correct": true + }, + { + "sizeBytes": 4194304, + "dtype": "float", + "latencyUs": 56.79, + "algBandwidthGbps": 73.86, + "busBandwidthGbps": 129.26, + "outOfPlaceUs": 56.88, + "inPlaceUs": 56.79, + "correct": true + }, + { + "sizeBytes": 8388608, + "dtype": "float", + "latencyUs": 77.08, + "algBandwidthGbps": 108.83, + "busBandwidthGbps": 190.45, + "outOfPlaceUs": 78.24, + "inPlaceUs": 77.08, + "correct": true + }, + { + "sizeBytes": 16777216, + "dtype": "float", + "latencyUs": 104.77, + "algBandwidthGbps": 160.14, + "busBandwidthGbps": 280.24, + "outOfPlaceUs": 106.93, + "inPlaceUs": 104.77, + "correct": true + }, + { + "sizeBytes": 33554432, + "dtype": "float", + "latencyUs": 166.18, + "algBandwidthGbps": 201.91, + "busBandwidthGbps": 353.34, + "outOfPlaceUs": 168.44, + "inPlaceUs": 166.18, + "correct": true + }, + { + "sizeBytes": 67108864, + "dtype": "float", + "latencyUs": 274.52, + "algBandwidthGbps": 244.46, + "busBandwidthGbps": 427.8, + "outOfPlaceUs": 274.52, + "inPlaceUs": 275.23, + "correct": true + }, + { + "sizeBytes": 134217728, + "dtype": "float", + "latencyUs": 391.34, + "algBandwidthGbps": 342.97, + "busBandwidthGbps": 600.19, + "outOfPlaceUs": 391.34, + "inPlaceUs": 392.6, + "correct": true + }, + { + "sizeBytes": 268435456, + "dtype": "float", + "latencyUs": 711.09, + "algBandwidthGbps": 377.5, + "busBandwidthGbps": 660.62, + "outOfPlaceUs": 711.09, + "inPlaceUs": 712.3, + "correct": true + }, + { + "sizeBytes": 536870912, + "dtype": "float", + "latencyUs": 1324.96, + "algBandwidthGbps": 405.2, + "busBandwidthGbps": 709.1, + "outOfPlaceUs": 1324.96, + "inPlaceUs": 1327.33, + "correct": true + }, + { + "sizeBytes": 1073741824, 
+ "dtype": "float", + "latencyUs": 2553.96, + "algBandwidthGbps": 420.42, + "busBandwidthGbps": 735.74, + "outOfPlaceUs": 2558.96, + "inPlaceUs": 2553.96, + "correct": true + }, + { + "sizeBytes": 2147483648, + "dtype": "float", + "latencyUs": 4571.5, + "algBandwidthGbps": 469.75, + "busBandwidthGbps": 822.07, + "outOfPlaceUs": 4576.46, + "inPlaceUs": 4571.5, + "correct": true + }, + { + "sizeBytes": 4294967296, + "dtype": "float", + "latencyUs": 9024.56, + "algBandwidthGbps": 475.92, + "busBandwidthGbps": 832.86, + "outOfPlaceUs": 9034.78, + "inPlaceUs": 9024.56, + "correct": true + }, + { + "sizeBytes": 8589934592, + "dtype": "float", + "latencyUs": 17971.9, + "algBandwidthGbps": 477.96, + "busBandwidthGbps": 836.44, + "outOfPlaceUs": 17991.5, + "inPlaceUs": 17971.9, + "correct": true + } + ] + }, + { + "id": "cxn-fd5a787b", + "identity": "allreduce-fw|b300|flashinfer-oneshot|b300-nvlink-island|nvlink|8|allreduce-fw-v1", + "op": "all_reduce", + "sku": "b300", + "runner": "b300-nv_11", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "nodes": null, + "dtype": "bf16", + "comparisonClass": null, + "comparisonKey": "81bfaa10f5beda36", + "measurementContract": "allreduce-fw-v1", + "avgBusBandwidthGbps": null, + "status": "valid", + "valid": true, + "colorKey": "b300_fd5a787b", + "label": "B300 · flashinfer-oneshot (fw-AR · ws8)", + "generatedAt": "2026-06-28T01:47:48.908164+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-28T01:47:48.908164+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 4096, + "dtype": "bf16", + "latencyUs": 11.661, + "algBandwidthGbps": 0.351, + "busBandwidthGbps": 0.615, + "outOfPlaceUs": null, + "inPlaceUs": null, + "correct": true + }, + { + "sizeBytes": 16384, + "dtype": "bf16", + "latencyUs": 11.601, + "algBandwidthGbps": 1.412, + "busBandwidthGbps": 2.472, + "outOfPlaceUs": null, + "inPlaceUs": null, + "correct": true + }, + { + "sizeBytes": 65536, + "dtype": "bf16", + 
"latencyUs": 12.381, + "algBandwidthGbps": 5.293, + "busBandwidthGbps": 9.263, + "outOfPlaceUs": null, + "inPlaceUs": null, + "correct": true + }, + { + "sizeBytes": 262144, + "dtype": "bf16", + "latencyUs": 14.274, + "algBandwidthGbps": 18.365, + "busBandwidthGbps": 32.139, + "outOfPlaceUs": null, + "inPlaceUs": null, + "correct": true + }, + { + "sizeBytes": 1048576, + "dtype": "bf16", + "latencyUs": 23.854, + "algBandwidthGbps": 43.958, + "busBandwidthGbps": 76.926, + "outOfPlaceUs": null, + "inPlaceUs": null, + "correct": true + }, + { + "sizeBytes": 4194304, + "dtype": "bf16", + "latencyUs": 75.394, + "algBandwidthGbps": 55.632, + "busBandwidthGbps": 97.356, + "outOfPlaceUs": null, + "inPlaceUs": null, + "correct": true + }, + { + "sizeBytes": 16777216, + "dtype": "bf16", + "latencyUs": 244.644, + "algBandwidthGbps": 68.578, + "busBandwidthGbps": 120.011, + "outOfPlaceUs": null, + "inPlaceUs": null, + "correct": true + }, + { + "sizeBytes": 67108864, + "dtype": "bf16", + "latencyUs": 956.149, + "algBandwidthGbps": 70.187, + "busBandwidthGbps": 122.827, + "outOfPlaceUs": null, + "inPlaceUs": null, + "correct": true + } + ] + }, + { + "id": "cxn-087af4ad", + "identity": "allreduce-fw|b300|flashinfer-twoshot|b300-nvlink-island|nvlink|8|allreduce-fw-v1", + "op": "all_reduce", + "sku": "b300", + "runner": "b300-nv_11", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "nodes": null, + "dtype": "bf16", + "comparisonClass": null, + "comparisonKey": "183298dcd11c3e1e", + "measurementContract": "allreduce-fw-v1", + "avgBusBandwidthGbps": null, + "status": "valid", + "valid": true, + "colorKey": "b300_087af4ad", + "label": "B300 · flashinfer-twoshot (fw-AR · ws8)", + "generatedAt": "2026-06-28T01:47:48.908164+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-28T01:47:48.908164+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 65536, + "dtype": "bf16", + "latencyUs": 385.191, + "algBandwidthGbps": 0.17, + 
"busBandwidthGbps": 0.298, + "outOfPlaceUs": null, + "inPlaceUs": null, + "correct": true + }, + { + "sizeBytes": 262144, + "dtype": "bf16", + "latencyUs": 118.644, + "algBandwidthGbps": 2.209, + "busBandwidthGbps": 3.867, + "outOfPlaceUs": null, + "inPlaceUs": null, + "correct": true + }, + { + "sizeBytes": 1048576, + "dtype": "bf16", + "latencyUs": 47.46, + "algBandwidthGbps": 22.094, + "busBandwidthGbps": 38.664, + "outOfPlaceUs": null, + "inPlaceUs": null, + "correct": true + }, + { + "sizeBytes": 4194304, + "dtype": "bf16", + "latencyUs": 43.002, + "algBandwidthGbps": 97.537, + "busBandwidthGbps": 170.69, + "outOfPlaceUs": null, + "inPlaceUs": null, + "correct": true + }, + { + "sizeBytes": 16777216, + "dtype": "bf16", + "latencyUs": 90.81, + "algBandwidthGbps": 184.75, + "busBandwidthGbps": 323.313, + "outOfPlaceUs": null, + "inPlaceUs": null, + "correct": true + }, + { + "sizeBytes": 67108864, + "dtype": "bf16", + "latencyUs": 353.165, + "algBandwidthGbps": 190.021, + "busBandwidthGbps": 332.537, + "outOfPlaceUs": null, + "inPlaceUs": null, + "correct": true + } + ] + }, + { + "id": "cxn-345c72e8", + "identity": "allreduce-fw|b300|nccl|b300-nvlink-island|nvlink|8|allreduce-fw-v1", + "op": "all_reduce", + "sku": "b300", + "runner": "b300-nv_11", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "nodes": null, + "dtype": "bf16", + "comparisonClass": null, + "comparisonKey": "9c254fab92b5fac7", + "measurementContract": "allreduce-fw-v1", + "avgBusBandwidthGbps": null, + "status": "valid", + "valid": true, + "colorKey": "b300_345c72e8", + "label": "B300 · nccl (fw-AR · ws8)", + "generatedAt": "2026-06-28T01:47:48.908164+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-28T01:47:48.908164+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 1024, + "dtype": "bf16", + "latencyUs": 51, + "algBandwidthGbps": 0.02, + "busBandwidthGbps": 0.035, + "outOfPlaceUs": null, + "inPlaceUs": null, + "correct": true 
+ }, + { + "sizeBytes": 4096, + "dtype": "bf16", + "latencyUs": 29.788, + "algBandwidthGbps": 0.138, + "busBandwidthGbps": 0.241, + "outOfPlaceUs": null, + "inPlaceUs": null, + "correct": true + }, + { + "sizeBytes": 16384, + "dtype": "bf16", + "latencyUs": 25.746, + "algBandwidthGbps": 0.636, + "busBandwidthGbps": 1.114, + "outOfPlaceUs": null, + "inPlaceUs": null, + "correct": true + }, + { + "sizeBytes": 65536, + "dtype": "bf16", + "latencyUs": 43.559, + "algBandwidthGbps": 1.505, + "busBandwidthGbps": 2.633, + "outOfPlaceUs": null, + "inPlaceUs": null, + "correct": true + }, + { + "sizeBytes": 262144, + "dtype": "bf16", + "latencyUs": 27.737, + "algBandwidthGbps": 9.451, + "busBandwidthGbps": 16.539, + "outOfPlaceUs": null, + "inPlaceUs": null, + "correct": true + }, + { + "sizeBytes": 1048576, + "dtype": "bf16", + "latencyUs": 29.05, + "algBandwidthGbps": 36.096, + "busBandwidthGbps": 63.168, + "outOfPlaceUs": null, + "inPlaceUs": null, + "correct": true + }, + { + "sizeBytes": 4194304, + "dtype": "bf16", + "latencyUs": 52.692, + "algBandwidthGbps": 79.601, + "busBandwidthGbps": 139.301, + "outOfPlaceUs": null, + "inPlaceUs": null, + "correct": true + }, + { + "sizeBytes": 16777216, + "dtype": "bf16", + "latencyUs": 95.558, + "algBandwidthGbps": 175.571, + "busBandwidthGbps": 307.25, + "outOfPlaceUs": null, + "inPlaceUs": null, + "correct": true + }, + { + "sizeBytes": 67108864, + "dtype": "bf16", + "latencyUs": 302.87, + "algBandwidthGbps": 221.577, + "busBandwidthGbps": 387.759, + "outOfPlaceUs": null, + "inPlaceUs": null, + "correct": true + } + ] + }, + { + "id": "cxn-18cb0223", + "identity": "allreduce-fw|h100|flashinfer-oneshot|h100-nvlink-island|nvlink|8|allreduce-fw-v1", + "op": "all_reduce", + "sku": "h100", + "runner": "h100-dgxc-slurm_17", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "nodes": null, + "dtype": "bf16", + "comparisonClass": null, + "comparisonKey": "2876f45736ca183e", + "measurementContract": 
"allreduce-fw-v1", + "avgBusBandwidthGbps": null, + "status": "valid", + "valid": true, + "colorKey": "h100_18cb0223", + "label": "H100 · flashinfer-oneshot (fw-AR · ws8)", + "generatedAt": "2026-06-28T01:47:32.393320+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-28T01:47:32.393320+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 4096, + "dtype": "bf16", + "latencyUs": 19.209, + "algBandwidthGbps": 0.213, + "busBandwidthGbps": 0.373, + "outOfPlaceUs": null, + "inPlaceUs": null, + "correct": true + }, + { + "sizeBytes": 16384, + "dtype": "bf16", + "latencyUs": 18.247, + "algBandwidthGbps": 0.898, + "busBandwidthGbps": 1.571, + "outOfPlaceUs": null, + "inPlaceUs": null, + "correct": true + }, + { + "sizeBytes": 65536, + "dtype": "bf16", + "latencyUs": 18.258, + "algBandwidthGbps": 3.589, + "busBandwidthGbps": 6.282, + "outOfPlaceUs": null, + "inPlaceUs": null, + "correct": true + }, + { + "sizeBytes": 262144, + "dtype": "bf16", + "latencyUs": 17.969, + "algBandwidthGbps": 14.589, + "busBandwidthGbps": 25.531, + "outOfPlaceUs": null, + "inPlaceUs": null, + "correct": true + }, + { + "sizeBytes": 1048576, + "dtype": "bf16", + "latencyUs": 32.62, + "algBandwidthGbps": 32.145, + "busBandwidthGbps": 56.254, + "outOfPlaceUs": null, + "inPlaceUs": null, + "correct": true + }, + { + "sizeBytes": 4194304, + "dtype": "bf16", + "latencyUs": 119.14, + "algBandwidthGbps": 35.205, + "busBandwidthGbps": 61.609, + "outOfPlaceUs": null, + "inPlaceUs": null, + "correct": true + }, + { + "sizeBytes": 16777216, + "dtype": "bf16", + "latencyUs": 464.128, + "algBandwidthGbps": 36.148, + "busBandwidthGbps": 63.259, + "outOfPlaceUs": null, + "inPlaceUs": null, + "correct": true + }, + { + "sizeBytes": 67108864, + "dtype": "bf16", + "latencyUs": 1854.815, + "algBandwidthGbps": 36.181, + "busBandwidthGbps": 63.317, + "outOfPlaceUs": null, + "inPlaceUs": null, + "correct": true + } + ] + }, + { + "id": "cxn-5a5e256d", + "identity": 
"allreduce-fw|h100|flashinfer-twoshot|h100-nvlink-island|nvlink|8|allreduce-fw-v1", + "op": "all_reduce", + "sku": "h100", + "runner": "h100-dgxc-slurm_17", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "nodes": null, + "dtype": "bf16", + "comparisonClass": null, + "comparisonKey": "3914980c40380611", + "measurementContract": "allreduce-fw-v1", + "avgBusBandwidthGbps": null, + "status": "valid", + "valid": true, + "colorKey": "h100_5a5e256d", + "label": "H100 · flashinfer-twoshot (fw-AR · ws8)", + "generatedAt": "2026-06-28T01:47:32.393320+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-28T01:47:32.393320+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 65536, + "dtype": "bf16", + "latencyUs": 261.726, + "algBandwidthGbps": 0.25, + "busBandwidthGbps": 0.438, + "outOfPlaceUs": null, + "inPlaceUs": null, + "correct": true + }, + { + "sizeBytes": 262144, + "dtype": "bf16", + "latencyUs": 85.069, + "algBandwidthGbps": 3.082, + "busBandwidthGbps": 5.393, + "outOfPlaceUs": null, + "inPlaceUs": null, + "correct": true + }, + { + "sizeBytes": 1048576, + "dtype": "bf16", + "latencyUs": 36.265, + "algBandwidthGbps": 28.914, + "busBandwidthGbps": 50.6, + "outOfPlaceUs": null, + "inPlaceUs": null, + "correct": true + }, + { + "sizeBytes": 4194304, + "dtype": "bf16", + "latencyUs": 53.77, + "algBandwidthGbps": 78.004, + "busBandwidthGbps": 136.507, + "outOfPlaceUs": null, + "inPlaceUs": null, + "correct": true + }, + { + "sizeBytes": 16777216, + "dtype": "bf16", + "latencyUs": 144.773, + "algBandwidthGbps": 115.886, + "busBandwidthGbps": 202.801, + "outOfPlaceUs": null, + "inPlaceUs": null, + "correct": true + }, + { + "sizeBytes": 67108864, + "dtype": "bf16", + "latencyUs": 584.195, + "algBandwidthGbps": 114.874, + "busBandwidthGbps": 201.03, + "outOfPlaceUs": null, + "inPlaceUs": null, + "correct": true + } + ] + }, + { + "id": "cxn-4676ac48", + "identity": 
"nccl|h100|all_reduce|h100-nvlink-island|nvlink|8|nccl-tests-v1", + "op": "all_reduce", + "sku": "h100", + "runner": "h100-dgxc-slurm_09", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "nodes": 1, + "dtype": "float", + "comparisonClass": "standardized", + "comparisonKey": "059665d8b168a0d7", + "measurementContract": "nccl-tests-v1", + "avgBusBandwidthGbps": 145.585, + "status": "valid", + "valid": true, + "colorKey": "h100_4676ac48", + "label": "H100 · h100-nvlink-island · nvlink (ws8)", + "generatedAt": "2026-06-27T11:18:41.017727+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-27T11:18:41.017727+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 8, + "dtype": "float", + "latencyUs": 108.66, + "algBandwidthGbps": 0, + "busBandwidthGbps": 0, + "outOfPlaceUs": 108.66, + "inPlaceUs": 38.99, + "correct": true + }, + { + "sizeBytes": 16, + "dtype": "float", + "latencyUs": 39.33, + "algBandwidthGbps": 0, + "busBandwidthGbps": 0, + "outOfPlaceUs": 39.33, + "inPlaceUs": 38.7, + "correct": true + }, + { + "sizeBytes": 32, + "dtype": "float", + "latencyUs": 73.95, + "algBandwidthGbps": 0, + "busBandwidthGbps": 0, + "outOfPlaceUs": 73.95, + "inPlaceUs": 38.72, + "correct": true + }, + { + "sizeBytes": 64, + "dtype": "float", + "latencyUs": 39.17, + "algBandwidthGbps": 0, + "busBandwidthGbps": 0, + "outOfPlaceUs": 39.17, + "inPlaceUs": 38.71, + "correct": true + }, + { + "sizeBytes": 128, + "dtype": "float", + "latencyUs": 39.12, + "algBandwidthGbps": 0, + "busBandwidthGbps": 0.01, + "outOfPlaceUs": 39.12, + "inPlaceUs": 38.4, + "correct": true + }, + { + "sizeBytes": 256, + "dtype": "float", + "latencyUs": 38.88, + "algBandwidthGbps": 0.01, + "busBandwidthGbps": 0.01, + "outOfPlaceUs": 38.88, + "inPlaceUs": 38.41, + "correct": true + }, + { + "sizeBytes": 512, + "dtype": "float", + "latencyUs": 39.08, + "algBandwidthGbps": 0.01, + "busBandwidthGbps": 0.02, + "outOfPlaceUs": 39.08, + "inPlaceUs": 38.59, + 
"correct": true + }, + { + "sizeBytes": 1024, + "dtype": "float", + "latencyUs": 38.88, + "algBandwidthGbps": 0.03, + "busBandwidthGbps": 0.05, + "outOfPlaceUs": 38.88, + "inPlaceUs": 38.83, + "correct": true + }, + { + "sizeBytes": 2048, + "dtype": "float", + "latencyUs": 39.58, + "algBandwidthGbps": 0.05, + "busBandwidthGbps": 0.09, + "outOfPlaceUs": 39.58, + "inPlaceUs": 39.25, + "correct": true + }, + { + "sizeBytes": 4096, + "dtype": "float", + "latencyUs": 38.69, + "algBandwidthGbps": 0.11, + "busBandwidthGbps": 0.19, + "outOfPlaceUs": 38.94, + "inPlaceUs": 38.69, + "correct": true + }, + { + "sizeBytes": 8192, + "dtype": "float", + "latencyUs": 38.69, + "algBandwidthGbps": 0.21, + "busBandwidthGbps": 0.37, + "outOfPlaceUs": 38.69, + "inPlaceUs": 39.4, + "correct": true + }, + { + "sizeBytes": 16384, + "dtype": "float", + "latencyUs": 39.08, + "algBandwidthGbps": 0.42, + "busBandwidthGbps": 0.73, + "outOfPlaceUs": 39.08, + "inPlaceUs": 39.06, + "correct": true + }, + { + "sizeBytes": 32768, + "dtype": "float", + "latencyUs": 38.26, + "algBandwidthGbps": 0.86, + "busBandwidthGbps": 1.5, + "outOfPlaceUs": 39.3, + "inPlaceUs": 38.26, + "correct": true + }, + { + "sizeBytes": 65536, + "dtype": "float", + "latencyUs": 38.71, + "algBandwidthGbps": 1.69, + "busBandwidthGbps": 2.96, + "outOfPlaceUs": 38.95, + "inPlaceUs": 38.71, + "correct": true + }, + { + "sizeBytes": 131072, + "dtype": "float", + "latencyUs": 39.68, + "algBandwidthGbps": 3.3, + "busBandwidthGbps": 5.78, + "outOfPlaceUs": 40.2, + "inPlaceUs": 39.68, + "correct": true + }, + { + "sizeBytes": 262144, + "dtype": "float", + "latencyUs": 40.97, + "algBandwidthGbps": 6.4, + "busBandwidthGbps": 11.2, + "outOfPlaceUs": 41.31, + "inPlaceUs": 40.97, + "correct": true + }, + { + "sizeBytes": 524288, + "dtype": "float", + "latencyUs": 43.56, + "algBandwidthGbps": 12.04, + "busBandwidthGbps": 21.06, + "outOfPlaceUs": 43.56, + "inPlaceUs": 43.68, + "correct": true + }, + { + "sizeBytes": 1048576, + "dtype": 
"float", + "latencyUs": 46.56, + "algBandwidthGbps": 22.52, + "busBandwidthGbps": 39.42, + "outOfPlaceUs": 46.76, + "inPlaceUs": 46.56, + "correct": true + }, + { + "sizeBytes": 2097152, + "dtype": "float", + "latencyUs": 44.32, + "algBandwidthGbps": 47.32, + "busBandwidthGbps": 82.81, + "outOfPlaceUs": 44.44, + "inPlaceUs": 44.32, + "correct": true + }, + { + "sizeBytes": 4194304, + "dtype": "float", + "latencyUs": 55.78, + "algBandwidthGbps": 75.19, + "busBandwidthGbps": 131.58, + "outOfPlaceUs": 56.11, + "inPlaceUs": 55.78, + "correct": true + }, + { + "sizeBytes": 8388608, + "dtype": "float", + "latencyUs": 83.88, + "algBandwidthGbps": 100, + "busBandwidthGbps": 175.01, + "outOfPlaceUs": 85.22, + "inPlaceUs": 83.88, + "correct": true + }, + { + "sizeBytes": 16777216, + "dtype": "float", + "latencyUs": 125.29, + "algBandwidthGbps": 133.91, + "busBandwidthGbps": 234.34, + "outOfPlaceUs": 125.73, + "inPlaceUs": 125.29, + "correct": true + }, + { + "sizeBytes": 33554432, + "dtype": "float", + "latencyUs": 200.68, + "algBandwidthGbps": 167.2, + "busBandwidthGbps": 292.6, + "outOfPlaceUs": 200.82, + "inPlaceUs": 200.68, + "correct": true + }, + { + "sizeBytes": 67108864, + "dtype": "float", + "latencyUs": 325, + "algBandwidthGbps": 206.49, + "busBandwidthGbps": 361.36, + "outOfPlaceUs": 325, + "inPlaceUs": 325.69, + "correct": true + }, + { + "sizeBytes": 134217728, + "dtype": "float", + "latencyUs": 585.92, + "algBandwidthGbps": 229.07, + "busBandwidthGbps": 400.87, + "outOfPlaceUs": 585.97, + "inPlaceUs": 585.92, + "correct": true + }, + { + "sizeBytes": 268435456, + "dtype": "float", + "latencyUs": 1110.23, + "algBandwidthGbps": 241.78, + "busBandwidthGbps": 423.12, + "outOfPlaceUs": 1111.7, + "inPlaceUs": 1110.23, + "correct": true + }, + { + "sizeBytes": 536870912, + "dtype": "float", + "latencyUs": 2145.48, + "algBandwidthGbps": 250.23, + "busBandwidthGbps": 437.91, + "outOfPlaceUs": 2145.48, + "inPlaceUs": 2147.26, + "correct": true + }, + { + "sizeBytes": 
1073741824, + "dtype": "float", + "latencyUs": 4026.19, + "algBandwidthGbps": 266.69, + "busBandwidthGbps": 466.71, + "outOfPlaceUs": 4026.19, + "inPlaceUs": 4031.14, + "correct": true + }, + { + "sizeBytes": 2147483648, + "dtype": "float", + "latencyUs": 7957.67, + "algBandwidthGbps": 269.86, + "busBandwidthGbps": 472.26, + "outOfPlaceUs": 7958.73, + "inPlaceUs": 7957.67, + "correct": true + }, + { + "sizeBytes": 4294967296, + "dtype": "float", + "latencyUs": 15778.7, + "algBandwidthGbps": 272.2, + "busBandwidthGbps": 476.35, + "outOfPlaceUs": 15778.7, + "inPlaceUs": 15787, + "correct": true + }, + { + "sizeBytes": 8589934592, + "dtype": "float", + "latencyUs": 31394.3, + "algBandwidthGbps": 273.61, + "busBandwidthGbps": 478.83, + "outOfPlaceUs": 31404.3, + "inPlaceUs": 31394.3, + "correct": true + } + ] + }, + { + "id": "cxn-ae07ad9c", + "identity": "allreduce-fw|h100|nccl|h100-nvlink-island|nvlink|8|allreduce-fw-v1", + "op": "all_reduce", + "sku": "h100", + "runner": "h100-dgxc-slurm_17", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "nodes": null, + "dtype": "bf16", + "comparisonClass": null, + "comparisonKey": "aa6fba4338779d59", + "measurementContract": "allreduce-fw-v1", + "avgBusBandwidthGbps": null, + "status": "valid", + "valid": true, + "colorKey": "h100_ae07ad9c", + "label": "H100 · nccl (fw-AR · ws8)", + "generatedAt": "2026-06-28T01:47:32.393320+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-28T01:47:32.393320+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 1024, + "dtype": "bf16", + "latencyUs": 32.458, + "algBandwidthGbps": 0.032, + "busBandwidthGbps": 0.055, + "outOfPlaceUs": null, + "inPlaceUs": null, + "correct": true + }, + { + "sizeBytes": 4096, + "dtype": "bf16", + "latencyUs": 30.771, + "algBandwidthGbps": 0.133, + "busBandwidthGbps": 0.233, + "outOfPlaceUs": null, + "inPlaceUs": null, + "correct": true + }, + { + "sizeBytes": 16384, + "dtype": "bf16", + "latencyUs": 
31.116, + "algBandwidthGbps": 0.527, + "busBandwidthGbps": 0.921, + "outOfPlaceUs": null, + "inPlaceUs": null, + "correct": true + }, + { + "sizeBytes": 65536, + "dtype": "bf16", + "latencyUs": 61.512, + "algBandwidthGbps": 1.065, + "busBandwidthGbps": 1.864, + "outOfPlaceUs": null, + "inPlaceUs": null, + "correct": true + }, + { + "sizeBytes": 262144, + "dtype": "bf16", + "latencyUs": 30.758, + "algBandwidthGbps": 8.523, + "busBandwidthGbps": 14.915, + "outOfPlaceUs": null, + "inPlaceUs": null, + "correct": true + }, + { + "sizeBytes": 1048576, + "dtype": "bf16", + "latencyUs": 33.86, + "algBandwidthGbps": 30.968, + "busBandwidthGbps": 54.194, + "outOfPlaceUs": null, + "inPlaceUs": null, + "correct": true + }, + { + "sizeBytes": 4194304, + "dtype": "bf16", + "latencyUs": 84.309, + "algBandwidthGbps": 49.749, + "busBandwidthGbps": 87.061, + "outOfPlaceUs": null, + "inPlaceUs": null, + "correct": true + }, + { + "sizeBytes": 16777216, + "dtype": "bf16", + "latencyUs": 118.376, + "algBandwidthGbps": 141.728, + "busBandwidthGbps": 248.024, + "outOfPlaceUs": null, + "inPlaceUs": null, + "correct": true + }, + { + "sizeBytes": 67108864, + "dtype": "bf16", + "latencyUs": 322.062, + "algBandwidthGbps": 208.372, + "busBandwidthGbps": 364.652, + "outOfPlaceUs": null, + "inPlaceUs": null, + "correct": true + } + ] + }, + { + "id": "cxn-83a9e484", + "identity": "nccl|h200|all_reduce|h200-nvlink-island|nvlink|8|nccl-tests-v1", + "op": "all_reduce", + "sku": "h200", + "runner": "h200-dgxc-slurm_2", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "nodes": 1, + "dtype": "float", + "comparisonClass": "standardized", + "comparisonKey": "9171bd1206f1d15c", + "measurementContract": "nccl-tests-v1", + "avgBusBandwidthGbps": 147.096, + "status": "valid", + "valid": true, + "colorKey": "h200_83a9e484", + "label": "H200 · h200-nvlink-island · nvlink (ws8)", + "generatedAt": "2026-06-27T11:18:51.255960+00:00", + "run": { + "id": null, + "url": null, + 
"createdAt": "2026-06-27T11:18:51.255960+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 8, + "dtype": "float", + "latencyUs": 46.19, + "algBandwidthGbps": 0, + "busBandwidthGbps": 0, + "outOfPlaceUs": 46.19, + "inPlaceUs": 45.4, + "correct": true + }, + { + "sizeBytes": 16, + "dtype": "float", + "latencyUs": 46.3, + "algBandwidthGbps": 0, + "busBandwidthGbps": 0, + "outOfPlaceUs": 46.3, + "inPlaceUs": 49.15, + "correct": true + }, + { + "sizeBytes": 32, + "dtype": "float", + "latencyUs": 43.48, + "algBandwidthGbps": 0, + "busBandwidthGbps": 0, + "outOfPlaceUs": 43.48, + "inPlaceUs": 41.06, + "correct": true + }, + { + "sizeBytes": 64, + "dtype": "float", + "latencyUs": 40.84, + "algBandwidthGbps": 0, + "busBandwidthGbps": 0, + "outOfPlaceUs": 40.84, + "inPlaceUs": 40.88, + "correct": true + }, + { + "sizeBytes": 128, + "dtype": "float", + "latencyUs": 39.89, + "algBandwidthGbps": 0, + "busBandwidthGbps": 0.01, + "outOfPlaceUs": 39.89, + "inPlaceUs": 43.96, + "correct": true + }, + { + "sizeBytes": 256, + "dtype": "float", + "latencyUs": 43.77, + "algBandwidthGbps": 0.01, + "busBandwidthGbps": 0.01, + "outOfPlaceUs": 43.77, + "inPlaceUs": 44.61, + "correct": true + }, + { + "sizeBytes": 512, + "dtype": "float", + "latencyUs": 40.31, + "algBandwidthGbps": 0.01, + "busBandwidthGbps": 0.02, + "outOfPlaceUs": 40.31, + "inPlaceUs": 41.46, + "correct": true + }, + { + "sizeBytes": 1024, + "dtype": "float", + "latencyUs": 40.55, + "algBandwidthGbps": 0.03, + "busBandwidthGbps": 0.04, + "outOfPlaceUs": 40.55, + "inPlaceUs": 41.64, + "correct": true + }, + { + "sizeBytes": 2048, + "dtype": "float", + "latencyUs": 40.83, + "algBandwidthGbps": 0.05, + "busBandwidthGbps": 0.09, + "outOfPlaceUs": 43.27, + "inPlaceUs": 40.83, + "correct": true + }, + { + "sizeBytes": 4096, + "dtype": "float", + "latencyUs": 41.81, + "algBandwidthGbps": 0.1, + "busBandwidthGbps": 0.17, + "outOfPlaceUs": 41.81, + "inPlaceUs": 41.67, + "correct": true + }, + { + "sizeBytes": 8192, + 
"dtype": "float", + "latencyUs": 41.55, + "algBandwidthGbps": 0.2, + "busBandwidthGbps": 0.35, + "outOfPlaceUs": 41.69, + "inPlaceUs": 41.55, + "correct": true + }, + { + "sizeBytes": 16384, + "dtype": "float", + "latencyUs": 40.78, + "algBandwidthGbps": 0.4, + "busBandwidthGbps": 0.7, + "outOfPlaceUs": 44.62, + "inPlaceUs": 40.78, + "correct": true + }, + { + "sizeBytes": 32768, + "dtype": "float", + "latencyUs": 40.5, + "algBandwidthGbps": 0.81, + "busBandwidthGbps": 1.42, + "outOfPlaceUs": 41.35, + "inPlaceUs": 40.5, + "correct": true + }, + { + "sizeBytes": 65536, + "dtype": "float", + "latencyUs": 41.2, + "algBandwidthGbps": 1.59, + "busBandwidthGbps": 2.78, + "outOfPlaceUs": 45.65, + "inPlaceUs": 41.2, + "correct": true + }, + { + "sizeBytes": 131072, + "dtype": "float", + "latencyUs": 44.7, + "algBandwidthGbps": 2.93, + "busBandwidthGbps": 5.13, + "outOfPlaceUs": 44.7, + "inPlaceUs": 45.23, + "correct": true + }, + { + "sizeBytes": 262144, + "dtype": "float", + "latencyUs": 42.45, + "algBandwidthGbps": 6.18, + "busBandwidthGbps": 10.81, + "outOfPlaceUs": 43.35, + "inPlaceUs": 42.45, + "correct": true + }, + { + "sizeBytes": 524288, + "dtype": "float", + "latencyUs": 45.27, + "algBandwidthGbps": 11.58, + "busBandwidthGbps": 20.27, + "outOfPlaceUs": 45.27, + "inPlaceUs": 50.92, + "correct": true + }, + { + "sizeBytes": 1048576, + "dtype": "float", + "latencyUs": 47.39, + "algBandwidthGbps": 22.13, + "busBandwidthGbps": 38.72, + "outOfPlaceUs": 47.39, + "inPlaceUs": 48.94, + "correct": true + }, + { + "sizeBytes": 2097152, + "dtype": "float", + "latencyUs": 45.86, + "algBandwidthGbps": 45.73, + "busBandwidthGbps": 80.02, + "outOfPlaceUs": 48.22, + "inPlaceUs": 45.86, + "correct": true + }, + { + "sizeBytes": 4194304, + "dtype": "float", + "latencyUs": 54.76, + "algBandwidthGbps": 76.6, + "busBandwidthGbps": 134.05, + "outOfPlaceUs": 54.92, + "inPlaceUs": 54.76, + "correct": true + }, + { + "sizeBytes": 8388608, + "dtype": "float", + "latencyUs": 82.71, + 
"algBandwidthGbps": 101.42, + "busBandwidthGbps": 177.49, + "outOfPlaceUs": 83.49, + "inPlaceUs": 82.71, + "correct": true + }, + { + "sizeBytes": 16777216, + "dtype": "float", + "latencyUs": 124.23, + "algBandwidthGbps": 135.05, + "busBandwidthGbps": 236.34, + "outOfPlaceUs": 125.83, + "inPlaceUs": 124.23, + "correct": true + }, + { + "sizeBytes": 33554432, + "dtype": "float", + "latencyUs": 199.57, + "algBandwidthGbps": 168.13, + "busBandwidthGbps": 294.23, + "outOfPlaceUs": 199.57, + "inPlaceUs": 199.89, + "correct": true + }, + { + "sizeBytes": 67108864, + "dtype": "float", + "latencyUs": 321.61, + "algBandwidthGbps": 208.67, + "busBandwidthGbps": 365.16, + "outOfPlaceUs": 321.61, + "inPlaceUs": 322.55, + "correct": true + }, + { + "sizeBytes": 134217728, + "dtype": "float", + "latencyUs": 571.39, + "algBandwidthGbps": 234.9, + "busBandwidthGbps": 411.07, + "outOfPlaceUs": 573.19, + "inPlaceUs": 571.39, + "correct": true + }, + { + "sizeBytes": 268435456, + "dtype": "float", + "latencyUs": 1073.14, + "algBandwidthGbps": 250.14, + "busBandwidthGbps": 437.75, + "outOfPlaceUs": 1073.14, + "inPlaceUs": 1076.11, + "correct": true + }, + { + "sizeBytes": 536870912, + "dtype": "float", + "latencyUs": 2090.9, + "algBandwidthGbps": 256.77, + "busBandwidthGbps": 449.34, + "outOfPlaceUs": 2091.74, + "inPlaceUs": 2090.9, + "correct": true + }, + { + "sizeBytes": 1073741824, + "dtype": "float", + "latencyUs": 4010.65, + "algBandwidthGbps": 267.72, + "busBandwidthGbps": 468.51, + "outOfPlaceUs": 4013.31, + "inPlaceUs": 4010.65, + "correct": true + }, + { + "sizeBytes": 2147483648, + "dtype": "float", + "latencyUs": 7917.63, + "algBandwidthGbps": 271.23, + "busBandwidthGbps": 474.65, + "outOfPlaceUs": 7920.22, + "inPlaceUs": 7917.63, + "correct": true + }, + { + "sizeBytes": 4294967296, + "dtype": "float", + "latencyUs": 15691.2, + "algBandwidthGbps": 273.72, + "busBandwidthGbps": 479.01, + "outOfPlaceUs": 15691.2, + "inPlaceUs": 15701, + "correct": true + }, + { + 
"sizeBytes": 8589934592, + "dtype": "float", + "latencyUs": 31241, + "algBandwidthGbps": 274.96, + "busBandwidthGbps": 481.17, + "outOfPlaceUs": 31280.5, + "inPlaceUs": 31241, + "correct": true + } + ] + }, + { + "id": "cxn-be6147f8", + "identity": "nccl|mi355x|all_reduce|mi355x-xgmi|xgmi|8|nccl-tests-v1", + "op": "all_reduce", + "sku": "mi355x", + "runner": "mi355x-amds_01", + "topologyClass": "mi355x-xgmi", + "transport": "xgmi", + "worldSize": 8, + "nodes": 1, + "dtype": "float", + "comparisonClass": "standardized", + "comparisonKey": "643cf957198f1634", + "measurementContract": "nccl-tests-v1", + "avgBusBandwidthGbps": 132.947, + "status": "valid", + "valid": true, + "colorKey": "mi355x_be6147f8", + "label": "MI355X · mi355x-xgmi · xgmi (ws8)", + "generatedAt": "2026-06-29T02:37:18.096029+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-29T02:37:18.096029+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 8, + "dtype": "float", + "latencyUs": 70.23, + "algBandwidthGbps": 0, + "busBandwidthGbps": 0, + "outOfPlaceUs": 70.23, + "inPlaceUs": 55.94, + "correct": true + }, + { + "sizeBytes": 16, + "dtype": "float", + "latencyUs": 58.71, + "algBandwidthGbps": 0, + "busBandwidthGbps": 0, + "outOfPlaceUs": 58.71, + "inPlaceUs": 59.19, + "correct": true + }, + { + "sizeBytes": 32, + "dtype": "float", + "latencyUs": 58.37, + "algBandwidthGbps": 0, + "busBandwidthGbps": 0, + "outOfPlaceUs": 58.37, + "inPlaceUs": 58.26, + "correct": true + }, + { + "sizeBytes": 64, + "dtype": "float", + "latencyUs": 58.11, + "algBandwidthGbps": 0, + "busBandwidthGbps": 0, + "outOfPlaceUs": 58.11, + "inPlaceUs": 67.17, + "correct": true + }, + { + "sizeBytes": 128, + "dtype": "float", + "latencyUs": 58.88, + "algBandwidthGbps": 0, + "busBandwidthGbps": 0, + "outOfPlaceUs": 58.88, + "inPlaceUs": 68.44, + "correct": true + }, + { + "sizeBytes": 256, + "dtype": "float", + "latencyUs": 57.68, + "algBandwidthGbps": 0, + "busBandwidthGbps": 0.01, + "outOfPlaceUs": 
57.68, + "inPlaceUs": 68.1, + "correct": true + }, + { + "sizeBytes": 512, + "dtype": "float", + "latencyUs": 57.2, + "algBandwidthGbps": 0.01, + "busBandwidthGbps": 0.02, + "outOfPlaceUs": 57.2, + "inPlaceUs": 68.38, + "correct": true + }, + { + "sizeBytes": 1024, + "dtype": "float", + "latencyUs": 42.33, + "algBandwidthGbps": 0.02, + "busBandwidthGbps": 0.04, + "outOfPlaceUs": 42.33, + "inPlaceUs": 56.59, + "correct": true + }, + { + "sizeBytes": 2048, + "dtype": "float", + "latencyUs": 56.27, + "algBandwidthGbps": 0.04, + "busBandwidthGbps": 0.06, + "outOfPlaceUs": 56.27, + "inPlaceUs": 57.42, + "correct": true + }, + { + "sizeBytes": 4096, + "dtype": "float", + "latencyUs": 56.73, + "algBandwidthGbps": 0.07, + "busBandwidthGbps": 0.13, + "outOfPlaceUs": 56.73, + "inPlaceUs": 57.52, + "correct": true + }, + { + "sizeBytes": 8192, + "dtype": "float", + "latencyUs": 46.68, + "algBandwidthGbps": 0.18, + "busBandwidthGbps": 0.31, + "outOfPlaceUs": 56.91, + "inPlaceUs": 46.68, + "correct": true + }, + { + "sizeBytes": 16384, + "dtype": "float", + "latencyUs": 56.73, + "algBandwidthGbps": 0.29, + "busBandwidthGbps": 0.51, + "outOfPlaceUs": 56.73, + "inPlaceUs": 58.19, + "correct": true + }, + { + "sizeBytes": 32768, + "dtype": "float", + "latencyUs": 57.45, + "algBandwidthGbps": 0.57, + "busBandwidthGbps": 1, + "outOfPlaceUs": 57.45, + "inPlaceUs": 58.35, + "correct": true + }, + { + "sizeBytes": 65536, + "dtype": "float", + "latencyUs": 48.73, + "algBandwidthGbps": 1.34, + "busBandwidthGbps": 2.35, + "outOfPlaceUs": 57.81, + "inPlaceUs": 48.73, + "correct": true + }, + { + "sizeBytes": 131072, + "dtype": "float", + "latencyUs": 61.94, + "algBandwidthGbps": 2.12, + "busBandwidthGbps": 3.7, + "outOfPlaceUs": 61.94, + "inPlaceUs": 72.74, + "correct": true + }, + { + "sizeBytes": 262144, + "dtype": "float", + "latencyUs": 62.66, + "algBandwidthGbps": 4.18, + "busBandwidthGbps": 7.32, + "outOfPlaceUs": 71.87, + "inPlaceUs": 62.66, + "correct": true + }, + { + "sizeBytes": 
524288, + "dtype": "float", + "latencyUs": 72.34, + "algBandwidthGbps": 7.25, + "busBandwidthGbps": 12.68, + "outOfPlaceUs": 73.07, + "inPlaceUs": 72.34, + "correct": true + }, + { + "sizeBytes": 1048576, + "dtype": "float", + "latencyUs": 65.42, + "algBandwidthGbps": 16.03, + "busBandwidthGbps": 28.05, + "outOfPlaceUs": 65.42, + "inPlaceUs": 76.09, + "correct": true + }, + { + "sizeBytes": 2097152, + "dtype": "float", + "latencyUs": 72.46, + "algBandwidthGbps": 28.94, + "busBandwidthGbps": 50.65, + "outOfPlaceUs": 73.93, + "inPlaceUs": 72.46, + "correct": true + }, + { + "sizeBytes": 4194304, + "dtype": "float", + "latencyUs": 62.19, + "algBandwidthGbps": 67.44, + "busBandwidthGbps": 118.02, + "outOfPlaceUs": 63.37, + "inPlaceUs": 62.19, + "correct": true + }, + { + "sizeBytes": 8388608, + "dtype": "float", + "latencyUs": 76.29, + "algBandwidthGbps": 109.96, + "busBandwidthGbps": 192.43, + "outOfPlaceUs": 80.46, + "inPlaceUs": 76.29, + "correct": true + }, + { + "sizeBytes": 16777216, + "dtype": "float", + "latencyUs": 119, + "algBandwidthGbps": 141.02, + "busBandwidthGbps": 246.78, + "outOfPlaceUs": 119, + "inPlaceUs": 127.6, + "correct": true + }, + { + "sizeBytes": 33554432, + "dtype": "float", + "latencyUs": 183, + "algBandwidthGbps": 183.36, + "busBandwidthGbps": 320.89, + "outOfPlaceUs": 184.4, + "inPlaceUs": 183, + "correct": true + }, + { + "sizeBytes": 67108864, + "dtype": "float", + "latencyUs": 323.3, + "algBandwidthGbps": 207.56, + "busBandwidthGbps": 363.23, + "outOfPlaceUs": 323.9, + "inPlaceUs": 323.3, + "correct": true + }, + { + "sizeBytes": 134217728, + "dtype": "float", + "latencyUs": 613.1, + "algBandwidthGbps": 218.91, + "busBandwidthGbps": 383.09, + "outOfPlaceUs": 623.4, + "inPlaceUs": 613.1, + "correct": true + }, + { + "sizeBytes": 268435456, + "dtype": "float", + "latencyUs": 1191.6, + "algBandwidthGbps": 225.28, + "busBandwidthGbps": 394.24, + "outOfPlaceUs": 1191.6, + "inPlaceUs": 1192.7, + "correct": true + }, + { + "sizeBytes": 
536870912, + "dtype": "float", + "latencyUs": 2349.2, + "algBandwidthGbps": 228.54, + "busBandwidthGbps": 399.94, + "outOfPlaceUs": 2349.2, + "inPlaceUs": 2353.4, + "correct": true + }, + { + "sizeBytes": 1073741824, + "dtype": "float", + "latencyUs": 4668.6, + "algBandwidthGbps": 229.99, + "busBandwidthGbps": 402.49, + "outOfPlaceUs": 4668.6, + "inPlaceUs": 4671.6, + "correct": true + }, + { + "sizeBytes": 2147483648, + "dtype": "float", + "latencyUs": 9245.8, + "algBandwidthGbps": 232.27, + "busBandwidthGbps": 406.47, + "outOfPlaceUs": 9245.8, + "inPlaceUs": 9250.8, + "correct": true + }, + { + "sizeBytes": 4294967296, + "dtype": "float", + "latencyUs": 18524, + "algBandwidthGbps": 231.86, + "busBandwidthGbps": 405.76, + "outOfPlaceUs": 18543, + "inPlaceUs": 18524, + "correct": true + }, + { + "sizeBytes": 8589934592, + "dtype": "float", + "latencyUs": 37129, + "algBandwidthGbps": 231.36, + "busBandwidthGbps": 404.87, + "outOfPlaceUs": 37129, + "inPlaceUs": 37136, + "correct": true + } + ] + }, + { + "id": "cxn-300783f6", + "identity": "allreduce-fw|mi355x|nccl|mi355x-xgmi|xgmi|8|allreduce-fw-v1", + "op": "all_reduce", + "sku": "mi355x", + "runner": "mi355x-amds_02", + "topologyClass": "mi355x-xgmi", + "transport": "xgmi", + "worldSize": 8, + "nodes": null, + "dtype": "bf16", + "comparisonClass": null, + "comparisonKey": "cffcc3132d487de4", + "measurementContract": "allreduce-fw-v1", + "avgBusBandwidthGbps": null, + "status": "valid", + "valid": true, + "colorKey": "mi355x_300783f6", + "label": "MI355X · nccl (fw-AR · ws8)", + "generatedAt": "2026-06-28T05:14:21.326557+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-28T05:14:21.326557+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 1024, + "dtype": "bf16", + "latencyUs": 43.632, + "algBandwidthGbps": 0.023, + "busBandwidthGbps": 0.041, + "outOfPlaceUs": null, + "inPlaceUs": null, + "correct": true + }, + { + "sizeBytes": 4096, + "dtype": "bf16", + "latencyUs": 28.193, + 
"algBandwidthGbps": 0.145, + "busBandwidthGbps": 0.254, + "outOfPlaceUs": null, + "inPlaceUs": null, + "correct": true + }, + { + "sizeBytes": 16384, + "dtype": "bf16", + "latencyUs": 26.58, + "algBandwidthGbps": 0.616, + "busBandwidthGbps": 1.079, + "outOfPlaceUs": null, + "inPlaceUs": null, + "correct": true + }, + { + "sizeBytes": 65536, + "dtype": "bf16", + "latencyUs": 26.654, + "algBandwidthGbps": 2.459, + "busBandwidthGbps": 4.303, + "outOfPlaceUs": null, + "inPlaceUs": null, + "correct": true + }, + { + "sizeBytes": 262144, + "dtype": "bf16", + "latencyUs": 28.918, + "algBandwidthGbps": 9.065, + "busBandwidthGbps": 15.864, + "outOfPlaceUs": null, + "inPlaceUs": null, + "correct": true + }, + { + "sizeBytes": 1048576, + "dtype": "bf16", + "latencyUs": 35.083, + "algBandwidthGbps": 29.889, + "busBandwidthGbps": 52.305, + "outOfPlaceUs": null, + "inPlaceUs": null, + "correct": true + }, + { + "sizeBytes": 4194304, + "dtype": "bf16", + "latencyUs": 56.62, + "algBandwidthGbps": 74.078, + "busBandwidthGbps": 129.636, + "outOfPlaceUs": null, + "inPlaceUs": null, + "correct": true + }, + { + "sizeBytes": 16777216, + "dtype": "bf16", + "latencyUs": 115.37, + "algBandwidthGbps": 145.42, + "busBandwidthGbps": 254.486, + "outOfPlaceUs": null, + "inPlaceUs": null, + "correct": true + }, + { + "sizeBytes": 67108864, + "dtype": "bf16", + "latencyUs": 361.633, + "algBandwidthGbps": 185.572, + "busBandwidthGbps": 324.75, + "outOfPlaceUs": null, + "inPlaceUs": null, + "correct": true + } + ] + }, + { + "id": "cxn-35eb6655", + "identity": "nccl|mi355x|alltoall|mi355x-xgmi|xgmi|8|nccl-tests-v1", + "op": "alltoall", + "sku": "mi355x", + "runner": "mi355x-amds_01", + "topologyClass": "mi355x-xgmi", + "transport": "xgmi", + "worldSize": 8, + "nodes": 1, + "dtype": "float", + "comparisonClass": "standardized", + "comparisonKey": "67a9b0532a278ee9", + "measurementContract": "nccl-tests-v1", + "avgBusBandwidthGbps": 108.705, + "status": "valid", + "valid": true, + "colorKey": 
"mi355x_35eb6655", + "label": "MI355X · mi355x-xgmi · xgmi (ws8)", + "generatedAt": "2026-06-29T02:42:52.989210+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-29T02:42:52.989210+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 128, + "dtype": "float", + "latencyUs": 35.84, + "algBandwidthGbps": 0, + "busBandwidthGbps": 0, + "outOfPlaceUs": 35.84, + "inPlaceUs": 77.06, + "correct": true + }, + { + "sizeBytes": 256, + "dtype": "float", + "latencyUs": 49.55, + "algBandwidthGbps": 0.01, + "busBandwidthGbps": 0, + "outOfPlaceUs": 49.55, + "inPlaceUs": 72.32, + "correct": true + }, + { + "sizeBytes": 512, + "dtype": "float", + "latencyUs": 49.25, + "algBandwidthGbps": 0.01, + "busBandwidthGbps": 0.01, + "outOfPlaceUs": 49.25, + "inPlaceUs": 82.36, + "correct": true + }, + { + "sizeBytes": 1024, + "dtype": "float", + "latencyUs": 38.87, + "algBandwidthGbps": 0.03, + "busBandwidthGbps": 0.02, + "outOfPlaceUs": 38.87, + "inPlaceUs": 71.67, + "correct": true + }, + { + "sizeBytes": 2048, + "dtype": "float", + "latencyUs": 38.2, + "algBandwidthGbps": 0.05, + "busBandwidthGbps": 0.05, + "outOfPlaceUs": 38.2, + "inPlaceUs": 81.13, + "correct": true + }, + { + "sizeBytes": 4096, + "dtype": "float", + "latencyUs": 38.76, + "algBandwidthGbps": 0.11, + "busBandwidthGbps": 0.09, + "outOfPlaceUs": 38.76, + "inPlaceUs": 71.12, + "correct": true + }, + { + "sizeBytes": 8192, + "dtype": "float", + "latencyUs": 48.95, + "algBandwidthGbps": 0.17, + "busBandwidthGbps": 0.15, + "outOfPlaceUs": 48.95, + "inPlaceUs": 70.91, + "correct": true + }, + { + "sizeBytes": 16384, + "dtype": "float", + "latencyUs": 50.41, + "algBandwidthGbps": 0.32, + "busBandwidthGbps": 0.28, + "outOfPlaceUs": 50.41, + "inPlaceUs": 81.47, + "correct": true + }, + { + "sizeBytes": 32768, + "dtype": "float", + "latencyUs": 50.09, + "algBandwidthGbps": 0.65, + "busBandwidthGbps": 0.57, + "outOfPlaceUs": 50.09, + "inPlaceUs": 71.15, + "correct": true + }, + { + "sizeBytes": 65536, + 
"dtype": "float", + "latencyUs": 49.62, + "algBandwidthGbps": 1.32, + "busBandwidthGbps": 1.16, + "outOfPlaceUs": 49.62, + "inPlaceUs": 82.19, + "correct": true + }, + { + "sizeBytes": 131072, + "dtype": "float", + "latencyUs": 49.18, + "algBandwidthGbps": 2.67, + "busBandwidthGbps": 2.33, + "outOfPlaceUs": 49.18, + "inPlaceUs": 75.31, + "correct": true + }, + { + "sizeBytes": 262144, + "dtype": "float", + "latencyUs": 62.15, + "algBandwidthGbps": 4.22, + "busBandwidthGbps": 3.69, + "outOfPlaceUs": 62.15, + "inPlaceUs": 80.96, + "correct": true + }, + { + "sizeBytes": 524288, + "dtype": "float", + "latencyUs": 62.11, + "algBandwidthGbps": 8.44, + "busBandwidthGbps": 7.39, + "outOfPlaceUs": 62.11, + "inPlaceUs": 90.5, + "correct": true + }, + { + "sizeBytes": 1048576, + "dtype": "float", + "latencyUs": 53.07, + "algBandwidthGbps": 19.76, + "busBandwidthGbps": 17.29, + "outOfPlaceUs": 53.07, + "inPlaceUs": 72.69, + "correct": true + }, + { + "sizeBytes": 2097152, + "dtype": "float", + "latencyUs": 63.56, + "algBandwidthGbps": 32.99, + "busBandwidthGbps": 28.87, + "outOfPlaceUs": 63.56, + "inPlaceUs": 85.67, + "correct": true + }, + { + "sizeBytes": 4194304, + "dtype": "float", + "latencyUs": 77.11, + "algBandwidthGbps": 54.39, + "busBandwidthGbps": 47.59, + "outOfPlaceUs": 77.11, + "inPlaceUs": 91.21, + "correct": true + }, + { + "sizeBytes": 8388608, + "dtype": "float", + "latencyUs": 69.63, + "algBandwidthGbps": 120.48, + "busBandwidthGbps": 105.42, + "outOfPlaceUs": 69.63, + "inPlaceUs": 83.26, + "correct": true + }, + { + "sizeBytes": 16777216, + "dtype": "float", + "latencyUs": 71.3, + "algBandwidthGbps": 235.3, + "busBandwidthGbps": 205.89, + "outOfPlaceUs": 71.3, + "inPlaceUs": 93.33, + "correct": true + }, + { + "sizeBytes": 33554432, + "dtype": "float", + "latencyUs": 104.3, + "algBandwidthGbps": 321.79, + "busBandwidthGbps": 281.57, + "outOfPlaceUs": 104.3, + "inPlaceUs": 108.6, + "correct": true + }, + { + "sizeBytes": 67108864, + "dtype": "float", + 
"latencyUs": 195.8, + "algBandwidthGbps": 342.66, + "busBandwidthGbps": 299.83, + "outOfPlaceUs": 195.8, + "inPlaceUs": 196.2, + "correct": true + }, + { + "sizeBytes": 134217728, + "dtype": "float", + "latencyUs": 365.1, + "algBandwidthGbps": 367.59, + "busBandwidthGbps": 321.64, + "outOfPlaceUs": 365.1, + "inPlaceUs": 365.9, + "correct": true + }, + { + "sizeBytes": 268435456, + "dtype": "float", + "latencyUs": 697.9, + "algBandwidthGbps": 384.61, + "busBandwidthGbps": 336.54, + "outOfPlaceUs": 698.5, + "inPlaceUs": 697.9, + "correct": true + }, + { + "sizeBytes": 536870912, + "dtype": "float", + "latencyUs": 1353.3, + "algBandwidthGbps": 396.7, + "busBandwidthGbps": 347.11, + "outOfPlaceUs": 1353.3, + "inPlaceUs": 1355.9, + "correct": true + }, + { + "sizeBytes": 1073741824, + "dtype": "float", + "latencyUs": 2675.6, + "algBandwidthGbps": 401.32, + "busBandwidthGbps": 351.15, + "outOfPlaceUs": 2675.6, + "inPlaceUs": 2679, + "correct": true + }, + { + "sizeBytes": 2147483648, + "dtype": "float", + "latencyUs": 5296.7, + "algBandwidthGbps": 405.43, + "busBandwidthGbps": 354.76, + "outOfPlaceUs": 5301, + "inPlaceUs": 5296.7, + "correct": true + }, + { + "sizeBytes": 4294967296, + "dtype": "float", + "latencyUs": 10543, + "algBandwidthGbps": 407.38, + "busBandwidthGbps": 356.46, + "outOfPlaceUs": 10543, + "inPlaceUs": 10668, + "correct": true + }, + { + "sizeBytes": 8589934592, + "dtype": "float", + "latencyUs": 21021, + "algBandwidthGbps": 408.63, + "busBandwidthGbps": 357.55, + "outOfPlaceUs": 21021, + "inPlaceUs": 21415, + "correct": true + } + ] + }, + { + "id": "cxn-9383336f", + "identity": "nccl|mi355x|reduce_scatter|mi355x-xgmi|xgmi|8|nccl-tests-v1", + "op": "reduce_scatter", + "sku": "mi355x", + "runner": "mi355x-amds_01", + "topologyClass": "mi355x-xgmi", + "transport": "xgmi", + "worldSize": 8, + "nodes": 1, + "dtype": "float", + "comparisonClass": "standardized", + "comparisonKey": "fd5d1a361a3ebfa3", + "measurementContract": "nccl-tests-v1", + 
"avgBusBandwidthGbps": 116.588, + "status": "valid", + "valid": true, + "colorKey": "mi355x_9383336f", + "label": "MI355X · mi355x-xgmi · xgmi (ws8)", + "generatedAt": "2026-06-29T02:40:54.838353+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-29T02:40:54.838353+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 128, + "dtype": "float", + "latencyUs": 63.68, + "algBandwidthGbps": 0, + "busBandwidthGbps": 0, + "outOfPlaceUs": 63.68, + "inPlaceUs": 71.14, + "correct": true + }, + { + "sizeBytes": 256, + "dtype": "float", + "latencyUs": 58.12, + "algBandwidthGbps": 0, + "busBandwidthGbps": 0, + "outOfPlaceUs": 58.12, + "inPlaceUs": 69.38, + "correct": true + }, + { + "sizeBytes": 512, + "dtype": "float", + "latencyUs": 59.18, + "algBandwidthGbps": 0.01, + "busBandwidthGbps": 0.01, + "outOfPlaceUs": 59.18, + "inPlaceUs": 70.39, + "correct": true + }, + { + "sizeBytes": 1024, + "dtype": "float", + "latencyUs": 58.61, + "algBandwidthGbps": 0.02, + "busBandwidthGbps": 0.02, + "outOfPlaceUs": 58.61, + "inPlaceUs": 59.64, + "correct": true + }, + { + "sizeBytes": 2048, + "dtype": "float", + "latencyUs": 58.93, + "algBandwidthGbps": 0.03, + "busBandwidthGbps": 0.03, + "outOfPlaceUs": 58.93, + "inPlaceUs": 60.04, + "correct": true + }, + { + "sizeBytes": 4096, + "dtype": "float", + "latencyUs": 59.87, + "algBandwidthGbps": 0.07, + "busBandwidthGbps": 0.06, + "outOfPlaceUs": 59.87, + "inPlaceUs": 59.13, + "correct": true + }, + { + "sizeBytes": 8192, + "dtype": "float", + "latencyUs": 59.51, + "algBandwidthGbps": 0.14, + "busBandwidthGbps": 0.12, + "outOfPlaceUs": 68.98, + "inPlaceUs": 59.51, + "correct": true + }, + { + "sizeBytes": 16384, + "dtype": "float", + "latencyUs": 59.64, + "algBandwidthGbps": 0.27, + "busBandwidthGbps": 0.24, + "outOfPlaceUs": 69.54, + "inPlaceUs": 59.64, + "correct": true + }, + { + "sizeBytes": 32768, + "dtype": "float", + "latencyUs": 59.88, + "algBandwidthGbps": 0.55, + "busBandwidthGbps": 0.48, + "outOfPlaceUs": 
70.63, + "inPlaceUs": 59.88, + "correct": true + }, + { + "sizeBytes": 65536, + "dtype": "float", + "latencyUs": 61.5, + "algBandwidthGbps": 1.07, + "busBandwidthGbps": 0.93, + "outOfPlaceUs": 72.73, + "inPlaceUs": 61.5, + "correct": true + }, + { + "sizeBytes": 131072, + "dtype": "float", + "latencyUs": 65.15, + "algBandwidthGbps": 2.01, + "busBandwidthGbps": 1.76, + "outOfPlaceUs": 74.45, + "inPlaceUs": 65.15, + "correct": true + }, + { + "sizeBytes": 262144, + "dtype": "float", + "latencyUs": 69.72, + "algBandwidthGbps": 3.76, + "busBandwidthGbps": 3.29, + "outOfPlaceUs": 70.1, + "inPlaceUs": 69.72, + "correct": true + }, + { + "sizeBytes": 524288, + "dtype": "float", + "latencyUs": 68.53, + "algBandwidthGbps": 7.65, + "busBandwidthGbps": 6.69, + "outOfPlaceUs": 68.53, + "inPlaceUs": 68.85, + "correct": true + }, + { + "sizeBytes": 1048576, + "dtype": "float", + "latencyUs": 72, + "algBandwidthGbps": 14.56, + "busBandwidthGbps": 12.74, + "outOfPlaceUs": 72, + "inPlaceUs": 83.69, + "correct": true + }, + { + "sizeBytes": 2097152, + "dtype": "float", + "latencyUs": 76.95, + "algBandwidthGbps": 27.25, + "busBandwidthGbps": 23.85, + "outOfPlaceUs": 76.95, + "inPlaceUs": 86.59, + "correct": true + }, + { + "sizeBytes": 4194304, + "dtype": "float", + "latencyUs": 76.39, + "algBandwidthGbps": 54.91, + "busBandwidthGbps": 48.04, + "outOfPlaceUs": 76.39, + "inPlaceUs": 87.44, + "correct": true + }, + { + "sizeBytes": 8388608, + "dtype": "float", + "latencyUs": 78.25, + "algBandwidthGbps": 107.21, + "busBandwidthGbps": 93.8, + "outOfPlaceUs": 78.25, + "inPlaceUs": 89.31, + "correct": true + }, + { + "sizeBytes": 16777216, + "dtype": "float", + "latencyUs": 82.07, + "algBandwidthGbps": 204.42, + "busBandwidthGbps": 178.87, + "outOfPlaceUs": 82.07, + "inPlaceUs": 86.71, + "correct": true + }, + { + "sizeBytes": 33554432, + "dtype": "float", + "latencyUs": 115.6, + "algBandwidthGbps": 290.35, + "busBandwidthGbps": 254.06, + "outOfPlaceUs": 117.1, + "inPlaceUs": 115.6, + 
"correct": true + }, + { + "sizeBytes": 67108864, + "dtype": "float", + "latencyUs": 187.1, + "algBandwidthGbps": 358.71, + "busBandwidthGbps": 313.88, + "outOfPlaceUs": 192.5, + "inPlaceUs": 187.1, + "correct": true + }, + { + "sizeBytes": 134217728, + "dtype": "float", + "latencyUs": 323.5, + "algBandwidthGbps": 414.87, + "busBandwidthGbps": 363.02, + "outOfPlaceUs": 342.8, + "inPlaceUs": 323.5, + "correct": true + }, + { + "sizeBytes": 268435456, + "dtype": "float", + "latencyUs": 604.1, + "algBandwidthGbps": 444.38, + "busBandwidthGbps": 388.83, + "outOfPlaceUs": 658.6, + "inPlaceUs": 604.1, + "correct": true + }, + { + "sizeBytes": 536870912, + "dtype": "float", + "latencyUs": 1173.6, + "algBandwidthGbps": 457.48, + "busBandwidthGbps": 400.29, + "outOfPlaceUs": 1259.9, + "inPlaceUs": 1173.6, + "correct": true + }, + { + "sizeBytes": 1073741824, + "dtype": "float", + "latencyUs": 2311.8, + "algBandwidthGbps": 464.46, + "busBandwidthGbps": 406.4, + "outOfPlaceUs": 2481, + "inPlaceUs": 2311.8, + "correct": true + }, + { + "sizeBytes": 2147483648, + "dtype": "float", + "latencyUs": 4567.5, + "algBandwidthGbps": 470.17, + "busBandwidthGbps": 411.4, + "outOfPlaceUs": 4853.2, + "inPlaceUs": 4567.5, + "correct": true + }, + { + "sizeBytes": 4294967296, + "dtype": "float", + "latencyUs": 9220.5, + "algBandwidthGbps": 465.81, + "busBandwidthGbps": 407.58, + "outOfPlaceUs": 9610.1, + "inPlaceUs": 9220.5, + "correct": true + }, + { + "sizeBytes": 8589934592, + "dtype": "float", + "latencyUs": 19037, + "algBandwidthGbps": 451.22, + "busBandwidthGbps": 394.82, + "outOfPlaceUs": 19087, + "inPlaceUs": 19037, + "correct": true + } + ] + } + ], + "offload": [ + { + "id": "cxt-2254035a", + "identity": "offload|b300|b300-nvlink-island|nvlink|d2h|pageable|us", + "cohortIdentity": "offload|b300|b300-nvlink-island|nvlink", + "family": "offload", + "sku": "b300", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "operation": "d2h", + "subtype": "pageable", + 
"valid": true, + "status": "valid", + "note": "peak 58 GB/s · copy/compute overlap 26% · 8 NUMA node(s)", + "peakBandwidthGbps": 57.71, + "latencyUnit": "us", + "colorKey": "b300_2254035a", + "label": "B300 · d2h · pageable", + "generatedAt": "2026-06-27T13:14:13.476946+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-27T13:14:13.476946+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 4096, + "bandwidthGbps": 0.319, + "latency": 12.8224, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 16384, + "bandwidthGbps": 1.197, + "latency": 13.6896, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 65536, + "bandwidthGbps": 4.07, + "latency": 16.1008, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 262144, + "bandwidthGbps": 10.171, + "latency": 25.7744, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 1048576, + "bandwidthGbps": 16.232, + "latency": 64.5984, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 22.845, + "latency": 183.6016, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 25.057, + "latency": 669.5584, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 25.741, + "latency": 2607.0801, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 25.884, + "latency": 10370.5231, + "sizeClass": null, + "correct": null + } + ] + }, + { + "id": "cxt-ec9c695d", + "identity": "offload|b300|b300-nvlink-island|nvlink|d2h|pinned|us", + "cohortIdentity": "offload|b300|b300-nvlink-island|nvlink", + "family": "offload", + "sku": "b300", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "operation": "d2h", + "subtype": "pinned", + "valid": true, + "status": "valid", + "note": "peak 58 GB/s · copy/compute overlap 26% · 8 NUMA node(s)", + "peakBandwidthGbps": 57.71, + "latencyUnit": "us", + "colorKey": "b300_ec9c695d", 
+ "label": "B300 · d2h · pinned", + "generatedAt": "2026-06-27T13:14:13.476946+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-27T13:14:13.476946+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 4096, + "bandwidthGbps": 1.314, + "latency": 3.1168, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 16384, + "bandwidthGbps": 4.911, + "latency": 3.336, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 65536, + "bandwidthGbps": 16.26, + "latency": 4.0304, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 262144, + "bandwidthGbps": 35.371, + "latency": 7.4112, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 1048576, + "bandwidthGbps": 49.656, + "latency": 21.1168, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 55.179, + "latency": 76.0128, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 56.698, + "latency": 295.9056, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 57.243, + "latency": 1172.3568, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 57.376, + "latency": 4678.5118, + "sizeClass": null, + "correct": null + } + ] + }, + { + "id": "cxt-0325201a", + "identity": "offload|b300|b300-nvlink-island|nvlink|h2d|pageable|us", + "cohortIdentity": "offload|b300|b300-nvlink-island|nvlink", + "family": "offload", + "sku": "b300", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "operation": "h2d", + "subtype": "pageable", + "valid": true, + "status": "valid", + "note": "peak 58 GB/s · copy/compute overlap 26% · 8 NUMA node(s)", + "peakBandwidthGbps": 57.71, + "latencyUnit": "us", + "colorKey": "b300_0325201a", + "label": "B300 · h2d · pageable", + "generatedAt": "2026-06-27T13:14:13.476946+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-27T13:14:13.476946+00:00", + "sha": null 
+ }, + "rows": [ + { + "sizeBytes": 4096, + "bandwidthGbps": 0.48, + "latency": 8.5408, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 16384, + "bandwidthGbps": 1.73, + "latency": 9.4704, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 65536, + "bandwidthGbps": 4.35, + "latency": 15.0656, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 262144, + "bandwidthGbps": 11.573, + "latency": 22.6512, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 1048576, + "bandwidthGbps": 19.272, + "latency": 54.408, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 32.974, + "latency": 127.2, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 38.009, + "latency": 441.4016, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 39.678, + "latency": 1691.3168, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 40.13, + "latency": 6689.2288, + "sizeClass": null, + "correct": null + } + ] + }, + { + "id": "cxt-6112e71d", + "identity": "offload|b300|b300-nvlink-island|nvlink|h2d|pinned|us", + "cohortIdentity": "offload|b300|b300-nvlink-island|nvlink", + "family": "offload", + "sku": "b300", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "operation": "h2d", + "subtype": "pinned", + "valid": true, + "status": "valid", + "note": "peak 58 GB/s · copy/compute overlap 26% · 8 NUMA node(s)", + "peakBandwidthGbps": 57.71, + "latencyUnit": "us", + "colorKey": "b300_6112e71d", + "label": "B300 · h2d · pinned", + "generatedAt": "2026-06-27T13:14:13.476946+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-27T13:14:13.476946+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 4096, + "bandwidthGbps": 1.204, + "latency": 3.4032, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 16384, + "bandwidthGbps": 4.481, + "latency": 
3.656, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 65536, + "bandwidthGbps": 15.087, + "latency": 4.344, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 262144, + "bandwidthGbps": 32.966, + "latency": 7.952, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 1048576, + "bandwidthGbps": 49.231, + "latency": 21.2992, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 55.149, + "latency": 76.0544, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 57.026, + "latency": 294.2016, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 57.572, + "latency": 1165.6432, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 57.71, + "latency": 4651.4656, + "sizeClass": null, + "correct": null + } + ] + }, + { + "id": "cxt-cdb189fe", + "identity": "offload|h100|h100-nvlink-island|nvlink|d2h|pageable|us", + "cohortIdentity": "offload|h100|h100-nvlink-island|nvlink", + "family": "offload", + "sku": "h100", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "operation": "d2h", + "subtype": "pageable", + "valid": true, + "status": "valid", + "note": "peak 55 GB/s · copy/compute overlap 55% · 2 NUMA node(s)", + "peakBandwidthGbps": 55.199, + "latencyUnit": "us", + "colorKey": "h100_cdb189fe", + "label": "H100 · d2h · pageable", + "generatedAt": "2026-06-27T13:13:55.178101+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-27T13:13:55.178101+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 4096, + "bandwidthGbps": 0.291, + "latency": 14.0992, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 16384, + "bandwidthGbps": 1.049, + "latency": 15.6128, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 65536, + "bandwidthGbps": 2.957, + "latency": 22.1648, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 
262144, + "bandwidthGbps": 6.642, + "latency": 39.4656, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 1048576, + "bandwidthGbps": 9.239, + "latency": 113.4928, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 12.413, + "latency": 337.9072, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 13.847, + "latency": 1211.6448, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 14.279, + "latency": 4699.8737, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 14.534, + "latency": 18469.5724, + "sizeClass": null, + "correct": null + } + ] + }, + { + "id": "cxt-0606c0a1", + "identity": "offload|h100|h100-nvlink-island|nvlink|d2h|pinned|us", + "cohortIdentity": "offload|h100|h100-nvlink-island|nvlink", + "family": "offload", + "sku": "h100", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "operation": "d2h", + "subtype": "pinned", + "valid": true, + "status": "valid", + "note": "peak 55 GB/s · copy/compute overlap 55% · 2 NUMA node(s)", + "peakBandwidthGbps": 55.199, + "latencyUnit": "us", + "colorKey": "h100_0606c0a1", + "label": "H100 · d2h · pinned", + "generatedAt": "2026-06-27T13:13:55.178101+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-27T13:13:55.178101+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 4096, + "bandwidthGbps": 0.862, + "latency": 4.7504, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 16384, + "bandwidthGbps": 3.537, + "latency": 4.632, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 65536, + "bandwidthGbps": 13.999, + "latency": 4.6816, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 262144, + "bandwidthGbps": 34.756, + "latency": 7.5424, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 1048576, + "bandwidthGbps": 46.25, + "latency": 22.672, + "sizeClass": 
null, + "correct": null + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 52.69, + "latency": 79.6032, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 54.319, + "latency": 308.8672, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 54.768, + "latency": 1225.3216, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 54.899, + "latency": 4889.6255, + "sizeClass": null, + "correct": null + } + ] + }, + { + "id": "cxt-6119c3de", + "identity": "offload|h100|h100-nvlink-island|nvlink|h2d|pageable|us", + "cohortIdentity": "offload|h100|h100-nvlink-island|nvlink", + "family": "offload", + "sku": "h100", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "operation": "h2d", + "subtype": "pageable", + "valid": true, + "status": "valid", + "note": "peak 55 GB/s · copy/compute overlap 55% · 2 NUMA node(s)", + "peakBandwidthGbps": 55.199, + "latencyUnit": "us", + "colorKey": "h100_6119c3de", + "label": "H100 · h2d · pageable", + "generatedAt": "2026-06-27T13:13:55.178101+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-27T13:13:55.178101+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 4096, + "bandwidthGbps": 0.367, + "latency": 11.168, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 16384, + "bandwidthGbps": 1.141, + "latency": 14.3536, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 65536, + "bandwidthGbps": 2.491, + "latency": 26.3136, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 262144, + "bandwidthGbps": 8.311, + "latency": 31.5408, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 1048576, + "bandwidthGbps": 12.373, + "latency": 84.7456, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 16.967, + "latency": 247.208, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 16777216, + 
"bandwidthGbps": 18.73, + "latency": 895.7264, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 19.219, + "latency": 3491.8175, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 19.362, + "latency": 13864.0869, + "sizeClass": null, + "correct": null + } + ] + }, + { + "id": "cxt-60747181", + "identity": "offload|h100|h100-nvlink-island|nvlink|h2d|pinned|us", + "cohortIdentity": "offload|h100|h100-nvlink-island|nvlink", + "family": "offload", + "sku": "h100", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "operation": "h2d", + "subtype": "pinned", + "valid": true, + "status": "valid", + "note": "peak 55 GB/s · copy/compute overlap 55% · 2 NUMA node(s)", + "peakBandwidthGbps": 55.199, + "latencyUnit": "us", + "colorKey": "h100_60747181", + "label": "H100 · h2d · pinned", + "generatedAt": "2026-06-27T13:13:55.178101+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-27T13:13:55.178101+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 4096, + "bandwidthGbps": 0.64, + "latency": 6.3984, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 16384, + "bandwidthGbps": 3.19, + "latency": 5.136, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 65536, + "bandwidthGbps": 12.693, + "latency": 5.1632, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 262144, + "bandwidthGbps": 33.464, + "latency": 7.8336, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 1048576, + "bandwidthGbps": 47.39, + "latency": 22.1264, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 52.967, + "latency": 79.1872, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 54.546, + "latency": 307.5808, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 55.077, + "latency": 1218.4512, + "sizeClass": null, + 
"correct": null + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 55.199, + "latency": 4863.0142, + "sizeClass": null, + "correct": null + } + ] + }, + { + "id": "cxt-5472a2f0", + "identity": "offload|h200|h200-nvlink-island|nvlink|d2h|pageable|us", + "cohortIdentity": "offload|h200|h200-nvlink-island|nvlink", + "family": "offload", + "sku": "h200", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "operation": "d2h", + "subtype": "pageable", + "valid": true, + "status": "valid", + "note": "peak 55 GB/s · copy/compute overlap 0% · 2 NUMA node(s)", + "peakBandwidthGbps": 54.738, + "latencyUnit": "us", + "colorKey": "h200_5472a2f0", + "label": "H200 · d2h · pageable", + "generatedAt": "2026-06-27T13:14:28.000433+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-27T13:14:28.000433+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 4096, + "bandwidthGbps": 0.349, + "latency": 11.7232, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 16384, + "bandwidthGbps": 1.05, + "latency": 15.5984, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 65536, + "bandwidthGbps": 3.61, + "latency": 18.1552, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 262144, + "bandwidthGbps": 6.584, + "latency": 39.8176, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 1048576, + "bandwidthGbps": 8.775, + "latency": 119.4976, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 13.542, + "latency": 309.7312, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 15.692, + "latency": 1069.1856, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 15.898, + "latency": 4221.0976, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 16.284, + "latency": 16484.2148, + "sizeClass": null, + "correct": null + } + ] + }, + { + "id": "cxt-a653b433", + 
"identity": "offload|h200|h200-nvlink-island|nvlink|d2h|pinned|us", + "cohortIdentity": "offload|h200|h200-nvlink-island|nvlink", + "family": "offload", + "sku": "h200", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "operation": "d2h", + "subtype": "pinned", + "valid": true, + "status": "valid", + "note": "peak 55 GB/s · copy/compute overlap 0% · 2 NUMA node(s)", + "peakBandwidthGbps": 54.738, + "latencyUnit": "us", + "colorKey": "h200_a653b433", + "label": "H200 · d2h · pinned", + "generatedAt": "2026-06-27T13:14:28.000433+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-27T13:14:28.000433+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 4096, + "bandwidthGbps": 1.066, + "latency": 3.8416, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 16384, + "bandwidthGbps": 4.38, + "latency": 3.7408, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 65536, + "bandwidthGbps": 16.019, + "latency": 4.0912, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 262144, + "bandwidthGbps": 36.28, + "latency": 7.2256, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 1048576, + "bandwidthGbps": 46.925, + "latency": 22.3456, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 50.673, + "latency": 82.7712, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 53.181, + "latency": 315.4752, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 53.519, + "latency": 1253.9344, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 51.961, + "latency": 5166.0847, + "sizeClass": null, + "correct": null + } + ] + }, + { + "id": "cxt-88606cb0", + "identity": "offload|h200|h200-nvlink-island|nvlink|h2d|pageable|us", + "cohortIdentity": "offload|h200|h200-nvlink-island|nvlink", + "family": "offload", + "sku": "h200", + "topologyClass": 
"h200-nvlink-island", + "transport": "nvlink", + "operation": "h2d", + "subtype": "pageable", + "valid": true, + "status": "valid", + "note": "peak 55 GB/s · copy/compute overlap 0% · 2 NUMA node(s)", + "peakBandwidthGbps": 54.738, + "latencyUnit": "us", + "colorKey": "h200_88606cb0", + "label": "H200 · h2d · pageable", + "generatedAt": "2026-06-27T13:14:28.000433+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-27T13:14:28.000433+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 4096, + "bandwidthGbps": 0.408, + "latency": 10.048, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 16384, + "bandwidthGbps": 1.057, + "latency": 15.5072, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 65536, + "bandwidthGbps": 2.766, + "latency": 23.6976, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 262144, + "bandwidthGbps": 9.51, + "latency": 27.5664, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 1048576, + "bandwidthGbps": 13.367, + "latency": 78.4464, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 18.167, + "latency": 230.8736, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 20.785, + "latency": 807.1696, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 21.442, + "latency": 3129.8529, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 21.303, + "latency": 12600.544, + "sizeClass": null, + "correct": null + } + ] + }, + { + "id": "cxt-543138f3", + "identity": "offload|h200|h200-nvlink-island|nvlink|h2d|pinned|us", + "cohortIdentity": "offload|h200|h200-nvlink-island|nvlink", + "family": "offload", + "sku": "h200", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "operation": "h2d", + "subtype": "pinned", + "valid": true, + "status": "valid", + "note": "peak 55 GB/s · copy/compute overlap 0% · 2 NUMA 
node(s)", + "peakBandwidthGbps": 54.738, + "latencyUnit": "us", + "colorKey": "h200_543138f3", + "label": "H200 · h2d · pinned", + "generatedAt": "2026-06-27T13:14:28.000433+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-27T13:14:28.000433+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 4096, + "bandwidthGbps": 0.882, + "latency": 4.6464, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 16384, + "bandwidthGbps": 4.6, + "latency": 3.5616, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 65536, + "bandwidthGbps": 16.773, + "latency": 3.9072, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 262144, + "bandwidthGbps": 35.219, + "latency": 7.4432, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 1048576, + "bandwidthGbps": 47.742, + "latency": 21.9632, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 52.79, + "latency": 79.4528, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 54.738, + "latency": 306.4976, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 53.864, + "latency": 1245.9056, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 54.328, + "latency": 4940.9775, + "sizeClass": null, + "correct": null + } + ] + } + ], + "copyEngine": [ + { + "id": "cxt-6e3131b7", + "identity": "copy-engine|b300|b300-nvlink-island|nvlink|dtod|copy-engine|us", + "cohortIdentity": "copy-engine|b300|b300-nvlink-island|nvlink", + "family": "copy-engine", + "sku": "b300", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "operation": "dtod", + "subtype": "copy-engine", + "valid": true, + "status": "valid", + "note": "peak 33743 GB/s · copy-engine uses near-zero SMs: no", + "peakBandwidthGbps": 33743.395, + "latencyUnit": "us", + "colorKey": "b300_6e3131b7", + "label": "B300 · dtod · copy-engine", + "generatedAt": 
"2026-06-27T13:14:14.567612+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-27T13:14:14.567612+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 65536, + "bandwidthGbps": 7.729, + "latency": 8.4789, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 262144, + "bandwidthGbps": 31.851, + "latency": 8.2304, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 1048576, + "bandwidthGbps": 131.475, + "latency": 7.9755, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 506.069, + "latency": 8.288, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 2092.131, + "latency": 8.0192, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 8232.735, + "latency": 8.1515, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 33743.395, + "latency": 7.9552, + "sizeClass": null, + "correct": null + } + ] + }, + { + "id": "cxt-214329f7", + "identity": "copy-engine|b300|b300-nvlink-island|nvlink|dtod|sm|us", + "cohortIdentity": "copy-engine|b300|b300-nvlink-island|nvlink", + "family": "copy-engine", + "sku": "b300", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "operation": "dtod", + "subtype": "sm", + "valid": true, + "status": "valid", + "note": "peak 33743 GB/s · copy-engine uses near-zero SMs: no", + "peakBandwidthGbps": 33743.395, + "latencyUnit": "us", + "colorKey": "b300_214329f7", + "label": "B300 · dtod · sm", + "generatedAt": "2026-06-27T13:14:14.567612+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-27T13:14:14.567612+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 65536, + "bandwidthGbps": 7.772, + "latency": 8.432, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 262144, + "bandwidthGbps": 31.011, + "latency": 8.4533, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 1048576, + 
"bandwidthGbps": 127.139, + "latency": 8.2475, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 515.355, + "latency": 8.1387, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 2004.925, + "latency": 8.368, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 8245.683, + "latency": 8.1387, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 32844.98, + "latency": 8.1728, + "sizeClass": null, + "correct": null + } + ] + }, + { + "id": "cxt-64e7ea33", + "identity": "copy-engine|b300|b300-nvlink-island|nvlink|htod|copy-engine|us", + "cohortIdentity": "copy-engine|b300|b300-nvlink-island|nvlink", + "family": "copy-engine", + "sku": "b300", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "operation": "htod", + "subtype": "copy-engine", + "valid": true, + "status": "valid", + "note": "peak 33743 GB/s · copy-engine uses near-zero SMs: no", + "peakBandwidthGbps": 33743.395, + "latencyUnit": "us", + "colorKey": "b300_64e7ea33", + "label": "B300 · htod · copy-engine", + "generatedAt": "2026-06-27T13:14:14.567612+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-27T13:14:14.567612+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 65536, + "bandwidthGbps": 8.922, + "latency": 7.3451, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 262144, + "bandwidthGbps": 35.159, + "latency": 7.456, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 1048576, + "bandwidthGbps": 139.617, + "latency": 7.5104, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 525.479, + "latency": 7.9819, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 2004.925, + "latency": 8.368, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 8211.245, + 
"latency": 8.1728, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 32556.046, + "latency": 8.2453, + "sizeClass": null, + "correct": null + } + ] + }, + { + "id": "cxt-4b3f523b", + "identity": "copy-engine|b300|b300-nvlink-island|nvlink|htod|sm|us", + "cohortIdentity": "copy-engine|b300|b300-nvlink-island|nvlink", + "family": "copy-engine", + "sku": "b300", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "operation": "htod", + "subtype": "sm", + "valid": true, + "status": "valid", + "note": "peak 33743 GB/s · copy-engine uses near-zero SMs: no", + "peakBandwidthGbps": 33743.395, + "latencyUnit": "us", + "colorKey": "b300_4b3f523b", + "label": "B300 · htod · sm", + "generatedAt": "2026-06-27T13:14:14.567612+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-27T13:14:14.567612+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 65536, + "bandwidthGbps": 7.918, + "latency": 8.2773, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 262144, + "bandwidthGbps": 31.703, + "latency": 8.2688, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 1048576, + "bandwidthGbps": 127.9, + "latency": 8.1984, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 509.743, + "latency": 8.2283, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 2022.716, + "latency": 8.2944, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 8166.48, + "latency": 8.2176, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 32413.478, + "latency": 8.2816, + "sizeClass": null, + "correct": null + } + ] + }, + { + "id": "cxt-ff8a9f33", + "identity": "copy-engine|h100|h100-nvlink-island|nvlink|dtod|copy-engine|us", + "cohortIdentity": "copy-engine|h100|h100-nvlink-island|nvlink", + "family": "copy-engine", + "sku": "h100", + 
"topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "operation": "dtod", + "subtype": "copy-engine", + "valid": true, + "status": "valid", + "note": "peak 18919 GB/s · copy-engine uses near-zero SMs: no", + "peakBandwidthGbps": 18918.827, + "latencyUnit": "us", + "colorKey": "h100_ff8a9f33", + "label": "H100 · dtod · copy-engine", + "generatedAt": "2026-06-27T13:14:03.281164+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-27T13:14:03.281164+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 65536, + "bandwidthGbps": 4.208, + "latency": 15.5744, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 262144, + "bandwidthGbps": 17.496, + "latency": 14.9835, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 1048576, + "bandwidthGbps": 70.967, + "latency": 14.7755, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 283.154, + "latency": 14.8128, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 1120.673, + "latency": 14.9707, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 4411.651, + "latency": 15.2117, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 17557.959, + "latency": 15.2885, + "sizeClass": null, + "correct": null + } + ] + }, + { + "id": "cxt-b4f7093b", + "identity": "copy-engine|h100|h100-nvlink-island|nvlink|dtod|sm|us", + "cohortIdentity": "copy-engine|h100|h100-nvlink-island|nvlink", + "family": "copy-engine", + "sku": "h100", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "operation": "dtod", + "subtype": "sm", + "valid": true, + "status": "valid", + "note": "peak 18919 GB/s · copy-engine uses near-zero SMs: no", + "peakBandwidthGbps": 18918.827, + "latencyUnit": "us", + "colorKey": "h100_b4f7093b", + "label": "H100 · dtod · sm", + "generatedAt": "2026-06-27T13:14:03.281164+00:00", + "run": { + "id": 
null, + "url": null, + "createdAt": "2026-06-27T13:14:03.281164+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 65536, + "bandwidthGbps": 4.26, + "latency": 15.3856, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 262144, + "bandwidthGbps": 17.366, + "latency": 15.0955, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 1048576, + "bandwidthGbps": 68.961, + "latency": 15.2053, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 274.803, + "latency": 15.2629, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 1060.954, + "latency": 15.8133, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 4300.674, + "latency": 15.6043, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 17342.584, + "latency": 15.4784, + "sizeClass": null, + "correct": null + } + ] + }, + { + "id": "cxt-08d3e6b7", + "identity": "copy-engine|h100|h100-nvlink-island|nvlink|htod|copy-engine|us", + "cohortIdentity": "copy-engine|h100|h100-nvlink-island|nvlink", + "family": "copy-engine", + "sku": "h100", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "operation": "htod", + "subtype": "copy-engine", + "valid": true, + "status": "valid", + "note": "peak 18919 GB/s · copy-engine uses near-zero SMs: no", + "peakBandwidthGbps": 18918.827, + "latencyUnit": "us", + "colorKey": "h100_08d3e6b7", + "label": "H100 · htod · copy-engine", + "generatedAt": "2026-06-27T13:14:03.281164+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-27T13:14:03.281164+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 65536, + "bandwidthGbps": 4.543, + "latency": 14.4267, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 262144, + "bandwidthGbps": 18.364, + "latency": 14.2752, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 1048576, + "bandwidthGbps": 73.192, 
+ "latency": 14.3264, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 290.776, + "latency": 14.4245, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 1184.475, + "latency": 14.1643, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 4735.759, + "latency": 14.1707, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 18918.827, + "latency": 14.1888, + "sizeClass": null, + "correct": null + } + ] + }, + { + "id": "cxt-8afae0f7", + "identity": "copy-engine|h100|h100-nvlink-island|nvlink|htod|sm|us", + "cohortIdentity": "copy-engine|h100|h100-nvlink-island|nvlink", + "family": "copy-engine", + "sku": "h100", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "operation": "htod", + "subtype": "sm", + "valid": true, + "status": "valid", + "note": "peak 18919 GB/s · copy-engine uses near-zero SMs: no", + "peakBandwidthGbps": 18918.827, + "latencyUnit": "us", + "colorKey": "h100_8afae0f7", + "label": "H100 · htod · sm", + "generatedAt": "2026-06-27T13:14:03.281164+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-27T13:14:03.281164+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 65536, + "bandwidthGbps": 4.217, + "latency": 15.5403, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 262144, + "bandwidthGbps": 17.286, + "latency": 15.1648, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 1048576, + "bandwidthGbps": 69.404, + "latency": 15.1083, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 272.31, + "latency": 15.4027, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 1106.715, + "latency": 15.1595, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 4303.322, + "latency": 15.5947, + "sizeClass": null, + 
"correct": null + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 17472.627, + "latency": 15.3632, + "sizeClass": null, + "correct": null + } + ] + }, + { + "id": "cxt-568b3ed1", + "identity": "copy-engine|h200|h200-nvlink-island|nvlink|dtod|copy-engine|us", + "cohortIdentity": "copy-engine|h200|h200-nvlink-island|nvlink", + "family": "copy-engine", + "sku": "h200", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "operation": "dtod", + "subtype": "copy-engine", + "valid": true, + "status": "valid", + "note": "peak 21990 GB/s · copy-engine uses near-zero SMs: no", + "peakBandwidthGbps": 21990.41, + "latencyUnit": "us", + "colorKey": "h200_568b3ed1", + "label": "H200 · dtod · copy-engine", + "generatedAt": "2026-06-27T13:14:32.919518+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-27T13:14:32.919518+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 65536, + "bandwidthGbps": 3.796, + "latency": 17.264, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 262144, + "bandwidthGbps": 16.942, + "latency": 15.4731, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 1048576, + "bandwidthGbps": 68.576, + "latency": 15.2907, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 252.563, + "latency": 16.6069, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 1049.556, + "latency": 15.9851, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 3952.168, + "latency": 16.9803, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 17644.131, + "latency": 15.2139, + "sizeClass": null, + "correct": null + } + ] + }, + { + "id": "cxt-d2f1fcf5", + "identity": "copy-engine|h200|h200-nvlink-island|nvlink|dtod|sm|us", + "cohortIdentity": "copy-engine|h200|h200-nvlink-island|nvlink", + "family": "copy-engine", + "sku": "h200", + "topologyClass": 
"h200-nvlink-island", + "transport": "nvlink", + "operation": "dtod", + "subtype": "sm", + "valid": true, + "status": "valid", + "note": "peak 21990 GB/s · copy-engine uses near-zero SMs: no", + "peakBandwidthGbps": 21990.41, + "latencyUnit": "us", + "colorKey": "h200_d2f1fcf5", + "label": "H200 · dtod · sm", + "generatedAt": "2026-06-27T13:14:32.919518+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-27T13:14:32.919518+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 65536, + "bandwidthGbps": 3.494, + "latency": 18.7584, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 262144, + "bandwidthGbps": 15.049, + "latency": 17.4197, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 1048576, + "bandwidthGbps": 63.181, + "latency": 16.5963, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 241.207, + "latency": 17.3888, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 959.414, + "latency": 17.4869, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 4005.001, + "latency": 16.7563, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 16321.308, + "latency": 16.4469, + "sizeClass": null, + "correct": null + } + ] + }, + { + "id": "cxt-16dd6dad", + "identity": "copy-engine|h200|h200-nvlink-island|nvlink|htod|copy-engine|us", + "cohortIdentity": "copy-engine|h200|h200-nvlink-island|nvlink", + "family": "copy-engine", + "sku": "h200", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "operation": "htod", + "subtype": "copy-engine", + "valid": true, + "status": "valid", + "note": "peak 21990 GB/s · copy-engine uses near-zero SMs: no", + "peakBandwidthGbps": 21990.41, + "latencyUnit": "us", + "colorKey": "h200_16dd6dad", + "label": "H200 · htod · copy-engine", + "generatedAt": "2026-06-27T13:14:32.919518+00:00", + "run": { + "id": null, + 
"url": null, + "createdAt": "2026-06-27T13:14:32.919518+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 65536, + "bandwidthGbps": 4.478, + "latency": 14.6357, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 262144, + "bandwidthGbps": 19.382, + "latency": 13.5253, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 1048576, + "bandwidthGbps": 73.252, + "latency": 14.3147, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 300.417, + "latency": 13.9616, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 1322.624, + "latency": 12.6848, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 5426.008, + "latency": 12.368, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 21990.41, + "latency": 12.2069, + "sizeClass": null, + "correct": null + } + ] + }, + { + "id": "cxt-f87dced9", + "identity": "copy-engine|h200|h200-nvlink-island|nvlink|htod|sm|us", + "cohortIdentity": "copy-engine|h200|h200-nvlink-island|nvlink", + "family": "copy-engine", + "sku": "h200", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "operation": "htod", + "subtype": "sm", + "valid": true, + "status": "valid", + "note": "peak 21990 GB/s · copy-engine uses near-zero SMs: no", + "peakBandwidthGbps": 21990.41, + "latencyUnit": "us", + "colorKey": "h200_f87dced9", + "label": "H200 · htod · sm", + "generatedAt": "2026-06-27T13:14:32.919518+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-27T13:14:32.919518+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 65536, + "bandwidthGbps": 4.164, + "latency": 15.7387, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 262144, + "bandwidthGbps": 16.954, + "latency": 15.4624, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 1048576, + "bandwidthGbps": 65.654, + "latency": 15.9712, + "sizeClass": 
null, + "correct": null + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 283.195, + "latency": 14.8107, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 1151.185, + "latency": 14.5739, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 4718.356, + "latency": 14.2229, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 18381.29, + "latency": 14.6037, + "sizeClass": null, + "correct": null + } + ] + }, + { + "id": "cxt-0f7ea2f3", + "identity": "copy-engine|mi355x|mi355x-xgmi|xgmi|dtod|copy-engine|us", + "cohortIdentity": "copy-engine|mi355x|mi355x-xgmi|xgmi", + "family": "copy-engine", + "sku": "mi355x", + "topologyClass": "mi355x-xgmi", + "transport": "xgmi", + "operation": "dtod", + "subtype": "copy-engine", + "valid": true, + "status": "valid", + "note": "peak 27738 GB/s · copy-engine uses near-zero SMs: no", + "peakBandwidthGbps": 27738.291, + "latencyUnit": "us", + "colorKey": "mi355x_0f7ea2f3", + "label": "MI355X · dtod · copy-engine", + "generatedAt": "2026-06-29T00:49:25.677922+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-29T00:49:25.677922+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 65536, + "bandwidthGbps": 6.29, + "latency": 10.4187, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 262144, + "bandwidthGbps": 25.583, + "latency": 10.2468, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 1048576, + "bandwidthGbps": 101.083, + "latency": 10.3734, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 390.576, + "latency": 10.7388, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 1611.113, + "latency": 10.4134, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 6501.915, + "latency": 10.3214, + "sizeClass": null, + "correct": null + }, + { + 
"sizeBytes": 268435456, + "bandwidthGbps": 26296.406, + "latency": 10.2081, + "sizeClass": null, + "correct": null + } + ] + }, + { + "id": "cxt-a301ee7b", + "identity": "copy-engine|mi355x|mi355x-xgmi|xgmi|dtod|sm|us", + "cohortIdentity": "copy-engine|mi355x|mi355x-xgmi|xgmi", + "family": "copy-engine", + "sku": "mi355x", + "topologyClass": "mi355x-xgmi", + "transport": "xgmi", + "operation": "dtod", + "subtype": "sm", + "valid": true, + "status": "valid", + "note": "peak 27738 GB/s · copy-engine uses near-zero SMs: no", + "peakBandwidthGbps": 27738.291, + "latencyUnit": "us", + "colorKey": "mi355x_a301ee7b", + "label": "MI355X · dtod · sm", + "generatedAt": "2026-06-29T00:49:25.677922+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-29T00:49:25.677922+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 65536, + "bandwidthGbps": 5.974, + "latency": 10.9707, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 262144, + "bandwidthGbps": 23.442, + "latency": 11.1828, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 1048576, + "bandwidthGbps": 92.466, + "latency": 11.3401, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 380.375, + "latency": 11.0268, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 1520.029, + "latency": 11.0374, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 6138.698, + "latency": 10.9321, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 23990.086, + "latency": 11.1894, + "sizeClass": null, + "correct": null + } + ] + }, + { + "id": "cxt-30a30277", + "identity": "copy-engine|mi355x|mi355x-xgmi|xgmi|htod|copy-engine|us", + "cohortIdentity": "copy-engine|mi355x|mi355x-xgmi|xgmi", + "family": "copy-engine", + "sku": "mi355x", + "topologyClass": "mi355x-xgmi", + "transport": "xgmi", + "operation": "htod", + "subtype": 
"copy-engine", + "valid": true, + "status": "valid", + "note": "peak 27738 GB/s · copy-engine uses near-zero SMs: no", + "peakBandwidthGbps": 27738.291, + "latencyUnit": "us", + "colorKey": "mi355x_30a30277", + "label": "MI355X · htod · copy-engine", + "generatedAt": "2026-06-29T00:49:25.677922+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-29T00:49:25.677922+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 65536, + "bandwidthGbps": 7.111, + "latency": 9.2161, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 262144, + "bandwidthGbps": 28.79, + "latency": 9.1054, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 1048576, + "bandwidthGbps": 118.831, + "latency": 8.8241, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 492.98, + "latency": 8.5081, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 1921.041, + "latency": 8.7334, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 7489.773, + "latency": 8.9601, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 27738.291, + "latency": 9.6774, + "sizeClass": null, + "correct": null + } + ] + }, + { + "id": "cxt-da4cda37", + "identity": "copy-engine|mi355x|mi355x-xgmi|xgmi|htod|sm|us", + "cohortIdentity": "copy-engine|mi355x|mi355x-xgmi|xgmi", + "family": "copy-engine", + "sku": "mi355x", + "topologyClass": "mi355x-xgmi", + "transport": "xgmi", + "operation": "htod", + "subtype": "sm", + "valid": true, + "status": "valid", + "note": "peak 27738 GB/s · copy-engine uses near-zero SMs: no", + "peakBandwidthGbps": 27738.291, + "latencyUnit": "us", + "colorKey": "mi355x_da4cda37", + "label": "MI355X · htod · sm", + "generatedAt": "2026-06-29T00:49:25.677922+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-29T00:49:25.677922+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 
65536, + "bandwidthGbps": 6.239, + "latency": 10.5041, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 262144, + "bandwidthGbps": 24.106, + "latency": 10.8748, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 1048576, + "bandwidthGbps": 97.985, + "latency": 10.7014, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 394.892, + "latency": 10.6214, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 1607.819, + "latency": 10.4348, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 6392.071, + "latency": 10.4988, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 25471.24, + "latency": 10.5388, + "sizeClass": null, + "correct": null + } + ] + } + ], + "kvCache": [ + { + "id": "cxt-72e44191", + "identity": "kv-cache|b300|b300-nvlink-island|nvlink|dtod-local|contiguous/memcpy|ms", + "cohortIdentity": "kv-cache|b300|nvlink", + "family": "kv-cache", + "sku": "b300", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "operation": "dtod-local", + "subtype": "contiguous/memcpy", + "valid": true, + "status": "valid", + "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl", + "peakBandwidthGbps": null, + "latencyUnit": "ms", + "colorKey": "b300_72e44191", + "label": "B300 · dtod-local · contiguous/memcpy", + "generatedAt": "2026-06-27T13:14:28.674652+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-27T13:14:28.674652+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 16384, + "bandwidthGbps": 4.86, + "latency": 0.00337, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 65536, + "bandwidthGbps": 18.31, + "latency": 0.00358, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 262144, + "bandwidthGbps": 79.48, + "latency": 0.0033, + "sizeClass": "decode", + "correct": true + }, + { + 
"sizeBytes": 1048576, + "bandwidthGbps": 315.89, + "latency": 0.00332, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 1140.42, + "latency": 0.00368, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 2696.03, + "latency": 0.00622, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 2724.4, + "latency": 0.02463, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 3189.99, + "latency": 0.08415, + "sizeClass": "prefill", + "correct": true + } + ] + }, + { + "id": "cxt-0198272e", + "identity": "kv-cache|b300|b300-nvlink-island|nvlink|dtod-local|paged/memcpy|ms", + "cohortIdentity": "kv-cache|b300|nvlink", + "family": "kv-cache", + "sku": "b300", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "operation": "dtod-local", + "subtype": "paged/memcpy", + "valid": true, + "status": "valid", + "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl", + "peakBandwidthGbps": null, + "latencyUnit": "ms", + "colorKey": "b300_0198272e", + "label": "B300 · dtod-local · paged/memcpy", + "generatedAt": "2026-06-27T13:14:28.674652+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-27T13:14:28.674652+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 16384, + "bandwidthGbps": 3.27, + "latency": 0.005, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 65536, + "bandwidthGbps": 13.15, + "latency": 0.00498, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 262144, + "bandwidthGbps": 13.46, + "latency": 0.01948, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 1048576, + "bandwidthGbps": 13.76, + "latency": 0.07619, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 13.84, + "latency": 0.30311, + "sizeClass": "prefill", + "correct": true + 
}, + { + "sizeBytes": 16777216, + "bandwidthGbps": 13.87, + "latency": 1.20968, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 13.83, + "latency": 4.85211, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 13.89, + "latency": 19.32599, + "sizeClass": "prefill", + "correct": true + } + ] + }, + { + "id": "cxt-65e093de", + "identity": "kv-cache|b300|b300-nvlink-island|nvlink|dtod-remote|contiguous/memcpy|ms", + "cohortIdentity": "kv-cache|b300|nvlink", + "family": "kv-cache", + "sku": "b300", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "operation": "dtod-remote", + "subtype": "contiguous/memcpy", + "valid": true, + "status": "valid", + "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl", + "peakBandwidthGbps": null, + "latencyUnit": "ms", + "colorKey": "b300_65e093de", + "label": "B300 · dtod-remote · contiguous/memcpy", + "generatedAt": "2026-06-27T13:14:28.674652+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-27T13:14:28.674652+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 16384, + "bandwidthGbps": 1.08, + "latency": 0.01514, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 65536, + "bandwidthGbps": 4.52, + "latency": 0.01451, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 262144, + "bandwidthGbps": 17.43, + "latency": 0.01504, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 1048576, + "bandwidthGbps": 67.07, + "latency": 0.01563, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 205.84, + "latency": 0.02038, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 409.12, + "latency": 0.04101, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 644.24, + "latency": 0.10417, + "sizeClass": 
"prefill", + "correct": true + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 736.42, + "latency": 0.36451, + "sizeClass": "prefill", + "correct": true + } + ] + }, + { + "id": "cxt-502d7923", + "identity": "kv-cache|b300|b300-nvlink-island|nvlink|dtod-remote|paged/memcpy|ms", + "cohortIdentity": "kv-cache|b300|nvlink", + "family": "kv-cache", + "sku": "b300", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "operation": "dtod-remote", + "subtype": "paged/memcpy", + "valid": true, + "status": "valid", + "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl", + "peakBandwidthGbps": null, + "latencyUnit": "ms", + "colorKey": "b300_502d7923", + "label": "B300 · dtod-remote · paged/memcpy", + "generatedAt": "2026-06-27T13:14:28.674652+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-27T13:14:28.674652+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 16384, + "bandwidthGbps": 1.11, + "latency": 0.01473, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 65536, + "bandwidthGbps": 4.35, + "latency": 0.01507, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 262144, + "bandwidthGbps": 4.3, + "latency": 0.06098, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 1048576, + "bandwidthGbps": 4.27, + "latency": 0.24556, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 4.26, + "latency": 0.98559, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 4.24, + "latency": 3.9593, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 4.27, + "latency": 15.72352, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 4.25, + "latency": 63.14588, + "sizeClass": "prefill", + "correct": true + } + ] + }, + { + "id": "cxt-0560494f", + "identity": 
"kv-cache|b300|b300-nvlink-island|nvlink|dtoh|contiguous/memcpy|ms", + "cohortIdentity": "kv-cache|b300|nvlink", + "family": "kv-cache", + "sku": "b300", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "operation": "dtoh", + "subtype": "contiguous/memcpy", + "valid": true, + "status": "valid", + "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl", + "peakBandwidthGbps": null, + "latencyUnit": "ms", + "colorKey": "b300_0560494f", + "label": "B300 · dtoh · contiguous/memcpy", + "generatedAt": "2026-06-27T13:14:28.674652+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-27T13:14:28.674652+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 16384, + "bandwidthGbps": 1.29, + "latency": 0.01266, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 65536, + "bandwidthGbps": 4.53, + "latency": 0.01447, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 262144, + "bandwidthGbps": 10.95, + "latency": 0.02394, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 1048576, + "bandwidthGbps": 17.12, + "latency": 0.06125, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 23.25, + "latency": 0.18038, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 25.14, + "latency": 0.66728, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 25.77, + "latency": 2.60365, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 26.05, + "latency": 10.30309, + "sizeClass": "prefill", + "correct": true + } + ] + }, + { + "id": "cxt-ce77da1a", + "identity": "kv-cache|b300|b300-nvlink-island|nvlink|dtoh|contiguous/pinned|ms", + "cohortIdentity": "kv-cache|b300|nvlink", + "family": "kv-cache", + "sku": "b300", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "operation": "dtoh", + 
"subtype": "contiguous/pinned", + "valid": true, + "status": "valid", + "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl", + "peakBandwidthGbps": null, + "latencyUnit": "ms", + "colorKey": "b300_ce77da1a", + "label": "B300 · dtoh · contiguous/pinned", + "generatedAt": "2026-06-27T13:14:28.674652+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-27T13:14:28.674652+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 16384, + "bandwidthGbps": 4.69, + "latency": 0.00349, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 65536, + "bandwidthGbps": 15.49, + "latency": 0.00423, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 262144, + "bandwidthGbps": 34.21, + "latency": 0.00766, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 1048576, + "bandwidthGbps": 49.37, + "latency": 0.02124, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 55.18, + "latency": 0.07601, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 56.69, + "latency": 0.29592, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 57.26, + "latency": 1.17204, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 57.37, + "latency": 4.67905, + "sizeClass": "prefill", + "correct": true + } + ] + }, + { + "id": "cxt-46a8e034", + "identity": "kv-cache|b300|b300-nvlink-island|nvlink|dtoh|paged/memcpy|ms", + "cohortIdentity": "kv-cache|b300|nvlink", + "family": "kv-cache", + "sku": "b300", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "operation": "dtoh", + "subtype": "paged/memcpy", + "valid": true, + "status": "valid", + "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl", + "peakBandwidthGbps": null, + "latencyUnit": "ms", + "colorKey": "b300_46a8e034", + "label": "B300 · 
dtoh · paged/memcpy", + "generatedAt": "2026-06-27T13:14:28.674652+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-27T13:14:28.674652+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 16384, + "bandwidthGbps": 1.15, + "latency": 0.01424, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 65536, + "bandwidthGbps": 4.09, + "latency": 0.01604, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 262144, + "bandwidthGbps": 4.13, + "latency": 0.06348, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 1048576, + "bandwidthGbps": 4.08, + "latency": 0.25721, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 4.12, + "latency": 1.01899, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 4.13, + "latency": 4.05933, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 4.12, + "latency": 16.28391, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 4.08, + "latency": 65.79932, + "sizeClass": "prefill", + "correct": true + } + ] + }, + { + "id": "cxt-74b14d7d", + "identity": "kv-cache|b300|b300-nvlink-island|nvlink|dtoh|paged/pinned|ms", + "cohortIdentity": "kv-cache|b300|nvlink", + "family": "kv-cache", + "sku": "b300", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "operation": "dtoh", + "subtype": "paged/pinned", + "valid": true, + "status": "valid", + "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl", + "peakBandwidthGbps": null, + "latencyUnit": "ms", + "colorKey": "b300_74b14d7d", + "label": "B300 · dtoh · paged/pinned", + "generatedAt": "2026-06-27T13:14:28.674652+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-27T13:14:28.674652+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 16384, + "bandwidthGbps": 3.97, + "latency": 0.00413, 
+ "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 65536, + "bandwidthGbps": 11.6, + "latency": 0.00565, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 262144, + "bandwidthGbps": 11.71, + "latency": 0.02239, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 1048576, + "bandwidthGbps": 11.85, + "latency": 0.08852, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 15.07, + "latency": 0.27834, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 16.21, + "latency": 1.0351, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 16, + "latency": 4.19304, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 15.3, + "latency": 17.54518, + "sizeClass": "prefill", + "correct": true + } + ] + }, + { + "id": "cxt-a39a3977", + "identity": "kv-cache|b300|b300-nvlink-island|nvlink|htod|contiguous/memcpy|ms", + "cohortIdentity": "kv-cache|b300|nvlink", + "family": "kv-cache", + "sku": "b300", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "operation": "htod", + "subtype": "contiguous/memcpy", + "valid": true, + "status": "valid", + "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl", + "peakBandwidthGbps": null, + "latencyUnit": "ms", + "colorKey": "b300_a39a3977", + "label": "B300 · htod · contiguous/memcpy", + "generatedAt": "2026-06-27T13:14:28.674652+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-27T13:14:28.674652+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 16384, + "bandwidthGbps": 4.67, + "latency": 0.00351, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 65536, + "bandwidthGbps": 10.64, + "latency": 0.00616, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 262144, + "bandwidthGbps": 28.03, + "latency": 0.00935, + 
"sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 1048576, + "bandwidthGbps": 39.84, + "latency": 0.02632, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 40.9, + "latency": 0.10256, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 43.53, + "latency": 0.38545, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 40.29, + "latency": 1.66584, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 40.43, + "latency": 6.6389, + "sizeClass": "prefill", + "correct": true + } + ] + }, + { + "id": "cxt-4dc90462", + "identity": "kv-cache|b300|b300-nvlink-island|nvlink|htod|contiguous/pinned|ms", + "cohortIdentity": "kv-cache|b300|nvlink", + "family": "kv-cache", + "sku": "b300", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "operation": "htod", + "subtype": "contiguous/pinned", + "valid": true, + "status": "valid", + "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl", + "peakBandwidthGbps": null, + "latencyUnit": "ms", + "colorKey": "b300_4dc90462", + "label": "B300 · htod · contiguous/pinned", + "generatedAt": "2026-06-27T13:14:28.674652+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-27T13:14:28.674652+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 16384, + "bandwidthGbps": 4.92, + "latency": 0.00333, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 65536, + "bandwidthGbps": 15.9, + "latency": 0.00412, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 262144, + "bandwidthGbps": 35.21, + "latency": 0.00745, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 1048576, + "bandwidthGbps": 49.53, + "latency": 0.02117, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 54.66, + "latency": 0.07673, + 
"sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 57.11, + "latency": 0.29375, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 57.27, + "latency": 1.1717, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 57.29, + "latency": 4.68587, + "sizeClass": "prefill", + "correct": true + } + ] + }, + { + "id": "cxt-1baaf76c", + "identity": "kv-cache|b300|b300-nvlink-island|nvlink|htod|paged/memcpy|ms", + "cohortIdentity": "kv-cache|b300|nvlink", + "family": "kv-cache", + "sku": "b300", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "operation": "htod", + "subtype": "paged/memcpy", + "valid": true, + "status": "valid", + "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl", + "peakBandwidthGbps": null, + "latencyUnit": "ms", + "colorKey": "b300_1baaf76c", + "label": "B300 · htod · paged/memcpy", + "generatedAt": "2026-06-27T13:14:28.674652+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-27T13:14:28.674652+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 16384, + "bandwidthGbps": 3.07, + "latency": 0.00534, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 65536, + "bandwidthGbps": 12.18, + "latency": 0.00538, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 262144, + "bandwidthGbps": 13.09, + "latency": 0.02003, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 1048576, + "bandwidthGbps": 13.07, + "latency": 0.08021, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 12.88, + "latency": 0.32552, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 12.74, + "latency": 1.31673, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 13.44, + "latency": 4.99481, + "sizeClass": 
"prefill", + "correct": true + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 13.46, + "latency": 19.93861, + "sizeClass": "prefill", + "correct": true + } + ] + }, + { + "id": "cxt-95e0eff5", + "identity": "kv-cache|b300|b300-nvlink-island|nvlink|htod|paged/pinned|ms", + "cohortIdentity": "kv-cache|b300|nvlink", + "family": "kv-cache", + "sku": "b300", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "operation": "htod", + "subtype": "paged/pinned", + "valid": true, + "status": "valid", + "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl", + "peakBandwidthGbps": null, + "latencyUnit": "ms", + "colorKey": "b300_95e0eff5", + "label": "B300 · htod · paged/pinned", + "generatedAt": "2026-06-27T13:14:28.674652+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-27T13:14:28.674652+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 16384, + "bandwidthGbps": 3.85, + "latency": 0.00425, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 65536, + "bandwidthGbps": 11.28, + "latency": 0.00581, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 262144, + "bandwidthGbps": 11.35, + "latency": 0.0231, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 1048576, + "bandwidthGbps": 11.41, + "latency": 0.0919, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 14.68, + "latency": 0.28572, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 15.48, + "latency": 1.08353, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 15.4, + "latency": 4.35678, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 15.59, + "latency": 17.21665, + "sizeClass": "prefill", + "correct": true + } + ] + }, + { + "id": "cxt-ac86e5b5", + "identity": 
"kv-cache|h100|h100-nvlink-island|nvlink|dtod-local|contiguous/memcpy|ms", + "cohortIdentity": "kv-cache|h100|nvlink", + "family": "kv-cache", + "sku": "h100", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "operation": "dtod-local", + "subtype": "contiguous/memcpy", + "valid": true, + "status": "valid", + "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl", + "peakBandwidthGbps": null, + "latencyUnit": "ms", + "colorKey": "h100_ac86e5b5", + "label": "H100 · dtod-local · contiguous/memcpy", + "generatedAt": "2026-06-27T13:14:31.575969+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-27T13:14:31.575969+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 16384, + "bandwidthGbps": 3.08, + "latency": 0.00532, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 65536, + "bandwidthGbps": 12.22, + "latency": 0.00536, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 262144, + "bandwidthGbps": 51.29, + "latency": 0.00511, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 1048576, + "bandwidthGbps": 203.7, + "latency": 0.00515, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 830.27, + "latency": 0.00505, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 1848.69, + "latency": 0.00908, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 1404.25, + "latency": 0.04779, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 1496.83, + "latency": 0.17934, + "sizeClass": "prefill", + "correct": true + } + ] + }, + { + "id": "cxt-5cd440fa", + "identity": "kv-cache|h100|h100-nvlink-island|nvlink|dtod-local|paged/memcpy|ms", + "cohortIdentity": "kv-cache|h100|nvlink", + "family": "kv-cache", + "sku": "h100", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + 
"operation": "dtod-local", + "subtype": "paged/memcpy", + "valid": true, + "status": "valid", + "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl", + "peakBandwidthGbps": null, + "latencyUnit": "ms", + "colorKey": "h100_5cd440fa", + "label": "H100 · dtod-local · paged/memcpy", + "generatedAt": "2026-06-27T13:14:31.575969+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-27T13:14:31.575969+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 16384, + "bandwidthGbps": 1.87, + "latency": 0.00875, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 65536, + "bandwidthGbps": 7.19, + "latency": 0.00912, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 262144, + "bandwidthGbps": 7.74, + "latency": 0.03385, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 1048576, + "bandwidthGbps": 8.12, + "latency": 0.12917, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 8.12, + "latency": 0.51673, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 8.12, + "latency": 2.06732, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 8.14, + "latency": 8.24075, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 8.16, + "latency": 32.8879, + "sizeClass": "prefill", + "correct": true + } + ] + }, + { + "id": "cxt-ea4a3eaa", + "identity": "kv-cache|h100|h100-nvlink-island|nvlink|dtod-remote|contiguous/memcpy|ms", + "cohortIdentity": "kv-cache|h100|nvlink", + "family": "kv-cache", + "sku": "h100", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "operation": "dtod-remote", + "subtype": "contiguous/memcpy", + "valid": true, + "status": "valid", + "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl", + "peakBandwidthGbps": null, + "latencyUnit": "ms", + 
"colorKey": "h100_ea4a3eaa", + "label": "H100 · dtod-remote · contiguous/memcpy", + "generatedAt": "2026-06-27T13:14:31.575969+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-27T13:14:31.575969+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 16384, + "bandwidthGbps": 1.05, + "latency": 0.01566, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 65536, + "bandwidthGbps": 4.14, + "latency": 0.01583, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 262144, + "bandwidthGbps": 16.78, + "latency": 0.01562, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 1048576, + "bandwidthGbps": 60.4, + "latency": 0.01736, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 135.27, + "latency": 0.03101, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 287.64, + "latency": 0.05833, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 355.4, + "latency": 0.18883, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 386.28, + "latency": 0.69492, + "sizeClass": "prefill", + "correct": true + } + ] + }, + { + "id": "cxt-0ce612f7", + "identity": "kv-cache|h100|h100-nvlink-island|nvlink|dtod-remote|paged/memcpy|ms", + "cohortIdentity": "kv-cache|h100|nvlink", + "family": "kv-cache", + "sku": "h100", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "operation": "dtod-remote", + "subtype": "paged/memcpy", + "valid": true, + "status": "valid", + "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl", + "peakBandwidthGbps": null, + "latencyUnit": "ms", + "colorKey": "h100_0ce612f7", + "label": "H100 · dtod-remote · paged/memcpy", + "generatedAt": "2026-06-27T13:14:31.575969+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-27T13:14:31.575969+00:00", + "sha": null + 
}, + "rows": [ + { + "sizeBytes": 16384, + "bandwidthGbps": 0.81, + "latency": 0.02017, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 65536, + "bandwidthGbps": 3.39, + "latency": 0.01935, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 262144, + "bandwidthGbps": 3.46, + "latency": 0.07571, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 1048576, + "bandwidthGbps": 3.5, + "latency": 0.29964, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 3.52, + "latency": 1.19198, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 3.51, + "latency": 4.78335, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 3.51, + "latency": 19.11805, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 3.51, + "latency": 76.49081, + "sizeClass": "prefill", + "correct": true + } + ] + }, + { + "id": "cxt-9514aa3b", + "identity": "kv-cache|h100|h100-nvlink-island|nvlink|dtoh|contiguous/memcpy|ms", + "cohortIdentity": "kv-cache|h100|nvlink", + "family": "kv-cache", + "sku": "h100", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "operation": "dtoh", + "subtype": "contiguous/memcpy", + "valid": true, + "status": "valid", + "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl", + "peakBandwidthGbps": null, + "latencyUnit": "ms", + "colorKey": "h100_9514aa3b", + "label": "H100 · dtoh · contiguous/memcpy", + "generatedAt": "2026-06-27T13:14:31.575969+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-27T13:14:31.575969+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 16384, + "bandwidthGbps": 1.14, + "latency": 0.01435, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 65536, + "bandwidthGbps": 3.21, + "latency": 0.02041, + "sizeClass": "decode", + "correct": true + }, 
+ { + "sizeBytes": 262144, + "bandwidthGbps": 6.95, + "latency": 0.03775, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 1048576, + "bandwidthGbps": 9.46, + "latency": 0.11089, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 12.75, + "latency": 0.32908, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 14.17, + "latency": 1.18418, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 14.68, + "latency": 4.57034, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 14.78, + "latency": 18.15827, + "sizeClass": "prefill", + "correct": true + } + ] + }, + { + "id": "cxt-51d4ebbe", + "identity": "kv-cache|h100|h100-nvlink-island|nvlink|dtoh|contiguous/pinned|ms", + "cohortIdentity": "kv-cache|h100|nvlink", + "family": "kv-cache", + "sku": "h100", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "operation": "dtoh", + "subtype": "contiguous/pinned", + "valid": true, + "status": "valid", + "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl", + "peakBandwidthGbps": null, + "latencyUnit": "ms", + "colorKey": "h100_51d4ebbe", + "label": "H100 · dtoh · contiguous/pinned", + "generatedAt": "2026-06-27T13:14:31.575969+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-27T13:14:31.575969+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 16384, + "bandwidthGbps": 3.63, + "latency": 0.00452, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 65536, + "bandwidthGbps": 14.61, + "latency": 0.00449, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 262144, + "bandwidthGbps": 34.97, + "latency": 0.0075, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 1048576, + "bandwidthGbps": 45.56, + "latency": 0.02301, + "sizeClass": "prefill", + "correct": true + }, + { 
+ "sizeBytes": 4194304, + "bandwidthGbps": 52.87, + "latency": 0.07934, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 54.46, + "latency": 0.30805, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 54.81, + "latency": 1.22436, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 54.92, + "latency": 4.88742, + "sizeClass": "prefill", + "correct": true + } + ] + }, + { + "id": "cxt-53eb5188", + "identity": "kv-cache|h100|h100-nvlink-island|nvlink|dtoh|paged/memcpy|ms", + "cohortIdentity": "kv-cache|h100|nvlink", + "family": "kv-cache", + "sku": "h100", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "operation": "dtoh", + "subtype": "paged/memcpy", + "valid": true, + "status": "valid", + "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl", + "peakBandwidthGbps": null, + "latencyUnit": "ms", + "colorKey": "h100_53eb5188", + "label": "H100 · dtoh · paged/memcpy", + "generatedAt": "2026-06-27T13:14:31.575969+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-27T13:14:31.575969+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 16384, + "bandwidthGbps": 0.95, + "latency": 0.01729, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 65536, + "bandwidthGbps": 2.79, + "latency": 0.02345, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 262144, + "bandwidthGbps": 2.8, + "latency": 0.0936, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 1048576, + "bandwidthGbps": 2.83, + "latency": 0.37049, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 2.84, + "latency": 1.47709, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 2.84, + "latency": 5.91534, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 
67108864, + "bandwidthGbps": 2.8, + "latency": 23.94517, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 2.81, + "latency": 95.42213, + "sizeClass": "prefill", + "correct": true + } + ] + }, + { + "id": "cxt-58b1ef69", + "identity": "kv-cache|h100|h100-nvlink-island|nvlink|dtoh|paged/pinned|ms", + "cohortIdentity": "kv-cache|h100|nvlink", + "family": "kv-cache", + "sku": "h100", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "operation": "dtoh", + "subtype": "paged/pinned", + "valid": true, + "status": "valid", + "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl", + "peakBandwidthGbps": null, + "latencyUnit": "ms", + "colorKey": "h100_58b1ef69", + "label": "H100 · dtoh · paged/pinned", + "generatedAt": "2026-06-27T13:14:31.575969+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-27T13:14:31.575969+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 16384, + "bandwidthGbps": 2.02, + "latency": 0.00812, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 65536, + "bandwidthGbps": 8.26, + "latency": 0.00793, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 262144, + "bandwidthGbps": 8.59, + "latency": 0.03052, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 1048576, + "bandwidthGbps": 8.8, + "latency": 0.11912, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 8.89, + "latency": 0.47188, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 8.94, + "latency": 1.87628, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 8.99, + "latency": 7.46602, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 8.97, + "latency": 29.91576, + "sizeClass": "prefill", + "correct": true + } + ] + }, + { + "id": "cxt-f0ce2a63", + 
"identity": "kv-cache|h100|h100-nvlink-island|nvlink|htod|contiguous/memcpy|ms", + "cohortIdentity": "kv-cache|h100|nvlink", + "family": "kv-cache", + "sku": "h100", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "operation": "htod", + "subtype": "contiguous/memcpy", + "valid": true, + "status": "valid", + "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl", + "peakBandwidthGbps": null, + "latencyUnit": "ms", + "colorKey": "h100_f0ce2a63", + "label": "H100 · htod · contiguous/memcpy", + "generatedAt": "2026-06-27T13:14:31.575969+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-27T13:14:31.575969+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 16384, + "bandwidthGbps": 2.21, + "latency": 0.00742, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 65536, + "bandwidthGbps": 3.56, + "latency": 0.0184, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 262144, + "bandwidthGbps": 12.81, + "latency": 0.02046, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 1048576, + "bandwidthGbps": 16.18, + "latency": 0.06483, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 14.77, + "latency": 0.28404, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 15.24, + "latency": 1.10071, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 19.35, + "latency": 3.46895, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 19.37, + "latency": 13.85634, + "sizeClass": "prefill", + "correct": true + } + ] + }, + { + "id": "cxt-90f1ea66", + "identity": "kv-cache|h100|h100-nvlink-island|nvlink|htod|contiguous/pinned|ms", + "cohortIdentity": "kv-cache|h100|nvlink", + "family": "kv-cache", + "sku": "h100", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "operation": 
"htod", + "subtype": "contiguous/pinned", + "valid": true, + "status": "valid", + "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl", + "peakBandwidthGbps": null, + "latencyUnit": "ms", + "colorKey": "h100_90f1ea66", + "label": "H100 · htod · contiguous/pinned", + "generatedAt": "2026-06-27T13:14:31.575969+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-27T13:14:31.575969+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 16384, + "bandwidthGbps": 3.54, + "latency": 0.00463, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 65536, + "bandwidthGbps": 10.95, + "latency": 0.00598, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 262144, + "bandwidthGbps": 20.37, + "latency": 0.01287, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 1048576, + "bandwidthGbps": 24.67, + "latency": 0.0425, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 45.61, + "latency": 0.09197, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 19, + "latency": 0.88291, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 15.38, + "latency": 4.36425, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 26.93, + "latency": 9.96701, + "sizeClass": "prefill", + "correct": true + } + ] + }, + { + "id": "cxt-e2eccf00", + "identity": "kv-cache|h100|h100-nvlink-island|nvlink|htod|paged/memcpy|ms", + "cohortIdentity": "kv-cache|h100|nvlink", + "family": "kv-cache", + "sku": "h100", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "operation": "htod", + "subtype": "paged/memcpy", + "valid": true, + "status": "valid", + "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl", + "peakBandwidthGbps": null, + "latencyUnit": "ms", + "colorKey": "h100_e2eccf00", + "label": "H100 
· htod · paged/memcpy", + "generatedAt": "2026-06-27T13:14:31.575969+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-27T13:14:31.575969+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 16384, + "bandwidthGbps": 1.48, + "latency": 0.01107, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 65536, + "bandwidthGbps": 4.89, + "latency": 0.01341, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 262144, + "bandwidthGbps": 5.26, + "latency": 0.04985, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 1048576, + "bandwidthGbps": 5.25, + "latency": 0.19989, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 4.87, + "latency": 0.86178, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 4.88, + "latency": 3.43634, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 5.29, + "latency": 12.69012, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 5.3, + "latency": 50.67481, + "sizeClass": "prefill", + "correct": true + } + ] + }, + { + "id": "cxt-9d427921", + "identity": "kv-cache|h100|h100-nvlink-island|nvlink|htod|paged/pinned|ms", + "cohortIdentity": "kv-cache|h100|nvlink", + "family": "kv-cache", + "sku": "h100", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "operation": "htod", + "subtype": "paged/pinned", + "valid": true, + "status": "valid", + "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl", + "peakBandwidthGbps": null, + "latencyUnit": "ms", + "colorKey": "h100_9d427921", + "label": "H100 · htod · paged/pinned", + "generatedAt": "2026-06-27T13:14:31.575969+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-27T13:14:31.575969+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 16384, + "bandwidthGbps": 1.62, + "latency": 0.01012, 
+ "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 65536, + "bandwidthGbps": 7.91, + "latency": 0.00828, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 262144, + "bandwidthGbps": 8.25, + "latency": 0.03177, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 1048576, + "bandwidthGbps": 8.56, + "latency": 0.12251, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 8.72, + "latency": 0.48117, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 8.69, + "latency": 1.93067, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 8.76, + "latency": 7.66475, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 8.8, + "latency": 30.51378, + "sizeClass": "prefill", + "correct": true + } + ] + }, + { + "id": "cxt-da427647", + "identity": "kv-cache|h200|h200-nvlink-island|nvlink|dtod-local|contiguous/memcpy|ms", + "cohortIdentity": "kv-cache|h200|nvlink", + "family": "kv-cache", + "sku": "h200", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "operation": "dtod-local", + "subtype": "contiguous/memcpy", + "valid": true, + "status": "valid", + "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl", + "peakBandwidthGbps": null, + "latencyUnit": "ms", + "colorKey": "h200_da427647", + "label": "H200 · dtod-local · contiguous/memcpy", + "generatedAt": "2026-06-27T13:15:06.269124+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-27T13:15:06.269124+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 16384, + "bandwidthGbps": 2.92, + "latency": 0.00561, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 65536, + "bandwidthGbps": 11.4, + "latency": 0.00575, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 262144, + "bandwidthGbps": 50.9, + "latency": 
0.00515, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 1048576, + "bandwidthGbps": 203.07, + "latency": 0.00516, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 808.09, + "latency": 0.00519, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 2577.62, + "latency": 0.00651, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 1942.65, + "latency": 0.03455, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 2094.13, + "latency": 0.12818, + "sizeClass": "prefill", + "correct": true + } + ] + }, + { + "id": "cxt-e86f4c3c", + "identity": "kv-cache|h200|h200-nvlink-island|nvlink|dtod-local|paged/memcpy|ms", + "cohortIdentity": "kv-cache|h200|nvlink", + "family": "kv-cache", + "sku": "h200", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "operation": "dtod-local", + "subtype": "paged/memcpy", + "valid": true, + "status": "valid", + "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl", + "peakBandwidthGbps": null, + "latencyUnit": "ms", + "colorKey": "h200_e86f4c3c", + "label": "H200 · dtod-local · paged/memcpy", + "generatedAt": "2026-06-27T13:15:06.269124+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-27T13:15:06.269124+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 16384, + "bandwidthGbps": 1.88, + "latency": 0.0087, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 65536, + "bandwidthGbps": 7.13, + "latency": 0.00919, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 262144, + "bandwidthGbps": 7.76, + "latency": 0.03377, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 1048576, + "bandwidthGbps": 8.12, + "latency": 0.12919, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 8.12, + 
"latency": 0.51648, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 8.12, + "latency": 2.06665, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 7.82, + "latency": 8.58219, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 8.37, + "latency": 32.07343, + "sizeClass": "prefill", + "correct": true + } + ] + }, + { + "id": "cxt-a92baae0", + "identity": "kv-cache|h200|h200-nvlink-island|nvlink|dtod-remote|contiguous/memcpy|ms", + "cohortIdentity": "kv-cache|h200|nvlink", + "family": "kv-cache", + "sku": "h200", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "operation": "dtod-remote", + "subtype": "contiguous/memcpy", + "valid": true, + "status": "valid", + "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl", + "peakBandwidthGbps": null, + "latencyUnit": "ms", + "colorKey": "h200_a92baae0", + "label": "H200 · dtod-remote · contiguous/memcpy", + "generatedAt": "2026-06-27T13:15:06.269124+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-27T13:15:06.269124+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 16384, + "bandwidthGbps": 1.32, + "latency": 0.01245, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 65536, + "bandwidthGbps": 4.88, + "latency": 0.01342, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 262144, + "bandwidthGbps": 18.4, + "latency": 0.01425, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 1048576, + "bandwidthGbps": 50.51, + "latency": 0.02076, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 157.92, + "latency": 0.02656, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 283.17, + "latency": 0.05925, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 67108864, + 
"bandwidthGbps": 357.67, + "latency": 0.18763, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 386.41, + "latency": 0.69468, + "sizeClass": "prefill", + "correct": true + } + ] + }, + { + "id": "cxt-a09960ed", + "identity": "kv-cache|h200|h200-nvlink-island|nvlink|dtod-remote|paged/memcpy|ms", + "cohortIdentity": "kv-cache|h200|nvlink", + "family": "kv-cache", + "sku": "h200", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "operation": "dtod-remote", + "subtype": "paged/memcpy", + "valid": true, + "status": "valid", + "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl", + "peakBandwidthGbps": null, + "latencyUnit": "ms", + "colorKey": "h200_a09960ed", + "label": "H200 · dtod-remote · paged/memcpy", + "generatedAt": "2026-06-27T13:15:06.269124+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-27T13:15:06.269124+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 16384, + "bandwidthGbps": 1.02, + "latency": 0.01611, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 65536, + "bandwidthGbps": 4.09, + "latency": 0.01602, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 262144, + "bandwidthGbps": 4.28, + "latency": 0.06122, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 1048576, + "bandwidthGbps": 4.15, + "latency": 0.25284, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 4.16, + "latency": 1.0074, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 4.38, + "latency": 3.83027, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 3.98, + "latency": 16.86224, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 4.35, + "latency": 61.70685, + "sizeClass": "prefill", + "correct": true + } + ] + }, + { + "id": 
"cxt-5a06e0c5", + "identity": "kv-cache|h200|h200-nvlink-island|nvlink|dtoh|contiguous/memcpy|ms", + "cohortIdentity": "kv-cache|h200|nvlink", + "family": "kv-cache", + "sku": "h200", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "operation": "dtoh", + "subtype": "contiguous/memcpy", + "valid": true, + "status": "valid", + "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl", + "peakBandwidthGbps": null, + "latencyUnit": "ms", + "colorKey": "h200_5a06e0c5", + "label": "H200 · dtoh · contiguous/memcpy", + "generatedAt": "2026-06-27T13:15:06.269124+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-27T13:15:06.269124+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 16384, + "bandwidthGbps": 1.33, + "latency": 0.01232, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 65536, + "bandwidthGbps": 3.39, + "latency": 0.01935, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 262144, + "bandwidthGbps": 7.01, + "latency": 0.03738, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 1048576, + "bandwidthGbps": 9.35, + "latency": 0.11209, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 12.26, + "latency": 0.34211, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 13.63, + "latency": 1.2306, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 14, + "latency": 4.79503, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 14.17, + "latency": 18.94882, + "sizeClass": "prefill", + "correct": true + } + ] + }, + { + "id": "cxt-196034c4", + "identity": "kv-cache|h200|h200-nvlink-island|nvlink|dtoh|contiguous/pinned|ms", + "cohortIdentity": "kv-cache|h200|nvlink", + "family": "kv-cache", + "sku": "h200", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + 
"operation": "dtoh", + "subtype": "contiguous/pinned", + "valid": true, + "status": "valid", + "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl", + "peakBandwidthGbps": null, + "latencyUnit": "ms", + "colorKey": "h200_196034c4", + "label": "H200 · dtoh · contiguous/pinned", + "generatedAt": "2026-06-27T13:15:06.269124+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-27T13:15:06.269124+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 16384, + "bandwidthGbps": 4.66, + "latency": 0.00352, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 65536, + "bandwidthGbps": 11.72, + "latency": 0.00559, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 262144, + "bandwidthGbps": 36.36, + "latency": 0.00721, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 1048576, + "bandwidthGbps": 47.93, + "latency": 0.02188, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 52.92, + "latency": 0.07926, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 54.32, + "latency": 0.30887, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 54.42, + "latency": 1.2332, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 54.68, + "latency": 4.90889, + "sizeClass": "prefill", + "correct": true + } + ] + }, + { + "id": "cxt-1edeeeca", + "identity": "kv-cache|h200|h200-nvlink-island|nvlink|dtoh|paged/memcpy|ms", + "cohortIdentity": "kv-cache|h200|nvlink", + "family": "kv-cache", + "sku": "h200", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "operation": "dtoh", + "subtype": "paged/memcpy", + "valid": true, + "status": "valid", + "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl", + "peakBandwidthGbps": null, + "latencyUnit": "ms", + "colorKey": "h200_1edeeeca", 
+ "label": "H200 · dtoh · paged/memcpy", + "generatedAt": "2026-06-27T13:15:06.269124+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-27T13:15:06.269124+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 16384, + "bandwidthGbps": 1.12, + "latency": 0.01465, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 65536, + "bandwidthGbps": 3.05, + "latency": 0.02151, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 262144, + "bandwidthGbps": 3.1, + "latency": 0.08467, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 1048576, + "bandwidthGbps": 3.2, + "latency": 0.32818, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 3.07, + "latency": 1.3646, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 3.14, + "latency": 5.3446, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 3.12, + "latency": 21.51246, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 3.1, + "latency": 86.61224, + "sizeClass": "prefill", + "correct": true + } + ] + }, + { + "id": "cxt-19277faf", + "identity": "kv-cache|h200|h200-nvlink-island|nvlink|dtoh|paged/pinned|ms", + "cohortIdentity": "kv-cache|h200|nvlink", + "family": "kv-cache", + "sku": "h200", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "operation": "dtoh", + "subtype": "paged/pinned", + "valid": true, + "status": "valid", + "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl", + "peakBandwidthGbps": null, + "latencyUnit": "ms", + "colorKey": "h200_19277faf", + "label": "H200 · dtoh · paged/pinned", + "generatedAt": "2026-06-27T13:15:06.269124+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-27T13:15:06.269124+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 16384, + "bandwidthGbps": 2.57, + 
"latency": 0.00638, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 65536, + "bandwidthGbps": 6.21, + "latency": 0.01056, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 262144, + "bandwidthGbps": 6.34, + "latency": 0.04137, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 1048576, + "bandwidthGbps": 6.39, + "latency": 0.16406, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 9.68, + "latency": 0.4333, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 11.16, + "latency": 1.50278, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 10.75, + "latency": 6.24109, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 11.08, + "latency": 24.21999, + "sizeClass": "prefill", + "correct": true + } + ] + }, + { + "id": "cxt-0cec247d", + "identity": "kv-cache|h200|h200-nvlink-island|nvlink|htod|contiguous/memcpy|ms", + "cohortIdentity": "kv-cache|h200|nvlink", + "family": "kv-cache", + "sku": "h200", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "operation": "htod", + "subtype": "contiguous/memcpy", + "valid": true, + "status": "valid", + "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl", + "peakBandwidthGbps": null, + "latencyUnit": "ms", + "colorKey": "h200_0cec247d", + "label": "H200 · htod · contiguous/memcpy", + "generatedAt": "2026-06-27T13:15:06.269124+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-27T13:15:06.269124+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 16384, + "bandwidthGbps": 2.07, + "latency": 0.00793, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 65536, + "bandwidthGbps": 4.33, + "latency": 0.01513, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 262144, + "bandwidthGbps": 13.97, + "latency": 
0.01876, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 1048576, + "bandwidthGbps": 17.88, + "latency": 0.05865, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 14.91, + "latency": 0.28129, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 15.65, + "latency": 1.07179, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 20.77, + "latency": 3.23166, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 20.84, + "latency": 12.88331, + "sizeClass": "prefill", + "correct": true + } + ] }, { - "id": "cxf-e15f2b54", - "identity": "h200|deepep|7168|8|256|fp8|ll|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", - "generatedAt": "2026-06-26T23:51:34.222899+00:00", - "publicationStatus": "diagnostic", + "id": "cxt-541fa51c", + "identity": "kv-cache|h200|h200-nvlink-island|nvlink|htod|contiguous/pinned|ms", + "cohortIdentity": "kv-cache|h200|nvlink", + "family": "kv-cache", + "sku": "h200", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "operation": "htod", + "subtype": "contiguous/pinned", + "valid": true, "status": "valid", + "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl", + "peakBandwidthGbps": null, + "latencyUnit": "ms", + "colorKey": "h200_541fa51c", + "label": "H200 · htod · contiguous/pinned", + "generatedAt": "2026-06-27T13:15:06.269124+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-27T13:15:06.269124+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 16384, + "bandwidthGbps": 4.25, + "latency": 0.00386, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 65536, + "bandwidthGbps": 10.91, + "latency": 0.00601, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 262144, + "bandwidthGbps": 34.78, + "latency": 0.00754, + 
"sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 1048576, + "bandwidthGbps": 47.66, + "latency": 0.022, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 53.14, + "latency": 0.07893, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 54.49, + "latency": 0.30792, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 54.39, + "latency": 1.23395, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 55.4, + "latency": 4.84562, + "sizeClass": "prefill", + "correct": true + } + ] + }, + { + "id": "cxt-59482272", + "identity": "kv-cache|h200|h200-nvlink-island|nvlink|htod|paged/memcpy|ms", + "cohortIdentity": "kv-cache|h200|nvlink", + "family": "kv-cache", "sku": "h200", - "backend": "deepep", - "phase": "decode", - "config": "fp8/ll/layout-and-dispatch", - "reason": "anomaly:roundtrip_gt_isolated_sum", - "returnCode": null, + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "operation": "htod", + "subtype": "paged/memcpy", + "valid": true, + "status": "valid", + "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl", + "peakBandwidthGbps": null, + "latencyUnit": "ms", + "colorKey": "h200_59482272", + "label": "H200 · htod · paged/memcpy", + "generatedAt": "2026-06-27T13:15:06.269124+00:00", "run": { - "id": "28271653486", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271653486", - "createdAt": "2026-06-26T23:49:28Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - } + "id": null, + "url": null, + "createdAt": "2026-06-27T13:15:06.269124+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 16384, + "bandwidthGbps": 1.23, + "latency": 0.01335, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 65536, + "bandwidthGbps": 4.57, + "latency": 0.01434, + "sizeClass": "decode", + "correct": 
true + }, + { + "sizeBytes": 262144, + "bandwidthGbps": 5.08, + "latency": 0.05156, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 1048576, + "bandwidthGbps": 4.95, + "latency": 0.21203, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 4.73, + "latency": 0.8865, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 4.77, + "latency": 3.51835, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 5.3, + "latency": 12.65221, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 5.32, + "latency": 50.43789, + "sizeClass": "prefill", + "correct": true + } + ] }, { - "id": "cxf-70961aef", - "identity": "h200|deepep|7168|8|256|fp8|ll|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|ac583971f94b176", - "generatedAt": "2026-06-26T17:31:08.227503+00:00", - "publicationStatus": "diagnostic", + "id": "cxt-a5a8f197", + "identity": "kv-cache|h200|h200-nvlink-island|nvlink|htod|paged/pinned|ms", + "cohortIdentity": "kv-cache|h200|nvlink", + "family": "kv-cache", + "sku": "h200", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "operation": "htod", + "subtype": "paged/pinned", + "valid": true, + "status": "valid", + "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl", + "peakBandwidthGbps": null, + "latencyUnit": "ms", + "colorKey": "h200_a5a8f197", + "label": "H200 · htod · paged/pinned", + "generatedAt": "2026-06-27T13:15:06.269124+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-27T13:15:06.269124+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 16384, + "bandwidthGbps": 2.01, + "latency": 0.00814, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 65536, + "bandwidthGbps": 5.86, + "latency": 0.01117, + "sizeClass": "decode", + "correct": true + }, + { + 
"sizeBytes": 262144, + "bandwidthGbps": 6.21, + "latency": 0.04221, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 1048576, + "bandwidthGbps": 6.39, + "latency": 0.16417, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 9.51, + "latency": 0.44121, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 10.54, + "latency": 1.59134, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 10.79, + "latency": 6.22042, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 10.65, + "latency": 25.1967, + "sizeClass": "prefill", + "correct": true + } + ] + }, + { + "id": "cxt-3fe4f8ad", + "identity": "kv-cache|mi355x|mi355x-xgmi|xgmi|dtod-remote|contiguous/rccl|ms", + "cohortIdentity": "kv-cache|mi355x|xgmi", + "family": "kv-cache", + "sku": "mi355x", + "topologyClass": "mi355x-xgmi", + "transport": "xgmi", + "operation": "dtod-remote", + "subtype": "contiguous/rccl", + "valid": true, + "status": "valid", + "note": "wired: rccl", + "peakBandwidthGbps": null, + "latencyUnit": "ms", + "colorKey": "mi355x_3fe4f8ad", + "label": "MI355X · dtod-remote · contiguous/rccl", + "generatedAt": "2026-06-29T00:48:56.689585+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-29T00:48:56.689585+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 65536, + "bandwidthGbps": 0.93, + "latency": 0.07018, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 262144, + "bandwidthGbps": 4.14, + "latency": 0.06326, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 1048576, + "bandwidthGbps": 14.43, + "latency": 0.07267, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 46.03, + "latency": 0.09112, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 63.43, + 
"latency": 0.26449, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 69.8, + "latency": 0.96147, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 71.72, + "latency": 3.74303, + "sizeClass": "prefill", + "correct": true + } + ] + } + ], + "rlMesh": [ + { + "id": "cxt-e28663d4", + "identity": "rl-mesh|b300|b300-nvlink-island|nvlink|generator_to_trainer|paired|ms", + "cohortIdentity": "rl-mesh|b300|nvlink", + "family": "rl-mesh", + "sku": "b300", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "operation": "generator_to_trainer", + "subtype": "paired", + "valid": true, + "status": "valid", + "note": "peak 682 GB/s · world=8: trainer 4 <-> generator 4", + "peakBandwidthGbps": 681.89, + "latencyUnit": "ms", + "colorKey": "b300_e28663d4", + "label": "B300 · gen->trn · paired", + "generatedAt": "2026-06-27T13:38:50.291192+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-27T13:38:50.291192+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 1048576, + "bandwidthGbps": 22.43, + "latency": 0.04675, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 210.03, + "latency": 0.01997, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 444.24, + "latency": 0.03777, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 613.35, + "latency": 0.10941, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 672.64, + "latency": 0.39908, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 1073741824, + "bandwidthGbps": 681.89, + "latency": 1.57465, + "sizeClass": null, + "correct": true + } + ] + }, + { + "id": "cxt-abc63f3d", + "identity": "rl-mesh|b300|b300-nvlink-island|nvlink|generator_to_trainer|redistribute|ms", + "cohortIdentity": "rl-mesh|b300|nvlink", + "family": 
"rl-mesh", + "sku": "b300", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "operation": "generator_to_trainer", + "subtype": "redistribute", + "valid": true, + "status": "valid", + "note": "peak 682 GB/s · world=8: trainer 4 <-> generator 4", + "peakBandwidthGbps": 681.89, + "latencyUnit": "ms", + "colorKey": "b300_abc63f3d", + "label": "B300 · gen->trn · redistribute", + "generatedAt": "2026-06-27T13:38:50.291192+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-27T13:38:50.291192+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 1048576, + "bandwidthGbps": 0.02, + "latency": 44.24712, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 56.86, + "latency": 0.07377, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 168.78, + "latency": 0.0994, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 466.61, + "latency": 0.14382, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 565.6, + "latency": 0.4746, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 1073741824, + "bandwidthGbps": 656.22, + "latency": 1.63626, + "sizeClass": null, + "correct": true + } + ] + }, + { + "id": "cxt-08ab0854", + "identity": "rl-mesh|b300|b300-nvlink-island|nvlink|trainer_to_generator|paired|ms", + "cohortIdentity": "rl-mesh|b300|nvlink", + "family": "rl-mesh", + "sku": "b300", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "operation": "trainer_to_generator", + "subtype": "paired", + "valid": true, + "status": "valid", + "note": "peak 682 GB/s · world=8: trainer 4 <-> generator 4", + "peakBandwidthGbps": 681.89, + "latencyUnit": "ms", + "colorKey": "b300_08ab0854", + "label": "B300 · trn->gen · paired", + "generatedAt": "2026-06-27T13:38:50.291192+00:00", + "run": { + "id": null, + "url": null, + "createdAt": 
"2026-06-27T13:38:50.291192+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 1048576, + "bandwidthGbps": 8.13, + "latency": 0.12892, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 161.07, + "latency": 0.02604, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 455.8, + "latency": 0.03681, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 613.96, + "latency": 0.10931, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 670.34, + "latency": 0.40045, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 1073741824, + "bandwidthGbps": 681.46, + "latency": 1.57564, + "sizeClass": null, + "correct": true + } + ] + }, + { + "id": "cxt-bea1bfbd", + "identity": "rl-mesh|b300|b300-nvlink-island|nvlink|trainer_to_generator|redistribute|ms", + "cohortIdentity": "rl-mesh|b300|nvlink", + "family": "rl-mesh", + "sku": "b300", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "operation": "trainer_to_generator", + "subtype": "redistribute", + "valid": true, + "status": "valid", + "note": "peak 682 GB/s · world=8: trainer 4 <-> generator 4", + "peakBandwidthGbps": 681.89, + "latencyUnit": "ms", + "colorKey": "b300_bea1bfbd", + "label": "B300 · trn->gen · redistribute", + "generatedAt": "2026-06-27T13:38:50.291192+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-27T13:38:50.291192+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 1048576, + "bandwidthGbps": 0.01, + "latency": 74.91642, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 66.21, + "latency": 0.06334, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 295.56, + "latency": 0.05676, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 581.82, + "latency": 0.11534, + 
"sizeClass": null, + "correct": true + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 543.6, + "latency": 0.49381, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 1073741824, + "bandwidthGbps": 659.57, + "latency": 1.62794, + "sizeClass": null, + "correct": true + } + ] + }, + { + "id": "cxt-3e3f24d0", + "identity": "rl-mesh|h100|h100-nvlink-island|nvlink|generator_to_trainer|paired|ms", + "cohortIdentity": "rl-mesh|h100|nvlink", + "family": "rl-mesh", + "sku": "h100", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "operation": "generator_to_trainer", + "subtype": "paired", + "valid": true, + "status": "valid", + "note": "peak 373 GB/s · world=8: trainer 4 <-> generator 4", + "peakBandwidthGbps": 372.53, + "latencyUnit": "ms", + "colorKey": "h100_3e3f24d0", + "label": "H100 · gen->trn · paired", + "generatedAt": "2026-06-27T13:36:14.593136+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-27T13:36:14.593136+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 1048576, + "bandwidthGbps": 28.68, + "latency": 0.03656, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 119.47, + "latency": 0.03511, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 179.16, + "latency": 0.09364, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 333.15, + "latency": 0.20144, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 360.41, + "latency": 0.7448, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 1073741824, + "bandwidthGbps": 372.22, + "latency": 2.88468, + "sizeClass": null, + "correct": true + } + ] + }, + { + "id": "cxt-02dece19", + "identity": "rl-mesh|h100|h100-nvlink-island|nvlink|generator_to_trainer|redistribute|ms", + "cohortIdentity": "rl-mesh|h100|nvlink", + "family": "rl-mesh", + "sku": "h100", + "topologyClass": 
"h100-nvlink-island", + "transport": "nvlink", + "operation": "generator_to_trainer", + "subtype": "redistribute", + "valid": true, + "status": "valid", + "note": "peak 373 GB/s · world=8: trainer 4 <-> generator 4", + "peakBandwidthGbps": 372.53, + "latencyUnit": "ms", + "colorKey": "h100_02dece19", + "label": "H100 · gen->trn · redistribute", + "generatedAt": "2026-06-27T13:36:14.593136+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-27T13:36:14.593136+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 1048576, + "bandwidthGbps": 0.33, + "latency": 3.20924, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 26.07, + "latency": 0.16087, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 105.25, + "latency": 0.1594, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 306.68, + "latency": 0.21882, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 332.52, + "latency": 0.80728, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 1073741824, + "bandwidthGbps": 324.51, + "latency": 3.30884, + "sizeClass": null, + "correct": true + } + ] + }, + { + "id": "cxt-40b74430", + "identity": "rl-mesh|h100|h100-nvlink-island|nvlink|trainer_to_generator|paired|ms", + "cohortIdentity": "rl-mesh|h100|nvlink", + "family": "rl-mesh", + "sku": "h100", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "operation": "trainer_to_generator", + "subtype": "paired", + "valid": true, + "status": "valid", + "note": "peak 373 GB/s · world=8: trainer 4 <-> generator 4", + "peakBandwidthGbps": 372.53, + "latencyUnit": "ms", + "colorKey": "h100_40b74430", + "label": "H100 · trn->gen · paired", + "generatedAt": "2026-06-27T13:36:14.593136+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-27T13:36:14.593136+00:00", + "sha": null + }, + "rows": [ + { + 
"sizeBytes": 1048576, + "bandwidthGbps": 19.34, + "latency": 0.05421, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 102.79, + "latency": 0.04081, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 282.95, + "latency": 0.05929, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 331.36, + "latency": 0.20252, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 359.85, + "latency": 0.74597, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 1073741824, + "bandwidthGbps": 372.53, + "latency": 2.88228, + "sizeClass": null, + "correct": true + } + ] + }, + { + "id": "cxt-3f787c79", + "identity": "rl-mesh|h100|h100-nvlink-island|nvlink|trainer_to_generator|redistribute|ms", + "cohortIdentity": "rl-mesh|h100|nvlink", + "family": "rl-mesh", + "sku": "h100", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "operation": "trainer_to_generator", + "subtype": "redistribute", + "valid": true, "status": "valid", + "note": "peak 373 GB/s · world=8: trainer 4 <-> generator 4", + "peakBandwidthGbps": 372.53, + "latencyUnit": "ms", + "colorKey": "h100_3f787c79", + "label": "H100 · trn->gen · redistribute", + "generatedAt": "2026-06-27T13:36:14.593136+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-27T13:36:14.593136+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 1048576, + "bandwidthGbps": 0.02, + "latency": 42.89165, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 26.19, + "latency": 0.16012, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 107.67, + "latency": 0.15583, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 313.63, + "latency": 0.21398, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 268435456, + 
"bandwidthGbps": 324.4, + "latency": 0.82748, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 1073741824, + "bandwidthGbps": 336.39, + "latency": 3.19197, + "sizeClass": null, + "correct": true + } + ] + }, + { + "id": "cxt-3051cd1a", + "identity": "rl-mesh|h200|h200-nvlink-island|nvlink|generator_to_trainer|paired|ms", + "cohortIdentity": "rl-mesh|h200|nvlink", + "family": "rl-mesh", "sku": "h200", - "backend": "deepep", - "phase": "decode", - "config": "fp8/ll/layout-and-dispatch", - "reason": "anomaly:roundtrip_gt_isolated_sum", - "returnCode": null, + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "operation": "generator_to_trainer", + "subtype": "paired", + "valid": true, + "status": "valid", + "note": "peak 369 GB/s · world=8: trainer 4 <-> generator 4", + "peakBandwidthGbps": 369.31, + "latencyUnit": "ms", + "colorKey": "h200_3051cd1a", + "label": "H200 · gen->trn · paired", + "generatedAt": "2026-06-27T13:38:51.710797+00:00", "run": { - "id": "28254435010", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254435010", - "createdAt": "2026-06-26T17:29:12Z", - "sha": "60dec7d70f554e252fec87709e2be52752947db1" - } + "id": null, + "url": null, + "createdAt": "2026-06-27T13:38:51.710797+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 1048576, + "bandwidthGbps": 23.89, + "latency": 0.0439, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 156.49, + "latency": 0.0268, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 293.07, + "latency": 0.05725, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 335.17, + "latency": 0.20023, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 358.96, + "latency": 0.74782, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 1073741824, + "bandwidthGbps": 369.31, + "latency": 2.90744, + 
"sizeClass": null, + "correct": true + } + ] }, { - "id": "cxf-33a53f33", - "identity": "h200|deepep|7168|8|256|fp8|ll|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", - "generatedAt": "2026-06-26T23:51:35.330044+00:00", - "publicationStatus": "diagnostic", + "id": "cxt-71059d57", + "identity": "rl-mesh|h200|h200-nvlink-island|nvlink|generator_to_trainer|redistribute|ms", + "cohortIdentity": "rl-mesh|h200|nvlink", + "family": "rl-mesh", + "sku": "h200", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "operation": "generator_to_trainer", + "subtype": "redistribute", + "valid": true, "status": "valid", + "note": "peak 369 GB/s · world=8: trainer 4 <-> generator 4", + "peakBandwidthGbps": 369.31, + "latencyUnit": "ms", + "colorKey": "h200_71059d57", + "label": "H200 · gen->trn · redistribute", + "generatedAt": "2026-06-27T13:38:51.710797+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-27T13:38:51.710797+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 1048576, + "bandwidthGbps": 0.04, + "latency": 25.02575, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 39.38, + "latency": 0.10651, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 155.89, + "latency": 0.10762, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 245.8, + "latency": 0.27303, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 330.18, + "latency": 0.81301, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 1073741824, + "bandwidthGbps": 342.63, + "latency": 3.1338, + "sizeClass": null, + "correct": true + } + ] + }, + { + "id": "cxt-c6f0b6b2", + "identity": "rl-mesh|h200|h200-nvlink-island|nvlink|trainer_to_generator|paired|ms", + "cohortIdentity": "rl-mesh|h200|nvlink", + "family": "rl-mesh", "sku": "h200", - "backend": "deepep", - "phase": 
"decode", - "config": "fp8/ll/runtime-visible", - "reason": "anomaly:roundtrip_gt_isolated_sum", - "returnCode": null, + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "operation": "trainer_to_generator", + "subtype": "paired", + "valid": true, + "status": "valid", + "note": "peak 369 GB/s · world=8: trainer 4 <-> generator 4", + "peakBandwidthGbps": 369.31, + "latencyUnit": "ms", + "colorKey": "h200_c6f0b6b2", + "label": "H200 · trn->gen · paired", + "generatedAt": "2026-06-27T13:38:51.710797+00:00", "run": { - "id": "28271656517", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271656517", - "createdAt": "2026-06-26T23:49:35Z", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - } + "id": null, + "url": null, + "createdAt": "2026-06-27T13:38:51.710797+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 1048576, + "bandwidthGbps": 4.34, + "latency": 0.24155, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 61.01, + "latency": 0.06874, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 288.23, + "latency": 0.05821, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 333.03, + "latency": 0.20151, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 358.08, + "latency": 0.74964, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 1073741824, + "bandwidthGbps": 369.21, + "latency": 2.90821, + "sizeClass": null, + "correct": true + } + ] }, { - "id": "cxf-26d1baf4", - "identity": "mi355x|mori|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|c774c8e4abb34da", - "generatedAt": "2026-06-26T15:40:45.756534+00:00", - "publicationStatus": "diagnostic", + "id": "cxt-494c6e3f", + "identity": "rl-mesh|h200|h200-nvlink-island|nvlink|trainer_to_generator|redistribute|ms", + "cohortIdentity": 
"rl-mesh|h200|nvlink", + "family": "rl-mesh", + "sku": "h200", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "operation": "trainer_to_generator", + "subtype": "redistribute", + "valid": true, "status": "valid", + "note": "peak 369 GB/s · world=8: trainer 4 <-> generator 4", + "peakBandwidthGbps": 369.31, + "latencyUnit": "ms", + "colorKey": "h200_494c6e3f", + "label": "H200 · trn->gen · redistribute", + "generatedAt": "2026-06-27T13:38:51.710797+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-27T13:38:51.710797+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 1048576, + "bandwidthGbps": 0.02, + "latency": 56.31775, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 33.42, + "latency": 0.12549, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 99.65, + "latency": 0.16836, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 180.83, + "latency": 0.37112, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 260.28, + "latency": 1.03132, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 1073741824, + "bandwidthGbps": 340.94, + "latency": 3.14936, + "sizeClass": null, + "correct": true + } + ] + }, + { + "id": "cxt-2963cf1c", + "identity": "rl-mesh|mi355x|mi355x-xgmi|xgmi|generator_to_trainer|paired|ms", + "cohortIdentity": "rl-mesh|mi355x|xgmi", + "family": "rl-mesh", "sku": "mi355x", - "backend": "mori", - "phase": "decode", - "config": "bf16/normal/layout-and-dispatch", - "reason": "resource-nonconforming", - "returnCode": null, + "topologyClass": "mi355x-xgmi", + "transport": "xgmi", + "operation": "generator_to_trainer", + "subtype": "paired", + "valid": true, + "status": "valid", + "note": "peak 72 GB/s · world=8: trainer 4 <-> generator 4", + "peakBandwidthGbps": 71.79, + "latencyUnit": "ms", + "colorKey": "mi355x_2963cf1c", + "label": 
"MI355X · gen->trn · paired", + "generatedAt": "2026-06-28T05:12:36.633047+00:00", "run": { - "id": "28247575150", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28247575150", - "createdAt": "2026-06-26T15:22:26Z", - "sha": "fd23d02b65dba6f1ed963342b188022fc27263d1" - } + "id": null, + "url": null, + "createdAt": "2026-06-28T05:12:36.633047+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 1048576, + "bandwidthGbps": 14.01, + "latency": 0.07485, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 45.33, + "latency": 0.09253, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 62.8, + "latency": 0.26717, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 69.38, + "latency": 0.9672, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 71.12, + "latency": 3.77445, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 1073741824, + "bandwidthGbps": 71.62, + "latency": 14.99269, + "sizeClass": null, + "correct": true + } + ] + }, + { + "id": "cxt-687aa675", + "identity": "rl-mesh|mi355x|mi355x-xgmi|xgmi|generator_to_trainer|redistribute|ms", + "cohortIdentity": "rl-mesh|mi355x|xgmi", + "family": "rl-mesh", + "sku": "mi355x", + "topologyClass": "mi355x-xgmi", + "transport": "xgmi", + "operation": "generator_to_trainer", + "subtype": "redistribute", + "valid": true, + "status": "valid", + "note": "peak 72 GB/s · world=8: trainer 4 <-> generator 4", + "peakBandwidthGbps": 71.79, + "latencyUnit": "ms", + "colorKey": "mi355x_687aa675", + "label": "MI355X · gen->trn · redistribute", + "generatedAt": "2026-06-28T05:12:36.633047+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-28T05:12:36.633047+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 1048576, + "bandwidthGbps": 0.09, + "latency": 12.00838, + "sizeClass": null, + "correct": true + }, + { + 
"sizeBytes": 4194304, + "bandwidthGbps": 13.34, + "latency": 0.3144, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 48.16, + "latency": 0.34836, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 64.08, + "latency": 1.04724, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 69.45, + "latency": 3.8654, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 1073741824, + "bandwidthGbps": 71.41, + "latency": 15.03625, + "sizeClass": null, + "correct": true + } + ] + }, + { + "id": "cxt-0700747c", + "identity": "rl-mesh|mi355x|mi355x-xgmi|xgmi|trainer_to_generator|paired|ms", + "cohortIdentity": "rl-mesh|mi355x|xgmi", + "family": "rl-mesh", + "sku": "mi355x", + "topologyClass": "mi355x-xgmi", + "transport": "xgmi", + "operation": "trainer_to_generator", + "subtype": "paired", + "valid": true, + "status": "valid", + "note": "peak 72 GB/s · world=8: trainer 4 <-> generator 4", + "peakBandwidthGbps": 71.79, + "latencyUnit": "ms", + "colorKey": "mi355x_0700747c", + "label": "MI355X · trn->gen · paired", + "generatedAt": "2026-06-28T05:12:36.633047+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-28T05:12:36.633047+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 1048576, + "bandwidthGbps": 10.53, + "latency": 0.0996, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 44.33, + "latency": 0.09462, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 62.58, + "latency": 0.2681, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 69.37, + "latency": 0.96746, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 71.32, + "latency": 3.76377, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 1073741824, + "bandwidthGbps": 71.79, + "latency": 
14.95774, + "sizeClass": null, + "correct": true + } + ] + }, + { + "id": "cxt-a10511d5", + "identity": "rl-mesh|mi355x|mi355x-xgmi|xgmi|trainer_to_generator|redistribute|ms", + "cohortIdentity": "rl-mesh|mi355x|xgmi", + "family": "rl-mesh", + "sku": "mi355x", + "topologyClass": "mi355x-xgmi", + "transport": "xgmi", + "operation": "trainer_to_generator", + "subtype": "redistribute", + "valid": true, + "status": "valid", + "note": "peak 72 GB/s · world=8: trainer 4 <-> generator 4", + "peakBandwidthGbps": 71.79, + "latencyUnit": "ms", + "colorKey": "mi355x_a10511d5", + "label": "MI355X · trn->gen · redistribute", + "generatedAt": "2026-06-28T05:12:36.633047+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-28T05:12:36.633047+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 1048576, + "bandwidthGbps": 0.01, + "latency": 97.26006, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 14.75, + "latency": 0.28435, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 50.28, + "latency": 0.33368, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 65.3, + "latency": 1.02763, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 70.05, + "latency": 3.83224, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 1073741824, + "bandwidthGbps": 71.74, + "latency": 14.96724, + "sizeClass": null, + "correct": true + } + ] } ], - "scannedRuns": 234, - "scannedArtifacts": 255, - "contributingRuns": 234, - "generatedAt": "2026-06-27T00:54:19.552Z" + "scannedRuns": 313, + "scannedArtifacts": 891, + "contributingRuns": 313, + "generatedAt": "2026-06-29T02:42:52.989Z" } diff --git a/packages/app/src/components/collectivex/CollectiveXDecision.tsx b/packages/app/src/components/collectivex/CollectiveXDecision.tsx new file mode 100644 index 00000000..7ad1dd8d --- /dev/null +++ 
b/packages/app/src/components/collectivex/CollectiveXDecision.tsx @@ -0,0 +1,195 @@ +'use client'; + +import { ExternalLink } from 'lucide-react'; + +import { Card } from '@/components/ui/card'; +import { track } from '@/lib/analytics'; + +import type { CollectiveXDecisionSummary, CollectiveXSummaryCard } from './types'; + +interface CollectiveXDecisionProps { + cards: CollectiveXSummaryCard[]; + decision: CollectiveXDecisionSummary; +} + +function budgetValue(value: number | null | undefined): string { + return value === null || value === undefined ? '-' : value.toLocaleString('en-US'); +} + +function EmptyTable({ label }: { label: string }) { + return ( + +

No {label} data available yet.

+
+ ); +} + +export function CollectiveXDecision({ cards, decision }: CollectiveXDecisionProps) { + return ( +
+
+ {cards.map((card) => { + const body = ( + +

+ {card.title} +

+

+ {card.value} +

+

{card.sub}

+
+ ); + if (!card.href) return
{body}
; + return ( + track('collectivex_decision_card_opened', { title: card.title })} + className="block rounded-xl focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring" + > + {body} + + ); + })} +
+ +
+ +

Max tokens under round-trip p99 budget

+

+ Official headline cells only. Values are the largest measured source tokens per rank + whose measured round-trip p99 stays under the budget. +

+ {decision.maxTokensUnderBudget.length === 0 ? ( +

No budget cell is satisfied yet.

+ ) : ( +
+ + + + + + + + + {decision.budgetsUs.map((budget) => ( + + ))} + + + + {decision.maxTokensUnderBudget.map((row) => ( + + + + + + + {decision.budgetsUs.map((budget) => ( + + ))} + + ))} + +
SKU + Backend + PhaseDtypeEP + {'<= '} + {budget} us +
{row.sku.toUpperCase()}{row.backend}{row.phase}{row.dispatchDtype}{row.epSize === null ? '-' : `EP${row.epSize}`} + {budgetValue(row.budgets[String(budget)])} +
+
+ )} +
+ + +

Lowest dispatch p99 recommendations

+

+ Per SKU and phase at T=64 for decode or T=256 for prefill. +

+ {decision.recommendations.length === 0 ? ( +

No recommendation cells yet.

+ ) : ( +
+ + + + + + + + + + + + {decision.recommendations.map((row) => ( + + + + + + + + ))} + +
SKUPhaseT + p99 us + + Config +
{row.sku.toUpperCase()}{row.phase}{row.atTokensPerRank} + {row.lowestP99DispatchUs.toFixed(1)} + {row.config}
+
+ )} +
+
+ +
+ {[ + ['LL crossover rows', decision.llCrossover.length], + ['Resource Pareto cells', decision.resourcePareto.length], + ['Topology penalty cells', decision.topologyPenalty.length], + ['Skew penalty cells', decision.skewPenalty.length], + ].map(([label, count]) => ( + +

{count}

+

{label}

+
+ ))} +
+ + {decision.llCrossover.length === 0 && + decision.resourcePareto.length === 0 && + decision.topologyPenalty.length === 0 && + decision.skewPenalty.length === 0 ? ( + + ) : ( + +

+ Analysis outputs + +

+

+ Heavy analysis rows are generated into the static snapshot for auditability. The compact + counts above match the source report outputs; detailed EP evidence lives in the Evidence + tab. +

+
+ )} +
+ ); +} diff --git a/packages/app/src/components/collectivex/CollectiveXDisplay.tsx b/packages/app/src/components/collectivex/CollectiveXDisplay.tsx index 9be0c0ce..33ef7254 100644 --- a/packages/app/src/components/collectivex/CollectiveXDisplay.tsx +++ b/packages/app/src/components/collectivex/CollectiveXDisplay.tsx @@ -15,21 +15,32 @@ import { SelectTrigger, SelectValue, } from '@/components/ui/select'; +import { Tabs, TabsContent, TabsList, TabsTrigger } from '@/components/ui/tabs'; import { useCollectiveX } from '@/hooks/api/use-collectivex'; import { useThemeColors } from '@/hooks/useThemeColors'; import { track } from '@/lib/analytics'; import { getModelSortIndex } from '@/lib/constants'; import { CollectiveXChart } from './CollectiveXChart'; +import { CollectiveXDecision } from './CollectiveXDecision'; import { CollectiveXHeatmap } from './CollectiveXHeatmap'; import { CollectiveXScaling } from './CollectiveXScaling'; +import { CollectiveXSizePanel } from './CollectiveXSizePanel'; import { CollectiveXCoverageTable, CollectiveXFailureTable, CollectiveXSensitivityTable, } from './CollectiveXTables'; -import { collectiveXPrefillFloor, comparisonDifferences, publicationMatches } from './data'; +import { + collectiveXDecisionSummary, + collectiveXPrefillFloor, + collectiveXShapeKey, + collectiveXSummaryCards, + comparisonDifferences, + publicationMatches, +} from './data'; import type { + CollectiveXDecisionSummary, CollectiveXOperation, CollectiveXPercentile, CollectiveXPhase, @@ -39,6 +50,7 @@ import type { CollectiveXSuite, CollectiveXXAxis, CollectiveXYAxis, + CollectiveXSummaryCard, } from './types'; const OPERATION_OPTIONS: SegmentedToggleOption[] = [ @@ -68,6 +80,7 @@ const SUITE_OPTIONS: SegmentedToggleOption[] = [ ]; const PUBLICATION_OPTIONS: SegmentedToggleOption[] = [ + { value: 'official-headline', label: 'Official headline' }, { value: 'publishable', label: 'Publishable' }, { value: 'official', label: 'Official only' }, { value: 'all', label: 'All' 
}, @@ -84,6 +97,27 @@ const BASE_PRECISION_OPTIONS: SegmentedToggleOption[] = [ { value: 'fp8', label: 'FP8' }, ]; +const BASE_EP_OPTIONS: SegmentedToggleOption[] = [ + { value: 'all', label: 'All' }, + { value: '8', label: 'EP8' }, +]; + +const HEADLINE_SHAPE_KEY = '7168/8/256'; + +const COLLECTIVEX_TABS = [ + { value: 'ep', label: 'EP dispatch / combine' }, + { value: 'decision', label: 'Decision' }, + { value: 'evidence', label: 'Evidence' }, + { value: 'all-reduce', label: 'All-reduce' }, + { value: 'all-gather', label: 'All-gather' }, + { value: 'offload', label: 'CPU-GPU offload' }, + { value: 'kv-cache', label: 'KV-cache transfer' }, + { value: 'copy-engine', label: 'Copy-engine / SDMA' }, + { value: 'rl-mesh', label: 'RL mesh' }, +] as const; + +type CollectiveXTab = (typeof COLLECTIVEX_TABS)[number]['value']; + const ACTIVATION_ORDER = [ 'normal', 'zeros', @@ -127,6 +161,11 @@ function formatActivation(value: string): string { return ACTIVATION_LABELS[value] ?? value; } +function formatModelShapeOption(item: CollectiveXSeries): string { + const key = collectiveXShapeKey(item.shape); + return `${item.model} (${key})`; +} + function displaySeriesLabel(item: CollectiveXSeries): string { if (item.shape.activationProfile === 'normal') return item.label; return `${item.label} · ${formatActivation(item.shape.activationProfile)} activation`; @@ -210,14 +249,17 @@ function LineStyleKey() { export default function CollectiveXDisplay() { const { data, error, isLoading, isFetching, refetch } = useCollectiveX(); - const [operation, setOperation] = useState('dispatch'); + const [activeTab, setActiveTab] = useState('ep'); + const [operation, setOperation] = useState('roundtrip'); const [phase, setPhase] = useState('decode'); - const [precision, setPrecision] = useState('all'); + const [modelShape, setModelShape] = useState(HEADLINE_SHAPE_KEY); + const [precision, setPrecision] = useState('bf16'); + const [epFilter, setEpFilter] = useState('8'); const [activation, 
setActivation] = useState('all'); - const [percentile, setPercentile] = useState('p50'); - const [suite, setSuite] = useState('backend-default'); + const [percentile, setPercentile] = useState('p99'); + const [suite, setSuite] = useState('resource-constrained'); const [routing, setRouting] = useState('uniform'); - const [publication, setPublication] = useState('publishable'); + const [publication, setPublication] = useState('official-headline'); const [xAxis, setXAxis] = useState('tokens-per-rank'); const [yAxis, setYAxis] = useState('latency'); const [xScaleType, setXScaleType] = useState('log'); @@ -227,12 +269,38 @@ export default function CollectiveXDisplay() { const [highContrast, setHighContrast] = useState(false); const series = data?.series ?? []; + const ncclSeries = data?.nccl ?? []; + const offloadSeries = data?.offload ?? []; + const copyEngineSeries = data?.copyEngine ?? []; + const kvCacheSeries = data?.kvCache ?? []; + const rlMeshSeries = data?.rlMesh ?? []; + const summaryCards: CollectiveXSummaryCard[] = useMemo( + () => data?.summaryCards ?? collectiveXSummaryCards(series, data?.failures ?? []), + [data?.failures, data?.summaryCards, series], + ); + const decision: CollectiveXDecisionSummary = useMemo( + () => data?.decision ?? collectiveXDecisionSummary(series), + [data?.decision, series], + ); const prefillFloor = useMemo(() => collectiveXPrefillFloor(series), [series]); useEffect(() => { if (series.length > 0) setActiveSeriesIds(new Set(series.map((item) => item.id))); }, [series]); + useEffect(() => { + const readHash = () => { + const rawHash = window.location.hash.replace('#', ''); + const tab = rawHash.startsWith('tab-') ? 
rawHash.slice(4) : rawHash; + if (COLLECTIVEX_TABS.some((item) => item.value === tab)) { + setActiveTab(tab as CollectiveXTab); + } + }; + readHash(); + window.addEventListener('hashchange', readHash); + return () => window.removeEventListener('hashchange', readHash); + }, []); + const routingOptions = useMemo(() => { const values = [...new Set(series.map((item) => item.shape.routingLabel))].toSorted((a, b) => { if (a === 'uniform') return -1; @@ -242,6 +310,20 @@ export default function CollectiveXDisplay() { return ['all', ...values]; }, [series]); + const modelShapeOptions = useMemo(() => { + const byShape = new Map(); + for (const item of series) { + const key = collectiveXShapeKey(item.shape); + if (!byShape.has(key)) byShape.set(key, formatModelShapeOption(item)); + } + const shapes = [...byShape.entries()].toSorted(([a], [b]) => { + if (a === HEADLINE_SHAPE_KEY) return -1; + if (b === HEADLINE_SHAPE_KEY) return 1; + return a.localeCompare(b, undefined, { numeric: true }); + }); + return [['all', 'All shapes'], ...shapes] as [string, string][]; + }, [series]); + const precisionOptions = useMemo[]>(() => { const baseValues = new Set(BASE_PRECISION_OPTIONS.map((option) => option.value)); const extraValues = [...new Set(series.map((item) => item.shape.dispatchDtype))] @@ -253,6 +335,16 @@ export default function CollectiveXDisplay() { ]; }, [series]); + const epOptions = useMemo[]>(() => { + const baseValues = new Set(BASE_EP_OPTIONS.map((option) => option.value)); + const extraValues = [...new Set(series.map((item) => item.epSize))] + .filter((value): value is number => value !== null) + .map(String) + .filter((value) => !baseValues.has(value)) + .toSorted((a, b) => Number(a) - Number(b)); + return [...BASE_EP_OPTIONS, ...extraValues.map((value) => ({ value, label: `EP${value}` }))]; + }, [series]); + const activationOptions = useMemo(() => { const values = [...new Set(series.map((item) => item.shape.activationProfile))].toSorted( (a, b) => { @@ -276,12 +368,28 
@@ export default function CollectiveXDisplay() { setRouting(routingOptions.includes('uniform') ? 'uniform' : 'all'); }, [routing, routingOptions, series.length]); + useEffect(() => { + if (series.length === 0 || modelShape === 'all') return; + if (modelShapeOptions.some(([value]) => value === modelShape)) return; + setModelShape( + modelShapeOptions.some(([value]) => value === HEADLINE_SHAPE_KEY) + ? HEADLINE_SHAPE_KEY + : 'all', + ); + }, [modelShape, modelShapeOptions, series.length]); + useEffect(() => { if (series.length === 0 || precision === 'all') return; if (precisionOptions.some((option) => option.value === precision)) return; setPrecision('all'); }, [precision, precisionOptions, series.length]); + useEffect(() => { + if (series.length === 0 || epFilter === 'all') return; + if (epOptions.some((option) => option.value === epFilter)) return; + setEpFilter('all'); + }, [epFilter, epOptions, series.length]); + useEffect(() => { if (series.length === 0 || activation === 'all') return; if (activationOptions.includes(activation)) return; @@ -292,10 +400,12 @@ export default function CollectiveXDisplay() { () => series.filter( (item) => + (modelShape === 'all' || collectiveXShapeKey(item.shape) === modelShape) && + (epFilter === 'all' || String(item.epSize) === epFilter) && (precision === 'all' || item.shape.dispatchDtype === precision) && (activation === 'all' || item.shape.activationProfile === activation), ), - [activation, precision, series], + [activation, epFilter, modelShape, precision, series], ); const filteredSeries = useMemo( @@ -304,14 +414,36 @@ export default function CollectiveXDisplay() { (item) => (suite === 'all' || item.suite === suite) && (routing === 'all' || item.shape.routingLabel === routing) && - publicationMatches(item, publication), + publicationMatches(item, publication, modelShape), ), - [dimensionFilteredSeries, publication, routing, suite], + [dimensionFilteredSeries, modelShape, publication, routing, suite], ); const phaseSeries = 
useMemo( () => filteredSeries.filter((item) => item.phase === phase), [filteredSeries, phase], ); + + useEffect(() => { + if (series.length === 0 || phaseSeries.length > 0) return; + if (suite !== 'all') { + setSuite('all'); + return; + } + if (precision !== 'all') { + setPrecision('all'); + return; + } + if (epFilter !== 'all') { + setEpFilter('all'); + return; + } + if (publication === 'official-headline') { + setPublication('publishable'); + return; + } + if (publication !== 'all') setPublication('all'); + }, [epFilter, phaseSeries.length, precision, publication, series.length, suite]); + const activePhaseSeries = useMemo( () => phaseSeries.filter((item) => activeSeriesIds.has(item.id)), [activeSeriesIds, phaseSeries], @@ -322,9 +454,9 @@ export default function CollectiveXDisplay() { (item) => item.phase === phase && (suite === 'all' || item.suite === suite) && - publicationMatches(item, publication), + publicationMatches(item, publication, modelShape), ), - [dimensionFilteredSeries, phase, publication, suite], + [dimensionFilteredSeries, modelShape, phase, publication, suite], ); const scalingColorSeries = useMemo( () => @@ -340,8 +472,28 @@ export default function CollectiveXDisplay() { [dimensionFilteredSeries], ); const colorKeys = useMemo( - () => [...new Set([...filteredSeries, ...scalingColorSeries].map((item) => item.colorKey))], - [filteredSeries, scalingColorSeries], + () => [ + ...new Set( + [ + ...filteredSeries, + ...scalingColorSeries, + ...ncclSeries, + ...offloadSeries, + ...copyEngineSeries, + ...kvCacheSeries, + ...rlMeshSeries, + ].map((item) => item.colorKey), + ), + ], + [ + copyEngineSeries, + filteredSeries, + kvCacheSeries, + ncclSeries, + offloadSeries, + rlMeshSeries, + scalingColorSeries, + ], ); const { resolveColor, getCssColor } = useThemeColors({ highContrast, @@ -370,6 +522,8 @@ export default function CollectiveXDisplay() { ); const runs = useMemo(() => uniqueRuns(series), [series]); const hardwareCount = new 
Set(activePhaseSeries.map((item) => item.sku)).size; + const selectedModelLabel = + modelShapeOptions.find(([value]) => value === modelShape)?.[1] ?? modelShape; const overviewGroups = useMemo(() => { const availablePhases = PHASE_OPTIONS.map((option) => option.value).filter((candidate) => filteredSeries.some((item) => item.phase === candidate), @@ -411,6 +565,14 @@ export default function CollectiveXDisplay() { const eplbExample = series.find( (item) => item.eplbImbalanceBefore !== null && item.eplbImbalanceAfter !== null, ); + const allReduceSeries = useMemo( + () => ncclSeries.filter((item) => item.op === 'all_reduce'), + [ncclSeries], + ); + const allGatherSeries = useMemo( + () => ncclSeries.filter((item) => item.op === 'all_gather'), + [ncclSeries], + ); const legendItems = useMemo( () => @@ -423,13 +585,16 @@ export default function CollectiveXDisplay() { ) .map((item) => ({ name: item.id, - label: displaySeriesLabel(item), + label: + modelShape === 'all' + ? `[${item.model}] ${displaySeriesLabel(item)}` + : displaySeriesLabel(item), color: colors[item.colorKey] ?? 
'var(--muted-foreground)', isActive: activeSeriesIds.has(item.id), - title: `${item.publicationStatus} · ${item.shape.routingLabel} · ${item.topologyClass} · ${item.measurementContract} · ${formatActivation(item.shape.activationProfile)} activation`, + title: `${item.model} · ${item.publicationStatus} · ${item.shape.routingLabel} · ${item.topologyClass} · ${item.measurementContract} · ${formatActivation(item.shape.activationProfile)} activation`, onClick: () => toggleSeries(item.id), })), - [activeSeriesIds, colors, phaseSeries, toggleSeries], + [activeSeriesIds, colors, modelShape, phaseSeries, toggleSeries], ); const handleRefresh = useCallback(() => { @@ -437,6 +602,15 @@ export default function CollectiveXDisplay() { void refetch(); }, [refetch]); + const handleTabChange = useCallback((value: string) => { + const tab = value as CollectiveXTab; + setActiveTab(tab); + track('collectivex_tab_changed', { tab }); + if (typeof window !== 'undefined') { + window.history.replaceState(null, '', `#tab-${tab}`); + } + }, []); + if (isLoading) { return ( @@ -559,6 +733,26 @@ export default function CollectiveXDisplay() { Chart

+ + +
+ + { + setEpFilter(value); + track('collectivex_ep_degree_changed', { ep: value }); + }} + ariaLabel="CollectiveX EP degree" + testId="collectivex-ep-toggle" + className="flex-wrap" + /> +
+ { - setXAxis(value as CollectiveXXAxis); - track('collectivex_x_axis_changed', { axis: value }); + setSuite(value); + track('collectivex_suite_changed', { suite: value }); }} - > - - - - - Source tokens / rank - Global source tokens - - + ariaLabel="CollectiveX comparison suite" + testId="collectivex-suite-toggle" + className="flex-wrap" + /> - + - - { - setXScaleType(value); - track('collectivex_x_scale_changed', { scale: value }); - }} - ariaLabel="CollectiveX x scale" - testId="collectivex-x-scale-toggle" - /> - - - { - setYScaleType(value); - track('collectivex_y_scale_changed', { scale: value }); - }} - ariaLabel="CollectiveX y scale" - testId="collectivex-y-scale-toggle" - /> - -
-
- -
-

- Filters -

-

- Precision is the dispatch dtype. Activation profile is an independent benchmark - dimension. -

-
-
- +
+
@@ -897,31 +892,6 @@ export default function CollectiveXDisplay() { className="flex-wrap" />
-
- - - -
- +
+ + { + setPublication(value); + track('collectivex_publication_changed', { publication: value }); + }} + ariaLabel="CollectiveX publication status" + testId="collectivex-publication-toggle" + className="flex-wrap" + /> + +
+ + + + + + + { - setSuite(value); - track('collectivex_suite_changed', { suite: value }); + setXScaleType(value); + track('collectivex_x_scale_changed', { scale: value }); }} - ariaLabel="CollectiveX comparison suite" - testId="collectivex-suite-toggle" - className="flex-wrap" + ariaLabel="CollectiveX x scale" + testId="collectivex-x-scale-toggle" /> - + + + + { - setPublication(value); - track('collectivex_publication_changed', { publication: value }); + setYScaleType(value); + track('collectivex_y_scale_changed', { scale: value }); }} - ariaLabel="CollectiveX publication status" - testId="collectivex-publication-toggle" - className="flex-wrap" + ariaLabel="CollectiveX y scale" + testId="collectivex-y-scale-toggle" />
@@ -1005,6 +1043,7 @@ export default function CollectiveXDisplay() {

{OPERATION_LABELS[operation]} · {phase} · {percentile} {modelShape === 'all' ? ' · all shapes' : ` · ${selectedModelLabel}`} + {backendFilter === 'all' ? '' : ` · ${backendFilter}`} {precision === 'all' ? '' : ` · ${precision.toUpperCase()}`} {epFilter === 'all' ? '' : ` · EP${epFilter}`} {activation === 'all' ? '' : ` · ${formatActivation(activation)} activation`} @@ -1125,8 +1164,8 @@ export default function CollectiveXDisplay() { {overviewGroups.length === 0 ? (

- No latency panels match the current precision, activation, suite, routing, and - publication filters. + No latency panels match the current backend, precision, activation, suite, + routing, and publication filters.

) : ( @@ -1179,8 +1218,8 @@ export default function CollectiveXDisplay() {

Scaling

Strong and weak scaling are distinct experiments with separately labeled fixed-work - contracts. Precision and activation filters apply; a chart appears once a SKU has - matched measurements at two EP degrees. + contracts. Backend, precision, and activation filters apply; a chart appears once a + SKU has matched measurements at two EP degrees.

@@ -1192,8 +1231,8 @@ export default function CollectiveXDisplay() {

Heatmaps

Dispatch p50 across EP, routing, and resource dimensions for the current phase, - precision, activation, suite, and publication filters. The routing selector is - intentionally not applied here. + backend, precision, activation, suite, and publication filters. The routing selector + is intentionally not applied here.

diff --git a/packages/app/src/components/collectivex/data.test.ts b/packages/app/src/components/collectivex/data.test.ts index 3a5e60b1..ef95d335 100644 --- a/packages/app/src/components/collectivex/data.test.ts +++ b/packages/app/src/components/collectivex/data.test.ts @@ -6,6 +6,7 @@ import { collectiveXHeatmapCells, collectiveXPrefillFloor, collectiveXScalingPoints, + collectiveXSeriesLabel, comparisonDifferences, distributionSensitivity, metricValue, @@ -188,6 +189,32 @@ describe('normalizeCollectiveXDocument', () => { }); }); + it('labels DeepEP v2 as a distinct backend generation', () => { + const baseShape = rawDocument().shape as Record; + const v1 = rawDocument({ + backend: 'deepep', + backend_provenance: { deepep_version: '1.2.1' }, + }); + const v2 = rawDocument({ + backend: 'deepep', + backend_provenance: { deepep_version: '2.0.0+af9a040' }, + shape: { + ...baseShape, + kernel_gen: 'v2', + }, + }); + const series = normalized(v2); + + expect(series.label).toContain('deepep v2'); + expect(collectiveXSeriesLabel({ ...series, label: 'MI355X EP8 · deepep · bf16' })).toContain( + 'deepep v2', + ); + expect( + chartPoints([series], 'dispatch', 'p99', 'tokens-per-rank', 'latency')[0]?.seriesLabel, + ).toContain('deepep v2'); + expect(collectiveXConfigIdentity(v1)).not.toBe(collectiveXConfigIdentity(v2)); + }); + it('supports legacy flat rows without mislabeling the isolated sum as measured', () => { const series = normalized({ schema_version: 1, diff --git a/packages/app/src/components/collectivex/data.ts b/packages/app/src/components/collectivex/data.ts index a484e59b..af0125a2 100644 --- a/packages/app/src/components/collectivex/data.ts +++ b/packages/app/src/components/collectivex/data.ts @@ -174,6 +174,27 @@ function backendVersion(raw: Record): string | null { ); } +function backendKernelGeneration(backend: string, version: string | null): string { + if (backend !== 'deepep') return 'n-a'; + if (version && (/^2(?:\.|$)/u.test(version) || 
/\bv2\b/iu.test(version))) return 'v2'; + return 'v1'; +} + +export function collectiveXBackendLabel(backend: string, version: string | null): string { + return backendKernelGeneration(backend, version) === 'v2' ? `${backend} v2` : backend; +} + +export function collectiveXSeriesLabel( + series: Pick, +): string { + const backendLabel = collectiveXBackendLabel(series.backend, series.backendVersion); + if (backendLabel === series.backend) return series.label; + + const backendSegment = `· ${series.backend} ·`; + if (!series.label.includes(backendSegment)) return series.label; + return series.label.replace(backendSegment, `· ${backendLabel} ·`); +} + function stableHash(value: string): string { let hash = 2166136261; for (const character of value) { @@ -236,6 +257,10 @@ function rawConfig(raw: Record) { const routingIdentity = isRecord(raw.routing_identity) ? raw.routing_identity : {}; const eplb = isRecord(raw.eplb) ? raw.eplb : {}; const placement = isRecord(raw.placement) ? raw.placement : {}; + const backend = stringValue(raw.backend, 'unknown'); + const version = backendVersion(raw); + const kernelGeneration = + stringValue(shape.kernel_gen) || backendKernelGeneration(backend, version); const phase: CollectiveXPhase | null = raw.phase === 'decode' || raw.phase === 'prefill' ? raw.phase : null; const routing = stringValue(shape.routing, 'unknown'); @@ -252,7 +277,9 @@ function rawConfig(raw: Record) { return { runner, sku: skuFromRunner(runner), - backend: stringValue(raw.backend, 'unknown'), + backend, + backendVersion: version, + backendKernelGeneration: kernelGeneration, phase, mode: stringValue(raw.mode, 'normal'), resourceMode: stringValue(raw.resource_mode) || 'tuned', @@ -307,6 +334,7 @@ export function collectiveXConfigIdentity(raw: Record): string return [ config.sku, config.backend, + config.backendKernelGeneration, config.hidden ?? '', config.topk ?? '', config.experts ?? 
'', @@ -331,6 +359,7 @@ function colorKey(config: ReturnType): string { [ config.sku, config.backend, + config.backendKernelGeneration, config.dispatchDtype, config.mode, config.resourceMode, @@ -349,9 +378,10 @@ function buildLabel(config: ReturnType): string { config.measurementContract === 'cached-layout-comm-only-v1' ? '[cl]' : '', ].filter(Boolean); const routing = config.routingLabel === 'uniform' ? '' : ` · ${config.routingLabel}`; - return `${config.sku.toUpperCase()} EP${config.epSize ?? '?'} · ${config.backend} · ${ - config.dispatchDtype - }${suffixes.length > 0 ? ` ${suffixes.join(' ')}` : ''}${routing}`; + return `${config.sku.toUpperCase()} EP${config.epSize ?? '?'} · ${collectiveXBackendLabel( + config.backend, + config.backendVersion, + )} · ${config.dispatchDtype}${suffixes.length > 0 ? ` ${suffixes.join(' ')}` : ''}${routing}`; } export function normalizeCollectiveXDocument( @@ -432,7 +462,7 @@ export function normalizeCollectiveXDocument( workloadSource: config.workloadSource, eplbImbalanceBefore: config.eplbImbalanceBefore, eplbImbalanceAfter: config.eplbImbalanceAfter, - backendVersion: backendVersion(raw), + backendVersion: config.backendVersion, imageDigest: config.imageDigest, repository: config.repository, run: runSource(raw, generatedAt, context), @@ -1137,7 +1167,7 @@ export function chartPoints( .filter((row) => item.phase !== 'prefill' || row.tokensPerRank >= prefillFloor) .map((row) => ({ seriesId: item.id, - seriesLabel: item.label, + seriesLabel: collectiveXSeriesLabel(item), colorKey: item.colorKey, x: xAxis === 'tokens-per-rank' ? 
row.tokensPerRank : row.globalTokens, y: metricValue(row, operation, percentileKey, yAxis), @@ -1157,6 +1187,9 @@ export function comparisonDifferences(series: CollectiveXSeries[]): string[] { new Set(series.map(getValue)).size > 1; if (different((item) => item.topologyClass)) warnings.push('topology'); + if (different((item) => collectiveXBackendLabel(item.backend, item.backendVersion))) { + warnings.push('backend generation'); + } if (different((item) => item.epSize)) warnings.push('EP degree'); if (different((item) => item.shape.dispatchDtype)) warnings.push('dispatch dtype'); if (different((item) => item.mode)) warnings.push('kernel mode'); From 0a91b6d8581250da17d571de8f738cabd09f1074 Mon Sep 17 00:00:00 2001 From: Oseltamivir <58582368+Oseltamivir@users.noreply.github.com> Date: Tue, 30 Jun 2026 09:24:10 +0800 Subject: [PATCH 08/23] fix: ingest CollectiveX aggregate runs --- packages/app/public/data/collectivex.json | 137556 +++------------ .../collectivex/CollectiveXDisplay.tsx | 4 +- .../src/components/collectivex/data.test.ts | 1 + .../app/src/components/collectivex/data.ts | 28 +- .../app/src/components/collectivex/types.ts | 1 + .../app/src/lib/collectivex-snapshot.test.ts | 53 +- packages/app/src/lib/collectivex-snapshot.ts | 57 +- 7 files changed, 24205 insertions(+), 113495 deletions(-) diff --git a/packages/app/public/data/collectivex.json b/packages/app/public/data/collectivex.json index 2b44f424..20838d6e 100644 --- a/packages/app/public/data/collectivex.json +++ b/packages/app/public/data/collectivex.json @@ -2,28 +2,28 @@ "snapshotVersion": 3, "series": [ { - "id": "cx-0eafa1d5", - "identity": "b300|deepep|4096|8|128|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||75530960a30b452", - "colorKey": "b300_c9569580", - "comparisonKey": "62e1e2299cdc509d", + "id": "cx-5d8b357a", + "identity": "gb200|deepep|v1|4096|8|128|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ebe68878aa18bb0", + 
"colorKey": "gb200_7c2da03d", + "comparisonKey": "fa3808de096d4a7a", "schemaVersion": 3, - "generatedAt": "2026-06-27T11:14:16.179311+00:00", + "generatedAt": "2026-06-29T14:00:04.159261+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_14", - "sku": "b300", + "publicationStatus": "comparable-experimental", + "runner": "gb200-nv_1", + "sku": "gb200", "backend": "deepep", "phase": "decode", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", + "measurementContract": "runtime-visible-v1", + "topologyClass": "gb200-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep · bf16", + "label": "GB200 EP8 · deepep · bf16", "model": "Qwen3.5", "shape": { "hidden": 4096, @@ -35,14 +35,15 @@ "unevenTokens": "none", "eplbEnabled": false, "dispatchDtype": "bf16", + "kernelGeneration": "v1", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1351, + "achievedFraction": 0.1316, "configuredUnits": 20, - "deviceUnits": 148, + "deviceUnits": 152, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -50,59 +51,59 @@ }, "placement": { "kind": "packed", - "nodes": 1, + "nodes": 2, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "75530960a30b452", - "workloadId": "set:8:d1b92539bddfb570", - "workloadSource": "canonical-serialized", + "traceSignature": "ebe68878aa18bb0", + "workloadId": null, + "workloadSource": "seeded-runtime", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", + "backendVersion": "1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", 
"run": { - "id": "28287508460", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28287508460", - "createdAt": "2026-06-27T11:14:16.179311+00:00", - "sha": "df7fddee0a275156d4c72fa006cd2b73bce72613" + "id": "28374335449", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449", + "createdAt": "2026-06-29T13:08:20Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 56.992001831531525, - "p90": 59.328000992536545, - "p95": 62.55999952554703, - "p99": 80.38400113582611 + "p50": 88.70399743318558, + "p90": 319.4560110569, + "p95": 368.3199882507324, + "p99": 390.56000113487244 }, "combine": { - "p50": 55.00800162553787, - "p90": 57.0559985935688, - "p95": 64.41599875688553, - "p99": 65.92000275850296 + "p50": 70.8480030298233, + "p90": 330.49601316452026, + "p95": 350.3040075302124, + "p99": 363.77599835395813 }, "roundtrip": { - "p50": 94.81599926948547, - "p90": 97.63199836015701, - "p95": 99.04000163078308, - "p99": 108.0000028014183 + "p50": 136.1279934644699, + "p90": 390.3680145740509, + "p95": 427.16801166534424, + "p99": 443.1680142879486 }, "isolatedSum": { - "p50": 112.0000034570694, - "p90": 116.38399958610535, - "p95": 126.97599828243256, - "p99": 146.30400389432907 + "p50": 159.55200046300888, + "p90": 649.9520242214203, + "p95": 718.6239957809448, + "p99": 754.3359994888306 }, "roundtripMeasured": true, "dispatchLogicalBytes": 344064, "combineLogicalBytes": 344064, "fanoutMean": 5.25, "recvTokensMax": 6, - "stragglerRank": 7, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 @@ -111,35 +112,35 @@ "tokensPerRank": 2, "globalTokens": 16, "dispatch": { - "p50": 56.89600110054016, - "p90": 59.039998799562454, - "p95": 61.15199998021126, - "p99": 82.04799890518188 + "p50": 88.70399743318558, + "p90": 328.8320004940033, + "p95": 365.59998989105225, + "p99": 387.2320055961609 }, "combine": { - "p50": 55.67999929189682, 
- "p90": 58.400001376867294, - "p95": 64.67200070619583, - "p99": 76.67200267314911 + "p50": 71.42399996519089, + "p90": 338.6879861354828, + "p95": 352.7039885520935, + "p99": 362.91199922561646 }, "roundtrip": { - "p50": 95.16800194978714, - "p90": 98.11200201511383, - "p95": 100.67199915647507, - "p99": 112.03200370073318 + "p50": 137.66400516033173, + "p90": 396.09599113464355, + "p95": 427.45599150657654, + "p99": 439.8399889469147 }, "isolatedSum": { - "p50": 112.57600039243698, - "p90": 117.44000017642975, - "p95": 125.82400068640709, - "p99": 158.720001578331 + "p50": 160.12799739837646, + "p90": 667.5199866294861, + "p95": 718.3039784431458, + "p99": 750.1440048217773 }, "roundtripMeasured": true, "dispatchLogicalBytes": 704512, "combineLogicalBytes": 704512, "fanoutMean": 5.375, "recvTokensMax": 12, - "stragglerRank": 7, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 @@ -148,35 +149,35 @@ "tokensPerRank": 4, "globalTokens": 32, "dispatch": { - "p50": 57.21599981188774, - "p90": 59.74400043487549, - "p95": 61.664000153541565, - "p99": 77.18399912118912 + "p50": 90.36800265312195, + "p90": 332.0640027523041, + "p95": 372.1280097961426, + "p99": 386.49600744247437 }, "combine": { - "p50": 56.063998490571976, - "p90": 58.14399942755699, - "p95": 64.92800265550613, - "p99": 78.68800312280655 + "p50": 72.57600128650665, + "p90": 318.91199946403503, + "p95": 351.3279855251312, + "p99": 366.62399768829346 }, "roundtrip": { - "p50": 95.74399888515472, - "p90": 98.78399968147278, - "p95": 103.26399654150009, - "p99": 113.0559965968132 + "p50": 138.5599970817566, + "p90": 397.11999893188477, + "p95": 425.9839951992035, + "p99": 444.9920058250427 }, "isolatedSum": { - "p50": 113.27999830245972, - "p90": 117.88799986243248, - "p95": 126.5920028090477, - "p99": 155.87200224399567 + "p50": 162.9440039396286, + "p90": 650.9760022163391, + "p95": 723.4559953212738, + "p99": 753.1200051307678 }, "roundtripMeasured": true, "dispatchLogicalBytes": 
1384448, "combineLogicalBytes": 1384448, "fanoutMean": 5.28125, "recvTokensMax": 26, - "stragglerRank": 7, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 @@ -185,35 +186,35 @@ "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 58.079998940229416, - "p90": 61.08799949288368, - "p95": 62.65600025653839, - "p99": 71.68000191450119 + "p50": 89.9519994854927, + "p90": 120.86399644613266, + "p95": 360.4480028152466, + "p99": 390.8480107784271 }, "combine": { - "p50": 64.44799900054932, - "p90": 66.23999774456024, - "p95": 66.59200042486191, - "p99": 69.023996591568 + "p50": 73.69600236415863, + "p90": 326.1120021343231, + "p95": 354.0799915790558, + "p99": 365.1520013809204 }, "roundtrip": { - "p50": 108.8000014424324, - "p90": 113.95200341939926, - "p95": 114.84800279140472, - "p99": 122.72000312805176 + "p50": 140.86399972438812, + "p90": 409.59998965263367, + "p95": 438.04800510406494, + "p99": 451.9999921321869 }, "isolatedSum": { - "p50": 122.52799794077873, - "p90": 127.32799723744392, - "p95": 129.2480006814003, - "p99": 140.70399850606918 + "p50": 163.64800184965134, + "p90": 446.9759985804558, + "p95": 714.5279943943024, + "p99": 756.0000121593475 }, "roundtripMeasured": true, "dispatchLogicalBytes": 2744320, "combineLogicalBytes": 2744320, "fanoutMean": 5.234375, "recvTokensMax": 49, - "stragglerRank": 4, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 @@ -222,35 +223,35 @@ "tokensPerRank": 16, "globalTokens": 128, "dispatch": { - "p50": 58.687999844551086, - "p90": 61.055999249219894, - "p95": 63.00800293684006, - "p99": 71.96799665689468 + "p50": 91.80799871683121, + "p90": 332.70400762557983, + "p95": 379.35999035835266, + "p99": 392.0319974422455 }, "combine": { - "p50": 57.82400071620941, - "p90": 66.3679987192154, - "p95": 66.81600213050842, - "p99": 77.98399776220322 + "p50": 75.71200281381607, + "p90": 338.9120101928711, + "p95": 355.3279936313629, + "p99": 367.0080006122589 }, "roundtrip": 
{ - "p50": 111.39199882745743, - "p90": 122.04799801111221, - "p95": 126.5919953584671, - "p99": 132.86399841308594 + "p50": 142.30400323867798, + "p90": 393.887996673584, + "p95": 428.8960099220276, + "p99": 445.95199823379517 }, "isolatedSum": { - "p50": 116.5120005607605, - "p90": 127.42399796843529, - "p95": 129.82400506734848, - "p99": 149.9519944190979 + "p50": 167.52000153064728, + "p90": 671.6160178184509, + "p95": 734.6879839897156, + "p99": 759.0399980545044 }, "roundtripMeasured": true, "dispatchLogicalBytes": 5464064, "combineLogicalBytes": 5464064, "fanoutMean": 5.2109375, "recvTokensMax": 94, - "stragglerRank": 4, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 @@ -259,35 +260,35 @@ "tokensPerRank": 32, "globalTokens": 256, "dispatch": { - "p50": 70.8480030298233, - "p90": 74.68800246715546, - "p95": 75.71200281381607, - "p99": 81.31200075149536 + "p50": 98.04800152778625, + "p90": 367.74399876594543, + "p95": 386.30399107933044, + "p99": 400.38400888442993 }, "combine": { - "p50": 66.30399823188782, - "p90": 67.07199662923813, - "p95": 67.71200150251389, - "p99": 77.15199887752533 + "p50": 80.92799782752991, + "p90": 349.727988243103, + "p95": 362.8480136394501, + "p99": 374.752014875412 }, "roundtrip": { - "p50": 108.99200290441513, - "p90": 114.07999694347382, - "p95": 116.7680025100708, - "p99": 132.47999548912048 + "p50": 146.43199741840363, + "p90": 423.2639968395233, + "p95": 440.12799859046936, + "p99": 459.6799910068512 }, "isolatedSum": { - "p50": 137.15200126171112, - "p90": 141.75999909639359, - "p95": 143.42400431632996, - "p99": 158.4639996290207 + "p50": 178.97599935531616, + "p90": 717.4719870090485, + "p95": 749.1520047187805, + "p99": 775.1360237598419 }, "roundtripMeasured": true, "dispatchLogicalBytes": 11124736, "combineLogicalBytes": 11124736, "fanoutMean": 5.3046875, "recvTokensMax": 186, - "stragglerRank": 7, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -296,35 +297,35 @@ 
"tokensPerRank": 64, "globalTokens": 512, "dispatch": { - "p50": 70.72000205516815, - "p90": 72.95999675989151, - "p95": 74.8480036854744, - "p99": 81.02399855852127 + "p50": 101.47199779748917, + "p90": 357.12000727653503, + "p95": 386.24000549316406, + "p99": 410.3359878063202 }, "combine": { - "p50": 78.75200361013412, - "p90": 79.55200225114822, - "p95": 80.19199967384338, - "p99": 95.96800059080124 + "p50": 91.39200299978256, + "p90": 105.8880016207695, + "p95": 340.4479920864105, + "p99": 377.3120045661926 }, "roundtrip": { - "p50": 131.77600502967834, - "p90": 136.63999736309052, - "p95": 138.91200721263885, - "p99": 158.04800391197205 + "p50": 165.56799411773682, + "p90": 411.9360148906708, + "p95": 429.4399917125702, + "p99": 461.60000562667847 }, "isolatedSum": { - "p50": 149.47200566530228, - "p90": 152.51199901103973, - "p95": 155.04000335931778, - "p99": 176.9919991493225 + "p50": 192.86400079727173, + "p90": 463.00800889730453, + "p95": 726.6879975795746, + "p99": 787.6479923725128 }, "roundtripMeasured": true, "dispatchLogicalBytes": 22192128, "combineLogicalBytes": 22192128, "fanoutMean": 5.291015625, "recvTokensMax": 358, - "stragglerRank": 7, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -333,35 +334,35 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 82.5280025601387, - "p90": 85.21600067615509, - "p95": 88.16000074148178, - "p99": 100.80000013113022 + "p50": 112.2559979557991, + "p90": 357.2160005569458, + "p95": 379.61599230766296, + "p99": 414.36800360679626 }, "combine": { - "p50": 91.77599847316742, - "p90": 94.59199756383896, - "p95": 101.72799974679947, - "p99": 104.92800176143646 + "p50": 107.90400207042694, + "p90": 344.4159924983978, + "p95": 355.103999376297, + "p99": 397.5360095500946 }, "roundtrip": { - "p50": 157.53600001335144, - "p90": 165.24800658226013, - "p95": 166.97600483894348, - "p99": 184.76800620555878 + "p50": 191.8720006942749, + "p90": 220.89600563049316, + "p95": 
477.08800435066223, + "p99": 499.55201148986816 }, "isolatedSum": { - "p50": 174.30400103330612, - "p90": 179.80799823999405, - "p95": 189.88800048828125, - "p99": 205.72800189256668 + "p50": 220.16000002622604, + "p90": 701.6319930553436, + "p95": 734.71999168396, + "p99": 811.9040131568909 }, "roundtripMeasured": true, "dispatchLogicalBytes": 44564480, "combineLogicalBytes": 44564480, "fanoutMean": 5.3125, "recvTokensMax": 699, - "stragglerRank": 7, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -369,16 +370,16 @@ ] }, { - "id": "cx-73ede381", - "identity": "b300|deepep|4096|8|128|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||75530960a30b452", - "colorKey": "b300_307ed708", - "comparisonKey": "29583b2aa22167e0", + "id": "cx-b83f938d", + "identity": "gb200|deepep|v1|5120|8|160|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||13e2b193b87a112", + "colorKey": "gb200_7c2da03d", + "comparisonKey": "ad961b604b617551", "schemaVersion": 3, - "generatedAt": "2026-06-27T09:51:53.146142+00:00", + "generatedAt": "2026-06-29T14:01:52.765724+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_04", - "sku": "b300", + "publicationStatus": "comparable-experimental", + "runner": "gb200-nv_1", + "sku": "gb200", "backend": "deepep", "phase": "decode", "mode": "normal", @@ -386,30 +387,31 @@ "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "runtime-visible-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", + "topologyClass": "gb200-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep · bf16", - "model": "Qwen3.5", + "label": "GB200 EP8 · deepep · bf16", + "model": "shape 5120/8/160", "shape": { - "hidden": 4096, + "hidden": 5120, "topk": 8, - "experts": 128, + "experts": 160, "routing": "uniform", "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", 
"eplbEnabled": false, "dispatchDtype": "bf16", + "kernelGeneration": "v1", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1351, + "achievedFraction": 0.1316, "configuredUnits": 20, - "deviceUnits": 148, + "deviceUnits": 152, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -417,59 +419,59 @@ }, "placement": { "kind": "packed", - "nodes": 1, + "nodes": 2, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "75530960a30b452", - "workloadId": "set:8:d1b92539bddfb570", - "workloadSource": "canonical-serialized", + "traceSignature": "13e2b193b87a112", + "workloadId": null, + "workloadSource": "seeded-runtime", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", + "backendVersion": "1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28285698979", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285698979", - "createdAt": "2026-06-27T09:51:53.146142+00:00", - "sha": "149586650dbed5b7579537347e9489d5b41543c1" + "id": "28374335449", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449", + "createdAt": "2026-06-29T13:08:20Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 55.776000022888184, - "p90": 58.14399942755699, - "p95": 59.93599817156792, - "p99": 65.95200300216675 + "p50": 97.50399738550186, + "p90": 341.0240113735199, + "p95": 371.2959885597229, + "p99": 406.17600083351135 }, "combine": { - "p50": 54.71999943256378, - "p90": 56.063998490571976, - "p95": 57.151999324560165, - "p99": 65.69600105285645 + "p50": 74.91199672222137, + "p90": 330.9760093688965, + "p95": 353.7600040435791, + "p99": 368.4479892253876 }, "roundtrip": 
{ - "p50": 93.31200271844864, - "p90": 95.96800059080124, - "p95": 98.01600128412247, - "p99": 104.86400127410889 + "p50": 148.41599762439728, + "p90": 393.44000816345215, + "p95": 424.3839979171753, + "p99": 458.71999859809875 }, "isolatedSum": { - "p50": 110.49599945545197, - "p90": 114.20799791812897, - "p95": 117.08799749612808, - "p99": 131.6480040550232 + "p50": 172.41599410772324, + "p90": 672.0000207424164, + "p95": 725.055992603302, + "p99": 774.6239900588989 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 344064, - "combineLogicalBytes": 344064, + "dispatchLogicalBytes": 430080, + "combineLogicalBytes": 430080, "fanoutMean": 5.25, - "recvTokensMax": 6, - "stragglerRank": 4, + "recvTokensMax": 8, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 @@ -478,35 +480,35 @@ "tokensPerRank": 2, "globalTokens": 16, "dispatch": { - "p50": 56.09599873423576, - "p90": 57.792000472545624, - "p95": 59.55199897289276, - "p99": 68.15999746322632 + "p50": 96.44799679517746, + "p90": 332.5439989566803, + "p95": 372.79999256134033, + "p99": 399.07199144363403 }, "combine": { - "p50": 55.07199838757515, - "p90": 56.671999394893646, - "p95": 57.28000029921532, - "p99": 65.2799978852272 + "p50": 75.29599964618683, + "p90": 321.3120102882385, + "p95": 352.3840010166168, + "p99": 365.2479946613312 }, "roundtrip": { - "p50": 95.39200365543365, - "p90": 101.79200023412704, - "p95": 102.55999863147736, - "p99": 108.0000028014183 + "p50": 148.95999431610107, + "p90": 393.3440148830414, + "p95": 429.31199073791504, + "p99": 453.72799038887024 }, "isolatedSum": { - "p50": 111.16799712181091, - "p90": 114.46399986743927, - "p95": 116.83199927210808, - "p99": 133.43999534845352 + "p50": 171.7439964413643, + "p90": 653.8560092449188, + "p95": 725.1839935779572, + "p99": 764.3199861049652 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 704512, - "combineLogicalBytes": 704512, + "dispatchLogicalBytes": 880640, + "combineLogicalBytes": 880640, 
"fanoutMean": 5.375, - "recvTokensMax": 12, - "stragglerRank": 4, + "recvTokensMax": 13, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 @@ -515,35 +517,35 @@ "tokensPerRank": 4, "globalTokens": 32, "dispatch": { - "p50": 56.0000017285347, - "p90": 57.88800120353699, - "p95": 59.67999994754791, - "p99": 68.7360018491745 + "p50": 98.04800152778625, + "p90": 332.73598551750183, + "p95": 369.951993227005, + "p99": 394.9120044708252 }, "combine": { - "p50": 56.12799897789955, - "p90": 65.37599861621857, - "p95": 65.72800129652023, - "p99": 66.97600334882736 + "p50": 80.89599758386612, + "p90": 328.7999927997589, + "p95": 354.11199927330017, + "p99": 368.51200461387634 }, "roundtrip": { - "p50": 105.18400371074677, - "p90": 111.10399663448334, - "p95": 112.2559979557991, - "p99": 115.10399729013443 + "p50": 152.8320014476776, + "p90": 396.4479863643646, + "p95": 431.0719966888428, + "p99": 465.0239944458008 }, "isolatedSum": { - "p50": 112.12800070643425, - "p90": 123.26399981975555, - "p95": 125.40800124406815, - "p99": 135.71200519800186 + "p50": 178.94399911165237, + "p90": 661.5359783172607, + "p95": 724.0639925003052, + "p99": 763.4240090847015 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1384448, - "combineLogicalBytes": 1384448, - "fanoutMean": 5.28125, - "recvTokensMax": 26, - "stragglerRank": 4, + "dispatchLogicalBytes": 1740800, + "combineLogicalBytes": 1740800, + "fanoutMean": 5.3125, + "recvTokensMax": 25, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 @@ -552,35 +554,35 @@ "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 56.86400085687637, - "p90": 58.88000130653381, - "p95": 60.54399907588959, - "p99": 67.6800012588501 + "p50": 99.5199978351593, + "p90": 336.89600229263306, + "p95": 383.4879994392395, + "p99": 412.86399960517883 }, "combine": { - "p50": 65.24799764156342, - "p90": 66.46399945020676, - "p95": 66.81600213050842, - "p99": 70.62400132417679 + "p50": 82.2720006108284, + 
"p90": 324.95999336242676, + "p95": 354.5919954776764, + "p99": 378.04800271987915 }, "roundtrip": { - "p50": 105.79200088977814, - "p90": 112.35199868679047, - "p95": 112.83200234174728, - "p99": 116.48000031709671 + "p50": 154.7199934720993, + "p90": 400.89601278305054, + "p95": 427.19998955726624, + "p99": 459.52001214027405 }, "isolatedSum": { - "p50": 122.11199849843979, - "p90": 125.34400075674057, - "p95": 127.36000120639801, - "p99": 138.3040025830269 + "p50": 181.7919984459877, + "p90": 661.8559956550598, + "p95": 738.0799949169159, + "p99": 790.912002325058 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2744320, - "combineLogicalBytes": 2744320, - "fanoutMean": 5.234375, - "recvTokensMax": 49, - "stragglerRank": 4, + "dispatchLogicalBytes": 3471360, + "combineLogicalBytes": 3471360, + "fanoutMean": 5.296875, + "recvTokensMax": 50, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 @@ -589,35 +591,35 @@ "tokensPerRank": 16, "globalTokens": 128, "dispatch": { - "p50": 58.6559996008873, - "p90": 60.5119988322258, - "p95": 63.00800293684006, - "p99": 79.0719985961914 + "p50": 100.89600086212158, + "p90": 353.2480001449585, + "p95": 383.2640051841736, + "p99": 401.98400616645813 }, "combine": { - "p50": 65.31199812889099, - "p90": 66.43199920654297, - "p95": 66.97600334882736, - "p99": 69.40799951553345 + "p50": 83.3280012011528, + "p90": 337.95198798179626, + "p95": 358.0160140991211, + "p99": 374.81600046157837 }, "roundtrip": { - "p50": 105.85600137710571, - "p90": 107.87200182676315, - "p95": 109.66400057077408, - "p99": 115.64800143241882 + "p50": 154.36799824237823, + "p90": 388.0000114440918, + "p95": 423.74399304389954, + "p99": 458.49600434303284 }, "isolatedSum": { - "p50": 123.96799772977829, - "p90": 126.94399803876877, - "p95": 129.98400628566742, - "p99": 148.47999811172485 + "p50": 184.22400206327438, + "p90": 691.1999881267548, + "p95": 741.2800192832947, + "p99": 776.8000066280365 }, "roundtripMeasured": true, - 
"dispatchLogicalBytes": 5464064, - "combineLogicalBytes": 5464064, - "fanoutMean": 5.2109375, - "recvTokensMax": 94, - "stragglerRank": 4, + "dispatchLogicalBytes": 6912000, + "combineLogicalBytes": 6912000, + "fanoutMean": 5.2734375, + "recvTokensMax": 93, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -626,34 +628,34 @@ "tokensPerRank": 32, "globalTokens": 256, "dispatch": { - "p50": 72.80000299215317, - "p90": 75.83999633789062, - "p95": 76.60800218582153, - "p99": 84.63999629020691 + "p50": 101.95200145244598, + "p90": 131.3599944114685, + "p95": 373.50401282310486, + "p99": 395.9679901599884 }, "combine": { - "p50": 66.27199798822403, - "p90": 67.03999638557434, - "p95": 67.55200028419495, - "p99": 69.47200000286102 + "p50": 85.88799834251404, + "p90": 331.743985414505, + "p95": 361.88799142837524, + "p99": 376.70400738716125 }, "roundtrip": { - "p50": 109.24799740314484, - "p90": 115.32799899578094, - "p95": 116.57600104808807, - "p99": 131.32800161838531 + "p50": 157.47199952602386, + "p90": 396.09599113464355, + "p95": 433.75998735427856, + "p99": 462.14398741722107 }, "isolatedSum": { - "p50": 139.0720009803772, - "p90": 142.87999272346497, - "p95": 144.16000247001648, - "p99": 154.11199629306793 + "p50": 187.83999979496002, + "p90": 463.1039798259735, + "p95": 735.3920042514801, + "p99": 772.6719975471497 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 11124736, - "combineLogicalBytes": 11124736, - "fanoutMean": 5.3046875, - "recvTokensMax": 186, + "dispatchLogicalBytes": 13977600, + "combineLogicalBytes": 13977600, + "fanoutMean": 5.33203125, + "recvTokensMax": 179, "stragglerRank": 4, "correct": true, "samplesPooled": 600, @@ -663,35 +665,35 @@ "tokensPerRank": 64, "globalTokens": 512, "dispatch": { - "p50": 69.66400146484375, - "p90": 71.9040036201477, - "p95": 73.05599749088287, - "p99": 79.64800298213959 + "p50": 110.43199896812439, + "p90": 345.66399455070496, + "p95": 376.800000667572, + "p99": 412.447988986969 
}, "combine": { - "p50": 78.65600287914276, - "p90": 79.68000322580338, - "p95": 80.1599994301796, - "p99": 89.59999680519104 + "p50": 98.39999675750732, + "p90": 331.13598823547363, + "p95": 355.4239869117737, + "p99": 381.9519877433777 }, "roundtrip": { - "p50": 130.8480054140091, - "p90": 134.33599472045898, - "p95": 137.92000710964203, - "p99": 152.12799608707428 + "p50": 177.66399681568146, + "p90": 206.2399983406067, + "p95": 431.7440092563629, + "p99": 459.55199003219604 }, "isolatedSum": { - "p50": 148.3200043439865, - "p90": 151.58400684595108, - "p95": 153.21599692106247, - "p99": 169.24799978733063 + "p50": 208.8319957256317, + "p90": 676.7999827861786, + "p95": 732.2239875793457, + "p99": 794.3999767303467 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 22192128, - "combineLogicalBytes": 22192128, - "fanoutMean": 5.291015625, - "recvTokensMax": 358, - "stragglerRank": 4, + "dispatchLogicalBytes": 27975680, + "combineLogicalBytes": 27975680, + "fanoutMean": 5.3359375, + "recvTokensMax": 355, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 @@ -700,35 +702,35 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 82.30400085449219, - "p90": 86.14400029182434, - "p95": 90.7519981265068, - "p99": 98.59199821949005 + "p50": 124.28800016641617, + "p90": 366.6880130767822, + "p95": 396.09599113464355, + "p99": 425.53600668907166 }, "combine": { - "p50": 91.90399944782257, - "p90": 95.0080007314682, - "p95": 101.6639992594719, - "p99": 102.52799838781357 + "p50": 118.01599711179733, + "p90": 124.38400089740753, + "p95": 131.96800649166107, + "p99": 388.9920115470886 }, "roundtrip": { - "p50": 166.81599617004395, - "p90": 173.88799786567688, - "p95": 175.32800137996674, - "p99": 189.4720047712326 + "p50": 212.19199895858765, + "p90": 483.8719964027405, + "p95": 501.21599435806274, + "p99": 530.3040146827698 }, "isolatedSum": { - "p50": 174.20800030231476, - "p90": 181.15200102329254, - "p95": 192.4159973859787, - 
"p99": 201.11999660730362 + "p50": 242.3039972782135, + "p90": 491.07201397418976, + "p95": 528.0639976263046, + "p99": 814.5280182361603 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 44564480, - "combineLogicalBytes": 44564480, - "fanoutMean": 5.3125, + "dispatchLogicalBytes": 55674880, + "combineLogicalBytes": 55674880, + "fanoutMean": 5.3095703125, "recvTokensMax": 699, - "stragglerRank": 4, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -736,16 +738,16 @@ ] }, { - "id": "cx-b2b86614", - "identity": "b300|deepep|5120|8|160|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||be1b44a963bd4ef", - "colorKey": "b300_307ed708", - "comparisonKey": "246ad32f5ce8e310", + "id": "cx-f3f399c1", + "identity": "gb200|deepep|v1|6144|8|256|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||74444524b5db510", + "colorKey": "gb200_7c2da03d", + "comparisonKey": "7c7859f7d3b18eaf", "schemaVersion": 3, - "generatedAt": "2026-06-27T09:52:24.032758+00:00", + "generatedAt": "2026-06-29T14:03:42.050997+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_14", - "sku": "b300", + "publicationStatus": "comparable-experimental", + "runner": "gb200-nv_1", + "sku": "gb200", "backend": "deepep", "phase": "decode", "mode": "normal", @@ -753,30 +755,31 @@ "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "runtime-visible-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", + "topologyClass": "gb200-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep · bf16", - "model": "shape 5120/8/160", + "label": "GB200 EP8 · deepep · bf16", + "model": "MiniMax-M3", "shape": { - "hidden": 5120, + "hidden": 6144, "topk": 8, - "experts": 160, + "experts": 256, "routing": "uniform", "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, "dispatchDtype": "bf16", + 
"kernelGeneration": "v1", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1351, + "achievedFraction": 0.1316, "configuredUnits": 20, - "deviceUnits": 148, + "deviceUnits": 152, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -784,59 +787,59 @@ }, "placement": { "kind": "packed", - "nodes": 1, + "nodes": 2, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "be1b44a963bd4ef", - "workloadId": "set:8:34e5874082f8ea8f", - "workloadSource": "canonical-serialized", + "traceSignature": "74444524b5db510", + "workloadId": null, + "workloadSource": "seeded-runtime", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", + "backendVersion": "1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28285710659", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285710659", - "createdAt": "2026-06-27T09:52:24.032758+00:00", - "sha": "149586650dbed5b7579537347e9489d5b41543c1" + "id": "28374335449", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449", + "createdAt": "2026-06-29T13:08:20Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 56.03199824690819, - "p90": 58.75200033187866, - "p95": 61.69600039720535, - "p99": 71.03999704122543 + "p50": 96.8639999628067, + "p90": 317.1199858188629, + "p95": 366.4959967136383, + "p99": 397.8239893913269 }, "combine": { - "p50": 55.424001067876816, - "p90": 57.5999990105629, - "p95": 64.7680014371872, - "p99": 65.5359998345375 + "p50": 80.57600259780884, + "p90": 331.167995929718, + "p95": 355.6160032749176, + "p99": 375.16799569129944 }, "roundtrip": { - "p50": 94.59199756383896, - "p90": 
97.85600006580353, - "p95": 101.85600072145462, - "p99": 125.15200674533844 + "p50": 152.16000378131866, + "p90": 408.25599431991577, + "p95": 434.112012386322, + "p99": 456.31998777389526 }, "isolatedSum": { - "p50": 111.455999314785, - "p90": 116.35199934244156, - "p95": 126.46400183439255, - "p99": 136.57599687576294 + "p50": 177.44000256061554, + "p90": 648.2879817485809, + "p95": 722.1119999885559, + "p99": 772.9919850826263 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 430080, - "combineLogicalBytes": 430080, - "fanoutMean": 5.25, - "recvTokensMax": 8, - "stragglerRank": 6, + "dispatchLogicalBytes": 540672, + "combineLogicalBytes": 540672, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 @@ -845,35 +848,35 @@ "tokensPerRank": 2, "globalTokens": 16, "dispatch": { - "p50": 55.93600124120712, - "p90": 57.88800120353699, - "p95": 59.74400043487549, - "p99": 67.26399809122086 + "p50": 97.98400104045868, + "p90": 340.89601039886475, + "p95": 378.6880075931549, + "p99": 401.08799934387207 }, "combine": { - "p50": 56.28800019621849, - "p90": 65.63200056552887, - "p95": 66.17599725723267, - "p99": 76.60800218582153 + "p50": 82.46400207281113, + "p90": 326.84800028800964, + "p95": 361.08800768852234, + "p99": 372.9279935359955 }, "roundtrip": { - "p50": 104.09600287675858, - "p90": 111.10399663448334, - "p95": 112.12799698114395, - "p99": 116.95999652147293 + "p50": 152.8320014476776, + "p90": 410.3679955005646, + "p95": 445.279985666275, + "p99": 479.2320132255554 }, "isolatedSum": { - "p50": 112.22400143742561, - "p90": 123.52000176906586, - "p95": 125.91999769210815, - "p99": 143.8720002770424 + "p50": 180.4480031132698, + "p90": 667.7440106868744, + "p95": 739.7760152816772, + "p99": 774.0159928798676 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 880640, - "combineLogicalBytes": 880640, + "dispatchLogicalBytes": 1056768, + "combineLogicalBytes": 1056768, "fanoutMean": 5.375, 
"recvTokensMax": 13, - "stragglerRank": 4, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -882,35 +885,35 @@ "tokensPerRank": 4, "globalTokens": 32, "dispatch": { - "p50": 55.93600124120712, - "p90": 57.920001447200775, - "p95": 59.10399928689003, - "p99": 65.92000275850296 + "p50": 97.95200079679489, + "p90": 333.8559865951538, + "p95": 377.21601128578186, + "p99": 393.5680091381073 }, "combine": { - "p50": 65.37599861621857, - "p90": 66.39999896287918, - "p95": 66.52799993753433, - "p99": 69.72800195217133 + "p50": 83.26400071382523, + "p90": 325.98400115966797, + "p95": 355.16801476478577, + "p99": 373.4720051288605 }, "roundtrip": { - "p50": 105.85600137710571, - "p90": 112.86400258541107, - "p95": 113.72800171375275, - "p99": 131.42399489879608 + "p50": 154.08000349998474, + "p90": 397.98399806022644, + "p95": 425.85599422454834, + "p99": 453.7599980831146 }, "isolatedSum": { - "p50": 121.31199985742569, - "p90": 124.32000041007996, - "p95": 125.63199922442436, - "p99": 135.6480047106743 + "p50": 181.21600151062012, + "p90": 659.8399877548218, + "p95": 732.3840260505676, + "p99": 767.0400142669678 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1740800, - "combineLogicalBytes": 1740800, - "fanoutMean": 5.3125, - "recvTokensMax": 25, - "stragglerRank": 6, + "dispatchLogicalBytes": 2125824, + "combineLogicalBytes": 2125824, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -919,35 +922,35 @@ "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 57.18399956822395, - "p90": 59.13599953055382, - "p95": 60.92799827456474, - "p99": 75.55200159549713 + "p50": 98.62399846315384, + "p90": 352.9280126094818, + "p95": 377.9839873313904, + "p99": 399.9359905719757 }, "combine": { - "p50": 66.0799965262413, - "p90": 66.65600091218948, - "p95": 67.52000004053116, - "p99": 81.02399855852127 + "p50": 84.03199911117554, + "p90": 320.25599479675293, + "p95": 
355.19999265670776, + "p99": 375.328004360199 }, "roundtrip": { - "p50": 105.43999820947647, - "p90": 107.96800255775452, - "p95": 109.98400300741196, - "p99": 118.04799735546112 + "p50": 155.4879993200302, + "p90": 413.2800102233887, + "p95": 442.7199959754944, + "p99": 462.72000670433044 }, "isolatedSum": { - "p50": 123.26399609446526, - "p90": 125.7920004427433, - "p95": 128.4479983150959, - "p99": 156.5760001540184 + "p50": 182.65599757432938, + "p90": 673.1840074062347, + "p95": 733.1839799880981, + "p99": 775.2639949321747 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 3471360, - "combineLogicalBytes": 3471360, - "fanoutMean": 5.296875, - "recvTokensMax": 50, - "stragglerRank": 4, + "dispatchLogicalBytes": 4263936, + "combineLogicalBytes": 4263936, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -956,35 +959,35 @@ "tokensPerRank": 16, "globalTokens": 128, "dispatch": { - "p50": 58.36800113320351, - "p90": 60.47999858856201, - "p95": 62.272001057863235, - "p99": 68.09599697589874 + "p50": 100.3199964761734, + "p90": 354.0799915790558, + "p95": 384.67198610305786, + "p99": 400.5439877510071 }, "combine": { - "p50": 66.14399701356888, - "p90": 66.84800237417221, - "p95": 67.4239993095398, - "p99": 76.76800340414047 + "p50": 85.05599945783615, + "p90": 329.9520015716553, + "p95": 359.2959940433502, + "p99": 377.56800651550293 }, "roundtrip": { - "p50": 106.36799782514572, - "p90": 108.67200046777725, - "p95": 110.97600311040878, - "p99": 117.76000261306763 + "p50": 158.87999534606934, + "p90": 417.05599427223206, + "p95": 443.1680142879486, + "p99": 461.60000562667847 }, "isolatedSum": { - "p50": 124.51199814677238, - "p90": 127.32800096273422, - "p95": 129.69600036740303, - "p99": 144.86400038003922 + "p50": 185.37599593400955, + "p90": 684.0319931507111, + "p95": 743.9679801464081, + "p99": 778.11199426651 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 6912000, - 
"combineLogicalBytes": 6912000, - "fanoutMean": 5.2734375, - "recvTokensMax": 93, - "stragglerRank": 6, + "dispatchLogicalBytes": 8503296, + "combineLogicalBytes": 8503296, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -993,35 +996,35 @@ "tokensPerRank": 32, "globalTokens": 256, "dispatch": { - "p50": 74.5600014925003, - "p90": 76.4160007238388, - "p95": 77.08799839019775, - "p99": 81.85599744319916 + "p50": 102.55999863147736, + "p90": 340.4160141944885, + "p95": 378.6880075931549, + "p99": 407.039999961853 }, "combine": { - "p50": 67.32799857854843, - "p90": 71.45600020885468, - "p95": 77.2159993648529, - "p99": 90.01599997282028 + "p50": 88.95999938249588, + "p90": 323.64800572395325, + "p95": 355.52000999450684, + "p99": 379.5520067214966 }, "roundtrip": { - "p50": 119.32799965143204, - "p90": 125.2480000257492, - "p95": 126.17599964141846, - "p99": 128.9599984884262 + "p50": 164.2560064792633, + "p90": 405.7280123233795, + "p95": 442.49600172042847, + "p99": 465.31200408935547 }, "isolatedSum": { - "p50": 141.88800007104874, - "p90": 147.87200093269348, - "p95": 154.30399775505066, - "p99": 171.87199741601944 + "p50": 191.51999801397324, + "p90": 664.0640199184418, + "p95": 734.2080175876617, + "p99": 786.5920066833496 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 13977600, - "combineLogicalBytes": 13977600, - "fanoutMean": 5.33203125, - "recvTokensMax": 179, - "stragglerRank": 6, + "dispatchLogicalBytes": 16908288, + "combineLogicalBytes": 16908288, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -1030,35 +1033,35 @@ "tokensPerRank": 64, "globalTokens": 512, "dispatch": { - "p50": 71.42399996519089, - "p90": 78.46400141716003, - "p95": 79.9039974808693, - "p99": 101.79200023412704 + "p50": 111.7440015077591, + "p90": 345.7280099391937, + "p95": 365.82401394844055, + "p99": 416.703999042511 }, 
"combine": { - "p50": 80.06399869918823, - "p90": 83.16799998283386, - "p95": 89.6959975361824, - "p99": 93.44000369310379 + "p50": 103.5199984908104, + "p90": 342.24000573158264, + "p95": 370.7840144634247, + "p99": 391.2639915943146 }, "roundtrip": { - "p50": 147.2640037536621, - "p90": 150.11200308799744, - "p95": 151.58399939537048, - "p99": 160.3199988603592 + "p50": 185.18400192260742, + "p90": 200.6399929523468, + "p95": 429.56799268722534, + "p99": 484.3519926071167 }, "isolatedSum": { - "p50": 151.48799866437912, - "p90": 161.6320013999939, - "p95": 169.5999950170517, - "p99": 195.23200392723083 + "p50": 215.2639999985695, + "p90": 687.9680156707764, + "p95": 736.6080284118652, + "p99": 807.9679906368256 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 27975680, - "combineLogicalBytes": 27975680, - "fanoutMean": 5.3359375, - "recvTokensMax": 355, - "stragglerRank": 4, + "dispatchLogicalBytes": 33423360, + "combineLogicalBytes": 33423360, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -1067,35 +1070,35 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 90.84799885749817, - "p90": 98.39999675750732, - "p95": 100.35199671983719, - "p99": 104.73600029945374 + "p50": 126.71999633312225, + "p90": 375.2000033855438, + "p95": 402.3039937019348, + "p99": 428.8960099220276 }, "combine": { - "p50": 102.9760017991066, - "p90": 103.93600165843964, - "p95": 104.67199981212616, - "p99": 114.62400108575821 + "p50": 123.26399981975555, + "p90": 355.16801476478577, + "p95": 398.5599875450134, + "p99": 417.85600781440735 }, "roundtrip": { - "p50": 170.01600563526154, - "p90": 178.20799350738525, - "p95": 180.09600043296814, - "p99": 193.31200420856476 + "p50": 216.60800278186798, + "p90": 237.31200397014618, + "p95": 501.8240213394165, + "p99": 520.2879905700684 }, "isolatedSum": { - "p50": 193.82400065660477, - "p90": 202.33599841594696, - "p95": 205.02399653196335, - 
"p99": 219.36000138521194 + "p50": 249.9839961528778, + "p90": 730.3680181503296, + "p95": 800.8639812469482, + "p99": 846.7520177364349 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 55674880, - "combineLogicalBytes": 55674880, - "fanoutMean": 5.3095703125, - "recvTokensMax": 699, - "stragglerRank": 6, + "dispatchLogicalBytes": 66576384, + "combineLogicalBytes": 66576384, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -1103,16 +1106,16 @@ ] }, { - "id": "cx-24853ec9", - "identity": "b300|deepep|6144|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", - "colorKey": "b300_c9569580", - "comparisonKey": "862206160efb203e", + "id": "cx-e8db863c", + "identity": "gb200|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||00df46ebb2988d7", + "colorKey": "gb200_f1783455", + "comparisonKey": "ef1adb0bc917ca19", "schemaVersion": 3, - "generatedAt": "2026-06-27T11:13:44.096050+00:00", + "generatedAt": "2026-06-29T13:47:21.708482+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_11", - "sku": "b300", + "publicationStatus": "comparable-experimental", + "runner": "gb200-nv_0", + "sku": "gb200", "backend": "deepep", "phase": "decode", "mode": "normal", @@ -1120,14 +1123,14 @@ "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", + "topologyClass": "gb200-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep · bf16", - "model": "MiniMax-M3", + "label": "GB200 EP8 · deepep · bf16", + "model": "DeepSeek-V3/V4", "shape": { - "hidden": 6144, + "hidden": 7168, "topk": 8, "experts": 256, "routing": "uniform", @@ -1136,14 +1139,15 @@ "unevenTokens": "none", "eplbEnabled": false, "dispatchDtype": "bf16", + 
"kernelGeneration": "v1", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1351, + "achievedFraction": 0.1316, "configuredUnits": 20, - "deviceUnits": 148, + "deviceUnits": 152, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -1151,59 +1155,205 @@ }, "placement": { "kind": "packed", - "nodes": 1, + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "00df46ebb2988d7", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28374335449", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449", + "createdAt": "2026-06-29T13:08:20Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 90.30400216579437, + "p90": 100.12800246477127, + "p95": 105.53599894046783, + "p99": 116.92799627780914 + }, + "combine": { + "p50": 82.2720006108284, + "p90": 89.47200328111649, + "p95": 91.58399701118469, + "p99": 95.77599912881851 + }, + "roundtrip": { + "p50": 149.1519957780838, + "p90": 159.67999398708344, + "p95": 162.1440052986145, + "p99": 169.37600076198578 + }, + "isolatedSum": { + "p50": 172.57600277662277, + "p90": 189.60000574588776, + "p95": 197.11999595165253, + "p99": 212.70399540662766 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 107.45599865913391, + 
"p90": 113.40799927711487, + "p95": 117.63200163841248, + "p99": 125.5359947681427 + }, + "combine": { + "p50": 104.96000200510025, + "p90": 108.41599851846695, + "p95": 110.6560006737709, + "p99": 117.69600212574005 + }, + "roundtrip": { + "p50": 185.34399569034576, + "p90": 192.60799884796143, + "p95": 196.0960030555725, + "p99": 200.6720006465912 + }, + "isolatedSum": { + "p50": 212.41600066423416, + "p90": 221.82399779558182, + "p95": 228.28800231218338, + "p99": 243.23199689388275 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-814e92ce", + "identity": "gb200|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||74444524b5db510", + "colorKey": "gb200_f1783455", + "comparisonKey": "ef1adb0bc917ca19", + "schemaVersion": 3, + "generatedAt": "2026-06-29T13:54:29.380812+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb200-nv_0", + "sku": "gb200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb200-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB200 EP8 · deepep · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": 
"backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "adversarial", + "nodes": 2, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "ac583971f94b176", - "workloadId": "set:8:2e0df6a62cd0143e", - "workloadSource": "canonical-serialized", + "traceSignature": "74444524b5db510", + "workloadId": null, + "workloadSource": "seeded-runtime", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", + "backendVersion": "1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28287497246", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28287497246", - "createdAt": "2026-06-27T11:13:44.096050+00:00", - "sha": "df7fddee0a275156d4c72fa006cd2b73bce72613" + "id": "28374335449", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449", + "createdAt": "2026-06-29T13:08:20Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 57.440001517534256, - "p90": 59.29600074887276, - "p95": 61.28000095486641, - "p99": 64.41599875688553 + "p50": 94.04800087213516, + "p90": 105.95200210809708, + "p95": 109.98400300741196, + "p99": 118.367999792099 }, "combine": { - "p50": 65.8240020275116, - "p90": 67.07199662923813, - "p95": 67.19999760389328, - "p99": 77.47200131416321 + "p50": 82.97599852085114, + "p90": 87.99999952316284, + "p95": 93.47199648618698, + "p99": 96.3200032711029 }, "roundtrip": { - "p50": 108.25599730014801, - "p90": 113.3119985461235, - "p95": 114.30399864912033, - "p99": 123.71200323104858 + "p50": 152.0320028066635, + "p90": 162.6559942960739, + "p95": 165.43999314308167, + "p99": 176.41599476337433 }, "isolatedSum": { - "p50": 123.26400354504585, - "p90": 126.36799737811089, - "p95": 
128.4799985587597, - "p99": 141.88800007104874 + "p50": 177.0239993929863, + "p90": 193.95200163125992, + "p95": 203.45599949359894, + "p99": 214.6880030632019 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 540672, - "combineLogicalBytes": 540672, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, "fanoutMean": 5.5, "recvTokensMax": 7, - "stragglerRank": 7, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 @@ -1212,35 +1362,35 @@ "tokensPerRank": 2, "globalTokens": 16, "dispatch": { - "p50": 57.66399949789047, - "p90": 60.28800085186958, - "p95": 62.94400244951248, - "p99": 71.1359977722168 + "p50": 93.47199648618698, + "p90": 106.20799660682678, + "p95": 110.04800349473953, + "p99": 116.5120005607605 }, "combine": { - "p50": 66.23999774456024, - "p90": 67.16799736022949, - "p95": 67.26399809122086, - "p99": 69.63200122117996 + "p50": 83.45600217580795, + "p90": 87.93599903583527, + "p95": 93.37600320577621, + "p99": 96.25600278377533 }, "roundtrip": { - "p50": 107.4879989027977, - "p90": 113.15199732780457, - "p95": 114.17599767446518, - "p99": 118.9119964838028 + "p50": 152.92799472808838, + "p90": 163.4880006313324, + "p95": 166.4319932460785, + "p99": 173.47200214862823 }, "isolatedSum": { - "p50": 123.90399724245071, - "p90": 127.45599821209908, - "p95": 130.20800054073334, - "p99": 140.76799899339676 + "p50": 176.92799866199493, + "p90": 194.14399564266205, + "p95": 203.42400670051575, + "p99": 212.76800334453583 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1056768, - "combineLogicalBytes": 1056768, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, "fanoutMean": 5.375, "recvTokensMax": 13, - "stragglerRank": 7, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 @@ -1249,35 +1399,35 @@ "tokensPerRank": 4, "globalTokens": 32, "dispatch": { - "p50": 59.328000992536545, - "p90": 62.17600032687187, - "p95": 65.05600363016129, - "p99": 70.01599669456482 + "p50": 
94.55999732017517, + "p90": 107.10400342941284, + "p95": 113.08799684047699, + "p99": 121.08799815177917 }, "combine": { - "p50": 66.81600213050842, - "p90": 68.12799721956253, - "p95": 69.11999732255936, - "p99": 77.27999985218048 + "p50": 84.32000130414963, + "p90": 91.90399944782257, + "p95": 94.81599926948547, + "p99": 99.67999905347824 }, "roundtrip": { - "p50": 108.57599973678589, - "p90": 115.58400094509125, - "p95": 118.1119978427887, - "p99": 128.76799702644348 + "p50": 156.38400614261627, + "p90": 168.09600591659546, + "p95": 171.51999473571777, + "p99": 181.08800053596497 }, "isolatedSum": { - "p50": 126.14400312304497, - "p90": 130.3039975464344, - "p95": 134.17600095272064, - "p99": 147.2959965467453 + "p50": 178.8799986243248, + "p90": 199.0080028772354, + "p95": 207.90399610996246, + "p99": 220.76799720525742 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2125824, - "combineLogicalBytes": 2125824, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, "fanoutMean": 5.40625, "recvTokensMax": 29, - "stragglerRank": 7, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 @@ -1286,35 +1436,35 @@ "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 59.776000678539276, - "p90": 62.111999839544296, - "p95": 63.90400230884552, - "p99": 70.14399766921997 + "p50": 95.83999961614609, + "p90": 107.64800012111664, + "p95": 111.7440015077591, + "p99": 123.23199957609177 }, "combine": { - "p50": 67.87200272083282, - "p90": 76.19199901819229, - "p95": 77.18399912118912, - "p99": 79.55200225114822 + "p50": 85.21600067615509, + "p90": 93.63199770450592, + "p95": 95.48799693584442, + "p99": 106.23999685049057 }, "roundtrip": { - "p50": 116.31999909877777, - "p90": 122.43200093507767, - "p95": 124.60800260305405, - "p99": 131.77600502967834 + "p50": 158.2079976797104, + "p90": 169.08800601959229, + "p95": 171.9679981470108, + "p99": 177.44000256061554 }, "isolatedSum": { - "p50": 127.6480033993721, - "p90": 
138.3039988577366, - "p95": 141.08800143003464, - "p99": 149.6959999203682 + "p50": 181.05600029230118, + "p90": 201.27999782562256, + "p95": 207.23199844360352, + "p99": 229.47199642658234 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4263936, - "combineLogicalBytes": 4263936, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, "fanoutMean": 5.421875, "recvTokensMax": 47, - "stragglerRank": 5, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -1323,35 +1473,35 @@ "tokensPerRank": 16, "globalTokens": 128, "dispatch": { - "p50": 64.19199705123901, - "p90": 69.34399902820587, - "p95": 71.80800288915634, - "p99": 79.52000200748444 + "p50": 97.88800030946732, + "p90": 109.69600081443787, + "p95": 114.84800279140472, + "p99": 124.1919994354248 }, "combine": { - "p50": 68.4799998998642, - "p90": 76.48000121116638, - "p95": 76.9599974155426, - "p99": 79.0719985961914 + "p50": 86.40000224113464, + "p90": 95.0080007314682, + "p95": 97.120001912117, + "p99": 104.06400263309479 }, "roundtrip": { - "p50": 121.5360015630722, - "p90": 126.52799487113953, - "p95": 127.3919939994812, - "p99": 137.9839926958084 + "p50": 161.53599321842194, + "p90": 172.54400253295898, + "p95": 177.5359958410263, + "p99": 186.68800592422485 }, "isolatedSum": { - "p50": 132.6719969511032, - "p90": 145.82400023937225, - "p95": 148.76800030469894, - "p99": 158.59200060367584 + "p50": 184.28800255060196, + "p90": 204.70400154590607, + "p95": 211.96800470352173, + "p99": 228.2560020685196 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 8503296, - "combineLogicalBytes": 8503296, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, "fanoutMean": 5.40625, "recvTokensMax": 92, - "stragglerRank": 0, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 @@ -1360,35 +1510,35 @@ "tokensPerRank": 32, "globalTokens": 256, "dispatch": { - "p50": 73.53600114583969, - "p90": 77.18399912118912, - "p95": 78.14399898052216, - 
"p99": 88.128000497818 + "p50": 101.21600329875946, + "p90": 111.48799955844879, + "p95": 115.1999980211258, + "p99": 120.41600048542023 }, "combine": { - "p50": 77.504001557827, - "p90": 79.19999957084656, - "p95": 79.45600152015686, - "p99": 80.25600016117096 + "p50": 94.68799829483032, + "p90": 99.32799637317657, + "p95": 105.6319996714592, + "p99": 109.3439981341362 }, "roundtrip": { - "p50": 123.64800274372101, - "p90": 128.38399410247803, - "p95": 131.1360001564026, - "p99": 140.4159963130951 + "p50": 166.24000668525696, + "p90": 175.52000284194946, + "p95": 178.14399302005768, + "p99": 185.2799952030182 }, "isolatedSum": { - "p50": 151.0400027036667, - "p90": 156.38399869203568, - "p95": 157.60000050067902, - "p99": 168.38400065898895 + "p50": 195.90400159358978, + "p90": 210.81599593162537, + "p95": 220.831997692585, + "p99": 229.75999861955643 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 16908288, - "combineLogicalBytes": 16908288, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, "fanoutMean": 5.375, "recvTokensMax": 182, - "stragglerRank": 7, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -1397,35 +1547,35 @@ "tokensPerRank": 64, "globalTokens": 512, "dispatch": { - "p50": 88.28800171613693, - "p90": 90.91199934482574, - "p95": 91.90399944782257, - "p99": 103.90400141477585 + "p50": 113.40799927711487, + "p90": 123.16799908876419, + "p95": 126.5919953584671, + "p99": 138.2720023393631 }, "combine": { - "p50": 90.91199934482574, - "p90": 91.93599969148636, - "p95": 92.47999638319016, - "p99": 103.2319962978363 + "p50": 108.44799876213074, + "p90": 115.84000289440155, + "p95": 118.72000247240067, + "p99": 121.88799679279327 }, "roundtrip": { - "p50": 147.96799421310425, - "p90": 153.18399667739868, - "p95": 155.4879993200302, - "p99": 161.69600188732147 + "p50": 191.83999300003052, + "p90": 200.8640021085739, + "p95": 203.5519927740097, + "p99": 210.52800118923187 }, "isolatedSum": { - "p50": 
179.20000106096268, - "p90": 182.8479990363121, - "p95": 184.38399583101273, - "p99": 207.13599771261215 + "p50": 221.8559980392456, + "p90": 239.00800198316574, + "p95": 245.31199783086777, + "p99": 260.1599991321564 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 33423360, - "combineLogicalBytes": 33423360, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, "fanoutMean": 5.3125, "recvTokensMax": 367, - "stragglerRank": 7, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -1434,35 +1584,35 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 100.00000149011612, - "p90": 102.68799960613251, - "p95": 103.4879982471466, - "p99": 112.86400258541107 + "p50": 126.14400684833527, + "p90": 136.1279934644699, + "p95": 139.67999815940857, + "p99": 150.84800124168396 }, "combine": { - "p50": 105.82400113344193, - "p90": 114.46399986743927, - "p95": 115.03999680280685, - "p99": 118.23999881744385 + "p50": 128.12800705432892, + "p90": 134.11200046539307, + "p95": 135.903999209404, + "p99": 142.11200177669525 }, "roundtrip": { - "p50": 185.82400679588318, - "p90": 190.14400243759155, - "p95": 191.00800156593323, - "p99": 196.8960016965866 + "p50": 226.623997092247, + "p90": 235.6799989938736, + "p95": 238.304004073143, + "p99": 243.83999407291412 }, "isolatedSum": { - "p50": 205.82400262355804, - "p90": 217.15199947357178, - "p95": 218.52799504995346, - "p99": 231.10400140285492 + "p50": 254.27201390266418, + "p90": 270.239993929863, + "p95": 275.58399736881256, + "p99": 292.9600030183792 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 66576384, - "combineLogicalBytes": 66576384, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, "fanoutMean": 5.291015625, "recvTokensMax": 723, - "stragglerRank": 7, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -1470,16 +1620,16 @@ ] }, { - "id": "cx-c0dba141", - "identity": 
"b300|deepep|6144|8|256|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", - "colorKey": "b300_307ed708", - "comparisonKey": "62d01cd02a49457a", + "id": "cx-50b58ea2", + "identity": "gb200|deepep|v1|7168|8|256|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||74444524b5db510", + "colorKey": "gb200_7c2da03d", + "comparisonKey": "61b647515928837c", "schemaVersion": 3, - "generatedAt": "2026-06-27T09:52:49.194497+00:00", + "generatedAt": "2026-06-29T13:49:03.469405+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_01", - "sku": "b300", + "publicationStatus": "comparable-experimental", + "runner": "gb200-nv_0", + "sku": "gb200", "backend": "deepep", "phase": "decode", "mode": "normal", @@ -1487,14 +1637,14 @@ "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "runtime-visible-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", + "topologyClass": "gb200-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep · bf16", - "model": "MiniMax-M3", + "label": "GB200 EP8 · deepep · bf16", + "model": "DeepSeek-V3/V4", "shape": { - "hidden": 6144, + "hidden": 7168, "topk": 8, "experts": 256, "routing": "uniform", @@ -1503,14 +1653,15 @@ "unevenTokens": "none", "eplbEnabled": false, "dispatchDtype": "bf16", + "kernelGeneration": "v1", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1351, + "achievedFraction": 0.1316, "configuredUnits": 20, - "deviceUnits": 148, + "deviceUnits": 152, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -1518,59 +1669,59 @@ }, "placement": { "kind": "packed", - "nodes": 1, + "nodes": 2, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "ac583971f94b176", - "workloadId": "set:8:2e0df6a62cd0143e", 
- "workloadSource": "canonical-serialized", + "traceSignature": "74444524b5db510", + "workloadId": null, + "workloadSource": "seeded-runtime", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", + "backendVersion": "1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28285721110", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285721110", - "createdAt": "2026-06-27T09:52:49.194497+00:00", - "sha": "149586650dbed5b7579537347e9489d5b41543c1" + "id": "28374335449", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449", + "createdAt": "2026-06-29T13:08:20Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 56.41600117087364, - "p90": 58.848001062870026, - "p95": 63.07200342416763, - "p99": 79.29600030183792 + "p50": 91.67999774217606, + "p90": 103.07200253009796, + "p95": 107.51999914646149, + "p99": 116.86400324106216 }, "combine": { - "p50": 65.24799764156342, - "p90": 66.14399701356888, - "p95": 66.3359984755516, - "p99": 68.96000355482101 + "p50": 81.53600245714188, + "p90": 85.88799834251404, + "p95": 90.55999666452408, + "p99": 95.16800194978714 }, "roundtrip": { - "p50": 104.76800054311752, - "p90": 111.35999858379364, - "p95": 112.09599673748016, - "p99": 115.7120019197464 + "p50": 151.45599842071533, + "p90": 160.89600324630737, + "p95": 164.2879992723465, + "p99": 175.26400089263916 }, "isolatedSum": { - "p50": 121.66399881243706, - "p90": 124.9919980764389, - "p95": 129.40800189971924, - "p99": 148.25600385665894 + "p50": 173.21600019931793, + "p90": 188.960000872612, + "p95": 198.07999581098557, + "p99": 212.0320051908493 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 540672, - "combineLogicalBytes": 540672, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, 
"fanoutMean": 5.5, "recvTokensMax": 7, - "stragglerRank": 4, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -1579,35 +1730,35 @@ "tokensPerRank": 2, "globalTokens": 16, "dispatch": { - "p50": 55.87200075387955, - "p90": 57.53599852323532, - "p95": 58.94400179386139, - "p99": 66.91200286149979 + "p50": 90.52799642086029, + "p90": 102.7199998497963, + "p95": 106.39999806880951, + "p99": 115.58400094509125 }, "combine": { - "p50": 65.69600105285645, - "p90": 66.3359984755516, - "p95": 66.68800115585327, - "p99": 78.20799946784973 + "p50": 81.66400343179703, + "p90": 88.67199718952179, + "p95": 90.52799642086029, + "p99": 94.08000111579895 }, "roundtrip": { - "p50": 105.56799918413162, - "p90": 110.23999750614166, - "p95": 111.13599687814713, - "p99": 129.5360028743744 + "p50": 151.39199793338776, + "p90": 162.9440039396286, + "p95": 166.4319932460785, + "p99": 171.74400389194489 }, "isolatedSum": { - "p50": 121.56800180673599, - "p90": 123.87199699878693, - "p95": 125.63200294971466, - "p99": 145.12000232934952 + "p50": 172.19199985265732, + "p90": 191.39199703931808, + "p95": 196.9279944896698, + "p99": 209.6640020608902 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1056768, - "combineLogicalBytes": 1056768, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, "fanoutMean": 5.375, "recvTokensMax": 13, - "stragglerRank": 4, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 @@ -1616,35 +1767,35 @@ "tokensPerRank": 4, "globalTokens": 32, "dispatch": { - "p50": 56.73599988222122, - "p90": 58.36800113320351, - "p95": 59.42400172352791, - "p99": 68.4799998998642 + "p50": 92.44800359010696, + "p90": 104.12800312042236, + "p95": 109.15199667215347, + "p99": 117.40799993276596 }, "combine": { - "p50": 66.14399701356888, - "p90": 67.16799736022949, - "p95": 68.15999746322632, - "p99": 78.17599922418594 + "p50": 82.30400085449219, + "p90": 89.79199826717377, + "p95": 91.77599847316742, + "p99": 
96.41599655151367 }, "roundtrip": { - "p50": 106.59199953079224, - "p90": 109.43999886512756, - "p95": 111.84000223875046, - "p99": 120.7680031657219 + "p50": 153.43999862670898, + "p90": 163.61600160598755, + "p95": 167.90400445461273, + "p99": 173.0560064315796 }, "isolatedSum": { - "p50": 122.8799968957901, - "p90": 125.535998493433, - "p95": 127.58399918675423, - "p99": 146.65599912405014 + "p50": 174.75200444459915, + "p90": 193.92000138759613, + "p95": 200.9279951453209, + "p99": 213.82399648427963 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2125824, - "combineLogicalBytes": 2125824, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, "fanoutMean": 5.40625, "recvTokensMax": 29, - "stragglerRank": 4, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -1653,35 +1804,35 @@ "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 57.631999254226685, - "p90": 60.19200012087822, - "p95": 61.59999966621399, - "p99": 72.83200323581696 + "p50": 93.9520001411438, + "p90": 104.09600287675858, + "p95": 107.77600109577179, + "p99": 117.72800236940384 }, "combine": { - "p50": 66.78400188684464, - "p90": 68.31999868154526, - "p95": 69.50400024652481, - "p99": 77.82399654388428 + "p50": 85.28000116348267, + "p90": 93.56799721717834, + "p95": 96.73599898815155, + "p99": 167.67999529838562 }, "roundtrip": { - "p50": 115.68000167608261, - "p90": 122.11199849843979, - "p95": 123.03999811410904, - "p99": 135.04000008106232 + "p50": 159.7760021686554, + "p90": 171.07200622558594, + "p95": 177.08800733089447, + "p99": 250.46399235725403 }, "isolatedSum": { - "p50": 124.41600114107132, - "p90": 128.51199880242348, - "p95": 131.1039999127388, - "p99": 150.65599977970123 + "p50": 179.23200130462646, + "p90": 197.66400009393692, + "p95": 204.51200008392334, + "p99": 285.40799766778946 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4263936, - "combineLogicalBytes": 4263936, + "dispatchLogicalBytes": 4974592, + 
"combineLogicalBytes": 4974592, "fanoutMean": 5.421875, "recvTokensMax": 47, - "stragglerRank": 4, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -1690,35 +1841,35 @@ "tokensPerRank": 16, "globalTokens": 128, "dispatch": { - "p50": 58.94400179386139, - "p90": 64.89600241184235, - "p95": 65.88800251483917, - "p99": 72.4480003118515 + "p50": 94.94400024414062, + "p90": 105.3759977221489, + "p95": 108.31999778747559, + "p99": 118.14399808645248 }, "combine": { - "p50": 67.64800101518631, - "p90": 76.25599950551987, - "p95": 76.92799717187881, - "p99": 78.43200117349625 + "p50": 88.92799913883209, + "p90": 93.34400296211243, + "p95": 94.71999853849411, + "p99": 101.6639992594719 }, "roundtrip": { - "p50": 121.56800180673599, - "p90": 124.35200065374374, - "p95": 125.40799379348755, - "p99": 136.73600554466248 + "p50": 158.75199437141418, + "p90": 168.38400065898895, + "p95": 171.55200242996216, + "p99": 180.1919937133789 }, "isolatedSum": { - "p50": 126.5920028090477, - "p90": 141.1520019173622, - "p95": 142.815999686718, - "p99": 150.88000148534775 + "p50": 183.87199938297272, + "p90": 198.72000068426132, + "p95": 203.0399963259697, + "p99": 219.80799734592438 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 8503296, - "combineLogicalBytes": 8503296, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, "fanoutMean": 5.40625, "recvTokensMax": 92, - "stragglerRank": 4, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 @@ -1727,35 +1878,35 @@ "tokensPerRank": 32, "globalTokens": 256, "dispatch": { - "p50": 68.54400038719177, - "p90": 75.42400062084198, - "p95": 75.9039968252182, - "p99": 78.015998005867 + "p50": 99.61599856615067, + "p90": 110.62400043010712, + "p95": 113.6000007390976, + "p99": 134.36800241470337 }, "combine": { - "p50": 77.56800204515457, - "p90": 78.36800068616867, - "p95": 78.52800190448761, - "p99": 80.70400357246399 + "p50": 93.56799721717834, + "p90": 100.832000374794, + 
"p95": 103.42399775981903, + "p99": 107.16799646615982 }, "roundtrip": { - "p50": 125.34399330615997, - "p90": 131.84000551700592, - "p95": 133.53599607944489, - "p99": 144.22400295734406 + "p50": 164.86400365829468, + "p90": 173.7920045852661, + "p95": 177.3120015859604, + "p99": 184.38400328159332 }, "isolatedSum": { - "p50": 146.11200243234634, - "p90": 153.79200130701065, - "p95": 154.4319987297058, - "p99": 158.720001578331 + "p50": 193.183995783329, + "p90": 211.45600080490112, + "p95": 217.02399849891663, + "p99": 241.5359988808632 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 16908288, - "combineLogicalBytes": 16908288, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, "fanoutMean": 5.375, "recvTokensMax": 182, - "stragglerRank": 7, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -1764,35 +1915,35 @@ "tokensPerRank": 64, "globalTokens": 512, "dispatch": { - "p50": 85.91999858617783, - "p90": 88.79999816417694, - "p95": 89.56799656152725, - "p99": 97.69599884748459 + "p50": 109.92000252008438, + "p90": 118.9119964838028, + "p95": 122.20799922943115, + "p99": 127.6479959487915 }, "combine": { - "p50": 90.2400016784668, - "p90": 92.28800237178802, - "p95": 94.97600048780441, - "p99": 102.36799716949463 + "p50": 106.62399977445602, + "p90": 114.84800279140472, + "p95": 116.67200177907944, + "p99": 119.35999989509583 }, "roundtrip": { - "p50": 149.79200065135956, - "p90": 162.81600296497345, - "p95": 167.4560010433197, - "p99": 173.66400361061096 + "p50": 189.18399512767792, + "p90": 196.57599925994873, + "p95": 199.71199333667755, + "p99": 204.352006316185 }, "isolatedSum": { - "p50": 176.16000026464462, - "p90": 181.08800053596497, - "p95": 184.54399704933167, - "p99": 200.06399601697922 + "p50": 216.5440022945404, + "p90": 233.75999927520752, + "p95": 238.8800010085106, + "p99": 247.00799584388733 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 33423360, - "combineLogicalBytes": 33423360, + 
"dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, "fanoutMean": 5.3125, "recvTokensMax": 367, - "stragglerRank": 4, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -1801,35 +1952,35 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 98.7199991941452, - "p90": 101.21600329875946, - "p95": 102.14400291442871, - "p99": 113.24799805879593 + "p50": 124.79999661445618, + "p90": 133.7919980287552, + "p95": 137.472003698349, + "p99": 143.48800480365753 }, "combine": { - "p50": 105.56799918413162, - "p90": 113.98400366306305, - "p95": 114.49600011110306, - "p99": 114.94400352239609 + "p50": 128.12800705432892, + "p90": 131.8719983100891, + "p95": 133.40799510478973, + "p99": 141.56800508499146 }, "roundtrip": { - "p50": 184.4159960746765, - "p90": 188.92799317836761, - "p95": 190.08000195026398, - "p99": 197.24799692630768 + "p50": 224.60800409317017, + "p90": 233.21600258350372, + "p95": 236.28799617290497, + "p99": 245.56800723075867 }, "isolatedSum": { - "p50": 204.28799837827682, - "p90": 215.2000069618225, - "p95": 216.64000302553177, - "p99": 228.19200158119202 + "p50": 252.9280036687851, + "p90": 265.6639963388443, + "p95": 270.87999880313873, + "p99": 285.056009888649 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 66576384, - "combineLogicalBytes": 66576384, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, "fanoutMean": 5.291015625, "recvTokensMax": 723, - "stragglerRank": 4, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -1837,47 +1988,48 @@ ] }, { - "id": "cx-3f6620d0", - "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|fp8-saturation|none|none|0|tuned||8c8497a77d9085d", - "colorKey": "b300_c9569580", - "comparisonKey": "11fb97077712804e", + "id": "cx-ae831441", + "identity": "gb200|deepep|v1|7168|8|384|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||6d507ec2ec8998f", + 
"colorKey": "gb200_7c2da03d", + "comparisonKey": "57634c99a1c8a12a", "schemaVersion": 3, - "generatedAt": "2026-06-27T00:06:34.883169+00:00", + "generatedAt": "2026-06-29T13:58:16.852678+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_05", - "sku": "b300", + "publicationStatus": "comparable-experimental", + "runner": "gb200-nv_1", + "sku": "gb200", "backend": "deepep", "phase": "decode", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", + "measurementContract": "runtime-visible-v1", + "topologyClass": "gb200-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep · bf16", - "model": "DeepSeek-V3/V4", + "label": "GB200 EP8 · deepep · bf16", + "model": "Kimi-K2", "shape": { "hidden": 7168, "topk": 8, - "experts": 256, + "experts": 384, "routing": "uniform", "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, "dispatchDtype": "bf16", - "activationProfile": "fp8-saturation", + "kernelGeneration": "v1", + "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1351, + "achievedFraction": 0.1316, "configuredUnits": 20, - "deviceUnits": 148, + "deviceUnits": 152, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -1885,352 +2037,281 @@ }, "placement": { "kind": "packed", - "nodes": 1, + "nodes": 2, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "8c8497a77d9085d", - "workloadId": "set:4:d8d49658059863f2", - "workloadSource": "canonical-serialized", + "traceSignature": "6d507ec2ec8998f", + "workloadId": null, + "workloadSource": "seeded-runtime", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", 
+ "backendVersion": "1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28272154473", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272154473", - "createdAt": "2026-06-27T00:06:34.883169+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28374335449", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449", + "createdAt": "2026-06-29T13:08:20Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 57.40800127387047, - "p90": 59.26400050520897, - "p95": 61.055999249219894, - "p99": 69.66400146484375 + "p50": 97.120001912117, + "p90": 331.39199018478394, + "p95": 372.4159896373749, + "p99": 393.3759927749634 }, "combine": { - "p50": 66.30399823188782, - "p90": 67.32799857854843, - "p95": 68.25599819421768, - "p99": 77.02399790287018 + "p50": 82.97599852085114, + "p90": 318.33600997924805, + "p95": 356.7039966583252, + "p99": 384.8640024662018 }, "roundtrip": { - "p50": 106.88000172376633, - "p90": 111.35999858379364, - "p95": 112.96000331640244, - "p99": 129.31199371814728 + "p50": 155.29599785804749, + "p90": 406.9119989871979, + "p95": 436.2559914588928, + "p99": 462.5599980354309 }, "isolatedSum": { - "p50": 123.71199950575829, - "p90": 126.5919990837574, - "p95": 129.31199744343758, - "p99": 146.68799936771393 + "p50": 180.09600043296814, + "p90": 649.728000164032, + "p95": 729.1199862957001, + "p99": 778.2399952411652 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 630784, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 7, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": 
{ + "p50": 98.14400225877762, + "p90": 350.6239950656891, + "p95": 377.344012260437, + "p99": 396.38400077819824 + }, + "combine": { + "p50": 84.32000130414963, + "p90": 326.07999444007874, + "p95": 356.1600148677826, + "p99": 369.7279989719391 + }, + "roundtrip": { + "p50": 158.62399339675903, + "p90": 409.5039963722229, + "p95": 440.8000111579895, + "p99": 456.2560021877289 + }, + "isolatedSum": { + "p50": 182.46400356292725, + "p90": 676.7039895057678, + "p95": 733.5040271282196, + "p99": 766.1119997501373 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1218560, + "combineLogicalBytes": 1218560, + "fanoutMean": 5.3125, + "recvTokensMax": 14, "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 4, + "globalTokens": 32, "dispatch": { - "p50": 58.33600088953972, - "p90": 60.67200005054474, - "p95": 62.68800050020218, - "p99": 68.15999746322632 + "p50": 97.31200337409973, + "p90": 139.1039937734604, + "p95": 365.4080033302307, + "p99": 385.343998670578 }, "combine": { - "p50": 67.84000247716904, - "p90": 77.2159993648529, - "p95": 77.88799703121185, - "p99": 78.75200361013412 + "p50": 84.06399935483932, + "p90": 320.8320140838623, + "p95": 356.00000619888306, + "p99": 368.1280016899109 }, "roundtrip": { - "p50": 121.88799679279327, - "p90": 125.05599856376648, - "p95": 126.08000636100769, - "p99": 136.99199259281158 + "p50": 158.2079976797104, + "p90": 407.039999961853, + "p95": 437.5999867916107, + "p99": 456.35199546813965 }, "isolatedSum": { - "p50": 126.17600336670876, - "p90": 137.88799941539764, - "p95": 140.57599753141403, - "p99": 146.91200107336044 + "p50": 181.37600272893906, + "p90": 459.9360078573227, + "p95": 721.4080095291138, + "p99": 753.4720003604889 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 5, + "dispatchLogicalBytes": 2408448, + 
"combineLogicalBytes": 2408448, + "fanoutMean": 5.25, + "recvTokensMax": 26, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 32, - "globalTokens": 256, + "tokensPerRank": 8, + "globalTokens": 64, "dispatch": { - "p50": 69.85600292682648, - "p90": 74.27199929952621, - "p95": 75.3600001335144, - "p99": 82.97599852085114 + "p50": 100.8640006184578, + "p90": 338.75200152397156, + "p95": 381.5680146217346, + "p99": 394.8479890823364 }, "combine": { - "p50": 78.52800190448761, - "p90": 79.19999957084656, - "p95": 79.99999821186066, - "p99": 82.8159973025322 + "p50": 86.11200004816055, + "p90": 317.24798679351807, + "p95": 355.9359908103943, + "p99": 368.80001425743103 }, "roundtrip": { - "p50": 131.3599944114685, - "p90": 135.903999209404, - "p95": 136.76799833774567, - "p99": 147.5519984960556 + "p50": 162.4000072479248, + "p90": 406.3360095024109, + "p95": 436.67200207710266, + "p99": 459.74400639533997 }, "isolatedSum": { - "p50": 148.3840048313141, - "p90": 153.47199887037277, - "p95": 155.35999834537506, - "p99": 165.79199582338333 + "p50": 186.97600066661835, + "p90": 655.9999883174896, + "p95": 737.5040054321289, + "p99": 763.6480033397675 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19726336, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 182, - "stragglerRank": 7, + "dispatchLogicalBytes": 4831232, + "combineLogicalBytes": 4831232, + "fanoutMean": 5.265625, + "recvTokensMax": 48, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 16, + "globalTokens": 128, "dispatch": { - "p50": 94.24000233411789, - "p90": 96.79999947547913, - "p95": 99.39199686050415, - "p99": 103.74400019645691 + "p50": 101.02400183677673, + "p90": 337.3439908027649, + "p95": 375.2320110797882, + "p99": 404.63998913764954 }, "combine": { - "p50": 115.35999923944473, - "p90": 116.12799763679504, - "p95": 
116.73600226640701, - "p99": 127.29600071907043 + "p50": 88.67199718952179, + "p90": 324.19198751449585, + "p95": 362.7519905567169, + "p99": 391.84001088142395 }, "roundtrip": { - "p50": 193.4400051832199, - "p90": 198.91199469566345, - "p95": 199.71199333667755, - "p99": 208.3200067281723 + "p50": 165.53600132465363, + "p90": 409.6960127353668, + "p95": 438.4959936141968, + "p99": 458.43198895454407 }, "isolatedSum": { - "p50": 209.60000157356262, - "p90": 212.92799711227417, - "p95": 216.12799912691116, - "p99": 231.04000091552734 + "p50": 189.69599902629852, + "p90": 661.5359783172607, + "p95": 737.9840016365051, + "p99": 796.4800000190735 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-854f00de", - "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||8c8497a77d9085d", - "colorKey": "b300_c9569580", - "comparisonKey": "afbd085a57d290fd", - "schemaVersion": 3, - "generatedAt": "2026-06-26T23:57:27.937449+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_17", - "sku": "b300", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "B300 EP8 · deepep · bf16", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - 
"requestedFraction": null, - "achievedFraction": 0.1351, - "configuredUnits": 20, - "deviceUnits": 148, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "8c8497a77d9085d", - "workloadId": "set:4:d8d49658059863f2", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28271865772", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271865772", - "createdAt": "2026-06-26T23:57:27.937449+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 55.58399856090546, - "p90": 57.40800127387047, - "p95": 59.13599953055382, - "p99": 65.63200056552887 - }, - "combine": { - "p50": 66.14399701356888, - "p90": 67.55200028419495, - "p95": 68.38399916887283, - "p99": 77.2159993648529 - }, - "roundtrip": { - "p50": 105.18400371074677, - "p90": 111.29599809646606, - "p95": 113.50400000810623, - "p99": 132.1280002593994 - }, - "isolatedSum": { - "p50": 121.72799557447433, - "p90": 124.96000155806541, - "p95": 127.51999869942665, - "p99": 142.84799993038177 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 630784, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 7, + "dispatchLogicalBytes": 9848832, + "combineLogicalBytes": 9848832, + "fanoutMean": 5.3671875, + "recvTokensMax": 91, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 32, + "globalTokens": 256, "dispatch": { - 
"p50": 58.400001376867294, - "p90": 60.99199876189232, - "p95": 62.880001962184906, - "p99": 73.05599749088287 + "p50": 104.5759990811348, + "p90": 338.6879861354828, + "p95": 372.6080060005188, + "p99": 395.9999978542328 }, "combine": { - "p50": 67.29599833488464, - "p90": 77.15199887752533, - "p95": 77.72800326347351, - "p99": 79.64800298213959 + "p50": 95.29600292444229, + "p90": 328.0639946460724, + "p95": 342.272013425827, + "p99": 376.3839900493622 }, "roundtrip": { - "p50": 117.95199662446976, - "p90": 122.72000312805176, - "p95": 123.9359974861145, - "p99": 138.46400380134583 + "p50": 167.67999529838562, + "p90": 404.4159948825836, + "p95": 431.0399889945984, + "p99": 457.88800716400146 }, "isolatedSum": { - "p50": 125.69599971175194, - "p90": 138.14399763941765, - "p95": 140.60800522565842, - "p99": 152.70400047302246 + "p50": 199.8720020055771, + "p90": 666.7519807815552, + "p95": 714.8800194263458, + "p99": 772.383987903595 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 7, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 19496960, + "fanoutMean": 5.3125, + "recvTokensMax": 178, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 32, - "globalTokens": 256, + "tokensPerRank": 64, + "globalTokens": 512, "dispatch": { - "p50": 67.9360032081604, - "p90": 71.16799801588058, - "p95": 73.72800260782242, - "p99": 86.5280032157898 + "p50": 112.96000331640244, + "p90": 132.22399353981018, + "p95": 365.4080033302307, + "p99": 413.4080111980438 }, "combine": { - "p50": 77.95199751853943, - "p90": 79.19999957084656, - "p95": 80.06399869918823, - "p99": 83.8719978928566 + "p50": 108.47999900579453, + "p90": 119.1679984331131, + "p95": 342.78398752212524, + "p99": 383.07198882102966 }, "roundtrip": { - "p50": 128.7039965391159, - "p90": 131.1360001564026, - "p95": 132.76800513267517, - "p99": 
140.6400054693222 + "p50": 194.815993309021, + "p90": 447.80799746513367, + "p95": 469.2800045013428, + "p99": 492.576003074646 }, "isolatedSum": { - "p50": 145.88800072669983, - "p90": 150.36799758672714, - "p95": 153.79200130701065, - "p99": 170.4000011086464 + "p50": 221.44000232219696, + "p90": 251.39199197292328, + "p95": 708.191990852356, + "p99": 796.4800000190735 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19726336, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 182, - "stragglerRank": 7, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 38836224, + "fanoutMean": 5.291015625, + "recvTokensMax": 372, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -2239,35 +2320,35 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 92.70399808883667, - "p90": 97.63199836015701, - "p95": 99.71199929714203, - "p99": 135.42400300502777 + "p50": 128.7360042333603, + "p90": 375.0079870223999, + "p95": 405.5359959602356, + "p99": 426.2399971485138 }, "combine": { - "p50": 114.78400230407715, - "p90": 116.70400202274323, - "p95": 118.97599697113037, - "p99": 164.0319973230362 + "p50": 128.89599800109863, + "p90": 378.33601236343384, + "p95": 401.91999077796936, + "p99": 419.0079867839813 }, "roundtrip": { - "p50": 190.62399864196777, - "p90": 196.60800695419312, - "p95": 197.66399264335632, - "p99": 203.99999618530273 + "p50": 227.90400683879852, + "p90": 245.53599953651428, + "p95": 489.24800753593445, + "p99": 524.1919755935669 }, "isolatedSum": { - "p50": 207.48800039291382, - "p90": 214.33600038290024, - "p95": 218.6879962682724, - "p99": 299.45600032806396 + "p50": 257.6320022344589, + "p90": 753.3439993858337, + "p95": 807.455986738205, + "p99": 845.2479839324951 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 7, + "dispatchLogicalBytes": 77514752, + 
"combineLogicalBytes": 77514752, + "fanoutMean": 5.2802734375, + "recvTokensMax": 707, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 @@ -2275,16 +2356,16 @@ ] }, { - "id": "cx-bbb0479e", - "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", - "colorKey": "b300_c9569580", - "comparisonKey": "c777627e39152404", + "id": "cx-1789a31a", + "identity": "gb200|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|decode|normal|none|none|0|tuned||af0b2d2a9119979", + "colorKey": "gb200_62dbe147", + "comparisonKey": "3fdd98e0ac017897", "schemaVersion": 3, - "generatedAt": "2026-06-27T10:26:04.332610+00:00", + "generatedAt": "2026-06-29T13:55:53.501138+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_04", - "sku": "b300", + "publicationStatus": "comparable-experimental", + "runner": "gb200-nv_0", + "sku": "gb200", "backend": "deepep", "phase": "decode", "mode": "normal", @@ -2292,30 +2373,31 @@ "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", + "topologyClass": "gb200-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep · bf16", + "label": "GB200 EP8 · deepep · bf16 · balanced", "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", + "routing": "balanced", + "routingLabel": "balanced", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, "dispatchDtype": "bf16", + "kernelGeneration": "v1", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1351, + "achievedFraction": 0.1316, "configuredUnits": 20, - "deviceUnits": 148, + "deviceUnits": 152, "resourceClass": "backend-tuned", 
"conformanceClass": "backend-default", "fixedKernel": false, @@ -2323,133 +2405,59 @@ }, "placement": { "kind": "packed", - "nodes": 1, + "nodes": 2, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "ac583971f94b176", - "workloadId": "set:8:d8d49658059863f2", - "workloadSource": "canonical-serialized", + "traceSignature": "af0b2d2a9119979", + "workloadId": null, + "workloadSource": "seeded-runtime", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, - "backendVersion": "2.0.0+af9a040", + "backendVersion": "1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28286434915", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28286434915", - "createdAt": "2026-06-27T10:26:04.332610+00:00", - "sha": "91c7acf59a5e524f37742922ec67721d86a03f6b" + "id": "28374335449", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449", + "createdAt": "2026-06-29T13:08:20Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 56.86400085687637, - "p90": 59.51999872922897, - "p95": 61.824001371860504, - "p99": 71.32799923419952 - }, - "combine": { - "p50": 67.64800101518631, - "p90": 69.72800195217133, - "p95": 71.07199728488922, - "p99": 90.55999666452408 - }, - "roundtrip": { - "p50": 109.21599715948105, - "p90": 114.656001329422, - "p95": 115.87200313806534, - "p99": 121.08799815177917 - }, - "isolatedSum": { - "p50": 124.51200187206268, - "p90": 129.2480006814003, - "p95": 132.89599865674973, - "p99": 161.8879958987236 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 630784, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - 
"p50": 57.95200169086456, - "p90": 60.83200126886368, - "p95": 64.70400094985962, - "p99": 79.6160027384758 - }, - "combine": { - "p50": 68.00000369548798, - "p90": 69.40799951553345, - "p95": 70.30399888753891, - "p99": 80.70400357246399 - }, - "roundtrip": { - "p50": 108.89600217342377, - "p90": 112.47999966144562, - "p95": 117.47200042009354, - "p99": 286.20800375938416 - }, - "isolatedSum": { - "p50": 125.95200538635254, - "p90": 130.24000078439713, - "p95": 135.00799983739853, - "p99": 160.3200063109398 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1232896, - "combineLogicalBytes": 1232896, - "fanoutMean": 5.375, - "recvTokensMax": 13, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 59.90400165319443, - "p90": 63.87200206518173, - "p95": 66.880002617836, - "p99": 74.72000271081924 + "p50": 90.81599861383438, + "p90": 104.22399640083313, + "p95": 107.77600109577179, + "p99": 122.14399874210358 }, "combine": { - "p50": 69.2799985408783, - "p90": 78.46400141716003, - "p95": 78.87999713420868, - "p99": 91.26400202512741 + "p50": 82.68799632787704, + "p90": 90.94399958848953, + "p95": 93.79199892282486, + "p99": 96.8639999628067 }, "roundtrip": { - "p50": 123.58400225639343, - "p90": 127.77599692344666, - "p95": 128.9920061826706, - "p99": 140.70400595664978 + "p50": 150.07999539375305, + "p90": 160.38399934768677, + "p95": 163.83999586105347, + "p99": 174.20800030231476 }, "isolatedSum": { - "p50": 129.18400019407272, - "p90": 142.33600348234177, - "p95": 145.75999975204468, - "p99": 165.98400473594666 + "p50": 173.50399494171143, + "p90": 195.16799598932266, + "p95": 201.56800001859665, + "p99": 219.00799870491028 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2480128, - "combineLogicalBytes": 2480128, - "fanoutMean": 5.40625, - "recvTokensMax": 29, - "stragglerRank": 6, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 
917504, + "fanoutMean": 8, + "recvTokensMax": 8, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 @@ -2458,72 +2466,35 @@ "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 59.7120001912117, - "p90": 61.91999837756157, - "p95": 64.2239972949028, - "p99": 73.53600114583969 - }, - "combine": { - "p50": 70.68800181150436, - "p90": 79.16799932718277, - "p95": 79.77599650621414, - "p99": 83.39200168848038 - }, - "roundtrip": { - "p50": 121.21599912643433, - "p90": 126.49600207805634, - "p95": 127.20000743865967, - "p99": 133.91999900341034 - }, - "isolatedSum": { - "p50": 130.40000200271606, - "p90": 141.08799770474434, - "p95": 143.99999380111694, - "p99": 156.92800283432007 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 66.94400310516357, - "p90": 73.5040009021759, - "p95": 75.26399940252304, - "p99": 77.69600301980972 + "p50": 92.70399808883667, + "p90": 106.33599758148193, + "p95": 111.23199760913849, + "p99": 132.4480026960373 }, "combine": { - "p50": 78.62400263547897, - "p90": 79.71200346946716, - "p95": 79.93599772453308, - "p99": 82.94399827718735 + "p50": 85.28000116348267, + "p90": 94.04800087213516, + "p95": 95.93600034713745, + "p99": 104.99200224876404 }, "roundtrip": { - "p50": 121.11999839544296, - "p90": 123.32800030708313, - "p95": 124.41600114107132, - "p99": 129.7920048236847 + "p50": 157.31200575828552, + "p90": 167.35999286174774, + "p95": 170.1119989156723, + "p99": 175.48799514770508 }, "isolatedSum": { - "p50": 145.56800574064255, - "p90": 153.21600437164307, - "p95": 155.19999712705612, - "p99": 160.64000129699707 + "p50": 177.98399925231934, + "p90": 200.3839984536171, + "p95": 207.16799795627594, + "p99": 237.44000494480133 }, "roundtripMeasured": 
true, - "dispatchLogicalBytes": 9920512, - "combineLogicalBytes": 9920512, - "fanoutMean": 5.40625, - "recvTokensMax": 92, - "stragglerRank": 6, + "dispatchLogicalBytes": 7340032, + "combineLogicalBytes": 7340032, + "fanoutMean": 8, + "recvTokensMax": 64, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 @@ -2532,72 +2503,35 @@ "tokensPerRank": 32, "globalTokens": 256, "dispatch": { - "p50": 69.76000219583511, - "p90": 71.80800288915634, - "p95": 74.46400076150894, - "p99": 86.84799820184708 - }, - "combine": { - "p50": 80.22399991750717, - "p90": 83.10399949550629, - "p95": 90.30400216579437, - "p99": 92.6079973578453 - }, - "roundtrip": { - "p50": 132.38400220870972, - "p90": 137.05599308013916, - "p95": 138.72000575065613, - "p99": 158.9439958333969 - }, - "isolatedSum": { - "p50": 149.98400211334229, - "p90": 154.91200238466263, - "p95": 164.7680029273033, - "p99": 179.45599555969238 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 19726336, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 182, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 64, - "globalTokens": 512, - "dispatch": { - "p50": 82.56000280380249, - "p90": 89.75999802350998, - "p95": 91.0400003194809, - "p99": 103.20000350475311 + "p50": 100.16000270843506, + "p90": 110.27199774980545, + "p95": 113.76000195741653, + "p99": 121.5360015630722 }, "combine": { - "p50": 92.6399976015091, - "p90": 94.97600048780441, - "p95": 102.55999863147736, - "p99": 106.36799782514572 + "p50": 94.01600062847137, + "p90": 98.84800016880035, + "p95": 103.96800190210342, + "p99": 107.61599987745285 }, "roundtrip": { - "p50": 160.16000509262085, - "p90": 165.0560051202774, - "p95": 166.75199568271637, - "p99": 179.77599799633026 + "p50": 169.66399550437927, + "p90": 178.6240041255951, + "p95": 181.43999576568604, + "p99": 188.51199746131897 }, "isolatedSum": { - "p50": 175.20000040531158, - "p90": 
184.7359985113144, - "p95": 193.59999895095825, - "p99": 209.56800132989883 + "p50": 194.17600333690643, + "p90": 209.1199979186058, + "p95": 217.72800385951996, + "p99": 229.15200144052505 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 38993920, - "combineLogicalBytes": 38993920, - "fanoutMean": 5.3125, - "recvTokensMax": 367, - "stragglerRank": 5, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 8, + "recvTokensMax": 256, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 @@ -2606,35 +2540,35 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 94.46399658918381, - "p90": 96.79999947547913, - "p95": 98.88000041246414, - "p99": 114.656001329422 + "p50": 137.63199746608734, + "p90": 147.61599898338318, + "p95": 149.98400211334229, + "p99": 158.39999914169312 }, "combine": { - "p50": 116.03199690580368, - "p90": 117.44000017642975, - "p95": 117.91999638080597, - "p99": 127.83999741077423 + "p50": 145.56799829006195, + "p90": 153.9520025253296, + "p95": 156.38400614261627, + "p99": 162.7199947834015 }, "roundtrip": { - "p50": 195.6160068511963, - "p90": 200.41599869728088, - "p95": 201.664000749588, - "p99": 227.35999524593353 + "p50": 260.70401072502136, + "p90": 270.04799246788025, + "p95": 273.79199862480164, + "p99": 286.3680124282837 }, "isolatedSum": { - "p50": 210.4959934949875, - "p90": 214.23999965190887, - "p95": 216.7999967932701, - "p99": 242.49599874019623 + "p50": 283.1999957561493, + "p90": 301.56800150871277, + "p95": 306.36800825595856, + "p99": 321.1199939250946 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 6, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 8, + "recvTokensMax": 1024, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 @@ -2642,16 +2576,16 @@ ] }, { - "id": 
"cx-2fa7319c", - "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||c774c8e4abb34da", - "colorKey": "b300_c9569580", - "comparisonKey": "89fa2de88509570c", + "id": "cx-03edcd25", + "identity": "gb200|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|8|decode|normal|none|none|0|tuned||f0bc700e9998f70", + "colorKey": "gb200_3028258e", + "comparisonKey": "021bfb0baa9d2669", "schemaVersion": 3, - "generatedAt": "2026-06-27T00:54:19.552522+00:00", + "generatedAt": "2026-06-29T13:56:38.892289+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_01", - "sku": "b300", + "publicationStatus": "comparable-experimental", + "runner": "gb200-nv_0", + "sku": "gb200", "backend": "deepep", "phase": "decode", "mode": "normal", @@ -2659,30 +2593,31 @@ "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", + "topologyClass": "gb200-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep · bf16", + "label": "GB200 EP8 · deepep · bf16 · balanced-rank-local", "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", + "routing": "balanced-rank-local", + "routingLabel": "balanced-rank-local", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, "dispatchDtype": "bf16", + "kernelGeneration": "v1", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1351, + "achievedFraction": 0.1316, "configuredUnits": 20, - "deviceUnits": 148, + "deviceUnits": 152, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -2690,207 +2625,170 @@ }, "placement": { "kind": "packed", - "nodes": 1, + "nodes": 2, "gpusPerNode": 
8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "c774c8e4abb34da", - "workloadId": "set:5:d8d49658059863f2", - "workloadSource": "canonical-serialized", + "traceSignature": "f0bc700e9998f70", + "workloadId": null, + "workloadSource": "seeded-runtime", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", + "backendVersion": "1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28273513209", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28273513209", - "createdAt": "2026-06-27T00:54:19.552522+00:00", - "sha": "2c15d9415503e9ccb84cd49cf446a122796efc1e" + "id": "28374335449", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449", + "createdAt": "2026-06-29T13:08:20Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 56.41600117087364, - "p90": 58.46399813890457, - "p95": 60.95999851822853, - "p99": 71.55200093984604 - }, - "combine": { - "p50": 66.27199798822403, - "p90": 67.55200028419495, - "p95": 68.28799843788147, - "p99": 77.27999985218048 - }, - "roundtrip": { - "p50": 105.85600137710571, - "p90": 112.28799819946289, - "p95": 113.3119985461235, - "p99": 124.09599870443344 - }, - "isolatedSum": { - "p50": 122.68799915909767, - "p90": 126.01599842309952, - "p95": 129.24799695611, - "p99": 148.83200079202652 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 630784, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 56.60799890756607, - "p90": 58.04799869656563, - "p95": 59.39200147986412, - "p99": 63.64800035953522 + "p50": 91.07200056314468, + "p90": 104.2879968881607, + "p95": 
108.83200168609619, + "p99": 114.1119971871376 }, "combine": { - "p50": 67.03999638557434, - "p90": 68.7360018491745, - "p95": 69.15199756622314, - "p99": 77.2159993648529 + "p50": 68.9919963479042, + "p90": 73.53600114583969, + "p95": 78.36800068616867, + "p99": 82.20800012350082 }, "roundtrip": { - "p50": 107.04000294208527, - "p90": 109.76000130176544, - "p95": 111.35999858379364, - "p99": 119.19999867677689 + "p50": 140.06400108337402, + "p90": 150.81599354743958, + "p95": 154.30399775505066, + "p99": 162.1440052986145 }, "isolatedSum": { - "p50": 123.64799529314041, - "p90": 126.78400054574013, - "p95": 128.54399904608727, - "p99": 140.86399972438812 + "p50": 160.0639969110489, + "p90": 177.8239980340004, + "p95": 187.20000237226486, + "p99": 196.31999731063843 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1232896, - "combineLogicalBytes": 1232896, - "fanoutMean": 5.375, - "recvTokensMax": 13, - "stragglerRank": 7, + "dispatchLogicalBytes": 114688, + "combineLogicalBytes": 114688, + "fanoutMean": 1, + "recvTokensMax": 4, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 4, - "globalTokens": 32, + "tokensPerRank": 8, + "globalTokens": 64, "dispatch": { - "p50": 58.81600081920624, - "p90": 64.44799900054932, - "p95": 66.01600348949432, - "p99": 71.61600142717361 + "p50": 94.30400282144547, + "p90": 107.51999914646149, + "p95": 112.09599673748016, + "p99": 126.88000500202179 }, "combine": { - "p50": 67.26399809122086, - "p90": 69.63200122117996, - "p95": 77.15199887752533, - "p99": 78.91199737787247 + "p50": 70.8480030298233, + "p90": 79.52000200748444, + "p95": 81.37600123882294, + "p99": 85.60000360012054 }, "roundtrip": { - "p50": 122.20799922943115, - "p90": 125.18399953842163, - "p95": 125.91999769210815, - "p99": 130.3360015153885 + "p50": 144.896000623703, + "p90": 157.4079990386963, + "p95": 161.72799468040466, + "p99": 180.28800189495087 }, "isolatedSum": { - "p50": 126.0799989104271, - "p90": 
134.08000022172928, - "p95": 143.16800236701965, - "p99": 150.52799880504608 + "p50": 165.15200585126877, + "p90": 187.04000115394592, + "p95": 193.4719979763031, + "p99": 212.48000860214233 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2480128, - "combineLogicalBytes": 2480128, - "fanoutMean": 5.40625, - "recvTokensMax": 29, - "stragglerRank": 7, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 1, + "recvTokensMax": 8, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 32, + "globalTokens": 256, "dispatch": { - "p50": 59.42400172352791, - "p90": 64.25599753856659, - "p95": 67.87200272083282, - "p99": 74.62400197982788 + "p50": 98.75199943780899, + "p90": 114.27199840545654, + "p95": 126.11199915409088, + "p99": 222.08000719547272 }, "combine": { - "p50": 68.9919963479042, - "p90": 78.015998005867, - "p95": 78.62400263547897, - "p99": 81.88799768686295 + "p50": 80.79999685287476, + "p90": 85.1840004324913, + "p95": 91.42400324344635, + "p99": 94.87999975681305 }, "roundtrip": { - "p50": 119.39200013875961, - "p90": 125.05599856376648, - "p95": 126.17599964141846, - "p99": 130.36799430847168 + "p50": 150.62400698661804, + "p90": 162.6559942960739, + "p95": 165.75999557971954, + "p99": 174.27200078964233 }, "isolatedSum": { - "p50": 128.4159980714321, - "p90": 142.2719955444336, - "p95": 146.4960053563118, - "p99": 156.51199966669083 + "p50": 179.55199629068375, + "p90": 199.45599883794785, + "p95": 217.53600239753723, + "p99": 316.96000695228577 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 5, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 1, + "recvTokensMax": 32, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 16, - 
"globalTokens": 128, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 66.68800115585327, - "p90": 73.7600028514862, - "p95": 75.13599842786789, - "p99": 80.35200089216232 + "p50": 101.43999755382538, + "p90": 111.29599809646606, + "p95": 116.06399714946747, + "p99": 124.70400333404541 }, "combine": { - "p50": 69.88800317049026, - "p90": 78.5600021481514, - "p95": 78.75200361013412, - "p99": 82.56000280380249 + "p50": 82.0159986615181, + "p90": 91.61599725484848, + "p95": 93.53599697351456, + "p99": 96.54399752616882 }, "roundtrip": { - "p50": 119.26399916410446, - "p90": 121.47200107574463, - "p95": 123.52000176906586, - "p99": 127.68000364303589 + "p50": 157.50400722026825, + "p90": 166.46400094032288, + "p95": 169.50400173664093, + "p99": 179.3919950723648 }, "isolatedSum": { - "p50": 136.57600432634354, - "p90": 152.3200049996376, - "p95": 153.888002038002, - "p99": 162.9120036959648 + "p50": 183.45599621534348, + "p90": 202.91199535131454, + "p95": 209.59999412298203, + "p99": 221.24800086021423 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 9920512, - "combineLogicalBytes": 9920512, - "fanoutMean": 5.40625, - "recvTokensMax": 92, - "stragglerRank": 0, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 1, + "recvTokensMax": 128, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -2898,16 +2796,16 @@ ] }, { - "id": "cx-dc6ca42c", - "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|small-amplitude|none|none|0|tuned||8c8497a77d9085d", - "colorKey": "b300_c9569580", - "comparisonKey": "8a9fa1be98f83eb3", + "id": "cx-50034489", + "identity": "gb200|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|balanced+eplb|8|decode|normal|none|none|0|tuned||0456df9778e5c0f", + "colorKey": "gb200_71fc8a17", + "comparisonKey": "ba506a9c9dcd4b28", "schemaVersion": 3, - "generatedAt": "2026-06-27T00:06:17.025326+00:00", + "generatedAt": 
"2026-06-29T14:00:31.280638+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_14", - "sku": "b300", + "publicationStatus": "comparable-experimental", + "runner": "gb200-nv_0", + "sku": "gb200", "backend": "deepep", "phase": "decode", "mode": "normal", @@ -2915,30 +2813,31 @@ "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", + "topologyClass": "gb200-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep · bf16", - "model": "DeepSeek-V3/V4", + "label": "GB200 EP8 · deepep · bf16 · balanced+eplb", + "model": "DeepSeek-V3 (EPLB physical)", "shape": { "hidden": 7168, "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", + "experts": 288, + "routing": "balanced", + "routingLabel": "balanced+eplb", "routingStep": 0, "unevenTokens": "none", - "eplbEnabled": false, + "eplbEnabled": true, "dispatchDtype": "bf16", - "activationProfile": "small-amplitude", + "kernelGeneration": "v1", + "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1351, + "achievedFraction": 0.1316, "configuredUnits": 20, - "deviceUnits": 148, + "deviceUnits": 152, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -2946,352 +2845,281 @@ }, "placement": { "kind": "packed", - "nodes": 1, + "nodes": 2, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "8c8497a77d9085d", - "workloadId": "set:4:d8d49658059863f2", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", + "traceSignature": "0456df9778e5c0f", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1, + "eplbImbalanceAfter": 1, + "backendVersion": 
"1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28272146490", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272146490", - "createdAt": "2026-06-27T00:06:17.025326+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28374335449", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449", + "createdAt": "2026-06-29T13:08:20Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 55.84000051021576, - "p90": 57.95200169086456, - "p95": 60.54399907588959, - "p99": 68.09599697589874 + "p50": 92.38400310277939, + "p90": 104.16000336408615, + "p95": 107.45599865913391, + "p99": 114.27199840545654 }, "combine": { - "p50": 66.20799750089645, - "p90": 66.94400310516357, - "p95": 67.52000004053116, - "p99": 90.87999910116196 + "p50": 71.9359964132309, + "p90": 80.57600259780884, + "p95": 82.97599852085114, + "p99": 87.23200112581253 }, "roundtrip": { - "p50": 106.04800283908844, - "p90": 111.07199639081955, - "p95": 112.67200112342834, - "p99": 125.15200674533844 + "p50": 145.6640064716339, + "p90": 166.59200191497803, + "p95": 180.7360053062439, + "p99": 195.96800208091736 }, "isolatedSum": { - "p50": 122.04799801111221, - "p90": 124.89600479602814, - "p95": 128.06399911642075, - "p99": 158.9759960770607 + "p50": 164.31999951601028, + "p90": 184.736005961895, + "p95": 190.43199717998505, + "p99": 201.50399953126907 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 630784, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 5, + "dispatchLogicalBytes": 229376, + "combineLogicalBytes": 229376, + "fanoutMean": 2, + "recvTokensMax": 3, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 2, 
+ "globalTokens": 16, "dispatch": { - "p50": 59.07199904322624, - "p90": 62.3680017888546, - "p95": 65.08799642324448, - "p99": 71.00799679756165 + "p50": 93.47199648618698, + "p90": 105.59999942779541, + "p95": 111.32799834012985, + "p99": 145.50399780273438 }, "combine": { - "p50": 69.18399780988693, - "p90": 78.14399898052216, - "p95": 78.59200239181519, - "p99": 88.22400122880936 + "p50": 72.64000177383423, + "p90": 80.6720033288002, + "p95": 83.10399949550629, + "p99": 88.03199976682663 }, "roundtrip": { - "p50": 119.07199770212173, - "p90": 124.32000041007996, - "p95": 125.37600100040436, - "p99": 140.06400108337402 + "p50": 144.6080058813095, + "p90": 154.36799824237823, + "p95": 157.0879966020584, + "p99": 163.26400637626648 }, "isolatedSum": { - "p50": 128.25599685311317, - "p90": 140.51200076937675, - "p95": 143.67999881505966, - "p99": 159.231998026371 + "p50": 166.1119982600212, + "p90": 186.2720027565956, + "p95": 194.43199783563614, + "p99": 233.535997569561 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 4, + "dispatchLogicalBytes": 458752, + "combineLogicalBytes": 458752, + "fanoutMean": 2, + "recvTokensMax": 6, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 32, - "globalTokens": 256, + "tokensPerRank": 4, + "globalTokens": 32, "dispatch": { - "p50": 68.70400160551071, - "p90": 73.66400212049484, - "p95": 75.13599842786789, - "p99": 93.56799721717834 + "p50": 98.11200201511383, + "p90": 239.32799696922302, + "p95": 276.38399600982666, + "p99": 444.5439875125885 }, "combine": { - "p50": 78.62400263547897, - "p90": 79.6160027384758, - "p95": 81.44000172615051, - "p99": 91.48799628019333 + "p50": 78.46400141716003, + "p90": 170.3999936580658, + "p95": 194.5600062608719, + "p99": 231.36000335216522 }, "roundtrip": { - "p50": 130.65600395202637, - "p90": 135.71199774742126, - "p95": 
136.76799833774567, - "p99": 144.1279947757721 + "p50": 148.19200336933136, + "p90": 159.2639982700348, + "p95": 165.8560037612915, + "p99": 268.95999908447266 }, "isolatedSum": { - "p50": 147.32800424098969, - "p90": 153.28000485897064, - "p95": 156.5760001540184, - "p99": 185.05599349737167 + "p50": 176.57600343227386, + "p90": 409.7279906272888, + "p95": 470.94400227069855, + "p99": 675.9039908647537 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19726336, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 182, - "stragglerRank": 4, - "correct": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 2, + "recvTokensMax": 12, + "stragglerRank": 2, + "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 8, + "globalTokens": 64, "dispatch": { - "p50": 93.08800101280212, - "p90": 98.78399968147278, - "p95": 100.63999891281128, - "p99": 110.17599701881409 + "p50": 97.02400118112564, + "p90": 124.32000041007996, + "p95": 139.3280029296875, + "p99": 165.75999557971954 }, "combine": { - "p50": 115.39199948310852, - "p90": 116.28799885511398, - "p95": 117.21599847078323, - "p99": 126.39999389648438 + "p50": 76.12799853086472, + "p90": 84.70399677753448, + "p95": 93.59999746084213, + "p99": 254.7520101070404 }, "roundtrip": { - "p50": 192.25600361824036, - "p90": 198.2080042362213, - "p95": 198.7839937210083, - "p99": 203.61599326133728 + "p50": 147.64800667762756, + "p90": 159.2639982700348, + "p95": 164.09599781036377, + "p99": 179.58399653434753 }, "isolatedSum": { - "p50": 208.48000049591064, - "p90": 215.07199853658676, - "p95": 217.8559973835945, - "p99": 236.57599091529846 + "p50": 173.15199971199036, + "p90": 209.02399718761444, + "p95": 232.92800039052963, + "p99": 420.51200568675995 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - 
"recvTokensMax": 723, - "stragglerRank": 4, + "dispatchLogicalBytes": 1835008, + "combineLogicalBytes": 1835008, + "fanoutMean": 2, + "recvTokensMax": 24, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 - } - ] - }, - { - "id": "cx-a995e296", - "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|wide-dynamic-range|none|none|0|tuned||8c8497a77d9085d", - "colorKey": "b300_c9569580", - "comparisonKey": "fe9431c5beaaf675", - "schemaVersion": 3, - "generatedAt": "2026-06-27T00:06:39.072562+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_03", - "sku": "b300", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "B300 EP8 · deepep · bf16", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "wide-dynamic-range", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1351, - "configuredUnits": 20, - "deviceUnits": 148, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "8c8497a77d9085d", - "workloadId": "set:4:d8d49658059863f2", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": 
"sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28272150514", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272150514", - "createdAt": "2026-06-27T00:06:39.072562+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ + }, { - "tokensPerRank": 1, - "globalTokens": 8, + "tokensPerRank": 16, + "globalTokens": 128, "dispatch": { - "p50": 1758.687973022461, - "p90": 2565.7920837402344, - "p95": 2910.815954208374, - "p99": 3400.576114654541 + "p50": 95.23200243711472, + "p90": 105.79200088977814, + "p95": 109.3439981341362, + "p99": 122.52800166606903 }, "combine": { - "p50": 1759.8719596862793, - "p90": 1907.871961593628, - "p95": 2670.1760292053223, - "p99": 2940.095901489258 + "p50": 80.09599894285202, + "p90": 84.83199775218964, + "p95": 87.39200234413147, + "p99": 97.75999933481216 }, "roundtrip": { - "p50": 1802.39999294281, - "p90": 1987.0719909667969, - "p95": 2666.1760807037354, - "p99": 2924.000024795532 + "p50": 148.70400726795197, + "p90": 159.13599729537964, + "p95": 161.79199516773224, + "p99": 171.55200242996216 }, "isolatedSum": { - "p50": 3518.5599327087402, - "p90": 4473.664045333862, - "p95": 5580.991983413696, - "p99": 6340.672016143799 + "p50": 175.32800137996674, + "p90": 190.62399864196777, + "p95": 196.73600047826767, + "p99": 220.2880010008812 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 630784, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 4, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 2, + "recvTokensMax": 48, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 32, + "globalTokens": 256, "dispatch": { - "p50": 1754.8799514770508, - "p90": 2488.703966140747, - "p95": 2823.359966278076, - "p99": 3391.4880752563477 + "p50": 
98.81599992513657, + "p90": 164.15999829769135, + "p95": 243.00800263881683, + "p99": 392.2879993915558 }, "combine": { - "p50": 1760.4479789733887, - "p90": 1861.184000968933, - "p95": 2647.264003753662, - "p99": 2955.8401107788086 + "p50": 82.43200182914734, + "p90": 94.01600062847137, + "p95": 98.62399846315384, + "p99": 122.68800288438797 }, "roundtrip": { - "p50": 1819.2960023880005, - "p90": 1958.5280418395996, - "p95": 2686.271905899048, - "p99": 2968.319892883301 + "p50": 151.74399316310883, + "p90": 165.40800034999847, + "p95": 177.34399437904358, + "p99": 246.75199389457703 }, "isolatedSum": { - "p50": 3515.3279304504395, - "p90": 4349.88796710968, - "p95": 5470.623970031738, - "p99": 6347.328186035156 + "p50": 181.2480017542839, + "p90": 258.1759989261627, + "p95": 341.6320011019707, + "p99": 514.9760022759438 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 7, + "dispatchLogicalBytes": 7340032, + "combineLogicalBytes": 7340032, + "fanoutMean": 2, + "recvTokensMax": 96, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 32, - "globalTokens": 256, + "tokensPerRank": 64, + "globalTokens": 512, "dispatch": { - "p50": 1767.3920392990112, - "p90": 2204.767942428589, - "p95": 2829.9520015716553, - "p99": 3398.303985595703 + "p50": 99.20000284910202, + "p90": 109.37599837779999, + "p95": 113.95200341939926, + "p99": 121.08799815177917 }, "combine": { - "p50": 1764.0960216522217, - "p90": 1887.1040344238281, - "p95": 2647.615909576416, - "p99": 3015.5839920043945 + "p50": 83.10399949550629, + "p90": 91.13600105047226, + "p95": 94.30400282144547, + "p99": 98.04800152778625 }, "roundtrip": { - "p50": 1835.6800079345703, - "p90": 1997.1840381622314, - "p95": 2681.3440322875977, - "p99": 2967.072010040283 + "p50": 155.42399883270264, + "p90": 177.2480010986328, + "p95": 278.0799865722656, + "p99": 
647.1999883651733 }, "isolatedSum": { - "p50": 3531.488060951233, - "p90": 4091.871976852417, - "p95": 5477.567911148071, - "p99": 6413.887977600098 + "p50": 182.3040023446083, + "p90": 200.51199942827225, + "p95": 208.25600624084473, + "p99": 219.13599967956543 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19726336, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 182, - "stragglerRank": 4, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 2, + "recvTokensMax": 192, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 @@ -3300,35 +3128,35 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 1790.7520532608032, - "p90": 2270.848035812378, - "p95": 2845.247983932495, - "p99": 3459.712028503418 + "p50": 106.72000050544739, + "p90": 115.7120019197464, + "p95": 118.9119964838028, + "p99": 124.32000041007996 }, "combine": { - "p50": 1809.7599744796753, - "p90": 1956.9599628448486, - "p95": 2685.7919692993164, - "p99": 3029.952049255371 + "p50": 97.47199714183807, + "p90": 106.08000308275223, + "p95": 108.31999778747559, + "p99": 110.59200018644333 }, "roundtrip": { - "p50": 1890.3039693832397, - "p90": 2169.4719791412354, - "p95": 2888.256072998047, - "p99": 3985.24808883667 + "p50": 178.20799350738525, + "p90": 185.69600582122803, + "p95": 188.4479969739914, + "p99": 196.44799828529358 }, "isolatedSum": { - "p50": 3600.5120277404785, - "p90": 4227.807998657227, - "p95": 5531.0399532318115, - "p99": 6489.664077758789 + "p50": 204.19199764728546, + "p90": 221.79200500249863, + "p95": 227.23199427127838, + "p99": 234.91200059652328 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 4, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 2, + "recvTokensMax": 384, + "stragglerRank": 5, "correct": true, 
"samplesPooled": 600, "trials": 3 @@ -3336,16 +3164,16 @@ ] }, { - "id": "cx-b81422f4", - "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|zeros|none|none|0|tuned||8c8497a77d9085d", - "colorKey": "b300_c9569580", - "comparisonKey": "d97d7a8231265a6c", + "id": "cx-39d61832", + "identity": "gb200|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|decode|normal|none|none|0|tuned||d0eaac3a0f0ae8c", + "colorKey": "gb200_d945a181", + "comparisonKey": "b74e548739c090a5", "schemaVersion": 3, - "generatedAt": "2026-06-27T00:06:13.336317+00:00", + "generatedAt": "2026-06-29T13:58:26.409733+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_13", - "sku": "b300", + "publicationStatus": "comparable-experimental", + "runner": "gb200-nv_0", + "sku": "gb200", "backend": "deepep", "phase": "decode", "mode": "normal", @@ -3353,30 +3181,31 @@ "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", + "topologyClass": "gb200-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep · bf16", + "label": "GB200 EP8 · deepep · bf16 · hotspot-single", "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", + "routing": "hotspot-single", + "routingLabel": "hotspot-single", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, "dispatchDtype": "bf16", - "activationProfile": "zeros", + "kernelGeneration": "v1", + "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1351, + "achievedFraction": 0.1316, "configuredUnits": 20, - "deviceUnits": 148, + "deviceUnits": 152, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -3384,59 +3213,59 
@@ }, "placement": { "kind": "packed", - "nodes": 1, + "nodes": 2, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "8c8497a77d9085d", - "workloadId": "set:4:d8d49658059863f2", - "workloadSource": "canonical-serialized", + "traceSignature": "d0eaac3a0f0ae8c", + "workloadId": null, + "workloadSource": "seeded-runtime", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", + "backendVersion": "1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28272142980", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272142980", - "createdAt": "2026-06-27T00:06:13.336317+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28374335449", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449", + "createdAt": "2026-06-29T13:08:20Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 56.63999915122986, - "p90": 59.26400050520897, - "p95": 62.04799935221672, - "p99": 73.85600358247757 + "p50": 91.0400003194809, + "p90": 106.46399855613708, + "p95": 113.34399878978729, + "p99": 134.20799374580383 }, "combine": { - "p50": 66.43199920654297, - "p90": 67.4239993095398, - "p95": 68.25599819421768, - "p99": 78.04799824953079 + "p50": 81.79199695587158, + "p90": 91.96799993515015, + "p95": 96.16000205278397, + "p99": 105.76000064611435 }, "roundtrip": { - "p50": 106.78400099277496, - "p90": 111.39199882745743, - "p95": 113.34399878978729, - "p99": 117.0239970088005 + "p50": 152.38399803638458, + "p90": 199.64799284934998, + "p95": 213.59999477863312, + "p99": 233.40800404548645 }, "isolatedSum": { - "p50": 123.07199835777283, - "p90": 126.68799981474876, - "p95": 130.3039975464344, - "p99": 151.90400183200836 + "p50": 172.83199727535248, + "p90": 198.43199849128723, + 
"p95": 209.50400084257126, + "p99": 239.96799439191818 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 630784, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 7, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -3445,35 +3274,35 @@ "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 59.55199897289276, - "p90": 61.824001371860504, - "p95": 63.680000603199005, - "p99": 71.07199728488922 + "p50": 92.57599711418152, + "p90": 104.22399640083313, + "p95": 109.79200154542923, + "p99": 132.06399977207184 }, "combine": { - "p50": 68.92800331115723, - "p90": 77.7600035071373, - "p95": 77.95199751853943, - "p99": 78.65600287914276 + "p50": 83.8719978928566, + "p90": 89.4400030374527, + "p95": 92.6399976015091, + "p99": 96.76799923181534 }, "roundtrip": { - "p50": 120.03199756145477, - "p90": 124.4800016283989, - "p95": 125.95200538635254, - "p99": 145.53600549697876 + "p50": 154.2080044746399, + "p90": 164.5440012216568, + "p95": 169.18399930000305, + "p99": 179.71199750900269 }, "isolatedSum": { - "p50": 128.48000228405, - "p90": 139.5840048789978, - "p95": 141.63199812173843, - "p99": 149.72800016403198 + "p50": 176.44799500703812, + "p90": 193.66399943828583, + "p95": 202.43199914693832, + "p99": 228.83199900388718 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 7, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 64, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -3482,35 +3311,35 @@ "tokensPerRank": 32, "globalTokens": 256, "dispatch": { - "p50": 68.83200258016586, - "p90": 72.38399982452393, - "p95": 75.16799867153168, - "p99": 78.17599922418594 + "p50": 
101.27999633550644, + "p90": 116.86400324106216, + "p95": 129.2800009250641, + "p99": 143.77599954605103 }, "combine": { - "p50": 78.65600287914276, - "p90": 79.71200346946716, - "p95": 80.57600259780884, - "p99": 100.92800110578537 + "p50": 93.05600076913834, + "p90": 101.56799852848053, + "p95": 107.07200318574905, + "p99": 119.4240003824234 }, "roundtrip": { - "p50": 130.72000443935394, - "p90": 134.2719942331314, - "p95": 135.74400544166565, - "p99": 155.7759940624237 + "p50": 162.81600296497345, + "p90": 173.0560064315796, + "p95": 176.03200674057007, + "p99": 183.16799402236938 }, "isolatedSum": { - "p50": 147.48800545930862, - "p90": 152.0960032939911, - "p95": 155.74400126934052, - "p99": 179.1040003299713 + "p50": 194.33599710464478, + "p90": 218.4320017695427, + "p95": 236.35200411081314, + "p99": 263.1999999284744 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19726336, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 182, - "stragglerRank": 7, + "dispatchLogicalBytes": 19525632, + "combineLogicalBytes": 19525632, + "fanoutMean": 5.3203125, + "recvTokensMax": 256, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -3519,35 +3348,35 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 93.34400296211243, - "p90": 95.93600034713745, - "p95": 99.2640033364296, - "p99": 107.61599987745285 + "p50": 130.87999820709229, + "p90": 144.3839967250824, + "p95": 169.21600699424744, + "p99": 363.23198676109314 }, "combine": { - "p50": 115.4559999704361, - "p90": 116.44800007343292, - "p95": 117.0559972524643, - "p99": 126.43200159072876 + "p50": 142.2719955444336, + "p90": 151.58399939537048, + "p95": 155.64799308776855, + "p99": 173.47200214862823 }, "roundtrip": { - "p50": 192.9599940776825, - "p90": 198.81600141525269, - "p95": 199.8080015182495, - "p99": 274.1439938545227 + "p50": 248.06399643421173, + "p90": 264.6079957485199, + "p95": 275.4879891872406, + "p99": 288.12798857688904 }, 
"isolatedSum": { - "p50": 208.80000293254852, - "p90": 212.38400042057037, - "p95": 216.3200005888939, - "p99": 234.0480014681816 + "p50": 273.1519937515259, + "p90": 295.9679961204529, + "p95": 324.864000082016, + "p99": 536.7039889097214 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 6, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1024, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -3555,47 +3384,48 @@ ] }, { - "id": "cx-53b3c366", - "identity": "b300|deepep|7168|8|256|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", - "colorKey": "b300_307ed708", - "comparisonKey": "8f32ac097503699d", + "id": "cx-8c49b354", + "identity": "gb200|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|hotspot-single+eplb|8|decode|normal|none|none|0|tuned||5793a02d08aaa9c", + "colorKey": "gb200_d826ab8d", + "comparisonKey": "97827a35998e3c24", "schemaVersion": 3, - "generatedAt": "2026-06-27T09:50:59.262697+00:00", + "generatedAt": "2026-06-29T13:51:49.411928+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_14", - "sku": "b300", + "publicationStatus": "comparable-experimental", + "runner": "gb200-nv_1", + "sku": "gb200", "backend": "deepep", "phase": "decode", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "runtime-visible-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb200-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep · bf16", - "model": "DeepSeek-V3/V4", + "label": "GB200 EP8 · deepep · bf16 · hotspot-single+eplb", + "model": "DeepSeek-V3 (EPLB 
physical)", "shape": { "hidden": 7168, "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", + "experts": 288, + "routing": "hotspot-single", + "routingLabel": "hotspot-single+eplb", "routingStep": 0, "unevenTokens": "none", - "eplbEnabled": false, + "eplbEnabled": true, "dispatchDtype": "bf16", + "kernelGeneration": "v1", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1351, + "achievedFraction": 0.1316, "configuredUnits": 20, - "deviceUnits": 148, + "deviceUnits": 152, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -3603,59 +3433,59 @@ }, "placement": { "kind": "packed", - "nodes": 1, + "nodes": 2, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "ac583971f94b176", - "workloadId": "set:8:d8d49658059863f2", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", + "traceSignature": "5793a02d08aaa9c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.830078125, + "eplbImbalanceAfter": 1.0007595486111112, + "backendVersion": "1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28285677323", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285677323", - "createdAt": "2026-06-27T09:50:59.262697+00:00", - "sha": "149586650dbed5b7579537347e9489d5b41543c1" + "id": "28374335449", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449", + "createdAt": "2026-06-29T13:08:20Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 57.50399827957153, - "p90": 59.67999994754791, - "p95": 61.69600039720535, - "p99": 81.7599967122078 + 
"p50": 90.87999910116196, + "p90": 317.984014749527, + "p95": 359.6479892730713, + "p99": 389.3440067768097 }, "combine": { - "p50": 67.00800359249115, - "p90": 68.1919977068901, - "p95": 69.5360004901886, - "p99": 77.63200253248215 + "p50": 82.04799890518188, + "p90": 339.80798721313477, + "p95": 354.71999645233154, + "p99": 369.4399893283844 }, "roundtrip": { - "p50": 107.51999914646149, - "p90": 112.92800307273865, - "p95": 114.49600011110306, - "p99": 130.68799674510956 + "p50": 148.6400067806244, + "p90": 411.8080139160156, + "p95": 429.0879964828491, + "p99": 449.6000111103058 }, "isolatedSum": { - "p50": 124.51200187206268, - "p90": 127.87199765443802, - "p95": 131.23200088739395, - "p99": 159.39199924468994 + "p50": 172.92799800634384, + "p90": 657.7920019626617, + "p95": 714.3679857254028, + "p99": 758.7839961051941 }, "roundtripMeasured": true, "dispatchLogicalBytes": 630784, "combineLogicalBytes": 630784, "fanoutMean": 5.5, "recvTokensMax": 7, - "stragglerRank": 4, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 @@ -3664,35 +3494,35 @@ "tokensPerRank": 2, "globalTokens": 16, "dispatch": { - "p50": 57.50399827957153, - "p90": 59.58399921655655, - "p95": 61.983998864889145, - "p99": 74.20799881219864 + "p50": 91.839998960495, + "p90": 330.4319977760315, + "p95": 363.072007894516, + "p99": 384.768009185791 }, "combine": { - "p50": 67.32799857854843, - "p90": 69.43999975919724, - "p95": 76.9599974155426, - "p99": 81.50400221347809 + "p50": 82.11199939250946, + "p90": 326.1120021343231, + "p95": 352.83198952674866, + "p99": 365.1840090751648 }, "roundtrip": { - "p50": 108.06400328874588, - "p90": 110.88000237941742, - "p95": 113.50400000810623, - "p99": 120.51200121641159 + "p50": 149.1519957780838, + "p90": 380.44801354408264, + "p95": 422.87999391555786, + "p99": 441.9519901275635 }, "isolatedSum": { - "p50": 124.83199685811996, - "p90": 129.02399897575378, - "p95": 138.94399628043175, - "p99": 155.71200102567673 + "p50": 
173.95199835300446, + "p90": 656.5439999103546, + "p95": 715.9039974212646, + "p99": 749.9520182609558 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1232896, - "combineLogicalBytes": 1232896, - "fanoutMean": 5.375, - "recvTokensMax": 13, - "stragglerRank": 4, + "dispatchLogicalBytes": 1175552, + "combineLogicalBytes": 1175552, + "fanoutMean": 5.125, + "recvTokensMax": 12, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 @@ -3701,35 +3531,35 @@ "tokensPerRank": 4, "globalTokens": 32, "dispatch": { - "p50": 59.51999872922897, - "p90": 61.76000088453293, - "p95": 63.64800035953522, - "p99": 68.12799721956253 + "p50": 93.72799843549728, + "p90": 336.2559974193573, + "p95": 363.48798871040344, + "p99": 400.2879858016968 }, "combine": { - "p50": 68.86400282382965, - "p90": 77.37600058317184, - "p95": 78.04799824953079, - "p99": 80.54400235414505 + "p50": 84.09599959850311, + "p90": 324.6079981327057, + "p95": 361.02399230003357, + "p99": 458.624005317688 }, "roundtrip": { - "p50": 123.90399724245071, - "p90": 126.75200402736664, - "p95": 127.20000743865967, - "p99": 130.94399869441986 + "p50": 152.0639955997467, + "p90": 391.87198877334595, + "p95": 423.007994890213, + "p99": 448.0000138282776 }, "isolatedSum": { - "p50": 128.38400155305862, - "p90": 139.13600146770477, - "p95": 141.695998609066, - "p99": 148.67199957370758 + "p50": 177.8239980340004, + "p90": 660.863995552063, + "p95": 724.511981010437, + "p99": 858.9119911193848 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2480128, - "combineLogicalBytes": 2480128, - "fanoutMean": 5.40625, - "recvTokensMax": 29, - "stragglerRank": 4, + "dispatchLogicalBytes": 2451456, + "combineLogicalBytes": 2451456, + "fanoutMean": 5.34375, + "recvTokensMax": 23, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 @@ -3738,35 +3568,35 @@ "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 60.447998344898224, - "p90": 65.5680000782013, - "p95": 
67.71200150251389, - "p99": 73.88799637556076 + "p50": 94.33600306510925, + "p90": 351.3279855251312, + "p95": 371.4880049228668, + "p99": 384.70399379730225 }, "combine": { - "p50": 69.023996591568, - "p90": 77.63200253248215, - "p95": 78.27199995517731, - "p99": 79.68000322580338 + "p50": 84.6719965338707, + "p90": 316.1599934101105, + "p95": 347.1679985523224, + "p99": 368.7039911746979 }, "roundtrip": { - "p50": 120.7360029220581, - "p90": 126.11199915409088, - "p95": 127.48800218105316, - "p99": 135.6160044670105 + "p50": 154.4319987297058, + "p90": 396.12799882888794, + "p95": 427.0400106906891, + "p99": 455.9679925441742 }, "isolatedSum": { - "p50": 129.47199493646622, - "p90": 143.20000261068344, - "p95": 145.9840014576912, - "p99": 153.56799960136414 + "p50": 179.00799959897995, + "p90": 667.4879789352417, + "p95": 718.6560034751892, + "p99": 753.4079849720001 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 4, + "dispatchLogicalBytes": 4730880, + "combineLogicalBytes": 4730880, + "fanoutMean": 5.15625, + "recvTokensMax": 44, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 @@ -3775,35 +3605,35 @@ "tokensPerRank": 16, "globalTokens": 128, "dispatch": { - "p50": 62.55999952554703, - "p90": 69.08799707889557, - "p95": 71.35999947786331, - "p99": 78.23999971151352 + "p50": 96.63999825716019, + "p90": 343.392014503479, + "p95": 373.4399974346161, + "p99": 391.4560079574585 }, "combine": { - "p50": 77.66400277614594, - "p90": 79.1039988398552, - "p95": 79.45600152015686, - "p99": 81.216000020504 + "p50": 86.30400151014328, + "p90": 322.56001234054565, + "p95": 349.88799691200256, + "p99": 374.783992767334 }, "roundtrip": { - "p50": 120.25599926710129, - "p90": 122.65600264072418, - "p95": 124.15999919176102, - "p99": 136.63999736309052 + "p50": 155.64799308776855, + "p90": 387.4239921569824, + "p95": 428.9279878139496, + 
"p99": 452.63999700546265 }, "isolatedSum": { - "p50": 140.22400230169296, - "p90": 148.19199591875076, - "p95": 150.81600099802017, - "p99": 159.45599973201752 + "p50": 182.94399976730347, + "p90": 665.9520268440247, + "p95": 723.3279943466187, + "p99": 766.2400007247925 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 9920512, - "combineLogicalBytes": 9920512, - "fanoutMean": 5.40625, - "recvTokensMax": 92, - "stragglerRank": 6, + "dispatchLogicalBytes": 9691136, + "combineLogicalBytes": 9691136, + "fanoutMean": 5.28125, + "recvTokensMax": 88, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 @@ -3812,35 +3642,35 @@ "tokensPerRank": 32, "globalTokens": 256, "dispatch": { - "p50": 69.92000341415405, - "p90": 75.13599842786789, - "p95": 75.80800354480743, - "p99": 80.73599636554718 + "p50": 100.44799745082855, + "p90": 344.67199444770813, + "p95": 374.2719888687134, + "p99": 391.2000060081482 }, "combine": { - "p50": 78.91199737787247, - "p90": 79.80799674987793, - "p95": 80.35200089216232, - "p99": 83.71199667453766 + "p50": 93.56799721717834, + "p90": 322.6560056209564, + "p95": 338.1440043449402, + "p99": 371.71199917793274 }, "roundtrip": { - "p50": 131.26400113105774, - "p90": 136.06399297714233, - "p95": 137.79200613498688, - "p99": 158.78400206565857 + "p50": 163.93600404262543, + "p90": 399.6799886226654, + "p95": 419.295996427536, + "p99": 453.5039961338043 }, "isolatedSum": { - "p50": 148.83200079202652, - "p90": 154.94399517774582, - "p95": 156.16000443696976, - "p99": 164.44799304008484 + "p50": 194.0159946680069, + "p90": 667.3280000686646, + "p95": 712.4159932136536, + "p99": 762.9120051860809 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19726336, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 182, - "stragglerRank": 4, + "dispatchLogicalBytes": 19568640, + "combineLogicalBytes": 19568640, + "fanoutMean": 5.33203125, + "recvTokensMax": 179, + "stragglerRank": 5, "correct": true, 
"samplesPooled": 600, "trials": 3 @@ -3849,35 +3679,35 @@ "tokensPerRank": 64, "globalTokens": 512, "dispatch": { - "p50": 88.16000074148178, - "p90": 91.13600105047226, - "p95": 92.3520028591156, - "p99": 106.46399855613708 + "p50": 110.84800213575363, + "p90": 352.06401348114014, + "p95": 369.1520094871521, + "p99": 409.2479944229126 }, "combine": { - "p50": 92.47999638319016, - "p90": 100.96000134944916, - "p95": 102.04800218343735, - "p99": 116.19199812412262 + "p50": 107.04000294208527, + "p90": 330.1759958267212, + "p95": 352.06401348114014, + "p99": 389.3119990825653 }, "roundtrip": { - "p50": 159.8079949617386, - "p90": 163.42400014400482, - "p95": 164.8319959640503, - "p99": 172.03199863433838 + "p50": 187.9359930753708, + "p90": 420.3520119190216, + "p95": 462.68799901008606, + "p99": 484.16000604629517 }, "isolatedSum": { - "p50": 180.63999712467194, - "p90": 192.09600239992142, - "p95": 194.40000504255295, - "p99": 222.6559966802597 + "p50": 217.8880050778389, + "p90": 682.2400093078613, + "p95": 721.2160229682922, + "p99": 798.5599935054779 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 38993920, - "combineLogicalBytes": 38993920, - "fanoutMean": 5.3125, - "recvTokensMax": 367, - "stragglerRank": 4, + "dispatchLogicalBytes": 38750208, + "combineLogicalBytes": 38750208, + "fanoutMean": 5.279296875, + "recvTokensMax": 348, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 @@ -3886,35 +3716,35 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 94.46399658918381, - "p90": 100.03200173377991, - "p95": 102.04800218343735, - "p99": 111.68000102043152 + "p50": 125.50400197505951, + "p90": 370.2400028705597, + "p95": 393.887996673584, + "p99": 429.2159974575043 }, "combine": { - "p50": 115.48800021409988, - "p90": 116.5120005607605, - "p95": 117.18399822711945, - "p99": 127.61600315570831 + "p50": 128.76799702644348, + "p90": 391.4240002632141, + "p95": 403.872013092041, + "p99": 416.22400283813477 }, 
"roundtrip": { - "p50": 195.23200392723083, - "p90": 199.13600385189056, - "p95": 200.1280039548874, - "p99": 208.25600624084473 + "p50": 227.39200294017792, + "p90": 472.51200675964355, + "p95": 503.04001569747925, + "p99": 527.1999835968018 }, "isolatedSum": { - "p50": 209.9519968032837, - "p90": 216.5440022945404, - "p95": 219.2320004105568, - "p99": 239.29600417613983 + "p50": 254.271999001503, + "p90": 761.6640031337738, + "p95": 797.760009765625, + "p99": 845.440000295639 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 4, + "dispatchLogicalBytes": 77342720, + "combineLogicalBytes": 77342720, + "fanoutMean": 5.2685546875, + "recvTokensMax": 687, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -3922,16 +3752,16 @@ ] }, { - "id": "cx-bb4293a3", - "identity": "b300|deepep|7168|8|384|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||d6c49ae98878760", - "colorKey": "b300_c9569580", - "comparisonKey": "9212a9f938273ac4", + "id": "cx-e4160fbb", + "identity": "gb200|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|uniform+eplb|8|decode|normal|none|none|0|tuned||a572344820478f0", + "colorKey": "gb200_8703b849", + "comparisonKey": "dcee6033928840f5", "schemaVersion": 3, - "generatedAt": "2026-06-27T11:14:04.417572+00:00", + "generatedAt": "2026-06-29T13:59:39.819924+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_12", - "sku": "b300", + "publicationStatus": "comparable-experimental", + "runner": "gb200-nv_0", + "sku": "gb200", "backend": "deepep", "phase": "decode", "mode": "normal", @@ -3939,30 +3769,31 @@ "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", + "topologyClass": "gb200-nvl72-mnnvl", + "transport": "mnnvl", 
"worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep · bf16", - "model": "Kimi-K2", + "label": "GB200 EP8 · deepep · bf16 · uniform+eplb", + "model": "DeepSeek-V3 (EPLB physical)", "shape": { "hidden": 7168, "topk": 8, - "experts": 384, + "experts": 288, "routing": "uniform", - "routingLabel": "uniform", + "routingLabel": "uniform+eplb", "routingStep": 0, "unevenTokens": "none", - "eplbEnabled": false, + "eplbEnabled": true, "dispatchDtype": "bf16", + "kernelGeneration": "v1", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1351, + "achievedFraction": 0.1316, "configuredUnits": 20, - "deviceUnits": 148, + "deviceUnits": 152, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -3970,59 +3801,59 @@ }, "placement": { "kind": "packed", - "nodes": 1, + "nodes": 2, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "d6c49ae98878760", - "workloadId": "set:8:9a27d0df4b17fa09", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", + "traceSignature": "a572344820478f0", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.078125, + "eplbImbalanceAfter": 1.00048828125, + "backendVersion": "1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28287503016", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28287503016", - "createdAt": "2026-06-27T11:14:04.417572+00:00", - "sha": "df7fddee0a275156d4c72fa006cd2b73bce72613" + "id": "28374335449", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449", + "createdAt": "2026-06-29T13:08:20Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { "tokensPerRank": 1, "globalTokens": 8, 
"dispatch": { - "p50": 57.82400071620941, - "p90": 59.90400165319443, - "p95": 62.97600269317627, - "p99": 70.592001080513 + "p50": 89.59999680519104, + "p90": 101.75999999046326, + "p95": 105.85600137710571, + "p99": 112.60800063610077 }, "combine": { - "p50": 66.52799993753433, - "p90": 67.58400052785873, - "p95": 68.9919963479042, - "p99": 78.87999713420868 + "p50": 79.13599908351898, + "p90": 83.55200290679932, + "p95": 85.7279971241951, + "p99": 92.44800359010696 }, "roundtrip": { - "p50": 107.90400207042694, - "p90": 114.20799791812897, - "p95": 114.94400352239609, - "p99": 125.21600723266602 + "p50": 146.36799693107605, + "p90": 157.56799280643463, + "p95": 161.02400422096252, + "p99": 170.20800709724426 }, "isolatedSum": { - "p50": 124.35200065374374, - "p90": 127.48800218105316, - "p95": 131.96799904108047, - "p99": 149.47199821472168 + "p50": 168.73599588871002, + "p90": 185.31200289726257, + "p95": 191.5839985013008, + "p99": 205.05600422620773 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 602112, - "combineLogicalBytes": 602112, - "fanoutMean": 5.25, - "recvTokensMax": 8, - "stragglerRank": 4, + "dispatchLogicalBytes": 516096, + "combineLogicalBytes": 516096, + "fanoutMean": 4.5, + "recvTokensMax": 6, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -4031,35 +3862,35 @@ "tokensPerRank": 2, "globalTokens": 16, "dispatch": { - "p50": 58.079998940229416, - "p90": 61.3120011985302, - "p95": 64.03200328350067, - "p99": 79.29600030183792 + "p50": 90.7839983701706, + "p90": 103.29599678516388, + "p95": 108.06400328874588, + "p99": 122.30399996042252 }, "combine": { - "p50": 67.10399687290192, - "p90": 68.41599941253662, - "p95": 69.98399645090103, - "p99": 85.50400286912918 + "p50": 80.28800040483475, + "p90": 84.09599959850311, + "p95": 85.85599809885025, + "p99": 94.27200257778168 }, "roundtrip": { - "p50": 108.03200304508209, - "p90": 110.944002866745, - "p95": 113.15199732780457, - "p99": 129.15199995040894 + "p50": 
148.5760062932968, + "p90": 159.19999778270721, + "p95": 164.12800550460815, + "p99": 172.57599532604218 }, "isolatedSum": { - "p50": 125.18399581313133, - "p90": 129.72800061106682, - "p95": 134.0159997344017, - "p99": 164.8000031709671 + "p50": 171.07199877500534, + "p90": 187.391996383667, + "p95": 193.92000138759613, + "p99": 216.5760025382042 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1218560, - "combineLogicalBytes": 1218560, - "fanoutMean": 5.3125, - "recvTokensMax": 14, - "stragglerRank": 4, + "dispatchLogicalBytes": 1089536, + "combineLogicalBytes": 1089536, + "fanoutMean": 4.75, + "recvTokensMax": 11, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -4068,35 +3899,35 @@ "tokensPerRank": 4, "globalTokens": 32, "dispatch": { - "p50": 59.7120001912117, - "p90": 61.85600161552429, - "p95": 63.1679967045784, - "p99": 75.39200037717819 + "p50": 90.46400338411331, + "p90": 101.79200023412704, + "p95": 104.54399883747101, + "p99": 112.92800307273865 }, "combine": { - "p50": 68.31999868154526, - "p90": 77.11999863386154, - "p95": 77.7600035071373, - "p99": 89.59999680519104 + "p50": 81.18399977684021, + "p90": 85.4400023818016, + "p95": 90.52799642086029, + "p99": 95.32800316810608 }, "roundtrip": { - "p50": 123.48800152540207, - "p90": 127.61600315570831, - "p95": 128.4479945898056, - "p99": 141.9840008020401 + "p50": 149.02399480342865, + "p90": 160.67199409008026, + "p95": 163.68000209331512, + "p99": 173.8239973783493 }, "isolatedSum": { - "p50": 128.03199887275696, - "p90": 138.97600024938583, - "p95": 140.9280002117157, - "p99": 164.99199718236923 + "p50": 171.64800316095352, + "p90": 187.23200261592865, + "p95": 195.0719952583313, + "p99": 208.25600624084473 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2408448, - "combineLogicalBytes": 2408448, - "fanoutMean": 5.25, - "recvTokensMax": 26, - "stragglerRank": 4, + "dispatchLogicalBytes": 2207744, + "combineLogicalBytes": 2207744, + "fanoutMean": 4.8125, + 
"recvTokensMax": 23, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -4105,35 +3936,35 @@ "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 59.967998415231705, - "p90": 62.81600147485733, - "p95": 66.39999896287918, - "p99": 73.53600114583969 + "p50": 92.99200028181076, + "p90": 102.55999863147736, + "p95": 105.82400113344193, + "p99": 111.93600296974182 }, "combine": { - "p50": 68.44799965620041, - "p90": 76.92799717187881, - "p95": 77.34400033950806, - "p99": 82.75199681520462 + "p50": 83.16799998283386, + "p90": 87.00799942016602, + "p95": 91.58399701118469, + "p99": 99.04000163078308 }, "roundtrip": { - "p50": 122.17599898576736, - "p90": 127.07200646400452, - "p95": 128.25599312782288, - "p99": 142.68800616264343 + "p50": 153.98399531841278, + "p90": 164.8000031709671, + "p95": 166.9439971446991, + "p99": 176.35199427604675 }, "isolatedSum": { - "p50": 128.4159980714321, - "p90": 139.74399864673615, - "p95": 143.74399930238724, - "p99": 156.2879979610443 + "p50": 176.16000026464462, + "p90": 189.56799805164337, + "p95": 197.40799814462662, + "p99": 210.9760046005249 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4831232, - "combineLogicalBytes": 4831232, - "fanoutMean": 5.265625, - "recvTokensMax": 48, - "stragglerRank": 4, + "dispatchLogicalBytes": 4558848, + "combineLogicalBytes": 4558848, + "fanoutMean": 4.96875, + "recvTokensMax": 46, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -4142,34 +3973,34 @@ "tokensPerRank": 16, "globalTokens": 128, "dispatch": { - "p50": 62.880001962184906, - "p90": 68.80000233650208, - "p95": 71.03999704122543, - "p99": 74.65600222349167 + "p50": 95.42399644851685, + "p90": 105.6319996714592, + "p95": 109.53599959611893, + "p99": 122.01599776744843 }, "combine": { - "p50": 69.11999732255936, - "p90": 78.40000092983246, - "p95": 78.97599786520004, - "p99": 82.40000158548355 + "p50": 83.5840031504631, + "p90": 89.34400230646133, + "p95": 
93.56799721717834, + "p99": 95.45599669218063 }, "roundtrip": { - "p50": 121.11999839544296, - "p90": 125.34399330615997, - "p95": 127.13600695133209, - "p99": 134.8479986190796 + "p50": 154.4640064239502, + "p90": 166.07999801635742, + "p95": 169.53599452972412, + "p99": 176.57600343227386 }, "isolatedSum": { - "p50": 131.99999928474426, - "p90": 147.20000326633453, - "p95": 150.01599490642548, - "p99": 157.05600380897522 - }, + "p50": 179.00799959897995, + "p90": 194.97600197792053, + "p95": 203.10399681329727, + "p99": 217.47199445962906 + }, "roundtripMeasured": true, - "dispatchLogicalBytes": 9848832, - "combineLogicalBytes": 9848832, - "fanoutMean": 5.3671875, - "recvTokensMax": 91, + "dispatchLogicalBytes": 9347072, + "combineLogicalBytes": 9347072, + "fanoutMean": 5.09375, + "recvTokensMax": 86, "stragglerRank": 5, "correct": true, "samplesPooled": 600, @@ -4179,35 +4010,35 @@ "tokensPerRank": 32, "globalTokens": 256, "dispatch": { - "p50": 70.94399631023407, - "p90": 76.48000121116638, - "p95": 77.37600058317184, - "p99": 80.06399869918823 + "p50": 98.24000298976898, + "p90": 108.06400328874588, + "p95": 111.61600053310394, + "p99": 119.61600184440613 }, "combine": { - "p50": 79.3600007891655, - "p90": 80.22399991750717, - "p95": 81.28000050783157, - "p99": 91.90399944782257 + "p50": 91.51999652385712, + "p90": 95.74399888515472, + "p95": 97.82399982213974, + "p99": 103.5199984908104 }, "roundtrip": { - "p50": 134.46399569511414, - "p90": 138.20800185203552, - "p95": 139.71200585365295, - "p99": 151.2320041656494 + "p50": 161.95200383663177, + "p90": 171.83999717235565, + "p95": 174.49599504470825, + "p99": 182.3360025882721 }, "isolatedSum": { - "p50": 150.30399709939957, - "p90": 156.70400112867355, - "p95": 158.65600109100342, - "p99": 171.9679981470108 + "p50": 189.7599995136261, + "p90": 203.8080021739006, + "p95": 209.44000035524368, + "p99": 223.13600033521652 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19496960, - "combineLogicalBytes": 
19496960, - "fanoutMean": 5.3125, + "dispatchLogicalBytes": 18995200, + "combineLogicalBytes": 18995200, + "fanoutMean": 5.17578125, "recvTokensMax": 178, - "stragglerRank": 4, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -4216,35 +4047,35 @@ "tokensPerRank": 64, "globalTokens": 512, "dispatch": { - "p50": 88.8959988951683, - "p90": 91.61599725484848, - "p95": 93.24800223112106, - "p99": 102.94400155544281 + "p50": 109.72800105810165, + "p90": 118.52800101041794, + "p95": 122.6240023970604, + "p99": 135.3919953107834 }, "combine": { - "p50": 92.38400310277939, - "p90": 100.63999891281128, - "p95": 101.6639992594719, - "p99": 104.73600029945374 + "p50": 105.56799918413162, + "p90": 109.31199789047241, + "p95": 112.06399649381638, + "p99": 118.68800222873688 }, "roundtrip": { - "p50": 161.31199896335602, - "p90": 165.0879979133606, - "p95": 166.46400094032288, - "p99": 185.7919991016388 + "p50": 187.32799589633942, + "p90": 195.90400159358978, + "p95": 198.7839937210083, + "p99": 205.72799444198608 }, "isolatedSum": { - "p50": 181.2800019979477, - "p90": 192.25599616765976, - "p95": 194.91200149059296, - "p99": 207.68000185489655 + "p50": 215.29600024223328, + "p90": 227.83999890089035, + "p95": 234.68799889087677, + "p99": 254.07999753952026 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 38836224, - "combineLogicalBytes": 38836224, - "fanoutMean": 5.291015625, - "recvTokensMax": 372, - "stragglerRank": 4, + "dispatchLogicalBytes": 38291456, + "combineLogicalBytes": 38291456, + "fanoutMean": 5.216796875, + "recvTokensMax": 348, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 @@ -4253,35 +4084,35 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 95.58399766683578, - "p90": 100.67199915647507, - "p95": 101.79200023412704, - "p99": 108.15999656915665 + "p50": 124.35200065374374, + "p90": 132.54399597644806, + "p95": 135.68000495433807, + "p99": 147.67999947071075 }, "combine": { - 
"p50": 115.64800143241882, - "p90": 116.57600104808807, - "p95": 117.3119992017746, - "p99": 128.00000607967377 + "p50": 126.0479986667633, + "p90": 132.32000172138214, + "p95": 133.98399949073792, + "p99": 143.2960033416748 }, "roundtrip": { - "p50": 197.05599546432495, - "p90": 200.95999538898468, - "p95": 202.84800231456757, - "p99": 227.90400683879852 + "p50": 224.44799542427063, + "p90": 233.69599878787994, + "p95": 236.51200532913208, + "p99": 246.68799340724945 }, "isolatedSum": { - "p50": 211.2319990992546, - "p90": 217.24800020456314, - "p95": 219.10399943590164, - "p99": 236.1600026488304 + "p50": 250.39999932050705, + "p90": 264.8639976978302, + "p95": 269.664004445076, + "p99": 290.97600281238556 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77514752, - "combineLogicalBytes": 77514752, - "fanoutMean": 5.2802734375, - "recvTokensMax": 707, - "stragglerRank": 4, + "dispatchLogicalBytes": 77113344, + "combineLogicalBytes": 77113344, + "fanoutMean": 5.2529296875, + "recvTokensMax": 685, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -4289,107 +4120,108 @@ ] }, { - "id": "cx-22c8469b", - "identity": "b300|deepep|7168|8|384|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||d6c49ae98878760", - "colorKey": "b300_307ed708", - "comparisonKey": "382d98414c6b61e6", + "id": "cx-ee1bfa1a", + "identity": "gb200|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|decode|normal|none|none|0|tuned||1093cd76c9cd2db", + "colorKey": "gb200_62fd6d04", + "comparisonKey": "ca163ecd5d51bcb6", "schemaVersion": 3, - "generatedAt": "2026-06-27T09:51:28.371280+00:00", + "generatedAt": "2026-06-29T13:55:33.751216+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_02", - "sku": "b300", + "publicationStatus": "comparable-experimental", + "runner": "gb200-nv_0", + "sku": "gb200", "backend": "deepep", "phase": "decode", "mode": "normal", "resourceMode": "tuned", "suite": 
"backend-default", "comparisonClass": "standardized", - "measurementContract": "runtime-visible-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb200-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep · bf16", - "model": "Kimi-K2", + "label": "GB200 EP8 · deepep · bf16 · zipf", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, - "experts": 384, - "routing": "uniform", - "routingLabel": "uniform", + "experts": 256, + "routing": "zipf", + "routingLabel": "zipf", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, "dispatchDtype": "bf16", + "kernelGeneration": "v1", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1351, + "achievedFraction": 0.1316, "configuredUnits": 20, - "deviceUnits": 148, + "deviceUnits": 152, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, "paretoEligible": false }, "placement": { - "kind": "packed", - "nodes": 1, + "kind": "adversarial", + "nodes": 2, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "d6c49ae98878760", - "workloadId": "set:8:9a27d0df4b17fa09", - "workloadSource": "canonical-serialized", + "traceSignature": "1093cd76c9cd2db", + "workloadId": null, + "workloadSource": "seeded-runtime", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", + "backendVersion": "1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28285688277", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285688277", - "createdAt": "2026-06-27T09:51:28.371280+00:00", - "sha": "149586650dbed5b7579537347e9489d5b41543c1" + "id": "28374335449", + "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449", + "createdAt": "2026-06-29T13:08:20Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 57.440001517534256, - "p90": 61.63199990987778, - "p95": 64.64000046253204, - "p99": 82.33600109815598 + "p50": 91.16800129413605, + "p90": 103.29599678516388, + "p95": 107.58399963378906, + "p99": 115.55200070142746 }, "combine": { - "p50": 66.20799750089645, - "p90": 66.880002617836, - "p95": 68.41599941253662, - "p99": 80.32000064849854 + "p50": 78.87999713420868, + "p90": 83.64800363779068, + "p95": 86.27200126647949, + "p99": 95.48799693584442 }, "roundtrip": { - "p50": 107.51999914646149, - "p90": 115.03999680280685, - "p95": 117.40799993276596, - "p99": 124.7360035777092 + "p50": 148.44800531864166, + "p90": 158.78400206565857, + "p95": 162.56000101566315, + "p99": 169.72799599170685 }, "isolatedSum": { - "p50": 123.64799901843071, - "p90": 128.51200252771378, - "p95": 133.05599987506866, - "p99": 162.6560017466545 + "p50": 170.04799842834473, + "p90": 186.94400042295456, + "p95": 193.85600090026855, + "p99": 211.03999763727188 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 602112, - "combineLogicalBytes": 602112, - "fanoutMean": 5.25, + "dispatchLogicalBytes": 444416, + "combineLogicalBytes": 444416, + "fanoutMean": 3.875, "recvTokensMax": 8, - "stragglerRank": 7, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 @@ -4398,35 +4230,35 @@ "tokensPerRank": 2, "globalTokens": 16, "dispatch": { - "p50": 57.631999254226685, - "p90": 60.60799956321716, - "p95": 63.32799792289734, - "p99": 80.70400357246399 + "p50": 92.16000139713287, + "p90": 103.64799946546555, + "p95": 107.35999792814255, + "p99": 113.34399878978729 }, "combine": { - "p50": 66.27199798822403, - "p90": 67.26399809122086, - "p95": 68.12799721956253, - "p99": 78.015998005867 + "p50": 78.68800312280655, + "p90": 83.3280012011528, + 
"p95": 85.11999994516373, + "p99": 93.44000369310379 }, "roundtrip": { - "p50": 106.81600123643875, - "p90": 109.98400300741196, - "p95": 112.47999966144562, - "p99": 124.79999661445618 + "p50": 147.13600277900696, + "p90": 157.21599757671356, + "p95": 160.5439931154251, + "p99": 169.47199404239655 }, "isolatedSum": { - "p50": 123.90399724245071, - "p90": 127.87199765443802, - "p95": 131.45599514245987, - "p99": 158.720001578331 + "p50": 170.84800451993942, + "p90": 186.97600066661835, + "p95": 192.47999787330627, + "p99": 206.78400248289108 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1218560, - "combineLogicalBytes": 1218560, - "fanoutMean": 5.3125, - "recvTokensMax": 14, - "stragglerRank": 7, + "dispatchLogicalBytes": 845824, + "combineLogicalBytes": 845824, + "fanoutMean": 3.6875, + "recvTokensMax": 16, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 @@ -4435,35 +4267,35 @@ "tokensPerRank": 4, "globalTokens": 32, "dispatch": { - "p50": 57.95200169086456, - "p90": 61.37600168585777, - "p95": 65.50399959087372, - "p99": 79.42400127649307 + "p50": 94.17600184679031, + "p90": 106.91200196743011, + "p95": 111.58400028944016, + "p99": 127.07200646400452 }, "combine": { - "p50": 66.97600334882736, - "p90": 69.34399902820587, - "p95": 76.67200267314911, - "p99": 89.63199704885483 + "p50": 80.9599980711937, + "p90": 86.04799956083298, + "p95": 90.46400338411331, + "p99": 94.87999975681305 }, "roundtrip": { - "p50": 111.26399785280228, - "p90": 115.90400338172913, - "p95": 119.00799721479416, - "p99": 129.15199995040894 + "p50": 149.56800639629364, + "p90": 159.93599593639374, + "p95": 163.32800686359406, + "p99": 170.3680008649826 }, "isolatedSum": { - "p50": 124.92800503969193, - "p90": 130.72000071406364, - "p95": 142.17600226402283, - "p99": 169.0559983253479 + "p50": 175.135999917984, + "p90": 192.9600015282631, + "p95": 202.04800367355347, + "p99": 221.95200622081757 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 
2408448, - "combineLogicalBytes": 2408448, - "fanoutMean": 5.25, - "recvTokensMax": 26, - "stragglerRank": 5, + "dispatchLogicalBytes": 1691648, + "combineLogicalBytes": 1691648, + "fanoutMean": 3.6875, + "recvTokensMax": 32, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 @@ -4472,35 +4304,35 @@ "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 58.9120015501976, - "p90": 61.055999249219894, - "p95": 62.81600147485733, - "p99": 81.88799768686295 + "p50": 94.59199756383896, + "p90": 106.1440035700798, + "p95": 109.21599715948105, + "p99": 123.1359988451004 }, "combine": { - "p50": 67.64800101518631, - "p90": 69.63200122117996, - "p95": 76.9599974155426, - "p99": 78.72000336647034 + "p50": 82.68799632787704, + "p90": 90.17600119113922, + "p95": 93.85599941015244, + "p99": 104.12800312042236 }, "roundtrip": { - "p50": 123.6800029873848, - "p90": 125.98399817943573, - "p95": 126.8479973077774, - "p99": 133.18400084972382 + "p50": 151.96800231933594, + "p90": 163.87200355529785, + "p95": 167.58400201797485, + "p99": 174.40000176429749 }, "isolatedSum": { - "p50": 126.56000256538391, - "p90": 130.68800047039986, - "p95": 139.77599889039993, - "p99": 160.60800105333328 + "p50": 177.279993891716, + "p90": 196.32000476121902, + "p95": 203.07199656963348, + "p99": 227.26400196552277 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4831232, - "combineLogicalBytes": 4831232, - "fanoutMean": 5.265625, - "recvTokensMax": 48, - "stragglerRank": 7, + "dispatchLogicalBytes": 3354624, + "combineLogicalBytes": 3354624, + "fanoutMean": 3.65625, + "recvTokensMax": 64, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 @@ -4509,35 +4341,35 @@ "tokensPerRank": 16, "globalTokens": 128, "dispatch": { - "p50": 60.736000537872314, - "p90": 66.91200286149979, - "p95": 68.28799843788147, - "p99": 72.83200323581696 + "p50": 94.52799707651138, + "p90": 104.92800176143646, + "p95": 109.15199667215347, + "p99": 120.2239990234375 }, 
"combine": { - "p50": 68.76800209283829, - "p90": 77.95199751853943, - "p95": 78.43200117349625, - "p99": 78.78399640321732 + "p50": 82.97599852085114, + "p90": 91.80799871683121, + "p95": 94.17600184679031, + "p99": 98.55999797582626 }, "roundtrip": { - "p50": 119.77600306272507, - "p90": 124.67200309038162, - "p95": 127.10399925708771, - "p99": 141.37600362300873 + "p50": 155.03999590873718, + "p90": 165.18400609493256, + "p95": 168.41599345207214, + "p99": 175.26400089263916 }, "isolatedSum": { - "p50": 129.5040026307106, - "p90": 144.86400038003922, - "p95": 146.71999961137772, - "p99": 151.61599963903427 + "p50": 177.50399559736252, + "p90": 196.73600047826767, + "p95": 203.3279985189438, + "p99": 218.78399699926376 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 9848832, - "combineLogicalBytes": 9848832, - "fanoutMean": 5.3671875, - "recvTokensMax": 91, - "stragglerRank": 7, + "dispatchLogicalBytes": 6537216, + "combineLogicalBytes": 6537216, + "fanoutMean": 3.5625, + "recvTokensMax": 127, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 @@ -4546,35 +4378,35 @@ "tokensPerRank": 32, "globalTokens": 256, "dispatch": { - "p50": 69.76000219583511, - "p90": 71.19999825954437, - "p95": 73.79200309515, - "p99": 84.57600325345993 + "p50": 98.1760025024414, + "p90": 107.4879989027977, + "p95": 111.10399663448334, + "p99": 119.07199770212173 }, "combine": { - "p50": 78.49600166082382, - "p90": 79.45600152015686, - "p95": 80.4160013794899, - "p99": 102.33599692583084 + "p50": 86.30400151014328, + "p90": 94.01600062847137, + "p95": 96.12800180912018, + "p99": 104.032002389431 }, "roundtrip": { - "p50": 130.97600638866425, - "p90": 135.68000495433807, - "p95": 137.1839940547943, - "p99": 148.41599762439728 + "p50": 160.22400557994843, + "p90": 169.3120002746582, + "p95": 171.55200242996216, + "p99": 178.68800461292267 }, "isolatedSum": { - "p50": 148.25600385665894, - "p90": 150.65599977970123, - "p95": 154.2080044746399, - "p99": 
186.91200017929077 + "p50": 184.4800040125847, + "p90": 201.50399953126907, + "p95": 207.23199844360352, + "p99": 223.10400009155273 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19496960, - "combineLogicalBytes": 19496960, - "fanoutMean": 5.3125, - "recvTokensMax": 178, - "stragglerRank": 7, + "dispatchLogicalBytes": 12859392, + "combineLogicalBytes": 12859392, + "fanoutMean": 3.50390625, + "recvTokensMax": 255, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 @@ -4583,35 +4415,35 @@ "tokensPerRank": 64, "globalTokens": 512, "dispatch": { - "p50": 86.2400010228157, - "p90": 89.4400030374527, - "p95": 90.59199690818787, - "p99": 99.71199929714203 + "p50": 106.175996363163, + "p90": 115.87200313806534, + "p95": 119.71200257539749, + "p99": 143.8400000333786 }, "combine": { - "p50": 91.61599725484848, - "p90": 93.56799721717834, - "p95": 95.0080007314682, - "p99": 104.3199971318245 + "p50": 103.4879982471466, + "p90": 107.61599987745285, + "p95": 109.11999642848969, + "p99": 117.11999773979187 }, "roundtrip": { - "p50": 160.288006067276, - "p90": 166.4319932460785, - "p95": 173.34400117397308, - "p99": 184.86399948596954 + "p50": 186.52799725532532, + "p90": 194.59199905395508, + "p95": 198.14400374889374, + "p99": 204.8639953136444 }, "isolatedSum": { - "p50": 177.85599827766418, - "p90": 183.00800025463104, - "p95": 185.59999763965607, - "p99": 204.03199642896652 + "p50": 209.6639946103096, + "p90": 223.4880030155182, + "p95": 228.83199900388718, + "p99": 260.95999777317047 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 38836224, - "combineLogicalBytes": 38836224, - "fanoutMean": 5.291015625, - "recvTokensMax": 372, - "stragglerRank": 7, + "dispatchLogicalBytes": 25145344, + "combineLogicalBytes": 25145344, + "fanoutMean": 3.42578125, + "recvTokensMax": 510, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 @@ -4620,35 +4452,35 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 
94.14400160312653, - "p90": 97.50399738550186, - "p95": 99.87200051546097, - "p99": 113.76000195741653 + "p50": 125.63200294971466, + "p90": 134.5600038766861, + "p95": 136.63999736309052, + "p99": 141.79199934005737 }, "combine": { - "p50": 115.26399850845337, - "p90": 115.93600362539291, - "p95": 117.18399822711945, - "p99": 131.20000064373016 + "p50": 134.8479986190796, + "p90": 142.91200041770935, + "p95": 144.28800344467163, + "p99": 150.4639983177185 }, "roundtrip": { - "p50": 192.89599359035492, - "p90": 198.68800044059753, - "p95": 200.19200444221497, - "p99": 209.18400585651398 + "p50": 238.5600060224533, + "p90": 247.16800451278687, + "p95": 250.5280077457428, + "p99": 257.3759853839874 }, "isolatedSum": { - "p50": 209.4080001115799, - "p90": 213.44000101089478, - "p95": 217.0559987425804, - "p99": 244.9600026011467 + "p50": 260.48000156879425, + "p90": 277.47200429439545, + "p95": 280.92800080776215, + "p99": 292.2559976577759 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77514752, - "combineLogicalBytes": 77514752, - "fanoutMean": 5.2802734375, - "recvTokensMax": 707, - "stragglerRank": 7, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 @@ -4656,16 +4488,16 @@ ] }, { - "id": "cx-a22ca77b", - "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|decode|normal|none|none|0|tuned||2279937619f3971", - "colorKey": "b300_77566238", - "comparisonKey": "08fb0b4fb4077abb", + "id": "cx-1ce8c4bb", + "identity": "gb200|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|decode|normal|none|none|0|tuned||7eace9164e82cd6", + "colorKey": "gb200_8855aa26", + "comparisonKey": "7e97825cbdd9f3b4", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:58:04.079730+00:00", + "generatedAt": "2026-06-29T13:57:43.080262+00:00", "status": "valid", - "publicationStatus": "official", 
- "runner": "b300-nv_02", - "sku": "b300", + "publicationStatus": "comparable-experimental", + "runner": "gb200-nv_0", + "sku": "gb200", "backend": "deepep", "phase": "decode", "mode": "normal", @@ -4673,30 +4505,31 @@ "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", + "topologyClass": "gb200-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep · bf16 · balanced", + "label": "GB200 EP8 · deepep · bf16 · zipf-heavy", "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, "experts": 256, - "routing": "balanced", - "routingLabel": "balanced", + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, "dispatchDtype": "bf16", + "kernelGeneration": "v1", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1351, + "achievedFraction": 0.1316, "configuredUnits": 20, - "deviceUnits": 148, + "deviceUnits": 152, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -4704,59 +4537,59 @@ }, "placement": { "kind": "packed", - "nodes": 1, + "nodes": 2, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "2279937619f3971", - "workloadId": "set:4:7af12818400d6348", - "workloadSource": "canonical-serialized", + "traceSignature": "7eace9164e82cd6", + "workloadId": null, + "workloadSource": "seeded-runtime", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", + "backendVersion": "1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271873027", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271873027", - "createdAt": 
"2026-06-26T23:58:04.079730+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28374335449", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449", + "createdAt": "2026-06-29T13:08:20Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 56.41600117087364, - "p90": 58.848001062870026, - "p95": 61.216000467538834, - "p99": 80.25600016117096 + "p50": 89.40800279378891, + "p90": 99.67999905347824, + "p95": 103.5199984908104, + "p99": 109.76000130176544 }, "combine": { - "p50": 67.6800012588501, - "p90": 69.60000097751617, - "p95": 76.73600316047668, - "p99": 82.62400329113007 + "p50": 70.23999840021133, + "p90": 73.60000163316727, + "p95": 79.16799932718277, + "p99": 84.95999872684479 }, "roundtrip": { - "p50": 106.49599879980087, - "p90": 109.27999764680862, - "p95": 111.13599687814713, - "p99": 124.1919994354248 + "p50": 135.5839967727661, + "p90": 144.86399292945862, + "p95": 148.6400067806244, + "p99": 155.7759940624237 }, "isolatedSum": { - "p50": 124.09600242972374, - "p90": 128.4480020403862, - "p95": 137.95200362801552, - "p99": 162.88000345230103 + "p50": 159.64800119400024, + "p90": 173.2800006866455, + "p95": 182.68799781799316, + "p99": 194.72000002861023 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 917504, - "combineLogicalBytes": 917504, - "fanoutMean": 8, + "dispatchLogicalBytes": 172032, + "combineLogicalBytes": 172032, + "fanoutMean": 1.5, "recvTokensMax": 8, - "stragglerRank": 7, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 @@ -4765,35 +4598,35 @@ "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 58.43200162053108, - "p90": 60.70400029420853, - "p95": 62.6240000128746, - "p99": 78.65600287914276 + "p50": 89.53599631786346, + "p90": 100.67199915647507, + "p95": 105.27999699115753, + "p99": 114.75200206041336 }, "combine": { - "p50": 77.98399776220322, - "p90": 78.72000336647034, 
- "p95": 78.84799689054489, - "p99": 81.4720019698143 + "p50": 72.80000299215317, + "p90": 80.51200211048126, + "p95": 82.17599987983704, + "p99": 86.68799698352814 }, "roundtrip": { - "p50": 118.07999759912491, - "p90": 122.91199713945389, - "p95": 124.1919994354248, - "p99": 131.99999928474426 + "p50": 142.39999651908875, + "p90": 151.5520066022873, + "p95": 154.23999726772308, + "p99": 164.73600268363953 }, "isolatedSum": { - "p50": 136.4159993827343, - "p90": 139.42400366067886, - "p95": 141.4719969034195, - "p99": 160.12800484895706 + "p50": 162.33599931001663, + "p90": 181.18400126695633, + "p95": 187.45599687099457, + "p99": 201.4399990439415 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 7340032, - "combineLogicalBytes": 7340032, - "fanoutMean": 8, + "dispatchLogicalBytes": 1376256, + "combineLogicalBytes": 1376256, + "fanoutMean": 1.5, "recvTokensMax": 64, - "stragglerRank": 7, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 @@ -4802,35 +4635,35 @@ "tokensPerRank": 32, "globalTokens": 256, "dispatch": { - "p50": 69.82400268316269, - "p90": 71.87200337648392, - "p95": 73.7600028514862, - "p99": 84.25600081682205 + "p50": 97.24800288677216, + "p90": 106.23999685049057, + "p95": 110.62400043010712, + "p99": 121.95199728012085 }, "combine": { - "p50": 79.16799932718277, - "p90": 81.08799904584885, - "p95": 81.91999793052673, - "p99": 90.71999788284302 + "p50": 82.40000158548355, + "p90": 86.14400029182434, + "p95": 91.64799749851227, + "p99": 95.10400146245956 }, "roundtrip": { - "p50": 133.82400572299957, - "p90": 140.09599387645721, - "p95": 141.92000031471252, - "p99": 145.82400023937225 + "p50": 150.7200002670288, + "p90": 159.32799875736237, + "p95": 162.1440052986145, + "p99": 166.49599373340607 }, "isolatedSum": { - "p50": 148.99200201034546, - "p90": 152.96000242233276, - "p95": 155.68000078201294, - "p99": 174.97599869966507 + "p50": 179.6480044722557, + "p90": 192.3839971423149, + "p95": 202.27199792861938, + "p99": 
217.0559987425804 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 29360128, - "combineLogicalBytes": 29360128, - "fanoutMean": 8, + "dispatchLogicalBytes": 5533696, + "combineLogicalBytes": 5533696, + "fanoutMean": 1.5078125, "recvTokensMax": 256, - "stragglerRank": 7, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 @@ -4839,35 +4672,35 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 105.76000064611435, - "p90": 107.71200060844421, - "p95": 109.02400314807892, - "p99": 114.78400230407715 + "p50": 121.34400010108948, + "p90": 129.82399761676788, + "p95": 133.69600474834442, + "p99": 144.3520039319992 }, "combine": { - "p50": 130.36799430847168, - "p90": 139.615997672081, - "p95": 140.03199338912964, - "p99": 143.13599467277527 + "p50": 130.94399869441986, + "p90": 134.65599715709686, + "p95": 136.19199395179749, + "p99": 146.11199498176575 }, "roundtrip": { - "p50": 230.68800568580627, - "p90": 234.52800512313843, - "p95": 235.55199801921844, - "p99": 240.09600281715393 + "p50": 225.66400468349457, + "p90": 233.3119958639145, + "p95": 236.15999519824982, + "p99": 240.79999327659607 }, "isolatedSum": { - "p50": 236.12799495458603, - "p90": 247.3279982805252, - "p95": 249.05599653720856, - "p99": 257.9199969768524 + "p50": 252.28799879550934, + "p90": 264.47999477386475, + "p95": 269.8879987001419, + "p99": 290.46399891376495 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 117440512, - "combineLogicalBytes": 117440512, - "fanoutMean": 8, + "dispatchLogicalBytes": 22650880, + "combineLogicalBytes": 22650880, + "fanoutMean": 1.54296875, "recvTokensMax": 1024, - "stragglerRank": 7, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 @@ -4875,16 +4708,16 @@ ] }, { - "id": "cx-42672aa9", - "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|decode|normal|none|none|0|tuned||ffa946582edb500", - "colorKey": "b300_77566238", - "comparisonKey": "3fe3497798f4d1dd", + 
"id": "cx-30fe3c0c", + "identity": "gb200|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-heavy+eplb|8|decode|normal|none|none|0|tuned||39778bd75f046da", + "colorKey": "gb200_10fda6e8", + "comparisonKey": "5ba24bce143d87f0", "schemaVersion": 3, - "generatedAt": "2026-06-27T09:48:00.348230+00:00", + "generatedAt": "2026-06-29T13:50:56.638851+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_08", - "sku": "b300", + "publicationStatus": "comparable-experimental", + "runner": "gb200-nv_1", + "sku": "gb200", "backend": "deepep", "phase": "decode", "mode": "normal", @@ -4892,30 +4725,31 @@ "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", + "topologyClass": "gb200-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep · bf16 · balanced", - "model": "DeepSeek-V3/V4", + "label": "GB200 EP8 · deepep · bf16 · zipf-heavy+eplb", + "model": "DeepSeek-V3 (EPLB physical)", "shape": { "hidden": 7168, "topk": 8, - "experts": 256, - "routing": "balanced", - "routingLabel": "balanced", + "experts": 288, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy+eplb", "routingStep": 0, "unevenTokens": "none", - "eplbEnabled": false, + "eplbEnabled": true, "dispatchDtype": "bf16", + "kernelGeneration": "v1", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1351, + "achievedFraction": 0.1316, "configuredUnits": 20, - "deviceUnits": 148, + "deviceUnits": 152, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -4923,59 +4757,59 @@ }, "placement": { "kind": "packed", - "nodes": 1, + "nodes": 2, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "ffa946582edb500", - "workloadId": "set:8:7af12818400d6348", - 
"workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", + "traceSignature": "39778bd75f046da", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 7.40625, + "eplbImbalanceAfter": 1.0004417782738093, + "backendVersion": "1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28285609982", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285609982", - "createdAt": "2026-06-27T09:48:00.348230+00:00", - "sha": "149586650dbed5b7579537347e9489d5b41543c1" + "id": "28374335449", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449", + "createdAt": "2026-06-29T13:08:20Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 57.50399827957153, - "p90": 59.20000001788139, - "p95": 61.055999249219894, - "p99": 68.60800087451935 + "p50": 94.81599926948547, + "p90": 327.2320032119751, + "p95": 382.84799456596375, + "p99": 408.03200006484985 }, "combine": { - "p50": 67.74400174617767, - "p90": 69.66400146484375, - "p95": 76.83199644088745, - "p99": 81.40800148248672 + "p50": 80.86399734020233, + "p90": 327.4880051612854, + "p95": 357.63201117515564, + "p99": 369.28001046180725 }, "roundtrip": { - "p50": 107.87200182676315, - "p90": 110.11199653148651, - "p95": 112.28799819946289, - "p99": 123.9359974861145 + "p50": 151.74399316310883, + "p90": 415.9039855003357, + "p95": 444.09599900245667, + "p99": 466.048002243042 }, "isolatedSum": { - "p50": 125.2480000257492, - "p90": 128.86400148272514, - "p95": 137.88799569010735, - "p99": 150.01600235700607 + "p50": 175.6799966096878, + "p90": 654.7200083732605, + "p95": 740.4800057411194, + "p99": 777.3120105266571 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 917504, - 
"combineLogicalBytes": 917504, - "fanoutMean": 8, - "recvTokensMax": 8, - "stragglerRank": 4, + "dispatchLogicalBytes": 559104, + "combineLogicalBytes": 559104, + "fanoutMean": 4.875, + "recvTokensMax": 6, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -4984,35 +4818,35 @@ "tokensPerRank": 2, "globalTokens": 16, "dispatch": { - "p50": 57.760000228881836, - "p90": 60.095999389886856, - "p95": 62.111999839544296, - "p99": 75.6480023264885 + "p50": 94.46399658918381, + "p90": 317.82400608062744, + "p95": 374.9440014362335, + "p99": 404.1920006275177 }, "combine": { - "p50": 67.96800345182419, - "p90": 76.64000242948532, - "p95": 77.44000107049942, - "p99": 78.72000336647034 + "p50": 83.16799998283386, + "p90": 336.92800998687744, + "p95": 363.96801471710205, + "p99": 377.56800651550293 }, "roundtrip": { - "p50": 117.40799993276596, - "p90": 123.19999933242798, - "p95": 123.87199699878693, - "p99": 141.27999544143677 + "p50": 154.7199934720993, + "p90": 418.7839925289154, + "p95": 443.1680142879486, + "p99": 467.74399280548096 }, "isolatedSum": { - "p50": 125.72800368070602, - "p90": 136.73600181937218, - "p95": 139.55200091004372, - "p99": 154.36800569295883 + "p50": 177.63199657201767, + "p90": 654.7520160675049, + "p95": 738.9120161533356, + "p99": 781.7600071430206 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1835008, - "combineLogicalBytes": 1835008, - "fanoutMean": 8, - "recvTokensMax": 16, - "stragglerRank": 4, + "dispatchLogicalBytes": 1175552, + "combineLogicalBytes": 1175552, + "fanoutMean": 5.125, + "recvTokensMax": 12, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -5021,35 +4855,35 @@ "tokensPerRank": 4, "globalTokens": 32, "dispatch": { - "p50": 59.42400172352791, - "p90": 61.43999844789505, - "p95": 63.29599767923355, - "p99": 74.78400319814682 + "p50": 96.47999703884125, + "p90": 336.60799264907837, + "p95": 380.47999143600464, + "p99": 397.7920114994049 }, "combine": { - "p50": 
77.85599678754807, - "p90": 78.68800312280655, - "p95": 78.97599786520004, - "p99": 93.82399916648865 + "p50": 83.3280012011528, + "p90": 323.71199131011963, + "p95": 365.7599985599518, + "p99": 378.6880075931549 }, "roundtrip": { - "p50": 119.61600184440613, - "p90": 124.51200187206268, - "p95": 125.76000392436981, - "p99": 132.1599930524826 + "p50": 154.40000593662262, + "p90": 409.66400504112244, + "p95": 441.0240054130554, + "p99": 464.1599953174591 }, "isolatedSum": { - "p50": 137.27999851107597, - "p90": 140.1280015707016, - "p95": 142.2719955444336, - "p99": 168.60800236463547 + "p50": 179.80799823999405, + "p90": 660.319983959198, + "p95": 746.2399899959564, + "p99": 776.4800190925598 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 3670016, - "combineLogicalBytes": 3670016, - "fanoutMean": 8, - "recvTokensMax": 32, - "stragglerRank": 4, + "dispatchLogicalBytes": 2465792, + "combineLogicalBytes": 2465792, + "fanoutMean": 5.375, + "recvTokensMax": 25, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -5058,35 +4892,35 @@ "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 59.776000678539276, - "p90": 62.04799935221672, - "p95": 63.10400366783142, - "p99": 72.86400347948074 + "p50": 97.31200337409973, + "p90": 343.00801157951355, + "p95": 380.47999143600464, + "p99": 403.55199575424194 }, "combine": { - "p50": 78.3040001988411, - "p90": 78.72000336647034, - "p95": 78.97599786520004, - "p99": 82.14399963617325 + "p50": 86.2400010228157, + "p90": 327.1999955177307, + "p95": 362.36798763275146, + "p99": 378.52799892425537 }, "roundtrip": { - "p50": 119.84000355005264, - "p90": 122.75200337171555, - "p95": 125.37600100040436, - "p99": 154.40000593662262 + "p50": 161.69600188732147, + "p90": 429.56799268722534, + "p95": 454.27200198173523, + "p99": 468.9280092716217 }, "isolatedSum": { - "p50": 138.08000087738037, - "p90": 140.76800271868706, - "p95": 142.08000153303146, - "p99": 155.008003115654 + "p50": 
183.55200439691544, + "p90": 670.2080070972443, + "p95": 742.8479790687561, + "p99": 782.0799946784973 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 7340032, - "combineLogicalBytes": 7340032, - "fanoutMean": 8, - "recvTokensMax": 64, - "stragglerRank": 4, + "dispatchLogicalBytes": 4988928, + "combineLogicalBytes": 4988928, + "fanoutMean": 5.4375, + "recvTokensMax": 47, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -5095,35 +4929,35 @@ "tokensPerRank": 16, "globalTokens": 128, "dispatch": { - "p50": 72.67200201749802, - "p90": 76.4480009675026, - "p95": 77.27999985218048, - "p99": 82.75199681520462 + "p50": 105.92000186443329, + "p90": 365.7599985599518, + "p95": 393.5999870300293, + "p99": 440.73599576950073 }, "combine": { - "p50": 78.40000092983246, - "p90": 78.87999713420868, - "p95": 79.26400005817413, - "p99": 82.65600353479385 + "p50": 89.82399851083755, + "p90": 329.02398705482483, + "p95": 364.9600148200989, + "p99": 384.95999574661255 }, "roundtrip": { - "p50": 121.40800058841705, - "p90": 127.03999876976013, - "p95": 128.7039965391159, - "p99": 145.1839953660965 + "p50": 163.26400637626648, + "p90": 414.0160083770752, + "p95": 437.18400597572327, + "p99": 466.97598695755005 }, "isolatedSum": { - "p50": 151.07200294733047, - "p90": 155.32799810171127, - "p95": 156.54399991035461, - "p99": 165.40800034999847 + "p50": 195.74400037527084, + "p90": 694.7839856147766, + "p95": 758.5600018501282, + "p99": 825.6959915161133 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 14680064, - "combineLogicalBytes": 14680064, - "fanoutMean": 8, - "recvTokensMax": 128, - "stragglerRank": 6, + "dispatchLogicalBytes": 9791488, + "combineLogicalBytes": 9791488, + "fanoutMean": 5.3359375, + "recvTokensMax": 94, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -5132,35 +4966,35 @@ "tokensPerRank": 32, "globalTokens": 256, "dispatch": { - "p50": 70.94399631023407, - "p90": 72.76800274848938, - "p95": 
74.17599856853485, - "p99": 80.99199831485748 + "p50": 101.43999755382538, + "p90": 370.0160086154938, + "p95": 390.01598954200745, + "p99": 421.5039908885956 }, "combine": { - "p50": 80.06399869918823, - "p90": 81.66400343179703, - "p95": 89.24800157546997, - "p99": 106.36799782514572 + "p50": 93.98400038480759, + "p90": 326.2079954147339, + "p95": 340.1600122451782, + "p99": 388.0639970302582 }, "roundtrip": { - "p50": 134.33599472045898, - "p90": 141.4719969034195, - "p95": 143.36000382900238, - "p99": 156.25600516796112 + "p50": 165.47200083732605, + "p90": 417.6959991455078, + "p95": 434.143990278244, + "p99": 472.927987575531 }, "isolatedSum": { - "p50": 151.0079950094223, - "p90": 154.4320061802864, - "p95": 163.42400014400482, - "p99": 187.3599961400032 + "p50": 195.42399793863297, + "p90": 696.2240040302277, + "p95": 730.1760017871857, + "p99": 809.5679879188538 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 29360128, - "combineLogicalBytes": 29360128, - "fanoutMean": 8, - "recvTokensMax": 256, - "stragglerRank": 5, + "dispatchLogicalBytes": 19410944, + "combineLogicalBytes": 19410944, + "fanoutMean": 5.2890625, + "recvTokensMax": 178, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 @@ -5169,35 +5003,35 @@ "tokensPerRank": 64, "globalTokens": 512, "dispatch": { - "p50": 85.53600311279297, - "p90": 89.05600011348724, - "p95": 92.25600212812424, - "p99": 105.76000064611435 + "p50": 113.79200220108032, + "p90": 363.20000886917114, + "p95": 397.92001247406006, + "p99": 420.54399847984314 }, "combine": { - "p50": 94.08000111579895, - "p90": 103.04000228643417, - "p95": 103.29599678516388, - "p99": 114.30399864912033 + "p50": 107.58399963378906, + "p90": 342.6879942417145, + "p95": 355.1360070705414, + "p99": 399.07199144363403 }, "roundtrip": { - "p50": 169.11999881267548, - "p90": 172.63999581336975, - "p95": 174.75199699401855, - "p99": 194.17600333690643 + "p50": 189.91999328136444, + "p90": 450.5600035190582, + "p95": 
483.96798968315125, + "p99": 507.61598348617554 }, "isolatedSum": { - "p50": 179.61600422859192, - "p90": 192.09600239992142, - "p95": 195.55199891328812, - "p99": 220.06399929523468 + "p50": 221.37600183486938, + "p90": 705.8880031108856, + "p95": 753.0560195446014, + "p99": 819.6159899234772 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 58720256, - "combineLogicalBytes": 58720256, - "fanoutMean": 8, - "recvTokensMax": 512, - "stragglerRank": 4, + "dispatchLogicalBytes": 38678528, + "combineLogicalBytes": 38678528, + "fanoutMean": 5.26953125, + "recvTokensMax": 360, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 @@ -5206,35 +5040,35 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 107.13600367307663, - "p90": 125.88800489902496, - "p95": 139.71200585365295, - "p99": 175.55199563503265 + "p50": 127.42400169372559, + "p90": 383.93598794937134, + "p95": 410.3679955005646, + "p99": 428.9279878139496 }, "combine": { - "p50": 131.3920021057129, - "p90": 139.90400731563568, - "p95": 140.09599387645721, - "p99": 151.61600708961487 + "p50": 127.9039978981018, + "p90": 394.23999190330505, + "p95": 412.992000579834, + "p99": 428.5759925842285 }, "roundtrip": { - "p50": 231.7119985818863, - "p90": 236.28799617290497, - "p95": 238.75199258327484, - "p99": 258.2719922065735 + "p50": 228.86399924755096, + "p90": 287.80800104141235, + "p95": 501.8240213394165, + "p99": 531.4559936523438 }, "isolatedSum": { - "p50": 238.52800577878952, - "p90": 265.79201221466064, - "p95": 279.80799973011017, - "p99": 327.1680027246475 + "p50": 255.3279995918274, + "p90": 778.1759798526764, + "p95": 823.3599960803986, + "p99": 857.5039803981781 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 117440512, - "combineLogicalBytes": 117440512, - "fanoutMean": 8, - "recvTokensMax": 1024, - "stragglerRank": 7, + "dispatchLogicalBytes": 77285376, + "combineLogicalBytes": 77285376, + "fanoutMean": 5.2646484375, + "recvTokensMax": 704, + 
"stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 @@ -5242,16 +5076,16 @@ ] }, { - "id": "cx-c5ecae32", - "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|8|decode|normal|none|none|0|tuned||d02a66236b524b8", - "colorKey": "b300_a314501b", - "comparisonKey": "a145623f8abcc709", + "id": "cx-d3049a56", + "identity": "gb200|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-mild|8|decode|normal|none|none|0|tuned||a3b13bb200bb717", + "colorKey": "gb200_0cd6b029", + "comparisonKey": "c40370edc4d42626", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:58:12.406102+00:00", + "generatedAt": "2026-06-29T14:02:09.823089+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_06", - "sku": "b300", + "publicationStatus": "comparable-experimental", + "runner": "gb200-nv_0", + "sku": "gb200", "backend": "deepep", "phase": "decode", "mode": "normal", @@ -5259,30 +5093,31 @@ "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", + "topologyClass": "gb200-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep · bf16 · balanced-rank-local", + "label": "GB200 EP8 · deepep · bf16 · zipf-mild", "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, "experts": 256, - "routing": "balanced-rank-local", - "routingLabel": "balanced-rank-local", + "routing": "zipf-mild", + "routingLabel": "zipf-mild", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, "dispatchDtype": "bf16", + "kernelGeneration": "v1", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1351, + "achievedFraction": 0.1316, "configuredUnits": 20, - "deviceUnits": 148, + "deviceUnits": 152, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", 
"fixedKernel": false, @@ -5290,95 +5125,206 @@ }, "placement": { "kind": "packed", - "nodes": 1, + "nodes": 2, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "d02a66236b524b8", - "workloadId": "set:4:2eebbed158fe1320", - "workloadSource": "canonical-serialized", + "traceSignature": "a3b13bb200bb717", + "workloadId": null, + "workloadSource": "seeded-runtime", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", + "backendVersion": "1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271879618", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271879618", - "createdAt": "2026-06-26T23:58:12.406102+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28374335449", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449", + "createdAt": "2026-06-29T13:08:20Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 62.97600269317627, - "p90": 65.21599739789963, - "p95": 66.01600348949432, - "p99": 75.74400305747986 + "p50": 88.28800171613693, + "p90": 101.85600072145462, + "p95": 104.60799932479858, + "p99": 112.73600161075592 }, "combine": { - "p50": 54.336000233888626, - "p90": 55.26399984955788, - "p95": 56.60799890756607, - "p99": 65.5359998345375 + "p50": 79.6160027384758, + "p90": 83.5840031504631, + "p95": 88.83199840784073, + "p99": 95.8079993724823 }, "roundtrip": { - "p50": 94.94400024414062, - "p90": 98.27200323343277, - "p95": 100.63999891281128, - "p99": 111.93600296974182 + "p50": 146.464005112648, + "p90": 158.65600109100342, + "p95": 162.59199380874634, + "p99": 169.15200650691986 }, "isolatedSum": { - "p50": 117.3120029270649, - "p90": 120.4799972474575, - "p95": 122.6240023970604, - "p99": 141.28000289201736 + "p50": 
167.90400445461273, + "p90": 185.44000387191772, + "p95": 193.4399977326393, + "p99": 208.54400098323822 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 114688, - "combineLogicalBytes": 114688, - "fanoutMean": 1, - "recvTokensMax": 4, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, + "dispatchLogicalBytes": 587776, + "combineLogicalBytes": 587776, + "fanoutMean": 5.125, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 88.128000497818, + "p90": 100.80000013113022, + "p95": 105.53599894046783, + "p99": 113.79200220108032 + }, + "combine": { + "p50": 80.4160013794899, + "p90": 84.60800349712372, + "p95": 87.39200234413147, + "p99": 95.551997423172 + }, + "roundtrip": { + "p50": 147.2959965467453, + "p90": 159.55199301242828, + "p95": 163.10399770736694, + "p99": 171.29600048065186 + }, + "isolatedSum": { + "p50": 168.5440018773079, + "p90": 185.40800362825394, + "p95": 192.9280012845993, + "p99": 209.34399962425232 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1103872, + "combineLogicalBytes": 1103872, + "fanoutMean": 4.8125, + "recvTokensMax": 16, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 90.7519981265068, + "p90": 103.39199751615524, + "p95": 109.21599715948105, + "p99": 118.30399930477142 + }, + "combine": { + "p50": 82.17599987983704, + "p90": 88.44800293445587, + "p95": 92.51199662685394, + "p99": 98.39999675750732 + }, + "roundtrip": { + "p50": 150.751993060112, + "p90": 161.43999993801117, + "p95": 165.72800278663635, + "p99": 173.69599640369415 + }, + "isolatedSum": { + "p50": 172.92799800634384, + "p90": 191.84000045061111, + "p95": 201.727993786335, + "p99": 216.70399606227875 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2250752, + 
"combineLogicalBytes": 2250752, + "fanoutMean": 4.90625, + "recvTokensMax": 31, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, { "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 59.39200147986412, - "p90": 61.63199990987778, - "p95": 62.65600025653839, - "p99": 71.68000191450119 + "p50": 92.44800359010696, + "p90": 105.02400249242783, + "p95": 107.71200060844421, + "p99": 118.17599833011627 }, "combine": { - "p50": 56.73599988222122, - "p90": 65.34399837255478, - "p95": 65.95200300216675, - "p99": 85.4400023818016 + "p50": 84.3840017914772, + "p90": 131.6480040550232, + "p95": 169.3120002746582, + "p99": 203.5519927740097 }, "roundtrip": { - "p50": 108.57599973678589, - "p90": 113.56800049543381, - "p95": 114.84800279140472, - "p99": 120.12799829244614 + "p50": 155.2640050649643, + "p90": 188.1600022315979, + "p95": 260.44800877571106, + "p99": 295.7119941711426 }, "isolatedSum": { - "p50": 116.12800136208534, - "p90": 126.97599828243256, - "p95": 128.60800325870514, - "p99": 157.1200042963028 + "p50": 176.83200538158417, + "p90": 236.67200654745102, + "p95": 277.0240008831024, + "p99": 321.727991104126 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 917504, - "combineLogicalBytes": 917504, - "fanoutMean": 1, - "recvTokensMax": 8, + "dispatchLogicalBytes": 4472832, + "combineLogicalBytes": 4472832, + "fanoutMean": 4.875, + "recvTokensMax": 62, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 93.82399916648865, + "p90": 106.1440035700798, + "p95": 109.37599837779999, + "p99": 114.9120032787323 + }, + "combine": { + "p50": 84.09599959850311, + "p90": 90.52799642086029, + "p95": 92.28800237178802, + "p99": 96.41599655151367 + }, + "roundtrip": { + "p50": 155.13600409030914, + "p90": 167.61599481105804, + "p95": 170.59199512004852, + "p99": 178.20799350738525 + }, + "isolatedSum": { + "p50": 
177.91999876499176, + "p90": 196.6719999909401, + "p95": 201.664000749588, + "p99": 211.32799983024597 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 8888320, + "combineLogicalBytes": 8888320, + "fanoutMean": 4.84375, + "recvTokensMax": 124, "stragglerRank": 5, "correct": true, "samplesPooled": 600, @@ -5388,34 +5334,71 @@ "tokensPerRank": 32, "globalTokens": 256, "dispatch": { - "p50": 70.72000205516815, - "p90": 76.57600194215775, - "p95": 77.88799703121185, - "p99": 85.31200140714645 + "p50": 97.63199836015701, + "p90": 109.24799740314484, + "p95": 112.2559979557991, + "p99": 118.49600076675415 }, "combine": { - "p50": 66.6240006685257, - "p90": 67.32799857854843, - "p95": 67.61600077152252, - "p99": 78.84799689054489 + "p50": 91.67999774217606, + "p90": 96.22400254011154, + "p95": 98.4639972448349, + "p99": 105.95200210809708 }, "roundtrip": { - "p50": 120.51200121641159, - "p90": 123.99999797344208, - "p95": 124.64000284671783, - "p99": 130.0159990787506 + "p50": 161.8880033493042, + "p90": 172.44799435138702, + "p95": 176.35199427604675, + "p99": 183.32800269126892 }, "isolatedSum": { - "p50": 137.34400272369385, - "p90": 143.90400052070618, - "p95": 145.50399780273438, - "p99": 164.15999829769135 + "p50": 189.31199610233307, + "p90": 205.47199994325638, + "p95": 210.719995200634, + "p99": 224.44800287485123 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 3670016, - "combineLogicalBytes": 3670016, - "fanoutMean": 1, - "recvTokensMax": 32, + "dispatchLogicalBytes": 17733632, + "combineLogicalBytes": 17733632, + "fanoutMean": 4.83203125, + "recvTokensMax": 248, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 110.59200018644333, + "p90": 120.35199999809265, + "p95": 123.29600006341934, + "p99": 131.84000551700592 + }, + "combine": { + "p50": 106.08000308275223, + "p90": 110.3999987244606, + "p95": 115.00799655914307, + "p99": 
120.7360029220581 + }, + "roundtrip": { + "p50": 190.11199474334717, + "p90": 198.4959989786148, + "p95": 202.2079974412918, + "p99": 209.82399582862854 + }, + "isolatedSum": { + "p50": 216.67200326919556, + "p90": 230.75199872255325, + "p95": 238.3039966225624, + "p99": 252.57600843906403 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 35424256, + "combineLogicalBytes": 35424256, + "fanoutMean": 4.826171875, + "recvTokensMax": 492, "stragglerRank": 5, "correct": true, "samplesPooled": 600, @@ -5425,35 +5408,35 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 70.11199742555618, - "p90": 71.87200337648392, - "p95": 73.79200309515, - "p99": 79.64800298213959 + "p50": 128.25599312782288, + "p90": 137.66400516033173, + "p95": 142.20799505710602, + "p99": 147.87200093269348 }, "combine": { - "p50": 68.35199892520905, - "p90": 70.04799693822861, - "p95": 76.92799717187881, - "p99": 79.1039988398552 + "p50": 135.51999628543854, + "p90": 144.0960019826889, + "p95": 145.75999975204468, + "p99": 152.6080071926117 }, "roundtrip": { - "p50": 122.23999947309494, - "p90": 129.5360028743744, - "p95": 131.32800161838531, - "p99": 142.87999272346497 + "p50": 241.02400243282318, + "p90": 249.91999566555023, + "p95": 253.02401185035706, + "p99": 258.7200105190277 }, "isolatedSum": { - "p50": 138.46399635076523, - "p90": 141.92000031471252, - "p95": 150.7200002670288, - "p99": 158.75200182199478 + "p50": 263.7759894132614, + "p90": 281.76000714302063, + "p95": 287.9679948091507, + "p99": 300.4800081253052 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 14680064, - "combineLogicalBytes": 14680064, - "fanoutMean": 1, - "recvTokensMax": 128, - "stragglerRank": 7, + "dispatchLogicalBytes": 70160384, + "combineLogicalBytes": 70160384, + "fanoutMean": 4.779296875, + "recvTokensMax": 987, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 @@ -5461,16 +5444,16 @@ ] }, { - "id": "cx-db4e17eb", - "identity": 
"b300|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|balanced+eplb|8|decode|normal|none|none|0|tuned||f0e66a15078595b", - "colorKey": "b300_592e9a16", - "comparisonKey": "22200746e5037727", + "id": "cx-acc388f6", + "identity": "gb200|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-mild+eplb|8|decode|normal|none|none|0|tuned||ab982093c4eac2b", + "colorKey": "gb200_4a0087e5", + "comparisonKey": "c58460e1e2fc4307", "schemaVersion": 3, - "generatedAt": "2026-06-27T09:48:06.153274+00:00", + "generatedAt": "2026-06-29T14:02:31.096616+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_07", - "sku": "b300", + "publicationStatus": "comparable-experimental", + "runner": "gb200-nv_0", + "sku": "gb200", "backend": "deepep", "phase": "decode", "mode": "normal", @@ -5478,30 +5461,31 @@ "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", + "topologyClass": "gb200-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep · bf16 · balanced+eplb", + "label": "GB200 EP8 · deepep · bf16 · zipf-mild+eplb", "model": "DeepSeek-V3 (EPLB physical)", "shape": { "hidden": 7168, "topk": 8, "experts": 288, - "routing": "balanced", - "routingLabel": "balanced+eplb", + "routing": "zipf-mild", + "routingLabel": "zipf-mild+eplb", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": true, "dispatchDtype": "bf16", + "kernelGeneration": "v1", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1351, + "achievedFraction": 0.1316, "configuredUnits": 20, - "deviceUnits": 148, + "deviceUnits": 152, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -5509,59 +5493,59 @@ }, "placement": { "kind": "packed", - "nodes": 1, + "nodes": 2, "gpusPerNode": 8, 
"scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "f0e66a15078595b", - "workloadId": "set:8:7af12818400d6348", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": 1, - "eplbImbalanceAfter": 1, - "backendVersion": "1.2.1", + "traceSignature": "ab982093c4eac2b", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 2.61328125, + "eplbImbalanceAfter": 1.0009114583333334, + "backendVersion": "1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28285612438", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285612438", - "createdAt": "2026-06-27T09:48:06.153274+00:00", - "sha": "149586650dbed5b7579537347e9489d5b41543c1" + "id": "28374335449", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449", + "createdAt": "2026-06-29T13:08:20Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 57.760000228881836, - "p90": 60.864001512527466, - "p95": 63.13599646091461, - "p99": 69.08799707889557 + "p50": 95.32800316810608, + "p90": 107.87200182676315, + "p95": 112.92800307273865, + "p99": 125.82400441169739 }, "combine": { - "p50": 55.52000179886818, - "p90": 57.37600103020668, - "p95": 64.44799900054932, - "p99": 66.17599725723267 + "p50": 79.71200346946716, + "p90": 84.63999629020691, + "p95": 89.6959975361824, + "p99": 95.36000341176987 }, "roundtrip": { - "p50": 95.29600292444229, - "p90": 98.14400225877762, - "p95": 99.64799880981445, - "p99": 105.05600273609161 + "p50": 152.25599706172943, + "p90": 163.29599916934967, + "p95": 167.39200055599213, + "p99": 177.69600450992584 }, "isolatedSum": { - "p50": 113.28000202775002, - "p90": 118.24000254273415, - "p95": 127.58399546146393, - "p99": 135.26399433612823 + "p50": 175.04000663757324, + "p90": 192.51199811697006, + "p95": 
202.62400060892105, + "p99": 221.18400782346725 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 229376, - "combineLogicalBytes": 229376, - "fanoutMean": 2, - "recvTokensMax": 3, - "stragglerRank": 4, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 7, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -5570,35 +5554,35 @@ "tokensPerRank": 2, "globalTokens": 16, "dispatch": { - "p50": 57.8560009598732, - "p90": 59.84000116586685, - "p95": 61.72800064086914, - "p99": 72.41600006818771 + "p50": 94.30400282144547, + "p90": 106.59199953079224, + "p95": 111.42399907112122, + "p99": 122.3360002040863 }, "combine": { - "p50": 56.76800012588501, - "p90": 65.63200056552887, - "p95": 66.17599725723267, - "p99": 66.94400310516357 + "p50": 81.53600245714188, + "p90": 87.36000210046768, + "p95": 90.84799885749817, + "p99": 96.16000205278397 }, "roundtrip": { - "p50": 105.34399747848511, - "p90": 112.15999722480774, - "p95": 113.40799927711487, - "p99": 127.26399302482605 + "p50": 154.01600301265717, + "p90": 165.82399606704712, + "p95": 168.99199783802032, + "p99": 178.43200266361237 }, "isolatedSum": { - "p50": 114.62400108575821, - "p90": 125.47200173139572, - "p95": 127.9039978981018, - "p99": 139.3600031733513 + "p50": 175.84000527858734, + "p90": 193.95200163125992, + "p95": 202.27199792861938, + "p99": 218.49600225687027 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 458752, - "combineLogicalBytes": 458752, - "fanoutMean": 2, - "recvTokensMax": 6, - "stragglerRank": 4, + "dispatchLogicalBytes": 1189888, + "combineLogicalBytes": 1189888, + "fanoutMean": 5.1875, + "recvTokensMax": 12, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -5607,35 +5591,35 @@ "tokensPerRank": 4, "globalTokens": 32, "dispatch": { - "p50": 57.82400071620941, - "p90": 59.776000678539276, - "p95": 61.535999178886414, - "p99": 68.12799721956253 + "p50": 95.96800059080124, + 
"p90": 107.64800012111664, + "p95": 114.1119971871376, + "p99": 131.55199587345123 }, "combine": { - "p50": 65.60000032186508, - "p90": 66.46399945020676, - "p95": 66.97600334882736, - "p99": 77.504001557827 + "p50": 83.26400071382523, + "p90": 90.43200314044952, + "p95": 93.24800223112106, + "p99": 101.18400305509567 }, "roundtrip": { - "p50": 111.29599809646606, - "p90": 114.14399743080139, - "p95": 114.84800279140472, - "p99": 123.45600128173828 + "p50": 156.70399367809296, + "p90": 167.52000153064728, + "p95": 171.83999717235565, + "p99": 181.05599284172058 }, "isolatedSum": { - "p50": 123.4240010380745, - "p90": 126.24000012874603, - "p95": 128.51200252771378, - "p99": 145.63199877738953 + "p50": 179.23200130462646, + "p90": 198.08000326156616, + "p95": 207.35999941825867, + "p99": 232.7359989285469 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 917504, - "combineLogicalBytes": 917504, - "fanoutMean": 2, - "recvTokensMax": 12, - "stragglerRank": 4, + "dispatchLogicalBytes": 2408448, + "combineLogicalBytes": 2408448, + "fanoutMean": 5.25, + "recvTokensMax": 23, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -5644,35 +5628,35 @@ "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 59.23200026154518, - "p90": 60.864001512527466, - "p95": 62.55999952554703, - "p99": 69.18399780988693 + "p50": 97.79199957847595, + "p90": 109.63200032711029, + "p95": 114.75200206041336, + "p99": 130.14400005340576 }, "combine": { - "p50": 65.88800251483917, - "p90": 66.59200042486191, - "p95": 66.94400310516357, - "p99": 69.5360004901886 + "p50": 85.1840004324913, + "p90": 94.04800087213516, + "p95": 97.75999933481216, + "p99": 148.60799908638 }, "roundtrip": { - "p50": 107.07200318574905, - "p90": 109.50399935245514, - "p95": 111.29599809646606, - "p99": 122.52800166606903 + "p50": 159.743994474411, + "p90": 172.06400632858276, + "p95": 179.77599799633026, + "p99": 339.9040102958679 }, "isolatedSum": { - "p50": 125.12000277638435, - 
"p90": 127.45600193738937, - "p95": 129.5040026307106, - "p99": 138.71999830007553 + "p50": 182.97600001096725, + "p90": 203.68000119924545, + "p95": 212.51200139522552, + "p99": 278.75199913978577 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1835008, - "combineLogicalBytes": 1835008, - "fanoutMean": 2, - "recvTokensMax": 24, - "stragglerRank": 4, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 47, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -5681,35 +5665,35 @@ "tokensPerRank": 16, "globalTokens": 128, "dispatch": { - "p50": 60.575999319553375, - "p90": 63.64800035953522, - "p95": 66.20799750089645, - "p99": 75.58400183916092 + "p50": 98.59199821949005, + "p90": 109.8560020327568, + "p95": 114.14399743080139, + "p99": 128.86400520801544 }, "combine": { - "p50": 66.17599725723267, - "p90": 66.97600334882736, - "p95": 67.19999760389328, - "p99": 70.14399766921997 + "p50": 86.7839977145195, + "p90": 93.59999746084213, + "p95": 95.67999839782715, + "p99": 103.16800326108932 }, "roundtrip": { - "p50": 108.09600353240967, - "p90": 110.20799726247787, - "p95": 112.2559979557991, - "p99": 118.94399672746658 + "p50": 162.1759980916977, + "p90": 173.43999445438385, + "p95": 176.7359972000122, + "p99": 182.36799538135529 }, "isolatedSum": { - "p50": 126.75199657678604, - "p90": 130.62400370836258, - "p95": 133.40799510478973, - "p99": 145.7279995083809 + "p50": 185.37599593400955, + "p90": 203.45599949359894, + "p95": 209.82399582862854, + "p99": 232.03200846910477 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 3670016, - "combineLogicalBytes": 3670016, - "fanoutMean": 2, - "recvTokensMax": 48, - "stragglerRank": 4, + "dispatchLogicalBytes": 9605120, + "combineLogicalBytes": 9605120, + "fanoutMean": 5.234375, + "recvTokensMax": 93, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -5718,35 +5702,35 @@ "tokensPerRank": 32, 
"globalTokens": 256, "dispatch": { - "p50": 60.67200005054474, - "p90": 62.752000987529755, - "p95": 64.03200328350067, - "p99": 73.95199686288834 + "p50": 101.98400169610977, + "p90": 111.90400272607803, + "p95": 115.35999923944473, + "p99": 121.08799815177917 }, "combine": { - "p50": 66.23999774456024, - "p90": 67.1359971165657, - "p95": 67.61600077152252, - "p99": 78.14399898052216 + "p50": 92.73599833250046, + "p90": 98.14400225877762, + "p95": 101.1200025677681, + "p99": 107.45599865913391 }, "roundtrip": { - "p50": 108.89600217342377, - "p90": 111.39199882745743, - "p95": 113.69600147008896, - "p99": 122.52800166606903 + "p50": 166.9120043516159, + "p90": 177.18400061130524, + "p95": 181.92000687122345, + "p99": 189.11999464035034 }, "isolatedSum": { - "p50": 126.91199779510498, - "p90": 129.88799810409546, - "p95": 131.6480040550232, - "p99": 152.0959958434105 + "p50": 194.72000002861023, + "p90": 210.04800498485565, + "p95": 216.48000180721283, + "p99": 228.5439968109131 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 7340032, - "combineLogicalBytes": 7340032, - "fanoutMean": 2, - "recvTokensMax": 96, - "stragglerRank": 4, + "dispatchLogicalBytes": 19367936, + "combineLogicalBytes": 19367936, + "fanoutMean": 5.27734375, + "recvTokensMax": 182, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -5755,35 +5739,35 @@ "tokensPerRank": 64, "globalTokens": 512, "dispatch": { - "p50": 76.64000242948532, - "p90": 78.52800190448761, - "p95": 79.29600030183792, - "p99": 89.37600255012512 + "p50": 111.90400272607803, + "p90": 121.47200107574463, + "p95": 125.34399330615997, + "p99": 131.23199343681335 }, "combine": { - "p50": 68.00000369548798, - "p90": 76.80000364780426, - "p95": 77.47200131416321, - "p99": 79.39200103282928 + "p50": 107.80800133943558, + "p90": 116.06399714946747, + "p95": 119.10399794578552, + "p99": 127.23200023174286 }, "roundtrip": { - "p50": 124.25599992275238, - "p90": 128.9599984884262, - "p95": 
129.7920048236847, - "p99": 141.59999787807465 + "p50": 192.9599940776825, + "p90": 203.99999618530273, + "p95": 210.207998752594, + "p99": 223.61600399017334 }, "isolatedSum": { - "p50": 144.6400061249733, - "p90": 155.32800555229187, - "p95": 156.76800161600113, - "p99": 168.7680035829544 + "p50": 219.7120040655136, + "p90": 237.5359982252121, + "p95": 244.4479912519455, + "p99": 258.4639936685562 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 14680064, - "combineLogicalBytes": 14680064, - "fanoutMean": 2, - "recvTokensMax": 192, - "stragglerRank": 4, + "dispatchLogicalBytes": 38535168, + "combineLogicalBytes": 38535168, + "fanoutMean": 5.25, + "recvTokensMax": 358, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -5792,35 +5776,35 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 81.18399977684021, - "p90": 87.55200356245041, - "p95": 89.47200328111649, - "p99": 95.74399888515472 + "p50": 127.20000743865967, + "p90": 135.00800728797913, + "p95": 138.65600526332855, + "p99": 144.16000247001648 }, "combine": { - "p50": 81.98399841785431, - "p90": 90.7839983701706, - "p95": 91.0400003194809, - "p99": 102.78400033712387 + "p50": 125.69600343704224, + "p90": 133.02400708198547, + "p95": 137.63199746608734, + "p99": 143.5520052909851 }, "roundtrip": { - "p50": 146.08000218868256, - "p90": 148.28799664974213, - "p95": 150.81599354743958, - "p99": 159.743994474411 + "p50": 226.4000028371811, + "p90": 235.07200181484222, + "p95": 239.19999599456787, + "p99": 247.55200743675232 }, "isolatedSum": { - "p50": 163.16799819469452, - "p90": 178.336001932621, - "p95": 180.51200360059738, - "p99": 198.5279992222786 + "p50": 252.8960108757019, + "p90": 268.0320143699646, + "p95": 276.2880027294159, + "p99": 287.7120077610016 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 29360128, - "combineLogicalBytes": 29360128, - "fanoutMean": 2, - "recvTokensMax": 384, - "stragglerRank": 4, + "dispatchLogicalBytes": 76869632, + 
"combineLogicalBytes": 76869632, + "fanoutMean": 5.236328125, + "recvTokensMax": 688, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -5828,16 +5812,16 @@ ] }, { - "id": "cx-72792847", - "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|decode|normal|none|none|0|tuned||2ad5ef98d328fa1", - "colorKey": "b300_5b993222", - "comparisonKey": "10e590b8f933d382", + "id": "cx-04c531ee", + "identity": "gb200|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-moderate|8|decode|normal|none|none|0|tuned||1093cd76c9cd2db", + "colorKey": "gb200_ff33b726", + "comparisonKey": "61f8a26a723405f9", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:58:30.886921+00:00", + "generatedAt": "2026-06-29T14:03:53.703773+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_10", - "sku": "b300", + "publicationStatus": "comparable-experimental", + "runner": "gb200-nv_0", + "sku": "gb200", "backend": "deepep", "phase": "decode", "mode": "normal", @@ -5845,30 +5829,31 @@ "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", + "topologyClass": "gb200-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep · bf16 · hotspot-single", + "label": "GB200 EP8 · deepep · bf16 · zipf-moderate", "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, "experts": 256, - "routing": "hotspot-single", - "routingLabel": "hotspot-single", + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, "dispatchDtype": "bf16", + "kernelGeneration": "v1", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1351, + "achievedFraction": 0.1316, "configuredUnits": 20, - "deviceUnits": 148, + 
"deviceUnits": 152, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -5876,389 +5861,170 @@ }, "placement": { "kind": "packed", - "nodes": 1, + "nodes": 2, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "2ad5ef98d328fa1", - "workloadId": "set:4:286be993cd819ed9", - "workloadSource": "canonical-serialized", + "traceSignature": "1093cd76c9cd2db", + "workloadId": null, + "workloadSource": "seeded-runtime", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", + "backendVersion": "1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271900377", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271900377", - "createdAt": "2026-06-26T23:58:30.886921+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28374335449", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449", + "createdAt": "2026-06-29T13:08:20Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 56.96000158786774, - "p90": 59.10399928689003, - "p95": 62.272001057863235, - "p99": 71.68000191450119 + "p50": 89.34400230646133, + "p90": 101.9200012087822, + "p95": 105.59999942779541, + "p99": 113.63200098276138 }, "combine": { - "p50": 66.39999896287918, - "p90": 67.07199662923813, - "p95": 67.45599955320358, - "p99": 90.17600119113922 + "p50": 76.73600316047668, + "p90": 81.98399841785431, + "p95": 83.96799862384796, + "p99": 89.50400352478027 }, "roundtrip": { - "p50": 106.91200196743011, - "p90": 113.40799927711487, - "p95": 117.18399822711945, - "p99": 195.77600061893463 + "p50": 143.74400675296783, + "p90": 154.7199934720993, + "p95": 158.75199437141418, + "p99": 165.3439998626709 }, "isolatedSum": { - "p50": 123.36000055074692, - "p90": 
126.17599591612816, - "p95": 129.72800061106682, - "p99": 161.8560031056404 + "p50": 166.08000546693802, + "p90": 183.9039996266365, + "p95": 189.56799805164337, + "p99": 203.13600450754166 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 602112, - "combineLogicalBytes": 602112, - "fanoutMean": 5.25, + "dispatchLogicalBytes": 444416, + "combineLogicalBytes": 444416, + "fanoutMean": 3.875, "recvTokensMax": 8, - "stragglerRank": 7, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 2, + "globalTokens": 16, "dispatch": { - "p50": 59.58399921655655, - "p90": 62.65600025653839, - "p95": 65.34399837255478, - "p99": 81.85599744319916 + "p50": 90.01599997282028, + "p90": 102.46399790048599, + "p95": 105.24799674749374, + "p99": 111.29599809646606 }, "combine": { - "p50": 68.00000369548798, - "p90": 77.11999863386154, - "p95": 77.79199630022049, - "p99": 79.9039974808693 + "p50": 78.14399898052216, + "p90": 83.0719992518425, + "p95": 86.11200004816055, + "p99": 95.04000097513199 }, "roundtrip": { - "p50": 122.36800044775009, - "p90": 125.791996717453, - "p95": 127.71199643611908, - "p99": 145.82400023937225 + "p50": 146.62399888038635, + "p90": 158.52800011634827, + "p95": 162.1759980916977, + "p99": 182.52800405025482 }, "isolatedSum": { - "p50": 127.58400291204453, - "p90": 139.77599889039993, - "p95": 143.13599467277527, - "p99": 161.75999492406845 + "p50": 168.15999895334244, + "p90": 185.5359971523285, + "p95": 191.3599967956543, + "p99": 206.33599907159805 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4859904, - "combineLogicalBytes": 4859904, - "fanoutMean": 5.296875, - "recvTokensMax": 64, - "stragglerRank": 7, + "dispatchLogicalBytes": 845824, + "combineLogicalBytes": 845824, + "fanoutMean": 3.6875, + "recvTokensMax": 16, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 32, - "globalTokens": 256, + 
"tokensPerRank": 4, + "globalTokens": 32, "dispatch": { - "p50": 69.63200122117996, - "p90": 75.32799988985062, - "p95": 77.27999985218048, - "p99": 98.08000177145004 + "p50": 91.58399701118469, + "p90": 104.16000336408615, + "p95": 108.89600217342377, + "p99": 117.76000261306763 }, "combine": { - "p50": 78.62400263547897, - "p90": 79.26400005817413, - "p95": 79.45600152015686, - "p99": 89.75999802350998 + "p50": 80.25600016117096, + "p90": 84.51200276613235, + "p95": 89.91999924182892, + "p99": 102.04800218343735 }, "roundtrip": { - "p50": 133.53599607944489, - "p90": 137.15200126171112, - "p95": 138.5280042886734, - "p99": 155.10399639606476 + "p50": 148.6400067806244, + "p90": 160.7999950647354, + "p95": 163.96799683570862, + "p99": 171.4559942483902 }, "isolatedSum": { - "p50": 148.25600385665894, - "p90": 154.59199994802475, - "p95": 156.73600137233734, - "p99": 187.83999979496002 + "p50": 171.83999717235565, + "p90": 188.6720061302185, + "p95": 198.81600141525269, + "p99": 219.80800479650497 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19525632, - "combineLogicalBytes": 19525632, - "fanoutMean": 5.3203125, - "recvTokensMax": 256, - "stragglerRank": 7, + "dispatchLogicalBytes": 1691648, + "combineLogicalBytes": 1691648, + "fanoutMean": 3.6875, + "recvTokensMax": 32, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 8, + "globalTokens": 64, "dispatch": { - "p50": 103.39199751615524, - "p90": 104.96000200510025, - "p95": 106.62399977445602, - "p99": 110.81600189208984 + "p50": 92.47999638319016, + "p90": 105.31199723482132, + "p95": 109.11999642848969, + "p99": 117.44000017642975 }, "combine": { - "p50": 127.80800461769104, - "p90": 129.2160004377365, - "p95": 130.5920034646988, - "p99": 150.62400698661804 + "p50": 82.24000036716461, + "p90": 90.62399715185165, + "p95": 93.31200271844864, + "p99": 102.78400033712387 }, "roundtrip": { - "p50": 
215.87200462818146, - "p90": 223.07200729846954, - "p95": 224.7679978609085, - "p99": 231.32799565792084 + "p50": 152.16000378131866, + "p90": 164.63999450206757, + "p95": 167.7439957857132, + "p99": 177.2480010986328 }, "isolatedSum": { - "p50": 231.20000213384628, - "p90": 234.17600244283676, - "p95": 237.21600323915482, - "p99": 261.4400088787079 + "p50": 174.71999675035477, + "p90": 195.93599438667297, + "p95": 202.43199914693832, + "p99": 220.22400051355362 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 78102528, - "combineLogicalBytes": 78102528, - "fanoutMean": 5.3203125, - "recvTokensMax": 1024, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-f390f28a", - "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|decode|normal|none|none|0|tuned||b6caf944f6bb621", - "colorKey": "b300_5b993222", - "comparisonKey": "82de9b5581f31438", - "schemaVersion": 3, - "generatedAt": "2026-06-27T09:50:17.677386+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_03", - "sku": "b300", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "B300 EP8 · deepep · bf16 · hotspot-single", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "hotspot-single", - "routingLabel": "hotspot-single", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1351, - "configuredUnits": 20, - "deviceUnits": 148, - "resourceClass": "backend-tuned", - "conformanceClass": 
"backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "b6caf944f6bb621", - "workloadId": "set:8:286be993cd819ed9", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28285661360", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285661360", - "createdAt": "2026-06-27T09:50:17.677386+00:00", - "sha": "149586650dbed5b7579537347e9489d5b41543c1" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 56.48000165820122, - "p90": 58.88000130653381, - "p95": 61.37600168585777, - "p99": 68.89600306749344 - }, - "combine": { - "p50": 66.17599725723267, - "p90": 66.78400188684464, - "p95": 67.32799857854843, - "p99": 69.95200365781784 - }, - "roundtrip": { - "p50": 105.56799918413162, - "p90": 112.19199746847153, - "p95": 112.70400136709213, - "p99": 120.7360029220581 - }, - "isolatedSum": { - "p50": 122.65599891543388, - "p90": 125.66400319337845, - "p95": 128.7040002644062, - "p99": 138.84800672531128 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 602112, - "combineLogicalBytes": 602112, - "fanoutMean": 5.25, - "recvTokensMax": 8, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 56.76800012588501, - "p90": 59.007998555898666, - "p95": 62.04799935221672, - "p99": 67.52000004053116 - }, - "combine": { - "p50": 66.3679987192154, - "p90": 67.64800101518631, - "p95": 68.2239979505539, - "p99": 77.66400277614594 - }, - "roundtrip": { - "p50": 106.27199709415436, - "p90": 120.60800194740295, - "p95": 
129.56799566745758, - "p99": 144.99199390411377 - }, - "isolatedSum": { - "p50": 123.1359988451004, - "p90": 126.65599957108498, - "p95": 130.27199730277061, - "p99": 145.1840028166771 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1247232, - "combineLogicalBytes": 1247232, - "fanoutMean": 5.4375, - "recvTokensMax": 16, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 57.53599852323532, - "p90": 59.4559982419014, - "p95": 61.69600039720535, - "p99": 68.44799965620041 - }, - "combine": { - "p50": 67.61600077152252, - "p90": 69.50400024652481, - "p95": 77.2159993648529, - "p99": 91.13600105047226 - }, - "roundtrip": { - "p50": 113.76000195741653, - "p90": 118.8800036907196, - "p95": 121.69600278139114, - "p99": 124.9919980764389 - }, - "isolatedSum": { - "p50": 125.15199929475784, - "p90": 128.9599984884262, - "p95": 138.91199976205826, - "p99": 159.58400070667267 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2451456, - "combineLogicalBytes": 2451456, - "fanoutMean": 5.34375, - "recvTokensMax": 32, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 58.04799869656563, - "p90": 60.54399907588959, - "p95": 63.61600011587143, - "p99": 71.16799801588058 - }, - "combine": { - "p50": 67.58400052785873, - "p90": 69.56800073385239, - "p95": 72.89600372314453, - "p99": 80.89599758386612 - }, - "roundtrip": { - "p50": 122.30399996042252, - "p90": 125.11999905109406, - "p95": 126.3359934091568, - "p99": 137.28000223636627 - }, - "isolatedSum": { - "p50": 125.63199922442436, - "p90": 130.11199980974197, - "p95": 136.51200383901596, - "p99": 152.0639955997467 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4859904, - "combineLogicalBytes": 4859904, - "fanoutMean": 5.296875, - "recvTokensMax": 64, - "stragglerRank": 4, + 
"dispatchLogicalBytes": 3354624, + "combineLogicalBytes": 3354624, + "fanoutMean": 3.65625, + "recvTokensMax": 64, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -6267,34 +6033,34 @@ "tokensPerRank": 16, "globalTokens": 128, "dispatch": { - "p50": 59.776000678539276, - "p90": 67.10399687290192, - "p95": 69.7920024394989, - "p99": 78.11199873685837 + "p50": 92.73599833250046, + "p90": 103.80800068378448, + "p95": 107.61599987745285, + "p99": 116.5120005607605 }, "combine": { - "p50": 68.44799965620041, - "p90": 78.14399898052216, - "p95": 78.46400141716003, - "p99": 79.64800298213959 + "p50": 82.49600231647491, + "p90": 91.16800129413605, + "p95": 94.27200257778168, + "p99": 103.4879982471466 }, "roundtrip": { - "p50": 119.55200135707855, - "p90": 124.38400089740753, - "p95": 125.56800246238708, - "p99": 129.18399274349213 + "p50": 152.54400670528412, + "p90": 163.7759953737259, + "p95": 167.00799763202667, + "p99": 175.07199943065643 }, "isolatedSum": { - "p50": 128.22400033473969, - "p90": 145.24799585342407, - "p95": 148.25600385665894, - "p99": 157.76000171899796 + "p50": 175.23200064897537, + "p90": 194.97600197792053, + "p95": 201.88800245523453, + "p99": 219.9999988079071 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 9748480, - "combineLogicalBytes": 9748480, - "fanoutMean": 5.3125, - "recvTokensMax": 128, + "dispatchLogicalBytes": 6537216, + "combineLogicalBytes": 6537216, + "fanoutMean": 3.5625, + "recvTokensMax": 127, "stragglerRank": 4, "correct": true, "samplesPooled": 600, @@ -6304,34 +6070,34 @@ "tokensPerRank": 32, "globalTokens": 256, "dispatch": { - "p50": 68.9919963479042, - "p90": 74.11199808120728, - "p95": 75.99999755620956, - "p99": 90.87999910116196 + "p50": 97.82399982213974, + "p90": 108.31999778747559, + "p95": 111.93600296974182, + "p99": 120.57600170373917 }, "combine": { - "p50": 78.5600021481514, - "p90": 79.19999957084656, - "p95": 79.77599650621414, - "p99": 91.26400202512741 + "p50": 
85.63199639320374, + "p90": 93.85599941015244, + "p95": 95.51999717950821, + "p99": 102.59199887514114 }, "roundtrip": { - "p50": 130.3360015153885, - "p90": 134.20799374580383, - "p95": 136.00000739097595, - "p99": 146.33600413799286 + "p50": 159.13599729537964, + "p90": 168.99199783802032, + "p95": 171.77599668502808, + "p99": 178.0479997396469 }, "isolatedSum": { - "p50": 147.5519984960556, - "p90": 153.31199765205383, - "p95": 155.7759940624237, - "p99": 182.14400112628937 + "p50": 183.45599621534348, + "p90": 202.17599719762802, + "p95": 207.45600014925003, + "p99": 223.1680005788803 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19525632, - "combineLogicalBytes": 19525632, - "fanoutMean": 5.3203125, - "recvTokensMax": 256, + "dispatchLogicalBytes": 12859392, + "combineLogicalBytes": 12859392, + "fanoutMean": 3.50390625, + "recvTokensMax": 255, "stragglerRank": 4, "correct": true, "samplesPooled": 600, @@ -6341,34 +6107,34 @@ "tokensPerRank": 64, "globalTokens": 512, "dispatch": { - "p50": 83.03999900817871, - "p90": 88.60799670219421, - "p95": 89.56799656152725, - "p99": 95.16800194978714 + "p50": 106.36799782514572, + "p90": 115.87200313806534, + "p95": 119.80800330638885, + "p99": 127.55200266838074 }, "combine": { - "p50": 91.67999774217606, - "p90": 93.9520001411438, - "p95": 94.55999732017517, - "p99": 102.46399790048599 + "p50": 103.45599800348282, + "p90": 108.31999778747559, + "p95": 109.8560020327568, + "p99": 119.29599940776825 }, "roundtrip": { - "p50": 159.19999778270721, - "p90": 164.000004529953, - "p95": 166.24000668525696, - "p99": 175.20000040531158 + "p50": 185.08799374103546, + "p90": 194.62400674819946, + "p95": 198.5280066728592, + "p99": 210.33599972724915 }, "isolatedSum": { - "p50": 174.71999675035477, - "p90": 182.559996843338, - "p95": 184.12799388170242, - "p99": 197.63199985027313 + "p50": 209.82399582862854, + "p90": 224.19200092554092, + "p95": 229.66400533914566, + "p99": 246.848002076149 }, "roundtripMeasured": true, - 
"dispatchLogicalBytes": 38621184, - "combineLogicalBytes": 38621184, - "fanoutMean": 5.26171875, - "recvTokensMax": 512, + "dispatchLogicalBytes": 25145344, + "combineLogicalBytes": 25145344, + "fanoutMean": 3.42578125, + "recvTokensMax": 510, "stragglerRank": 4, "correct": true, "samplesPooled": 600, @@ -6378,34 +6144,34 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 105.3759977221489, - "p90": 106.6880002617836, - "p95": 108.2879975438118, - "p99": 117.76000261306763 + "p50": 127.51999497413635, + "p90": 135.42400300502777, + "p95": 139.615997672081, + "p99": 144.80000734329224 }, "combine": { - "p50": 127.9039978981018, - "p90": 131.1360001564026, - "p95": 138.20800185203552, - "p99": 144.03200149536133 + "p50": 134.0479999780655, + "p90": 142.97600090503693, + "p95": 145.05599439144135, + "p99": 154.91199493408203 }, "roundtrip": { - "p50": 220.47999501228333, - "p90": 224.41600263118744, - "p95": 225.69599747657776, - "p99": 234.65600609779358 + "p50": 236.2239956855774, + "p90": 244.32000517845154, + "p95": 247.45599925518036, + "p99": 256.8959891796112 }, "isolatedSum": { - "p50": 233.2799956202507, - "p90": 237.8240004181862, - "p95": 246.49599939584732, - "p99": 261.79200410842896 + "p50": 261.56799495220184, + "p90": 278.4000039100647, + "p95": 284.67199206352234, + "p99": 299.71200227737427 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 78102528, - "combineLogicalBytes": 78102528, - "fanoutMean": 5.3203125, - "recvTokensMax": 1024, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, "stragglerRank": 4, "correct": true, "samplesPooled": 600, @@ -6414,16 +6180,16 @@ ] }, { - "id": "cx-6a4bc237", - "identity": "b300|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|hotspot-single+eplb|8|decode|normal|none|none|0|tuned||e41f5099a9733ac", - "colorKey": "b300_39a5906c", - "comparisonKey": "f7e177d587167ca7", + "id": "cx-a12c4b2b", + "identity": 
"gb200|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-moderate+eplb|8|decode|normal|none|none|0|tuned||431e44245dd1524", + "colorKey": "gb200_acbc8de8", + "comparisonKey": "a122841e63a6f52b", "schemaVersion": 3, - "generatedAt": "2026-06-27T09:50:24.903917+00:00", + "generatedAt": "2026-06-29T14:04:17.680714+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_06", - "sku": "b300", + "publicationStatus": "comparable-experimental", + "runner": "gb200-nv_0", + "sku": "gb200", "backend": "deepep", "phase": "decode", "mode": "normal", @@ -6431,30 +6197,31 @@ "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", + "topologyClass": "gb200-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep · bf16 · hotspot-single+eplb", + "label": "GB200 EP8 · deepep · bf16 · zipf-moderate+eplb", "model": "DeepSeek-V3 (EPLB physical)", "shape": { "hidden": 7168, "topk": 8, "experts": 288, - "routing": "hotspot-single", - "routingLabel": "hotspot-single+eplb", + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate+eplb", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": true, "dispatchDtype": "bf16", + "kernelGeneration": "v1", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1351, + "achievedFraction": 0.1316, "configuredUnits": 20, - "deviceUnits": 148, + "deviceUnits": 152, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -6462,59 +6229,59 @@ }, "placement": { "kind": "packed", - "nodes": 1, + "nodes": 2, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "e41f5099a9733ac", - "workloadId": "set:8:286be993cd819ed9", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": 1.830078125, 
- "eplbImbalanceAfter": 1.0007595486111112, - "backendVersion": "1.2.1", + "traceSignature": "431e44245dd1524", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.927734375, + "eplbImbalanceAfter": 1.0006103515625, + "backendVersion": "1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28285664068", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285664068", - "createdAt": "2026-06-27T09:50:24.903917+00:00", - "sha": "149586650dbed5b7579537347e9489d5b41543c1" + "id": "28374335449", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449", + "createdAt": "2026-06-29T13:08:20Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 55.64799904823303, - "p90": 57.37600103020668, - "p95": 59.647999703884125, - "p99": 68.12799721956253 + "p50": 90.65599739551544, + "p90": 102.24000364542007, + "p95": 106.30399733781815, + "p99": 116.19199812412262 }, "combine": { - "p50": 65.60000032186508, - "p90": 66.20799750089645, - "p95": 66.68800115585327, - "p99": 77.27999985218048 + "p50": 79.0719985961914, + "p90": 83.55200290679932, + "p95": 85.4720026254654, + "p99": 92.86399930715561 }, "roundtrip": { - "p50": 104.12800312042236, - "p90": 109.92000252008438, - "p95": 111.35999858379364, - "p99": 116.35199934244156 + "p50": 146.27200365066528, + "p90": 157.47199952602386, + "p95": 161.21600568294525, + "p99": 169.3439930677414 }, "isolatedSum": { - "p50": 121.24799937009811, - "p90": 123.58399853110313, - "p95": 126.3360008597374, - "p99": 145.407997071743 + "p50": 169.72799599170685, + "p90": 185.7920065522194, + "p95": 191.77599996328354, + "p99": 209.05599743127823 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 630784, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, + 
"dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 5.375, "recvTokensMax": 7, - "stragglerRank": 7, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -6523,35 +6290,35 @@ "tokensPerRank": 2, "globalTokens": 16, "dispatch": { - "p50": 55.80800026655197, - "p90": 58.111999183893204, - "p95": 60.19200012087822, - "p99": 80.60800284147263 + "p50": 90.20800143480301, + "p90": 102.04800218343735, + "p95": 107.04000294208527, + "p99": 117.8240031003952 }, "combine": { - "p50": 65.92000275850296, - "p90": 67.35999882221222, - "p95": 68.1919977068901, - "p99": 78.27199995517731 + "p50": 81.05599880218506, + "p90": 88.60799670219421, + "p95": 91.51999652385712, + "p99": 95.0080007314682 }, "roundtrip": { - "p50": 104.80000078678131, - "p90": 107.16799646615982, - "p95": 109.56799983978271, - "p99": 119.6800023317337 + "p50": 150.84800124168396, + "p90": 161.05599701404572, + "p95": 164.51199352741241, + "p99": 170.52799463272095 }, "isolatedSum": { - "p50": 121.72800302505493, - "p90": 125.47199800610542, - "p95": 128.38399782776833, - "p99": 158.88000279664993 + "p50": 171.26400023698807, + "p90": 190.65599888563156, + "p95": 198.55999946594238, + "p99": 212.8320038318634 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1175552, - "combineLogicalBytes": 1175552, - "fanoutMean": 5.125, - "recvTokensMax": 12, - "stragglerRank": 7, + "dispatchLogicalBytes": 1204224, + "combineLogicalBytes": 1204224, + "fanoutMean": 5.25, + "recvTokensMax": 14, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -6560,35 +6327,35 @@ "tokensPerRank": 4, "globalTokens": 32, "dispatch": { - "p50": 56.57599866390228, - "p90": 59.776000678539276, - "p95": 64.7360011935234, - "p99": 73.18399846553802 + "p50": 91.48799628019333, + "p90": 102.11200267076492, + "p95": 106.59199953079224, + "p99": 128.1599998474121 }, "combine": { - "p50": 66.3679987192154, - "p90": 67.77600198984146, - "p95": 68.4799998998642, - 
"p99": 82.33600109815598 + "p50": 81.40800148248672, + "p90": 88.83199840784073, + "p95": 91.51999652385712, + "p99": 96.00000083446503 }, "roundtrip": { - "p50": 111.84000223875046, - "p90": 116.67200177907944, - "p95": 120.51200121641159, - "p99": 148.15999567508698 + "p50": 150.78400075435638, + "p90": 161.53599321842194, + "p95": 164.8319959640503, + "p99": 170.04799842834473 }, "isolatedSum": { - "p50": 122.94399738311768, - "p90": 127.55200266838074, - "p95": 133.2160010933876, - "p99": 155.519999563694 + "p50": 172.89599776268005, + "p90": 190.94400107860565, + "p95": 198.11199605464935, + "p99": 224.16000068187714 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2451456, - "combineLogicalBytes": 2451456, - "fanoutMean": 5.34375, - "recvTokensMax": 23, - "stragglerRank": 7, + "dispatchLogicalBytes": 2394112, + "combineLogicalBytes": 2394112, + "fanoutMean": 5.21875, + "recvTokensMax": 24, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -6597,35 +6364,35 @@ "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 57.72799998521805, - "p90": 59.808000922203064, - "p95": 61.664000153541565, - "p99": 71.3919997215271 + "p50": 93.82399916648865, + "p90": 103.45599800348282, + "p95": 107.39199817180634, + "p99": 113.76000195741653 }, "combine": { - "p50": 67.10399687290192, - "p90": 69.11999732255936, - "p95": 76.31999999284744, - "p99": 80.9599980711937 + "p50": 83.13599973917007, + "p90": 90.04800021648407, + "p95": 92.25600212812424, + "p99": 102.9760017991066 }, "roundtrip": { - "p50": 121.79200351238251, - "p90": 124.4800016283989, - "p95": 125.2480000257492, - "p99": 135.77599823474884 + "p50": 155.64799308776855, + "p90": 165.56799411773682, + "p95": 169.37600076198578, + "p99": 175.74399709701538 }, "isolatedSum": { - "p50": 124.83199685811996, - "p90": 128.92799824476242, - "p95": 137.984000146389, - "p99": 152.3519977927208 + "p50": 176.95999890565872, + "p90": 193.5039982199669, + "p95": 199.64800029993057, + 
"p99": 216.73600375652313 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4730880, - "combineLogicalBytes": 4730880, - "fanoutMean": 5.15625, - "recvTokensMax": 44, - "stragglerRank": 7, + "dispatchLogicalBytes": 4630528, + "combineLogicalBytes": 4630528, + "fanoutMean": 5.046875, + "recvTokensMax": 45, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -6634,35 +6401,35 @@ "tokensPerRank": 16, "globalTokens": 128, "dispatch": { - "p50": 65.05600363016129, - "p90": 69.95200365781784, - "p95": 73.56800138950348, - "p99": 77.11999863386154 + "p50": 94.71999853849411, + "p90": 103.45599800348282, + "p95": 106.81600123643875, + "p99": 112.96000331640244 }, "combine": { - "p50": 67.48799979686737, - "p90": 77.63200253248215, - "p95": 77.85599678754807, - "p99": 78.49600166082382 + "p50": 84.51200276613235, + "p90": 91.51999652385712, + "p95": 93.24800223112106, + "p99": 98.65599870681763 }, "roundtrip": { - "p50": 118.9119964838028, - "p90": 122.04799801111221, - "p95": 123.99999797344208, - "p99": 128.86400520801544 + "p50": 156.41599893569946, + "p90": 166.30400717258453, + "p95": 169.18399930000305, + "p99": 177.40799486637115 }, "isolatedSum": { - "p50": 132.54400342702866, - "p90": 147.5840061903, - "p95": 151.42399817705154, - "p99": 155.61600029468536 + "p50": 179.23200130462646, + "p90": 194.97599452733994, + "p95": 200.06400346755981, + "p99": 211.61600202322006 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 9691136, - "combineLogicalBytes": 9691136, - "fanoutMean": 5.28125, - "recvTokensMax": 88, - "stragglerRank": 0, + "dispatchLogicalBytes": 9447424, + "combineLogicalBytes": 9447424, + "fanoutMean": 5.1484375, + "recvTokensMax": 91, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -6671,35 +6438,35 @@ "tokensPerRank": 32, "globalTokens": 256, "dispatch": { - "p50": 68.00000369548798, - "p90": 71.03999704122543, - "p95": 73.72800260782242, - "p99": 91.39200299978256 + "p50": 
98.08000177145004, + "p90": 107.29599744081497, + "p95": 110.30399799346924, + "p99": 118.6240017414093 }, "combine": { - "p50": 77.95199751853943, - "p90": 78.68800312280655, - "p95": 79.3600007891655, - "p99": 89.63199704885483 + "p50": 92.22400188446045, + "p90": 96.12800180912018, + "p95": 99.74399954080582, + "p99": 105.15200346708298 }, "roundtrip": { - "p50": 128.83199751377106, - "p90": 133.66399705410004, - "p95": 135.0719928741455, - "p99": 143.26399564743042 + "p50": 164.12800550460815, + "p90": 173.43999445438385, + "p95": 176.60799622535706, + "p99": 184.64000523090363 }, "isolatedSum": { - "p50": 145.9520012140274, - "p90": 149.72800016403198, - "p95": 153.08800339698792, - "p99": 181.0240000486374 + "p50": 190.3040036559105, + "p90": 203.42399924993515, + "p95": 210.04799753427505, + "p99": 223.77600520849228 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19568640, - "combineLogicalBytes": 19568640, - "fanoutMean": 5.33203125, - "recvTokensMax": 179, - "stragglerRank": 7, + "dispatchLogicalBytes": 19023872, + "combineLogicalBytes": 19023872, + "fanoutMean": 5.18359375, + "recvTokensMax": 178, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -6708,35 +6475,35 @@ "tokensPerRank": 64, "globalTokens": 512, "dispatch": { - "p50": 81.216000020504, - "p90": 87.93599903583527, - "p95": 89.15200084447861, - "p99": 95.0080007314682 + "p50": 109.63200032711029, + "p90": 117.60000139474869, + "p95": 120.38400024175644, + "p99": 127.6479959487915 }, "combine": { - "p50": 91.48799628019333, - "p90": 93.88799965381622, - "p95": 100.96000134944916, - "p99": 101.95200145244598 + "p50": 106.04800283908844, + "p90": 112.06399649381638, + "p95": 115.52000045776367, + "p99": 118.97599697113037 }, "roundtrip": { - "p50": 156.25600516796112, - "p90": 160.25599837303162, - "p95": 161.98399662971497, - "p99": 176.92799866199493 + "p50": 188.51199746131897, + "p90": 196.73599302768707, + "p95": 199.77599382400513, + "p99": 
204.70400154590607 }, "isolatedSum": { - "p50": 172.70399630069733, - "p90": 181.8239986896515, - "p95": 190.11200219392776, - "p99": 196.96000218391418 + "p50": 215.68000316619873, + "p90": 229.66399788856506, + "p95": 235.9040006995201, + "p99": 246.62399291992188 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 38750208, - "combineLogicalBytes": 38750208, - "fanoutMean": 5.279296875, - "recvTokensMax": 348, - "stragglerRank": 7, + "dispatchLogicalBytes": 38148096, + "combineLogicalBytes": 38148096, + "fanoutMean": 5.197265625, + "recvTokensMax": 350, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -6745,35 +6512,35 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 92.79999881982803, - "p90": 94.91200000047684, - "p95": 96.6079980134964, - "p99": 106.23999685049057 + "p50": 124.35200065374374, + "p90": 131.58400356769562, + "p95": 135.45599579811096, + "p99": 141.66399836540222 }, "combine": { - "p50": 114.78400230407715, - "p90": 116.41599982976913, - "p95": 117.66400188207626, - "p99": 128.60800325870514 + "p50": 126.91199779510498, + "p90": 132.192000746727, + "p95": 135.68000495433807, + "p99": 141.4400041103363 }, "roundtrip": { - "p50": 190.8479928970337, - "p90": 196.73599302768707, - "p95": 197.82400131225586, - "p99": 204.51200008392334 + "p50": 224.60800409317017, + "p90": 232.67200589179993, + "p95": 236.89599335193634, + "p99": 241.98399484157562 }, "isolatedSum": { - "p50": 207.58400112390518, - "p90": 211.32799983024597, - "p95": 214.27199989557266, - "p99": 234.8480001091957 + "p50": 251.26399844884872, + "p90": 263.7760043144226, + "p95": 271.13600075244904, + "p99": 283.1040024757385 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77342720, - "combineLogicalBytes": 77342720, - "fanoutMean": 5.2685546875, + "dispatchLogicalBytes": 76955648, + "combineLogicalBytes": 76955648, + "fanoutMean": 5.2421875, "recvTokensMax": 687, - "stragglerRank": 7, + "stragglerRank": 5, "correct": true, 
"samplesPooled": 600, "trials": 3 @@ -6781,16 +6548,16 @@ ] }, { - "id": "cx-f11d8dc8", - "identity": "b300|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|uniform+eplb|8|decode|normal|none|none|0|tuned||73351bbcd4d02de", - "colorKey": "b300_e3d449ce", - "comparisonKey": "6570d3a11ae9f14f", + "id": "cx-a1958791", + "identity": "gb200|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|decode|normal|none|none|0|tuned||431e44245dd1524", + "colorKey": "gb200_be611b2a", + "comparisonKey": "cb842765866b5c94", "schemaVersion": 3, - "generatedAt": "2026-06-27T09:47:37.037332+00:00", + "generatedAt": "2026-06-29T14:01:17.988369+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_01", - "sku": "b300", + "publicationStatus": "comparable-experimental", + "runner": "gb200-nv_0", + "sku": "gb200", "backend": "deepep", "phase": "decode", "mode": "normal", @@ -6798,30 +6565,31 @@ "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", + "topologyClass": "gb200-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep · bf16 · uniform+eplb", + "label": "GB200 EP8 · deepep · bf16 · zipf+eplb", "model": "DeepSeek-V3 (EPLB physical)", "shape": { "hidden": 7168, "topk": 8, "experts": 288, - "routing": "uniform", - "routingLabel": "uniform+eplb", + "routing": "zipf", + "routingLabel": "zipf+eplb", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": true, "dispatchDtype": "bf16", + "kernelGeneration": "v1", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1351, + "achievedFraction": 0.1316, "configuredUnits": 20, - "deviceUnits": 148, + "deviceUnits": 152, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -6829,59 +6597,59 @@ }, "placement": { 
"kind": "packed", - "nodes": 1, + "nodes": 2, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "73351bbcd4d02de", - "workloadId": "set:8:d8d49658059863f2", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": 1.078125, - "eplbImbalanceAfter": 1.00048828125, - "backendVersion": "1.2.1", + "traceSignature": "431e44245dd1524", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.927734375, + "eplbImbalanceAfter": 1.0006103515625, + "backendVersion": "1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28285602756", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285602756", - "createdAt": "2026-06-27T09:47:37.037332+00:00", - "sha": "149586650dbed5b7579537347e9489d5b41543c1" + "id": "28374335449", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449", + "createdAt": "2026-06-29T13:08:20Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 58.400001376867294, - "p90": 59.967998415231705, - "p95": 62.431998550891876, - "p99": 78.65600287914276 + "p50": 88.0960002541542, + "p90": 98.24000298976898, + "p95": 103.04000228643417, + "p99": 112.35199868679047 }, "combine": { - "p50": 66.04799628257751, - "p90": 66.59200042486191, - "p95": 67.71200150251389, - "p99": 77.56800204515457 + "p50": 79.3600007891655, + "p90": 83.39200168848038, + "p95": 86.75199747085571, + "p99": 93.6959981918335 }, "roundtrip": { - "p50": 107.19999670982361, - "p90": 113.18399757146835, - "p95": 114.62400108575821, - "p99": 137.5039964914322 + "p50": 144.51199769973755, + "p90": 154.11199629306793, + "p95": 156.51200711727142, + "p99": 160.8320027589798 }, "isolatedSum": { - "p50": 124.44799765944481, - "p90": 126.55999884009361, - "p95": 130.14400005340576, - "p99": 
156.22400492429733 + "p50": 167.4560010433197, + "p90": 181.63200467824936, + "p95": 189.7919997572899, + "p99": 206.04799687862396 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 516096, - "combineLogicalBytes": 516096, - "fanoutMean": 4.5, - "recvTokensMax": 6, - "stragglerRank": 7, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 5.375, + "recvTokensMax": 7, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -6890,35 +6658,35 @@ "tokensPerRank": 2, "globalTokens": 16, "dispatch": { - "p50": 58.75200033187866, - "p90": 60.03199890255928, - "p95": 60.67200005054474, - "p99": 65.60000032186508 + "p50": 87.5839963555336, + "p90": 98.49599748849869, + "p95": 102.14400291442871, + "p99": 110.72000116109848 }, "combine": { - "p50": 66.20799750089645, - "p90": 67.71200150251389, - "p95": 68.60800087451935, - "p99": 88.86399865150452 + "p50": 80.28800040483475, + "p90": 84.57600325345993, + "p95": 88.44800293445587, + "p99": 96.00000083446503 }, "roundtrip": { - "p50": 108.0000028014183, - "p90": 112.2559979557991, - "p95": 115.167997777462, - "p99": 124.03199821710587 + "p50": 146.04799449443817, + "p90": 157.27999806404114, + "p95": 159.87199544906616, + "p99": 166.59200191497803 }, "isolatedSum": { - "p50": 124.95999783277512, - "p90": 127.74400040507317, - "p95": 129.2800009250641, - "p99": 154.4639989733696 + "p50": 167.87199676036835, + "p90": 183.07200074195862, + "p95": 190.59200584888458, + "p99": 206.7200019955635 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1089536, - "combineLogicalBytes": 1089536, - "fanoutMean": 4.75, - "recvTokensMax": 11, - "stragglerRank": 7, + "dispatchLogicalBytes": 1204224, + "combineLogicalBytes": 1204224, + "fanoutMean": 5.25, + "recvTokensMax": 14, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -6927,35 +6695,35 @@ "tokensPerRank": 4, "globalTokens": 32, "dispatch": { - "p50": 59.167999774217606, - "p90": 61.85600161552429, - 
"p95": 63.840001821517944, - "p99": 70.3359991312027 + "p50": 90.81599861383438, + "p90": 101.50399804115295, + "p95": 107.00800269842148, + "p99": 116.44800007343292 }, "combine": { - "p50": 68.12799721956253, - "p90": 76.48000121116638, - "p95": 76.92799717187881, - "p99": 77.91999727487564 + "p50": 81.63200318813324, + "p90": 88.35200220346451, + "p95": 91.87199920415878, + "p99": 95.20000219345093 }, "roundtrip": { - "p50": 120.44800072908401, - "p90": 124.09599870443344, - "p95": 125.59999525547028, - "p99": 134.33599472045898 + "p50": 148.8959938287735, + "p90": 160.22400557994843, + "p95": 164.2879992723465, + "p99": 171.74400389194489 }, "isolatedSum": { - "p50": 127.29599699378014, - "p90": 138.33600282669067, - "p95": 140.76799899339676, - "p99": 148.25599640607834 + "p50": 172.44800180196762, + "p90": 189.85600024461746, + "p95": 198.88000190258026, + "p99": 211.64800226688385 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2207744, - "combineLogicalBytes": 2207744, - "fanoutMean": 4.8125, - "recvTokensMax": 23, - "stragglerRank": 7, + "dispatchLogicalBytes": 2394112, + "combineLogicalBytes": 2394112, + "fanoutMean": 5.21875, + "recvTokensMax": 24, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -6964,35 +6732,35 @@ "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 60.864001512527466, - "p90": 68.31999868154526, - "p95": 70.97599655389786, - "p99": 79.99999821186066 + "p50": 91.77599847316742, + "p90": 100.5759984254837, + "p95": 104.67199981212616, + "p99": 109.8880022764206 }, "combine": { - "p50": 68.31999868154526, - "p90": 77.11999863386154, - "p95": 77.79199630022049, - "p99": 79.42400127649307 + "p50": 83.00799876451492, + "p90": 90.55999666452408, + "p95": 92.86399930715561, + "p99": 100.16000270843506 }, "roundtrip": { - "p50": 121.60000205039978, - "p90": 125.91999769210815, - "p95": 127.03999876976013, - "p99": 133.08799266815186 + "p50": 152.5759994983673, + "p90": 161.76000237464905, + "p95": 
164.63999450206757, + "p99": 172.92800545692444 }, "isolatedSum": { - "p50": 129.18400019407272, - "p90": 145.4399973154068, - "p95": 148.76799285411835, - "p99": 159.42399948835373 + "p50": 174.78399723768234, + "p90": 191.13599509000778, + "p95": 197.53599911928177, + "p99": 210.04800498485565 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4558848, - "combineLogicalBytes": 4558848, - "fanoutMean": 4.96875, - "recvTokensMax": 46, - "stragglerRank": 6, + "dispatchLogicalBytes": 4630528, + "combineLogicalBytes": 4630528, + "fanoutMean": 5.046875, + "recvTokensMax": 45, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 @@ -7001,35 +6769,35 @@ "tokensPerRank": 16, "globalTokens": 128, "dispatch": { - "p50": 67.45599955320358, - "p90": 73.37599992752075, - "p95": 74.5600014925003, - "p99": 81.40800148248672 + "p50": 94.24000233411789, + "p90": 102.46399790048599, + "p95": 105.82400113344193, + "p99": 113.50400000810623 }, "combine": { - "p50": 68.9919963479042, - "p90": 78.20799946784973, - "p95": 78.46400141716003, - "p99": 81.15199953317642 + "p50": 85.1840004324913, + "p90": 91.80799871683121, + "p95": 93.44000369310379, + "p99": 96.28800302743912 }, "roundtrip": { - "p50": 121.15199863910675, - "p90": 124.25599992275238, - "p95": 126.01600587368011, - "p99": 138.97599279880524 + "p50": 154.4640064239502, + "p90": 163.00800442695618, + "p95": 166.59200191497803, + "p99": 175.4239946603775 }, "isolatedSum": { - "p50": 136.4479959011078, - "p90": 151.58399939537048, - "p95": 153.02400290966034, - "p99": 162.56000101566315 + "p50": 179.4240027666092, + "p90": 194.2719966173172, + "p95": 199.26400482654572, + "p99": 209.79200303554535 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 9347072, - "combineLogicalBytes": 9347072, - "fanoutMean": 5.09375, - "recvTokensMax": 86, - "stragglerRank": 7, + "dispatchLogicalBytes": 9447424, + "combineLogicalBytes": 9447424, + "fanoutMean": 5.1484375, + "recvTokensMax": 91, + "stragglerRank": 5, 
"correct": true, "samplesPooled": 600, "trials": 3 @@ -7038,35 +6806,35 @@ "tokensPerRank": 32, "globalTokens": 256, "dispatch": { - "p50": 70.43199986219406, - "p90": 71.80800288915634, - "p95": 73.47200065851212, - "p99": 81.44000172615051 + "p50": 97.24800288677216, + "p90": 106.72000050544739, + "p95": 109.66400057077408, + "p99": 117.44000017642975 }, "combine": { - "p50": 78.46400141716003, - "p90": 79.39200103282928, - "p95": 80.99199831485748, - "p99": 92.12800115346909 + "p50": 92.44800359010696, + "p90": 95.87199985980988, + "p95": 98.52799773216248, + "p99": 103.2319962978363 }, "roundtrip": { - "p50": 132.7359974384308, - "p90": 137.56799697875977, - "p95": 138.62399756908417, - "p99": 143.71199905872345 + "p50": 161.6320013999939, + "p90": 170.56000232696533, + "p95": 173.40800166130066, + "p99": 179.1359931230545 }, "isolatedSum": { - "p50": 148.8960012793541, - "p90": 151.20000392198563, - "p95": 154.4639989733696, - "p99": 173.5680028796196 + "p50": 189.69600647687912, + "p90": 202.59200036525726, + "p95": 208.19199830293655, + "p99": 220.67199647426605 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 18995200, - "combineLogicalBytes": 18995200, - "fanoutMean": 5.17578125, + "dispatchLogicalBytes": 19023872, + "combineLogicalBytes": 19023872, + "fanoutMean": 5.18359375, "recvTokensMax": 178, - "stragglerRank": 7, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -7075,35 +6843,35 @@ "tokensPerRank": 64, "globalTokens": 512, "dispatch": { - "p50": 83.16799998283386, - "p90": 88.95999938249588, - "p95": 89.82399851083755, - "p99": 93.85599941015244 + "p50": 109.02400314807892, + "p90": 116.57600104808807, + "p95": 119.19999867677689, + "p99": 127.07200646400452 }, "combine": { - "p50": 91.58399701118469, - "p90": 93.82399916648865, - "p95": 101.1200025677681, - "p99": 114.68800157308578 + "p50": 106.39999806880951, + "p90": 112.57600039243698, + "p95": 115.87200313806534, + "p99": 120.31999975442886 }, "roundtrip": { - 
"p50": 159.42400693893433, - "p90": 163.10399770736694, - "p95": 164.35199975967407, - "p99": 169.37600076198578 + "p50": 187.55200505256653, + "p90": 195.3279972076416, + "p95": 198.7520009279251, + "p99": 204.48000729084015 }, "isolatedSum": { - "p50": 174.75199699401855, - "p90": 182.78399854898453, - "p95": 190.94400107860565, - "p99": 208.54400098323822 + "p50": 215.42400121688843, + "p90": 229.15200144052505, + "p95": 235.07200181484222, + "p99": 247.39200621843338 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 38291456, - "combineLogicalBytes": 38291456, - "fanoutMean": 5.216796875, - "recvTokensMax": 348, - "stragglerRank": 7, + "dispatchLogicalBytes": 38148096, + "combineLogicalBytes": 38148096, + "fanoutMean": 5.197265625, + "recvTokensMax": 350, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -7112,35 +6880,35 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 95.2640026807785, - "p90": 97.59999811649323, - "p95": 99.90400075912476, - "p99": 116.28799885511398 + "p50": 123.64800274372101, + "p90": 130.8159977197647, + "p95": 133.760005235672, + "p99": 140.35199582576752 }, "combine": { - "p50": 115.23199826478958, - "p90": 115.84000289440155, - "p95": 116.38399958610535, - "p99": 126.20800733566284 + "p50": 127.71199643611908, + "p90": 132.54399597644806, + "p95": 135.71199774742126, + "p99": 143.39199662208557 }, "roundtrip": { - "p50": 193.9840018749237, - "p90": 199.64799284934998, - "p95": 200.6399929523468, - "p99": 210.52800118923187 + "p50": 223.87200593948364, + "p90": 230.84799945354462, + "p95": 234.72000658512115, + "p99": 240.60800671577454 }, "isolatedSum": { - "p50": 210.49600094556808, - "p90": 213.44000101089478, - "p95": 216.2880003452301, - "p99": 242.49600619077682 + "p50": 251.3599991798401, + "p90": 263.35999369621277, + "p95": 269.47200298309326, + "p99": 283.7439924478531 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77113344, - "combineLogicalBytes": 77113344, - 
"fanoutMean": 5.2529296875, - "recvTokensMax": 685, - "stragglerRank": 7, + "dispatchLogicalBytes": 76955648, + "combineLogicalBytes": 76955648, + "fanoutMean": 5.2421875, + "recvTokensMax": 687, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -7148,47 +6916,48 @@ ] }, { - "id": "cx-7d11224e", - "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|decode|normal|none|none|0|tuned||14ded8461f2636c", - "colorKey": "b300_8d2811e3", - "comparisonKey": "801e704d68c28ca9", + "id": "cx-063a34f6", + "identity": "gb200|deepep|v1|7168|8|256|bf16|normal|cached-layout-comm-only-v1|uniform|8|decode|normal|none|none|0|tuned||74444524b5db510", + "colorKey": "gb200_42130d21", + "comparisonKey": "386f464c43c562a3", "schemaVersion": 3, - "generatedAt": "2026-06-27T09:48:25.920368+00:00", + "generatedAt": "2026-06-29T13:48:13.108347+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_09", - "sku": "b300", + "publicationStatus": "comparable-experimental", + "runner": "gb200-nv_0", + "sku": "gb200", "backend": "deepep", "phase": "decode", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", + "measurementContract": "cached-layout-comm-only-v1", + "topologyClass": "gb200-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep · bf16 · zipf", + "label": "GB200 EP8 · deepep · bf16 [cl]", "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, "experts": 256, - "routing": "zipf", - "routingLabel": "zipf", + "routing": "uniform", + "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, "dispatchDtype": "bf16", + "kernelGeneration": "v1", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - 
"achievedFraction": 0.1351, + "achievedFraction": 0.1316, "configuredUnits": 20, - "deviceUnits": 148, + "deviceUnits": 152, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -7196,59 +6965,59 @@ }, "placement": { "kind": "packed", - "nodes": 1, + "nodes": 2, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "14ded8461f2636c", - "workloadId": "set:8:f5576e2b712d38c3", - "workloadSource": "canonical-serialized", + "traceSignature": "74444524b5db510", + "workloadId": null, + "workloadSource": "seeded-runtime", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", + "backendVersion": "1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28285620595", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285620595", - "createdAt": "2026-06-27T09:48:25.920368+00:00", - "sha": "149586650dbed5b7579537347e9489d5b41543c1" + "id": "28374335449", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449", + "createdAt": "2026-06-29T13:08:20Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 56.384000927209854, - "p90": 58.81600081920624, - "p95": 61.37600168585777, - "p99": 80.60800284147263 + "p50": 75.1039981842041, + "p90": 87.80799806118011, + "p95": 92.38400310277939, + "p99": 98.49599748849869 }, "combine": { - "p50": 65.47199934720993, - "p90": 66.3679987192154, - "p95": 66.72000139951706, - "p99": 68.09599697589874 + "p50": 81.98399841785431, + "p90": 87.99999952316284, + "p95": 92.6079973578453, + "p99": 97.82399982213974 }, "roundtrip": { - "p50": 107.42399841547012, - "p90": 111.84000223875046, - "p95": 112.96000331640244, - "p99": 126.14400684833527 + "p50": 137.02400028705597, + "p90": 149.4079977273941, + "p95": 
153.79199385643005, + "p99": 161.82400286197662 }, "isolatedSum": { - "p50": 121.85600027441978, - "p90": 125.18399953842163, - "p95": 128.09600308537483, - "p99": 148.70399981737137 + "p50": 157.0879966020584, + "p90": 175.80799758434296, + "p95": 184.9920004606247, + "p99": 196.31999731063843 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 444416, - "combineLogicalBytes": 444416, - "fanoutMean": 3.875, - "recvTokensMax": 8, - "stragglerRank": 7, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -7257,35 +7026,35 @@ "tokensPerRank": 2, "globalTokens": 16, "dispatch": { - "p50": 56.2559999525547, - "p90": 58.33600088953972, - "p95": 60.447998344898224, - "p99": 72.83200323581696 + "p50": 76.89599692821503, + "p90": 94.55999732017517, + "p95": 100.00000149011612, + "p99": 114.04799669981003 }, "combine": { - "p50": 66.01600348949432, - "p90": 66.68800115585327, - "p95": 67.48799979686737, - "p99": 91.90399944782257 + "p50": 83.64800363779068, + "p90": 95.8079993724823, + "p95": 98.01600128412247, + "p99": 141.27999544143677 }, "roundtrip": { - "p50": 105.02400249242783, - "p90": 112.41599917411804, - "p95": 113.0559965968132, - "p99": 119.64800208806992 + "p50": 138.87999951839447, + "p90": 154.59200739860535, + "p95": 159.55199301242828, + "p99": 173.7920045852661 }, "isolatedSum": { - "p50": 122.27200344204903, - "p90": 125.02400204539299, - "p95": 127.9359981417656, - "p99": 164.73600268363953 + "p50": 160.5440005660057, + "p90": 190.36799669265747, + "p95": 198.0160027742386, + "p99": 255.3279921412468 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 845824, - "combineLogicalBytes": 845824, - "fanoutMean": 3.6875, - "recvTokensMax": 16, - "stragglerRank": 7, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 4, "correct": true, 
"samplesPooled": 600, "trials": 3 @@ -7294,34 +7063,34 @@ "tokensPerRank": 4, "globalTokens": 32, "dispatch": { - "p50": 56.73599988222122, - "p90": 59.29600074887276, - "p95": 60.99199876189232, - "p99": 73.11999797821045 + "p50": 76.60800218582153, + "p90": 88.76799792051315, + "p95": 92.12800115346909, + "p99": 102.11200267076492 }, "combine": { - "p50": 66.20799750089645, - "p90": 67.55200028419495, - "p95": 68.80000233650208, - "p99": 79.74400371313095 + "p50": 83.64800363779068, + "p90": 89.79199826717377, + "p95": 94.46399658918381, + "p99": 97.37599641084671 }, "roundtrip": { - "p50": 105.85600137710571, - "p90": 108.73600095510483, - "p95": 110.43199896812439, - "p99": 124.92799758911133 + "p50": 139.16799426078796, + "p90": 150.7200002670288, + "p95": 155.35999834537506, + "p99": 163.4880006313324 }, "isolatedSum": { - "p50": 122.94399738311768, - "p90": 126.8480010330677, - "p95": 129.7920010983944, - "p99": 152.8640016913414 + "p50": 160.2560058236122, + "p90": 178.55999618768692, + "p95": 186.5919977426529, + "p99": 199.48799908161163 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1691648, - "combineLogicalBytes": 1691648, - "fanoutMean": 3.6875, - "recvTokensMax": 32, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, "stragglerRank": 4, "correct": true, "samplesPooled": 600, @@ -7331,35 +7100,35 @@ "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 58.30400064587593, - "p90": 64.57599997520447, - "p95": 65.85600227117538, - "p99": 70.88000327348709 + "p50": 81.34400099515915, + "p90": 94.33600306510925, + "p95": 97.120001912117, + "p99": 110.3999987244606 }, "combine": { - "p50": 66.81600213050842, - "p90": 68.51200014352798, - "p95": 69.023996591568, - "p99": 78.17599922418594 + "p50": 84.86399799585342, + "p90": 93.1520015001297, + "p95": 95.64799815416336, + "p99": 103.10400277376175 }, "roundtrip": { - "p50": 114.56000059843063, - "p90": 121.15199863910675, - 
"p95": 122.5920021533966, - "p99": 138.72000575065613 + "p50": 141.76000654697418, + "p90": 153.888002038002, + "p95": 156.99200332164764, + "p99": 164.2560064792633 }, "isolatedSum": { - "p50": 125.12000277638435, - "p90": 133.08800011873245, - "p95": 134.87999886274338, - "p99": 149.05600249767303 + "p50": 166.20799899101257, + "p90": 187.48800456523895, + "p95": 192.76800006628036, + "p99": 213.50400149822235 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 3354624, - "combineLogicalBytes": 3354624, - "fanoutMean": 3.65625, - "recvTokensMax": 64, - "stragglerRank": 7, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -7368,35 +7137,35 @@ "tokensPerRank": 16, "globalTokens": 128, "dispatch": { - "p50": 59.29600074887276, - "p90": 63.45599889755249, - "p95": 66.3679987192154, - "p99": 85.82399785518646 + "p50": 82.46400207281113, + "p90": 92.22400188446045, + "p95": 96.16000205278397, + "p99": 102.08000242710114 }, "combine": { - "p50": 67.03999638557434, - "p90": 69.023996591568, - "p95": 70.3359991312027, - "p99": 79.93599772453308 + "p50": 86.17600053548813, + "p90": 95.2640026807785, + "p95": 96.8639999628067, + "p99": 105.27999699115753 }, "roundtrip": { - "p50": 122.6240023970604, - "p90": 125.66399574279785, - "p95": 126.65599584579468, - "p99": 131.9359987974167 + "p50": 144.96000111103058, + "p90": 157.02399611473083, + "p95": 159.67999398708344, + "p99": 168.96000504493713 }, "isolatedSum": { - "p50": 126.3359971344471, - "p90": 132.47999548912048, - "p95": 136.7039978504181, - "p99": 165.75999557971954 + "p50": 168.64000260829926, + "p90": 187.48800456523895, + "p95": 193.02400201559067, + "p99": 207.35999941825867 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 6537216, - "combineLogicalBytes": 6537216, - "fanoutMean": 3.5625, - "recvTokensMax": 127, - "stragglerRank": 7, + 
"dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -7405,35 +7174,35 @@ "tokensPerRank": 32, "globalTokens": 256, "dispatch": { - "p50": 74.14399832487106, - "p90": 76.54400169849396, - "p95": 77.85599678754807, - "p99": 89.4400030374527 + "p50": 87.10400015115738, + "p90": 123.99999797344208, + "p95": 142.56000518798828, + "p99": 181.08800053596497 }, "combine": { - "p50": 77.11999863386154, - "p90": 78.52800190448761, - "p95": 78.68800312280655, - "p99": 89.4400030374527 + "p50": 95.16800194978714, + "p90": 110.81600189208984, + "p95": 121.56800180673599, + "p99": 172.41600155830383 }, "roundtrip": { - "p50": 127.10399925708771, - "p90": 132.1280002593994, - "p95": 133.760005235672, - "p99": 136.3839954137802 + "p50": 153.85599434375763, + "p90": 186.94399297237396, + "p95": 214.27200734615326, + "p99": 252.79998779296875 }, "isolatedSum": { - "p50": 151.2639969587326, - "p90": 155.07200360298157, - "p95": 156.54399991035461, - "p99": 178.8800060749054 + "p50": 182.27200210094452, + "p90": 234.81599986553192, + "p95": 264.1280069947243, + "p99": 353.5040020942688 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 12859392, - "combineLogicalBytes": 12859392, - "fanoutMean": 3.50390625, - "recvTokensMax": 255, - "stragglerRank": 7, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -7442,35 +7211,35 @@ "tokensPerRank": 64, "globalTokens": 512, "dispatch": { - "p50": 79.77599650621414, - "p90": 81.53600245714188, - "p95": 82.8159973025322, - "p99": 89.9839997291565 + "p50": 96.22400254011154, + "p90": 104.19200360774994, + "p95": 108.57599973678589, + "p99": 115.10399729013443 }, "combine": { - "p50": 90.87999910116196, - "p90": 102.88000106811523, - "p95": 104.41599786281586, - 
"p99": 115.58400094509125 + "p50": 107.07200318574905, + "p90": 112.5119999051094, + "p95": 116.92799627780914, + "p99": 122.43200093507767 }, "roundtrip": { - "p50": 157.95199573040009, - "p90": 162.59199380874634, - "p95": 164.19200599193573, - "p99": 182.68799781799316 + "p50": 174.84800517559052, + "p90": 183.03999304771423, + "p95": 185.31200289726257, + "p99": 190.2720034122467 }, "isolatedSum": { - "p50": 170.6559956073761, - "p90": 184.4160035252571, - "p95": 187.23199516534805, - "p99": 205.56800067424774 + "p50": 203.2960057258606, + "p90": 216.70400351285934, + "p95": 225.50399601459503, + "p99": 237.5359982252121 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 25145344, - "combineLogicalBytes": 25145344, - "fanoutMean": 3.42578125, - "recvTokensMax": 510, - "stragglerRank": 5, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 @@ -7479,35 +7248,35 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 101.24800354242325, - "p90": 104.22399640083313, - "p95": 105.3759977221489, - "p99": 124.67200309038162 + "p50": 109.6000000834465, + "p90": 117.60000139474869, + "p95": 121.11999839544296, + "p99": 127.10399925708771 }, "combine": { - "p50": 126.17599964141846, - "p90": 127.71199643611908, - "p95": 128.31999361515045, - "p99": 139.93600010871887 + "p50": 126.97599828243256, + "p90": 133.15199315547943, + "p95": 134.68800485134125, + "p99": 140.1599943637848 }, "roundtrip": { - "p50": 208.92800390720367, - "p90": 213.76000344753265, - "p95": 214.78399634361267, - "p99": 229.0239930152893 + "p50": 211.64800226688385, + "p90": 227.80799865722656, + "p95": 236.86400055885315, + "p99": 291.20001196861267 }, "isolatedSum": { - "p50": 227.4240031838417, - "p90": 231.9359928369522, - "p95": 233.69599133729935, - "p99": 264.6080031991005 + "p50": 236.57599836587906, + "p90": 250.75199455022812, + 
"p95": 255.8080032467842, + "p99": 267.2639936208725 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 49946624, - "combineLogicalBytes": 49946624, - "fanoutMean": 3.40234375, - "recvTokensMax": 1022, - "stragglerRank": 4, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -7515,107 +7284,182 @@ ] }, { - "id": "cx-cc647506", - "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|decode|normal|none|none|0|tuned||1fa7fe74d0e30a3", - "colorKey": "b300_8d2811e3", - "comparisonKey": "478acd4108c50326", + "id": "cx-fea7e1cd", + "identity": "gb200|deepep|v1|7168|8|256|bf16|ll|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||74444524b5db510", + "colorKey": "gb200_e13e1290", + "comparisonKey": "69a9fc41fa25ee9c", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:58:32.426052+00:00", + "generatedAt": "2026-06-29T13:55:18.057516+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_05", - "sku": "b300", + "publicationStatus": "comparable-experimental", + "runner": "gb200-nv_1", + "sku": "gb200", "backend": "deepep", "phase": "decode", - "mode": "normal", + "mode": "ll", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", + "topologyClass": "gb200-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep · bf16 · zipf", + "label": "GB200 EP8 · deepep · bf16 LL", "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, "experts": 256, - "routing": "zipf", - "routingLabel": "zipf", + "routing": "uniform", + "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, "dispatchDtype": "bf16", + "kernelGeneration": "v1", 
"activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1351, - "configuredUnits": 20, - "deviceUnits": 148, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, "paretoEligible": false }, "placement": { "kind": "packed", - "nodes": 1, + "nodes": 2, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "1fa7fe74d0e30a3", - "workloadId": "set:4:f5576e2b712d38c3", - "workloadSource": "canonical-serialized", + "traceSignature": "74444524b5db510", + "workloadId": null, + "workloadSource": "seeded-runtime", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", + "backendVersion": "1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271886823", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271886823", - "createdAt": "2026-06-26T23:58:32.426052+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28374335449", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449", + "createdAt": "2026-06-29T13:08:20Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 56.703999638557434, - "p90": 59.90400165319443, - "p95": 62.65600025653839, - "p99": 69.98399645090103 + "p50": 85.60000360012054, + "p90": 315.42399525642395, + "p95": 344.31999921798706, + "p99": 359.9039912223816 }, "combine": { - "p50": 65.88800251483917, - "p90": 66.43199920654297, - "p95": 66.72000139951706, - "p99": 73.7600028514862 + "p50": 77.504001557827, + "p90": 85.7279971241951, + "p95": 
87.39200234413147, + "p99": 94.24000233411789 }, "roundtrip": { - "p50": 107.16799646615982, - "p90": 112.83200234174728, - "p95": 114.14399743080139, - "p99": 120.44800072908401 + "p50": 115.77600240707397, + "p90": 322.52800464630127, + "p95": 349.40800070762634, + "p99": 384.64000821113586 }, "isolatedSum": { - "p50": 122.5920021533966, - "p90": 126.3360008597374, - "p95": 129.37600165605545, - "p99": 143.74399930238724 + "p50": 163.10400515794754, + "p90": 401.15199238061905, + "p95": 431.71200156211853, + "p99": 454.1439935564995 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 444416, - "combineLogicalBytes": 444416, - "fanoutMean": 3.875, - "recvTokensMax": 8, - "stragglerRank": 7, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 14, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 88.57599645853043, + "p90": 314.39998745918274, + "p95": 344.5119857788086, + "p99": 376.3839900493622 + }, + "combine": { + "p50": 75.07199794054031, + "p90": 86.36800199747086, + "p95": 88.19200098514557, + "p99": 95.58399766683578 + }, + "roundtrip": { + "p50": 117.95199662446976, + "p90": 337.92001008987427, + "p95": 386.78398728370667, + "p99": 395.26399970054626 + }, + "isolatedSum": { + "p50": 163.64799439907074, + "p90": 400.7679894566536, + "p95": 432.70398676395416, + "p99": 471.96798771619797 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 21, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 91.48799628019333, + "p90": 312.9599988460541, + "p95": 336.64000034332275, + "p99": 369.59999799728394 + }, + "combine": { + "p50": 82.87999778985977, + "p90": 94.40000355243683, + "p95": 95.64799815416336, + 
"p99": 98.04800152778625 + }, + "roundtrip": { + "p50": 123.9359974861145, + "p90": 347.80800342559814, + "p95": 385.72800159454346, + "p99": 398.97599816322327 + }, + "isolatedSum": { + "p50": 174.3679940700531, + "p90": 407.3600023984909, + "p95": 432.2879984974861, + "p99": 467.6479995250702 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 39, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -7624,35 +7468,72 @@ "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 58.848001062870026, - "p90": 60.80000102519989, - "p95": 62.84800171852112, - "p99": 74.40000027418137 + "p50": 98.84800016880035, + "p90": 298.2400059700012, + "p95": 344.57600116729736, + "p99": 375.5840063095093 }, "combine": { - "p50": 68.00000369548798, - "p90": 70.30399888753891, - "p95": 76.99199765920639, - "p99": 78.5600021481514 + "p50": 94.52799707651138, + "p90": 105.31199723482132, + "p95": 107.68000036478043, + "p99": 314.1759932041168 }, "roundtrip": { - "p50": 116.54400080442429, - "p90": 123.29600006341934, - "p95": 124.83199685811996, - "p99": 130.46400249004364 + "p50": 132.1280002593994, + "p90": 362.2719943523407, + "p95": 400.41598677635193, + "p99": 413.2480025291443 }, "isolatedSum": { - "p50": 126.848004758358, - "p90": 131.1039999127388, - "p95": 139.8399993777275, - "p99": 152.96000242233276 + "p50": 193.37599724531174, + "p90": 403.55200320482254, + "p95": 452.2560015320778, + "p99": 689.7599995136261 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 3354624, - "combineLogicalBytes": 3354624, - "fanoutMean": 3.65625, - "recvTokensMax": 64, - "stragglerRank": 7, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 74, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 
106.36799782514572, + "p90": 313.4720027446747, + "p95": 347.4560081958771, + "p99": 373.24801087379456 + }, + "combine": { + "p50": 96.67199850082397, + "p90": 120.41600048542023, + "p95": 121.56800180673599, + "p99": 124.54400211572647 + }, + "roundtrip": { + "p50": 154.2080044746399, + "p90": 408.25599431991577, + "p95": 423.6159920692444, + "p99": 434.3680143356323 + }, + "isolatedSum": { + "p50": 203.0399963259697, + "p90": 433.8880032300949, + "p95": 469.02401000261307, + "p99": 497.792012989521 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 145, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -7661,35 +7542,72 @@ "tokensPerRank": 32, "globalTokens": 256, "dispatch": { - "p50": 75.9039968252182, - "p90": 78.27199995517731, - "p95": 79.52000200748444, - "p99": 87.5839963555336 + "p50": 125.791996717453, + "p90": 322.33598828315735, + "p95": 359.74401235580444, + "p99": 386.2079977989197 }, "combine": { - "p50": 78.40000092983246, - "p90": 79.19999957084656, - "p95": 79.71200346946716, - "p99": 83.64800363779068 + "p50": 122.6240023970604, + "p90": 217.82399713993073, + "p95": 219.90400552749634, + "p99": 228.67199778556824 }, "roundtrip": { - "p50": 134.24000144004822, - "p90": 138.20800185203552, - "p95": 139.5840048789978, - "p99": 144.3520039319992 + "p50": 211.07199788093567, + "p90": 437.79200315475464, + "p95": 477.60000824928284, + "p99": 492.92799830436707 }, "isolatedSum": { - "p50": 154.30399775505066, - "p90": 157.47199952602386, - "p95": 159.2320054769516, - "p99": 171.23199999332428 + "p50": 248.4159991145134, + "p90": 540.1599854230881, + "p95": 579.6480178833008, + "p99": 614.8799955844879 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 12859392, - "combineLogicalBytes": 12859392, - "fanoutMean": 3.50390625, - "recvTokensMax": 255, - "stragglerRank": 7, + "dispatchLogicalBytes": 19726336, + 
"combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 287, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 351.39200091362, + "p90": 369.376003742218, + "p95": 553.4080266952515, + "p99": 628.7680268287659 + }, + "combine": { + "p50": 358.62401127815247, + "p90": 366.7519986629486, + "p95": 368.4479892253876, + "p99": 375.67999958992004 + }, + "roundtrip": { + "p50": 397.21599221229553, + "p90": 642.3680186271667, + "p95": 668.1280136108398, + "p99": 700.4799842834473 + }, + "isolatedSum": { + "p50": 710.0160121917725, + "p90": 736.1280024051666, + "p95": 921.856015920639, + "p99": 1004.4480264186859 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 564, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -7698,35 +7616,35 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 103.07200253009796, - "p90": 105.98400235176086, - "p95": 107.04000294208527, - "p99": 113.21599781513214 + "p50": 596.9600081443787, + "p90": 619.4239854812622, + "p95": 630.4000020027161, + "p99": 876.1919736862183 }, "combine": { - "p50": 127.13600695133209, - "p90": 128.1599998474121, - "p95": 128.57599556446075, - "p99": 131.04000687599182 + "p50": 623.8399744033813, + "p90": 634.880006313324, + "p95": 636.031985282898, + "p99": 638.9439702033997 }, "roundtrip": { - "p50": 209.1200053691864, - "p90": 214.30400013923645, - "p95": 216.12800657749176, - "p99": 229.66399788856506 + "p50": 675.6160259246826, + "p90": 928.5439848899841, + "p95": 978.0799746513367, + "p99": 1023.2959985733032 }, "isolatedSum": { - "p50": 230.20800948143005, - "p90": 234.14400219917297, - "p95": 235.61599850654602, - "p99": 244.25600469112396 + "p50": 1220.79998254776, + "p90": 1254.3039917945862, + "p95": 1266.431987285614, + "p99": 
1515.135943889618 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 49946624, - "combineLogicalBytes": 49946624, - "fanoutMean": 3.40234375, - "recvTokensMax": 1022, - "stragglerRank": 7, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 1104, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -7734,107 +7652,108 @@ ] }, { - "id": "cx-17ec2f4d", - "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|decode|normal|none|none|0|tuned||22da8b58646609c", - "colorKey": "b300_2e44c039", - "comparisonKey": "c7065362244c808a", + "id": "cx-67003169", + "identity": "gb200|deepep|v1|7168|8|256|bf16|ll|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||74444524b5db510", + "colorKey": "gb200_09c517b8", + "comparisonKey": "068a135c875dc310", "schemaVersion": 3, - "generatedAt": "2026-06-27T09:49:49.525819+00:00", + "generatedAt": "2026-06-29T13:55:48.669860+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_09", - "sku": "b300", + "publicationStatus": "comparable-experimental", + "runner": "gb200-nv_1", + "sku": "gb200", "backend": "deepep", "phase": "decode", - "mode": "normal", + "mode": "ll", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", + "measurementContract": "runtime-visible-v1", + "topologyClass": "gb200-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep · bf16 · zipf-heavy", + "label": "GB200 EP8 · deepep · bf16 LL", "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, "experts": 256, - "routing": "zipf-heavy", - "routingLabel": "zipf-heavy", + "routing": "uniform", + "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, "dispatchDtype": "bf16", + 
"kernelGeneration": "v1", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1351, - "configuredUnits": 20, - "deviceUnits": 148, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, "paretoEligible": false }, "placement": { "kind": "packed", - "nodes": 1, + "nodes": 2, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "22da8b58646609c", - "workloadId": "set:8:6b84350720aa8233", - "workloadSource": "canonical-serialized", + "traceSignature": "74444524b5db510", + "workloadId": null, + "workloadSource": "seeded-runtime", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", + "backendVersion": "1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28285651441", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285651441", - "createdAt": "2026-06-27T09:49:49.525819+00:00", - "sha": "149586650dbed5b7579537347e9489d5b41543c1" + "id": "28374335449", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449", + "createdAt": "2026-06-29T13:08:20Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 58.62399935722351, - "p90": 66.04799628257751, - "p95": 68.03199648857117, - "p99": 76.19199901819229 + "p50": 80.70400357246399, + "p90": 308.51200222969055, + "p95": 334.1439962387085, + "p99": 359.9039912223816 }, "combine": { - "p50": 56.352000683546066, - "p90": 64.7680014371872, - "p95": 64.99200314283371, - "p99": 68.06399673223495 + "p50": 83.74399691820145, + "p90": 
88.83199840784073, + "p95": 94.01600062847137, + "p99": 98.65599870681763 }, "roundtrip": { - "p50": 95.45599669218063, - "p90": 100.96000134944916, - "p95": 103.04000228643417, - "p99": 108.96000266075134 + "p50": 114.14399743080139, + "p90": 330.52799105644226, + "p95": 371.8400001525879, + "p99": 390.6559944152832 }, "isolatedSum": { - "p50": 114.97600004076958, - "p90": 130.8159977197647, - "p95": 133.02399963140488, - "p99": 144.25599575042725 + "p50": 164.44800049066544, + "p90": 397.3440006375313, + "p95": 428.15999686717987, + "p99": 458.5599899291992 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 172032, - "combineLogicalBytes": 172032, - "fanoutMean": 1.5, - "recvTokensMax": 8, - "stragglerRank": 6, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 14, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -7843,35 +7762,35 @@ "tokensPerRank": 2, "globalTokens": 16, "dispatch": { - "p50": 57.24800005555153, - "p90": 59.7120001912117, - "p95": 62.49599903821945, - "p99": 69.72800195217133 + "p50": 87.42400258779526, + "p90": 318.015992641449, + "p95": 347.135990858078, + "p99": 371.16798758506775 }, "combine": { - "p50": 56.223999708890915, - "p90": 64.7680014371872, - "p95": 65.0240033864975, - "p99": 66.17599725723267 + "p50": 81.727996468544, + "p90": 87.99999952316284, + "p95": 93.47199648618698, + "p99": 94.94400024414062 }, "roundtrip": { - "p50": 98.04800152778625, - "p90": 104.25599664449692, - "p95": 105.6319996714592, - "p99": 116.03199690580368 + "p50": 117.15199798345566, + "p90": 339.35999870300293, + "p95": 386.55999302864075, + "p99": 400.9920060634613 }, "isolatedSum": { - "p50": 113.47199976444244, - "p90": 124.4800016283989, - "p95": 127.52000242471695, - "p99": 135.903999209404 + "p50": 169.15199905633926, + "p90": 406.0159921646118, + "p95": 440.607987344265, + "p99": 466.1119878292084 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 315392, - 
"combineLogicalBytes": 315392, - "fanoutMean": 1.375, - "recvTokensMax": 16, - "stragglerRank": 6, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 21, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -7880,34 +7799,34 @@ "tokensPerRank": 4, "globalTokens": 32, "dispatch": { - "p50": 57.37600103020668, - "p90": 59.51999872922897, - "p95": 61.91999837756157, - "p99": 67.87200272083282 + "p50": 89.82399851083755, + "p90": 309.82398986816406, + "p95": 332.09601044654846, + "p99": 371.8720078468323 }, "combine": { - "p50": 56.51199817657471, - "p90": 64.89600241184235, - "p95": 65.5680000782013, - "p99": 68.86400282382965 + "p50": 82.91199803352356, + "p90": 94.71999853849411, + "p95": 95.67999839782715, + "p99": 98.2080027461052 }, "roundtrip": { - "p50": 102.33599692583084, - "p90": 107.39199817180634, - "p95": 110.1439967751503, - "p99": 116.80000275373459 + "p50": 124.38400089740753, + "p90": 381.24799728393555, + "p95": 397.3439931869507, + "p99": 415.45599699020386 }, "isolatedSum": { - "p50": 113.88799920678139, - "p90": 124.41600114107132, - "p95": 127.48799845576286, - "p99": 136.73600554466248 + "p50": 172.73599654436111, + "p90": 404.5439884066582, + "p95": 427.7760088443756, + "p99": 470.08001059293747 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 616448, - "combineLogicalBytes": 616448, - "fanoutMean": 1.34375, - "recvTokensMax": 32, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 39, "stragglerRank": 4, "correct": true, "samplesPooled": 600, @@ -7917,34 +7836,34 @@ "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 58.04799869656563, - "p90": 60.575999319553375, - "p95": 64.09599632024765, - "p99": 75.16799867153168 + "p50": 98.78399968147278, + "p90": 323.87199997901917, + "p95": 358.97600650787354, + "p99": 381.53600692749023 }, "combine": { - "p50": 56.8000003695488, - "p90": 
66.04799628257751, - "p95": 66.3359984755516, - "p99": 67.64800101518631 + "p50": 85.50400286912918, + "p90": 99.16800260543823, + "p95": 100.99200159311295, + "p99": 106.55999928712845 }, "roundtrip": { - "p50": 108.73600095510483, - "p90": 112.99200356006622, - "p95": 113.92000317573547, - "p99": 129.82399761676788 + "p50": 131.3920021057129, + "p90": 366.9759929180145, + "p95": 405.63198924064636, + "p99": 421.4400053024292 }, "isolatedSum": { - "p50": 114.84799906611443, - "p90": 126.62399560213089, - "p95": 130.43199479579926, - "p99": 142.815999686718 + "p50": 184.28800255060196, + "p90": 423.0400025844574, + "p95": 459.9680081009865, + "p99": 488.0960062146187 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1376256, - "combineLogicalBytes": 1376256, - "fanoutMean": 1.5, - "recvTokensMax": 64, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 74, "stragglerRank": 4, "correct": true, "samplesPooled": 600, @@ -7954,34 +7873,34 @@ "tokensPerRank": 16, "globalTokens": 128, "dispatch": { - "p50": 58.720000088214874, - "p90": 60.83200126886368, - "p95": 63.040003180503845, - "p99": 79.52000200748444 + "p50": 105.40799796581268, + "p90": 325.72799921035767, + "p95": 357.7280044555664, + "p99": 397.5360095500946 }, "combine": { - "p50": 65.98400324583054, - "p90": 66.81600213050842, - "p95": 66.97600334882736, - "p99": 68.83200258016586 + "p50": 94.17600184679031, + "p90": 118.97599697113037, + "p95": 120.70400267839432, + "p99": 122.97599762678146 }, "roundtrip": { - "p50": 107.39199817180634, - "p90": 109.47199910879135, - "p95": 111.1999973654747, - "p99": 118.17599833011627 + "p50": 153.9199948310852, + "p90": 412.0959937572479, + "p95": 432.8959882259369, + "p99": 448.15999269485474 }, "isolatedSum": { - "p50": 124.70400333404541, - "p90": 127.6480033993721, - "p95": 130.0160065293312, - "p99": 148.3520045876503 + "p50": 199.583999812603, + "p90": 444.70399618148804, + "p95": 
478.4320071339607, + "p99": 520.5120071768761 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2781184, - "combineLogicalBytes": 2781184, - "fanoutMean": 1.515625, - "recvTokensMax": 128, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 145, "stragglerRank": 4, "correct": true, "samplesPooled": 600, @@ -7991,72 +7910,72 @@ "tokensPerRank": 32, "globalTokens": 256, "dispatch": { - "p50": 70.36799937486649, - "p90": 76.12799853086472, - "p95": 77.47200131416321, - "p99": 94.81599926948547 + "p50": 115.42399972677231, + "p90": 323.2960104942322, + "p95": 364.51199650764465, + "p99": 395.4240083694458 }, "combine": { - "p50": 68.51200014352798, - "p90": 72.7040022611618, - "p95": 76.86399668455124, - "p99": 80.19199967384338 + "p50": 116.86400324106216, + "p90": 211.5519940853119, + "p95": 219.04000639915466, + "p99": 221.02400660514832 }, "roundtrip": { - "p50": 120.99199742078781, - "p90": 125.50400197505951, - "p95": 126.75200402736664, - "p99": 134.07999277114868 + "p50": 210.87999641895294, + "p90": 444.09599900245667, + "p95": 473.9519953727722, + "p99": 495.2960014343262 }, "isolatedSum": { - "p50": 138.87999951839447, - "p90": 148.83200079202652, - "p95": 154.33599799871445, - "p99": 175.00799894332886 + "p50": 232.28800296783447, + "p90": 534.8480045795441, + "p95": 583.5520029067993, + "p99": 616.4480149745941 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 5533696, - "combineLogicalBytes": 5533696, - "fanoutMean": 1.5078125, - "recvTokensMax": 256, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 287, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 }, { "tokensPerRank": 64, "globalTokens": 512, "dispatch": { - "p50": 78.49600166082382, - "p90": 80.1599994301796, - "p95": 81.727996468544, - "p99": 
87.5839963555336 + "p50": 351.6800105571747, + "p90": 367.2960102558136, + "p95": 580.4160237312317, + "p99": 641.7279839515686 }, "combine": { - "p50": 82.78399705886841, - "p90": 90.81599861383438, - "p95": 91.36000275611877, - "p99": 93.53599697351456 + "p50": 363.3280098438263, + "p90": 367.5520122051239, + "p95": 369.05598640441895, + "p99": 375.7439851760864 }, "roundtrip": { - "p50": 146.2080031633377, - "p90": 154.91199493408203, - "p95": 157.98400342464447, - "p99": 176.06399953365326 + "p50": 396.9919979572296, + "p90": 656.1279892921448, + "p95": 681.3759803771973, + "p99": 705.7600021362305 }, "isolatedSum": { - "p50": 161.27999871969223, - "p90": 170.97599804401398, - "p95": 173.08799922466278, - "p99": 181.11999332904816 + "p50": 715.008020401001, + "p90": 734.8480224609375, + "p95": 949.4720101356506, + "p99": 1017.471969127655 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 11210752, - "combineLogicalBytes": 11210752, - "fanoutMean": 1.52734375, - "recvTokensMax": 512, - "stragglerRank": 6, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 564, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -8065,35 +7984,35 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 91.61599725484848, - "p90": 93.59999746084213, - "p95": 94.59199756383896, - "p99": 101.21600329875946 + "p50": 583.9679837226868, + "p90": 615.3920292854309, + "p95": 620.415985584259, + "p99": 875.9040236473083 }, "combine": { - "p50": 116.35199934244156, - "p90": 125.56800246238708, - "p95": 126.52799487113953, - "p99": 127.83999741077423 + "p50": 616.5440082550049, + "p90": 626.6880035400391, + "p95": 628.0320286750793, + "p99": 636.1600160598755 }, "roundtrip": { - "p50": 195.39199769496918, - "p90": 202.94399559497833, - "p95": 204.6079933643341, - "p99": 235.83999276161194 + "p50": 671.455979347229, + "p90": 726.0479927062988, + "p95": 978.8159728050232, + "p99": 
1028.2880067825317 }, "isolatedSum": { - "p50": 207.96799659729004, - "p90": 219.16799992322922, - "p95": 221.11999243497849, - "p99": 229.0560007095337 + "p50": 1200.5119919776917, + "p90": 1242.08003282547, + "p95": 1248.4480142593384, + "p99": 1512.0640397071838 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 22650880, - "combineLogicalBytes": 22650880, - "fanoutMean": 1.54296875, - "recvTokensMax": 1024, - "stragglerRank": 4, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 1104, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -8101,47 +8020,48 @@ ] }, { - "id": "cx-3bfb4348", - "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|decode|normal|none|none|0|tuned||47fddabb3277bec", - "colorKey": "b300_2e44c039", - "comparisonKey": "5c5e6a7ecdec195f", + "id": "cx-22d0d299", + "identity": "gb200|deepep|v1|4096|8|128|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ebe68878aa18bb0", + "colorKey": "gb200_b0118480", + "comparisonKey": "90f59e3a9d53c258", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:58:26.448327+00:00", + "generatedAt": "2026-06-29T13:59:11.907002+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_16", - "sku": "b300", + "publicationStatus": "comparable-experimental", + "runner": "gb200-nv_1", + "sku": "gb200", "backend": "deepep", "phase": "decode", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", + "measurementContract": "runtime-visible-v1", + "topologyClass": "gb200-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep · bf16 · zipf-heavy", - "model": "DeepSeek-V3/V4", + "label": "GB200 EP8 · deepep · fp8", + "model": "Qwen3.5", "shape": 
{ - "hidden": 7168, + "hidden": 4096, "topk": 8, - "experts": 256, - "routing": "zipf-heavy", - "routingLabel": "zipf-heavy", + "experts": 128, + "routing": "uniform", + "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, - "dispatchDtype": "bf16", + "dispatchDtype": "fp8", + "kernelGeneration": "v1", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1351, + "achievedFraction": 0.1316, "configuredUnits": 20, - "deviceUnits": 148, + "deviceUnits": 152, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -8149,59 +8069,133 @@ }, "placement": { "kind": "packed", - "nodes": 1, + "nodes": 2, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "47fddabb3277bec", - "workloadId": "set:4:6b84350720aa8233", - "workloadSource": "canonical-serialized", + "traceSignature": "ebe68878aa18bb0", + "workloadId": null, + "workloadSource": "seeded-runtime", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", + "backendVersion": "1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271893428", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271893428", - "createdAt": "2026-06-26T23:58:26.448327+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28374335449", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449", + "createdAt": "2026-06-29T13:08:20Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 56.832000613212585, - "p90": 63.71200084686279, - "p95": 64.54399973154068, - "p99": 69.88800317049026 + "p50": 434.143990278244, + "p90": 461.43999695777893, + "p95": 621.5680241584778, + "p99": 
739.0720248222351 }, "combine": { - "p50": 55.67999929189682, - "p90": 58.20799991488457, - "p95": 64.86400216817856, - "p99": 68.89600306749344 + "p50": 65.8240020275116, + "p90": 307.93601274490356, + "p95": 345.12001276016235, + "p99": 361.31200194358826 }, "roundtrip": { - "p50": 94.52799707651138, - "p90": 99.2640033364296, - "p95": 101.56799852848053, - "p99": 107.04000294208527 + "p50": 490.5279874801636, + "p90": 542.4000024795532, + "p95": 733.8560223579407, + "p99": 905.1839709281921 }, "isolatedSum": { - "p50": 112.5119999051094, - "p90": 121.92000076174736, - "p95": 129.40800189971924, - "p99": 138.7840062379837 + "p50": 499.9679923057556, + "p90": 769.3760097026825, + "p95": 966.6880369186401, + "p99": 1100.3840267658234 }, "roundtripMeasured": true, "dispatchLogicalBytes": 172032, - "combineLogicalBytes": 172032, - "fanoutMean": 1.5, - "recvTokensMax": 8, - "stragglerRank": 4, + "combineLogicalBytes": 344064, + "fanoutMean": 5.25, + "recvTokensMax": 6, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 436.41600012779236, + "p90": 472.57599234580994, + "p95": 669.0880060195923, + "p99": 738.5280132293701 + }, + "combine": { + "p50": 66.01600348949432, + "p90": 296.1280047893524, + "p95": 337.76000142097473, + "p99": 359.0719997882843 + }, + "roundtrip": { + "p50": 485.152006149292, + "p90": 519.0079808235168, + "p95": 712.0959758758545, + "p99": 781.4080119132996 + }, + "isolatedSum": { + "p50": 502.4320036172867, + "p90": 768.7039971351624, + "p95": 1006.848007440567, + "p99": 1097.6000130176544 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 352256, + "combineLogicalBytes": 704512, + "fanoutMean": 5.375, + "recvTokensMax": 12, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 430.9439957141876, + "p90": 464.06400203704834, + 
"p95": 625.2800226211548, + "p99": 730.6560277938843 + }, + "combine": { + "p50": 68.4799998998642, + "p90": 312.22400069236755, + "p95": 346.5920090675354, + "p99": 364.28800225257874 + }, + "roundtrip": { + "p50": 487.36000061035156, + "p90": 514.4320130348206, + "p95": 689.631998538971, + "p99": 788.0319952964783 + }, + "isolatedSum": { + "p50": 499.4239956140518, + "p90": 776.2880027294159, + "p95": 971.8720316886902, + "p99": 1094.944030046463 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 692224, + "combineLogicalBytes": 1384448, + "fanoutMean": 5.28125, + "recvTokensMax": 26, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 @@ -8210,35 +8204,72 @@ "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 56.92800134420395, - "p90": 59.039998799562454, - "p95": 60.5119988322258, - "p99": 66.04799628257751 + "p50": 437.824010848999, + "p90": 520.7359790802002, + "p95": 702.4000287055969, + "p99": 772.704005241394 }, "combine": { - "p50": 56.63999915122986, - "p90": 66.23999774456024, - "p95": 66.56000018119812, - "p99": 78.91199737787247 + "p50": 70.3359991312027, + "p90": 304.76799607276917, + "p95": 341.5679931640625, + "p99": 357.9519987106323 }, "roundtrip": { - "p50": 107.80800133943558, - "p90": 113.43999952077866, - "p95": 114.656001329422, - "p99": 124.22399967908859 + "p50": 490.6879961490631, + "p90": 520.19202709198, + "p95": 694.815993309021, + "p99": 798.4960079193115 }, "isolatedSum": { - "p50": 113.56800049543381, - "p90": 125.2799965441227, - "p95": 127.07199901342392, - "p99": 144.95999366044998 + "p50": 508.1600099802017, + "p90": 825.5039751529694, + "p95": 1043.9680218696594, + "p99": 1130.6560039520264 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1376256, - "combineLogicalBytes": 1376256, - "fanoutMean": 1.5, - "recvTokensMax": 64, - "stragglerRank": 4, + "dispatchLogicalBytes": 1372160, + "combineLogicalBytes": 2744320, + "fanoutMean": 5.234375, + "recvTokensMax": 49, + "stragglerRank": 
1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 437.9520118236542, + "p90": 488.41598629951477, + "p95": 633.5359811782837, + "p99": 747.0399737358093 + }, + "combine": { + "p50": 71.71200215816498, + "p90": 311.74400448799133, + "p95": 348.224014043808, + "p99": 364.1279935836792 + }, + "roundtrip": { + "p50": 488.2879853248596, + "p90": 523.0399966239929, + "p95": 684.4800114631653, + "p99": 813.4719729423523 + }, + "isolatedSum": { + "p50": 509.66401398181915, + "p90": 800.1599907875061, + "p95": 981.7599952220917, + "p99": 1111.1679673194885 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2732032, + "combineLogicalBytes": 5464064, + "fanoutMean": 5.2109375, + "recvTokensMax": 94, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 @@ -8247,35 +8278,72 @@ "tokensPerRank": 32, "globalTokens": 256, "dispatch": { - "p50": 67.90400296449661, - "p90": 74.07999783754349, - "p95": 75.93599706888199, - "p99": 82.2720006108284 + "p50": 432.5760006904602, + "p90": 461.66399121284485, + "p95": 627.3599863052368, + "p99": 729.8240065574646 }, "combine": { - "p50": 67.90400296449661, - "p90": 70.0799971818924, - "p95": 77.05599814653397, - "p99": 79.26400005817413 + "p50": 74.46400076150894, + "p90": 332.92800188064575, + "p95": 355.1360070705414, + "p99": 370.3039884567261 }, "roundtrip": { - "p50": 120.4800009727478, - "p90": 124.89599734544754, - "p95": 126.27199292182922, - "p99": 140.99200069904327 + "p50": 484.6400022506714, + "p90": 511.87199354171753, + "p95": 675.9679913520813, + "p99": 799.5520234107971 }, "isolatedSum": { - "p50": 135.80800592899323, - "p90": 144.15999501943588, - "p95": 152.99199521541595, - "p99": 161.53600066900253 + "p50": 507.04000145196915, + "p90": 794.5919930934906, + "p95": 982.4959933757782, + "p99": 1100.1279950141907 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 5533696, - "combineLogicalBytes": 
5533696, - "fanoutMean": 1.5078125, - "recvTokensMax": 256, - "stragglerRank": 4, + "dispatchLogicalBytes": 5562368, + "combineLogicalBytes": 11124736, + "fanoutMean": 5.3046875, + "recvTokensMax": 186, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 430.30399084091187, + "p90": 462.336003780365, + "p95": 612.4160289764404, + "p99": 742.3359751701355 + }, + "combine": { + "p50": 86.56000345945358, + "p90": 317.8560137748718, + "p95": 353.2800078392029, + "p99": 383.87200236320496 + }, + "roundtrip": { + "p50": 488.8319969177246, + "p90": 514.4320130348206, + "p95": 559.8400235176086, + "p99": 780.1920175552368 + }, + "isolatedSum": { + "p50": 516.8639943003654, + "p90": 780.1920175552368, + "p95": 965.6960368156433, + "p99": 1126.2079775333405 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 11096064, + "combineLogicalBytes": 22192128, + "fanoutMean": 5.291015625, + "recvTokensMax": 358, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 @@ -8284,35 +8352,35 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 91.77599847316742, - "p90": 94.17600184679031, - "p95": 95.74399888515472, - "p99": 114.20799791812897 + "p50": 431.7440092563629, + "p90": 473.56799244880676, + "p95": 657.9840183258057, + "p99": 752.8960108757019 }, "combine": { - "p50": 116.28799885511398, - "p90": 119.19999867677689, - "p95": 126.36800110340118, - "p99": 130.43199479579926 + "p50": 102.7199998497963, + "p90": 332.7679932117462, + "p95": 354.91201281547546, + "p99": 391.1359906196594 }, "roundtrip": { - "p50": 194.0159946680069, - "p90": 201.08799636363983, - "p95": 202.84800231456757, - "p99": 212.92799711227417 + "p50": 511.2000107765198, + "p90": 537.4400019645691, + "p95": 744.0320253372192, + "p99": 820.7679986953735 }, "isolatedSum": { - "p50": 208.0639973282814, - "p90": 213.3760005235672, - "p95": 222.1119999885559, - 
"p99": 244.63999271392822 + "p50": 534.4640091061592, + "p90": 806.335985660553, + "p95": 1012.8960311412811, + "p99": 1144.0320014953613 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 22650880, - "combineLogicalBytes": 22650880, - "fanoutMean": 1.54296875, - "recvTokensMax": 1024, - "stragglerRank": 4, + "dispatchLogicalBytes": 22282240, + "combineLogicalBytes": 44564480, + "fanoutMean": 5.3125, + "recvTokensMax": 699, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 @@ -8320,47 +8388,48 @@ ] }, { - "id": "cx-272778fb", - "identity": "b300|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-heavy+eplb|8|decode|normal|none|none|0|tuned||5a3054422534366", - "colorKey": "b300_6d2e4735", - "comparisonKey": "d0ac3c3db4cc1004", + "id": "cx-9ac4ce24", + "identity": "gb200|deepep|v1|5120|8|160|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||13e2b193b87a112", + "colorKey": "gb200_b0118480", + "comparisonKey": "1ee739e4974cb32b", "schemaVersion": 3, - "generatedAt": "2026-06-27T09:50:21.321707+00:00", + "generatedAt": "2026-06-29T14:00:58.916373+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_12", - "sku": "b300", + "publicationStatus": "comparable-experimental", + "runner": "gb200-nv_1", + "sku": "gb200", "backend": "deepep", "phase": "decode", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", + "measurementContract": "runtime-visible-v1", + "topologyClass": "gb200-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep · bf16 · zipf-heavy+eplb", - "model": "DeepSeek-V3 (EPLB physical)", + "label": "GB200 EP8 · deepep · fp8", + "model": "shape 5120/8/160", "shape": { - "hidden": 7168, + "hidden": 5120, "topk": 8, - "experts": 288, - "routing": "zipf-heavy", - 
"routingLabel": "zipf-heavy+eplb", + "experts": 160, + "routing": "uniform", + "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", - "eplbEnabled": true, - "dispatchDtype": "bf16", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v1", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1351, + "achievedFraction": 0.1316, "configuredUnits": 20, - "deviceUnits": 148, + "deviceUnits": 152, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -8368,59 +8437,59 @@ }, "placement": { "kind": "packed", - "nodes": 1, + "nodes": 2, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "5a3054422534366", - "workloadId": "set:8:6b84350720aa8233", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": 7.40625, - "eplbImbalanceAfter": 1.0004417782738093, - "backendVersion": "1.2.1", + "traceSignature": "13e2b193b87a112", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28285654027", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285654027", - "createdAt": "2026-06-27T09:50:21.321707+00:00", - "sha": "149586650dbed5b7579537347e9489d5b41543c1" + "id": "28374335449", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449", + "createdAt": "2026-06-29T13:08:20Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 1764.9600505828857, - "p90": 2543.519973754883, - "p95": 2872.8959560394287, - "p99": 3412.3198986053467 + "p50": 413.88800740242004, + "p90": 448.38398694992065, + "p95": 
665.4719710350037, + "p99": 718.5279726982117 }, "combine": { - "p50": 1750.1120567321777, - "p90": 1847.872018814087, - "p95": 2633.280038833618, - "p99": 2927.1678924560547 + "p50": 66.52799993753433, + "p90": 307.0720136165619, + "p95": 347.29599952697754, + "p99": 358.72000455856323 }, "roundtrip": { - "p50": 1795.583963394165, - "p90": 1911.6159677505493, - "p95": 2635.5841159820557, - "p99": 2994.0481185913086 + "p50": 462.5920057296753, + "p90": 680.832028388977, + "p95": 723.8079905509949, + "p99": 898.464024066925 }, "isolatedSum": { - "p50": 3515.0721073150635, - "p90": 4391.39199256897, - "p95": 5506.175994873047, - "p99": 6339.487791061401 + "p50": 480.4160073399544, + "p90": 755.4560005664825, + "p95": 1012.7679705619812, + "p99": 1077.247977256775 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 559104, - "combineLogicalBytes": 559104, - "fanoutMean": 4.875, - "recvTokensMax": 6, - "stragglerRank": 4, + "dispatchLogicalBytes": 215040, + "combineLogicalBytes": 430080, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 @@ -8429,35 +8498,35 @@ "tokensPerRank": 2, "globalTokens": 16, "dispatch": { - "p50": 1758.4320306777954, - "p90": 2088.1919860839844, - "p95": 2806.4639568328857, - "p99": 3415.616035461426 + "p50": 416.51201248168945, + "p90": 624.3839859962463, + "p95": 660.2240204811096, + "p99": 708.4159851074219 }, "combine": { - "p50": 1750.3039836883545, - "p90": 1858.9119911193848, - "p95": 2584.0001106262207, - "p99": 2952.4800777435303 + "p50": 67.90400296449661, + "p90": 311.90401315689087, + "p95": 347.51999378204346, + "p99": 360.1920008659363 }, "roundtrip": { - "p50": 1806.7200183868408, - "p90": 1925.663948059082, - "p95": 2728.480100631714, - "p99": 3011.45601272583 + "p50": 462.75201439857483, + "p90": 490.4960095882416, + "p95": 711.7120027542114, + "p99": 755.1680207252502 }, "isolatedSum": { - "p50": 3508.73601436615, - "p90": 3947.103977203369, - "p95": 
5390.464067459106, - "p99": 6368.096113204956 + "p50": 484.41601544618607, + "p90": 936.2879991531372, + "p95": 1007.7440142631531, + "p99": 1068.6079859733582 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1175552, - "combineLogicalBytes": 1175552, - "fanoutMean": 5.125, - "recvTokensMax": 12, - "stragglerRank": 5, + "dispatchLogicalBytes": 440320, + "combineLogicalBytes": 880640, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 @@ -8466,35 +8535,35 @@ "tokensPerRank": 4, "globalTokens": 32, "dispatch": { - "p50": 1758.9119672775269, - "p90": 2487.6160621643066, - "p95": 2937.9520416259766, - "p99": 3416.5759086608887 + "p50": 414.11200165748596, + "p90": 459.4239890575409, + "p95": 659.775972366333, + "p99": 783.9360237121582 }, "combine": { - "p50": 1761.1839771270752, - "p90": 1895.840048789978, - "p95": 2682.528018951416, - "p99": 3779.8080444335938 + "p50": 69.85600292682648, + "p90": 311.5200102329254, + "p95": 345.40799260139465, + "p99": 358.5599958896637 }, "roundtrip": { - "p50": 1816.7680501937866, - "p90": 1913.0879640579224, - "p95": 2590.4319286346436, - "p99": 2941.8559074401855 + "p50": 461.40798926353455, + "p90": 505.69599866867065, + "p95": 727.7759909629822, + "p99": 896.2879776954651 }, "isolatedSum": { - "p50": 3520.095944404602, - "p90": 4383.456110954285, - "p95": 5620.480060577393, - "p99": 7196.383953094482 + "p50": 483.96800458431244, + "p90": 770.9439992904663, + "p95": 1005.1839649677277, + "p99": 1142.496019601822 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2465792, - "combineLogicalBytes": 2465792, - "fanoutMean": 5.375, + "dispatchLogicalBytes": 870400, + "combineLogicalBytes": 1740800, + "fanoutMean": 5.3125, "recvTokensMax": 25, - "stragglerRank": 4, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 @@ -8503,35 +8572,35 @@ "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 1764.6080255508423, - "p90": 
2651.520013809204, - "p95": 3028.7680625915527, - "p99": 5341.599941253662 + "p50": 409.66400504112244, + "p90": 435.5199933052063, + "p95": 646.8160152435303, + "p99": 696.6400146484375 }, "combine": { - "p50": 1762.5279426574707, - "p90": 1947.9999542236328, - "p95": 2684.351921081543, - "p99": 13385.215759277344 + "p50": 72.09599763154984, + "p90": 340.12800455093384, + "p95": 353.2159924507141, + "p99": 366.65600538253784 }, "roundtrip": { - "p50": 1826.6880512237549, - "p90": 1935.968041419983, - "p95": 2620.6719875335693, - "p99": 2976.8319129943848 + "p50": 466.75199270248413, + "p90": 502.3040175437927, + "p95": 720.3199863433838, + "p99": 801.1839985847473 }, "isolatedSum": { - "p50": 3527.135968208313, - "p90": 4599.519968032837, - "p95": 5713.119983673096, - "p99": 18726.815700531006 + "p50": 481.7600026726723, + "p90": 775.6479978561401, + "p95": 1000.0320076942444, + "p99": 1063.2960200309753 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4988928, - "combineLogicalBytes": 4988928, - "fanoutMean": 5.4375, - "recvTokensMax": 47, - "stragglerRank": 4, + "dispatchLogicalBytes": 1735680, + "combineLogicalBytes": 3471360, + "fanoutMean": 5.296875, + "recvTokensMax": 50, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 @@ -8540,35 +8609,35 @@ "tokensPerRank": 16, "globalTokens": 128, "dispatch": { - "p50": 1762.2079849243164, - "p90": 2232.2559356689453, - "p95": 2812.9279613494873, - "p99": 3426.3999462127686 + "p50": 417.02398657798767, + "p90": 445.1200067996979, + "p95": 636.9600296020508, + "p99": 717.9520130157471 }, "combine": { - "p50": 1772.7359533309937, - "p90": 2522.815942764282, - "p95": 2989.471912384033, - "p99": 6136.096000671387 + "p50": 73.2479989528656, + "p90": 326.1120021343231, + "p95": 354.3680012226105, + "p99": 369.08799409866333 }, "roundtrip": { - "p50": 1848.736047744751, - "p90": 2838.3679389953613, - "p95": 3572.223901748657, - "p99": 5888.12780380249 + "p50": 462.6559913158417, + "p90": 
507.1359872817993, + "p95": 715.4880166053772, + "p99": 774.6880054473877 }, "isolatedSum": { - "p50": 3534.94393825531, - "p90": 4755.0718784332275, - "p95": 5802.3998737335205, - "p99": 9562.495946884155 + "p50": 490.27198553085327, + "p90": 771.232008934021, + "p95": 991.3280308246613, + "p99": 1087.0400071144104 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 9791488, - "combineLogicalBytes": 9791488, - "fanoutMean": 5.3359375, - "recvTokensMax": 94, - "stragglerRank": 4, + "dispatchLogicalBytes": 3456000, + "combineLogicalBytes": 6912000, + "fanoutMean": 5.2734375, + "recvTokensMax": 93, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 @@ -8577,35 +8646,35 @@ "tokensPerRank": 32, "globalTokens": 256, "dispatch": { - "p50": 1787.775993347168, - "p90": 3058.4959983825684, - "p95": 4017.824172973633, - "p99": 5667.263984680176 + "p50": 416.6400134563446, + "p90": 596.7360138893127, + "p95": 663.5839939117432, + "p99": 754.3359994888306 }, "combine": { - "p50": 1784.991979598999, - "p90": 2866.368055343628, - "p95": 3568.7999725341797, - "p99": 5742.527961730957 + "p50": 76.76800340414047, + "p90": 349.37599301338196, + "p95": 362.5600039958954, + "p99": 374.08000230789185 }, "roundtrip": { - "p50": 1844.5760011672974, - "p90": 2729.248046875, - "p95": 3046.3039875030518, - "p99": 5490.079879760742 + "p50": 461.9840085506439, + "p90": 490.62401056289673, + "p95": 718.3679938316345, + "p99": 761.5039944648743 }, "isolatedSum": { - "p50": 3572.767972946167, - "p90": 5924.864053726196, - "p95": 7586.6241455078125, - "p99": 11409.791946411133 + "p50": 493.4080168604851, + "p90": 946.1120069026947, + "p95": 1026.1439979076385, + "p99": 1128.4160017967224 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19410944, - "combineLogicalBytes": 19410944, - "fanoutMean": 5.2890625, - "recvTokensMax": 178, - "stragglerRank": 6, + "dispatchLogicalBytes": 6988800, + "combineLogicalBytes": 13977600, + "fanoutMean": 5.33203125, + 
"recvTokensMax": 179, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 @@ -8614,35 +8683,35 @@ "tokensPerRank": 64, "globalTokens": 512, "dispatch": { - "p50": 1779.520034790039, - "p90": 2049.920082092285, - "p95": 2758.0161094665527, - "p99": 3381.7598819732666 + "p50": 415.2640104293823, + "p90": 447.55199551582336, + "p95": 679.6159744262695, + "p99": 781.2479734420776 }, "combine": { - "p50": 1785.2799892425537, - "p90": 1907.647967338562, - "p95": 2608.544111251831, - "p99": 2964.8640155792236 + "p50": 90.30400216579437, + "p90": 112.31999844312668, + "p95": 341.0879969596863, + "p99": 384.223997592926 }, "roundtrip": { - "p50": 1863.2320165634155, - "p90": 1987.104058265686, - "p95": 2669.055938720703, - "p99": 3054.5599460601807 + "p50": 479.8080027103424, + "p90": 509.5040202140808, + "p95": 724.6400117874146, + "p99": 845.6000089645386 }, "isolatedSum": { - "p50": 3564.800024032593, - "p90": 3957.568049430847, - "p95": 5366.560220718384, - "p99": 6346.62389755249 + "p50": 505.5680125951767, + "p90": 559.87199395895, + "p95": 1020.7039713859558, + "p99": 1165.4719710350037 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 38678528, - "combineLogicalBytes": 38678528, - "fanoutMean": 5.26953125, - "recvTokensMax": 360, - "stragglerRank": 4, + "dispatchLogicalBytes": 13987840, + "combineLogicalBytes": 27975680, + "fanoutMean": 5.3359375, + "recvTokensMax": 355, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 @@ -8651,35 +8720,35 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 1799.9680042266846, - "p90": 2006.6559314727783, - "p95": 2855.6160926818848, - "p99": 3409.8880290985107 + "p50": 432.3520064353943, + "p90": 658.9760184288025, + "p95": 717.8879976272583, + "p99": 915.6799912452698 }, "combine": { - "p50": 1824.8319625854492, - "p90": 3164.6718978881836, - "p95": 5201.375961303711, - "p99": 6098.78396987915 + "p50": 109.15199667215347, + "p90": 117.72800236940384, + "p95": 
354.46399450302124, + "p99": 394.27199959754944 }, "roundtrip": { - "p50": 1909.2479944229126, - "p90": 3033.3759784698486, - "p95": 5025.9199142456055, - "p99": 5985.599994659424 + "p50": 504.41598892211914, + "p90": 538.8799905776978, + "p95": 754.0159821510315, + "p99": 827.9680013656616 }, "isolatedSum": { - "p50": 3624.799966812134, - "p90": 5171.327829360962, - "p95": 8056.992053985596, - "p99": 9508.671998977661 + "p50": 541.5040031075478, + "p90": 776.7040207982063, + "p95": 1072.3519921302795, + "p99": 1309.9519908428192 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77285376, - "combineLogicalBytes": 77285376, - "fanoutMean": 5.2646484375, - "recvTokensMax": 704, - "stragglerRank": 4, + "dispatchLogicalBytes": 27837440, + "combineLogicalBytes": 55674880, + "fanoutMean": 5.3095703125, + "recvTokensMax": 699, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 @@ -8687,47 +8756,48 @@ ] }, { - "id": "cx-d77e8004", - "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-mild|8|decode|normal|none|none|0|tuned||f3df51be7d5c32b", - "colorKey": "b300_7ab35d34", - "comparisonKey": "9b1abb398e739521", + "id": "cx-f128fb76", + "identity": "gb200|deepep|v1|6144|8|256|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||74444524b5db510", + "colorKey": "gb200_b0118480", + "comparisonKey": "a413134cd507bf47", "schemaVersion": 3, - "generatedAt": "2026-06-27T09:48:58.943687+00:00", + "generatedAt": "2026-06-29T14:02:46.682266+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_17", - "sku": "b300", + "publicationStatus": "comparable-experimental", + "runner": "gb200-nv_1", + "sku": "gb200", "backend": "deepep", "phase": "decode", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", + 
"measurementContract": "runtime-visible-v1", + "topologyClass": "gb200-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep · bf16 · zipf-mild", - "model": "DeepSeek-V3/V4", + "label": "GB200 EP8 · deepep · fp8", + "model": "MiniMax-M3", "shape": { - "hidden": 7168, + "hidden": 6144, "topk": 8, "experts": 256, - "routing": "zipf-mild", - "routingLabel": "zipf-mild", + "routing": "uniform", + "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, - "dispatchDtype": "bf16", + "dispatchDtype": "fp8", + "kernelGeneration": "v1", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1351, + "achievedFraction": 0.1316, "configuredUnits": 20, - "deviceUnits": 148, + "deviceUnits": 152, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -8735,59 +8805,59 @@ }, "placement": { "kind": "packed", - "nodes": 1, + "nodes": 2, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "f3df51be7d5c32b", - "workloadId": "set:8:289b7f9c14292e96", - "workloadSource": "canonical-serialized", + "traceSignature": "74444524b5db510", + "workloadId": null, + "workloadSource": "seeded-runtime", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", + "backendVersion": "1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28285630258", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285630258", - "createdAt": "2026-06-27T09:48:58.943687+00:00", - "sha": "149586650dbed5b7579537347e9489d5b41543c1" + "id": "28374335449", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449", + "createdAt": "2026-06-29T13:08:20Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, 
"rows": [ { "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 57.40800127387047, - "p90": 59.647999703884125, - "p95": 62.49599903821945, - "p99": 74.07999783754349 + "p50": 446.9119906425476, + "p90": 482.87999629974365, + "p95": 691.7120218276978, + "p99": 746.9120025634766 }, "combine": { - "p50": 66.56000018119812, - "p90": 67.26399809122086, - "p95": 67.64800101518631, - "p99": 78.5600021481514 + "p50": 73.60000163316727, + "p90": 332.63999223709106, + "p95": 356.83199763298035, + "p99": 371.8400001525879 }, "roundtrip": { - "p50": 109.6000000834465, - "p90": 113.69600147008896, - "p95": 114.52800035476685, - "p99": 122.65600264072418 + "p50": 498.75199794769287, + "p90": 718.6880111694336, + "p95": 828.0959725379944, + "p99": 1953.1519412994385 }, "isolatedSum": { - "p50": 123.96800145506859, - "p90": 126.91199779510498, - "p95": 130.14400005340576, - "p99": 152.63999998569489 + "p50": 520.5119922757149, + "p90": 815.5199885368347, + "p95": 1048.544019460678, + "p99": 1118.7520027160645 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 587776, - "combineLogicalBytes": 587776, - "fanoutMean": 5.125, - "recvTokensMax": 8, - "stragglerRank": 4, + "dispatchLogicalBytes": 270336, + "combineLogicalBytes": 540672, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 @@ -8796,35 +8866,35 @@ "tokensPerRank": 2, "globalTokens": 16, "dispatch": { - "p50": 57.5999990105629, - "p90": 59.07199904322624, - "p95": 61.43999844789505, - "p99": 67.52000004053116 + "p50": 444.44799423217773, + "p90": 638.0800008773804, + "p95": 707.4559926986694, + "p99": 760.8640193939209 }, "combine": { - "p50": 66.84800237417221, - "p90": 67.32799857854843, - "p95": 68.2239979505539, - "p99": 82.2720006108284 + "p50": 73.15199822187424, + "p90": 341.18399024009705, + "p95": 357.695996761322, + "p99": 368.0959939956665 }, "roundtrip": { - "p50": 107.32799768447876, - "p90": 110.11199653148651, - "p95": 
111.51999980211258, - "p99": 119.19999867677689 + "p50": 491.58400297164917, + "p90": 511.3919973373413, + "p95": 693.1520104408264, + "p99": 797.0560193061829 }, "isolatedSum": { - "p50": 124.44800138473511, - "p90": 126.39999762177467, - "p95": 129.66399639844894, - "p99": 149.79200065135956 + "p50": 517.599992454052, + "p90": 979.2639911174774, + "p95": 1065.1519894599915, + "p99": 1128.9600133895874 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1103872, - "combineLogicalBytes": 1103872, - "fanoutMean": 4.8125, - "recvTokensMax": 16, - "stragglerRank": 4, + "dispatchLogicalBytes": 528384, + "combineLogicalBytes": 1056768, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 @@ -8833,35 +8903,35 @@ "tokensPerRank": 4, "globalTokens": 32, "dispatch": { - "p50": 58.75200033187866, - "p90": 60.70400029420853, - "p95": 61.91999837756157, - "p99": 70.91200351715088 + "p50": 442.49600172042847, + "p90": 603.0719876289368, + "p95": 717.1199917793274, + "p99": 807.6159954071045 }, "combine": { - "p50": 68.80000233650208, - "p90": 76.99199765920639, - "p95": 77.27999985218048, - "p99": 78.52800190448761 + "p50": 75.42400062084198, + "p90": 344.35200691223145, + "p95": 359.391987323761, + "p99": 377.6000142097473 }, "roundtrip": { - "p50": 115.80800265073776, - "p90": 121.8239963054657, - "p95": 123.52000176906586, - "p99": 133.66399705410004 + "p50": 488.12800645828247, + "p90": 520.7359790802002, + "p95": 765.3440237045288, + "p99": 900.160014629364 }, "isolatedSum": { - "p50": 127.55200266838074, - "p90": 137.69599795341492, - "p95": 139.19999822974205, - "p99": 149.4400054216385 + "p50": 517.9200023412704, + "p90": 947.4239945411682, + "p95": 1076.5119791030884, + "p99": 1185.2160096168518 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2250752, - "combineLogicalBytes": 2250752, - "fanoutMean": 4.90625, - "recvTokensMax": 31, - "stragglerRank": 4, + "dispatchLogicalBytes": 1062912, + 
"combineLogicalBytes": 2125824, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 @@ -8870,35 +8940,35 @@ "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 60.15999987721443, - "p90": 65.15199691057205, - "p95": 66.97600334882736, - "p99": 73.18399846553802 + "p50": 449.0559995174408, + "p90": 476.7040014266968, + "p95": 684.831976890564, + "p99": 756.3520073890686 }, "combine": { - "p50": 68.60800087451935, - "p90": 77.15199887752533, - "p95": 77.82399654388428, - "p99": 80.03199845552444 + "p50": 77.05599814653397, + "p90": 343.29599142074585, + "p95": 359.96800661087036, + "p99": 373.56799840927124 }, "roundtrip": { - "p50": 120.4800009727478, - "p90": 125.44000148773193, - "p95": 126.71999633312225, - "p99": 144.3520039319992 + "p50": 503.29601764678955, + "p90": 709.4079852104187, + "p95": 812.2239708900452, + "p99": 1050.6559610366821 }, "isolatedSum": { - "p50": 128.76800075173378, - "p90": 142.30399578809738, - "p95": 144.79999989271164, - "p99": 153.21599692106247 + "p50": 526.1119976639748, + "p90": 819.9999928474426, + "p95": 1044.7999835014343, + "p99": 1129.9200057983398 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4472832, - "combineLogicalBytes": 4472832, - "fanoutMean": 4.875, - "recvTokensMax": 62, - "stragglerRank": 4, + "dispatchLogicalBytes": 2131968, + "combineLogicalBytes": 4263936, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 @@ -8907,35 +8977,35 @@ "tokensPerRank": 16, "globalTokens": 128, "dispatch": { - "p50": 63.551999628543854, - "p90": 69.95200365781784, - "p95": 71.10399752855301, - "p99": 88.128000497818 + "p50": 447.488009929657, + "p90": 475.48800706863403, + "p95": 701.1520266532898, + "p99": 776.9280076026917 }, "combine": { - "p50": 76.89599692821503, - "p90": 78.87999713420868, - "p95": 79.26400005817413, - "p99": 82.46400207281113 + "p50": 78.33600044250488, 
+ "p90": 330.3360044956207, + "p95": 364.1279935836792, + "p99": 375.5840063095093 }, "roundtrip": { - "p50": 121.05599790811539, - "p90": 124.89599734544754, - "p95": 126.91199779510498, - "p99": 135.68000495433807 + "p50": 499.1999864578247, + "p90": 527.9039740562439, + "p95": 778.8800001144409, + "p99": 872.8960156440735 }, "isolatedSum": { - "p50": 140.44799655675888, - "p90": 148.83200079202652, - "p95": 150.36799758672714, - "p99": 170.59200257062912 + "p50": 525.8240103721619, + "p90": 805.8240115642548, + "p95": 1065.280020236969, + "p99": 1152.512013912201 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 8888320, - "combineLogicalBytes": 8888320, - "fanoutMean": 4.84375, - "recvTokensMax": 124, - "stragglerRank": 4, + "dispatchLogicalBytes": 4251648, + "combineLogicalBytes": 8503296, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 @@ -8944,35 +9014,35 @@ "tokensPerRank": 32, "globalTokens": 256, "dispatch": { - "p50": 70.68800181150436, - "p90": 76.51200145483017, - "p95": 77.44000107049942, - "p99": 82.49600231647491 + "p50": 449.1199851036072, + "p90": 478.5279929637909, + "p95": 704.4159770011902, + "p99": 780.8640003204346 }, "combine": { - "p50": 79.19999957084656, - "p90": 79.74400371313095, - "p95": 80.19199967384338, - "p99": 89.82399851083755 + "p50": 81.95199817419052, + "p90": 327.0080089569092, + "p95": 361.6639971733093, + "p99": 380.92800974845886 }, "roundtrip": { - "p50": 133.40799510478973, - "p90": 137.82399892807007, - "p95": 139.0720009803772, - "p99": 155.29599785804749 + "p50": 499.64800477027893, + "p90": 522.5279927253723, + "p95": 745.4720139503479, + "p99": 810.591995716095 }, "isolatedSum": { - "p50": 149.88800138235092, - "p90": 156.25600516796112, - "p95": 157.6320007443428, - "p99": 172.32000082731247 + "p50": 531.0719832777977, + "p90": 805.5360019207001, + "p95": 1066.0799741744995, + "p99": 1161.7920100688934 }, "roundtripMeasured": true, - 
"dispatchLogicalBytes": 17733632, - "combineLogicalBytes": 17733632, - "fanoutMean": 4.83203125, - "recvTokensMax": 248, - "stragglerRank": 4, + "dispatchLogicalBytes": 8454144, + "combineLogicalBytes": 16908288, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 @@ -8981,35 +9051,35 @@ "tokensPerRank": 64, "globalTokens": 512, "dispatch": { - "p50": 87.5839963555336, - "p90": 90.87999910116196, - "p95": 91.67999774217606, - "p99": 98.9760011434555 + "p50": 450.75199007987976, + "p90": 663.3599996566772, + "p95": 725.600004196167, + "p99": 879.4559836387634 }, "combine": { - "p50": 92.47999638319016, - "p90": 101.40799731016159, - "p95": 101.98400169610977, - "p99": 115.64800143241882 + "p50": 96.96000069379807, + "p90": 338.591992855072, + "p95": 368.22399497032166, + "p99": 396.06401324272156 }, "roundtrip": { - "p50": 159.55199301242828, - "p90": 163.29599916934967, - "p95": 165.43999314308167, - "p99": 171.07200622558594 + "p50": 517.6960229873657, + "p90": 542.6560044288635, + "p95": 798.1439828872681, + "p99": 868.9919710159302 }, "isolatedSum": { - "p50": 180.06399273872375, - "p90": 192.28799641132355, - "p95": 193.66399943828583, - "p99": 214.62400257587433 + "p50": 547.7119907736778, + "p90": 1001.9519925117493, + "p95": 1093.8239991664886, + "p99": 1275.519996881485 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 35424256, - "combineLogicalBytes": 35424256, - "fanoutMean": 4.826171875, - "recvTokensMax": 492, - "stragglerRank": 4, + "dispatchLogicalBytes": 16711680, + "combineLogicalBytes": 33423360, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 @@ -9018,35 +9088,35 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 103.67999970912933, - "p90": 105.56799918413162, - "p95": 108.70400071144104, - "p99": 117.91999638080597 + "p50": 447.00801372528076, + "p90": 475.0399887561798, + "p95": 
717.2480225563049, + "p99": 823.8720297813416 }, "combine": { - "p50": 127.87200510501862, - "p90": 128.9599984884262, - "p95": 130.0799995660782, - "p99": 138.5280042886734 + "p50": 114.97599631547928, + "p90": 121.21599912643433, + "p95": 124.51200187206268, + "p99": 421.4079976081848 }, "roundtrip": { - "p50": 214.1440063714981, - "p90": 220.47999501228333, - "p95": 222.49600291252136, - "p99": 228.19200158119202 + "p50": 533.6319804191589, + "p90": 553.6959767341614, + "p95": 573.9520192146301, + "p99": 891.9360041618347 }, "isolatedSum": { - "p50": 231.55200481414795, - "p90": 234.52799767255783, - "p95": 238.78400027751923, - "p99": 256.44800066947937 + "p50": 561.98401004076, + "p90": 596.2559878826141, + "p95": 841.7600244283676, + "p99": 1245.2800273895264 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 70160384, - "combineLogicalBytes": 70160384, - "fanoutMean": 4.779296875, - "recvTokensMax": 987, - "stragglerRank": 4, + "dispatchLogicalBytes": 33288192, + "combineLogicalBytes": 66576384, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 @@ -9054,16 +9124,16 @@ ] }, { - "id": "cx-945e07fc", - "identity": "b300|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-mild+eplb|8|decode|normal|none|none|0|tuned||16babcaf4204243", - "colorKey": "b300_5e3d915a", - "comparisonKey": "7cc654cb13c70aa7", + "id": "cx-67cf349f", + "identity": "gb200|deepep|v1|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||74444524b5db510", + "colorKey": "gb200_6d63c708", + "comparisonKey": "ef8fcf8c07a567e5", "schemaVersion": 3, - "generatedAt": "2026-06-27T09:49:00.117687+00:00", + "generatedAt": "2026-06-29T13:49:55.837432+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_15", - "sku": "b300", + "publicationStatus": "comparable-experimental", + "runner": "gb200-nv_0", + "sku": "gb200", "backend": "deepep", "phase": 
"decode", "mode": "normal", @@ -9071,30 +9141,31 @@ "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", + "topologyClass": "gb200-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep · bf16 · zipf-mild+eplb", - "model": "DeepSeek-V3 (EPLB physical)", + "label": "GB200 EP8 · deepep · fp8", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, - "experts": 288, - "routing": "zipf-mild", - "routingLabel": "zipf-mild+eplb", + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", - "eplbEnabled": true, - "dispatchDtype": "bf16", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v1", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1351, + "achievedFraction": 0.1316, "configuredUnits": 20, - "deviceUnits": 148, + "deviceUnits": 152, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -9102,59 +9173,59 @@ }, "placement": { "kind": "packed", - "nodes": 1, + "nodes": 2, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "16babcaf4204243", - "workloadId": "set:8:289b7f9c14292e96", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": 2.61328125, - "eplbImbalanceAfter": 1.0009114583333334, - "backendVersion": "1.2.1", + "traceSignature": "74444524b5db510", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28285632999", - "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285632999", - "createdAt": "2026-06-27T09:49:00.117687+00:00", - "sha": "149586650dbed5b7579537347e9489d5b41543c1" + "id": "28374335449", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449", + "createdAt": "2026-06-29T13:08:20Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 55.67999929189682, - "p90": 57.72799998521805, - "p95": 59.87200140953064, - "p99": 69.50400024652481 + "p50": 94.94400024414062, + "p90": 106.55999928712845, + "p95": 111.61600053310394, + "p99": 120.28799951076508 }, "combine": { - "p50": 65.50399959087372, - "p90": 66.17599725723267, - "p95": 66.30399823188782, - "p99": 69.31199878454208 + "p50": 73.18399846553802, + "p90": 78.72000336647034, + "p95": 82.30400085449219, + "p99": 89.12000060081482 }, "roundtrip": { - "p50": 104.54399883747101, - "p90": 111.42399907112122, - "p95": 112.28799819946289, - "p99": 117.60000139474869 + "p50": 250.46399235725403, + "p90": 263.61599564552307, + "p95": 267.61600375175476, + "p99": 275.07200837135315 }, "isolatedSum": { - "p50": 121.18399888277054, - "p90": 123.90399724245071, - "p95": 126.17599964141846, - "p99": 138.8159990310669 + "p50": 168.12799870967865, + "p90": 185.28000265359879, + "p95": 193.92000138759613, + "p99": 209.4080001115799 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 602112, - "combineLogicalBytes": 602112, - "fanoutMean": 5.25, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, "recvTokensMax": 7, - "stragglerRank": 7, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 @@ -9163,72 +9234,72 @@ "tokensPerRank": 2, "globalTokens": 16, "dispatch": { - "p50": 55.80800026655197, - "p90": 57.66399949789047, - "p95": 58.9120015501976, - "p99": 63.87200206518173 + "p50": 95.2640026807785, + "p90": 107.39199817180634, + "p95": 111.51999980211258, + 
"p99": 119.07199770212173 }, "combine": { - "p50": 65.85600227117538, - "p90": 67.03999638557434, - "p95": 67.80800223350525, - "p99": 78.52800190448761 + "p50": 73.88799637556076, + "p90": 79.83999699354172, + "p95": 83.23200047016144, + "p99": 88.03199976682663 }, "roundtrip": { - "p50": 105.18400371074677, - "p90": 107.26399719715118, - "p95": 108.15999656915665, - "p99": 112.47999966144562 + "p50": 252.22399830818176, + "p90": 264.6079957485199, + "p95": 267.96799898147583, + "p99": 276.99199318885803 }, "isolatedSum": { - "p50": 121.66400253772736, - "p90": 124.70399588346481, - "p95": 126.72000378370285, - "p99": 142.40000396966934 + "p50": 169.15199905633926, + "p90": 187.23199516534805, + "p95": 194.75200027227402, + "p99": 207.10399746894836 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1189888, - "combineLogicalBytes": 1189888, - "fanoutMean": 5.1875, - "recvTokensMax": 12, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 }, { "tokensPerRank": 4, "globalTokens": 32, "dispatch": { - "p50": 56.51199817657471, - "p90": 58.111999183893204, - "p95": 59.23200026154518, - "p99": 65.18399715423584 + "p50": 95.93600034713745, + "p90": 109.47199910879135, + "p95": 114.656001329422, + "p99": 128.63999605178833 }, "combine": { - "p50": 66.23999774456024, - "p90": 67.90400296449661, - "p95": 68.35199892520905, - "p99": 76.48000121116638 + "p50": 75.6480023264885, + "p90": 80.99199831485748, + "p95": 84.99199897050858, + "p99": 90.36800265312195 }, "roundtrip": { - "p50": 106.39999806880951, - "p90": 109.50399935245514, - "p95": 114.3679991364479, - "p99": 124.9919980764389 + "p50": 254.01601195335388, + "p90": 266.7520046234131, + "p95": 270.3999876976013, + "p99": 277.50399708747864 }, "isolatedSum": { - "p50": 122.75199592113495, - 
"p90": 126.01600214838982, - "p95": 127.58399918675423, - "p99": 141.66399836540222 + "p50": 171.58400267362595, + "p90": 190.46399742364883, + "p95": 199.64800029993057, + "p99": 219.00799870491028 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2408448, - "combineLogicalBytes": 2408448, - "fanoutMean": 5.25, - "recvTokensMax": 23, - "stragglerRank": 6, + "dispatchLogicalBytes": 1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 @@ -9237,35 +9308,35 @@ "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 58.17599967122078, - "p90": 60.575999319553375, - "p95": 61.69600039720535, - "p99": 74.97599720954895 + "p50": 93.37600320577621, + "p90": 105.15200346708298, + "p95": 110.36799848079681, + "p99": 121.08799815177917 }, "combine": { - "p50": 67.03999638557434, - "p90": 69.31199878454208, - "p95": 76.57600194215775, - "p99": 78.91199737787247 + "p50": 77.47200131416321, + "p90": 83.74399691820145, + "p95": 87.55200356245041, + "p99": 94.33600306510925 }, "roundtrip": { - "p50": 119.4240003824234, - "p90": 124.79999661445618, - "p95": 125.88800489902496, - "p99": 129.43999469280243 + "p50": 254.33599948883057, + "p90": 267.1999931335449, + "p95": 272.0640003681183, + "p99": 284.8320007324219 }, "isolatedSum": { - "p50": 125.21599605679512, - "p90": 129.88799810409546, - "p95": 138.2720023393631, - "p99": 153.88799458742142 + "p50": 170.84800451993942, + "p90": 188.89600038528442, + "p95": 197.92000204324722, + "p99": 215.42400121688843 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4859904, - "combineLogicalBytes": 4859904, - "fanoutMean": 5.296875, + "dispatchLogicalBytes": 2487296, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, "recvTokensMax": 47, - "stragglerRank": 4, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 @@ -9274,35 +9345,35 @@ "tokensPerRank": 16, "globalTokens": 128, 
"dispatch": { - "p50": 59.776000678539276, - "p90": 65.88800251483917, - "p95": 67.03999638557434, - "p99": 71.9359964132309 + "p50": 95.2640026807785, + "p90": 106.55999928712845, + "p95": 110.91200262308121, + "p99": 119.19999867677689 }, "combine": { - "p50": 68.12799721956253, - "p90": 77.44000107049942, - "p95": 77.79199630022049, - "p99": 89.6959975361824 + "p50": 79.96799796819687, + "p90": 85.56800335645676, + "p95": 88.128000497818, + "p99": 95.13600170612335 }, "roundtrip": { - "p50": 118.81600320339203, - "p90": 124.83199685811996, - "p95": 125.91999769210815, - "p99": 131.80799782276154 + "p50": 257.85601139068604, + "p90": 271.2000012397766, + "p95": 274.81600642204285, + "p99": 392.35201478004456 }, "isolatedSum": { - "p50": 127.9039978981018, - "p90": 143.3280035853386, - "p95": 144.83199268579483, - "p99": 161.6319939494133 + "p50": 175.23200064897537, + "p90": 192.1280026435852, + "p95": 199.0400031208992, + "p99": 214.33600038290024 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 9605120, - "combineLogicalBytes": 9605120, - "fanoutMean": 5.234375, - "recvTokensMax": 93, - "stragglerRank": 5, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 @@ -9311,35 +9382,35 @@ "tokensPerRank": 32, "globalTokens": 256, "dispatch": { - "p50": 68.03199648857117, - "p90": 69.2799985408783, - "p95": 71.23199850320816, - "p99": 76.22399926185608 + "p50": 94.59199756383896, + "p90": 105.24799674749374, + "p95": 109.82400178909302, + "p99": 120.25599926710129 }, "combine": { - "p50": 77.85599678754807, - "p90": 78.5600021481514, - "p95": 79.32800054550171, - "p99": 90.84799885749817 + "p50": 84.44800227880478, + "p90": 90.81599861383438, + "p95": 94.01600062847137, + "p99": 97.02400118112564 }, "roundtrip": { - "p50": 128.86400520801544, - "p90": 132.35199451446533, - "p95": 134.91199910640717, - "p99": 140.57600498199463 
+ "p50": 264.0320062637329, + "p90": 276.92800760269165, + "p95": 279.80801463127136, + "p99": 286.97600960731506 }, "isolatedSum": { - "p50": 145.88799327611923, - "p90": 147.8400006890297, - "p95": 150.55999904870987, - "p99": 167.07199811935425 + "p50": 179.03999984264374, + "p90": 196.06399536132812, + "p95": 203.8400024175644, + "p99": 217.28000044822693 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19367936, - "combineLogicalBytes": 19367936, - "fanoutMean": 5.27734375, + "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, "recvTokensMax": 182, - "stragglerRank": 7, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 @@ -9348,35 +9419,35 @@ "tokensPerRank": 64, "globalTokens": 512, "dispatch": { - "p50": 86.30400151014328, - "p90": 88.86399865150452, - "p95": 89.82399851083755, - "p99": 103.20000350475311 + "p50": 101.02400183677673, + "p90": 111.7440015077591, + "p95": 115.32799899578094, + "p99": 122.97599762678146 }, "combine": { - "p50": 90.87999910116196, - "p90": 92.8959995508194, - "p95": 94.40000355243683, - "p99": 102.4319976568222 + "p50": 100.09600222110748, + "p90": 106.175996363163, + "p95": 108.15999656915665, + "p99": 114.72000181674957 }, "roundtrip": { - "p50": 157.31200575828552, - "p90": 161.56800091266632, - "p95": 163.03999722003937, - "p99": 172.8000044822693 + "p50": 282.943993806839, + "p90": 318.36798787117004, + "p95": 368.6720132827759, + "p99": 443.10399889945984 }, "isolatedSum": { - "p50": 177.18400061130524, - "p90": 181.7599982023239, - "p95": 184.22400206327438, - "p99": 205.63200116157532 + "p50": 201.12000405788422, + "p90": 217.9199978709221, + "p95": 223.4879955649376, + "p99": 237.69599944353104 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 38535168, - "combineLogicalBytes": 38535168, - "fanoutMean": 5.25, - "recvTokensMax": 358, - "stragglerRank": 7, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 38993920, + "fanoutMean": 
5.3125, + "recvTokensMax": 367, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 @@ -9385,35 +9456,35 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 92.6079973578453, - "p90": 95.67999839782715, - "p95": 98.94400089979172, - "p99": 109.79200154542923 + "p50": 111.13599687814713, + "p90": 120.35199999809265, + "p95": 124.7360035777092, + "p99": 132.79999792575836 }, "combine": { - "p50": 114.62400108575821, - "p90": 115.35999923944473, - "p95": 116.06399714946747, - "p99": 126.78399682044983 + "p50": 119.45600062608719, + "p90": 125.91999769210815, + "p95": 129.37599420547485, + "p99": 135.93600690364838 }, "roundtrip": { - "p50": 194.94399428367615, - "p90": 198.33600521087646, - "p95": 199.39200580120087, - "p99": 228.70400547981262 + "p50": 304.3839931488037, + "p90": 316.3839876651764, + "p95": 320.51199674606323, + "p99": 329.24801111221313 }, "isolatedSum": { - "p50": 207.23199844360352, - "p90": 211.03999763727188, - "p95": 215.0079980492592, - "p99": 236.57599836587906 + "p50": 230.5919975042343, + "p90": 246.2719976902008, + "p95": 254.11199778318405, + "p99": 268.73600482940674 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 76869632, - "combineLogicalBytes": 76869632, - "fanoutMean": 5.236328125, - "recvTokensMax": 688, - "stragglerRank": 7, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 @@ -9421,47 +9492,48 @@ ] }, { - "id": "cx-29812ef0", - "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-moderate|8|decode|normal|none|none|0|tuned||14ded8461f2636c", - "colorKey": "b300_fdf55523", - "comparisonKey": "941e1d8581ae6b5b", + "id": "cx-cce0f9af", + "identity": "gb200|deepep|v1|7168|8|256|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||74444524b5db510", + "colorKey": "gb200_b0118480", + "comparisonKey": 
"e52bc4d8d01ec622", "schemaVersion": 3, - "generatedAt": "2026-06-27T09:49:22.371406+00:00", + "generatedAt": "2026-06-29T13:51:44.095505+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_01", - "sku": "b300", + "publicationStatus": "comparable-experimental", + "runner": "gb200-nv_0", + "sku": "gb200", "backend": "deepep", "phase": "decode", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", + "measurementContract": "runtime-visible-v1", + "topologyClass": "gb200-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep · bf16 · zipf-moderate", + "label": "GB200 EP8 · deepep · fp8", "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, "experts": 256, - "routing": "zipf-moderate", - "routingLabel": "zipf-moderate", + "routing": "uniform", + "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, - "dispatchDtype": "bf16", + "dispatchDtype": "fp8", + "kernelGeneration": "v1", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1351, + "achievedFraction": 0.1316, "configuredUnits": 20, - "deviceUnits": 148, + "deviceUnits": 152, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -9469,58 +9541,58 @@ }, "placement": { "kind": "packed", - "nodes": 1, + "nodes": 2, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "14ded8461f2636c", - "workloadId": "set:8:120a8dc1dba92ca9", - "workloadSource": "canonical-serialized", + "traceSignature": "74444524b5db510", + "workloadId": null, + "workloadSource": "seeded-runtime", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", + "backendVersion": 
"1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28285640709", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285640709", - "createdAt": "2026-06-27T09:49:22.371406+00:00", - "sha": "149586650dbed5b7579537347e9489d5b41543c1" + "id": "28374335449", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449", + "createdAt": "2026-06-29T13:08:20Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 56.63999915122986, - "p90": 58.27200040221214, - "p95": 60.28800085186958, - "p99": 68.92800331115723 + "p50": 441.3119852542877, + "p90": 461.91999316215515, + "p95": 468.03200244903564, + "p99": 478.7200093269348 }, "combine": { - "p50": 65.47199934720993, - "p90": 66.27199798822403, - "p95": 66.39999896287918, - "p99": 69.21599805355072 + "p50": 74.94399696588516, + "p90": 81.24800026416779, + "p95": 84.73599702119827, + "p99": 89.72799777984619 }, "roundtrip": { - "p50": 106.84800148010254, - "p90": 112.19199746847153, - "p95": 113.08799684047699, - "p99": 127.96799838542938 + "p50": 487.61600255966187, + "p90": 510.047972202301, + "p95": 515.936017036438, + "p99": 529.5360088348389 }, "isolatedSum": { - "p50": 122.11199849843979, - "p90": 124.54399839043617, - "p95": 126.68799981474876, - "p99": 138.14400136470795 + "p50": 516.2559822201729, + "p90": 543.1679934263229, + "p95": 552.7679994702339, + "p99": 568.448007106781 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 444416, - "combineLogicalBytes": 444416, - "fanoutMean": 3.875, - "recvTokensMax": 8, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, "stragglerRank": 4, "correct": true, "samplesPooled": 600, @@ -9530,34 +9602,34 @@ "tokensPerRank": 2, "globalTokens": 16, "dispatch": { - "p50": 56.86400085687637, - 
"p90": 58.59199911355972, - "p95": 60.99199876189232, - "p99": 64.41599875688553 + "p50": 438.7199878692627, + "p90": 463.9680087566376, + "p95": 599.9680161476135, + "p99": 2199.712038040161 }, "combine": { - "p50": 65.88800251483917, - "p90": 66.52799993753433, - "p95": 67.52000004053116, - "p99": 78.015998005867 + "p50": 76.80000364780426, + "p90": 83.80799740552902, + "p95": 88.67199718952179, + "p99": 111.90400272607803 }, "roundtrip": { - "p50": 105.92000186443329, - "p90": 112.76800185441971, - "p95": 113.56800049543381, - "p99": 130.3360015153885 + "p50": 486.7520034313202, + "p90": 508.1599950790405, + "p95": 513.4720206260681, + "p99": 523.6799716949463 }, "isolatedSum": { - "p50": 122.75200337171555, - "p90": 125.11999905109406, - "p95": 128.51199880242348, - "p99": 142.43199676275253 + "p50": 515.519991517067, + "p90": 547.7760061621666, + "p95": 688.6400133371353, + "p99": 2311.616040766239 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 845824, - "combineLogicalBytes": 845824, - "fanoutMean": 3.6875, - "recvTokensMax": 16, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, "stragglerRank": 4, "correct": true, "samplesPooled": 600, @@ -9567,35 +9639,35 @@ "tokensPerRank": 4, "globalTokens": 32, "dispatch": { - "p50": 58.400001376867294, - "p90": 61.08799949288368, - "p95": 64.19199705123901, - "p99": 66.72000139951706 + "p50": 444.12800669670105, + "p90": 463.9360010623932, + "p95": 468.51199865341187, + "p99": 480.99198937416077 }, "combine": { - "p50": 66.17599725723267, - "p90": 67.48799979686737, - "p95": 68.54400038719177, - "p99": 76.31999999284744 + "p50": 77.7600035071373, + "p90": 83.77599716186523, + "p95": 85.9839990735054, + "p99": 90.40000289678574 }, "roundtrip": { - "p50": 107.96800255775452, - "p90": 113.0559965968132, - "p95": 115.64800143241882, - "p99": 122.01599776744843 + "p50": 488.319993019104, + "p90": 511.48802042007446, + "p95": 519.648015499115, + "p99": 
591.7440056800842 }, "isolatedSum": { - "p50": 124.57599863409996, - "p90": 128.57599928975105, - "p95": 132.7359974384308, - "p99": 143.0400013923645 + "p50": 521.8880102038383, + "p90": 547.7119982242584, + "p95": 554.4959977269173, + "p99": 571.3919922709465 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1691648, - "combineLogicalBytes": 1691648, - "fanoutMean": 3.6875, - "recvTokensMax": 32, - "stragglerRank": 4, + "dispatchLogicalBytes": 1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -9604,35 +9676,35 @@ "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 58.6559996008873, - "p90": 60.575999319553375, - "p95": 62.144000083208084, - "p99": 69.47200000286102 + "p50": 436.8000030517578, + "p90": 455.9360146522522, + "p95": 461.08800172805786, + "p99": 471.19998931884766 }, "combine": { - "p50": 68.00000369548798, - "p90": 76.28799974918365, - "p95": 76.54400169849396, - "p99": 77.60000228881836 + "p50": 79.6160027384758, + "p90": 85.63199639320374, + "p95": 89.15200084447861, + "p99": 96.3520035147667 }, "roundtrip": { - "p50": 117.95199662446976, - "p90": 123.6800029873848, - "p95": 125.11999905109406, - "p99": 143.2960033416748 + "p50": 485.4399859905243, + "p90": 505.5999755859375, + "p95": 511.2000107765198, + "p99": 520.5119848251343 }, "isolatedSum": { - "p50": 126.65600329637527, - "p90": 136.86399906873703, - "p95": 138.68800178170204, - "p99": 147.07200229167938 + "p50": 516.4160057902336, + "p90": 541.5680110454559, + "p95": 550.2400025725365, + "p99": 567.5519928336143 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 3354624, - "combineLogicalBytes": 3354624, - "fanoutMean": 3.65625, - "recvTokensMax": 64, - "stragglerRank": 7, + "dispatchLogicalBytes": 2487296, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, 
"trials": 3 @@ -9641,35 +9713,35 @@ "tokensPerRank": 16, "globalTokens": 128, "dispatch": { - "p50": 59.93599817156792, - "p90": 65.8240020275116, - "p95": 69.40799951553345, - "p99": 84.54400300979614 + "p50": 441.2800073623657, + "p90": 459.83999967575073, + "p95": 464.4800126552582, + "p99": 475.48800706863403 }, "combine": { - "p50": 68.12799721956253, - "p90": 76.12799853086472, - "p95": 76.7040029168129, - "p99": 77.85599678754807 + "p50": 82.11199939250946, + "p90": 87.77599781751633, + "p95": 90.59199690818787, + "p99": 96.25600278377533 }, "roundtrip": { - "p50": 122.97599762678146, - "p90": 126.43200159072876, - "p95": 127.32799351215363, - "p99": 132.47999548912048 + "p50": 490.27198553085327, + "p90": 511.1039876937866, + "p95": 517.2799825668335, + "p99": 531.1999917030334 }, "isolatedSum": { - "p50": 128.06399539113045, - "p90": 141.9520005583763, - "p95": 146.11200243234634, - "p99": 162.3999997973442 + "p50": 523.3920067548752, + "p90": 547.6159974932671, + "p95": 555.072009563446, + "p99": 571.7440098524094 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 6537216, - "combineLogicalBytes": 6537216, - "fanoutMean": 3.5625, - "recvTokensMax": 127, - "stragglerRank": 7, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -9678,35 +9750,35 @@ "tokensPerRank": 32, "globalTokens": 256, "dispatch": { - "p50": 74.43200051784515, - "p90": 77.37600058317184, - "p95": 78.17599922418594, - "p99": 81.4720019698143 + "p50": 438.84798884391785, + "p90": 456.6720128059387, + "p95": 461.760014295578, + "p99": 474.3039906024933 }, "combine": { - "p50": 77.72800326347351, - "p90": 78.52800190448761, - "p95": 78.68800312280655, - "p99": 90.68799763917923 + "p50": 87.16800063848495, + "p90": 93.79199892282486, + "p95": 97.75999933481216, + "p99": 103.93600165843964 }, "roundtrip": { - "p50": 129.2479932308197, - "p90": 
133.59999656677246, - "p95": 134.8160058259964, - "p99": 141.63200557231903 + "p50": 489.984005689621, + "p90": 509.7919702529907, + "p95": 515.1360034942627, + "p99": 524.511992931366 }, "isolatedSum": { - "p50": 152.16000378131866, - "p90": 155.90400248765945, - "p95": 156.8640023469925, - "p99": 172.15999960899353 + "p50": 526.0159894824028, + "p90": 550.4640117287636, + "p95": 559.5200136303902, + "p99": 578.2399922609329 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 12859392, - "combineLogicalBytes": 12859392, - "fanoutMean": 3.50390625, - "recvTokensMax": 255, - "stragglerRank": 4, + "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -9715,35 +9787,35 @@ "tokensPerRank": 64, "globalTokens": 512, "dispatch": { - "p50": 79.52000200748444, - "p90": 84.86399799585342, - "p95": 85.69599688053131, - "p99": 103.00800204277039 + "p50": 432.73600935935974, + "p90": 452.5119960308075, + "p95": 458.9439928531647, + "p99": 469.92000937461853 }, "combine": { - "p50": 90.68799763917923, - "p90": 92.22400188446045, - "p95": 93.72799843549728, - "p99": 102.27199643850327 + "p50": 101.47199779748917, + "p90": 107.90400207042694, + "p95": 109.82400178909302, + "p99": 115.7120019197464 }, "roundtrip": { - "p50": 158.07999670505524, - "p90": 162.84799575805664, - "p95": 163.68000209331512, - "p99": 179.80800569057465 + "p50": 508.54402780532837, + "p90": 527.3920297622681, + "p95": 533.1199765205383, + "p99": 546.8479990959167 }, "isolatedSum": { - "p50": 170.20799964666367, - "p90": 177.08799988031387, - "p95": 179.4239953160286, - "p99": 205.27999848127365 + "p50": 534.2080071568489, + "p90": 560.4159981012344, + "p95": 568.7679946422577, + "p99": 585.6320112943649 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 25145344, - "combineLogicalBytes": 25145344, - "fanoutMean": 3.42578125, - "recvTokensMax": 510, - 
"stragglerRank": 4, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -9752,35 +9824,35 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 102.52799838781357, - "p90": 109.79200154542923, - "p95": 111.68000102043152, - "p99": 126.5919953584671 + "p50": 440.19201397895813, + "p90": 462.1120095252991, + "p95": 466.75199270248413, + "p99": 481.7279875278473 }, "combine": { - "p50": 126.3359934091568, - "p90": 127.55200266838074, - "p95": 127.74400413036346, - "p99": 134.3040019273758 + "p50": 120.64000219106674, + "p90": 127.71199643611908, + "p95": 130.94399869441986, + "p99": 136.63999736309052 }, "roundtrip": { - "p50": 209.50399339199066, - "p90": 217.3759937286377, - "p95": 220.92799842357635, - "p99": 231.55200481414795 + "p50": 521.664023399353, + "p90": 539.0080213546753, + "p95": 542.0799851417542, + "p99": 557.5680136680603 }, "isolatedSum": { - "p50": 228.86399179697037, - "p90": 237.34400421380997, - "p95": 239.42400515079498, - "p99": 260.8959972858429 + "p50": 560.8320161700249, + "p90": 589.8240059614182, + "p95": 597.695991396904, + "p99": 618.3679848909378 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 49946624, - "combineLogicalBytes": 49946624, - "fanoutMean": 3.40234375, - "recvTokensMax": 1022, - "stragglerRank": 4, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -9788,47 +9860,48 @@ ] }, { - "id": "cx-b49699d8", - "identity": "b300|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-moderate+eplb|8|decode|normal|none|none|0|tuned||a8f501af7004836", - "colorKey": "b300_4eade0db", - "comparisonKey": "4a0af3f3eb467c05", + "id": "cx-b9eefa34", + "identity": 
"gb200|deepep|v1|7168|8|384|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||6d507ec2ec8998f", + "colorKey": "gb200_b0118480", + "comparisonKey": "b951703b8acd7879", "schemaVersion": 3, - "generatedAt": "2026-06-27T09:49:28.247452+00:00", + "generatedAt": "2026-06-29T13:52:42.590848+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_14", - "sku": "b300", + "publicationStatus": "comparable-experimental", + "runner": "gb200-nv_0", + "sku": "gb200", "backend": "deepep", "phase": "decode", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", + "measurementContract": "runtime-visible-v1", + "topologyClass": "gb200-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep · bf16 · zipf-moderate+eplb", - "model": "DeepSeek-V3 (EPLB physical)", + "label": "GB200 EP8 · deepep · fp8", + "model": "Kimi-K2", "shape": { "hidden": 7168, "topk": 8, - "experts": 288, - "routing": "zipf-moderate", - "routingLabel": "zipf-moderate+eplb", + "experts": 384, + "routing": "uniform", + "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", - "eplbEnabled": true, - "dispatchDtype": "bf16", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v1", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1351, + "achievedFraction": 0.1316, "configuredUnits": 20, - "deviceUnits": 148, + "deviceUnits": 152, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -9836,59 +9909,59 @@ }, "placement": { "kind": "packed", - "nodes": 1, + "nodes": 2, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "a8f501af7004836", - "workloadId": 
"set:8:120a8dc1dba92ca9", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": 4.927734375, - "eplbImbalanceAfter": 1.0006103515625, - "backendVersion": "1.2.1", + "traceSignature": "6d507ec2ec8998f", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28285643524", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285643524", - "createdAt": "2026-06-27T09:49:28.247452+00:00", - "sha": "149586650dbed5b7579537347e9489d5b41543c1" + "id": "28374335449", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449", + "createdAt": "2026-06-29T13:08:20Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 57.8560009598732, - "p90": 60.32000109553337, - "p95": 62.55999952554703, - "p99": 75.26399940252304 + "p50": 355.29598593711853, + "p90": 370.7199990749359, + "p95": 376.15999579429626, + "p99": 386.4000141620636 }, "combine": { - "p50": 66.01600348949432, - "p90": 66.39999896287918, - "p95": 66.56000018119812, - "p99": 69.82400268316269 + "p50": 71.26399874687195, + "p90": 76.06399804353714, + "p95": 78.46400141716003, + "p99": 82.71999657154083 }, "roundtrip": { - "p50": 107.77600109577179, - "p90": 113.98400366306305, - "p95": 115.10399729013443, - "p99": 123.80799651145935 + "p50": 395.04000544548035, + "p90": 405.69600462913513, + "p95": 407.74399042129517, + "p99": 414.5919978618622 }, "isolatedSum": { - "p50": 123.87200444936752, - "p90": 126.72000005841255, - "p95": 129.11999970674515, - "p99": 145.08800208568573 + "p50": 426.5599846839905, + "p90": 446.78399711847305, + "p95": 454.6239972114563, + "p99": 469.12001073360443 }, "roundtripMeasured": true, - 
"dispatchLogicalBytes": 616448, - "combineLogicalBytes": 616448, - "fanoutMean": 5.375, - "recvTokensMax": 7, - "stragglerRank": 4, + "dispatchLogicalBytes": 301056, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 @@ -9897,35 +9970,35 @@ "tokensPerRank": 2, "globalTokens": 16, "dispatch": { - "p50": 57.95200169086456, - "p90": 59.55199897289276, - "p95": 61.02399900555611, - "p99": 63.35999816656113 + "p50": 353.0240058898926, + "p90": 365.9200072288513, + "p95": 368.0959939956665, + "p99": 376.800000667572 }, "combine": { - "p50": 66.23999774456024, - "p90": 67.9360032081604, - "p95": 69.11999732255936, - "p99": 78.20799946784973 + "p50": 72.80000299215317, + "p90": 77.98399776220322, + "p95": 81.34400099515915, + "p99": 86.33600175380707 }, "roundtrip": { - "p50": 107.00800269842148, - "p90": 109.21599715948105, - "p95": 111.13599687814713, - "p99": 122.27199971675873 + "p50": 400.60800313949585, + "p90": 412.06398606300354, + "p95": 416.8640077114105, + "p99": 427.7760088443756 }, "isolatedSum": { - "p50": 124.1919994354248, - "p90": 127.48800218105316, - "p95": 130.14399632811546, - "p99": 141.56799763441086 + "p50": 425.82400888204575, + "p90": 443.90400499105453, + "p95": 449.43999499082565, + "p99": 463.1360024213791 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1204224, - "combineLogicalBytes": 1204224, - "fanoutMean": 5.25, + "dispatchLogicalBytes": 609280, + "combineLogicalBytes": 1218560, + "fanoutMean": 5.3125, "recvTokensMax": 14, - "stragglerRank": 4, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -9934,35 +10007,35 @@ "tokensPerRank": 4, "globalTokens": 32, "dispatch": { - "p50": 59.74400043487549, - "p90": 61.85600161552429, - "p95": 63.968002796173096, - "p99": 73.44000041484833 + "p50": 359.26398634910583, + "p90": 388.92799615859985, + "p95": 398.5919952392578, + "p99": 414.5919978618622 }, "combine": { - 
"p50": 67.55200028419495, - "p90": 69.24799829721451, - "p95": 71.99999690055847, - "p99": 77.72800326347351 + "p50": 75.16799867153168, + "p90": 80.64000308513641, + "p95": 83.48800241947174, + "p99": 88.86399865150452 }, "roundtrip": { - "p50": 118.27199906110764, - "p90": 124.70400333404541, - "p95": 126.20800733566284, - "p99": 130.11200726032257 + "p50": 406.3360095024109, + "p90": 429.82399463653564, + "p95": 435.07200479507446, + "p99": 447.07199931144714 }, "isolatedSum": { - "p50": 127.29600071907043, - "p90": 131.1039999127388, - "p95": 135.96799969673157, - "p99": 151.16800367832184 + "p50": 434.4319850206375, + "p90": 469.56799924373627, + "p95": 482.07999765872955, + "p99": 503.4559965133667 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2394112, - "combineLogicalBytes": 2394112, - "fanoutMean": 5.21875, - "recvTokensMax": 24, - "stragglerRank": 4, + "dispatchLogicalBytes": 1204224, + "combineLogicalBytes": 2408448, + "fanoutMean": 5.25, + "recvTokensMax": 26, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -9971,35 +10044,35 @@ "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 60.22400036454201, - "p90": 62.20800057053566, - "p95": 64.19199705123901, - "p99": 81.11999928951263 + "p50": 355.8720052242279, + "p90": 369.8880076408386, + "p95": 374.752014875412, + "p99": 403.4239947795868 }, "combine": { - "p50": 68.06399673223495, - "p90": 70.0799971818924, - "p95": 77.15199887752533, - "p99": 79.16799932718277 + "p50": 76.38400048017502, + "p90": 81.50400221347809, + "p95": 85.56800335645676, + "p99": 93.91999989748001 }, "roundtrip": { - "p50": 123.00799787044525, - "p90": 127.10399925708771, - "p95": 127.68000364303589, - "p99": 130.46400249004364 + "p50": 403.6799967288971, + "p90": 417.05599427223206, + "p95": 421.63199186325073, + "p99": 437.3759925365448 }, "isolatedSum": { - "p50": 128.28799709677696, - "p90": 132.28799775242805, - "p95": 141.34399592876434, - "p99": 160.2879986166954 + "p50": 
432.2560057044029, + "p90": 451.3920098543167, + "p95": 460.32001823186874, + "p99": 497.3439946770668 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4630528, - "combineLogicalBytes": 4630528, - "fanoutMean": 5.046875, - "recvTokensMax": 45, - "stragglerRank": 4, + "dispatchLogicalBytes": 2415616, + "combineLogicalBytes": 4831232, + "fanoutMean": 5.265625, + "recvTokensMax": 48, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -10008,35 +10081,35 @@ "tokensPerRank": 16, "globalTokens": 128, "dispatch": { - "p50": 65.95200300216675, - "p90": 69.72800195217133, - "p95": 72.41600006818771, - "p99": 76.09599828720093 + "p50": 354.8159897327423, + "p90": 369.6640133857727, + "p95": 374.5279908180237, + "p99": 380.22398948669434 }, "combine": { - "p50": 68.70400160551071, - "p90": 77.63200253248215, - "p95": 78.3040001988411, - "p99": 92.0960009098053 + "p50": 78.14399898052216, + "p90": 82.56000280380249, + "p95": 85.82399785518646, + "p99": 90.27200192213058 }, "roundtrip": { - "p50": 120.99199742078781, - "p90": 126.08000636100769, - "p95": 127.20000743865967, - "p99": 143.13599467277527 + "p50": 402.3680090904236, + "p90": 413.91998529434204, + "p95": 418.14398765563965, + "p99": 423.96798729896545 }, "isolatedSum": { - "p50": 134.65600460767746, - "p90": 147.36000448465347, - "p95": 150.7200002670288, - "p99": 168.19199919700623 + "p50": 432.95998871326447, + "p90": 452.2240161895752, + "p95": 460.35198867321014, + "p99": 470.4959914088249 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 9447424, - "combineLogicalBytes": 9447424, - "fanoutMean": 5.1484375, + "dispatchLogicalBytes": 4924416, + "combineLogicalBytes": 9848832, + "fanoutMean": 5.3671875, "recvTokensMax": 91, - "stragglerRank": 3, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 @@ -10045,35 +10118,35 @@ "tokensPerRank": 32, "globalTokens": 256, "dispatch": { - "p50": 70.11199742555618, - "p90": 74.43200051784515, - "p95": 
75.42400062084198, - "p99": 80.25600016117096 + "p50": 353.66401076316833, + "p90": 369.2159950733185, + "p95": 373.53599071502686, + "p99": 381.1520040035248 }, "combine": { - "p50": 78.40000092983246, - "p90": 79.1039988398552, - "p95": 80.12799918651581, - "p99": 90.46400338411331 + "p50": 83.39200168848038, + "p90": 88.60799670219421, + "p95": 91.61599725484848, + "p99": 96.8639999628067 }, "roundtrip": { - "p50": 133.37600231170654, - "p90": 137.2160017490387, - "p95": 138.20800185203552, - "p99": 140.86399972438812 + "p50": 408.9280068874359, + "p90": 420.54399847984314, + "p95": 423.93600940704346, + "p99": 454.23999428749084 }, "isolatedSum": { - "p50": 148.51199835538864, - "p90": 153.53599935770035, - "p95": 155.5519998073578, - "p99": 170.72000354528427 + "p50": 437.0560124516487, + "p90": 457.8239917755127, + "p95": 465.15198796987534, + "p99": 478.0160039663315 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19023872, - "combineLogicalBytes": 19023872, - "fanoutMean": 5.18359375, + "dispatchLogicalBytes": 9748480, + "combineLogicalBytes": 19496960, + "fanoutMean": 5.3125, "recvTokensMax": 178, - "stragglerRank": 4, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 @@ -10082,35 +10155,35 @@ "tokensPerRank": 64, "globalTokens": 512, "dispatch": { - "p50": 88.44800293445587, - "p90": 90.7519981265068, - "p95": 91.51999652385712, - "p99": 95.32800316810608 + "p50": 365.34398794174194, + "p90": 422.87999391555786, + "p95": 429.3760061264038, + "p99": 442.30398535728455 }, "combine": { - "p50": 92.3520028591156, - "p90": 101.24800354242325, - "p95": 101.82400047779083, - "p99": 115.07199704647064 + "p50": 100.44799745082855, + "p90": 105.8880016207695, + "p95": 108.38399827480316, + "p99": 115.42399972677231 }, "roundtrip": { - "p50": 161.40800714492798, - "p90": 165.3759926557541, - "p95": 166.72000288963318, - "p99": 173.21600019931793 + "p50": 439.10399079322815, + "p90": 491.10400676727295, + "p95": 499.424010515213, + 
"p99": 508.4480047225952 }, "isolatedSum": { - "p50": 180.80000579357147, - "p90": 192.00000166893005, - "p95": 193.34399700164795, - "p99": 210.40000021457672 + "p50": 465.7919853925705, + "p90": 528.7679955363274, + "p95": 537.760004401207, + "p99": 557.7279850840569 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 38148096, - "combineLogicalBytes": 38148096, - "fanoutMean": 5.197265625, - "recvTokensMax": 350, - "stragglerRank": 4, + "dispatchLogicalBytes": 19418112, + "combineLogicalBytes": 38836224, + "fanoutMean": 5.291015625, + "recvTokensMax": 372, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 @@ -10119,35 +10192,35 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 94.81599926948547, - "p90": 97.63199836015701, - "p95": 99.80800002813339, - "p99": 106.84800148010254 + "p50": 355.29598593711853, + "p90": 372.8640079498291, + "p95": 379.39199805259705, + "p99": 402.8159976005554 }, "combine": { - "p50": 115.23199826478958, - "p90": 116.12799763679504, - "p95": 117.3119992017746, - "p99": 179.83999848365784 + "p50": 119.23199892044067, + "p90": 124.03199821710587, + "p95": 126.68800354003906, + "p99": 130.52800297737122 }, "roundtrip": { - "p50": 193.53599846363068, - "p90": 199.16799664497375, - "p95": 200.41599869728088, - "p99": 207.48800039291382 + "p50": 448.3200013637543, + "p90": 460.5120122432709, + "p95": 463.03999423980713, + "p99": 469.760000705719 }, "isolatedSum": { - "p50": 210.04799753427505, - "p90": 213.75999599695206, - "p95": 217.119999229908, - "p99": 286.6879999637604 + "p50": 474.5279848575592, + "p90": 496.89600616693497, + "p95": 506.0800015926361, + "p99": 533.3440005779266 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 76955648, - "combineLogicalBytes": 76955648, - "fanoutMean": 5.2421875, - "recvTokensMax": 687, - "stragglerRank": 6, + "dispatchLogicalBytes": 38757376, + "combineLogicalBytes": 77514752, + "fanoutMean": 5.2802734375, + "recvTokensMax": 707, + 
"stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -10155,47 +10228,48 @@ ] }, { - "id": "cx-686fd558", - "identity": "b300|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|decode|normal|none|none|0|tuned||a8f501af7004836", - "colorKey": "b300_f1ea991b", - "comparisonKey": "72d679cfb4846306", + "id": "cx-8173576b", + "identity": "gb200|deepep|v1|7168|8|256|fp8|normal|cached-layout-comm-only-v1|uniform|8|decode|normal|none|none|0|tuned||74444524b5db510", + "colorKey": "gb200_7e970144", + "comparisonKey": "04982d471558d8a8", "schemaVersion": 3, - "generatedAt": "2026-06-27T09:48:52.585093+00:00", + "generatedAt": "2026-06-29T13:50:48.119671+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_02", - "sku": "b300", + "publicationStatus": "comparable-experimental", + "runner": "gb200-nv_0", + "sku": "gb200", "backend": "deepep", "phase": "decode", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", + "measurementContract": "cached-layout-comm-only-v1", + "topologyClass": "gb200-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep · bf16 · zipf+eplb", - "model": "DeepSeek-V3 (EPLB physical)", + "label": "GB200 EP8 · deepep · fp8 [cl]", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, - "experts": 288, - "routing": "zipf", - "routingLabel": "zipf+eplb", + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", - "eplbEnabled": true, - "dispatchDtype": "bf16", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v1", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1351, + "achievedFraction": 0.1316, "configuredUnits": 
20, - "deviceUnits": 148, + "deviceUnits": 152, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -10203,59 +10277,59 @@ }, "placement": { "kind": "packed", - "nodes": 1, + "nodes": 2, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "a8f501af7004836", - "workloadId": "set:8:f5576e2b712d38c3", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": 4.927734375, - "eplbImbalanceAfter": 1.0006103515625, - "backendVersion": "1.2.1", + "traceSignature": "74444524b5db510", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28285622991", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285622991", - "createdAt": "2026-06-27T09:48:52.585093+00:00", - "sha": "149586650dbed5b7579537347e9489d5b41543c1" + "id": "28374335449", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449", + "createdAt": "2026-06-29T13:08:20Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 58.01599845290184, - "p90": 59.58399921655655, - "p95": 60.896001756191254, - "p99": 72.35199958086014 + "p50": 69.11999732255936, + "p90": 79.16799932718277, + "p95": 82.40000158548355, + "p99": 91.20000153779984 }, "combine": { - "p50": 66.17599725723267, - "p90": 66.880002617836, - "p95": 67.4239993095398, - "p99": 80.73599636554718 + "p50": 71.42399996519089, + "p90": 75.52000135183334, + "p95": 79.8719972372055, + "p99": 84.3840017914772 }, "roundtrip": { - "p50": 107.55199939012527, - "p90": 113.56800049543381, - "p95": 114.9120032787323, - "p99": 131.8719983100891 + "p50": 202.4639993906021, + "p90": 214.9440050125122, + "p95": 
218.49599480628967, + "p99": 231.83999955654144 }, "isolatedSum": { - "p50": 124.1919957101345, - "p90": 126.46400183439255, - "p95": 128.32000106573105, - "p99": 153.08799594640732 + "p50": 140.54399728775024, + "p90": 154.6880006790161, + "p95": 162.27199882268906, + "p99": 175.58400332927704 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 616448, - "combineLogicalBytes": 616448, - "fanoutMean": 5.375, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, "recvTokensMax": 7, - "stragglerRank": 4, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 @@ -10264,35 +10338,35 @@ "tokensPerRank": 2, "globalTokens": 16, "dispatch": { - "p50": 58.27200040221214, - "p90": 59.99999865889549, - "p95": 60.7680007815361, - "p99": 74.0479975938797 + "p50": 69.31199878454208, + "p90": 78.33600044250488, + "p95": 81.63200318813324, + "p99": 88.51200342178345 }, "combine": { - "p50": 66.59200042486191, - "p90": 67.52000004053116, - "p95": 68.64000111818314, - "p99": 70.88000327348709 + "p50": 72.28799909353256, + "p90": 76.31999999284744, + "p95": 78.5600021481514, + "p99": 82.56000280380249 }, "roundtrip": { - "p50": 107.744000852108, - "p90": 109.79200154542923, - "p95": 111.29599809646606, - "p99": 121.72800302505493 + "p50": 203.0079960823059, + "p90": 215.55200219154358, + "p95": 218.6560034751892, + "p99": 230.0799936056137 }, "isolatedSum": { - "p50": 124.86400082707405, - "p90": 127.51999869942665, - "p95": 129.40800189971924, - "p99": 144.9280008673668 + "p50": 141.59999787807465, + "p90": 154.65600043535233, + "p95": 160.19200533628464, + "p99": 171.07200622558594 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1204224, - "combineLogicalBytes": 1204224, - "fanoutMean": 5.25, - "recvTokensMax": 14, - "stragglerRank": 4, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 
3 @@ -10301,35 +10375,35 @@ "tokensPerRank": 4, "globalTokens": 32, "dispatch": { - "p50": 61.664000153541565, - "p90": 82.40000158548355, - "p95": 86.36800199747086, - "p99": 92.92799979448318 + "p50": 70.01599669456482, + "p90": 79.48800176382065, + "p95": 82.49600231647491, + "p99": 96.09600156545639 }, "combine": { - "p50": 68.54400038719177, - "p90": 76.4160007238388, - "p95": 77.15199887752533, - "p99": 79.29600030183792 + "p50": 74.94399696588516, + "p90": 80.03199845552444, + "p95": 83.23200047016144, + "p99": 87.2960016131401 }, "roundtrip": { - "p50": 124.15999919176102, - "p90": 127.36000120639801, - "p95": 128.48000228405, - "p99": 144.57599818706512 + "p50": 207.64799416065216, + "p90": 219.55199539661407, + "p95": 222.59199619293213, + "p99": 229.40799593925476 }, "isolatedSum": { - "p50": 130.20800054073334, - "p90": 158.81600230932236, - "p95": 163.52000087499619, - "p99": 172.2240000963211 + "p50": 144.95999366044998, + "p90": 159.5200002193451, + "p95": 165.72800278663635, + "p99": 183.3920031785965 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2394112, - "combineLogicalBytes": 2394112, - "fanoutMean": 5.21875, - "recvTokensMax": 24, - "stragglerRank": 5, + "dispatchLogicalBytes": 1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 @@ -10338,35 +10412,35 @@ "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 60.32000109553337, - "p90": 62.49599903821945, - "p95": 64.00000303983688, - "p99": 79.23199981451035 + "p50": 70.17599791288376, + "p90": 79.71200346946716, + "p95": 82.5280025601387, + "p99": 91.23200178146362 }, "combine": { - "p50": 68.44799965620041, - "p90": 77.44000107049942, - "p95": 78.23999971151352, - "p99": 79.55200225114822 + "p50": 76.1599987745285, + "p90": 82.04799890518188, + "p95": 84.6719965338707, + "p99": 94.81599926948547 }, "roundtrip": { - "p50": 121.44000083208084, - "p90": 
126.43200159072876, - "p95": 127.61600315570831, - "p99": 132.03200697898865 + "p50": 207.39200711250305, + "p90": 218.36799383163452, + "p95": 222.01600670814514, + "p99": 227.03999280929565 }, "isolatedSum": { - "p50": 128.76800075173378, - "p90": 139.93600010871887, - "p95": 142.2400027513504, - "p99": 158.78400206565857 + "p50": 146.33599668741226, + "p90": 161.76000237464905, + "p95": 167.1999990940094, + "p99": 186.0480010509491 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4630528, - "combineLogicalBytes": 4630528, - "fanoutMean": 5.046875, - "recvTokensMax": 45, - "stragglerRank": 4, + "dispatchLogicalBytes": 2487296, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 @@ -10375,35 +10449,35 @@ "tokensPerRank": 16, "globalTokens": 128, "dispatch": { - "p50": 66.78400188684464, - "p90": 73.11999797821045, - "p95": 74.81600344181061, - "p99": 79.19999957084656 + "p50": 70.97599655389786, + "p90": 80.48000186681747, + "p95": 83.96799862384796, + "p99": 99.61599856615067 }, "combine": { - "p50": 77.40800082683563, - "p90": 78.72000336647034, - "p95": 79.16799932718277, - "p99": 81.50400221347809 + "p50": 78.23999971151352, + "p90": 83.5840031504631, + "p95": 86.56000345945358, + "p99": 91.00800007581711 }, "roundtrip": { - "p50": 120.64000219106674, - "p90": 123.77600371837616, - "p95": 125.95200538635254, - "p99": 143.0719941854477 + "p50": 209.75999534130096, + "p90": 221.0559993982315, + "p95": 223.36000204086304, + "p99": 232.35200345516205 }, "isolatedSum": { - "p50": 144.19200271368027, - "p90": 151.8400013446808, - "p95": 153.98400276899338, - "p99": 160.70400178432465 + "p50": 149.21599626541138, + "p90": 164.06400501728058, + "p95": 170.52800208330154, + "p99": 190.62399864196777 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 9447424, - "combineLogicalBytes": 9447424, - "fanoutMean": 5.1484375, - "recvTokensMax": 91, - 
"stragglerRank": 7, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 @@ -10412,35 +10486,35 @@ "tokensPerRank": 32, "globalTokens": 256, "dispatch": { - "p50": 70.20799815654755, - "p90": 72.95999675989151, - "p95": 76.48000121116638, - "p99": 82.94399827718735 + "p50": 73.69600236415863, + "p90": 82.49600231647491, + "p95": 85.88799834251404, + "p99": 92.92799979448318 }, "combine": { - "p50": 78.78399640321732, - "p90": 79.80799674987793, - "p95": 80.99199831485748, - "p99": 89.91999924182892 + "p50": 83.0719992518425, + "p90": 88.128000497818, + "p95": 91.07200056314468, + "p99": 95.16800194978714 }, "roundtrip": { - "p50": 133.12000036239624, - "p90": 137.7280056476593, - "p95": 138.36799561977386, - "p99": 155.10399639606476 + "p50": 214.62400257587433, + "p90": 225.92000663280487, + "p95": 229.5680046081543, + "p99": 237.5359982252121 }, "isolatedSum": { - "p50": 148.99199455976486, - "p90": 152.76799350976944, - "p95": 157.47199952602386, - "p99": 172.86399751901627 + "p50": 156.76800161600113, + "p90": 170.6240028142929, + "p95": 176.95999890565872, + "p99": 188.09600174427032 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19023872, - "combineLogicalBytes": 19023872, - "fanoutMean": 5.18359375, - "recvTokensMax": 178, - "stragglerRank": 6, + "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 @@ -10449,35 +10523,35 @@ "tokensPerRank": 64, "globalTokens": 512, "dispatch": { - "p50": 89.66399729251862, - "p90": 91.93599969148636, - "p95": 92.8959995508194, - "p99": 102.30399668216705 + "p50": 82.14399963617325, + "p90": 90.4960036277771, + "p95": 93.63199770450592, + "p99": 100.25600343942642 }, "combine": { - "p50": 92.06400066614151, - "p90": 96.67199850082397, - "p95": 
101.72799974679947, - "p99": 103.7760004401207 + "p50": 98.4639972448349, + "p90": 104.16000336408615, + "p95": 107.16799646615982, + "p99": 111.55200004577637 }, "roundtrip": { - "p50": 161.6320013999939, - "p90": 165.43999314308167, - "p95": 166.52800142765045, - "p99": 182.68799781799316 + "p50": 232.96000063419342, + "p90": 243.8720017671585, + "p95": 249.6960014104843, + "p99": 258.6880028247833 }, "isolatedSum": { - "p50": 181.72799795866013, - "p90": 188.60799819231033, - "p95": 194.62399929761887, - "p99": 206.07999712228775 + "p50": 180.60799688100815, + "p90": 194.65600699186325, + "p95": 200.79999417066574, + "p99": 211.8080034852028 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 38148096, - "combineLogicalBytes": 38148096, - "fanoutMean": 5.197265625, - "recvTokensMax": 350, - "stragglerRank": 4, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 @@ -10486,35 +10560,35 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 94.94400024414062, - "p90": 97.4079966545105, - "p95": 100.00000149011612, - "p99": 108.96000266075134 + "p50": 93.21600198745728, + "p90": 99.10400211811066, + "p95": 102.08000242710114, + "p99": 108.15999656915665 }, "combine": { - "p50": 115.58400094509125, - "p90": 117.21599847078323, - "p95": 118.56000125408173, - "p99": 138.3039951324463 + "p50": 117.72800236940384, + "p90": 122.30399996042252, + "p95": 125.82400441169739, + "p99": 130.68799674510956 }, "roundtrip": { - "p50": 197.2160041332245, - "p90": 202.39999890327454, - "p95": 204.0960043668747, - "p99": 209.6640020608902 + "p50": 253.6959946155548, + "p90": 265.1520073413849, + "p95": 268.0639922618866, + "p99": 276.2880027294159 }, "isolatedSum": { - "p50": 210.52800118923187, - "p90": 214.62399512529373, - "p95": 218.56000274419785, - "p99": 247.26399779319763 + "p50": 210.94400435686111, + "p90": 
221.40800207853317, + "p95": 227.90400683879852, + "p99": 238.8479933142662 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 76955648, - "combineLogicalBytes": 76955648, - "fanoutMean": 5.2421875, - "recvTokensMax": 687, - "stragglerRank": 4, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 @@ -10522,28 +10596,28 @@ ] }, { - "id": "cx-f0dd83d8", - "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|ac583971f94b176", - "colorKey": "b300_c1ad910f", - "comparisonKey": "80e2eefb7447672f", + "id": "cx-7963bbb8", + "identity": "gb200|deepep|v1|7168|8|256|fp8|ll|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||74444524b5db510", + "colorKey": "gb200_1849fea1", + "comparisonKey": "b109657d01c98324", "schemaVersion": 3, - "generatedAt": "2026-06-26T17:41:08.828331+00:00", + "generatedAt": "2026-06-29T13:56:37.073274+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_15", - "sku": "b300", + "publicationStatus": "diagnostic", + "runner": "gb200-nv_1", + "sku": "gb200", "backend": "deepep", "phase": "decode", - "mode": "normal", - "resourceMode": "normalized", - "suite": "resource-constrained", + "mode": "ll", + "resourceMode": "tuned", + "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", + "topologyClass": "gb200-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep · bf16 (norm)", + "label": "GB200 EP8 · deepep · fp8 LL", "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, @@ -10554,74 +10628,75 @@ "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, - "dispatchDtype": "bf16", + "dispatchDtype": "fp8", + "kernelGeneration": 
"v1", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { - "requestedFraction": 0.18, - "achievedFraction": 0.1824, - "configuredUnits": 27, - "deviceUnits": 148, - "resourceClass": "resource-constrained", - "conformanceClass": "resource-conforming", - "fixedKernel": false, - "paretoEligible": true + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false }, "placement": { "kind": "packed", - "nodes": 1, + "nodes": 2, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "ac583971f94b176", - "workloadId": "set:8:d8d49658059863f2", - "workloadSource": "canonical-serialized", + "traceSignature": "74444524b5db510", + "workloadId": null, + "workloadSource": "seeded-runtime", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", + "backendVersion": "1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28254469772", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254469772", - "createdAt": "2026-06-26T17:41:08.828331+00:00", - "sha": "60dec7d70f554e252fec87709e2be52752947db1" + "id": "28374335449", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449", + "createdAt": "2026-06-29T13:08:20Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 56.992001831531525, - "p90": 59.039998799562454, - "p95": 61.824001371860504, - "p99": 73.44000041484833 + "p50": 79.74400371313095, + "p90": 300.6080090999603, + "p95": 342.272013425827, + "p99": 381.53600692749023 }, "combine": { - "p50": 66.3359984755516, - "p90": 67.4239993095398, - "p95": 68.15999746322632, - "p99": 
77.47200131416321 + "p50": 115.4559999704361, + "p90": 120.7360029220581, + "p95": 122.36800044775009, + "p99": 124.41600114107132 }, "roundtrip": { - "p50": 106.81600123643875, - "p90": 113.08799684047699, - "p95": 114.23999816179276, - "p99": 135.6479972600937 + "p50": 1608.9279651641846, + "p90": 1614.2079830169678, + "p95": 1618.5280084609985, + "p99": 1940.4159784317017 }, "isolatedSum": { - "p50": 123.32800030708313, - "p90": 126.46399810910225, - "p95": 129.98399883508682, - "p99": 150.91200172901154 + "p50": 195.20000368356705, + "p90": 421.34401202201843, + "p95": 464.6400138735771, + "p99": 505.95200806856155 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 630784, + "dispatchLogicalBytes": 315392, "combineLogicalBytes": 630784, "fanoutMean": 5.5, - "recvTokensMax": 7, + "recvTokensMax": 14, "stragglerRank": 4, "correct": true, "samplesPooled": 600, @@ -10631,34 +10706,34 @@ "tokensPerRank": 2, "globalTokens": 16, "dispatch": { - "p50": 56.992001831531525, - "p90": 58.78400057554245, - "p95": 60.92799827456474, - "p99": 73.21599870920181 + "p50": 84.60800349712372, + "p90": 295.5839931964874, + "p95": 336.9919955730438, + "p99": 394.0800130367279 }, "combine": { - "p50": 67.32799857854843, - "p90": 69.11999732255936, - "p95": 70.65600156784058, - "p99": 79.93599772453308 + "p50": 117.08799749612808, + "p90": 121.98399752378464, + "p95": 123.45600128173828, + "p99": 128.25599312782288 }, "roundtrip": { - "p50": 106.9440022110939, - "p90": 109.40799862146378, - "p95": 110.88000237941742, - "p99": 119.39200013875961 + "p50": 1611.2960577011108, + "p90": 1616.6399717330933, + "p95": 1622.0159530639648, + "p99": 1949.6959447860718 }, "isolatedSum": { - "p50": 124.32000041007996, - "p90": 127.9039978981018, - "p95": 131.58399984240532, - "p99": 153.1519964337349 + "p50": 201.6960009932518, + "p90": 417.56799072027206, + "p95": 460.4479968547821, + "p99": 522.3360061645508 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1232896, + 
"dispatchLogicalBytes": 616448, "combineLogicalBytes": 1232896, "fanoutMean": 5.375, - "recvTokensMax": 13, + "recvTokensMax": 21, "stragglerRank": 4, "correct": true, "samplesPooled": 600, @@ -10668,34 +10743,34 @@ "tokensPerRank": 4, "globalTokens": 32, "dispatch": { - "p50": 57.792000472545624, - "p90": 59.39200147986412, - "p95": 61.28000095486641, - "p99": 68.09599697589874 + "p50": 87.80799806118011, + "p90": 282.01600909233093, + "p95": 331.32800459861755, + "p99": 385.18399000167847 }, "combine": { - "p50": 67.80800223350525, - "p90": 69.66400146484375, - "p95": 76.99199765920639, - "p99": 78.75200361013412 + "p50": 120.06399780511856, + "p90": 125.18399953842163, + "p95": 127.13600695133209, + "p99": 131.071999669075 }, "roundtrip": { - "p50": 116.22399836778641, - "p90": 122.68800288438797, - "p95": 124.35200065374374, - "p99": 127.93600559234619 + "p50": 1614.8799657821655, + "p90": 1622.2399473190308, + "p95": 1897.3759412765503, + "p99": 1955.23202419281 }, "isolatedSum": { - "p50": 125.60000270605087, - "p90": 129.05600294470787, - "p95": 138.2719986140728, - "p99": 146.84800058603287 + "p50": 207.87199586629868, + "p90": 407.20000863075256, + "p95": 458.46401154994965, + "p99": 516.2559896707535 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2480128, + "dispatchLogicalBytes": 1240064, "combineLogicalBytes": 2480128, "fanoutMean": 5.40625, - "recvTokensMax": 29, + "recvTokensMax": 39, "stragglerRank": 4, "correct": true, "samplesPooled": 600, @@ -10705,34 +10780,34 @@ "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 59.29600074887276, - "p90": 61.15199998021126, - "p95": 62.39999830722809, - "p99": 68.1919977068901 + "p50": 92.19200164079666, + "p90": 323.4559893608093, + "p95": 354.6240031719208, + "p99": 389.18399810791016 }, "combine": { - "p50": 68.38399916887283, - "p90": 77.31200009584427, - "p95": 77.72800326347351, - "p99": 78.78399640321732 + "p50": 126.14400684833527, + "p90": 130.62399625778198, + "p95": 
132.09599256515503, + "p99": 135.6160044670105 }, "roundtrip": { - "p50": 120.25599926710129, - "p90": 125.82400441169739, - "p95": 126.75200402736664, - "p99": 133.44000279903412 + "p50": 1621.4079856872559, + "p90": 1628.5439729690552, + "p95": 1897.92001247406, + "p99": 1957.4719667434692 }, "isolatedSum": { - "p50": 127.67999991774559, - "p90": 138.46400007605553, - "p95": 140.1280015707016, - "p99": 146.97599411010742 + "p50": 218.33600848913193, + "p90": 454.0799856185913, + "p95": 486.7199957370758, + "p99": 524.8000025749207 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, + "dispatchLogicalBytes": 2487296, "combineLogicalBytes": 4974592, "fanoutMean": 5.421875, - "recvTokensMax": 47, + "recvTokensMax": 74, "stragglerRank": 5, "correct": true, "samplesPooled": 600, @@ -10742,35 +10817,35 @@ "tokensPerRank": 16, "globalTokens": 128, "dispatch": { - "p50": 62.78400123119354, - "p90": 69.023996591568, - "p95": 71.03999704122543, - "p99": 76.73600316047668 + "p50": 99.39199686050415, + "p90": 334.1760039329529, + "p95": 375.0079870223999, + "p99": 404.4800102710724 }, "combine": { - "p50": 77.2479996085167, - "p90": 78.5600021481514, - "p95": 78.72000336647034, - "p99": 80.86399734020233 + "p50": 137.95199990272522, + "p90": 143.19999516010284, + "p95": 145.02400159835815, + "p99": 151.13599598407745 }, "roundtrip": { - "p50": 119.61600184440613, - "p90": 122.72000312805176, - "p95": 124.35200065374374, - "p99": 131.29599392414093 + "p50": 1634.7839832305908, + "p90": 1642.0799493789673, + "p95": 1649.2480039596558, + "p99": 1971.295952796936 }, "isolatedSum": { - "p50": 140.03200083971024, - "p90": 147.5839987397194, - "p95": 149.76000040769577, - "p99": 157.60000050067902 + "p50": 237.34399676322937, + "p90": 477.3759990930557, + "p95": 520.031988620758, + "p99": 555.6160062551498 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 9920512, + "dispatchLogicalBytes": 4960256, "combineLogicalBytes": 9920512, "fanoutMean": 5.40625, - 
"recvTokensMax": 92, - "stragglerRank": 5, + "recvTokensMax": 145, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -10779,35 +10854,35 @@ "tokensPerRank": 32, "globalTokens": 256, "dispatch": { - "p50": 69.24799829721451, - "p90": 70.91200351715088, - "p95": 73.69600236415863, - "p99": 81.69600367546082 + "p50": 118.30399930477142, + "p90": 343.23200583457947, + "p95": 377.82400846481323, + "p99": 433.1200122833252 }, "combine": { - "p50": 78.59200239181519, - "p90": 79.80799674987793, - "p95": 80.73599636554718, - "p99": 90.94399958848953 + "p50": 145.21600306034088, + "p90": 157.95199573040009, + "p95": 170.6559956073761, + "p99": 204.8960030078888 }, "roundtrip": { - "p50": 130.68799674510956, - "p90": 135.23200154304504, - "p95": 136.51199638843536, - "p99": 140.47999680042267 + "p50": 1674.9759912490845, + "p90": 1684.4160556793213, + "p95": 1693.9200162887573, + "p99": 2003.2958984375 }, "isolatedSum": { - "p50": 147.8400006890297, - "p90": 150.7200002670288, - "p95": 154.4319987297058, - "p99": 172.64000326395035 + "p50": 263.5200023651123, + "p90": 501.18400156497955, + "p95": 548.4800040721893, + "p99": 638.016015291214 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19726336, + "dispatchLogicalBytes": 9863168, "combineLogicalBytes": 19726336, "fanoutMean": 5.375, - "recvTokensMax": 182, - "stragglerRank": 7, + "recvTokensMax": 287, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -10816,35 +10891,35 @@ "tokensPerRank": 64, "globalTokens": 512, "dispatch": { - "p50": 82.49600231647491, - "p90": 92.70399808883667, - "p95": 95.0080007314682, - "p99": 99.45599734783173 + "p50": 160.3199988603592, + "p90": 346.24001383781433, + "p95": 395.6800103187561, + "p99": 445.6000030040741 }, "combine": { - "p50": 92.25600212812424, - "p90": 100.09600222110748, - "p95": 102.36799716949463, - "p99": 106.65600001811981 + "p50": 190.7840073108673, + "p90": 201.4079988002777, + "p95": 204.79999482631683, + 
"p99": 210.81599593162537 }, "roundtrip": { - "p50": 158.65600109100342, - "p90": 163.00800442695618, - "p95": 164.19200599193573, - "p99": 169.50400173664093 + "p50": 1793.12002658844, + "p90": 1805.2159547805786, + "p95": 2026.304006576538, + "p99": 2088.223934173584 }, "isolatedSum": { - "p50": 174.75200444459915, - "p90": 192.80000030994415, - "p95": 197.37599790096283, - "p99": 206.11199736595154 + "p50": 351.1040061712265, + "p90": 547.648012638092, + "p95": 600.4800051450729, + "p99": 656.4159989356995 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 38993920, + "dispatchLogicalBytes": 19496960, "combineLogicalBytes": 38993920, "fanoutMean": 5.3125, - "recvTokensMax": 367, - "stragglerRank": 7, + "recvTokensMax": 564, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 @@ -10853,35 +10928,35 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 93.91999989748001, - "p90": 95.83999961614609, - "p95": 98.04800152778625, - "p99": 104.99200224876404 + "p50": 347.80800342559814, + "p90": 371.5839982032776, + "p95": 581.8560123443604, + "p99": 651.0080099105835 }, "combine": { - "p50": 115.35999923944473, - "p90": 115.93600362539291, - "p95": 116.60800129175186, - "p99": 119.45600062608719 + "p50": 349.37599301338196, + "p90": 364.0640079975128, + "p95": 370.65601348876953, + "p99": 383.13600420951843 }, "roundtrip": { - "p50": 192.51200556755066, - "p90": 198.88000190258026, - "p95": 199.48799908161163, - "p99": 209.47200059890747 + "p50": 2056.3199520111084, + "p90": 2088.3519649505615, + "p95": 2184.4160556793213, + "p99": 2264.4801139831543 }, "isolatedSum": { - "p50": 209.27999913692474, - "p90": 211.776003241539, - "p95": 214.65600281953812, - "p99": 224.44800287485123 + "p50": 697.1839964389801, + "p90": 735.6480062007904, + "p95": 952.5120258331299, + "p99": 1034.144014120102 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, + "dispatchLogicalBytes": 38836224, "combineLogicalBytes": 77672448, 
"fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 4, + "recvTokensMax": 1104, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -10889,107 +10964,108 @@ ] }, { - "id": "cx-dede7717", - "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|decode|normal|none|none|0|normalized|0.18|ffa946582edb500", - "colorKey": "b300_0622d929", - "comparisonKey": "c4ede73885f09b56", + "id": "cx-2bd302a0", + "identity": "gb200|deepep|v1|7168|8|256|fp8|ll|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||74444524b5db510", + "colorKey": "gb200_653f32d1", + "comparisonKey": "63dcfe2a21df3808", "schemaVersion": 3, - "generatedAt": "2026-06-26T18:12:16.850895+00:00", + "generatedAt": "2026-06-29T13:57:26.133416+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_17", - "sku": "b300", + "publicationStatus": "diagnostic", + "runner": "gb200-nv_1", + "sku": "gb200", "backend": "deepep", "phase": "decode", - "mode": "normal", - "resourceMode": "normalized", - "suite": "resource-constrained", + "mode": "ll", + "resourceMode": "tuned", + "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", + "measurementContract": "runtime-visible-v1", + "topologyClass": "gb200-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep · bf16 (norm) · balanced", + "label": "GB200 EP8 · deepep · fp8 LL", "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, "experts": 256, - "routing": "balanced", - "routingLabel": "balanced", + "routing": "uniform", + "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, - "dispatchDtype": "bf16", + "dispatchDtype": "fp8", + "kernelGeneration": "v1", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { - "requestedFraction": 
0.18, - "achievedFraction": 0.1824, - "configuredUnits": 27, - "deviceUnits": 148, - "resourceClass": "resource-constrained", - "conformanceClass": "resource-conforming", - "fixedKernel": false, - "paretoEligible": true + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false }, "placement": { "kind": "packed", - "nodes": 1, + "nodes": 2, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "ffa946582edb500", - "workloadId": "set:8:7af12818400d6348", - "workloadSource": "canonical-serialized", + "traceSignature": "74444524b5db510", + "workloadId": null, + "workloadSource": "seeded-runtime", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", + "backendVersion": "1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28254508907", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254508907", - "createdAt": "2026-06-26T18:12:16.850895+00:00", - "sha": "60dec7d70f554e252fec87709e2be52752947db1" + "id": "28374335449", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449", + "createdAt": "2026-06-29T13:08:20Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { "tokensPerRank": 1, "globalTokens": 8, "dispatch": { - "p50": 57.69599974155426, - "p90": 60.06399914622307, - "p95": 61.664000153541565, - "p99": 77.7600035071373 + "p50": 79.8719972372055, + "p90": 307.6480031013489, + "p95": 347.26399183273315, + "p99": 385.5679929256439 }, "combine": { - "p50": 68.03199648857117, - "p90": 69.76000219583511, - "p95": 76.92799717187881, - "p99": 78.52800190448761 + "p50": 119.4240003824234, + "p90": 124.83199685811996, + "p95": 126.88000500202179, + "p99": 
131.77600502967834 }, "roundtrip": { - "p50": 107.80800133943558, - "p90": 110.59200018644333, - "p95": 112.19199746847153, - "p99": 128.76799702644348 + "p50": 1610.8160018920898, + "p90": 1618.5920238494873, + "p95": 1887.55202293396, + "p99": 1946.0480213165283 }, "isolatedSum": { - "p50": 125.72799623012543, - "p90": 129.82400134205818, - "p95": 138.59199732542038, - "p99": 156.2880054116249 + "p50": 199.2959976196289, + "p90": 432.47999995946884, + "p95": 474.14399683475494, + "p99": 517.3439979553223 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 917504, - "combineLogicalBytes": 917504, - "fanoutMean": 8, - "recvTokensMax": 8, - "stragglerRank": 4, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 14, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -10998,35 +11074,35 @@ "tokensPerRank": 2, "globalTokens": 16, "dispatch": { - "p50": 58.559998869895935, - "p90": 60.15999987721443, - "p95": 61.664000153541565, - "p99": 72.76800274848938 + "p50": 82.04799890518188, + "p90": 311.7760121822357, + "p95": 348.5119938850403, + "p99": 394.9120044708252 }, "combine": { - "p50": 68.25599819421768, - "p90": 76.86399668455124, - "p95": 77.53600180149078, - "p99": 79.9039974808693 + "p50": 120.19199877977371, + "p90": 124.89599734544754, + "p95": 126.46399438381195, + "p99": 128.92800569534302 }, "roundtrip": { - "p50": 116.22399836778641, - "p90": 122.11199849843979, - "p95": 123.07199835777283, - "p99": 127.9039978981018 + "p50": 1612.9599809646606, + "p90": 1620.8959817886353, + "p95": 1906.6879749298096, + "p99": 1956.32004737854 }, "isolatedSum": { - "p50": 126.81599706411362, - "p90": 137.02399656176567, - "p95": 139.20000195503235, - "p99": 152.67200022935867 + "p50": 202.2399976849556, + "p90": 436.67200952768326, + "p95": 474.97598826885223, + "p99": 523.8400101661682 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1835008, - "combineLogicalBytes": 1835008, - 
"fanoutMean": 8, - "recvTokensMax": 16, - "stragglerRank": 4, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 21, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -11035,35 +11111,35 @@ "tokensPerRank": 4, "globalTokens": 32, "dispatch": { - "p50": 58.59199911355972, - "p90": 60.5119988322258, - "p95": 61.664000153541565, - "p99": 69.66400146484375 + "p50": 87.71199733018875, + "p90": 322.56001234054565, + "p95": 345.37601470947266, + "p99": 387.3920142650604 }, "combine": { - "p50": 70.01599669456482, - "p90": 78.40000092983246, - "p95": 78.52800190448761, - "p99": 81.216000020504 + "p50": 122.72000312805176, + "p90": 127.77599692344666, + "p95": 129.31199371814728, + "p99": 143.93599331378937 }, "roundtrip": { - "p50": 121.66400253772736, - "p90": 125.37600100040436, - "p95": 127.20000743865967, - "p99": 135.74400544166565 + "p50": 1616.703987121582, + "p90": 1624.2239475250244, + "p95": 1915.2319431304932, + "p99": 2350.0161170959473 }, "isolatedSum": { - "p50": 128.60799580812454, - "p90": 138.91199976205826, - "p95": 140.19200205802917, - "p99": 150.88000148534775 + "p50": 210.4320004582405, + "p90": 450.3360092639923, + "p95": 474.68800842761993, + "p99": 531.3280075788498 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 3670016, - "combineLogicalBytes": 3670016, - "fanoutMean": 8, - "recvTokensMax": 32, - "stragglerRank": 4, + "dispatchLogicalBytes": 1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 39, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -11072,34 +11148,34 @@ "tokensPerRank": 8, "globalTokens": 64, "dispatch": { - "p50": 59.61599946022034, - "p90": 61.95199862122536, - "p95": 63.90400230884552, - "p99": 71.52000069618225 + "p50": 91.16800129413605, + "p90": 333.1199884414673, + "p95": 358.88001322746277, + "p99": 392.5119936466217 }, "combine": { - "p50": 77.40800082683563, - "p90": 
78.65600287914276, - "p95": 78.94399762153625, - "p99": 89.28000181913376 + "p50": 130.23999333381653, + "p90": 135.1040005683899, + "p95": 136.48000359535217, + "p99": 138.33600282669067 }, "roundtrip": { - "p50": 119.80800330638885, - "p90": 122.65600264072418, - "p95": 124.83199685811996, - "p99": 136.83199882507324 + "p50": 1622.5279569625854, + "p90": 1628.383994102478, + "p95": 1637.120008468628, + "p99": 1966.271996498108 }, "isolatedSum": { - "p50": 137.02400028705597, - "p90": 140.60800150036812, - "p95": 142.84799993038177, - "p99": 160.800002515316 + "p50": 221.40799462795258, + "p90": 468.2239890098572, + "p95": 495.36001682281494, + "p99": 530.8479964733124 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 7340032, - "combineLogicalBytes": 7340032, - "fanoutMean": 8, - "recvTokensMax": 64, + "dispatchLogicalBytes": 2487296, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 74, "stragglerRank": 4, "correct": true, "samplesPooled": 600, @@ -11109,35 +11185,35 @@ "tokensPerRank": 16, "globalTokens": 128, "dispatch": { - "p50": 73.91999661922455, - "p90": 76.09599828720093, - "p95": 78.04799824953079, - "p99": 85.24800091981888 + "p50": 97.21600264310837, + "p90": 331.743985414505, + "p95": 363.48798871040344, + "p99": 419.3280041217804 }, "combine": { - "p50": 78.40000092983246, - "p90": 79.1039988398552, - "p95": 79.39200103282928, - "p99": 85.08799970149994 + "p50": 141.7279988527298, + "p90": 147.23199605941772, + "p95": 149.6960073709488, + "p99": 274.97598528862 }, "roundtrip": { - "p50": 121.44000083208084, - "p90": 126.94400548934937, - "p95": 128.92800569534302, - "p99": 145.31199634075165 + "p50": 1634.6240043640137, + "p90": 1641.0239934921265, + "p95": 1651.8720388412476, + "p99": 1987.328052520752 }, "isolatedSum": { - "p50": 152.319997549057, - "p90": 155.19999712705612, - "p95": 157.43999928236008, - "p99": 170.33600062131882 + "p50": 238.94400149583817, + "p90": 478.97598147392273, + "p95": 
513.1839960813522, + "p99": 694.3039894104004 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 14680064, - "combineLogicalBytes": 14680064, - "fanoutMean": 8, - "recvTokensMax": 128, - "stragglerRank": 5, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 145, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -11146,35 +11222,35 @@ "tokensPerRank": 32, "globalTokens": 256, "dispatch": { - "p50": 71.07199728488922, - "p90": 72.86400347948074, - "p95": 73.47200065851212, - "p99": 82.40000158548355 + "p50": 113.69600147008896, + "p90": 342.848002910614, + "p95": 374.2400109767914, + "p99": 416.48000478744507 }, "combine": { - "p50": 80.06399869918823, - "p90": 81.37600123882294, - "p95": 81.82399719953537, - "p99": 89.88799899816513 + "p50": 142.97600090503693, + "p90": 149.1519957780838, + "p95": 152.16000378131866, + "p99": 173.0239987373352 }, "roundtrip": { - "p50": 134.36800241470337, - "p90": 141.56800508499146, - "p95": 143.99999380111694, - "p99": 148.80000054836273 + "p50": 1678.2399415969849, + "p90": 1688.3200407028198, + "p95": 1698.6240148544312, + "p99": 2008.512020111084 }, "isolatedSum": { - "p50": 151.13599598407745, - "p90": 154.24000471830368, - "p95": 155.29599785804749, - "p99": 172.28800058364868 + "p50": 256.6720023751259, + "p90": 491.9999986886978, + "p95": 526.40001475811, + "p99": 589.5040035247803 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 29360128, - "combineLogicalBytes": 29360128, - "fanoutMean": 8, - "recvTokensMax": 256, - "stragglerRank": 7, + "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 287, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -11183,35 +11259,35 @@ "tokensPerRank": 64, "globalTokens": 512, "dispatch": { - "p50": 85.34400165081024, - "p90": 89.63199704885483, - "p95": 91.93599969148636, - "p99": 96.57599776983261 + "p50": 
162.4000072479248, + "p90": 326.4639973640442, + "p95": 385.6959939002991, + "p99": 463.48801255226135 }, "combine": { - "p50": 93.98400038480759, - "p90": 103.10400277376175, - "p95": 103.29599678516388, - "p99": 105.92000186443329 + "p50": 201.88799500465393, + "p90": 211.39200031757355, + "p95": 213.82400393486023, + "p99": 217.8560048341751 }, "roundtrip": { - "p50": 169.3439930677414, - "p90": 172.89599776268005, - "p95": 175.87199807167053, - "p99": 196.16000354290009 + "p50": 1803.5839796066284, + "p90": 1823.7119913101196, + "p95": 2049.0241050720215, + "p99": 2119.391918182373 }, "isolatedSum": { - "p50": 179.32800203561783, - "p90": 192.73599982261658, - "p95": 195.23199647665024, - "p99": 202.4959996342659 + "p50": 364.28800225257874, + "p90": 537.8559976816177, + "p95": 599.5199978351593, + "p99": 681.3440173864365 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 58720256, - "combineLogicalBytes": 58720256, - "fanoutMean": 8, - "recvTokensMax": 512, - "stragglerRank": 6, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 564, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 @@ -11220,35 +11296,35 @@ "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 107.13600367307663, - "p90": 109.79200154542923, - "p95": 111.7120012640953, - "p99": 131.96800649166107 + "p50": 351.00799798965454, + "p90": 390.04799723625183, + "p95": 620.9920048713684, + "p99": 682.0160150527954 }, "combine": { - "p50": 130.49599528312683, - "p90": 139.52000439167023, - "p95": 139.8719996213913, - "p99": 140.54399728775024 + "p50": 355.23200035095215, + "p90": 373.4720051288605, + "p95": 378.9120018482208, + "p99": 387.36000657081604 }, "roundtrip": { - "p50": 231.1680018901825, - "p90": 235.00800132751465, - "p95": 236.7040067911148, - "p99": 257.6960027217865 + "p50": 2065.2480125427246, + "p90": 2107.7120304107666, + "p95": 2237.247943878174, + "p99": 2313.823938369751 }, 
"isolatedSum": { - "p50": 237.63199895620346, - "p90": 249.31200593709946, - "p95": 251.5840008854866, - "p99": 272.5120037794113 + "p50": 706.2399983406067, + "p90": 763.5200023651123, + "p95": 999.9040067195892, + "p99": 1069.3760216236115 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 117440512, - "combineLogicalBytes": 117440512, - "fanoutMean": 8, - "recvTokensMax": 1024, - "stragglerRank": 4, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 1104, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -11256,366 +11332,293 @@ ] }, { - "id": "cx-e56568fe", - "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|decode|normal|none|none|0|normalized|0.18|14ded8461f2636c", - "colorKey": "b300_01ab5b1a", - "comparisonKey": "1f56c3705f670037", + "id": "cx-d5cc743d", + "identity": "gb200|deepep|v1|4096|8|128|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||03f98832f76b043", + "colorKey": "gb200_7c2da03d", + "comparisonKey": "3c60cc7bd418443a", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:38:03.696815+00:00", + "generatedAt": "2026-06-29T14:00:33.410141+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_07", - "sku": "b300", + "publicationStatus": "comparable-experimental", + "runner": "gb200-nv_1", + "sku": "gb200", "backend": "deepep", - "phase": "decode", + "phase": "prefill", "mode": "normal", - "resourceMode": "normalized", - "suite": "resource-constrained", + "resourceMode": "tuned", + "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", + "measurementContract": "runtime-visible-v1", + "topologyClass": "gb200-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep · bf16 (norm) · zipf", - "model": 
"DeepSeek-V3/V4", + "label": "GB200 EP8 · deepep · bf16", + "model": "Qwen3.5", "shape": { - "hidden": 7168, + "hidden": 4096, "topk": 8, - "experts": 256, - "routing": "zipf", - "routingLabel": "zipf", + "experts": 128, + "routing": "uniform", + "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, "dispatchDtype": "bf16", + "kernelGeneration": "v1", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { - "requestedFraction": 0.18, - "achievedFraction": 0.1824, - "configuredUnits": 27, - "deviceUnits": 148, - "resourceClass": "resource-constrained", - "conformanceClass": "resource-conforming", + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", "fixedKernel": false, - "paretoEligible": true + "paretoEligible": false }, "placement": { "kind": "packed", - "nodes": 1, + "nodes": 2, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "14ded8461f2636c", - "workloadId": "set:8:f5576e2b712d38c3", - "workloadSource": "canonical-serialized", + "traceSignature": "03f98832f76b043", + "workloadId": null, + "workloadSource": "seeded-runtime", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", + "backendVersion": "1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271231753", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271231753", - "createdAt": "2026-06-26T23:38:03.696815+00:00", - "sha": "ee4ffe77871d0200cb4a78c96d3ae9f692e9af02" + "id": "28374335449", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449", + "createdAt": "2026-06-29T13:08:20Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { - "tokensPerRank": 1, - 
"globalTokens": 8, - "dispatch": { - "p50": 55.904000997543335, - "p90": 59.776000678539276, - "p95": 65.72800129652023, - "p99": 85.11999994516373 - }, - "combine": { - "p50": 65.60000032186508, - "p90": 66.3679987192154, - "p95": 66.91200286149979, - "p99": 76.86399668455124 - }, - "roundtrip": { - "p50": 105.05600273609161, - "p90": 111.35999858379364, - "p95": 112.96000331640244, - "p99": 121.05599790811539 - }, - "isolatedSum": { - "p50": 121.50400131940842, - "p90": 126.14399939775467, - "p95": 132.64000415802002, - "p99": 161.98399662971497 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 444416, - "combineLogicalBytes": 444416, - "fanoutMean": 3.875, - "recvTokensMax": 8, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 55.84000051021576, - "p90": 57.56799876689911, - "p95": 60.095999389886856, - "p99": 72.4480003118515 - }, - "combine": { - "p50": 65.69600105285645, - "p90": 66.3679987192154, - "p95": 66.84800237417221, - "p99": 69.2799985408783 - }, - "roundtrip": { - "p50": 104.76800054311752, - "p90": 109.40799862146378, - "p95": 112.03200370073318, - "p99": 159.19999778270721 - }, - "isolatedSum": { - "p50": 121.5360015630722, - "p90": 123.9359974861145, - "p95": 126.94400176405907, - "p99": 141.7279988527298 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 845824, - "combineLogicalBytes": 845824, - "fanoutMean": 3.6875, - "recvTokensMax": 16, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 57.37600103020668, - "p90": 60.80000102519989, - "p95": 65.76000154018402, - "p99": 95.8079993724823 + "p50": 115.84000289440155, + "p90": 131.99999928474426, + "p95": 371.45599722862244, + "p99": 415.1360094547272 }, "combine": { - "p50": 66.59200042486191, - "p90": 77.18399912118912, 
- "p95": 77.82399654388428, - "p99": 79.16799932718277 + "p50": 108.0000028014183, + "p90": 338.81598711013794, + "p95": 352.7359962463379, + "p99": 389.2799913883209 }, "roundtrip": { - "p50": 106.91200196743011, - "p90": 112.38399893045425, - "p95": 115.23199826478958, - "p99": 124.22399967908859 + "p50": 193.7599927186966, + "p90": 210.4319930076599, + "p95": 443.6799883842468, + "p99": 480.5760085582733 }, "isolatedSum": { - "p50": 123.96800145506859, - "p90": 137.984000146389, - "p95": 143.5839980840683, - "p99": 174.97599869966507 + "p50": 223.84000569581985, + "p90": 470.8159863948822, + "p95": 724.1919934749603, + "p99": 804.4160008430481 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1691648, - "combineLogicalBytes": 1691648, - "fanoutMean": 3.6875, - "recvTokensMax": 32, - "stragglerRank": 7, + "dispatchLogicalBytes": 44564480, + "combineLogicalBytes": 44564480, + "fanoutMean": 5.3125, + "recvTokensMax": 699, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 256, + "globalTokens": 2048, "dispatch": { - "p50": 58.6559996008873, - "p90": 63.231997191905975, - "p95": 65.60000032186508, - "p99": 69.47200000286102 + "p50": 148.0959951877594, + "p90": 417.5359904766083, + "p95": 451.1680006980896, + "p99": 469.6640074253082 }, "combine": { - "p50": 68.12799721956253, - "p90": 76.48000121116638, - "p95": 77.15199887752533, - "p99": 84.1279998421669 + "p50": 145.6959992647171, + "p90": 363.74399065971375, + "p95": 406.5600037574768, + "p99": 438.1760060787201 }, "roundtrip": { - "p50": 122.11199849843979, - "p90": 125.34399330615997, - "p95": 128.4479945898056, - "p99": 151.5520066022873 + "p50": 265.76000452041626, + "p90": 477.3760139942169, + "p95": 510.3359818458557, + "p99": 544.2559719085693 }, "isolatedSum": { - "p50": 126.78399682044983, - "p90": 139.71199840307236, - "p95": 142.7519991993904, - "p99": 153.59999984502792 + "p50": 293.7919944524765, + "p90": 
781.279981136322, + "p95": 857.7280044555664, + "p99": 907.8400135040283 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 3354624, - "combineLogicalBytes": 3354624, - "fanoutMean": 3.65625, - "recvTokensMax": 64, - "stragglerRank": 4, + "dispatchLogicalBytes": 89726976, + "combineLogicalBytes": 89726976, + "fanoutMean": 5.34814453125, + "recvTokensMax": 1385, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 16, - "globalTokens": 128, + "tokensPerRank": 512, + "globalTokens": 4096, "dispatch": { - "p50": 59.487998485565186, - "p90": 65.24799764156342, - "p95": 67.00800359249115, - "p99": 73.56800138950348 + "p50": 197.37599790096283, + "p90": 463.9360010623932, + "p95": 495.64799666404724, + "p99": 526.0800123214722 }, "combine": { - "p50": 68.12799721956253, - "p90": 77.34400033950806, - "p95": 77.88799703121185, - "p99": 89.53599631786346 + "p50": 216.99200570583344, + "p90": 425.1840114593506, + "p95": 496.5119957923889, + "p99": 524.2879986763 }, "roundtrip": { - "p50": 119.1679984331131, - "p90": 124.67200309038162, - "p95": 125.69600343704224, - "p99": 134.5600038766861 + "p50": 385.5679929256439, + "p90": 398.6240029335022, + "p95": 652.4800062179565, + "p99": 695.5519914627075 }, "isolatedSum": { - "p50": 127.61599570512772, - "p90": 142.59199798107147, - "p95": 144.896000623703, - "p99": 163.10399770736694 + "p50": 414.36800360679626, + "p90": 889.1200125217438, + "p95": 992.1599924564362, + "p99": 1050.3680109977722 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 6537216, - "combineLogicalBytes": 6537216, - "fanoutMean": 3.5625, - "recvTokensMax": 127, - "stragglerRank": 4, + "dispatchLogicalBytes": 179503104, + "combineLogicalBytes": 179503104, + "fanoutMean": 5.349609375, + "recvTokensMax": 2772, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 32, - "globalTokens": 256, + "tokensPerRank": 1024, + "globalTokens": 8192, "dispatch": { - "p50": 
74.52800124883652, - "p90": 76.51200145483017, - "p95": 77.18399912118912, - "p99": 81.7599967122078 + "p50": 308.22399258613586, + "p90": 601.248025894165, + "p95": 629.8879981040955, + "p99": 645.1519727706909 }, "combine": { - "p50": 77.91999727487564, - "p90": 78.78399640321732, - "p95": 79.26400005817413, - "p99": 81.85599744319916 + "p50": 416.31999611854553, + "p90": 667.4559712409973, + "p95": 718.1119918823242, + "p99": 746.7520236968994 }, "roundtrip": { - "p50": 132.32000172138214, - "p90": 135.6160044670105, - "p95": 136.31999492645264, - "p99": 141.66399836540222 + "p50": 639.3280029296875, + "p90": 916.1279797554016, + "p95": 946.5280175209045, + "p99": 973.4079837799072 }, "isolatedSum": { - "p50": 152.44799852371216, - "p90": 155.29599785804749, - "p95": 156.44799917936325, - "p99": 163.61599415540695 + "p50": 724.5439887046814, + "p90": 1268.7039971351624, + "p95": 1347.9999899864197, + "p99": 1391.9039964675903 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 12859392, - "combineLogicalBytes": 12859392, - "fanoutMean": 3.50390625, - "recvTokensMax": 255, - "stragglerRank": 7, + "dispatchLogicalBytes": 359022592, + "combineLogicalBytes": 359022592, + "fanoutMean": 5.349853515625, + "recvTokensMax": 5558, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 64, - "globalTokens": 512, + "tokensPerRank": 2048, + "globalTokens": 16384, "dispatch": { - "p50": 80.19199967384338, - "p90": 81.88799768686295, - "p95": 83.52000266313553, - "p99": 90.30400216579437 + "p50": 528.7359952926636, + "p90": 803.6159873008728, + "p95": 846.560001373291, + "p99": 884.223997592926 }, "combine": { - "p50": 90.59199690818787, - "p90": 91.67999774217606, - "p95": 92.57599711418152, - "p99": 101.21600329875946 + "p50": 820.1280236244202, + "p90": 835.9360098838806, + "p95": 1127.9040575027466, + "p99": 1175.6160259246826 }, "roundtrip": { - "p50": 155.45600652694702, - "p90": 160.5760008096695, - "p95": 
161.98399662971497, - "p99": 169.53599452972412 + "p50": 1312.2559785842896, + "p90": 1328.5759687423706, + "p95": 1621.0240125656128, + "p99": 1657.696008682251 }, "isolatedSum": { - "p50": 170.78399658203125, - "p90": 173.567995429039, - "p95": 176.09599977731705, - "p99": 191.52000546455383 + "p50": 1348.8640189170837, + "p90": 1639.5519971847534, + "p95": 1974.4640588760376, + "p99": 2059.8400235176086 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 25145344, - "combineLogicalBytes": 25145344, - "fanoutMean": 3.42578125, - "recvTokensMax": 510, - "stragglerRank": 7, + "dispatchLogicalBytes": 716111872, + "combineLogicalBytes": 716111872, + "fanoutMean": 5.33544921875, + "recvTokensMax": 10982, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 4096, + "globalTokens": 32768, "dispatch": { - "p50": 101.69599950313568, - "p90": 105.15200346708298, - "p95": 106.04800283908844, - "p99": 115.167997777462 + "p50": 978.7840247154236, + "p90": 1006.4640045166016, + "p95": 1288.7680530548096, + "p99": 1324.5760202407837 }, "combine": { - "p50": 126.81600451469421, - "p90": 127.77599692344666, - "p95": 128.12800705432892, - "p99": 131.71200454235077 + "p50": 1550.3360033035278, + "p90": 1569.7599649429321, + "p95": 1864.5440340042114, + "p99": 1903.8079977035522 }, "roundtrip": { - "p50": 207.58399367332458, - "p90": 212.41599321365356, - "p95": 215.45599400997162, - "p99": 240.79999327659607 + "p50": 2488.6720180511475, + "p90": 2510.3039741516113, + "p95": 2800.9281158447266, + "p99": 2841.599941253662 }, "isolatedSum": { - "p50": 228.5120040178299, - "p90": 232.92800039052963, - "p95": 234.17600989341736, - "p99": 246.88000231981277 + "p50": 2529.1200280189514, + "p90": 2576.2239694595337, + "p95": 3153.312087059021, + "p99": 3228.384017944336 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 49946624, - "combineLogicalBytes": 49946624, - "fanoutMean": 
3.40234375, - "recvTokensMax": 1022, - "stragglerRank": 7, + "dispatchLogicalBytes": 1432395776, + "combineLogicalBytes": 1432395776, + "fanoutMean": 5.336090087890625, + "recvTokensMax": 21939, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 @@ -11623,366 +11626,293 @@ ] }, { - "id": "cx-a499b6fe", - "identity": "b300|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|decode|normal|none|none|0|normalized|0.18|a8f501af7004836", - "colorKey": "b300_085c12d4", - "comparisonKey": "f41671f558a3c8d2", + "id": "cx-149387f5", + "identity": "gb200|deepep|v1|5120|8|160|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||a9df48e6438e77a", + "colorKey": "gb200_7c2da03d", + "comparisonKey": "dd9e945599d67e5f", "schemaVersion": 3, - "generatedAt": "2026-06-26T18:23:15.234137+00:00", + "generatedAt": "2026-06-29T14:02:20.934185+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_10", - "sku": "b300", + "publicationStatus": "comparable-experimental", + "runner": "gb200-nv_1", + "sku": "gb200", "backend": "deepep", - "phase": "decode", + "phase": "prefill", "mode": "normal", - "resourceMode": "normalized", - "suite": "resource-constrained", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "gb200-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep · bf16 (norm) · zipf+eplb", - "model": "DeepSeek-V3 (EPLB physical)", + "label": "GB200 EP8 · deepep · bf16", + "model": "shape 5120/8/160", "shape": { - "hidden": 7168, + "hidden": 5120, "topk": 8, - "experts": 288, - "routing": "zipf", - "routingLabel": "zipf+eplb", + "experts": 160, + "routing": "uniform", + "routingLabel": "uniform", 
"routingStep": 0, "unevenTokens": "none", - "eplbEnabled": true, + "eplbEnabled": false, "dispatchDtype": "bf16", + "kernelGeneration": "v1", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { - "requestedFraction": 0.18, - "achievedFraction": 0.1824, - "configuredUnits": 27, - "deviceUnits": 148, - "resourceClass": "resource-constrained", - "conformanceClass": "resource-conforming", + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", "fixedKernel": false, - "paretoEligible": true + "paretoEligible": false }, "placement": { "kind": "packed", - "nodes": 1, + "nodes": 2, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "a8f501af7004836", - "workloadId": "set:8:f5576e2b712d38c3", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": 4.927734375, - "eplbImbalanceAfter": 1.0006103515625, - "backendVersion": "1.2.1", + "traceSignature": "a9df48e6438e77a", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28255311146", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28255311146", - "createdAt": "2026-06-26T18:23:15.234137+00:00", - "sha": "36d3eb6c3c7386d3220c873d305410219c5c0f17" + "id": "28374335449", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449", + "createdAt": "2026-06-29T13:08:20Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 56.86400085687637, - "p90": 59.7120001912117, - "p95": 63.32799792289734, - "p99": 72.64000177383423 - }, - "combine": { - "p50": 
64.83200192451477, - "p90": 66.46399945020676, - "p95": 66.94400310516357, - "p99": 76.51200145483017 - }, - "roundtrip": { - "p50": 105.12000322341919, - "p90": 110.72000116109848, - "p95": 111.7440015077591, - "p99": 122.56000190973282 - }, - "isolatedSum": { - "p50": 121.69600278139114, - "p90": 126.17599964141846, - "p95": 130.2720010280609, - "p99": 149.1520032286644 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 616448, - "combineLogicalBytes": 616448, - "fanoutMean": 5.375, - "recvTokensMax": 7, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 57.5999990105629, - "p90": 59.808000922203064, - "p95": 62.07999959588051, - "p99": 71.45600020885468 - }, - "combine": { - "p50": 66.27199798822403, - "p90": 67.00800359249115, - "p95": 67.29599833488464, - "p99": 76.92799717187881 - }, - "roundtrip": { - "p50": 106.27199709415436, - "p90": 108.22399705648422, - "p95": 110.01600325107574, - "p99": 132.54399597644806 - }, - "isolatedSum": { - "p50": 123.87199699878693, - "p90": 126.81600451469421, - "p95": 129.37599793076515, - "p99": 148.3839973807335 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1204224, - "combineLogicalBytes": 1204224, - "fanoutMean": 5.25, - "recvTokensMax": 14, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 57.53599852323532, - "p90": 59.808000922203064, - "p95": 60.70400029420853, - "p99": 67.87200272083282 + "p50": 120.19199877977371, + "p90": 181.43999576568604, + "p95": 387.00801134109497, + "p99": 433.0559968948364 }, "combine": { - "p50": 66.43199920654297, - "p90": 67.45599955320358, - "p95": 69.31199878454208, - "p99": 78.78399640321732 + "p50": 116.12799763679504, + "p90": 120.86399644613266, + "p95": 127.13600695133209, + "p99": 377.7279853820801 }, 
"roundtrip": { - "p50": 106.6880002617836, - "p90": 109.50399935245514, - "p95": 111.87200248241425, - "p99": 125.08800625801086 + "p50": 205.47200739383698, + "p90": 461.34400367736816, + "p95": 483.42400789260864, + "p99": 499.0079998970032 }, "isolatedSum": { - "p50": 123.96799772977829, - "p90": 127.26400047540665, - "p95": 130.0159990787506, - "p99": 146.65599912405014 + "p50": 236.31999641656876, + "p90": 302.3039922118187, + "p95": 514.1440182924271, + "p99": 810.7839822769165 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2394112, - "combineLogicalBytes": 2394112, - "fanoutMean": 5.21875, - "recvTokensMax": 24, - "stragglerRank": 7, + "dispatchLogicalBytes": 55674880, + "combineLogicalBytes": 55674880, + "fanoutMean": 5.3095703125, + "recvTokensMax": 699, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 256, + "globalTokens": 2048, "dispatch": { - "p50": 58.848001062870026, - "p90": 61.15199998021126, - "p95": 64.41599875688553, - "p99": 78.14399898052216 + "p50": 152.25599706172943, + "p90": 415.3279960155487, + "p95": 451.775997877121, + "p99": 482.36799240112305 }, "combine": { - "p50": 68.28799843788147, - "p90": 76.25599950551987, - "p95": 76.92799717187881, - "p99": 79.64800298213959 + "p50": 155.90399503707886, + "p90": 361.63198947906494, + "p95": 404.63998913764954, + "p99": 450.9119987487793 }, "roundtrip": { - "p50": 116.28799885511398, - "p90": 122.8799968957901, - "p95": 124.70400333404541, - "p99": 145.08800208568573 + "p50": 275.90399980545044, + "p90": 290.6239926815033, + "p95": 537.5999808311462, + "p99": 569.3439841270447 }, "isolatedSum": { - "p50": 127.1359995007515, - "p90": 137.40799948573112, - "p95": 141.34399592876434, - "p99": 157.79200196266174 + "p50": 308.1599920988083, + "p90": 776.9599854946136, + "p95": 856.4159870147705, + "p99": 933.2799911499023 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4630528, - 
"combineLogicalBytes": 4630528, - "fanoutMean": 5.046875, - "recvTokensMax": 45, - "stragglerRank": 7, + "dispatchLogicalBytes": 111104000, + "combineLogicalBytes": 111104000, + "fanoutMean": 5.2978515625, + "recvTokensMax": 1387, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 16, - "globalTokens": 128, + "tokensPerRank": 512, + "globalTokens": 4096, "dispatch": { - "p50": 60.54399907588959, - "p90": 66.14399701356888, - "p95": 68.67200136184692, - "p99": 83.29600095748901 + "p50": 208.064004778862, + "p90": 467.5840139389038, + "p95": 495.5520033836365, + "p99": 534.5600247383118 }, "combine": { - "p50": 68.64000111818314, - "p90": 77.2159993648529, - "p95": 77.82399654388428, - "p99": 78.91199737787247 + "p50": 232.31999576091766, + "p90": 476.8959879875183, + "p95": 504.7039985656738, + "p99": 537.5040173530579 }, "roundtrip": { - "p50": 123.16799908876419, - "p90": 126.0479986667633, - "p95": 127.16799974441528, - "p99": 131.1040073633194 + "p50": 404.32000160217285, + "p90": 418.9760088920593, + "p95": 679.4559955596924, + "p99": 724.2559790611267 }, "isolatedSum": { - "p50": 129.18400019407272, - "p90": 143.35999637842178, - "p95": 146.4959979057312, - "p99": 162.20799833536148 + "p50": 440.38400053977966, + "p90": 944.4800019264221, + "p95": 1000.2560019493103, + "p99": 1072.0640420913696 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 9447424, - "combineLogicalBytes": 9447424, - "fanoutMean": 5.1484375, - "recvTokensMax": 91, - "stragglerRank": 7, + "dispatchLogicalBytes": 223098880, + "combineLogicalBytes": 223098880, + "fanoutMean": 5.319091796875, + "recvTokensMax": 2762, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 32, - "globalTokens": 256, + "tokensPerRank": 1024, + "globalTokens": 8192, "dispatch": { - "p50": 69.31199878454208, - "p90": 75.52000135183334, - "p95": 76.4160007238388, - "p99": 83.20000022649765 + "p50": 325.8880078792572, + 
"p90": 601.535975933075, + "p95": 628.3199787139893, + "p99": 661.5679860115051 }, "combine": { - "p50": 78.46400141716003, - "p90": 79.26400005817413, - "p95": 79.45600152015686, - "p99": 82.40000158548355 + "p50": 475.19999742507935, + "p90": 740.2880191802979, + "p95": 786.4320278167725, + "p99": 820.5119967460632 }, "roundtrip": { - "p50": 132.192000746727, - "p90": 135.6479972600937, - "p95": 136.3839954137802, - "p99": 147.20000326633453 + "p50": 724.9600291252136, + "p90": 740.3839826583862, + "p95": 1014.3040418624878, + "p99": 1059.1039657592773 }, "isolatedSum": { - "p50": 147.77600020170212, - "p90": 154.78400141000748, - "p95": 155.87200224399567, - "p99": 165.6000018119812 + "p50": 801.0880053043365, + "p90": 1341.8239951133728, + "p95": 1414.7520065307617, + "p99": 1482.0799827575684 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19023872, - "combineLogicalBytes": 19023872, - "fanoutMean": 5.18359375, - "recvTokensMax": 178, - "stragglerRank": 7, + "dispatchLogicalBytes": 446730240, + "combineLogicalBytes": 446730240, + "fanoutMean": 5.325439453125, + "recvTokensMax": 5518, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 64, - "globalTokens": 512, + "tokensPerRank": 2048, + "globalTokens": 16384, "dispatch": { - "p50": 87.8399983048439, - "p90": 90.30400216579437, - "p95": 91.87199920415878, - "p99": 100.0640019774437 + "p50": 568.1920051574707, + "p90": 587.1040225028992, + "p95": 874.8160004615784, + "p99": 909.6639752388 }, "combine": { - "p50": 91.2960022687912, - "p90": 93.08800101280212, - "p95": 93.85599941015244, - "p99": 108.12799632549286 + "p50": 848.0319976806641, + "p90": 876.8640160560608, + "p95": 1146.720051765442, + "p99": 1191.3599967956543 }, "roundtrip": { - "p50": 157.44000673294067, - "p90": 162.4639928340912, - "p95": 163.71199488639832, - "p99": 168.89600455760956 + "p50": 1380.4479837417603, + "p90": 1396.448016166687, + "p95": 1698.8799571990967, + "p99": 
1734.1760396957397 }, "isolatedSum": { - "p50": 179.1360005736351, - "p90": 183.3920031785965, - "p95": 185.72799861431122, - "p99": 208.19199830293655 + "p50": 1416.2240028381348, + "p90": 1463.96803855896, + "p95": 2021.5360522270203, + "p99": 2101.0239720344543 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 38148096, - "combineLogicalBytes": 38148096, - "fanoutMean": 5.197265625, - "recvTokensMax": 350, - "stragglerRank": 7, + "dispatchLogicalBytes": 893634560, + "combineLogicalBytes": 893634560, + "fanoutMean": 5.32647705078125, + "recvTokensMax": 11032, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 4096, + "globalTokens": 32768, "dispatch": { - "p50": 93.9520001411438, - "p90": 98.39999675750732, - "p95": 100.00000149011612, - "p99": 105.53599894046783 + "p50": 1051.6480207443237, + "p90": 1330.4640054702759, + "p95": 1371.7119693756104, + "p99": 1398.2720375061035 }, "combine": { - "p50": 115.29599875211716, - "p90": 116.12799763679504, - "p95": 116.48000031709671, - "p99": 127.87200510501862 + "p50": 1592.8959846496582, + "p90": 1619.0719604492188, + "p95": 1900.1920223236084, + "p99": 1945.7279443740845 }, "roundtrip": { - "p50": 193.08799505233765, - "p90": 199.90399479866028, - "p95": 201.50400698184967, - "p99": 214.1759991645813 + "p50": 2600.7039546966553, + "p90": 2650.304079055786, + "p95": 2929.663896560669, + "p99": 2953.632116317749 }, "isolatedSum": { - "p50": 209.24799889326096, - "p90": 214.52799439430237, - "p95": 216.48000180721283, - "p99": 233.40800404548645 + "p50": 2644.544005393982, + "p90": 2949.5359659194946, + "p95": 3271.9039916992188, + "p99": 3343.999981880188 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 76955648, - "combineLogicalBytes": 76955648, - "fanoutMean": 5.2421875, - "recvTokensMax": 687, - "stragglerRank": 7, + "dispatchLogicalBytes": 1786265600, + "combineLogicalBytes": 1786265600, + "fanoutMean": 
5.323486328125, + "recvTokensMax": 21895, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 @@ -11990,31 +11920,31 @@ ] }, { - "id": "cx-8481f6a4", - "identity": "b300|deepep|7168|8|256|bf16|normal|cached-layout-comm-only-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|ac583971f94b176", - "colorKey": "b300_63f1354f", - "comparisonKey": "63f9b5a5300d4d4b", + "id": "cx-08f535b7", + "identity": "gb200|deepep|v1|6144|8|256|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||b9896b0a7ca9901", + "colorKey": "gb200_7c2da03d", + "comparisonKey": "cc4f254d990410d2", "schemaVersion": 3, - "generatedAt": "2026-06-26T18:09:35.317427+00:00", + "generatedAt": "2026-06-29T14:04:11.209616+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_16", - "sku": "b300", + "publicationStatus": "comparable-experimental", + "runner": "gb200-nv_1", + "sku": "gb200", "backend": "deepep", - "phase": "decode", + "phase": "prefill", "mode": "normal", - "resourceMode": "normalized", - "suite": "resource-constrained", + "resourceMode": "tuned", + "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "cached-layout-comm-only-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", + "measurementContract": "runtime-visible-v1", + "topologyClass": "gb200-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep · bf16 (norm) [cl]", - "model": "DeepSeek-V3/V4", + "label": "GB200 EP8 · deepep · bf16", + "model": "MiniMax-M3", "shape": { - "hidden": 7168, + "hidden": 6144, "topk": 8, "experts": 256, "routing": "uniform", @@ -12023,333 +11953,260 @@ "unevenTokens": "none", "eplbEnabled": false, "dispatchDtype": "bf16", + "kernelGeneration": "v1", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { - "requestedFraction": 0.18, - "achievedFraction": 0.1824, - "configuredUnits": 27, - "deviceUnits": 148, - 
"resourceClass": "resource-constrained", - "conformanceClass": "resource-conforming", + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", "fixedKernel": false, - "paretoEligible": true + "paretoEligible": false }, "placement": { "kind": "packed", - "nodes": 1, + "nodes": 2, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "ac583971f94b176", - "workloadId": "set:8:d8d49658059863f2", - "workloadSource": "canonical-serialized", + "traceSignature": "b9896b0a7ca9901", + "workloadId": null, + "workloadSource": "seeded-runtime", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", + "backendVersion": "1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28254489726", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254489726", - "createdAt": "2026-06-26T18:09:35.317427+00:00", - "sha": "60dec7d70f554e252fec87709e2be52752947db1" + "id": "28374335449", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449", + "createdAt": "2026-06-29T13:08:20Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 50.303999334573746, - "p90": 52.06400156021118, - "p95": 53.82400006055832, - "p99": 65.05600363016129 - }, - "combine": { - "p50": 66.56000018119812, - "p90": 68.2239979505539, - "p95": 68.76800209283829, - "p99": 77.95199751853943 - }, - "roundtrip": { - "p50": 99.84000027179718, - "p90": 103.90400141477585, - "p95": 107.51999914646149, - "p99": 117.11999773979187 - }, - "isolatedSum": { - "p50": 116.86399951577187, - "p90": 120.28799951076508, - "p95": 122.5920021533966, - "p99": 143.0080011487007 - }, - "roundtripMeasured": true, - 
"dispatchLogicalBytes": 630784, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 51.263999193906784, - "p90": 52.89600044488907, - "p95": 55.32800033688545, - "p99": 65.18399715423584 - }, - "combine": { - "p50": 66.97600334882736, - "p90": 68.7360018491745, - "p95": 69.11999732255936, - "p99": 78.11199873685837 - }, - "roundtrip": { - "p50": 100.99200159311295, - "p90": 103.26399654150009, - "p95": 105.76000064611435, - "p99": 113.6000007390976 - }, - "isolatedSum": { - "p50": 118.24000254273415, - "p90": 121.63200229406357, - "p95": 124.44799765944481, - "p99": 143.2959958910942 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1232896, - "combineLogicalBytes": 1232896, - "fanoutMean": 5.375, - "recvTokensMax": 13, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 51.4880008995533, - "p90": 53.408000618219376, - "p95": 54.9440011382103, - "p99": 61.63199990987778 + "p50": 122.75200337171555, + "p90": 372.54399061203003, + "p95": 404.1920006275177, + "p99": 428.3199906349182 }, "combine": { - "p50": 67.6800012588501, - "p90": 69.60000097751617, - "p95": 76.89599692821503, - "p99": 79.16799932718277 + "p50": 120.60800194740295, + "p90": 188.48000466823578, + "p95": 386.9760036468506, + "p99": 412.3840034008026 }, "roundtrip": { - "p50": 108.73600095510483, - "p90": 115.80800265073776, - "p95": 117.0239970088005, - "p99": 124.35200065374374 + "p50": 214.39999341964722, + "p90": 436.6399943828583, + "p95": 488.0639910697937, + "p99": 523.6480236053467 }, "isolatedSum": { - "p50": 119.1680021584034, - "p90": 123.00800159573555, - "p95": 131.83999806642532, - "p99": 140.79999923706055 + "p50": 243.3600053191185, + "p90": 
561.0239952802658, + "p95": 791.1680042743683, + "p99": 840.7039940357208 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2480128, - "combineLogicalBytes": 2480128, - "fanoutMean": 5.40625, - "recvTokensMax": 29, - "stragglerRank": 4, + "dispatchLogicalBytes": 66576384, + "combineLogicalBytes": 66576384, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 256, + "globalTokens": 2048, "dispatch": { - "p50": 52.639998495578766, - "p90": 55.64799904823303, - "p95": 59.39200147986412, - "p99": 68.00000369548798 + "p50": 157.60000050067902, + "p90": 420.6080138683319, + "p95": 449.0239918231964, + "p99": 488.8960123062134 }, "combine": { - "p50": 68.25599819421768, - "p90": 77.08799839019775, - "p95": 77.60000228881836, - "p99": 78.94399762153625 + "p50": 162.6880019903183, + "p90": 391.29599928855896, + "p95": 432.2879910469055, + "p99": 457.69599080085754 }, "roundtrip": { - "p50": 113.69600147008896, - "p90": 117.66400188207626, - "p95": 118.72000247240067, - "p99": 121.18399888277054 + "p50": 290.0480031967163, + "p90": 529.4719934463501, + "p95": 567.3279762268066, + "p99": 593.7280058860779 }, "isolatedSum": { - "p50": 120.89599668979645, - "p90": 132.7359974384308, - "p95": 136.99200376868248, - "p99": 146.94400131702423 + "p50": 320.2880024909973, + "p90": 811.9040131568909, + "p95": 881.3119828701019, + "p99": 946.5920031070709 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 7, + "dispatchLogicalBytes": 133619712, + "combineLogicalBytes": 133619712, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 16, - "globalTokens": 128, + "tokensPerRank": 512, + "globalTokens": 4096, "dispatch": { - "p50": 
61.792001128196716, - "p90": 67.90400296449661, - "p95": 68.67200136184692, - "p99": 71.1359977722168 + "p50": 223.13599288463593, + "p90": 504.2880177497864, + "p95": 543.9680218696594, + "p99": 947.6799964904785 }, "combine": { - "p50": 70.46400010585785, - "p90": 78.40000092983246, - "p95": 78.59200239181519, - "p99": 81.44000172615051 + "p50": 261.50399446487427, + "p90": 513.8239860534668, + "p95": 548.255980014801, + "p99": 2365.855932235718 }, "roundtrip": { - "p50": 113.18399757146835, - "p90": 115.9679964184761, - "p95": 117.53600090742111, - "p99": 127.87200510501862 + "p50": 440.2559995651245, + "p90": 453.95201444625854, + "p95": 705.6000232696533, + "p99": 817.6000118255615 }, "isolatedSum": { - "p50": 132.25600123405457, - "p90": 146.30400389432907, - "p95": 147.2640037536621, - "p99": 152.5759994983673 + "p50": 484.6399873495102, + "p90": 1018.1120038032532, + "p95": 1092.2240018844604, + "p99": 3313.5359287261963 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 9920512, - "combineLogicalBytes": 9920512, - "fanoutMean": 5.40625, - "recvTokensMax": 92, - "stragglerRank": 4, + "dispatchLogicalBytes": 267657216, + "combineLogicalBytes": 267657216, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 32, - "globalTokens": 256, + "tokensPerRank": 1024, + "globalTokens": 8192, "dispatch": { - "p50": 62.65600025653839, - "p90": 64.92800265550613, - "p95": 66.880002617836, - "p99": 73.69600236415863 + "p50": 349.2160141468048, + "p90": 614.4000291824341, + "p95": 651.3599753379822, + "p99": 686.9440078735352 }, "combine": { - "p50": 78.59200239181519, - "p90": 79.74400371313095, - "p95": 80.64000308513641, - "p99": 85.63199639320374 + "p50": 479.0399968624115, + "p90": 748.3519911766052, + "p95": 785.2159738540649, + "p99": 819.4559812545776 }, "roundtrip": { - "p50": 124.28800016641617, - "p90": 127.93600559234619, - "p95": 130.43199479579926, - "p99": 
138.5599970817566 + "p50": 791.7439937591553, + "p90": 805.6960105895996, + "p95": 1106.7839860916138, + "p99": 1150.048017501831 }, "isolatedSum": { - "p50": 141.24800264835358, - "p90": 144.67200636863708, - "p95": 147.5200057029724, - "p99": 159.32799875736237 + "p50": 828.2560110092163, + "p90": 1362.7520203590393, + "p95": 1436.5759491920471, + "p99": 1506.3999891281128 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19726336, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 182, - "stragglerRank": 7, + "dispatchLogicalBytes": 534380544, + "combineLogicalBytes": 534380544, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 64, - "globalTokens": 512, + "tokensPerRank": 2048, + "globalTokens": 16384, "dispatch": { - "p50": 75.77600330114365, - "p90": 83.16799998283386, - "p95": 83.96799862384796, - "p99": 96.3520035147667 + "p50": 599.6479988098145, + "p90": 615.4239773750305, + "p95": 909.1519713401794, + "p99": 959.3600034713745 }, "combine": { - "p50": 91.48799628019333, - "p90": 93.6959981918335, - "p95": 95.90400010347366, - "p99": 104.76800054311752 + "p50": 861.1199855804443, + "p90": 877.5680065155029, + "p95": 1169.2479848861694, + "p99": 1214.2720222473145 }, "roundtrip": { - "p50": 150.11200308799744, - "p90": 153.28000485897064, - "p95": 154.91199493408203, - "p99": 159.96800363063812 + "p50": 1430.6880235671997, + "p90": 1540.4800176620483, + "p95": 1742.8159713745117, + "p99": 1778.9440155029297 }, "isolatedSum": { - "p50": 167.26399958133698, - "p90": 176.86399817466736, - "p95": 179.87199872732162, - "p99": 201.12000405788422 + "p50": 1460.7679843902588, + "p90": 1492.9919838905334, + "p95": 2078.399956226349, + "p99": 2173.632025718689 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 38993920, - "combineLogicalBytes": 38993920, - "fanoutMean": 5.3125, - "recvTokensMax": 367, - "stragglerRank": 7, + 
"dispatchLogicalBytes": 1066119168, + "combineLogicalBytes": 1066119168, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 4096, + "globalTokens": 32768, "dispatch": { - "p50": 87.36000210046768, - "p90": 89.31200206279755, - "p95": 92.3520028591156, - "p99": 98.36799651384354 + "p50": 1109.7919940948486, + "p90": 1361.88805103302, + "p95": 1428.671956062317, + "p99": 1470.1440334320068 }, "combine": { - "p50": 115.32799899578094, - "p90": 115.9679964184761, - "p95": 117.21599847078323, - "p99": 126.49600207805634 + "p50": 1612.3520135879517, + "p90": 1633.4079504013062, + "p95": 1926.4960289001465, + "p99": 1954.7840356826782 }, "roundtrip": { - "p50": 186.14399433135986, - "p90": 191.67999923229218, - "p95": 193.05600225925446, - "p99": 199.072003364563 + "p50": 2689.6960735321045, + "p90": 2706.239938735962, + "p95": 2997.312068939209, + "p99": 3036.191940307617 }, "isolatedSum": { - "p50": 202.68800109624863, - "p90": 205.27999848127365, - "p95": 209.56800132989883, - "p99": 224.86399859189987 + "p50": 2722.1440076828003, + "p90": 2995.296001434326, + "p95": 3355.1679849624634, + "p99": 3424.928069114685 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 7, + "dispatchLogicalBytes": 2131722240, + "combineLogicalBytes": 2131722240, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -12357,28 +12214,28 @@ ] }, { - "id": "cx-00895a92", - "identity": "b300|deepep|7168|8|256|bf16|normal|cached-layout-comm-only-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", - "colorKey": "b300_33311fdc", - "comparisonKey": "fb96ce98136947bb", + "id": "cx-a5b9e896", + "identity": 
"gb200|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||b9896b0a7ca9901", + "colorKey": "gb200_f1783455", + "comparisonKey": "3c02cc9216dfe92d", "schemaVersion": 3, - "generatedAt": "2026-06-27T09:46:23.442699+00:00", + "generatedAt": "2026-06-29T13:53:19.000387+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_07", - "sku": "b300", + "publicationStatus": "comparable-experimental", + "runner": "gb200-nv_1", + "sku": "gb200", "backend": "deepep", - "phase": "decode", + "phase": "prefill", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "cached-layout-comm-only-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb200-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep · bf16 [cl]", + "label": "GB200 EP8 · deepep · bf16", "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, @@ -12390,333 +12247,260 @@ "unevenTokens": "none", "eplbEnabled": false, "dispatchDtype": "bf16", + "kernelGeneration": "v1", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1351, + "achievedFraction": 0.1316, "configuredUnits": 20, - "deviceUnits": 148, + "deviceUnits": 152, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, "paretoEligible": false }, "placement": { - "kind": "packed", - "nodes": 1, + "kind": "adversarial", + "nodes": 2, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "ac583971f94b176", - "workloadId": "set:8:d8d49658059863f2", - "workloadSource": "canonical-serialized", + "traceSignature": "b9896b0a7ca9901", + "workloadId": null, + "workloadSource": "seeded-runtime", "eplbImbalanceBefore": null, 
"eplbImbalanceAfter": null, - "backendVersion": "1.2.1", + "backendVersion": "1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28285573016", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285573016", - "createdAt": "2026-06-27T09:46:23.442699+00:00", - "sha": "149586650dbed5b7579537347e9489d5b41543c1" + "id": "28374335449", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449", + "createdAt": "2026-06-29T13:08:20Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 49.6320016682148, - "p90": 50.97600072622299, - "p95": 52.319999784231186, - "p99": 56.992001831531525 - }, - "combine": { - "p50": 65.66400080919266, - "p90": 66.52799993753433, - "p95": 67.26399809122086, - "p99": 77.2479996085167 - }, - "roundtrip": { - "p50": 98.62399846315384, - "p90": 101.31199657917023, - "p95": 103.39199751615524, - "p99": 122.3360002040863 - }, - "isolatedSum": { - "p50": 115.29600247740746, - "p90": 117.50400066375732, - "p95": 119.58399787545204, - "p99": 134.24000144004822 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 630784, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 50.144001841545105, - "p90": 51.872000098228455, - "p95": 53.37600037455559, - "p99": 63.00800293684006 + "p50": 123.99999797344208, + "p90": 367.5839900970459, + "p95": 395.26399970054626, + "p99": 433.56800079345703 }, "combine": { - "p50": 67.07199662923813, - "p90": 68.7360018491745, - "p95": 69.24799829721451, - "p99": 79.1039988398552 + "p50": 124.95999783277512, + "p90": 361.34400963783264, + "p95": 
400.4479944705963, + "p99": 415.16798734664917 }, "roundtrip": { - "p50": 99.80800002813339, - "p90": 102.01600193977356, - "p95": 103.7760004401207, - "p99": 110.20799726247787 + "p50": 221.50400280952454, + "p90": 250.65600872039795, + "p95": 482.4959933757782, + "p99": 526.7519950866699 }, "isolatedSum": { - "p50": 117.21599847078323, - "p90": 120.60800194740295, - "p95": 122.6239986717701, - "p99": 142.11200177669525 + "p50": 248.9599958062172, + "p90": 728.9279997348785, + "p95": 795.7119941711426, + "p99": 848.7359881401062 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1232896, - "combineLogicalBytes": 1232896, - "fanoutMean": 5.375, - "recvTokensMax": 13, - "stragglerRank": 7, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 4, - "globalTokens": 32, + "tokensPerRank": 256, + "globalTokens": 2048, "dispatch": { - "p50": 50.97600072622299, - "p90": 52.799999713897705, - "p95": 54.91200089454651, - "p99": 61.11999973654747 + "p50": 161.3440066576004, + "p90": 455.7119905948639, + "p95": 468.80000829696655, + "p99": 482.015997171402 }, "combine": { - "p50": 68.4799998998642, - "p90": 89.63199704885483, - "p95": 92.28800237178802, - "p99": 102.4319976568222 + "p50": 168.35199296474457, + "p90": 428.73600125312805, + "p95": 442.68798828125, + "p99": 465.05600214004517 }, "roundtrip": { - "p50": 112.86400258541107, - "p90": 117.08799749612808, - "p95": 118.23999881744385, - "p99": 121.95199728012085 + "p50": 299.4239926338196, + "p90": 541.6319966316223, + "p95": 581.6320180892944, + "p99": 611.4240288734436 }, "isolatedSum": { - "p50": 119.45600062608719, - "p90": 142.43199676275253, - "p95": 147.20000326633453, - "p99": 163.55199739336967 + "p50": 329.69599962234497, + "p90": 884.447991847992, + "p95": 911.4879965782166, + "p99": 947.0719993114471 }, "roundtripMeasured": true, - 
"dispatchLogicalBytes": 2480128, - "combineLogicalBytes": 2480128, - "fanoutMean": 5.40625, - "recvTokensMax": 29, - "stragglerRank": 7, + "dispatchLogicalBytes": 155889664, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 512, + "globalTokens": 4096, "dispatch": { - "p50": 55.296000093221664, - "p90": 70.30399888753891, - "p95": 75.1039981842041, - "p99": 115.58400094509125 + "p50": 231.7119985818863, + "p90": 335.07201075553894, + "p95": 539.1680002212524, + "p99": 568.6079859733582 }, "combine": { - "p50": 68.89600306749344, - "p90": 77.79199630022049, - "p95": 78.33600044250488, - "p99": 82.33600109815598 + "p50": 293.69598627090454, + "p90": 538.0799770355225, + "p95": 578.2399773597717, + "p99": 609.5679998397827 }, "roundtrip": { - "p50": 113.6000007390976, - "p90": 117.91999638080597, - "p95": 118.97599697113037, - "p99": 125.18399953842163 + "p50": 472.03201055526733, + "p90": 486.04801297187805, + "p95": 770.0799703598022, + "p99": 807.6159954071045 }, "isolatedSum": { - "p50": 124.1920031607151, - "p90": 148.0959951877594, - "p95": 153.43999862670898, - "p99": 197.92000204324722 + "p50": 525.4079848527908, + "p90": 873.1519877910614, + "p95": 1117.4079775810242, + "p99": 1178.1759858131409 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, + "dispatchLogicalBytes": 312266752, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 16, - "globalTokens": 128, + "tokensPerRank": 1024, + "globalTokens": 8192, "dispatch": { - "p50": 60.06399914622307, - "p90": 67.19999760389328, - "p95": 68.03199648857117, - "p99": 71.87200337648392 + "p50": 
361.1519932746887, + "p90": 370.88000774383545, + "p95": 662.015974521637, + "p99": 707.7760100364685 }, "combine": { - "p50": 68.89600306749344, - "p90": 77.85599678754807, - "p95": 78.3040001988411, - "p99": 81.4720019698143 + "p50": 494.6880042552948, + "p90": 760.9279751777649, + "p95": 812.9600286483765, + "p99": 842.9120182991028 }, "roundtrip": { - "p50": 112.09599673748016, - "p90": 114.9120032787323, - "p95": 116.54400080442429, - "p99": 128.25599312782288 + "p50": 828.3519744873047, + "p90": 839.4240140914917, + "p95": 1141.7280435562134, + "p99": 1192.6079988479614 }, "isolatedSum": { - "p50": 128.9600022137165, - "p90": 145.05599439144135, - "p95": 146.33599668741226, - "p99": 153.34400534629822 + "p50": 855.8399975299835, + "p90": 1131.8079829216003, + "p95": 1474.9760031700134, + "p99": 1550.6880283355713 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 9920512, - "combineLogicalBytes": 9920512, - "fanoutMean": 5.40625, - "recvTokensMax": 92, - "stragglerRank": 7, + "dispatchLogicalBytes": 623443968, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 32, - "globalTokens": 256, + "tokensPerRank": 2048, + "globalTokens": 16384, "dispatch": { - "p50": 61.792001128196716, - "p90": 63.45599889755249, - "p95": 66.49599969387054, - "p99": 72.03199714422226 + "p50": 619.5520162582397, + "p90": 636.5119814872742, + "p95": 949.4079947471619, + "p99": 986.0799908638 }, "combine": { - "p50": 78.11199873685837, - "p90": 79.42400127649307, - "p95": 80.35200089216232, - "p99": 83.48800241947174 + "p50": 884.3520283699036, + "p90": 899.4879722595215, + "p95": 1200.1279592514038, + "p99": 1233.2799434661865 }, "roundtrip": { - "p50": 122.81599640846252, - "p90": 124.95999783277512, - "p95": 127.00800597667694, - "p99": 132.76800513267517 + "p50": 1478.4640073776245, + "p90": 1499.168038368225, + "p95": 1800.3840446472168, + 
"p99": 1831.6800594329834 }, "isolatedSum": { - "p50": 139.90399986505508, - "p90": 142.88000017404556, - "p95": 146.84800058603287, - "p99": 155.519999563694 + "p50": 1503.9040446281433, + "p90": 1535.9999537467957, + "p95": 2149.5359539985657, + "p99": 2219.3599343299866 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19726336, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 182, - "stragglerRank": 7, + "dispatchLogicalBytes": 1243805696, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 64, - "globalTokens": 512, + "tokensPerRank": 4096, + "globalTokens": 32768, "dispatch": { - "p50": 74.5920017361641, - "p90": 82.14399963617325, - "p95": 83.20000022649765, - "p99": 97.69599884748459 + "p50": 1144.1600322723389, + "p90": 1158.687949180603, + "p95": 1444.2239999771118, + "p99": 1490.7519817352295 }, "combine": { - "p50": 91.93599969148636, - "p90": 100.63999891281128, - "p95": 101.75999999046326, - "p99": 108.22399705648422 + "p50": 1646.016001701355, + "p90": 1906.432032585144, + "p95": 1952.9279470443726, + "p99": 2000.7359981536865 }, "roundtrip": { - "p50": 148.60799908638, - "p90": 151.96800231933594, - "p95": 153.1199961900711, - "p99": 155.93600273132324 + "p50": 2764.256000518799, + "p90": 2780.3521156311035, + "p95": 3060.800075531006, + "p99": 3094.752073287964 }, "isolatedSum": { - "p50": 166.52800142765045, - "p90": 182.78399854898453, - "p95": 184.9600002169609, - "p99": 205.9199959039688 + "p50": 2790.176033973694, + "p90": 3065.119981765747, + "p95": 3397.1519470214844, + "p99": 3491.487979888916 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 38993920, - "combineLogicalBytes": 38993920, - "fanoutMean": 5.3125, - "recvTokensMax": 367, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 
1024, - "dispatch": { - "p50": 86.5280032157898, - "p90": 87.99999952316284, - "p95": 90.11200070381165, - "p99": 99.84000027179718 - }, - "combine": { - "p50": 114.97599631547928, - "p90": 116.28799885511398, - "p95": 117.18399822711945, - "p99": 126.49600207805634 - }, - "roundtrip": { - "p50": 185.2799952030182, - "p90": 191.00800156593323, - "p95": 192.76799261569977, - "p99": 203.23200523853302 - }, - "isolatedSum": { - "p50": 201.50399953126907, - "p90": 204.28799837827682, - "p95": 207.2959989309311, - "p99": 226.33600234985352 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 7, + "dispatchLogicalBytes": 2487009280, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 @@ -12724,47 +12508,48 @@ ] }, { - "id": "cx-34fdfa58", - "identity": "b300|deepep|4096|8|128|fp8|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||75530960a30b452", - "colorKey": "b300_d6fd14c3", - "comparisonKey": "e31dbd692115f689", + "id": "cx-b1e9f61a", + "identity": "gb200|deepep|v1|7168|8|256|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||b9896b0a7ca9901", + "colorKey": "gb200_7c2da03d", + "comparisonKey": "20a70c4abe2b89cf", "schemaVersion": 3, - "generatedAt": "2026-06-27T11:14:20.626757+00:00", + "generatedAt": "2026-06-29T13:49:33.863915+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_04", - "sku": "b300", + "publicationStatus": "comparable-experimental", + "runner": "gb200-nv_0", + "sku": "gb200", "backend": "deepep", - "phase": "decode", + "phase": "prefill", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "b300-nvlink-island", - 
"transport": "nvlink", + "measurementContract": "runtime-visible-v1", + "topologyClass": "gb200-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep · fp8", - "model": "Qwen3.5", + "label": "GB200 EP8 · deepep · bf16", + "model": "DeepSeek-V3/V4", "shape": { - "hidden": 4096, + "hidden": 7168, "topk": 8, - "experts": 128, + "experts": 256, "routing": "uniform", "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, - "dispatchDtype": "fp8", + "dispatchDtype": "bf16", + "kernelGeneration": "v1", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1351, + "achievedFraction": 0.1316, "configuredUnits": 20, - "deviceUnits": 148, + "deviceUnits": 152, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -12772,318 +12557,244 @@ }, "placement": { "kind": "packed", - "nodes": 1, + "nodes": 2, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "75530960a30b452", - "workloadId": "set:8:d1b92539bddfb570", - "workloadSource": "canonical-serialized", + "traceSignature": "b9896b0a7ca9901", + "workloadId": null, + "workloadSource": "seeded-runtime", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", + "backendVersion": "1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28287509502", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28287509502", - "createdAt": "2026-06-27T11:14:20.626757+00:00", - "sha": "df7fddee0a275156d4c72fa006cd2b73bce72613" + "id": "28374335449", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449", + "createdAt": "2026-06-29T13:08:20Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { - "tokensPerRank": 1, 
- "globalTokens": 8, - "dispatch": { - "p50": 54.655998945236206, - "p90": 56.60799890756607, - "p95": 57.40800127387047, - "p99": 63.80800157785416 - }, - "combine": { - "p50": 49.92000013589859, - "p90": 51.16799846291542, - "p95": 52.12799832224846, - "p99": 59.10399928689003 - }, - "roundtrip": { - "p50": 107.07200318574905, - "p90": 109.6000000834465, - "p95": 111.84000223875046, - "p99": 129.56799566745758 - }, - "isolatedSum": { - "p50": 104.5759990811348, - "p90": 107.77599737048149, - "p95": 109.53599959611893, - "p99": 122.91200086474419 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 172032, - "combineLogicalBytes": 344064, - "fanoutMean": 5.25, - "recvTokensMax": 6, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 55.135998874902725, - "p90": 57.24800005555153, - "p95": 58.720000088214874, - "p99": 64.80000168085098 - }, - "combine": { - "p50": 50.75199902057648, - "p90": 52.480001002550125, - "p95": 52.83199995756149, - "p99": 63.90400230884552 - }, - "roundtrip": { - "p50": 108.83200168609619, - "p90": 112.12799698114395, - "p95": 115.26399850845337, - "p99": 229.40799593925476 - }, - "isolatedSum": { - "p50": 105.8879978954792, - "p90": 109.72800105810165, - "p95": 111.55200004577637, - "p99": 128.7040039896965 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 352256, - "combineLogicalBytes": 704512, - "fanoutMean": 5.375, - "recvTokensMax": 12, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 56.671999394893646, - "p90": 76.38400048017502, - "p95": 85.69599688053131, - "p99": 199.10399615764618 + "p50": 122.43200093507767, + "p90": 130.14400005340576, + "p95": 133.7279975414276, + "p99": 138.97599279880524 }, "combine": { - "p50": 53.15199866890907, - "p90": 
57.11999908089638, - "p95": 58.62399935722351, - "p99": 63.07200342416763 + "p50": 125.98399817943573, + "p90": 131.52000308036804, + "p95": 132.9919993877411, + "p99": 140.06400108337402 }, "roundtrip": { - "p50": 111.42399907112122, - "p90": 114.33599889278412, - "p95": 116.48000031709671, - "p99": 125.5359947681427 + "p50": 220.70400416851044, + "p90": 227.39200294017792, + "p95": 230.52799701690674, + "p99": 235.167995095253 }, "isolatedSum": { - "p50": 109.82399806380272, - "p90": 133.5039995610714, - "p95": 144.31999623775482, - "p99": 262.1759995818138 + "p50": 248.4159991145134, + "p90": 261.6640031337738, + "p95": 266.7199969291687, + "p99": 279.03999388217926 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 692224, - "combineLogicalBytes": 1384448, - "fanoutMean": 5.28125, - "recvTokensMax": 26, - "stragglerRank": 4, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 256, + "globalTokens": 2048, "dispatch": { - "p50": 57.760000228881836, - "p90": 59.93599817156792, - "p95": 61.184000223875046, - "p99": 67.26399809122086 + "p50": 160.73599457740784, + "p90": 170.0800061225891, + "p95": 174.8799979686737, + "p99": 7247.3602294921875 }, "combine": { - "p50": 54.91200089454651, - "p90": 56.96000158786774, - "p95": 57.28000029921532, - "p99": 63.391998410224915 + "p50": 167.77600347995758, + "p90": 171.48800194263458, + "p95": 175.58400332927704, + "p99": 183.48799645900726 }, "roundtrip": { - "p50": 114.78400230407715, - "p90": 116.99199676513672, - "p95": 118.43200027942657, - "p99": 134.94400680065155 + "p50": 300.86401104927063, + "p90": 309.471994638443, + "p95": 313.1519854068756, + "p99": 326.9439935684204 }, "isolatedSum": { - "p50": 112.67200112342834, - "p90": 116.89599975943565, - "p95": 118.46400052309036, - "p99": 
130.65599650144577 + "p50": 328.5119980573654, + "p90": 341.5680080652237, + "p95": 350.46400129795074, + "p99": 7430.848225951195 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1372160, - "combineLogicalBytes": 2744320, - "fanoutMean": 5.234375, - "recvTokensMax": 49, - "stragglerRank": 4, + "dispatchLogicalBytes": 155889664, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 16, - "globalTokens": 128, + "tokensPerRank": 512, + "globalTokens": 4096, "dispatch": { - "p50": 57.8560009598732, - "p90": 59.93599817156792, - "p95": 61.216000467538834, - "p99": 79.80799674987793 + "p50": 232.09600150585175, + "p90": 240.57599902153015, + "p95": 243.80800127983093, + "p99": 250.0160038471222 }, "combine": { - "p50": 54.91200089454651, - "p90": 56.73599988222122, - "p95": 57.023998349905014, - "p99": 60.06399914622307 + "p50": 292.959988117218, + "p90": 300.5119860172272, + "p95": 303.26399207115173, + "p99": 307.8719973564148 }, "roundtrip": { - "p50": 115.13599753379822, - "p90": 117.34399944543839, - "p95": 118.6240017414093, - "p99": 126.08000636100769 + "p50": 472.896009683609, + "p90": 479.99998927116394, + "p95": 482.7519953250885, + "p99": 488.7999892234802 }, "isolatedSum": { - "p50": 112.76800185441971, - "p90": 116.67199805378914, - "p95": 118.23999881744385, - "p99": 139.871995896101 + "p50": 525.0559896230698, + "p90": 541.0879850387573, + "p95": 547.0719933509827, + "p99": 557.888001203537 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2732032, - "combineLogicalBytes": 5464064, - "fanoutMean": 5.2109375, - "recvTokensMax": 94, - "stragglerRank": 7, + "dispatchLogicalBytes": 312266752, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 32, - "globalTokens": 256, + 
"tokensPerRank": 1024, + "globalTokens": 8192, "dispatch": { - "p50": 61.08799949288368, - "p90": 63.26399743556976, - "p95": 64.4799992442131, - "p99": 74.43200051784515 + "p50": 360.79999804496765, + "p90": 371.2320029735565, + "p95": 380.2880048751831, + "p99": 451.6800045967102 }, "combine": { - "p50": 58.27200040221214, - "p90": 60.28800085186958, - "p95": 60.92799827456474, - "p99": 65.0240033864975 + "p50": 493.120014667511, + "p90": 500.5120038986206, + "p95": 501.9199848175049, + "p99": 508.86398553848267 }, "roundtrip": { - "p50": 122.6240023970604, - "p90": 125.56800246238708, - "p95": 127.26399302482605, - "p99": 150.36800503730774 + "p50": 825.6000280380249, + "p90": 832.4480056762695, + "p95": 834.2080116271973, + "p99": 841.1200046539307 }, "isolatedSum": { - "p50": 119.35999989509583, - "p90": 123.55199828743935, - "p95": 125.40799751877785, - "p99": 139.45600390434265 + "p50": 853.9200127124786, + "p90": 871.7440068721771, + "p95": 882.207989692688, + "p99": 960.5439901351929 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 5562368, - "combineLogicalBytes": 11124736, - "fanoutMean": 5.3046875, - "recvTokensMax": 186, - "stragglerRank": 4, + "dispatchLogicalBytes": 623443968, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 64, - "globalTokens": 512, + "tokensPerRank": 2048, + "globalTokens": 16384, "dispatch": { - "p50": 67.80800223350525, - "p90": 70.62400132417679, - "p95": 71.3919997215271, - "p99": 76.99199765920639 + "p50": 619.8400259017944, + "p90": 626.36798620224, + "p95": 628.1279921531677, + "p99": 635.3920102119446 }, "combine": { - "p50": 71.29599899053574, - "p90": 73.40800017118454, - "p95": 74.23999905586243, - "p99": 77.15199887752533 + "p50": 880.7359933853149, + "p90": 885.1199746131897, + "p95": 886.3999843597412, + "p99": 893.0879831314087 }, "roundtrip": { - "p50": 146.91199362277985, 
- "p90": 150.176003575325, - "p95": 151.90400183200836, - "p99": 180.51199615001678 + "p50": 1470.2080488204956, + "p90": 1478.559970855713, + "p95": 1480.8000326156616, + "p99": 1488.5120391845703 }, "isolatedSum": { - "p50": 139.10400122404099, - "p90": 144.03200149536133, - "p95": 145.63199877738953, - "p99": 154.14399653673172 + "p50": 1500.5760192871094, + "p90": 1511.4879608154297, + "p95": 1514.527976512909, + "p99": 1528.4799933433533 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 11096064, - "combineLogicalBytes": 22192128, - "fanoutMean": 5.291015625, - "recvTokensMax": 358, - "stragglerRank": 4, + "dispatchLogicalBytes": 1243805696, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 4096, + "globalTokens": 32768, "dispatch": { - "p50": 77.11999863386154, - "p90": 79.52000200748444, - "p95": 80.70400357246399, - "p99": 101.1200025677681 + "p50": 1146.6879844665527, + "p90": 1153.7920236587524, + "p95": 1155.9360027313232, + "p99": 1161.56804561615 }, "combine": { - "p50": 87.74399757385254, - "p90": 89.82399851083755, - "p95": 90.91199934482574, - "p99": 96.12800180912018 + "p50": 1635.5199813842773, + "p90": 1693.6320066452026, + "p95": 1704.7040462493896, + "p99": 1926.751971244812 }, "roundtrip": { - "p50": 178.8800060749054, - "p90": 181.5679967403412, - "p95": 182.8799992799759, - "p99": 190.68799912929535 + "p50": 2760.3840827941895, + "p90": 2840.9600257873535, + "p95": 3140.928030014038, + "p99": 7563.263893127441 }, "isolatedSum": { - "p50": 164.86399620771408, - "p90": 169.344000518322, - "p95": 171.61600291728973, - "p99": 197.24800437688828 + "p50": 2782.20796585083, + "p90": 2847.424030303955, + "p95": 2860.640048980713, + "p99": 3088.320016860962 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 22282240, - "combineLogicalBytes": 44564480, 
- "fanoutMean": 5.3125, - "recvTokensMax": 699, - "stragglerRank": 7, + "dispatchLogicalBytes": 2487009280, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -13091,47 +12802,48 @@ ] }, { - "id": "cx-3b501b50", - "identity": "b300|deepep|4096|8|128|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||75530960a30b452", - "colorKey": "b300_c4c63f07", - "comparisonKey": "b3fe3e767199861f", + "id": "cx-a58e57e5", + "identity": "gb200|deepep|v1|7168|8|384|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||fc08bf2f8d42ed8", + "colorKey": "gb200_7c2da03d", + "comparisonKey": "6f8e0fa22252756a", "schemaVersion": 3, - "generatedAt": "2026-06-27T09:51:42.086775+00:00", + "generatedAt": "2026-06-29T13:58:46.178899+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_09", - "sku": "b300", + "publicationStatus": "comparable-experimental", + "runner": "gb200-nv_1", + "sku": "gb200", "backend": "deepep", - "phase": "decode", + "phase": "prefill", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "runtime-visible-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", + "topologyClass": "gb200-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep · fp8", - "model": "Qwen3.5", + "label": "GB200 EP8 · deepep · bf16", + "model": "Kimi-K2", "shape": { - "hidden": 4096, + "hidden": 7168, "topk": 8, - "experts": 128, + "experts": 384, "routing": "uniform", "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, - "dispatchDtype": "fp8", + "dispatchDtype": "bf16", + "kernelGeneration": "v1", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1351, 
+ "achievedFraction": 0.1316, "configuredUnits": 20, - "deviceUnits": 148, + "deviceUnits": 152, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -13139,318 +12851,427 @@ }, "placement": { "kind": "packed", - "nodes": 1, + "nodes": 2, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "75530960a30b452", - "workloadId": "set:8:d1b92539bddfb570", - "workloadSource": "canonical-serialized", + "traceSignature": "fc08bf2f8d42ed8", + "workloadId": null, + "workloadSource": "seeded-runtime", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", + "backendVersion": "1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28285693587", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285693587", - "createdAt": "2026-06-27T09:51:42.086775+00:00", - "sha": "149586650dbed5b7579537347e9489d5b41543c1" + "id": "28374335449", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449", + "createdAt": "2026-06-29T13:08:20Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { - "tokensPerRank": 1, - "globalTokens": 8, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 112.89600282907486, - "p90": 116.22399836778641, - "p95": 118.40000003576279, - "p99": 145.11999487876892 + "p50": 124.79999661445618, + "p90": 371.3279962539673, + "p95": 393.5360014438629, + "p99": 431.2959909439087 }, "combine": { - "p50": 50.71999877691269, - "p90": 52.57600173354149, - "p95": 53.119998425245285, - "p99": 63.07200342416763 + "p50": 126.46399438381195, + "p90": 362.4640107154846, + "p95": 400.9599983692169, + "p99": 422.4959909915924 }, "roundtrip": { - "p50": 155.45600652694702, - "p90": 158.62399339675903, - "p95": 161.05599701404572, - "p99": 178.27199399471283 + "p50": 224.09600019454956, + 
"p90": 444.7680115699768, + "p95": 486.4319860935211, + "p99": 527.9039740562439 }, "isolatedSum": { - "p50": 163.61600160598755, - "p90": 168.8000001013279, - "p95": 171.51999846100807, - "p99": 208.19199830293655 + "p50": 251.26399099826813, + "p90": 733.7920069694519, + "p95": 794.4959998130798, + "p99": 853.7919819355011 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 172032, - "combineLogicalBytes": 344064, - "fanoutMean": 5.25, - "recvTokensMax": 6, - "stragglerRank": 7, + "dispatchLogicalBytes": 77514752, + "combineLogicalBytes": 77514752, + "fanoutMean": 5.2802734375, + "recvTokensMax": 707, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2, - "globalTokens": 16, + "tokensPerRank": 256, + "globalTokens": 2048, "dispatch": { - "p50": 113.82400244474411, - "p90": 117.18399822711945, - "p95": 119.80800330638885, - "p99": 142.62400567531586 + "p50": 166.33599996566772, + "p90": 457.92001485824585, + "p95": 493.0880069732666, + "p99": 2744.0319061279297 }, "combine": { - "p50": 52.000001072883606, - "p90": 53.0879981815815, - "p95": 54.11199852824211, - "p99": 62.3680017888546 + "p50": 170.04799842834473, + "p90": 437.5999867916107, + "p95": 457.3119878768921, + "p99": 676.4479875564575 }, "roundtrip": { - "p50": 156.8640023469925, - "p90": 161.85599565505981, - "p95": 168.44800114631653, - "p99": 217.50399470329285 + "p50": 307.0720136165619, + "p90": 557.7600002288818, + "p95": 587.9039764404297, + "p99": 620.6719875335693 }, "isolatedSum": { - "p50": 165.82400351762772, - "p90": 170.27199640870094, - "p95": 173.92000183463097, - "p99": 204.99200746417046 + "p50": 336.38399839401245, + "p90": 895.5200016498566, + "p95": 950.3999948501587, + "p99": 3420.479893684387 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 352256, - "combineLogicalBytes": 704512, - "fanoutMean": 5.375, - "recvTokensMax": 12, - "stragglerRank": 7, + "dispatchLogicalBytes": 154570752, + "combineLogicalBytes": 154570752, + 
"fanoutMean": 5.2646484375, + "recvTokensMax": 1391, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 4, - "globalTokens": 32, + "tokensPerRank": 512, + "globalTokens": 4096, "dispatch": { - "p50": 115.13599753379822, - "p90": 118.75200271606445, - "p95": 120.86399644613266, - "p99": 133.2480013370514 + "p50": 235.20000278949738, + "p90": 256.00001215934753, + "p95": 539.9360060691833, + "p99": 572.7360248565674 }, "combine": { - "p50": 54.75199967622757, - "p90": 56.703999638557434, - "p95": 56.992001831531525, - "p99": 60.127999633550644 + "p50": 287.58400678634644, + "p90": 295.9679961204529, + "p95": 540.0639772415161, + "p99": 600.1600027084351 }, "roundtrip": { - "p50": 161.21600568294525, - "p90": 165.0879979133606, - "p95": 167.00799763202667, - "p99": 186.91200017929077 + "p50": 475.77598690986633, + "p90": 494.04799938201904, + "p95": 776.6079902648926, + "p99": 803.1359910964966 }, "isolatedSum": { - "p50": 169.8879972100258, - "p90": 175.4560023546219, - "p95": 177.85599827766418, - "p99": 193.37600097060204 + "p50": 522.7840095758438, + "p90": 551.9680082798004, + "p95": 1079.9999833106995, + "p99": 1172.8960275650024 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 692224, - "combineLogicalBytes": 1384448, - "fanoutMean": 5.28125, - "recvTokensMax": 26, - "stragglerRank": 7, + "dispatchLogicalBytes": 309772288, + "combineLogicalBytes": 309772288, + "fanoutMean": 5.275390625, + "recvTokensMax": 2754, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 1024, + "globalTokens": 8192, "dispatch": { - "p50": 116.31999909877777, - "p90": 118.9119964838028, - "p95": 120.70400267839432, - "p99": 126.49600207805634 + "p50": 365.4719889163971, + "p90": 382.88000226020813, + "p95": 667.0719981193542, + "p99": 700.4799842834473 }, "combine": { - "p50": 56.2559999525547, - "p90": 57.88800120353699, - "p95": 
58.559998869895935, - "p99": 72.7040022611618 + "p50": 497.44001030921936, + "p90": 772.5120186805725, + "p95": 816.3520097732544, + "p99": 837.2160196304321 }, "roundtrip": { - "p50": 163.00800442695618, - "p90": 166.20799899101257, - "p95": 167.93599724769592, - "p99": 180.86400628089905 + "p50": 834.272027015686, + "p90": 849.5360016822815, + "p95": 1139.5200490951538, + "p99": 1173.9519834518433 }, "isolatedSum": { - "p50": 172.57599905133247, - "p90": 176.79999768733978, - "p95": 179.26400154829025, - "p99": 199.20000433921814 + "p50": 862.9119992256165, + "p90": 1155.3920209407806, + "p95": 1483.4240078926086, + "p99": 1537.6960039138794 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1372160, - "combineLogicalBytes": 2744320, - "fanoutMean": 5.234375, - "recvTokensMax": 49, - "stragglerRank": 7, + "dispatchLogicalBytes": 619501568, + "combineLogicalBytes": 619501568, + "fanoutMean": 5.2750244140625, + "recvTokensMax": 5469, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 16, - "globalTokens": 128, + "tokensPerRank": 2048, + "globalTokens": 16384, "dispatch": { - "p50": 116.70400202274323, - "p90": 119.58400160074234, - "p95": 121.76000326871872, - "p99": 145.88800072669983 + "p50": 616.927981376648, + "p90": 901.6000032424927, + "p95": 947.8719830513, + "p99": 985.0239753723145 }, "combine": { - "p50": 56.832000613212585, - "p90": 58.62399935722351, - "p95": 59.007998555898666, - "p99": 66.880002617836 + "p50": 875.328004360199, + "p90": 893.0559754371643, + "p95": 1183.840036392212, + "p99": 1216.70401096344 }, "roundtrip": { - "p50": 165.72800278663635, - "p90": 169.21600699424744, - "p95": 170.8800047636032, - "p99": 185.72799861431122 + "p50": 1467.2960042953491, + "p90": 1478.943943977356, + "p95": 1765.4080390930176, + "p99": 1812.7360343933105 }, "isolatedSum": { - "p50": 173.5360026359558, - "p90": 178.20800095796585, - "p95": 180.7680018246174, - "p99": 212.76800334453583 + "p50": 
1492.255985736847, + "p90": 1794.655978679657, + "p95": 2131.712019443512, + "p99": 2201.7279863357544 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2732032, - "combineLogicalBytes": 5464064, - "fanoutMean": 5.2109375, - "recvTokensMax": 94, - "stragglerRank": 7, + "dispatchLogicalBytes": 1239375872, + "combineLogicalBytes": 1239375872, + "fanoutMean": 5.276611328125, + "recvTokensMax": 10883, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 32, - "globalTokens": 256, + "tokensPerRank": 4096, + "globalTokens": 32768, "dispatch": { - "p50": 119.52000111341476, - "p90": 122.72000312805176, - "p95": 124.60800260305405, - "p99": 160.5439931154251 + "p50": 1133.952021598816, + "p90": 1419.3919897079468, + "p95": 1456.928014755249, + "p99": 1488.4480237960815 }, "combine": { - "p50": 59.67999994754791, - "p90": 61.63199990987778, - "p95": 62.24000081419945, - "p99": 65.79200178384781 + "p50": 1633.952021598816, + "p90": 1904.4159650802612, + "p95": 1942.1759843826294, + "p99": 1971.9359874725342 }, "roundtrip": { - "p50": 170.68800330162048, - "p90": 173.98400604724884, - "p95": 175.64800381660461, - "p99": 187.6160055398941 + "p50": 2741.7280673980713, + "p90": 2753.1518936157227, + "p95": 3059.648036956787, + "p99": 3083.904027938843 }, "isolatedSum": { - "p50": 179.20000106096268, - "p90": 184.35200303792953, - "p95": 186.8480034172535, - "p99": 226.33599489927292 + "p50": 2767.904043197632, + "p90": 3323.807954788208, + "p95": 3399.1039991378784, + "p99": 3460.3840112686157 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 5562368, - "combineLogicalBytes": 11124736, - "fanoutMean": 5.3046875, - "recvTokensMax": 186, - "stragglerRank": 7, + "dispatchLogicalBytes": 2479669248, + "combineLogicalBytes": 2479669248, + "fanoutMean": 5.278564453125, + "recvTokensMax": 21730, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-8aff0e36", + "identity": 
"gb200|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|prefill|normal|none|none|0|tuned||8bf55e36863f028", + "colorKey": "gb200_62dbe147", + "comparisonKey": "24459de50e73a419", + "schemaVersion": 3, + "generatedAt": "2026-06-29T13:56:17.788357+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb200-nv_0", + "sku": "gb200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb200-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB200 EP8 · deepep · bf16 · balanced", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced", + "routingLabel": "balanced", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "8bf55e36863f028", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28374335449", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449", + "createdAt": "2026-06-29T13:08:20Z", + "sha": 
"38890f652c38d280794346b6b5b951b9b380a24a" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 140.47999680042267, + "p90": 152.22400426864624, + "p95": 158.1760048866272, + "p99": 166.4000004529953 + }, + "combine": { + "p50": 145.88800072669983, + "p90": 154.62400019168854, + "p95": 156.3200056552887, + "p99": 163.29599916934967 + }, + "roundtrip": { + "p50": 259.8400115966797, + "p90": 268.0320143699646, + "p95": 271.2959945201874, + "p99": 276.2239873409271 + }, + "isolatedSum": { + "p50": 286.3679975271225, + "p90": 306.8480044603348, + "p95": 314.4960105419159, + "p99": 329.69599962234497 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 8, + "recvTokensMax": 1024, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 64, - "globalTokens": 512, + "tokensPerRank": 512, + "globalTokens": 4096, "dispatch": { - "p50": 130.68799674510956, - "p90": 134.97599959373474, - "p95": 138.5280042886734, - "p99": 150.84800124168396 + "p50": 275.7120132446289, + "p90": 284.5120131969452, + "p95": 288.2879972457886, + "p99": 296.28801345825195 }, "combine": { - "p50": 72.51200079917908, - "p90": 74.5600014925003, - "p95": 75.23199915885925, - "p99": 79.19999957084656 + "p50": 369.79201436042786, + "p90": 377.27999687194824, + "p95": 379.040002822876, + "p99": 384.2880129814148 }, "roundtrip": { - "p50": 195.10400295257568, - "p90": 199.42399859428406, - "p95": 202.04800367355347, - "p99": 217.95199811458588 + "p50": 610.2079749107361, + "p90": 618.1439757347107, + "p95": 620.5120086669922, + "p99": 625.4400014877319 }, "isolatedSum": { - "p50": 203.19999754428864, - "p90": 209.53600108623505, - "p95": 213.76000344753265, - "p99": 230.04800081253052 + "p50": 645.5040276050568, + "p90": 661.7920100688934, + "p95": 667.3280000686646, + "p99": 680.5760264396667 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 
11096064, - "combineLogicalBytes": 22192128, - "fanoutMean": 5.291015625, - "recvTokensMax": 358, - "stragglerRank": 7, + "dispatchLogicalBytes": 469762048, + "combineLogicalBytes": 469762048, + "fanoutMean": 8, + "recvTokensMax": 4096, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 2048, + "globalTokens": 16384, "dispatch": { - "p50": 145.53600549697876, - "p90": 148.70400726795197, - "p95": 150.07999539375305, - "p99": 157.47199952602386 + "p50": 816.6080117225647, + "p90": 829.3759822845459, + "p95": 832.1920037269592, + "p99": 840.7359719276428 }, "combine": { - "p50": 89.6959975361824, - "p90": 92.12800115346909, - "p95": 93.56799721717834, - "p99": 105.12000322341919 + "p50": 1204.800009727478, + "p90": 1208.9279890060425, + "p95": 1212.8640413284302, + "p99": 1217.1200513839722 }, "roundtrip": { - "p50": 228.28799486160278, - "p90": 232.35200345516205, - "p95": 234.23999547958374, - "p99": 251.3279914855957 + "p50": 1997.1200227737427, + "p90": 2019.0720558166504, + "p95": 2026.2401103973389, + "p99": 2036.2560749053955 }, "isolatedSum": { - "p50": 235.23200303316116, - "p90": 240.83200842142105, - "p95": 243.6479926109314, - "p99": 262.59200274944305 + "p50": 2021.4080214500427, + "p90": 2038.3039712905884, + "p95": 2045.0560450553894, + "p99": 2057.856023311615 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 22282240, - "combineLogicalBytes": 44564480, - "fanoutMean": 5.3125, - "recvTokensMax": 699, - "stragglerRank": 7, + "dispatchLogicalBytes": 1879048192, + "combineLogicalBytes": 1879048192, + "fanoutMean": 8, + "recvTokensMax": 16384, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 @@ -13458,47 +13279,48 @@ ] }, { - "id": "cx-59d44b57", - "identity": "b300|deepep|5120|8|160|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||be1b44a963bd4ef", - "colorKey": "b300_c4c63f07", - "comparisonKey": 
"16e666f429329305", + "id": "cx-9defed89", + "identity": "gb200|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|8|prefill|normal|none|none|0|tuned||7f1ea4cf569d12c", + "colorKey": "gb200_3028258e", + "comparisonKey": "3fa6024dd84d5535", "schemaVersion": 3, - "generatedAt": "2026-06-27T09:52:10.269764+00:00", + "generatedAt": "2026-06-29T13:56:59.647021+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_10", - "sku": "b300", + "publicationStatus": "comparable-experimental", + "runner": "gb200-nv_0", + "sku": "gb200", "backend": "deepep", - "phase": "decode", + "phase": "prefill", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "runtime-visible-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb200-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep · fp8", - "model": "shape 5120/8/160", + "label": "GB200 EP8 · deepep · bf16 · balanced-rank-local", + "model": "DeepSeek-V3/V4", "shape": { - "hidden": 5120, + "hidden": 7168, "topk": 8, - "experts": 160, - "routing": "uniform", - "routingLabel": "uniform", + "experts": 256, + "routing": "balanced-rank-local", + "routingLabel": "balanced-rank-local", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, - "dispatchDtype": "fp8", + "dispatchDtype": "bf16", + "kernelGeneration": "v1", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1351, + "achievedFraction": 0.1316, "configuredUnits": 20, - "deviceUnits": 148, + "deviceUnits": 152, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -13506,366 +13328,476 @@ }, "placement": { "kind": "packed", - "nodes": 1, + "nodes": 2, "gpusPerNode": 8, "scaleUpDomain": 8 
}, "routingConsistent": true, - "traceSignature": "be1b44a963bd4ef", - "workloadId": "set:8:34e5874082f8ea8f", - "workloadSource": "canonical-serialized", + "traceSignature": "7f1ea4cf569d12c", + "workloadId": null, + "workloadSource": "seeded-runtime", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", + "backendVersion": "1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28285705053", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285705053", - "createdAt": "2026-06-27T09:52:10.269764+00:00", - "sha": "149586650dbed5b7579537347e9489d5b41543c1" + "id": "28374335449", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449", + "createdAt": "2026-06-29T13:08:20Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { - "tokensPerRank": 1, - "globalTokens": 8, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 111.7120012640953, - "p90": 115.52000045776367, - "p95": 118.17599833011627, - "p99": 129.08799946308136 + "p50": 98.68799895048141, + "p90": 108.2879975438118, + "p95": 110.97600311040878, + "p99": 121.8239963054657 }, "combine": { - "p50": 54.62399870157242, - "p90": 55.93600124120712, - "p95": 56.89600110054016, - "p99": 59.74400043487549 + "p50": 82.2720006108284, + "p90": 96.73599898815155, + "p95": 103.20000350475311, + "p99": 108.19199681282043 }, "roundtrip": { - "p50": 156.0640037059784, - "p90": 160.35200655460358, - "p95": 163.13600540161133, - "p99": 179.967999458313 + "p50": 158.4639996290207, + "p90": 186.3359957933426, + "p95": 198.97599518299103, + "p99": 221.98399901390076 }, "isolatedSum": { - "p50": 166.33599996566772, - "p90": 171.4560016989708, - "p95": 175.07199943065643, - "p99": 188.83199989795685 + "p50": 180.95999956130981, + "p90": 205.02399653196335, + "p95": 214.1760066151619, + "p99": 230.01599311828613 
}, "roundtripMeasured": true, - "dispatchLogicalBytes": 215040, - "combineLogicalBytes": 430080, - "fanoutMean": 5.25, - "recvTokensMax": 8, - "stragglerRank": 4, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 1, + "recvTokensMax": 128, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2, - "globalTokens": 16, + "tokensPerRank": 512, + "globalTokens": 4096, "dispatch": { - "p50": 112.15999722480774, - "p90": 115.7120019197464, - "p95": 117.60000139474869, - "p99": 126.24000012874603 + "p50": 123.83999675512314, + "p90": 136.51199638843536, + "p95": 139.45600390434265, + "p99": 181.21600151062012 }, "combine": { - "p50": 55.64799904823303, - "p90": 57.24800005555153, - "p95": 58.079998940229416, - "p99": 65.11999666690826 + "p50": 127.36000120639801, + "p90": 131.84000551700592, + "p95": 134.36800241470337, + "p99": 141.31200313568115 }, "roundtrip": { - "p50": 158.720001578331, - "p90": 163.07200491428375, - "p95": 167.26399958133698, - "p99": 184.28799510002136 + "p50": 223.51999580860138, + "p90": 243.93600225448608, + "p95": 252.3840069770813, + "p99": 264.51200246810913 }, "isolatedSum": { - "p50": 167.80799627304077, - "p90": 172.96000197529793, - "p95": 175.6800003349781, - "p99": 191.3599967956543 + "p50": 251.19999796152115, + "p90": 268.3520019054413, + "p95": 273.824006319046, + "p99": 322.52800464630127 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 440320, - "combineLogicalBytes": 880640, - "fanoutMean": 5.375, - "recvTokensMax": 13, - "stragglerRank": 4, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 1, + "recvTokensMax": 512, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 4, - "globalTokens": 32, + "tokensPerRank": 2048, + "globalTokens": 16384, "dispatch": { - "p50": 114.43199962377548, - "p90": 120.35199999809265, - "p95": 123.74400347471237, - "p99": 
146.68799936771393 + "p50": 212.64000236988068, + "p90": 225.69599747657776, + "p95": 234.14400219917297, + "p99": 249.85599517822266 }, "combine": { - "p50": 60.127999633550644, - "p90": 61.85600161552429, - "p95": 63.07200342416763, - "p99": 65.43999910354614 + "p50": 256.0639977455139, + "p90": 266.1440074443817, + "p95": 268.0320143699646, + "p99": 276.3200104236603 }, "roundtrip": { - "p50": 164.99200463294983, - "p90": 170.1119989156723, - "p95": 173.5360026359558, - "p99": 206.7520022392273 + "p50": 451.07200741767883, + "p90": 459.6799910068512, + "p95": 463.8400077819824, + "p99": 725.6320118904114 }, "isolatedSum": { - "p50": 174.55999925732613, - "p90": 182.20800161361694, - "p95": 186.81600689888, - "p99": 212.12799847126007 + "p50": 468.7040001153946, + "p90": 491.8400049209595, + "p95": 502.1760165691376, + "p99": 526.1760056018829 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 870400, - "combineLogicalBytes": 1740800, - "fanoutMean": 5.3125, - "recvTokensMax": 25, - "stragglerRank": 4, + "dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 1, + "recvTokensMax": 2048, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 114.59200084209442, - "p90": 117.72800236940384, - "p95": 119.4240003824234, - "p99": 133.5040032863617 - }, - "combine": { - "p50": 59.58399921655655, - "p90": 61.40799820423126, - "p95": 62.111999839544296, - "p99": 65.18399715423584 - }, - "roundtrip": { - "p50": 166.17600619792938, - "p90": 170.33599317073822, - "p95": 173.21600019931793, - "p99": 191.48799777030945 + } + ] + }, + { + "id": "cx-9efef357", + "identity": "gb200|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|balanced+eplb|8|prefill|normal|none|none|0|tuned||7ac30b0a39b1405", + "colorKey": "gb200_71fc8a17", + "comparisonKey": "684eae7793ca35a1", + "schemaVersion": 3, + "generatedAt": "2026-06-29T14:00:56.504573+00:00", + 
"status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb200-nv_0", + "sku": "gb200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb200-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB200 EP8 · deepep · bf16 · balanced+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "balanced", + "routingLabel": "balanced+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "7ac30b0a39b1405", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1, + "eplbImbalanceAfter": 1, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28374335449", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449", + "createdAt": "2026-06-29T13:08:20Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 104.44799810647964, + "p90": 113.18399757146835, + "p95": 116.41599982976913, + "p99": 129.95199859142303 + }, + "combine": { + "p50": 
96.19200229644775, + "p90": 105.82400113344193, + "p95": 107.90400207042694, + "p99": 130.62399625778198 + }, + "roundtrip": { + "p50": 175.52000284194946, + "p90": 183.07200074195862, + "p95": 185.2799952030182, + "p99": 190.5599981546402 }, "isolatedSum": { - "p50": 174.17600005865097, - "p90": 179.1360005736351, - "p95": 181.5360002219677, - "p99": 198.68800044059753 + "p50": 200.6400004029274, + "p90": 219.00799870491028, + "p95": 224.32000190019608, + "p99": 260.575994849205 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1735680, - "combineLogicalBytes": 3471360, - "fanoutMean": 5.296875, - "recvTokensMax": 50, - "stragglerRank": 4, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 2, + "recvTokensMax": 384, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 16, - "globalTokens": 128, + "tokensPerRank": 256, + "globalTokens": 2048, "dispatch": { - "p50": 115.55200070142746, - "p90": 119.84000355005264, - "p95": 121.79200351238251, - "p99": 141.50400459766388 + "p50": 120.80000340938568, + "p90": 128.57599556446075, + "p95": 131.67999684810638, + "p99": 137.1839940547943 }, "combine": { - "p50": 61.28000095486641, - "p90": 63.231997191905975, - "p95": 63.840001821517944, - "p99": 69.88800317049026 + "p50": 118.65600198507309, + "p90": 122.78400361537933, + "p95": 128.1599998474121, + "p99": 133.82400572299957 }, "roundtrip": { - "p50": 167.1999990940094, - "p90": 171.55200242996216, - "p95": 174.43199455738068, - "p99": 185.12000143527985 + "p50": 214.1759991645813, + "p90": 221.40799462795258, + "p95": 223.7440049648285, + "p99": 229.63200509548187 }, "isolatedSum": { - "p50": 176.83200165629387, - "p90": 183.07200074195862, - "p95": 185.63200533390045, - "p99": 211.39200776815414 + "p50": 239.45600539445877, + "p90": 251.3599991798401, + "p95": 259.8399966955185, + "p99": 271.0079997777939 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 3456000, - 
"combineLogicalBytes": 6912000, - "fanoutMean": 5.2734375, - "recvTokensMax": 93, - "stragglerRank": 4, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 2, + "recvTokensMax": 768, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 32, - "globalTokens": 256, + "tokensPerRank": 512, + "globalTokens": 4096, "dispatch": { - "p50": 120.51200121641159, - "p90": 123.6800029873848, - "p95": 125.59999525547028, - "p99": 131.32800161838531 + "p50": 158.1760048866272, + "p90": 166.24000668525696, + "p95": 169.79199647903442, + "p99": 174.0799993276596 }, "combine": { - "p50": 64.96000289916992, - "p90": 66.94400310516357, - "p95": 67.29599833488464, - "p99": 75.39200037717819 + "p50": 155.68000078201294, + "p90": 159.16800498962402, + "p95": 162.88000345230103, + "p99": 169.24799978733063 }, "roundtrip": { - "p50": 175.20000040531158, - "p90": 179.32799458503723, - "p95": 183.77600610256195, - "p99": 198.62399995326996 + "p50": 286.17599606513977, + "p90": 293.66400837898254, + "p95": 296.2239980697632, + "p99": 299.51998591423035 }, "isolatedSum": { - "p50": 185.4720041155815, - "p90": 190.62400609254837, - "p95": 192.89599359035492, - "p99": 206.7200019955635 + "p50": 313.85600566864014, + "p90": 325.408011674881, + "p95": 332.67199993133545, + "p99": 343.32799911499023 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 6988800, - "combineLogicalBytes": 13977600, - "fanoutMean": 5.33203125, - "recvTokensMax": 179, - "stragglerRank": 4, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 2, + "recvTokensMax": 1536, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 64, - "globalTokens": 512, + "tokensPerRank": 1024, + "globalTokens": 8192, "dispatch": { - "p50": 131.71200454235077, - "p90": 136.60800457000732, - "p95": 139.13600146770477, - "p99": 152.96000242233276 + "p50": 226.78400576114655, + "p90": 
234.40000414848328, + "p95": 236.89599335193634, + "p99": 243.00800263881683 }, "combine": { - "p50": 77.85599678754807, - "p90": 79.99999821186066, - "p95": 80.64000308513641, - "p99": 85.02399921417236 + "p50": 284.0000092983246, + "p90": 291.0720109939575, + "p95": 292.1600043773651, + "p99": 296.671986579895 }, "roundtrip": { - "p50": 201.664000749588, - "p90": 206.4639925956726, - "p95": 208.19200575351715, - "p99": 221.98399901390076 + "p50": 475.8400022983551, + "p90": 484.73599553108215, + "p95": 487.7760112285614, + "p99": 491.61601066589355 }, "isolatedSum": { - "p50": 209.56800132989883, - "p90": 216.60800278186798, - "p95": 219.7760045528412, - "p99": 237.98400163650513 + "p50": 510.78401505947113, + "p90": 525.4720151424408, + "p95": 529.0559977293015, + "p99": 539.6799892187119 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 13987840, - "combineLogicalBytes": 27975680, - "fanoutMean": 5.3359375, - "recvTokensMax": 355, - "stragglerRank": 4, + "dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 2, + "recvTokensMax": 3072, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 2048, + "globalTokens": 16384, "dispatch": { - "p50": 151.7760008573532, - "p90": 157.05600380897522, - "p95": 159.7760021686554, - "p99": 171.58399522304535 + "p50": 368.4160113334656, + "p90": 375.36001205444336, + "p95": 377.7279853820801, + "p99": 381.47199153900146 }, "combine": { - "p50": 98.39999675750732, - "p90": 104.38399761915207, - "p95": 108.51199924945831, - "p99": 120.38400024175644 + "p50": 500.15997886657715, + "p90": 503.4880042076111, + "p95": 504.863977432251, + "p99": 510.0160241127014 }, "roundtrip": { - "p50": 242.0479953289032, - "p90": 246.59200012683868, - "p95": 248.51199984550476, - "p99": 264.384001493454 + "p50": 839.9680256843567, + "p90": 846.3360071182251, + "p95": 848.1919765472412, + "p99": 853.3440232276917 }, 
"isolatedSum": { - "p50": 250.17599761486053, - "p90": 261.4400014281273, - "p95": 268.2880014181137, - "p99": 291.9679954648018 + "p50": 868.5759902000427, + "p90": 878.8480162620544, + "p95": 882.591962814331, + "p99": 891.4880156517029 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 27837440, - "combineLogicalBytes": 55674880, - "fanoutMean": 5.3095703125, - "recvTokensMax": 699, + "dispatchLogicalBytes": 469762048, + "combineLogicalBytes": 469762048, + "fanoutMean": 2, + "recvTokensMax": 6144, "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 668.287992477417, + "p90": 675.9999990463257, + "p95": 678.7199974060059, + "p99": 685.4400038719177 + }, + "combine": { + "p50": 898.144006729126, + "p90": 905.6959748268127, + "p95": 906.7519903182983, + "p99": 908.6080193519592 + }, + "roundtrip": { + "p50": 1543.455958366394, + "p90": 1638.1440162658691, + "p95": 1646.1759805679321, + "p99": 1760.0959539413452 + }, + "isolatedSum": { + "p50": 1566.431999206543, + "p90": 1581.6959738731384, + "p95": 1585.4719877243042, + "p99": 1594.048023223877 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 939524096, + "combineLogicalBytes": 939524096, + "fanoutMean": 2, + "recvTokensMax": 12288, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 } ] }, { - "id": "cx-a0445944", - "identity": "b300|deepep|6144|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", - "colorKey": "b300_d6fd14c3", - "comparisonKey": "e7da15664ffcf0f8", + "id": "cx-06be5389", + "identity": "gb200|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|prefill|normal|none|none|0|tuned||eb906a106a6cb71", + "colorKey": "gb200_d945a181", + "comparisonKey": "029c20e625903daf", "schemaVersion": 3, - "generatedAt": "2026-06-27T11:13:45.257215+00:00", + "generatedAt": 
"2026-06-29T13:58:49.951164+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_05", - "sku": "b300", + "publicationStatus": "comparable-experimental", + "runner": "gb200-nv_0", + "sku": "gb200", "backend": "deepep", - "phase": "decode", + "phase": "prefill", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", + "topologyClass": "gb200-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep · fp8", - "model": "MiniMax-M3", + "label": "GB200 EP8 · deepep · bf16 · hotspot-single", + "model": "DeepSeek-V3/V4", "shape": { - "hidden": 6144, + "hidden": 7168, "topk": 8, "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", + "routing": "hotspot-single", + "routingLabel": "hotspot-single", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, - "dispatchDtype": "fp8", + "dispatchDtype": "bf16", + "kernelGeneration": "v1", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1351, + "achievedFraction": 0.1316, "configuredUnits": 20, - "deviceUnits": 148, + "deviceUnits": 152, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -13873,318 +13805,427 @@ }, "placement": { "kind": "packed", - "nodes": 1, + "nodes": 2, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "ac583971f94b176", - "workloadId": "set:8:2e0df6a62cd0143e", - "workloadSource": "canonical-serialized", + "traceSignature": "eb906a106a6cb71", + "workloadId": null, + "workloadSource": "seeded-runtime", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", + "backendVersion": "1.1.0+814e508", "imageDigest": 
"sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28287498289", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28287498289", - "createdAt": "2026-06-27T11:13:45.257215+00:00", - "sha": "df7fddee0a275156d4c72fa006cd2b73bce72613" + "id": "28374335449", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449", + "createdAt": "2026-06-29T13:08:20Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { - "tokensPerRank": 1, - "globalTokens": 8, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 55.03999814391136, - "p90": 57.312000542879105, - "p95": 58.78400057554245, - "p99": 68.12799721956253 + "p50": 127.93600559234619, + "p90": 135.45599579811096, + "p95": 138.40000331401825, + "p99": 145.1520025730133 }, "combine": { - "p50": 56.48000165820122, - "p90": 57.920001447200775, - "p95": 58.720000088214874, - "p99": 66.52799993753433 + "p50": 140.03199338912964, + "p90": 144.70399916172028, + "p95": 146.01600170135498, + "p99": 152.25599706172943 }, "roundtrip": { - "p50": 114.656001329422, - "p90": 116.99199676513672, - "p95": 118.9119964838028, - "p99": 136.19199395179749 + "p50": 244.47999894618988, + "p90": 251.71199440956116, + "p95": 254.14401292800903, + "p99": 259.93600487709045 }, "isolatedSum": { - "p50": 111.51999980211258, - "p90": 115.23200199007988, - "p95": 117.50400066375732, - "p99": 134.65599715709686 + "p50": 267.96799898147583, + "p90": 280.15999495983124, + "p95": 284.41600501537323, + "p99": 297.40799963474274 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 270336, - "combineLogicalBytes": 540672, - "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 7, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1024, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 
2, - "globalTokens": 16, + "tokensPerRank": 512, + "globalTokens": 4096, "dispatch": { - "p50": 55.96800148487091, - "p90": 58.079998940229416, - "p95": 59.23200026154518, - "p99": 67.07199662923813 + "p50": 251.583993434906, + "p90": 259.20000672340393, + "p95": 262.4959945678711, + "p99": 267.8399980068207 }, "combine": { - "p50": 58.720000088214874, - "p90": 60.06399914622307, - "p95": 60.70400029420853, - "p99": 68.35199892520905 + "p50": 363.96801471710205, + "p90": 368.4479892253876, + "p95": 373.21600317955017, + "p99": 377.56800651550293 }, "roundtrip": { - "p50": 117.88800358772278, - "p90": 120.19199877977371, - "p95": 122.3360002040863, - "p99": 133.760005235672 + "p50": 582.0159912109375, + "p90": 590.2079939842224, + "p95": 592.3839807510376, + "p99": 601.1520028114319 }, "isolatedSum": { - "p50": 114.68800157308578, - "p90": 118.14399808645248, - "p95": 119.93600055575371, - "p99": 135.42399555444717 + "p50": 615.552008152008, + "p90": 627.6479959487915, + "p95": 635.7119977474213, + "p99": 645.4080045223236 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 528384, - "combineLogicalBytes": 1056768, - "fanoutMean": 5.375, - "recvTokensMax": 13, - "stragglerRank": 7, + "dispatchLogicalBytes": 311091200, + "combineLogicalBytes": 311091200, + "fanoutMean": 5.2978515625, + "recvTokensMax": 4096, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 4, - "globalTokens": 32, + "tokensPerRank": 2048, + "globalTokens": 16384, "dispatch": { - "p50": 57.98399820923805, - "p90": 60.15999987721443, - "p95": 61.40799820423126, - "p99": 68.44799965620041 + "p50": 729.1839718818665, + "p90": 736.8959784507751, + "p95": 740.0000095367432, + "p99": 747.2000122070312 }, "combine": { - "p50": 60.896001756191254, - "p90": 62.94400244951248, - "p95": 63.4239986538887, - "p99": 69.023996591568 + "p50": 1165.9200191497803, + "p90": 1170.2079772949219, + "p95": 1173.9200353622437, + "p99": 1177.6000261306763 }, "roundtrip": { - 
"p50": 121.47200107574463, - "p90": 123.87199699878693, - "p95": 125.05599856376648, - "p99": 135.48800349235535 + "p50": 1869.088053703308, + "p90": 1877.72798538208, + "p95": 1880.1599740982056, + "p99": 1886.3999843597412 }, "isolatedSum": { - "p50": 118.8799999654293, - "p90": 123.10400232672691, - "p95": 124.83199685811996, - "p99": 137.4719962477684 + "p50": 1895.1039910316467, + "p90": 1907.103955745697, + "p95": 1913.9200448989868, + "p99": 1924.8000383377075 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1062912, - "combineLogicalBytes": 2125824, - "fanoutMean": 5.40625, - "recvTokensMax": 29, - "stragglerRank": 7, + "dispatchLogicalBytes": 1241511936, + "combineLogicalBytes": 1241511936, + "fanoutMean": 5.28570556640625, + "recvTokensMax": 16384, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-bdd9bb8d", + "identity": "gb200|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|hotspot-single+eplb|8|prefill|normal|none|none|0|tuned||6248b19ef786add", + "colorKey": "gb200_d826ab8d", + "comparisonKey": "1a323c0d685e8d2e", + "schemaVersion": 3, + "generatedAt": "2026-06-29T13:52:19.710696+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb200-nv_1", + "sku": "gb200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb200-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB200 EP8 · deepep · bf16 · hotspot-single+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "hotspot-single", + "routingLabel": "hotspot-single+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": 
"normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "6248b19ef786add", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.8466796875, + "eplbImbalanceAfter": 1.0002700343276514, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28374335449", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449", + "createdAt": "2026-06-29T13:08:20Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 126.14400684833527, + "p90": 381.21598958969116, + "p95": 413.2159948348999, + "p99": 437.5999867916107 + }, + "combine": { + "p50": 128.86400520801544, + "p90": 384.19198989868164, + "p95": 403.7120044231415, + "p99": 416.22400283813477 + }, + "roundtrip": { + "p50": 225.47200322151184, + "p90": 238.20799589157104, + "p95": 481.0880124568939, + "p99": 537.5040173530579 + }, + "isolatedSum": { + "p50": 255.0080120563507, + "p90": 765.4079794883728, + "p95": 816.9279992580414, + "p99": 853.8239896297455 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77701120, + "combineLogicalBytes": 77701120, + "fanoutMean": 5.29296875, + "recvTokensMax": 697, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 256, + "globalTokens": 2048, "dispatch": { - "p50": 59.039998799562454, - "p90": 61.08799949288368, - 
"p95": 62.3680017888546, - "p99": 72.89600372314453 + "p50": 164.41600024700165, + "p90": 437.79200315475464, + "p95": 472.6719856262207, + "p99": 497.3120093345642 }, "combine": { - "p50": 62.94400244951248, - "p90": 64.41599875688553, - "p95": 65.05600363016129, - "p99": 69.15199756622314 + "p50": 171.9360053539276, + "p90": 408.06400775909424, + "p95": 444.64001059532166, + "p99": 481.0880124568939 }, "roundtrip": { - "p50": 125.08800625801086, - "p90": 127.13600695133209, - "p95": 130.23999333381653, - "p99": 145.9520012140274 + "p50": 308.351993560791, + "p90": 320.51199674606323, + "p95": 572.0639824867249, + "p99": 616.096019744873 }, "isolatedSum": { - "p50": 121.98400124907494, - "p90": 125.50399824976921, - "p95": 127.42400541901588, - "p99": 142.04800128936768 + "p50": 336.35200560092926, + "p90": 845.8560109138489, + "p95": 917.3119962215424, + "p99": 978.4000217914581 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2131968, - "combineLogicalBytes": 4263936, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 7, + "dispatchLogicalBytes": 155187200, + "combineLogicalBytes": 155187200, + "fanoutMean": 5.28564453125, + "recvTokensMax": 1372, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 16, - "globalTokens": 128, + "tokensPerRank": 512, + "globalTokens": 4096, "dispatch": { - "p50": 59.776000678539276, - "p90": 61.91999837756157, - "p95": 63.00800293684006, - "p99": 73.27999919652939 + "p50": 236.38400435447693, + "p90": 246.33599817752838, + "p95": 524.0319967269897, + "p99": 579.0719985961914 }, "combine": { - "p50": 63.551999628543854, - "p90": 65.50399959087372, - "p95": 66.97600334882736, - "p99": 72.03199714422226 + "p50": 292.7039861679077, + "p90": 304.32000756263733, + "p95": 552.5760054588318, + "p99": 583.1040143966675 }, "roundtrip": { - "p50": 126.39999389648438, - "p90": 128.86400520801544, - "p95": 130.3360015153885, - "p99": 143.74400675296783 + "p50": 
471.2640047073364, + "p90": 486.7199957370758, + "p95": 773.9840149879456, + "p99": 810.2080225944519 }, "isolatedSum": { - "p50": 123.32800030708313, - "p90": 127.42399796843529, - "p95": 129.98400628566742, - "p99": 145.31199634075165 + "p50": 529.0879905223846, + "p90": 550.6560057401657, + "p95": 1076.6080021858215, + "p99": 1162.176012992859 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4251648, - "combineLogicalBytes": 8503296, - "fanoutMean": 5.40625, - "recvTokensMax": 92, - "stragglerRank": 7, + "dispatchLogicalBytes": 311162880, + "combineLogicalBytes": 311162880, + "fanoutMean": 5.299072265625, + "recvTokensMax": 2761, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 32, - "globalTokens": 256, + "tokensPerRank": 1024, + "globalTokens": 8192, "dispatch": { - "p50": 63.1679967045784, - "p90": 65.8240020275116, - "p95": 67.03999638557434, - "p99": 75.48800110816956 + "p50": 362.527996301651, + "p90": 372.1599876880646, + "p95": 657.2800278663635, + "p99": 706.5920233726501 }, "combine": { - "p50": 68.41599941253662, - "p90": 70.81600278615952, - "p95": 71.52000069618225, - "p99": 95.04000097513199 + "p50": 501.3120174407959, + "p90": 775.9680151939392, + "p95": 822.1439719200134, + "p99": 843.1040048599243 }, "roundtrip": { - "p50": 135.96799969673157, - "p90": 138.59200477600098, - "p95": 140.25600254535675, - "p99": 151.32799744606018 + "p50": 833.4720134735107, + "p90": 1112.3199462890625, + "p95": 1156.3199758529663, + "p99": 1182.5920343399048 }, "isolatedSum": { - "p50": 131.58399611711502, - "p90": 136.6400048136711, - "p95": 138.5599970817566, - "p99": 170.52800208330154 + "p50": 863.8400137424469, + "p90": 1148.1280028820038, + "p95": 1479.423999786377, + "p99": 1549.6960282325745 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 8454144, - "combineLogicalBytes": 16908288, - "fanoutMean": 5.375, - "recvTokensMax": 182, - "stragglerRank": 7, + "dispatchLogicalBytes": 619974656, + 
"combineLogicalBytes": 619974656, + "fanoutMean": 5.279052734375, + "recvTokensMax": 5481, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 64, - "globalTokens": 512, + "tokensPerRank": 2048, + "globalTokens": 16384, "dispatch": { - "p50": 70.52800059318542, - "p90": 73.31199944019318, - "p95": 75.3600001335144, - "p99": 84.83199775218964 + "p50": 617.5040006637573, + "p90": 909.6959829330444, + "p95": 947.2320079803467, + "p99": 972.3520278930664 }, "combine": { - "p50": 82.30400085449219, - "p90": 84.19200032949448, - "p95": 85.28000116348267, - "p99": 99.61599856615067 + "p50": 874.2719888687134, + "p90": 911.9679927825928, + "p95": 1204.4800519943237, + "p99": 1231.9999933242798 }, "roundtrip": { - "p50": 163.35999965667725, - "p90": 165.8560037612915, - "p95": 167.71200299263, - "p99": 189.11999464035034 + "p50": 1461.1519575119019, + "p90": 1474.5919704437256, + "p95": 1765.023946762085, + "p99": 1823.1680393218994 }, "isolatedSum": { - "p50": 152.8320014476776, - "p90": 157.50399976968765, - "p95": 160.64000129699707, - "p99": 184.4479963183403 + "p50": 1491.7759895324707, + "p90": 1821.6639757156372, + "p95": 2151.7120599746704, + "p99": 2204.352021217346 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 16711680, - "combineLogicalBytes": 33423360, - "fanoutMean": 5.3125, - "recvTokensMax": 367, - "stragglerRank": 7, + "dispatchLogicalBytes": 1240020992, + "combineLogicalBytes": 1240020992, + "fanoutMean": 5.27935791015625, + "recvTokensMax": 10883, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 4096, + "globalTokens": 32768, "dispatch": { - "p50": 81.40800148248672, - "p90": 84.09599959850311, - "p95": 85.66399663686752, - "p99": 95.29600292444229 + "p50": 1139.9680376052856, + "p90": 1151.360034942627, + "p95": 1454.751968383789, + "p99": 1494.879961013794 }, "combine": { - "p50": 102.14400291442871, - 
"p90": 104.3199971318245, - "p95": 105.72800040245056, - "p99": 114.72000181674957 + "p50": 1625.5359649658203, + "p90": 1638.592004776001, + "p95": 1942.4320459365845, + "p99": 1977.4080514907837 }, "roundtrip": { - "p50": 205.9839963912964, - "p90": 208.99200439453125, - "p95": 210.4959934949875, - "p99": 222.04799950122833 + "p50": 2741.7280673980713, + "p90": 2755.6159496307373, + "p95": 3060.800075531006, + "p99": 3102.976083755493 }, "isolatedSum": { - "p50": 183.55200439691544, - "p90": 188.4159967303276, - "p95": 191.39199703931808, - "p99": 210.01600474119186 + "p50": 2765.504002571106, + "p90": 2789.952039718628, + "p95": 3397.1840143203735, + "p99": 3472.2880125045776 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 33288192, - "combineLogicalBytes": 66576384, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 7, + "dispatchLogicalBytes": 2480414720, + "combineLogicalBytes": 2480414720, + "fanoutMean": 5.2801513671875, + "recvTokensMax": 21702, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 @@ -14192,47 +14233,48 @@ ] }, { - "id": "cx-429a4a40", - "identity": "b300|deepep|6144|8|256|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", - "colorKey": "b300_c4c63f07", - "comparisonKey": "fe452cc5767ffbdd", + "id": "cx-17c4723d", + "identity": "gb200|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|uniform+eplb|8|prefill|normal|none|none|0|tuned||291e5ce62735286", + "colorKey": "gb200_8703b849", + "comparisonKey": "5356d58a72408ddf", "schemaVersion": 3, - "generatedAt": "2026-06-27T09:52:37.801228+00:00", + "generatedAt": "2026-06-29T14:00:10.030819+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_16", - "sku": "b300", + "publicationStatus": "comparable-experimental", + "runner": "gb200-nv_0", + "sku": "gb200", "backend": "deepep", - "phase": "decode", + "phase": "prefill", "mode": "normal", "resourceMode": "tuned", "suite": 
"backend-default", "comparisonClass": "standardized", - "measurementContract": "runtime-visible-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb200-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep · fp8", - "model": "MiniMax-M3", + "label": "GB200 EP8 · deepep · bf16 · uniform+eplb", + "model": "DeepSeek-V3 (EPLB physical)", "shape": { - "hidden": 6144, + "hidden": 7168, "topk": 8, - "experts": 256, + "experts": 288, "routing": "uniform", - "routingLabel": "uniform", + "routingLabel": "uniform+eplb", "routingStep": 0, "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "fp8", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1351, + "achievedFraction": 0.1316, "configuredUnits": 20, - "deviceUnits": 148, + "deviceUnits": 152, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -14240,318 +14282,244 @@ }, "placement": { "kind": "packed", - "nodes": 1, + "nodes": 2, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "ac583971f94b176", - "workloadId": "set:8:2e0df6a62cd0143e", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", + "traceSignature": "291e5ce62735286", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.006072998046875, + "eplbImbalanceAfter": 1.0000152587890625, + "backendVersion": "1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28285716223", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285716223", - 
"createdAt": "2026-06-27T09:52:37.801228+00:00", - "sha": "149586650dbed5b7579537347e9489d5b41543c1" + "id": "28374335449", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449", + "createdAt": "2026-06-29T13:08:20Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { - "tokensPerRank": 1, - "globalTokens": 8, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 114.84800279140472, - "p90": 118.6240017414093, - "p95": 120.51200121641159, - "p99": 149.3760049343109 + "p50": 125.50400197505951, + "p90": 134.5919966697693, + "p95": 137.79200613498688, + "p99": 147.32800424098969 }, "combine": { - "p50": 58.49599838256836, - "p90": 60.22400036454201, - "p95": 60.95999851822853, - "p99": 64.64000046253204 + "p50": 129.2479932308197, + "p90": 133.18400084972382, + "p95": 134.39999520778656, + "p99": 139.55199718475342 }, "roundtrip": { - "p50": 165.0879979133606, - "p90": 168.2880073785782, - "p95": 170.30400037765503, - "p99": 177.34399437904358 + "p50": 225.63199698925018, + "p90": 233.18399488925934, + "p95": 235.71200668811798, + "p99": 239.80799317359924 }, "isolatedSum": { - "p50": 173.34400117397308, - "p90": 178.8480021059513, - "p95": 181.47199973464012, - "p99": 214.01600539684296 + "p50": 254.7519952058792, + "p90": 267.7759975194931, + "p95": 272.19200134277344, + "p99": 286.8800014257431 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 270336, - "combineLogicalBytes": 540672, - "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 4, + "dispatchLogicalBytes": 77041664, + "combineLogicalBytes": 77041664, + "fanoutMean": 5.248046875, + "recvTokensMax": 686, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2, - "globalTokens": 16, + "tokensPerRank": 256, + "globalTokens": 2048, "dispatch": { - "p50": 115.167997777462, - "p90": 118.17599833011627, - "p95": 120.06399780511856, - "p99": 135.16800105571747 + "p50": 162.78399527072906, + "p90": 
171.55200242996216, + "p95": 174.01599884033203, + "p99": 180.03199994564056 }, "combine": { - "p50": 59.55199897289276, - "p90": 61.15199998021126, - "p95": 62.04799935221672, - "p99": 64.31999802589417 + "p50": 169.18399930000305, + "p90": 176.60799622535706, + "p95": 179.00800704956055, + "p99": 182.5920045375824 }, "roundtrip": { - "p50": 164.57599401474, - "p90": 168.35199296474457, - "p95": 170.46399414539337, - "p99": 185.47199666500092 + "p50": 305.7920038700104, + "p90": 313.85600566864014, + "p95": 316.1279857158661, + "p99": 320.44801115989685 }, "isolatedSum": { - "p50": 174.71999675035477, - "p90": 179.32799831032753, - "p95": 182.11199715733528, - "p99": 199.48799908161163 + "p50": 331.9679945707321, + "p90": 348.1599986553192, + "p95": 353.0240058898926, + "p99": 362.62400448322296 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 528384, - "combineLogicalBytes": 1056768, - "fanoutMean": 5.375, - "recvTokensMax": 13, - "stragglerRank": 4, + "dispatchLogicalBytes": 154542080, + "combineLogicalBytes": 154542080, + "fanoutMean": 5.263671875, + "recvTokensMax": 1365, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 4, - "globalTokens": 32, + "tokensPerRank": 512, + "globalTokens": 4096, "dispatch": { - "p50": 118.75200271606445, - "p90": 124.32000041007996, - "p95": 128.80000472068787, - "p99": 145.56799829006195 + "p50": 235.48799753189087, + "p90": 242.3039972782135, + "p95": 244.6720004081726, + "p99": 253.82399559020996 }, "combine": { - "p50": 62.68800050020218, - "p90": 64.41599875688553, - "p95": 65.0240033864975, - "p99": 73.82400333881378 + "p50": 289.72798585891724, + "p90": 294.94398832321167, + "p95": 297.85600304603577, + "p99": 303.1359910964966 }, "roundtrip": { - "p50": 170.6559956073761, - "p90": 174.6560037136078, - "p95": 176.83200538158417, - "p99": 186.88000738620758 + "p50": 470.5600142478943, + "p90": 479.5520007610321, + "p95": 482.56000876426697, + "p99": 487.0719909667969 }, 
"isolatedSum": { - "p50": 181.44000321626663, - "p90": 188.73599916696548, - "p95": 193.82400810718536, - "p99": 219.39200162887573 + "p50": 525.2159833908081, + "p90": 537.2479856014252, + "p95": 542.5280034542084, + "p99": 556.9599866867065 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1062912, - "combineLogicalBytes": 2125824, - "fanoutMean": 5.40625, - "recvTokensMax": 29, - "stragglerRank": 4, + "dispatchLogicalBytes": 310589440, + "combineLogicalBytes": 310589440, + "fanoutMean": 5.289306640625, + "recvTokensMax": 2746, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 1024, + "globalTokens": 8192, "dispatch": { - "p50": 118.04799735546112, - "p90": 121.2799996137619, - "p95": 123.61600250005722, - "p99": 135.3600025177002 + "p50": 359.20000076293945, + "p90": 367.0719861984253, + "p95": 369.4399893283844, + "p99": 376.15999579429626 }, "combine": { - "p50": 63.231997191905975, - "p90": 64.99200314283371, - "p95": 65.24799764156342, - "p99": 72.51200079917908 + "p50": 498.879998922348, + "p90": 504.09597158432007, + "p95": 507.58397579193115, + "p99": 511.58398389816284 }, "roundtrip": { - "p50": 172.54400253295898, - "p90": 176.15999281406403, - "p95": 177.59999632835388, - "p99": 187.51999735832214 + "p50": 824.5440125465393, + "p90": 832.1920037269592, + "p95": 834.0799808502197, + "p99": 837.6320004463196 }, "isolatedSum": { - "p50": 181.2799945473671, - "p90": 186.2720027565956, - "p95": 188.86400014162064, - "p99": 207.87200331687927 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2131968, - "combineLogicalBytes": 4263936, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 118.9119964838028, - "p90": 122.94399738311768, - "p95": 127.42400169372559, - "p99": 143.48800480365753 - }, - 
"combine": { - "p50": 64.35199826955795, - "p90": 65.79200178384781, - "p95": 66.68800115585327, - "p99": 70.27199864387512 - }, - "roundtrip": { - "p50": 173.40800166130066, - "p90": 176.83200538158417, - "p95": 178.5919964313507, - "p99": 190.7840073108673 - }, - "isolatedSum": { - "p50": 183.26399475336075, - "p90": 188.73599916696548, - "p95": 194.11200284957886, - "p99": 213.76000344753265 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4251648, - "combineLogicalBytes": 8503296, - "fanoutMean": 5.40625, - "recvTokensMax": 92, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 124.9919980764389, - "p90": 128.1599998474121, - "p95": 130.23999333381653, - "p99": 156.5759927034378 - }, - "combine": { - "p50": 68.64000111818314, - "p90": 70.14399766921997, - "p95": 70.78400254249573, - "p99": 75.39200037717819 - }, - "roundtrip": { - "p50": 185.56800484657288, - "p90": 189.18399512767792, - "p95": 191.52000546455383, - "p99": 204.83200252056122 - }, - "isolatedSum": { - "p50": 193.63199919462204, - "p90": 198.30399751663208, - "p95": 201.02399587631226, - "p99": 231.967993080616 + "p50": 858.0799996852875, + "p90": 871.1679577827454, + "p95": 877.0239651203156, + "p99": 887.7439796924591 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 8454144, - "combineLogicalBytes": 16908288, - "fanoutMean": 5.375, - "recvTokensMax": 182, + "dispatchLogicalBytes": 619171840, + "combineLogicalBytes": 619171840, + "fanoutMean": 5.272216796875, + "recvTokensMax": 5467, "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 64, - "globalTokens": 512, + "tokensPerRank": 2048, + "globalTokens": 16384, "dispatch": { - "p50": 135.42400300502777, - "p90": 139.5840048789978, - "p95": 143.45599710941315, - "p99": 153.6960005760193 + "p50": 615.4239773750305, + "p90": 623.520016670227, + "p95": 625.760018825531, + "p99": 
631.1360001564026 }, "combine": { - "p50": 82.75199681520462, - "p90": 85.02399921417236, - "p95": 85.85599809885025, - "p99": 96.19200229644775 + "p50": 872.5759983062744, + "p90": 881.7600011825562, + "p95": 883.4879994392395, + "p99": 887.5839710235596 }, "roundtrip": { - "p50": 210.207998752594, - "p90": 215.2319997549057, - "p95": 217.6000028848648, - "p99": 234.72000658512115 + "p50": 1458.400011062622, + "p90": 1466.6240215301514, + "p95": 1469.4080352783203, + "p99": 1476.1919975280762 }, "isolatedSum": { - "p50": 218.1759998202324, - "p90": 224.60800409317017, - "p95": 229.3119952082634, - "p99": 249.88800287246704 + "p50": 1487.999975681305, + "p90": 1505.2800178527832, + "p95": 1509.2480182647705, + "p99": 1518.7199711799622 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 16711680, - "combineLogicalBytes": 33423360, - "fanoutMean": 5.3125, - "recvTokensMax": 367, + "dispatchLogicalBytes": 1238945792, + "combineLogicalBytes": 1238945792, + "fanoutMean": 5.2747802734375, + "recvTokensMax": 10913, "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 4096, + "globalTokens": 32768, "dispatch": { - "p50": 157.151997089386, - "p90": 161.02400422096252, - "p95": 163.29599916934967, - "p99": 173.0560064315796 + "p50": 1142.8799629211426, + "p90": 1149.664044380188, + "p95": 1152.1600484848022, + "p99": 1156.7039489746094 }, "combine": { - "p50": 102.1760031580925, - "p90": 104.35199737548828, - "p95": 105.43999820947647, - "p99": 116.06399714946747 + "p50": 1636.5760564804077, + "p90": 1647.3599672317505, + "p95": 1649.183988571167, + "p99": 1657.5679779052734 }, "roundtrip": { - "p50": 253.02401185035706, - "p90": 257.60000944137573, - "p95": 260.8320116996765, - "p99": 278.9439857006073 + "p50": 2753.82399559021, + "p90": 2762.6240253448486, + "p95": 2765.727996826172, + "p99": 2771.0399627685547 }, "isolatedSum": { - "p50": 259.3280002474785, - "p90": 
265.3760015964508, - "p95": 268.73599737882614, - "p99": 289.12000358104706 + "p50": 2779.4560194015503, + "p90": 2797.0240116119385, + "p95": 2801.3440370559692, + "p99": 2814.271926879883 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 33288192, - "combineLogicalBytes": 66576384, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 4, + "dispatchLogicalBytes": 2481747968, + "combineLogicalBytes": 2481747968, + "fanoutMean": 5.282989501953125, + "recvTokensMax": 21789, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 @@ -14559,366 +14527,293 @@ ] }, { - "id": "cx-c27e2cad", - "identity": "b300|deepep|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", - "colorKey": "b300_d6fd14c3", - "comparisonKey": "ac13ebc2bb2c560a", + "id": "cx-c9c3e331", + "identity": "gb200|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|prefill|normal|none|none|0|tuned||bfb01c61bdf926e", + "colorKey": "gb200_62fd6d04", + "comparisonKey": "dc09c891587fd8b9", "schemaVersion": 3, - "generatedAt": "2026-06-27T10:26:01.213105+00:00", + "generatedAt": "2026-06-29T13:54:20.997256+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_10", - "sku": "b300", + "publicationStatus": "comparable-experimental", + "runner": "gb200-nv_1", + "sku": "gb200", "backend": "deepep", - "phase": "decode", + "phase": "prefill", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", + "topologyClass": "gb200-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep · fp8", + "label": "GB200 EP8 · deepep · bf16 · zipf", "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", + "routing": 
"zipf", + "routingLabel": "zipf", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, - "dispatchDtype": "fp8", + "dispatchDtype": "bf16", + "kernelGeneration": "v1", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1351, + "achievedFraction": 0.1316, "configuredUnits": 20, - "deviceUnits": 148, + "deviceUnits": 152, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, "paretoEligible": false }, "placement": { - "kind": "packed", - "nodes": 1, + "kind": "adversarial", + "nodes": 2, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "ac583971f94b176", - "workloadId": "set:8:d8d49658059863f2", - "workloadSource": "canonical-serialized", + "traceSignature": "bfb01c61bdf926e", + "workloadId": null, + "workloadSource": "seeded-runtime", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, - "backendVersion": "2.0.0+af9a040", + "backendVersion": "1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28286436120", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28286436120", - "createdAt": "2026-06-27T10:26:01.213105+00:00", - "sha": "91c7acf59a5e524f37742922ec67721d86a03f6b" + "id": "28374335449", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449", + "createdAt": "2026-06-29T13:08:20Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 56.2559999525547, - "p90": 58.78400057554245, - "p95": 61.28000095486641, - "p99": 77.69600301980972 - }, - "combine": { - "p50": 61.983998864889145, - "p90": 78.8159966468811, - "p95": 86.87999844551086, - "p99": 95.10400146245956 - }, - "roundtrip": { - "p50": 120.44800072908401, - "p90": 123.19999933242798, - "p95": 
125.82400441169739, - "p99": 144.03200149536133 - }, - "isolatedSum": { - "p50": 118.23999881744385, - "p90": 137.59999722242355, - "p95": 148.15999940037727, - "p99": 172.8000044822693 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 315392, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 57.151999324560165, - "p90": 59.039998799562454, - "p95": 59.99999865889549, - "p99": 73.11999797821045 - }, - "combine": { - "p50": 64.54399973154068, - "p90": 66.17599725723267, - "p95": 67.16799736022949, - "p99": 74.23999905586243 - }, - "roundtrip": { - "p50": 124.15999919176102, - "p90": 126.39999389648438, - "p95": 129.60000336170197, - "p99": 138.49599659442902 - }, - "isolatedSum": { - "p50": 121.69599905610085, - "p90": 125.21599605679512, - "p95": 127.16799601912498, - "p99": 147.35999703407288 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 616448, - "combineLogicalBytes": 1232896, - "fanoutMean": 5.375, - "recvTokensMax": 13, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 58.88000130653381, - "p90": 61.37600168585777, - "p95": 63.10400366783142, - "p99": 91.10400080680847 + "p50": 126.52799487113953, + "p90": 361.1840009689331, + "p95": 402.43199467658997, + "p99": 425.8880019187927 }, "combine": { - "p50": 67.35999882221222, - "p90": 69.50400024652481, - "p95": 70.14399766921997, - "p99": 86.30400151014328 + "p50": 133.15199315547943, + "p90": 365.2159869670868, + "p95": 404.54399585723877, + "p99": 423.2960045337677 }, "roundtrip": { - "p50": 127.68000364303589, - "p90": 130.14400005340576, - "p95": 131.55199587345123, - "p99": 137.08800077438354 + "p50": 234.40000414848328, + "p90": 251.0719895362854, + 
"p95": 486.62400245666504, + "p99": 529.151976108551 }, "isolatedSum": { - "p50": 126.24000012874603, - "p90": 130.88000193238258, - "p95": 133.2480013370514, - "p99": 177.40800231695175 + "p50": 259.67998802661896, + "p90": 726.3999879360199, + "p95": 806.9759905338287, + "p99": 849.1840064525604 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1240064, - "combineLogicalBytes": 2480128, - "fanoutMean": 5.40625, - "recvTokensMax": 29, - "stragglerRank": 4, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 256, + "globalTokens": 2048, "dispatch": { - "p50": 60.03199890255928, - "p90": 62.30400130152702, - "p95": 63.26399743556976, - "p99": 69.2799985408783 + "p50": 162.1759980916977, + "p90": 430.07999658584595, + "p95": 449.7919976711273, + "p99": 491.5199875831604 }, "combine": { - "p50": 68.76800209283829, - "p90": 70.46400010585785, - "p95": 71.3919997215271, - "p99": 87.74399757385254 + "p50": 183.23199450969696, + "p90": 452.60798931121826, + "p95": 461.2799882888794, + "p99": 474.91198778152466 }, "roundtrip": { - "p50": 130.62399625778198, - "p90": 133.08799266815186, - "p95": 134.94400680065155, - "p99": 141.88799262046814 + "p50": 320.70401310920715, + "p90": 572.9600191116333, + "p95": 596.1920022964478, + "p99": 618.5280084609985 }, "isolatedSum": { - "p50": 128.80000099539757, - "p90": 132.76800140738487, - "p95": 134.65599715709686, - "p99": 157.02399611473083 + "p50": 345.40799260139465, + "p90": 882.6879858970642, + "p95": 911.0719859600067, + "p99": 966.4319753646851 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2487296, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 7, + "dispatchLogicalBytes": 100509696, + "combineLogicalBytes": 100509696, + "fanoutMean": 
3.42333984375, + "recvTokensMax": 2046, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 16, - "globalTokens": 128, + "tokensPerRank": 512, + "globalTokens": 4096, "dispatch": { - "p50": 61.28000095486641, - "p90": 63.551999628543854, - "p95": 64.89600241184235, - "p99": 75.58400183916092 + "p50": 231.00799322128296, + "p90": 476.1599898338318, + "p95": 523.0720043182373, + "p99": 548.192024230957 }, "combine": { - "p50": 69.47200000286102, - "p90": 71.45600020885468, - "p95": 72.38399982452393, - "p99": 76.67200267314911 + "p50": 353.11999917030334, + "p90": 600.1920104026794, + "p95": 622.7840185165405, + "p99": 637.1520161628723 }, "roundtrip": { - "p50": 132.9919993877411, - "p90": 135.55200397968292, - "p95": 137.37599551677704, - "p99": 149.63200688362122 + "p50": 551.3920187950134, + "p90": 824.5120048522949, + "p95": 847.648024559021, + "p99": 2337.5680446624756 }, "isolatedSum": { - "p50": 130.75200095772743, - "p90": 135.00799983739853, - "p95": 137.28000223636627, - "p99": 152.25600451231003 + "p50": 584.1279923915863, + "p90": 1076.3520002365112, + "p95": 1145.8560228347778, + "p99": 1185.3440403938293 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4960256, - "combineLogicalBytes": 9920512, - "fanoutMean": 5.40625, - "recvTokensMax": 92, - "stragglerRank": 7, + "dispatchLogicalBytes": 201678848, + "combineLogicalBytes": 201678848, + "fanoutMean": 3.4345703125, + "recvTokensMax": 4094, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 32, - "globalTokens": 256, + "tokensPerRank": 1024, + "globalTokens": 8192, "dispatch": { - "p50": 64.2239972949028, - "p90": 66.39999896287918, - "p95": 67.87200272083282, - "p99": 82.8159973025322 + "p50": 378.59201431274414, + "p90": 640.2239799499512, + "p95": 671.3280081748962, + "p99": 694.9759721755981 }, "combine": { - "p50": 75.39200037717819, - "p90": 77.02399790287018, - "p95": 77.72800326347351, - "p99": 
85.82399785518646 + "p50": 634.4000101089478, + "p90": 651.8719792366028, + "p95": 928.0319809913635, + "p99": 957.0879936218262 }, "roundtrip": { - "p50": 145.37599682807922, - "p90": 147.8399932384491, - "p95": 148.83199334144592, - "p99": 160.41600704193115 + "p50": 977.3439764976501, + "p90": 1243.1360483169556, + "p95": 1284.4799757003784, + "p99": 1316.5760040283203 }, "isolatedSum": { - "p50": 139.615997672081, - "p90": 143.42399686574936, - "p95": 145.60000598430634, - "p99": 168.63999515771866 + "p50": 1012.9920244216919, + "p90": 1292.095959186554, + "p95": 1599.3599891662598, + "p99": 1652.0639657974243 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 9863168, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 182, - "stragglerRank": 7, + "dispatchLogicalBytes": 405035008, + "combineLogicalBytes": 405035008, + "fanoutMean": 3.4488525390625, + "recvTokensMax": 8189, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 64, - "globalTokens": 512, + "tokensPerRank": 2048, + "globalTokens": 16384, "dispatch": { - "p50": 72.9919970035553, - "p90": 75.6480023264885, - "p95": 76.89599692821503, - "p99": 89.79199826717377 + "p50": 700.0640034675598, + "p90": 935.2319836616516, + "p95": 967.5840139389038, + "p99": 997.6639747619629 }, "combine": { - "p50": 89.24800157546997, - "p90": 91.2960022687912, - "p95": 92.99200028181076, - "p99": 104.76800054311752 + "p50": 1141.6319608688354, + "p90": 1441.1519765853882, + "p95": 1468.991994857788, + "p99": 1488.927960395813 }, "roundtrip": { - "p50": 173.92000555992126, - "p90": 176.9919991493225, - "p95": 179.1040003299713, - "p99": 198.08000326156616 + "p50": 1814.2720460891724, + "p90": 1917.3760414123535, + "p95": 2131.6800117492676, + "p99": 2175.1039028167725 }, "isolatedSum": { - "p50": 162.23999857902527, - "p90": 166.9440045952797, - "p95": 169.8879972100258, - "p99": 194.5599988102913 + "p50": 1841.6959643363953, + "p90": 
2376.38396024704, + "p95": 2436.576008796692, + "p99": 2486.591935157776 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19496960, - "combineLogicalBytes": 38993920, - "fanoutMean": 5.3125, - "recvTokensMax": 367, + "dispatchLogicalBytes": 808822784, + "combineLogicalBytes": 808822784, + "fanoutMean": 3.44354248046875, + "recvTokensMax": 16380, "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 4096, + "globalTokens": 32768, "dispatch": { - "p50": 83.90399813652039, - "p90": 86.65599673986435, - "p95": 87.96799927949905, - "p99": 94.2080020904541 + "p50": 1366.047978401184, + "p90": 1532.0639610290527, + "p95": 1574.5279788970947, + "p99": 1611.232042312622 }, "combine": { - "p50": 110.20799726247787, - "p90": 112.92800307273865, - "p95": 113.88800293207169, - "p99": 120.92799693346024 + "p50": 2185.5039596557617, + "p90": 2457.98397064209, + "p95": 2500.159978866577, + "p99": 2538.048028945923 }, "roundtrip": { - "p50": 220.19200026988983, - "p90": 223.4240025281906, - "p95": 224.99200701713562, - "p99": 245.08799612522125 + "p50": 3528.8639068603516, + "p90": 3623.5198974609375, + "p95": 3858.8480949401855, + "p99": 3893.8560485839844 }, "isolatedSum": { - "p50": 194.11199539899826, - "p90": 199.583999812603, - "p95": 201.85600221157074, - "p99": 215.13599902391434 + "p50": 3551.551938056946, + "p90": 3990.0479316711426, + "p95": 4074.687957763672, + "p99": 4149.280071258545 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 38836224, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 4, + "dispatchLogicalBytes": 1619795968, + "combineLogicalBytes": 1619795968, + "fanoutMean": 3.4481201171875, + "recvTokensMax": 32761, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -14926,47 +14821,48 @@ ] }, { - "id": "cx-669dd02d", - "identity": 
"b300|deepep|7168|8|256|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", - "colorKey": "b300_c4c63f07", - "comparisonKey": "564ae99a5e9997e8", + "id": "cx-d88669b5", + "identity": "gb200|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|prefill|normal|none|none|0|tuned||e47f9de18e6cabe", + "colorKey": "gb200_62fd6d04", + "comparisonKey": "dc09c891587fd8b9", "schemaVersion": 3, - "generatedAt": "2026-06-27T09:50:49.099200+00:00", + "generatedAt": "2026-06-29T13:57:23.040418+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_01", - "sku": "b300", + "publicationStatus": "comparable-experimental", + "runner": "gb200-nv_0", + "sku": "gb200", "backend": "deepep", - "phase": "decode", + "phase": "prefill", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "runtime-visible-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb200-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep · fp8", + "label": "GB200 EP8 · deepep · bf16 · zipf", "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", + "routing": "zipf", + "routingLabel": "zipf", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, - "dispatchDtype": "fp8", + "dispatchDtype": "bf16", + "kernelGeneration": "v1", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1351, + "achievedFraction": 0.1316, "configuredUnits": 20, - "deviceUnits": 148, + "deviceUnits": 152, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -14974,317 +14870,315 @@ }, "placement": { "kind": "packed", - "nodes": 1, + "nodes": 2, 
"gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "ac583971f94b176", - "workloadId": "set:8:d8d49658059863f2", - "workloadSource": "canonical-serialized", + "traceSignature": "e47f9de18e6cabe", + "workloadId": null, + "workloadSource": "seeded-runtime", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", + "backendVersion": "1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28285671692", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285671692", - "createdAt": "2026-06-27T09:50:49.099200+00:00", - "sha": "149586650dbed5b7579537347e9489d5b41543c1" + "id": "28374335449", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449", + "createdAt": "2026-06-29T13:08:20Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 112.35199868679047, - "p90": 115.87200313806534, - "p95": 118.59200149774551, - "p99": 133.215993642807 - }, - "combine": { - "p50": 62.33600154519081, - "p90": 64.35199826955795, - "p95": 64.7680014371872, - "p99": 68.4799998998642 - }, - "roundtrip": { - "p50": 164.92800414562225, - "p90": 168.06399822235107, - "p95": 170.27199268341064, - "p99": 182.6239973306656 - }, - "isolatedSum": { - "p50": 174.68800023198128, - "p90": 180.2240014076233, - "p95": 183.3600029349327, - "p99": 201.6959935426712 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 315392, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 112.28799819946289, - "p90": 116.09599739313126, - "p95": 119.6800023317337, - "p99": 136.4479959011078 - }, - "combine": { - "p50": 
62.39999830722809, - "p90": 64.15999680757523, - "p95": 64.38399851322174, - "p99": 65.92000275850296 - }, - "roundtrip": { - "p50": 167.26399958133698, - "p90": 169.76000368595123, - "p95": 172.4800020456314, - "p99": 185.92000007629395 - }, - "isolatedSum": { - "p50": 174.68799650669098, - "p90": 180.25599420070648, - "p95": 184.06400084495544, - "p99": 202.36799865961075 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 616448, - "combineLogicalBytes": 1232896, - "fanoutMean": 5.375, - "recvTokensMax": 13, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 113.92000317573547, - "p90": 117.3119992017746, - "p95": 121.21599912643433, - "p99": 127.68000364303589 + "p50": 125.15200674533844, + "p90": 133.91999900341034, + "p95": 136.86400651931763, + "p99": 145.6640064716339 }, "combine": { - "p50": 63.680000603199005, - "p90": 65.40799885988235, - "p95": 65.95200300216675, - "p99": 78.78399640321732 + "p50": 135.1040005683899, + "p90": 143.2960033416748, + "p95": 144.9279934167862, + "p99": 153.85599434375763 }, "roundtrip": { - "p50": 168.35199296474457, - "p90": 172.35200107097626, - "p95": 174.43199455738068, - "p99": 184.4480037689209 + "p50": 237.88799345493317, + "p90": 246.2719976902008, + "p95": 248.44799935817719, + "p99": 254.20799851417542 }, "isolatedSum": { - "p50": 177.60000377893448, - "p90": 182.71999806165695, - "p95": 187.16800212860107, - "p99": 206.4640000462532 + "p50": 260.25600731372833, + "p90": 277.21600234508514, + "p95": 281.7919999361038, + "p99": 299.52000081539154 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1240064, - "combineLogicalBytes": 2480128, - "fanoutMean": 5.40625, - "recvTokensMax": 29, - "stragglerRank": 7, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 5, 
"correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 512, + "globalTokens": 4096, "dispatch": { - "p50": 115.167997777462, - "p90": 118.9119964838028, - "p95": 121.50400131940842, - "p99": 135.26399433612823 + "p50": 235.45600473880768, + "p90": 243.16799640655518, + "p95": 245.728000998497, + "p99": 249.91999566555023 }, "combine": { - "p50": 66.27199798822403, - "p90": 67.4239993095398, - "p95": 68.35199892520905, - "p99": 74.17599856853485 + "p50": 353.5679876804352, + "p90": 363.1359934806824, + "p95": 365.1520013809204, + "p99": 373.05599451065063 }, "roundtrip": { - "p50": 172.0000058412552, - "p90": 174.68799650669098, - "p95": 176.83200538158417, - "p99": 191.6159987449646 + "p50": 547.0399856567383, + "p90": 556.6400289535522, + "p95": 559.2960119247437, + "p99": 571.4560151100159 }, "isolatedSum": { - "p50": 181.43999576568604, - "p90": 186.3359957933426, - "p95": 189.85600024461746, - "p99": 209.4399929046631 + "p50": 589.0239924192429, + "p90": 606.3039898872375, + "p95": 610.8800023794174, + "p99": 622.9759901762009 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2487296, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 4, + "dispatchLogicalBytes": 201678848, + "combineLogicalBytes": 201678848, + "fanoutMean": 3.4345703125, + "recvTokensMax": 4094, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 16, - "globalTokens": 128, + "tokensPerRank": 2048, + "globalTokens": 16384, "dispatch": { - "p50": 115.9679964184761, - "p90": 118.97599697113037, - "p95": 120.7360029220581, - "p99": 130.68799674510956 + "p50": 706.4639925956726, + "p90": 716.1920070648193, + "p95": 719.2639708518982, + "p99": 726.751983165741 }, "combine": { - "p50": 67.1359971165657, - "p90": 68.9919963479042, - "p95": 69.60000097751617, - "p99": 86.75199747085571 + "p50": 1146.2080478668213, + "p90": 
1154.3680429458618, + "p95": 1155.6799411773682, + "p99": 1159.9359512329102 }, "roundtrip": { - "p50": 174.94399845600128, - "p90": 178.17600071430206, - "p95": 179.77599799633026, - "p99": 183.58400464057922 + "p50": 1824.2559432983398, + "p90": 1834.5919847488403, + "p95": 1839.0400409698486, + "p99": 1851.4879941940308 }, "isolatedSum": { - "p50": 183.1039935350418, - "p90": 187.96799331903458, - "p95": 190.33600389957428, - "p99": 217.43999421596527 + "p50": 1852.672040462494, + "p90": 1870.5600500106812, + "p95": 1874.9439120292664, + "p99": 1886.6879343986511 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4960256, - "combineLogicalBytes": 9920512, - "fanoutMean": 5.40625, - "recvTokensMax": 92, - "stragglerRank": 4, + "dispatchLogicalBytes": 808822784, + "combineLogicalBytes": 808822784, + "fanoutMean": 3.44354248046875, + "recvTokensMax": 16380, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 - }, + } + ] + }, + { + "id": "cx-acdb86d4", + "identity": "gb200|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|prefill|normal|none|none|0|tuned||9014f8b812bd39e", + "colorKey": "gb200_8855aa26", + "comparisonKey": "971950c12559c2cf", + "schemaVersion": 3, + "generatedAt": "2026-06-29T13:58:06.263347+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb200-nv_0", + "sku": "gb200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb200-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB200 EP8 · deepep · bf16 · zipf-heavy", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": 
"bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "9014f8b812bd39e", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28374335449", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449", + "createdAt": "2026-06-29T13:08:20Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" + }, + "rows": [ { - "tokensPerRank": 32, - "globalTokens": 256, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 122.65600264072418, - "p90": 126.91199779510498, - "p95": 131.42399489879608, - "p99": 145.53600549697876 + "p50": 117.69600212574005, + "p90": 125.44000148773193, + "p95": 128.86400520801544, + "p99": 136.9599997997284 }, "combine": { - "p50": 71.6480016708374, - "p90": 73.44000041484833, - "p95": 73.88799637556076, - "p99": 87.55200356245041 + "p50": 130.14400005340576, + "p90": 133.85599851608276, + "p95": 136.54400408267975, + "p99": 141.02399349212646 }, "roundtrip": { - "p50": 186.81600689888, - "p90": 191.77600741386414, - "p95": 198.08000326156616, - "p99": 232.44799673557281 + "p50": 224.89599883556366, + "p90": 231.64799809455872, + "p95": 234.72000658512115, + "p99": 238.5600060224533 }, "isolatedSum": { - "p50": 194.30400431156158, - "p90": 200.3519982099533, - "p95": 205.31199127435684, - "p99": 
233.08800905942917 + "p50": 247.8400021791458, + "p90": 259.2960000038147, + "p95": 265.4080092906952, + "p99": 277.98399329185486 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 9863168, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 182, - "stragglerRank": 7, + "dispatchLogicalBytes": 22650880, + "combineLogicalBytes": 22650880, + "fanoutMean": 1.54296875, + "recvTokensMax": 1024, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 64, - "globalTokens": 512, + "tokensPerRank": 512, + "globalTokens": 4096, "dispatch": { - "p50": 136.60800457000732, - "p90": 141.34399592876434, - "p95": 144.57599818706512, - "p99": 157.4079990386963 + "p50": 205.72799444198608, + "p90": 212.41599321365356, + "p95": 214.91199731826782, + "p99": 220.92799842357635 }, "combine": { - "p50": 87.3280018568039, - "p90": 89.4400030374527, - "p95": 89.91999924182892, - "p99": 97.98400104045868 + "p50": 329.0880024433136, + "p90": 336.92800998687744, + "p95": 339.55198526382446, + "p99": 342.5920009613037 }, "roundtrip": { - "p50": 214.4639939069748, - "p90": 220.15999257564545, - "p95": 224.35200214385986, - "p99": 243.23199689388275 + "p50": 506.3040256500244, + "p90": 515.1039958000183, + "p95": 518.3680057525635, + "p99": 527.6479721069336 }, "isolatedSum": { - "p50": 223.93600642681122, - "p90": 230.78399896621704, - "p95": 234.49599742889404, - "p99": 255.39200007915497 + "p50": 534.8159968852997, + "p90": 549.344003200531, + "p95": 554.4639825820923, + "p99": 563.5199993848801 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19496960, - "combineLogicalBytes": 38993920, - "fanoutMean": 5.3125, - "recvTokensMax": 367, - "stragglerRank": 4, + "dispatchLogicalBytes": 91521024, + "combineLogicalBytes": 91521024, + "fanoutMean": 1.55859375, + "recvTokensMax": 4096, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 128, - "globalTokens": 1024, + 
"tokensPerRank": 2048, + "globalTokens": 16384, "dispatch": { - "p50": 159.84000265598297, - "p90": 163.64799439907074, - "p95": 165.56799411773682, - "p99": 176.15999281406403 + "p50": 619.3919777870178, + "p90": 629.8879981040955, + "p95": 634.1120004653931, + "p99": 641.1200165748596 }, "combine": { - "p50": 108.38399827480316, - "p90": 110.68800091743469, - "p95": 111.96800321340561, - "p99": 118.72000247240067 + "p50": 1179.7120571136475, + "p90": 1187.4560117721558, + "p95": 1189.568042755127, + "p99": 1192.639946937561 }, "roundtrip": { - "p50": 262.08001375198364, - "p90": 266.30398631095886, - "p95": 270.81599831581116, - "p99": 283.6799919605255 + "p50": 1733.8240146636963, + "p90": 1743.3279752731323, + "p95": 1746.2719678878784, + "p99": 1752.73597240448 }, "isolatedSum": { - "p50": 268.22400093078613, - "p90": 274.33599531650543, - "p95": 277.5359973311424, - "p99": 294.8799952864647 + "p50": 1799.1040349006653, + "p90": 1817.3440098762512, + "p95": 1823.68004322052, + "p99": 1833.7599635124207 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 38836224, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, + "dispatchLogicalBytes": 368062464, + "combineLogicalBytes": 368062464, + "fanoutMean": 1.5670166015625, + "recvTokensMax": 16384, "stragglerRank": 4, "correct": true, "samplesPooled": 600, @@ -15293,47 +15187,48 @@ ] }, { - "id": "cx-67bd51f4", - "identity": "b300|deepep|7168|8|384|fp8|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||d6c49ae98878760", - "colorKey": "b300_d6fd14c3", - "comparisonKey": "df0e0b78e56d7652", + "id": "cx-83202be8", + "identity": "gb200|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-heavy+eplb|8|prefill|normal|none|none|0|tuned||0e6b07a25691d72", + "colorKey": "gb200_10fda6e8", + "comparisonKey": "36d11099a6c7305e", "schemaVersion": 3, - "generatedAt": "2026-06-27T11:14:03.421071+00:00", + "generatedAt": "2026-06-29T13:51:26.745976+00:00", 
"status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_17", - "sku": "b300", + "publicationStatus": "comparable-experimental", + "runner": "gb200-nv_1", + "sku": "gb200", "backend": "deepep", - "phase": "decode", + "phase": "prefill", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", + "topologyClass": "gb200-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep · fp8", - "model": "Kimi-K2", + "label": "GB200 EP8 · deepep · bf16 · zipf-heavy+eplb", + "model": "DeepSeek-V3 (EPLB physical)", "shape": { "hidden": 7168, "topk": 8, - "experts": 384, - "routing": "uniform", - "routingLabel": "uniform", + "experts": 288, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy+eplb", "routingStep": 0, "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "fp8", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1351, + "achievedFraction": 0.1316, "configuredUnits": 20, - "deviceUnits": 148, + "deviceUnits": 152, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -15341,318 +15236,244 @@ }, "placement": { "kind": "packed", - "nodes": 1, + "nodes": 2, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "d6c49ae98878760", - "workloadId": "set:8:9a27d0df4b17fa09", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", + "traceSignature": "0e6b07a25691d72", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 7.38995361328125, + "eplbImbalanceAfter": 1.0000210716610862, + 
"backendVersion": "1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28287503879", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28287503879", - "createdAt": "2026-06-27T11:14:03.421071+00:00", - "sha": "df7fddee0a275156d4c72fa006cd2b73bce72613" + "id": "28374335449", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449", + "createdAt": "2026-06-29T13:08:20Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { - "tokensPerRank": 1, - "globalTokens": 8, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 56.63999915122986, - "p90": 58.43200162053108, - "p95": 59.167999774217606, - "p99": 67.4239993095398 + "p50": 125.5359947681427, + "p90": 379.2319893836975, + "p95": 407.99999237060547, + "p99": 436.2240135669708 }, "combine": { - "p50": 59.67999994754791, - "p90": 61.24800071120262, - "p95": 62.463998794555664, - "p99": 73.27999919652939 + "p50": 125.11999905109406, + "p90": 138.40000331401825, + "p95": 396.06401324272156, + "p99": 415.583997964859 }, "roundtrip": { - "p50": 119.80800330638885, - "p90": 122.78400361537933, - "p95": 129.63199615478516, - "p99": 147.74399995803833 + "p50": 222.30400145053864, + "p90": 448.4480023384094, + "p95": 486.9120121002197, + "p99": 527.7760028839111 }, "isolatedSum": { - "p50": 116.31999909877777, - "p90": 119.6800023317337, - "p95": 121.63199856877327, - "p99": 140.70399850606918 + "p50": 250.65599381923676, + "p90": 517.6319926977158, + "p95": 804.064005613327, + "p99": 851.8080115318298 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 301056, - "combineLogicalBytes": 602112, - "fanoutMean": 5.25, - "recvTokensMax": 8, + "dispatchLogicalBytes": 79206400, + "combineLogicalBytes": 79206400, + "fanoutMean": 5.3955078125, + "recvTokensMax": 713, "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - 
"tokensPerRank": 2, - "globalTokens": 16, + "tokensPerRank": 256, + "globalTokens": 2048, "dispatch": { - "p50": 57.24800005555153, - "p90": 59.23200026154518, - "p95": 59.87200140953064, - "p99": 67.55200028419495 + "p50": 163.35999965667725, + "p90": 440.8319890499115, + "p95": 466.5600061416626, + "p99": 488.2560074329376 }, "combine": { - "p50": 60.99199876189232, - "p90": 62.880001962184906, - "p95": 63.26399743556976, - "p99": 65.43999910354614 + "p50": 168.86399686336517, + "p90": 416.128009557724, + "p95": 441.8559968471527, + "p99": 466.94400906562805 }, "roundtrip": { - "p50": 121.5360015630722, - "p90": 123.87199699878693, - "p95": 125.44000148773193, - "p99": 139.74399864673615 + "p50": 302.047997713089, + "p90": 568.3519840240479, + "p95": 588.096022605896, + "p99": 614.5280003547668 }, "isolatedSum": { - "p50": 118.23999881744385, - "p90": 122.11200222373009, - "p95": 123.1359988451004, - "p99": 132.9919993877411 + "p50": 332.2239965200424, + "p90": 856.9599986076355, + "p95": 908.4160029888153, + "p99": 955.2000164985657 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 609280, - "combineLogicalBytes": 1218560, - "fanoutMean": 5.3125, - "recvTokensMax": 14, + "dispatchLogicalBytes": 159330304, + "combineLogicalBytes": 159330304, + "fanoutMean": 5.4267578125, + "recvTokensMax": 1436, "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 4, - "globalTokens": 32, + "tokensPerRank": 512, + "globalTokens": 4096, "dispatch": { - "p50": 59.23200026154518, - "p90": 61.43999844789505, - "p95": 62.3680017888546, - "p99": 68.00000369548798 + "p50": 231.55200481414795, + "p90": 276.63999795913696, + "p95": 530.1759839057922, + "p99": 586.4959955215454 }, "combine": { - "p50": 65.05600363016129, - "p90": 67.07199662923813, - "p95": 67.391999065876, - "p99": 71.55200093984604 + "p50": 294.8800027370453, + "p90": 542.0479774475098, + "p95": 578.0159831047058, + "p99": 598.4640121459961 }, "roundtrip": { - "p50": 
128.03199887275696, - "p90": 133.88800621032715, - "p95": 158.36800634860992, - "p99": 190.0160014629364 + "p50": 472.31999039649963, + "p90": 489.9199903011322, + "p95": 774.4640111923218, + "p99": 814.079999923706 }, "isolatedSum": { - "p50": 124.28800389170647, - "p90": 128.51199507713318, - "p95": 129.7600008547306, - "p99": 139.55200463533401 + "p50": 526.4320075511932, + "p90": 818.6879754066467, + "p95": 1108.191967010498, + "p99": 1184.9600076675415 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1204224, - "combineLogicalBytes": 2408448, - "fanoutMean": 5.25, - "recvTokensMax": 26, + "dispatchLogicalBytes": 319535104, + "combineLogicalBytes": 319535104, + "fanoutMean": 5.441650390625, + "recvTokensMax": 2897, "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 59.776000678539276, - "p90": 61.85600161552429, - "p95": 62.72000074386597, - "p99": 69.40799951553345 - }, - "combine": { - "p50": 65.60000032186508, - "p90": 67.45599955320358, - "p95": 68.7360018491745, - "p99": 75.80800354480743 - }, - "roundtrip": { - "p50": 128.83199751377106, - "p90": 131.71200454235077, - "p95": 135.42400300502777, - "p99": 151.0079950094223 - }, - "isolatedSum": { - "p50": 125.37600100040436, - "p90": 129.31200116872787, - "p95": 131.45600259304047, - "p99": 145.21600306034088 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2415616, - "combineLogicalBytes": 4831232, - "fanoutMean": 5.265625, - "recvTokensMax": 48, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, + "tokensPerRank": 1024, + "globalTokens": 8192, "dispatch": { - "p50": 61.184000223875046, - "p90": 63.45599889755249, - "p95": 65.47199934720993, - "p99": 85.85599809885025 + "p50": 361.7919981479645, + "p90": 375.61601400375366, + "p95": 677.344024181366, + "p99": 708.9920043945312 }, "combine": { - "p50": 
67.35999882221222, - "p90": 69.40799951553345, - "p95": 70.46400010585785, - "p99": 73.79200309515 + "p50": 503.10397148132324, + "p90": 519.2319750785828, + "p95": 810.2399706840515, + "p99": 847.4559783935547 }, "roundtrip": { - "p50": 131.58400356769562, - "p90": 134.11200046539307, - "p95": 135.42400300502777, - "p99": 145.53600549697876 + "p50": 838.1440043449402, + "p90": 1135.0079774856567, + "p95": 1168.6079502105713, + "p99": 1194.815993309021 }, "isolatedSum": { - "p50": 128.54399904608727, - "p90": 132.86399841308594, - "p95": 135.93599945306778, - "p99": 159.64800119400024 + "p50": 864.8959696292877, + "p90": 894.8479890823364, + "p95": 1487.5839948654175, + "p99": 1556.447982788086 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4924416, - "combineLogicalBytes": 9848832, - "fanoutMean": 5.3671875, - "recvTokensMax": 91, + "dispatchLogicalBytes": 638410752, + "combineLogicalBytes": 638410752, + "fanoutMean": 5.43603515625, + "recvTokensMax": 5815, "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 64.00000303983688, - "p90": 65.60000032186508, - "p95": 66.65600091218948, - "p99": 75.74400305747986 - }, - "combine": { - "p50": 73.27999919652939, - "p90": 75.23199915885925, - "p95": 75.55200159549713, - "p99": 79.29600030183792 - }, - "roundtrip": { - "p50": 144.48000490665436, - "p90": 147.0080018043518, - "p95": 147.93600142002106, - "p99": 156.89599514007568 - }, - "isolatedSum": { - "p50": 137.28000223636627, - "p90": 140.83199948072433, - "p95": 142.20800250768661, - "p99": 155.04000335931778 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 9748480, - "combineLogicalBytes": 19496960, - "fanoutMean": 5.3125, - "recvTokensMax": 178, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 64, - "globalTokens": 512, + "tokensPerRank": 2048, + "globalTokens": 16384, "dispatch": { - "p50": 
72.38399982452393, - "p90": 74.36800003051758, - "p95": 75.29599964618683, - "p99": 83.99999886751175 + "p50": 623.6799955368042, + "p90": 637.0239853858948, + "p95": 947.8399753570557, + "p99": 981.4079999923706 }, "combine": { - "p50": 88.22400122880936, - "p90": 90.11200070381165, - "p95": 91.58399701118469, - "p99": 97.98400104045868 + "p50": 895.2000141143799, + "p90": 907.263994216919, + "p95": 1206.112027168274, + "p99": 1238.5599613189697 }, "roundtrip": { - "p50": 173.8239973783493, - "p90": 176.54399573802948, - "p95": 177.37600207328796, - "p99": 183.67999792099 + "p50": 1491.6479587554932, + "p90": 1501.9840002059937, + "p95": 1796.447992324829, + "p99": 1847.6159572601318 }, "isolatedSum": { - "p50": 160.60800105333328, - "p90": 164.48000073432922, - "p95": 166.87999665737152, - "p99": 181.98399990797043 + "p50": 1518.880009651184, + "p90": 1544.2879796028137, + "p95": 2153.9520025253296, + "p99": 2219.9679613113403 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19418112, - "combineLogicalBytes": 38836224, - "fanoutMean": 5.291015625, - "recvTokensMax": 372, + "dispatchLogicalBytes": 1275144192, + "combineLogicalBytes": 1275144192, + "fanoutMean": 5.42889404296875, + "recvTokensMax": 11606, "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 4096, + "globalTokens": 32768, "dispatch": { - "p50": 86.36800199747086, - "p90": 89.37600255012512, - "p95": 90.52799642086029, - "p99": 98.11200201511383 + "p50": 1165.6320095062256, + "p90": 1447.4560022354126, + "p95": 1480.1599979400635, + "p99": 1516.0000324249268 }, "combine": { - "p50": 108.70400071144104, - "p90": 111.26399785280228, - "p95": 112.35199868679047, - "p99": 118.81600320339203 + "p50": 1694.4639682769775, + "p90": 1978.559970855713, + "p95": 2011.904001235962, + "p99": 2036.03196144104 }, "roundtrip": { - "p50": 222.59199619293213, - "p90": 226.52800381183624, - "p95": 229.5680046081543, - 
"p99": 250.5599856376648 + "p50": 2829.9200534820557, + "p90": 2843.2960510253906, + "p95": 3127.7120113372803, + "p99": 3177.40797996521 }, "isolatedSum": { - "p50": 195.0720027089119, - "p90": 200.6400004029274, - "p95": 202.87999510765076, - "p99": 216.92800521850586 + "p50": 2860.095977783203, + "p90": 3426.0159730911255, + "p95": 3492.0639991760254, + "p99": 3552.031993865967 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 38757376, - "combineLogicalBytes": 77514752, - "fanoutMean": 5.2802734375, - "recvTokensMax": 707, - "stragglerRank": 4, + "dispatchLogicalBytes": 2546374656, + "combineLogicalBytes": 2546374656, + "fanoutMean": 5.420562744140625, + "recvTokensMax": 23170, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 @@ -15660,47 +15481,48 @@ ] }, { - "id": "cx-4e513884", - "identity": "b300|deepep|7168|8|384|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||d6c49ae98878760", - "colorKey": "b300_c4c63f07", - "comparisonKey": "cf47e1b064e2e435", + "id": "cx-2d323e00", + "identity": "gb200|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-mild|8|prefill|normal|none|none|0|tuned||a39eeb7c2dc6ca7", + "colorKey": "gb200_0cd6b029", + "comparisonKey": "21940cb240b28c01", "schemaVersion": 3, - "generatedAt": "2026-06-27T09:51:14.282258+00:00", + "generatedAt": "2026-06-29T14:03:02.250790+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_05", - "sku": "b300", + "publicationStatus": "comparable-experimental", + "runner": "gb200-nv_0", + "sku": "gb200", "backend": "deepep", - "phase": "decode", + "phase": "prefill", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "runtime-visible-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb200-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, 
"epSize": 8, - "label": "B300 EP8 · deepep · fp8", - "model": "Kimi-K2", + "label": "GB200 EP8 · deepep · bf16 · zipf-mild", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, - "experts": 384, - "routing": "uniform", - "routingLabel": "uniform", + "experts": 256, + "routing": "zipf-mild", + "routingLabel": "zipf-mild", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, - "dispatchDtype": "fp8", + "dispatchDtype": "bf16", + "kernelGeneration": "v1", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1351, + "achievedFraction": 0.1316, "configuredUnits": 20, - "deviceUnits": 148, + "deviceUnits": 152, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -15708,318 +15530,244 @@ }, "placement": { "kind": "packed", - "nodes": 1, + "nodes": 2, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "d6c49ae98878760", - "workloadId": "set:8:9a27d0df4b17fa09", - "workloadSource": "canonical-serialized", + "traceSignature": "a39eeb7c2dc6ca7", + "workloadId": null, + "workloadSource": "seeded-runtime", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", + "backendVersion": "1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28285682409", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285682409", - "createdAt": "2026-06-27T09:51:14.282258+00:00", - "sha": "149586650dbed5b7579537347e9489d5b41543c1" + "id": "28374335449", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449", + "createdAt": "2026-06-29T13:08:20Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 117.50400066375732, - "p90": 121.0239976644516, - 
"p95": 123.03999811410904, - "p99": 134.91199910640717 - }, - "combine": { - "p50": 60.19200012087822, - "p90": 61.72800064086914, - "p95": 62.52799928188324, - "p99": 66.94400310516357 - }, - "roundtrip": { - "p50": 167.87199676036835, - "p90": 171.55200242996216, - "p95": 176.09600722789764, - "p99": 186.0799938440323 - }, - "isolatedSum": { - "p50": 177.69600078463554, - "p90": 182.75199830532074, - "p95": 185.56799739599228, - "p99": 201.85600221157074 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 301056, - "combineLogicalBytes": 602112, - "fanoutMean": 5.25, - "recvTokensMax": 8, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 118.9119964838028, - "p90": 122.91199713945389, - "p95": 125.2480000257492, - "p99": 134.5279961824417 - }, - "combine": { - "p50": 62.55999952554703, - "p90": 64.4799992442131, - "p95": 65.05600363016129, - "p99": 74.33599978685379 - }, - "roundtrip": { - "p50": 173.7920045852661, - "p90": 177.0240068435669, - "p95": 179.4240027666092, - "p99": 209.56799387931824 - }, - "isolatedSum": { - "p50": 181.47199600934982, - "p90": 187.391996383667, - "p95": 190.3040036559105, - "p99": 208.8639959692955 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 609280, - "combineLogicalBytes": 1218560, - "fanoutMean": 5.3125, - "recvTokensMax": 14, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 119.45600062608719, - "p90": 124.38400089740753, - "p95": 127.80800461769104, - "p99": 167.77600347995758 + "p50": 130.2720010280609, + "p90": 137.85600662231445, + "p95": 140.35199582576752, + "p99": 145.37599682807922 }, "combine": { - "p50": 63.00800293684006, - "p90": 64.86400216817856, - "p95": 65.37599861621857, - "p99": 77.27999985218048 + "p50": 134.36800241470337, + 
"p90": 142.5279974937439, + "p95": 143.96800100803375, + "p99": 152.99199521541595 }, "roundtrip": { - "p50": 173.98400604724884, - "p90": 177.85599827766418, - "p95": 180.38399517536163, - "p99": 187.99999356269836 + "p50": 237.37600445747375, + "p90": 244.89599466323853, + "p95": 248.57600033283234, + "p99": 255.74401021003723 }, "isolatedSum": { - "p50": 182.46400356292725, - "p90": 189.2480030655861, - "p95": 193.1840032339096, - "p99": 245.05600333213806 + "p50": 264.6400034427643, + "p90": 280.38400411605835, + "p95": 284.31999683380127, + "p99": 298.3679920434952 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1204224, - "combineLogicalBytes": 2408448, - "fanoutMean": 5.25, - "recvTokensMax": 26, - "stragglerRank": 7, + "dispatchLogicalBytes": 70160384, + "combineLogicalBytes": 70160384, + "fanoutMean": 4.779296875, + "recvTokensMax": 987, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 256, + "globalTokens": 2048, "dispatch": { - "p50": 121.5360015630722, - "p90": 124.7360035777092, - "p95": 126.46399438381195, - "p99": 140.35199582576752 + "p50": 169.11999881267548, + "p90": 177.3120015859604, + "p95": 180.09600043296814, + "p99": 185.63200533390045 }, "combine": { - "p50": 66.68800115585327, - "p90": 68.4799998998642, - "p95": 69.24799829721451, - "p99": 75.71200281381607 + "p50": 187.74400651454926, + "p90": 192.89599359035492, + "p95": 194.07999515533447, + "p99": 200.00000298023224 }, "roundtrip": { - "p50": 179.51999604701996, - "p90": 184.12800133228302, - "p95": 190.528005361557, - "p99": 432.6399862766266 + "p50": 323.68001341819763, + "p90": 331.07200264930725, + "p95": 333.95200967788696, + "p99": 338.9759957790375 }, "isolatedSum": { - "p50": 188.22400271892548, - "p90": 193.2160034775734, - "p95": 195.71199268102646, - "p99": 216.0639986395836 + "p50": 356.86400532722473, + "p90": 370.2079951763153, + "p95": 374.1759955883026, + "p99": 
385.6320083141327 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2415616, - "combineLogicalBytes": 4831232, - "fanoutMean": 5.265625, - "recvTokensMax": 48, - "stragglerRank": 7, + "dispatchLogicalBytes": 140879872, + "combineLogicalBytes": 140879872, + "fanoutMean": 4.79833984375, + "recvTokensMax": 1972, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 16, - "globalTokens": 128, + "tokensPerRank": 512, + "globalTokens": 4096, "dispatch": { - "p50": 122.43200093507767, - "p90": 126.0479986667633, - "p95": 128.63999605178833, - "p99": 147.71200716495514 + "p50": 246.848002076149, + "p90": 255.93599677085876, + "p95": 259.10401344299316, + "p99": 264.76800441741943 }, "combine": { - "p50": 67.64800101518631, - "p90": 69.43999975919724, - "p95": 71.29599899053574, - "p99": 89.08800035715103 + "p50": 354.8159897327423, + "p90": 363.1359934806824, + "p95": 365.02400040626526, + "p99": 371.2959885597229 }, "roundtrip": { - "p50": 181.11999332904816, - "p90": 185.12000143527985, - "p95": 187.6160055398941, - "p99": 205.28000593185425 + "p50": 558.463990688324, + "p90": 566.9119954109192, + "p95": 570.2080130577087, + "p99": 574.8479962348938 }, "isolatedSum": { - "p50": 190.08000195026398, - "p90": 195.48799842596054, - "p95": 199.93599504232407, - "p99": 236.80000752210617 + "p50": 601.6639918088913, + "p90": 619.0719902515411, + "p95": 624.1280138492584, + "p99": 636.0639929771423 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4924416, - "combineLogicalBytes": 9848832, - "fanoutMean": 5.3671875, - "recvTokensMax": 91, - "stragglerRank": 7, + "dispatchLogicalBytes": 282333184, + "combineLogicalBytes": 282333184, + "fanoutMean": 4.80810546875, + "recvTokensMax": 3936, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 32, - "globalTokens": 256, + "tokensPerRank": 1024, + "globalTokens": 8192, "dispatch": { - "p50": 129.2800009250641, - "p90": 132.4159950017929, 
- "p95": 134.75200533866882, - "p99": 156.80000185966492 + "p50": 395.58398723602295, + "p90": 402.8159976005554, + "p95": 404.35200929641724, + "p99": 407.9360067844391 }, "combine": { - "p50": 73.18399846553802, - "p90": 75.19999891519547, - "p95": 76.25599950551987, - "p99": 83.45600217580795 + "p50": 616.1919832229614, + "p90": 623.7120032310486, + "p95": 625.216007232666, + "p99": 628.4800171852112 }, "roundtrip": { - "p50": 195.26399672031403, - "p90": 199.0399956703186, - "p95": 201.82399451732635, - "p99": 220.12799978256226 + "p50": 986.9760274887085, + "p90": 994.4319725036621, + "p95": 996.5119957923889, + "p99": 1004.6080350875854 }, "isolatedSum": { - "p50": 202.4639993906021, - "p90": 207.61599391698837, - "p95": 211.0080048441887, - "p99": 240.25600403547287 + "p50": 1011.7759704589844, + "p90": 1026.528000831604, + "p95": 1029.5680165290833, + "p99": 1036.4160239696503 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 9748480, - "combineLogicalBytes": 19496960, - "fanoutMean": 5.3125, - "recvTokensMax": 178, - "stragglerRank": 7, + "dispatchLogicalBytes": 566716416, + "combineLogicalBytes": 566716416, + "fanoutMean": 4.8255615234375, + "recvTokensMax": 7855, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 64, - "globalTokens": 512, + "tokensPerRank": 2048, + "globalTokens": 16384, "dispatch": { - "p50": 142.4960047006607, - "p90": 146.40000462532043, - "p95": 149.53599870204926, - "p99": 164.32000696659088 + "p50": 704.7039866447449, + "p90": 715.391993522644, + "p95": 718.783974647522, + "p99": 727.1680235862732 }, "combine": { - "p50": 88.79999816417694, - "p90": 90.97599983215332, - "p95": 91.839998960495, - "p99": 101.50399804115295 + "p50": 1126.911997795105, + "p90": 1131.6800117492676, + "p95": 1133.7920427322388, + "p99": 1140.9599781036377 }, "roundtrip": { - "p50": 221.37600183486938, - "p90": 225.95199942588806, - "p95": 228.99200022220612, - "p99": 238.17600309848785 + "p50": 
1802.4640083312988, + "p90": 1876.63996219635, + "p95": 1890.0799751281738, + "p99": 2370.176076889038 }, "isolatedSum": { - "p50": 231.29600286483765, - "p90": 237.37600445747375, - "p95": 241.37599766254425, - "p99": 265.82400500774384 + "p50": 1831.6159844398499, + "p90": 1847.0720052719116, + "p95": 1852.5760173797607, + "p99": 1868.128001689911 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19418112, - "combineLogicalBytes": 38836224, - "fanoutMean": 5.291015625, - "recvTokensMax": 372, - "stragglerRank": 7, + "dispatchLogicalBytes": 1132285952, + "combineLogicalBytes": 1132285952, + "fanoutMean": 4.8206787109375, + "recvTokensMax": 15694, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 4096, + "globalTokens": 32768, "dispatch": { - "p50": 167.04000532627106, - "p90": 171.23199999332428, - "p95": 174.01599884033203, - "p99": 183.1039935350418 + "p50": 1379.3599605560303, + "p90": 1392.0639753341675, + "p95": 1394.752025604248, + "p99": 1403.007984161377 }, "combine": { - "p50": 109.21599715948105, - "p90": 111.455999314785, - "p95": 112.70400136709213, - "p99": 125.91999769210815 + "p50": 2162.0800495147705, + "p90": 2171.583890914917, + "p95": 2175.0400066375732, + "p99": 2370.975971221924 }, "roundtrip": { - "p50": 270.2080011367798, - "p90": 274.2080092430115, - "p95": 276.5119969844818, - "p99": 297.7280020713806 + "p50": 3512.063980102539, + "p90": 3529.439926147461, + "p95": 3536.57603263855, + "p99": 3583.51993560791 }, "isolatedSum": { - "p50": 276.2560024857521, - "p90": 282.6879993081093, - "p95": 286.72000020742416, - "p99": 309.02399122714996 + "p50": 3541.440010070801, + "p90": 3563.6478662490845, + "p95": 3569.7920322418213, + "p99": 3773.983955383301 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 38757376, - "combineLogicalBytes": 77514752, - "fanoutMean": 5.2802734375, - "recvTokensMax": 707, - "stragglerRank": 7, + 
"dispatchLogicalBytes": 2267840512, + "combineLogicalBytes": 2267840512, + "fanoutMean": 4.82763671875, + "recvTokensMax": 31357, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -16027,366 +15775,293 @@ ] }, { - "id": "cx-1911c35d", - "identity": "b300|deepep|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|ac583971f94b176", - "colorKey": "b300_eee29686", - "comparisonKey": "37f5e47990ede677", + "id": "cx-fd383085", + "identity": "gb200|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-mild+eplb|8|prefill|normal|none|none|0|tuned||3eb2f0d7bdba0fe", + "colorKey": "gb200_4a0087e5", + "comparisonKey": "a10977d3e6692367", "schemaVersion": 3, - "generatedAt": "2026-06-26T17:41:38.976776+00:00", + "generatedAt": "2026-06-29T14:03:32.180363+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_01", - "sku": "b300", + "publicationStatus": "comparable-experimental", + "runner": "gb200-nv_0", + "sku": "gb200", "backend": "deepep", - "phase": "decode", + "phase": "prefill", "mode": "normal", - "resourceMode": "normalized", - "suite": "resource-constrained", + "resourceMode": "tuned", + "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", + "topologyClass": "gb200-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep · fp8 (norm)", - "model": "DeepSeek-V3/V4", + "label": "GB200 EP8 · deepep · bf16 · zipf-mild+eplb", + "model": "DeepSeek-V3 (EPLB physical)", "shape": { "hidden": 7168, "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", + "experts": 288, + "routing": "zipf-mild", + "routingLabel": "zipf-mild+eplb", "routingStep": 0, "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "fp8", + "eplbEnabled": true, + "dispatchDtype": "bf16", + 
"kernelGeneration": "v1", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { - "requestedFraction": 0.18, - "achievedFraction": 0.1824, - "configuredUnits": 27, - "deviceUnits": 148, - "resourceClass": "resource-constrained", - "conformanceClass": "resource-conforming", + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", "fixedKernel": false, - "paretoEligible": true + "paretoEligible": false }, "placement": { "kind": "packed", - "nodes": 1, + "nodes": 2, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "ac583971f94b176", - "workloadId": "set:8:d8d49658059863f2", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", + "traceSignature": "3eb2f0d7bdba0fe", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 2.545684814453125, + "eplbImbalanceAfter": 1.0001495361328125, + "backendVersion": "1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28254479346", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254479346", - "createdAt": "2026-06-26T17:41:38.976776+00:00", - "sha": "60dec7d70f554e252fec87709e2be52752947db1" + "id": "28374335449", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449", + "createdAt": "2026-06-29T13:08:20Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 56.03199824690819, - "p90": 58.240000158548355, - "p95": 59.61599946022034, - "p99": 69.56800073385239 - }, - "combine": { - "p50": 61.40799820423126, - "p90": 63.4239986538887, - "p95": 64.35199826955795, - "p99": 77.53600180149078 - }, - 
"roundtrip": { - "p50": 121.18399888277054, - "p90": 123.4240010380745, - "p95": 124.64000284671783, - "p99": 131.48799538612366 - }, - "isolatedSum": { - "p50": 117.43999645113945, - "p90": 121.66399881243706, - "p95": 123.96799772977829, - "p99": 147.10400253534317 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 315392, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 56.92800134420395, - "p90": 59.23200026154518, - "p95": 60.19200012087822, - "p99": 68.4799998998642 - }, - "combine": { - "p50": 62.24000081419945, - "p90": 64.19199705123901, - "p95": 65.05600363016129, - "p99": 69.69600170850754 - }, - "roundtrip": { - "p50": 122.65600264072418, - "p90": 124.79999661445618, - "p95": 125.98399817943573, - "p99": 135.1040005683899 - }, - "isolatedSum": { - "p50": 119.1680021584034, - "p90": 123.4239973127842, - "p95": 125.2480037510395, - "p99": 138.17600160837173 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 616448, - "combineLogicalBytes": 1232896, - "fanoutMean": 5.375, - "recvTokensMax": 13, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 57.631999254226685, - "p90": 60.03199890255928, - "p95": 61.37600168585777, - "p99": 67.16799736022949 + "p50": 126.36800110340118, + "p90": 135.5839967727661, + "p95": 138.87999951839447, + "p99": 143.39199662208557 }, "combine": { - "p50": 63.93600255250931, - "p90": 65.43999910354614, - "p95": 65.88800251483917, - "p99": 69.023996591568 + "p50": 130.36799430847168, + "p90": 134.14399325847626, + "p95": 139.39200341701508, + "p99": 144.25599575042725 }, "roundtrip": { - "p50": 125.50400197505951, - "p90": 128.51199507713318, - "p95": 132.06399977207184, - "p99": 
143.10400187969208 + "p50": 227.23199427127838, + "p90": 235.9679937362671, + "p95": 238.71999979019165, + "p99": 245.12000381946564 }, "isolatedSum": { - "p50": 121.56800180673599, - "p90": 125.47199800610542, - "p95": 127.26400420069695, - "p99": 136.19199395179749 + "p50": 256.73599541187286, + "p90": 269.72799003124237, + "p95": 278.27200293540955, + "p99": 287.6479923725128 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1240064, - "combineLogicalBytes": 2480128, - "fanoutMean": 5.40625, - "recvTokensMax": 29, - "stragglerRank": 7, + "dispatchLogicalBytes": 78159872, + "combineLogicalBytes": 78159872, + "fanoutMean": 5.32421875, + "recvTokensMax": 702, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 256, + "globalTokens": 2048, "dispatch": { - "p50": 59.487998485565186, - "p90": 61.88800185918808, - "p95": 62.81600147485733, - "p99": 73.2479989528656 + "p50": 164.51199352741241, + "p90": 173.24799299240112, + "p95": 175.29599368572235, + "p99": 179.36000227928162 }, "combine": { - "p50": 66.46399945020676, - "p90": 67.80800223350525, - "p95": 68.89600306749344, - "p99": 71.71200215816498 + "p50": 169.18399930000305, + "p90": 177.88800597190857, + "p95": 179.58399653434753, + "p99": 183.3920031785965 }, "roundtrip": { - "p50": 128.60800325870514, - "p90": 130.65600395202637, - "p95": 131.80799782276154, - "p99": 144.3520039319992 + "p50": 307.8399896621704, + "p90": 315.64798951148987, + "p95": 318.91199946403503, + "p99": 328.0639946460724 }, "isolatedSum": { - "p50": 125.95199793577194, - "p90": 129.69600409269333, - "p95": 131.71200454235077, - "p99": 144.96000111103058 + "p50": 333.69599282741547, + "p90": 351.1359989643097, + "p95": 354.8799902200699, + "p99": 362.7520054578781 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2487296, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 7, + 
"dispatchLogicalBytes": 156563456, + "combineLogicalBytes": 156563456, + "fanoutMean": 5.33251953125, + "recvTokensMax": 1393, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 16, - "globalTokens": 128, + "tokensPerRank": 512, + "globalTokens": 4096, "dispatch": { - "p50": 60.67200005054474, - "p90": 62.880001962184906, - "p95": 63.74400109052658, - "p99": 69.82400268316269 + "p50": 235.71200668811798, + "p90": 243.0720031261444, + "p95": 245.44000625610352, + "p99": 252.16001272201538 }, "combine": { - "p50": 67.64800101518631, - "p90": 69.63200122117996, - "p95": 70.91200351715088, - "p99": 79.71200346946716 + "p50": 289.92000222206116, + "p90": 293.92001032829285, + "p95": 296.09599709510803, + "p99": 301.9520044326782 }, "roundtrip": { - "p50": 130.87999820709229, - "p90": 133.15199315547943, - "p95": 134.43200290203094, - "p99": 141.88799262046814 + "p50": 474.07999634742737, + "p90": 487.0400130748749, + "p95": 492.99201369285583, + "p99": 814.6880269050598 }, "isolatedSum": { - "p50": 128.32000106573105, - "p90": 132.51200318336487, - "p95": 134.65600460767746, - "p99": 149.53600615262985 + "p50": 525.6320089101791, + "p90": 536.9920134544373, + "p95": 541.5360033512115, + "p99": 554.1120171546936 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4960256, - "combineLogicalBytes": 9920512, - "fanoutMean": 5.40625, - "recvTokensMax": 92, + "dispatchLogicalBytes": 312410112, + "combineLogicalBytes": 312410112, + "fanoutMean": 5.3203125, + "recvTokensMax": 2773, "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 32, - "globalTokens": 256, + "tokensPerRank": 1024, + "globalTokens": 8192, "dispatch": { - "p50": 63.61600011587143, - "p90": 65.47199934720993, - "p95": 66.23999774456024, - "p99": 72.54400104284286 + "p50": 360.51198840141296, + "p90": 368.19198727607727, + "p95": 371.10400199890137, + "p99": 379.2319893836975 }, "combine": { - "p50": 72.31999933719635, - 
"p90": 74.14399832487106, - "p95": 75.23199915885925, - "p99": 79.6160027384758 + "p50": 496.89599871635437, + "p90": 502.01600790023804, + "p95": 503.2320022583008, + "p99": 508.67199897766113 }, "roundtrip": { - "p50": 142.87999272346497, - "p90": 145.85599303245544, - "p95": 147.16799557209015, - "p99": 155.29599785804749 + "p50": 829.2480111122131, + "p90": 837.984025478363, + "p95": 840.4800295829773, + "p99": 844.3840146064758 }, "isolatedSum": { - "p50": 135.93599945306778, - "p90": 139.615997672081, - "p95": 141.4719969034195, - "p99": 152.16000378131866 + "p50": 857.4079871177673, + "p90": 870.2079951763153, + "p95": 874.3360042572021, + "p99": 887.9039883613586 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 9863168, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 182, - "stragglerRank": 4, + "dispatchLogicalBytes": 622712832, + "combineLogicalBytes": 622712832, + "fanoutMean": 5.3023681640625, + "recvTokensMax": 5498, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 64, - "globalTokens": 512, + "tokensPerRank": 2048, + "globalTokens": 16384, "dispatch": { - "p50": 72.64000177383423, - "p90": 75.3600001335144, - "p95": 76.51200145483017, - "p99": 82.65600353479385 + "p50": 622.7840185165405, + "p90": 630.5279731750488, + "p95": 632.5759887695312, + "p99": 638.4959816932678 }, "combine": { - "p50": 87.90399879217148, - "p90": 90.08000046014786, - "p95": 90.84799885749817, - "p99": 101.15200281143188 + "p50": 882.8480243682861, + "p90": 891.7120099067688, + "p95": 894.1439986228943, + "p99": 896.6400027275085 }, "roundtrip": { - "p50": 172.83199727535248, - "p90": 175.4239946603775, - "p95": 176.41599476337433, - "p99": 181.43999576568604 + "p50": 1476.3519763946533, + "p90": 1485.0560426712036, + "p95": 1487.8400564193726, + "p99": 1497.6320266723633 }, "isolatedSum": { - "p50": 160.5440005660057, - "p90": 165.44000059366226, - "p95": 167.36000031232834, - "p99": 
183.80800634622574 + "p50": 1505.6320428848267, + "p90": 1522.2399830818176, + "p95": 1526.7199873924255, + "p99": 1535.1359844207764 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19496960, - "combineLogicalBytes": 38993920, - "fanoutMean": 5.3125, - "recvTokensMax": 367, - "stragglerRank": 7, + "dispatchLogicalBytes": 1245038592, + "combineLogicalBytes": 1245038592, + "fanoutMean": 5.30072021484375, + "recvTokensMax": 10955, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 4096, + "globalTokens": 32768, "dispatch": { - "p50": 83.20000022649765, - "p90": 86.07999980449677, - "p95": 87.0399996638298, - "p99": 90.17600119113922 + "p50": 1150.048017501831, + "p90": 1156.6400527954102, + "p95": 1158.30397605896, + "p99": 1162.176012992859 }, "combine": { - "p50": 108.70400071144104, - "p90": 110.97600311040878, - "p95": 112.06399649381638, - "p99": 116.41599982976913 + "p50": 1647.6800441741943, + "p90": 1657.8880548477173, + "p95": 1659.4560146331787, + "p99": 1667.7440404891968 }, "roundtrip": { - "p50": 218.07999908924103, - "p90": 221.343994140625, - "p95": 222.97599911689758, - "p99": 235.52000522613525 + "p50": 2770.2720165252686, + "p90": 2780.384063720703, + "p95": 2783.1039428710938, + "p99": 2787.1360778808594 }, "isolatedSum": { - "p50": 191.9040009379387, - "p90": 197.05600291490555, - "p95": 199.10399615764618, - "p99": 206.59200102090836 + "p50": 2797.7280616760254, + "p90": 2814.5281076431274, + "p95": 2817.7599906921387, + "p99": 2829.9200534820557 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 38836224, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 7, + "dispatchLogicalBytes": 2489460736, + "combineLogicalBytes": 2489460736, + "fanoutMean": 5.299407958984375, + "recvTokensMax": 21864, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -16394,366 
+16069,293 @@ ] }, { - "id": "cx-fe6f5351", - "identity": "b300|deepep|7168|8|256|fp8|normal|cached-layout-comm-only-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|ac583971f94b176", - "colorKey": "b300_84b10b26", - "comparisonKey": "abf92acc41d9d301", + "id": "cx-e422e15d", + "identity": "gb200|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-moderate|8|prefill|normal|none|none|0|tuned||bfb01c61bdf926e", + "colorKey": "gb200_ff33b726", + "comparisonKey": "bb683080611997e2", "schemaVersion": 3, - "generatedAt": "2026-06-26T18:10:48.557544+00:00", + "generatedAt": "2026-06-29T14:04:49.835497+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_01", - "sku": "b300", + "publicationStatus": "comparable-experimental", + "runner": "gb200-nv_0", + "sku": "gb200", "backend": "deepep", - "phase": "decode", + "phase": "prefill", "mode": "normal", - "resourceMode": "normalized", - "suite": "resource-constrained", + "resourceMode": "tuned", + "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "cached-layout-comm-only-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb200-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep · fp8 (norm) [cl]", + "label": "GB200 EP8 · deepep · bf16 · zipf-moderate", "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, - "dispatchDtype": "fp8", + "dispatchDtype": "bf16", + "kernelGeneration": "v1", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { - "requestedFraction": 0.18, - "achievedFraction": 0.1824, - "configuredUnits": 27, - "deviceUnits": 148, - "resourceClass": 
"resource-constrained", - "conformanceClass": "resource-conforming", + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", "fixedKernel": false, - "paretoEligible": true + "paretoEligible": false }, "placement": { "kind": "packed", - "nodes": 1, + "nodes": 2, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "ac583971f94b176", - "workloadId": "set:8:d8d49658059863f2", - "workloadSource": "canonical-serialized", + "traceSignature": "bfb01c61bdf926e", + "workloadId": null, + "workloadSource": "seeded-runtime", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", + "backendVersion": "1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28254499301", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254499301", - "createdAt": "2026-06-26T18:10:48.557544+00:00", - "sha": "60dec7d70f554e252fec87709e2be52752947db1" + "id": "28374335449", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449", + "createdAt": "2026-06-29T13:08:20Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { - "tokensPerRank": 1, - "globalTokens": 8, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 49.56800118088722, - "p90": 51.263999193906784, - "p95": 51.83999985456467, - "p99": 57.920001447200775 + "p50": 125.791996717453, + "p90": 134.783998131752, + "p95": 136.9280070066452, + "p99": 143.10400187969208 }, "combine": { - "p50": 62.24000081419945, - "p90": 63.680000603199005, - "p95": 64.51199948787689, - "p99": 66.3679987192154 + "p50": 133.760005235672, + "p90": 142.59199798107147, + "p95": 144.16000247001648, + "p99": 146.4959979057312 }, "roundtrip": { - "p50": 114.78400230407715, - "p90": 
116.86400324106216, - "p95": 118.01599711179733, - "p99": 126.68800354003906 + "p50": 233.95200073719025, + "p90": 240.09600281715393, + "p95": 242.43199825286865, + "p99": 246.33599817752838 }, "isolatedSum": { - "p50": 111.80800199508667, - "p90": 114.94399979710579, - "p95": 116.35199934244156, - "p99": 124.28800016641617 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 315392, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 49.92000013589859, - "p90": 51.80799961090088, - "p95": 52.76799947023392, - "p99": 58.9120015501976 - }, - "combine": { - "p50": 63.040003180503845, - "p90": 64.89600241184235, - "p95": 65.24799764156342, - "p99": 74.11199808120728 - }, - "roundtrip": { - "p50": 116.64000153541565, - "p90": 119.00799721479416, - "p95": 121.08799815177917, - "p99": 136.57599687576294 - }, - "isolatedSum": { - "p50": 112.96000331640244, - "p90": 116.70400202274323, - "p95": 118.01599711179733, - "p99": 133.02399963140488 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 616448, - "combineLogicalBytes": 1232896, - "fanoutMean": 5.375, - "recvTokensMax": 13, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 51.42400041222572, - "p90": 53.63199859857559, - "p95": 54.655998945236206, - "p99": 65.76000154018402 - }, - "combine": { - "p50": 63.10400366783142, - "p90": 64.96000289916992, - "p95": 65.63200056552887, - "p99": 75.93599706888199 - }, - "roundtrip": { - "p50": 117.53600090742111, - "p90": 119.87199634313583, - "p95": 120.86399644613266, - "p99": 132.192000746727 - }, - "isolatedSum": { - "p50": 114.52800408005714, - "p90": 118.59200149774551, - "p95": 120.28799951076508, - "p99": 141.695998609066 + "p50": 259.552001953125, + "p90": 
277.3759961128235, + "p95": 281.0880094766617, + "p99": 289.5999997854233 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1240064, - "combineLogicalBytes": 2480128, - "fanoutMean": 5.40625, - "recvTokensMax": 29, - "stragglerRank": 7, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 256, + "globalTokens": 2048, "dispatch": { - "p50": 52.41600051522255, - "p90": 54.496001452207565, - "p95": 55.71199953556061, - "p99": 60.5119988322258 + "p50": 159.90400314331055, + "p90": 167.29600727558136, + "p95": 170.56000232696533, + "p99": 175.20000040531158 }, "combine": { - "p50": 65.72800129652023, - "p90": 67.48799979686737, - "p95": 67.9360032081604, - "p99": 73.21599870920181 + "p50": 182.40000307559967, + "p90": 190.68799912929535, + "p95": 192.57600605487823, + "p99": 194.14399564266205 }, "roundtrip": { - "p50": 122.04799801111221, - "p90": 124.38400089740753, - "p95": 126.52799487113953, - "p99": 147.16799557209015 + "p50": 316.51198863983154, + "p90": 323.4879970550537, + "p95": 325.98400115966797, + "p99": 331.167995929718 }, "isolatedSum": { - "p50": 118.14400181174278, - "p90": 121.98400124907494, - "p95": 123.64800274372101, - "p99": 133.7279975414276 + "p50": 342.3040062189102, + "p90": 357.9840064048767, + "p95": 363.13600838184357, + "p99": 369.34399604797363 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2487296, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 6, + "dispatchLogicalBytes": 100509696, + "combineLogicalBytes": 100509696, + "fanoutMean": 3.42333984375, + "recvTokensMax": 2046, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 16, - "globalTokens": 128, + "tokensPerRank": 512, + "globalTokens": 4096, "dispatch": { - 
"p50": 53.69599908590317, - "p90": 55.84000051021576, - "p95": 56.86400085687637, - "p99": 65.0240033864975 + "p50": 229.12000119686127, + "p90": 236.9920015335083, + "p95": 239.9359941482544, + "p99": 247.1040040254593 }, "combine": { - "p50": 67.16799736022949, - "p90": 68.9919963479042, - "p95": 69.69600170850754, - "p99": 77.98399776220322 + "p50": 346.8160033226013, + "p90": 355.3600013256073, + "p95": 357.34400153160095, + "p99": 361.82400584220886 }, "roundtrip": { - "p50": 123.36000055074692, - "p90": 125.66399574279785, - "p95": 127.16799974441528, - "p99": 140.70400595664978 + "p50": 545.7919836044312, + "p90": 553.4080266952515, + "p95": 556.7359924316406, + "p99": 567.0080184936523 }, "isolatedSum": { - "p50": 120.86399644613266, - "p90": 124.83199685811996, - "p95": 126.56000256538391, - "p99": 143.0080011487007 + "p50": 575.9360045194626, + "p90": 592.3520028591156, + "p95": 597.2799956798553, + "p99": 608.9280098676682 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4960256, - "combineLogicalBytes": 9920512, - "fanoutMean": 5.40625, - "recvTokensMax": 92, - "stragglerRank": 7, + "dispatchLogicalBytes": 201678848, + "combineLogicalBytes": 201678848, + "fanoutMean": 3.4345703125, + "recvTokensMax": 4094, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 32, - "globalTokens": 256, + "tokensPerRank": 1024, + "globalTokens": 8192, "dispatch": { - "p50": 57.66399949789047, - "p90": 59.776000678539276, - "p95": 60.63999980688095, - "p99": 65.72800129652023 + "p50": 373.9840090274811, + "p90": 382.81598687171936, + "p95": 386.04798913002014, + "p99": 393.15199851989746 }, "combine": { - "p50": 72.89600372314453, - "p90": 74.14399832487106, - "p95": 75.55200159549713, - "p99": 83.96799862384796 + "p50": 634.7839832305908, + "p90": 638.4639739990234, + "p95": 639.8720145225525, + "p99": 646.1120247840881 }, "roundtrip": { - "p50": 138.40000331401825, - "p90": 140.60799777507782, - "p95": 
141.66399836540222, - "p99": 149.53599870204926 + "p50": 975.7440090179443, + "p90": 984.000027179718, + "p95": 986.624002456665, + "p99": 995.7759976387024 }, "isolatedSum": { - "p50": 130.560003221035, - "p90": 133.91999900341034, - "p95": 136.19200140237808, - "p99": 149.6959999203682 + "p50": 1008.7679922580719, + "p90": 1021.2799608707428, + "p95": 1025.9200036525726, + "p99": 1039.2640233039856 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 9863168, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 182, - "stragglerRank": 6, + "dispatchLogicalBytes": 405035008, + "combineLogicalBytes": 405035008, + "fanoutMean": 3.4488525390625, + "recvTokensMax": 8189, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 64, - "globalTokens": 512, + "tokensPerRank": 2048, + "globalTokens": 16384, "dispatch": { - "p50": 66.04799628257751, - "p90": 68.15999746322632, - "p95": 69.88800317049026, - "p99": 74.91199672222137 + "p50": 695.3920125961304, + "p90": 706.1120271682739, + "p95": 709.4720005989075, + "p99": 715.4560089111328 }, "combine": { - "p50": 87.93599903583527, - "p90": 90.08000046014786, - "p95": 91.74399822950363, - "p99": 98.24000298976898 + "p50": 1147.0719575881958, + "p90": 1226.3360023498535, + "p95": 1237.9200458526611, + "p99": 1293.7599420547485 }, "roundtrip": { - "p50": 164.76799547672272, - "p90": 167.42399334907532, - "p95": 169.3120002746582, - "p99": 185.92000007629395 + "p50": 1811.743974685669, + "p90": 1821.4720487594604, + "p95": 1824.8319625854492, + "p99": 1829.2800188064575 }, "isolatedSum": { - "p50": 153.98399531841278, - "p90": 158.23999792337418, - "p95": 161.6320013999939, - "p99": 173.15199971199036 + "p50": 1842.4639701843262, + "p90": 1932.4480295181274, + "p95": 1947.3920464515686, + "p99": 2009.2159509658813 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19496960, - "combineLogicalBytes": 38993920, - "fanoutMean": 5.3125, - "recvTokensMax": 367, 
- "stragglerRank": 7, + "dispatchLogicalBytes": 808822784, + "combineLogicalBytes": 808822784, + "fanoutMean": 3.44354248046875, + "recvTokensMax": 16380, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 4096, + "globalTokens": 32768, "dispatch": { - "p50": 77.63200253248215, - "p90": 80.1599994301796, - "p95": 81.50400221347809, - "p99": 98.65599870681763 + "p50": 1357.2479486465454, + "p90": 1370.751976966858, + "p95": 1375.615954399109, + "p99": 1384.4480514526367 }, "combine": { - "p50": 108.35199803113937, - "p90": 110.78400164842606, - "p95": 111.84000223875046, - "p99": 126.01600587368011 + "p50": 2192.960023880005, + "p90": 2200.000047683716, + "p95": 2201.3440132141113, + "p99": 2209.887981414795 }, "roundtrip": { - "p50": 211.42399311065674, - "p90": 214.52799439430237, - "p95": 215.87200462818146, - "p99": 223.1999933719635 + "p50": 3524.8639583587646, + "p90": 3537.760019302368, + "p95": 3541.248083114624, + "p99": 3546.5919971466064 }, "isolatedSum": { - "p50": 185.98400056362152, - "p90": 190.94400107860565, - "p95": 193.34400445222855, - "p99": 224.67200458049774 + "p50": 3550.2079725265503, + "p90": 3570.7520246505737, + "p95": 3576.95996761322, + "p99": 3594.3360328674316 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 38836224, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 7, + "dispatchLogicalBytes": 1619795968, + "combineLogicalBytes": 1619795968, + "fanoutMean": 3.4481201171875, + "recvTokensMax": 32761, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -16761,47 +16363,48 @@ ] }, { - "id": "cx-83d0a7b9", - "identity": "b300|deepep|7168|8|256|fp8|normal|cached-layout-comm-only-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", - "colorKey": "b300_49e66a7b", - "comparisonKey": "0abec2edede4ab05", + "id": "cx-80278610", + "identity": 
"gb200|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-moderate+eplb|8|prefill|normal|none|none|0|tuned||45b103b10fbcaef", + "colorKey": "gb200_acbc8de8", + "comparisonKey": "54864d16635426aa", "schemaVersion": 3, - "generatedAt": "2026-06-27T09:47:10.185475+00:00", + "generatedAt": "2026-06-29T14:05:19.211189+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_15", - "sku": "b300", + "publicationStatus": "comparable-experimental", + "runner": "gb200-nv_0", + "sku": "gb200", "backend": "deepep", - "phase": "decode", + "phase": "prefill", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "cached-layout-comm-only-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb200-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep · fp8 [cl]", - "model": "DeepSeek-V3/V4", + "label": "GB200 EP8 · deepep · bf16 · zipf-moderate+eplb", + "model": "DeepSeek-V3 (EPLB physical)", "shape": { "hidden": 7168, "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", + "experts": 288, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate+eplb", "routingStep": 0, "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "fp8", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1351, + "achievedFraction": 0.1316, "configuredUnits": 20, - "deviceUnits": 148, + "deviceUnits": 152, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -16809,317 +16412,243 @@ }, "placement": { "kind": "packed", - "nodes": 1, + "nodes": 2, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - 
"traceSignature": "ac583971f94b176", - "workloadId": "set:8:d8d49658059863f2", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", + "traceSignature": "45b103b10fbcaef", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.895263671875, + "eplbImbalanceAfter": 1.0000902811686199, + "backendVersion": "1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28285590577", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285590577", - "createdAt": "2026-06-27T09:47:10.185475+00:00", - "sha": "149586650dbed5b7579537347e9489d5b41543c1" + "id": "28374335449", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449", + "createdAt": "2026-06-29T13:08:20Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 49.247998744249344, - "p90": 51.552001386880875, - "p95": 52.799999713897705, - "p99": 63.35999816656113 - }, - "combine": { - "p50": 61.72800064086914, - "p90": 63.45599889755249, - "p95": 65.47199934720993, - "p99": 80.86399734020233 - }, - "roundtrip": { - "p50": 114.78400230407715, - "p90": 117.40799993276596, - "p95": 120.80000340938568, - "p99": 136.83199882507324 - }, - "isolatedSum": { - "p50": 110.97599938511848, - "p90": 115.00800028443336, - "p95": 118.27199906110764, - "p99": 144.22399550676346 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 315392, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 50.27199909090996, - "p90": 52.5440014898777, - "p95": 54.46400120854378, - 
"p99": 61.08799949288368 + "p50": 125.82400441169739, + "p90": 133.95200669765472, + "p95": 136.3839954137802, + "p99": 141.08799397945404 }, "combine": { - "p50": 61.664000153541565, - "p90": 63.551999628543854, - "p95": 65.15199691057205, - "p99": 74.01599735021591 + "p50": 128.38399410247803, + "p90": 131.8719983100891, + "p95": 132.76800513267517, + "p99": 133.98399949073792 }, "roundtrip": { - "p50": 116.12799763679504, - "p90": 118.52800101041794, - "p95": 121.66400253772736, - "p99": 139.26400244235992 + "p50": 224.2880016565323, + "p90": 231.99999332427979, + "p95": 234.047994017601, + "p99": 240.25599658489227 }, "isolatedSum": { - "p50": 111.93599924445152, - "p90": 116.09600111842155, - "p95": 119.61599811911583, - "p99": 135.1039968430996 + "p50": 254.20799851417542, + "p90": 265.82400500774384, + "p95": 269.1520005464554, + "p99": 275.07199347019196 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 616448, - "combineLogicalBytes": 1232896, - "fanoutMean": 5.375, - "recvTokensMax": 13, + "dispatchLogicalBytes": 77385728, + "combineLogicalBytes": 77385728, + "fanoutMean": 5.271484375, + "recvTokensMax": 691, "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 51.10400170087814, - "p90": 53.279999643564224, - "p95": 53.98400127887726, - "p99": 61.08799949288368 - }, - "combine": { - "p50": 62.880001962184906, - "p90": 64.86400216817856, - "p95": 65.63200056552887, - "p99": 83.52000266313553 - }, - "roundtrip": { - "p50": 117.91999638080597, - "p90": 121.05599790811539, - "p95": 122.81599640846252, - "p99": 135.16800105571747 - }, - "isolatedSum": { - "p50": 113.98400366306305, - "p90": 118.14400181174278, - "p95": 119.61600184440613, - "p99": 144.6080021560192 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1240064, - "combineLogicalBytes": 2480128, - "fanoutMean": 5.40625, - "recvTokensMax": 29, - "stragglerRank": 6, - "correct": true, - 
"samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 256, + "globalTokens": 2048, "dispatch": { - "p50": 52.38400027155876, - "p90": 54.687999188899994, - "p95": 55.71199953556061, - "p99": 60.127999633550644 + "p50": 163.13600540161133, + "p90": 171.00800573825836, + "p95": 173.50399494171143, + "p99": 179.19999361038208 }, "combine": { - "p50": 66.91200286149979, - "p90": 73.11999797821045, - "p95": 75.19999891519547, - "p99": 92.16000139713287 + "p50": 168.99199783802032, + "p90": 173.88799786567688, + "p95": 178.01600694656372, + "p99": 181.40800297260284 }, "roundtrip": { - "p50": 121.98399752378464, - "p90": 124.38400089740753, - "p95": 125.76000392436981, - "p99": 145.9520012140274 + "p50": 304.1279911994934, + "p90": 312.48000264167786, + "p95": 315.8400058746338, + "p99": 321.02400064468384 }, "isolatedSum": { - "p50": 119.29600313305855, - "p90": 127.80799716711044, - "p95": 130.91199845075607, - "p99": 152.28800103068352 + "p50": 332.12800323963165, + "p90": 344.89600360393524, + "p95": 351.52000188827515, + "p99": 360.6079965829849 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2487296, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, + "dispatchLogicalBytes": 155172864, + "combineLogicalBytes": 155172864, + "fanoutMean": 5.28515625, + "recvTokensMax": 1378, "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 16, - "globalTokens": 128, + "tokensPerRank": 512, + "globalTokens": 4096, "dispatch": { - "p50": 53.37600037455559, - "p90": 55.615998804569244, - "p95": 56.63999915122986, - "p99": 63.45599889755249 + "p50": 232.5119972229004, + "p90": 240.79999327659607, + "p95": 244.00000274181366, + "p99": 253.2159984111786 }, "combine": { - "p50": 67.87200272083282, - "p90": 69.76000219583511, - "p95": 70.52800059318542, - "p99": 75.87199658155441 + "p50": 289.15199637413025, + "p90": 294.14400458335876, + "p95": 
297.21599817276, + "p99": 302.3039996623993 }, "roundtrip": { - "p50": 124.38400089740753, - "p90": 127.10399925708771, - "p95": 128.76799702644348, - "p99": 143.71199905872345 + "p50": 470.0799882411957, + "p90": 477.85601019859314, + "p95": 479.93600368499756, + "p99": 483.90400409698486 }, "isolatedSum": { - "p50": 121.24800309538841, - "p90": 125.37600100040436, - "p95": 127.16799974441528, - "p99": 139.3279954791069 + "p50": 521.6639935970306, + "p90": 534.9439978599548, + "p95": 541.2160009145737, + "p99": 555.5199980735779 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4960256, - "combineLogicalBytes": 9920512, - "fanoutMean": 5.40625, - "recvTokensMax": 92, + "dispatchLogicalBytes": 310546432, + "combineLogicalBytes": 310546432, + "fanoutMean": 5.28857421875, + "recvTokensMax": 2745, "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 32, - "globalTokens": 256, + "tokensPerRank": 1024, + "globalTokens": 8192, "dispatch": { - "p50": 57.0559985935688, - "p90": 59.4559982419014, - "p95": 61.47199869155884, - "p99": 80.19199967384338 + "p50": 359.8079979419708, + "p90": 366.5919899940491, + "p95": 369.79201436042786, + "p99": 380.511999130249 }, "combine": { - "p50": 73.21599870920181, - "p90": 75.32799988985062, - "p95": 76.19199901819229, - "p99": 83.52000266313553 + "p50": 492.70400404930115, + "p90": 502.1759867668152, + "p95": 506.84797763824463, + "p99": 544.6400046348572 }, "roundtrip": { - "p50": 137.56799697875977, - "p90": 140.44800400733948, - "p95": 143.51999759674072, - "p99": 164.0319973230362 + "p50": 823.0400085449219, + "p90": 830.1119804382324, + "p95": 832.3839902877808, + "p99": 837.1840119361877 }, "isolatedSum": { - "p50": 130.27199730277061, - "p90": 134.783998131752, - "p95": 137.66399770975113, - "p99": 163.7120023369789 + "p50": 852.512001991272, + "p90": 868.7679767608643, + "p95": 876.6399919986725, + "p99": 925.1520037651062 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 
9863168, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 182, + "dispatchLogicalBytes": 620619776, + "combineLogicalBytes": 620619776, + "fanoutMean": 5.2845458984375, + "recvTokensMax": 5526, "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 64, - "globalTokens": 512, + "tokensPerRank": 2048, + "globalTokens": 16384, "dispatch": { - "p50": 66.46399945020676, - "p90": 69.05599683523178, - "p95": 70.23999840021133, - "p99": 80.9599980711937 + "p50": 620.639979839325, + "p90": 628.2879710197449, + "p95": 630.4640173912048, + "p99": 638.0800008773804 }, "combine": { - "p50": 87.20000088214874, - "p90": 88.95999938249588, - "p95": 89.82399851083755, - "p99": 96.6079980134964 + "p50": 887.2640132904053, + "p90": 895.8079814910889, + "p95": 897.7280259132385, + "p99": 902.1120071411133 }, "roundtrip": { - "p50": 165.43999314308167, - "p90": 167.7439957857132, - "p95": 168.70400309562683, - "p99": 188.9919936656952 + "p50": 1480.2559614181519, + "p90": 1489.5039796829224, + "p95": 1492.192029953003, + "p99": 1496.5440034866333 }, "isolatedSum": { - "p50": 153.6640003323555, - "p90": 158.01599621772766, - "p95": 160.0639969110489, - "p99": 177.5679960846901 + "p50": 1507.9039931297302, + "p90": 1524.0959525108337, + "p95": 1528.1920433044434, + "p99": 1540.1920080184937 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19496960, - "combineLogicalBytes": 38993920, - "fanoutMean": 5.3125, - "recvTokensMax": 367, + "dispatchLogicalBytes": 1239175168, + "combineLogicalBytes": 1239175168, + "fanoutMean": 5.2757568359375, + "recvTokensMax": 11165, "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 4096, + "globalTokens": 32768, "dispatch": { - "p50": 76.73600316047668, - "p90": 79.23199981451035, - "p95": 80.64000308513641, - "p99": 87.52000331878662 + "p50": 1147.4239826202393, + "p90": 1154.4640064239502, + 
"p95": 1156.1280488967896, + "p99": 1159.2639684677124 }, "combine": { - "p50": 108.57599973678589, - "p90": 111.10399663448334, - "p95": 112.86400258541107, - "p99": 119.6800023317337 + "p50": 1648.0319499969482, + "p90": 1657.5679779052734, + "p95": 1660.3200435638428, + "p99": 1772.063970565796 }, "roundtrip": { - "p50": 211.2320065498352, - "p90": 214.27200734615326, - "p95": 216.06400609016418, - "p99": 229.8559993505478 + "p50": 2771.8400955200195, + "p90": 2780.0960540771484, + "p95": 2783.3919525146484, + "p99": 2789.8240089416504 }, "isolatedSum": { - "p50": 185.31200289726257, - "p90": 190.33599644899368, - "p95": 193.50400567054749, - "p99": 207.20000565052032 + "p50": 2795.4559326171875, + "p90": 2812.0319843292236, + "p95": 2816.4480924606323, + "p99": 2931.3279390335083 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 38836224, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, + "dispatchLogicalBytes": 2481604608, + "combineLogicalBytes": 2481604608, + "fanoutMean": 5.282684326171875, + "recvTokensMax": 22165, "stragglerRank": 5, "correct": true, "samplesPooled": 600, @@ -17128,47 +16657,48 @@ ] }, { - "id": "cx-567c4192", - "identity": "b300|deepep|7168|8|256|fp8-directcast|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", - "colorKey": "b300_8688ff74", - "comparisonKey": "e2dc1b3bb397a94c", + "id": "cx-5fb2396a", + "identity": "gb200|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|prefill|normal|none|none|0|tuned||45b103b10fbcaef", + "colorKey": "gb200_be611b2a", + "comparisonKey": "70163b5f1158fbc8", "schemaVersion": 3, - "generatedAt": "2026-06-27T15:56:03.746973+00:00", + "generatedAt": "2026-06-29T14:01:47.949072+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_09", - "sku": "b300", + "publicationStatus": "comparable-experimental", + "runner": "gb200-nv_0", + "sku": "gb200", "backend": "deepep", - "phase": 
"decode", + "phase": "prefill", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", + "topologyClass": "gb200-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep · fp8-directcast", - "model": "DeepSeek-V3/V4", + "label": "GB200 EP8 · deepep · bf16 · zipf+eplb", + "model": "DeepSeek-V3 (EPLB physical)", "shape": { "hidden": 7168, "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", + "experts": 288, + "routing": "zipf", + "routingLabel": "zipf+eplb", "routingStep": 0, "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "fp8-directcast", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1351, + "achievedFraction": 0.1316, "configuredUnits": 20, - "deviceUnits": 148, + "deviceUnits": 152, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -17176,318 +16706,244 @@ }, "placement": { "kind": "packed", - "nodes": 1, + "nodes": 2, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "ac583971f94b176", - "workloadId": "set:8:d8d49658059863f2", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", + "traceSignature": "45b103b10fbcaef", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.895263671875, + "eplbImbalanceAfter": 1.0000902811686199, + "backendVersion": "1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28294160895", - "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28294160895", - "createdAt": "2026-06-27T15:56:03.746973+00:00", - "sha": "42eddb48c3eed35214c5ad50da1aa6527363ff70" + "id": "28374335449", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449", + "createdAt": "2026-06-29T13:08:20Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 57.08799883723259, - "p90": 59.20000001788139, - "p95": 61.37600168585777, - "p99": 70.62400132417679 - }, - "combine": { - "p50": 61.63199990987778, - "p90": 63.391998410224915, - "p95": 64.06400352716446, - "p99": 67.61600077152252 - }, - "roundtrip": { - "p50": 121.63200229406357, - "p90": 123.77600371837616, - "p95": 125.791996717453, - "p99": 143.39199662208557 - }, - "isolatedSum": { - "p50": 118.71999874711037, - "p90": 122.59199842810631, - "p95": 125.44000521302223, - "p99": 138.2400020956993 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 630784, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 57.5999990105629, - "p90": 60.38400158286095, - "p95": 61.85600161552429, - "p99": 71.96799665689468 - }, - "combine": { - "p50": 63.29599767923355, - "p90": 65.15199691057205, - "p95": 65.60000032186508, - "p99": 68.96000355482101 - }, - "roundtrip": { - "p50": 124.86399710178375, - "p90": 127.13600695133209, - "p95": 128.4479945898056, - "p99": 137.15200126171112 - }, - "isolatedSum": { - "p50": 120.89599668979645, - "p90": 125.535998493433, - "p95": 127.45600193738937, - "p99": 140.9280002117157 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1232896, - "combineLogicalBytes": 1232896, - "fanoutMean": 5.375, - "recvTokensMax": 13, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, 
- { - "tokensPerRank": 4, - "globalTokens": 32, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 59.84000116586685, - "p90": 62.30400130152702, - "p95": 63.32799792289734, - "p99": 72.1919983625412 + "p50": 124.64000284671783, + "p90": 132.47999548912048, + "p95": 135.96799969673157, + "p99": 145.50399780273438 }, "combine": { - "p50": 64.38399851322174, - "p90": 66.68800115585327, - "p95": 67.48799979686737, - "p99": 74.30399954319 + "p50": 127.68000364303589, + "p90": 132.1280002593994, + "p95": 133.63200426101685, + "p99": 141.2159949541092 }, "roundtrip": { - "p50": 127.29600071907043, - "p90": 130.11200726032257, - "p95": 132.47999548912048, - "p99": 155.74400126934052 + "p50": 223.4559953212738, + "p90": 230.9119999408722, + "p95": 233.66400599479675, + "p99": 240.63999950885773 }, "isolatedSum": { - "p50": 124.22399967908859, - "p90": 128.9920024573803, - "p95": 130.8159977197647, - "p99": 146.4959979057312 + "p50": 252.32000648975372, + "p90": 264.6079957485199, + "p95": 269.6000039577484, + "p99": 286.71999275684357 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2480128, - "combineLogicalBytes": 2480128, - "fanoutMean": 5.40625, - "recvTokensMax": 29, - "stragglerRank": 7, + "dispatchLogicalBytes": 77385728, + "combineLogicalBytes": 77385728, + "fanoutMean": 5.271484375, + "recvTokensMax": 691, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 256, + "globalTokens": 2048, "dispatch": { - "p50": 60.5119988322258, - "p90": 63.519999384880066, - "p95": 66.20799750089645, - "p99": 72.64000177383423 + "p50": 162.27200627326965, + "p90": 171.00800573825836, + "p95": 173.95199835300446, + "p99": 182.68799781799316 }, "combine": { - "p50": 67.87200272083282, - "p90": 69.98399645090103, - "p95": 71.42399996519089, - "p99": 86.87999844551086 + "p50": 168.38400065898895, + "p90": 175.64800381660461, + "p95": 177.85599827766418, + "p99": 
181.95199966430664 }, "roundtrip": { - "p50": 129.95199859142303, - "p90": 132.22399353981018, - "p95": 133.63200426101685, - "p99": 141.37600362300873 + "p50": 305.1519989967346, + "p90": 313.53598833084106, + "p95": 316.22400879859924, + "p99": 321.3759958744049 }, "isolatedSum": { - "p50": 128.38400155305862, - "p90": 133.5039958357811, - "p95": 137.63199746608734, - "p99": 159.5200002193451 + "p50": 330.6560069322586, + "p90": 346.656009554863, + "p95": 351.80799663066864, + "p99": 364.6399974822998 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 7, + "dispatchLogicalBytes": 155172864, + "combineLogicalBytes": 155172864, + "fanoutMean": 5.28515625, + "recvTokensMax": 1378, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 16, - "globalTokens": 128, + "tokensPerRank": 512, + "globalTokens": 4096, "dispatch": { - "p50": 61.664000153541565, - "p90": 64.03200328350067, - "p95": 65.34399837255478, - "p99": 70.62400132417679 + "p50": 232.9919934272766, + "p90": 241.85599386692047, + "p95": 244.83199417591095, + "p99": 249.85599517822266 }, "combine": { - "p50": 68.57600063085556, - "p90": 70.23999840021133, - "p95": 71.42399996519089, - "p99": 81.63200318813324 + "p50": 291.9999957084656, + "p90": 299.6160089969635, + "p95": 301.4400005340576, + "p99": 304.60798740386963 }, "roundtrip": { - "p50": 132.38400220870972, - "p90": 135.55200397968292, - "p95": 136.99199259281158, - "p99": 152.0639955997467 + "p50": 468.7039852142334, + "p90": 476.6719937324524, + "p95": 479.64799404144287, + "p99": 486.7520034313202 }, "isolatedSum": { - "p50": 130.24000078439713, - "p90": 134.272001683712, - "p95": 136.76799833774567, - "p99": 152.25600451231003 + "p50": 524.9919891357422, + "p90": 541.472002863884, + "p95": 546.2719947099686, + "p99": 554.4639825820923 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 
9920512, - "combineLogicalBytes": 9920512, - "fanoutMean": 5.40625, - "recvTokensMax": 92, - "stragglerRank": 6, + "dispatchLogicalBytes": 310546432, + "combineLogicalBytes": 310546432, + "fanoutMean": 5.28857421875, + "recvTokensMax": 2745, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 32, - "globalTokens": 256, + "tokensPerRank": 1024, + "globalTokens": 8192, "dispatch": { - "p50": 64.7360011935234, - "p90": 66.94400310516357, - "p95": 68.06399673223495, - "p99": 75.68000257015228 + "p50": 358.0799996852875, + "p90": 365.4400110244751, + "p95": 368.6720132827759, + "p99": 376.8639862537384 }, "combine": { - "p50": 73.66400212049484, - "p90": 75.45600086450577, - "p95": 75.93599706888199, - "p99": 81.79199695587158 + "p50": 495.168000459671, + "p90": 502.560019493103, + "p95": 503.77601385116577, + "p99": 510.43200492858887 }, "roundtrip": { - "p50": 143.99999380111694, - "p90": 146.55999839305878, - "p95": 147.93600142002106, - "p99": 157.82399475574493 + "p50": 824.4159817695618, + "p90": 832.7999711036682, + "p95": 835.7120156288147, + "p99": 840.2559757232666 }, "isolatedSum": { - "p50": 138.40000331401825, - "p90": 142.40000396966934, - "p95": 143.99999380111694, - "p99": 157.47199952602386 + "p50": 853.2480001449585, + "p90": 868.0000305175781, + "p95": 872.4480271339417, + "p99": 887.2959911823273 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19726336, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 182, - "stragglerRank": 7, + "dispatchLogicalBytes": 620619776, + "combineLogicalBytes": 620619776, + "fanoutMean": 5.2845458984375, + "recvTokensMax": 5526, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 64, - "globalTokens": 512, + "tokensPerRank": 2048, + "globalTokens": 16384, "dispatch": { - "p50": 73.37599992752075, - "p90": 75.99999755620956, - "p95": 77.72800326347351, - "p99": 170.46399414539337 + "p50": 
623.0400204658508, + "p90": 629.7600269317627, + "p95": 631.9360136985779, + "p99": 637.2159719467163 }, "combine": { - "p50": 87.99999952316284, - "p90": 90.04800021648407, - "p95": 90.87999910116196, - "p99": 99.96800124645233 + "p50": 893.6640024185181, + "p90": 899.071991443634, + "p95": 904.3840169906616, + "p99": 909.1839790344238 }, "roundtrip": { - "p50": 171.26399278640747, - "p90": 174.27200078964233, - "p95": 175.9359985589981, - "p99": 191.03999435901642 + "p50": 1488.576054573059, + "p90": 1497.7600574493408, + "p95": 1500.5439519882202, + "p99": 1506.2079429626465 }, "isolatedSum": { - "p50": 161.3759994506836, - "p90": 166.04799777269363, - "p95": 168.60800236463547, - "p99": 270.4319953918457 + "p50": 1516.704022884369, + "p90": 1528.8320183753967, + "p95": 1536.3200306892395, + "p99": 1546.3999509811401 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 38993920, - "combineLogicalBytes": 38993920, - "fanoutMean": 5.3125, - "recvTokensMax": 367, - "stragglerRank": 6, + "dispatchLogicalBytes": 1239175168, + "combineLogicalBytes": 1239175168, + "fanoutMean": 5.2757568359375, + "recvTokensMax": 11165, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 4096, + "globalTokens": 32768, "dispatch": { - "p50": 83.96799862384796, - "p90": 86.68799698352814, - "p95": 87.61599659919739, - "p99": 95.87199985980988 + "p50": 1147.5199460983276, + "p90": 1153.8879871368408, + "p95": 1157.088041305542, + "p99": 1161.471962928772 }, "combine": { - "p50": 109.0560033917427, - "p90": 112.0000034570694, - "p95": 112.89600282907486, - "p99": 120.99199742078781 + "p50": 1648.8959789276123, + "p90": 1658.784031867981, + "p95": 1660.0960493087769, + "p99": 1665.503978729248 }, "roundtrip": { - "p50": 219.00799870491028, - "p90": 222.08000719547272, - "p95": 224.16000068187714, - "p99": 238.8480007648468 + "p50": 2773.279905319214, + "p90": 2784.032106399536, + "p95": 
2787.071943283081, + "p99": 2798.7520694732666 }, "isolatedSum": { - "p50": 193.02400201559067, - "p90": 198.68800044059753, - "p95": 200.51199942827225, - "p99": 216.8639972805977 + "p50": 2796.41592502594, + "p90": 2812.672019004822, + "p95": 2817.184090614319, + "p99": 2826.97594165802 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 7, + "dispatchLogicalBytes": 2481604608, + "combineLogicalBytes": 2481604608, + "fanoutMean": 5.282684326171875, + "recvTokensMax": 22165, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -17495,28 +16951,28 @@ ] }, { - "id": "cx-10314900", - "identity": "b300|deepep|7168|8|256|fp8-pertoken|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", - "colorKey": "b300_5b8a7672", - "comparisonKey": "facc765e5a3b34b6", + "id": "cx-8c11501e", + "identity": "gb200|deepep|v1|7168|8|256|bf16|normal|cached-layout-comm-only-v1|uniform|8|prefill|normal|none|none|0|tuned||b9896b0a7ca9901", + "colorKey": "gb200_42130d21", + "comparisonKey": "ffdc9987b54494d5", "schemaVersion": 3, - "generatedAt": "2026-06-27T15:56:09.517904+00:00", + "generatedAt": "2026-06-29T13:48:41.427518+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_13", - "sku": "b300", + "publicationStatus": "comparable-experimental", + "runner": "gb200-nv_0", + "sku": "gb200", "backend": "deepep", - "phase": "decode", + "phase": "prefill", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", + "measurementContract": "cached-layout-comm-only-v1", + "topologyClass": "gb200-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep · fp8-pertoken", + "label": "GB200 
EP8 · deepep · bf16 [cl]", "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, @@ -17527,15 +16983,16 @@ "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, - "dispatchDtype": "fp8-pertoken", + "dispatchDtype": "bf16", + "kernelGeneration": "v1", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1351, + "achievedFraction": 0.1316, "configuredUnits": 20, - "deviceUnits": 148, + "deviceUnits": 152, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -17543,317 +17000,243 @@ }, "placement": { "kind": "packed", - "nodes": 1, + "nodes": 2, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "ac583971f94b176", - "workloadId": "set:8:d8d49658059863f2", - "workloadSource": "canonical-serialized", + "traceSignature": "b9896b0a7ca9901", + "workloadId": null, + "workloadSource": "seeded-runtime", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", + "backendVersion": "1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28294164589", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28294164589", - "createdAt": "2026-06-27T15:56:09.517904+00:00", - "sha": "42eddb48c3eed35214c5ad50da1aa6527363ff70" + "id": "28374335449", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449", + "createdAt": "2026-06-29T13:08:20Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { - "tokensPerRank": 1, - "globalTokens": 8, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 57.08799883723259, - "p90": 59.51999872922897, - "p95": 60.864001512527466, - "p99": 74.65600222349167 + "p50": 107.84000158309937, + "p90": 115.29599875211716, + "p95": 118.6240017414093, + "p99": 124.86399710178375 }, 
"combine": { - "p50": 63.1679967045784, - "p90": 65.11999666690826, - "p95": 66.20799750089645, - "p99": 72.4480003118515 + "p50": 126.88000500202179, + "p90": 131.52000308036804, + "p95": 133.44000279903412, + "p99": 140.76800644397736 }, "roundtrip": { - "p50": 124.83199685811996, - "p90": 126.97599828243256, - "p95": 128.48000228405, - "p99": 136.73600554466248 + "p50": 206.62400126457214, + "p90": 212.5760018825531, + "p95": 215.488001704216, + "p99": 222.6559966802597 }, "isolatedSum": { - "p50": 120.25599554181099, - "p90": 124.63999539613724, - "p95": 127.07199901342392, - "p99": 147.10400253534317 + "p50": 234.72000658512115, + "p90": 246.8160018324852, + "p95": 252.06400454044342, + "p99": 265.6320035457611 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 630784, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 7, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2, - "globalTokens": 16, + "tokensPerRank": 256, + "globalTokens": 2048, "dispatch": { - "p50": 57.72799998521805, - "p90": 59.967998415231705, - "p95": 60.99199876189232, - "p99": 68.70400160551071 - }, - "combine": { - "p50": 63.58399987220764, - "p90": 65.18399715423584, - "p95": 66.0799965262413, - "p99": 72.25599884986877 - }, - "roundtrip": { - "p50": 124.4800016283989, - "p90": 127.07200646400452, - "p95": 128.86400520801544, - "p99": 151.5520066022873 - }, - "isolatedSum": { - "p50": 121.31199985742569, - "p90": 125.15199556946754, - "p95": 127.07199528813362, - "p99": 140.9600004553795 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1232896, - "combineLogicalBytes": 1232896, - "fanoutMean": 5.375, - "recvTokensMax": 13, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 
59.167999774217606, - "p90": 61.02399900555611, - "p95": 62.431998550891876, - "p99": 81.40800148248672 - }, - "combine": { - "p50": 63.87200206518173, - "p90": 65.5359998345375, - "p95": 66.49599969387054, - "p99": 72.95999675989151 - }, - "roundtrip": { - "p50": 125.21600723266602, - "p90": 128.09599936008453, - "p95": 130.23999333381653, - "p99": 148.19200336933136 - }, - "isolatedSum": { - "p50": 123.04000183939934, - "p90": 126.55999884009361, - "p95": 128.92799824476242, - "p99": 154.36799824237823 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2480128, - "combineLogicalBytes": 2480128, - "fanoutMean": 5.40625, - "recvTokensMax": 29, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 61.08799949288368, - "p90": 63.29599767923355, - "p95": 64.57599997520447, - "p99": 72.95999675989151 + "p50": 145.88800072669983, + "p90": 154.94400262832642, + "p95": 158.01599621772766, + "p99": 165.8879965543747 }, "combine": { - "p50": 67.58400052785873, - "p90": 69.21599805355072, - "p95": 69.63200122117996, - "p99": 79.52000200748444 + "p50": 167.55199432373047, + "p90": 171.4559942483902, + "p95": 175.58400332927704, + "p99": 180.16000092029572 }, "roundtrip": { - "p50": 130.14400005340576, - "p90": 133.18400084972382, - "p95": 136.35200262069702, - "p99": 151.8400013446808 + "p50": 286.3680124282837, + "p90": 293.5679852962494, + "p95": 296.06398940086365, + "p99": 299.9039888381958 }, "isolatedSum": { - "p50": 128.67200002074242, - "p90": 132.51199573278427, - "p95": 134.20800119638443, - "p99": 152.47999876737595 + "p50": 313.4399950504303, + "p90": 326.3999968767166, + "p95": 333.5999995470047, + "p99": 346.0479974746704 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, + "dispatchLogicalBytes": 155889664, + "combineLogicalBytes": 155889664, + 
"fanoutMean": 5.3095703125, + "recvTokensMax": 1422, "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 16, - "globalTokens": 128, + "tokensPerRank": 512, + "globalTokens": 4096, "dispatch": { - "p50": 61.792001128196716, - "p90": 64.51199948787689, - "p95": 65.5680000782013, - "p99": 77.05599814653397 + "p50": 218.4319943189621, + "p90": 224.44799542427063, + "p95": 227.00800001621246, + "p99": 231.9359928369522 }, "combine": { - "p50": 68.80000233650208, - "p90": 70.65600156784058, - "p95": 71.29599899053574, - "p99": 77.66400277614594 + "p50": 292.38399863243103, + "p90": 300.4800081253052, + "p95": 302.2719919681549, + "p99": 305.6960105895996 }, "roundtrip": { - "p50": 133.215993642807, - "p90": 135.3919953107834, - "p95": 136.99199259281158, - "p99": 163.32800686359406 + "p50": 457.2159945964813, + "p90": 466.17600321769714, + "p95": 468.76800060272217, + "p99": 473.7280011177063 }, "isolatedSum": { - "p50": 130.5920034646988, - "p90": 135.16800105571747, - "p95": 136.86399906873703, - "p99": 154.7200009226799 + "p50": 510.8159929513931, + "p90": 524.9280035495758, + "p95": 529.2799919843674, + "p99": 537.6320034265518 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 9920512, - "combineLogicalBytes": 9920512, - "fanoutMean": 5.40625, - "recvTokensMax": 92, - "stragglerRank": 7, + "dispatchLogicalBytes": 312266752, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 32, - "globalTokens": 256, + "tokensPerRank": 1024, + "globalTokens": 8192, "dispatch": { - "p50": 64.83200192451477, - "p90": 66.91200286149979, - "p95": 67.45599955320358, - "p99": 70.78400254249573 + "p50": 345.69600224494934, + "p90": 352.9599905014038, + "p95": 356.06399178504944, + "p99": 360.9279990196228 }, "combine": { - "p50": 73.53600114583969, - "p90": 75.58400183916092, - "p95": 76.4480009675026, - 
"p99": 98.11200201511383 + "p50": 497.8559911251068, + "p90": 502.7199983596802, + "p95": 504.5440196990967, + "p99": 510.17600297927856 }, "roundtrip": { - "p50": 145.24799585342407, - "p90": 147.64800667762756, - "p95": 148.95999431610107, - "p99": 166.4000004529953 + "p50": 810.2399706840515, + "p90": 817.2799944877625, + "p95": 819.2960023880005, + "p99": 823.7119913101196 }, "isolatedSum": { - "p50": 138.36800307035446, - "p90": 142.4960047006607, - "p95": 143.90400052070618, - "p99": 168.89600455760956 + "p50": 843.5519933700562, + "p90": 855.679988861084, + "p95": 860.6080114841461, + "p99": 871.1040019989014 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19726336, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 182, - "stragglerRank": 7, + "dispatchLogicalBytes": 623443968, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 64, - "globalTokens": 512, + "tokensPerRank": 2048, + "globalTokens": 16384, "dispatch": { - "p50": 73.53600114583969, - "p90": 75.9039968252182, - "p95": 76.73600316047668, - "p99": 81.34400099515915 + "p50": 605.791985988617, + "p90": 613.6320233345032, + "p95": 616.3840293884277, + "p99": 622.6879954338074 }, "combine": { - "p50": 87.77599781751633, - "p90": 89.91999924182892, - "p95": 90.7839983701706, - "p99": 105.0880029797554 + "p50": 880.7039856910706, + "p90": 886.1119747161865, + "p95": 889.5679712295532, + "p99": 894.6560025215149 }, "roundtrip": { - "p50": 173.08799922466278, - "p90": 175.6799966096878, - "p95": 177.37600207328796, - "p99": 204.73599433898926 + "p50": 1458.1760168075562, + "p90": 1466.8480157852173, + "p95": 1469.2800045013428, + "p99": 1474.8159646987915 }, "isolatedSum": { - "p50": 161.31199896335602, - "p90": 165.82399606704712, - "p95": 167.52000153064728, - "p99": 186.43200397491455 + "p50": 1486.4959716796875, + "p90": 
1499.7439980506897, + "p95": 1505.952000617981, + "p99": 1517.3439979553223 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 38993920, - "combineLogicalBytes": 38993920, - "fanoutMean": 5.3125, - "recvTokensMax": 367, + "dispatchLogicalBytes": 1243805696, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 4096, + "globalTokens": 32768, "dispatch": { - "p50": 84.19200032949448, - "p90": 87.0399996638298, - "p95": 88.86399865150452, - "p99": 98.01600128412247 + "p50": 1116.1279678344727, + "p90": 1123.9039897918701, + "p95": 1126.3680458068848, + "p99": 1129.7919750213623 }, "combine": { - "p50": 109.40799862146378, - "p90": 111.93600296974182, - "p95": 112.67200112342834, - "p99": 124.44800138473511 + "p50": 1633.952021598816, + "p90": 1640.9920454025269, + "p95": 1644.2240476608276, + "p99": 1646.9119787216187 }, "roundtrip": { - "p50": 219.7760045528412, - "p90": 223.4240025281906, - "p95": 225.2800017595291, - "p99": 244.80000138282776 + "p50": 2724.479913711548, + "p90": 2733.5360050201416, + "p95": 2735.6479167938232, + "p99": 2747.1680641174316 }, "isolatedSum": { - "p50": 193.59999895095825, - "p90": 198.97600263357162, - "p95": 201.53599977493286, - "p99": 222.46400266885757 + "p50": 2750.0799894332886, + "p90": 2764.896035194397, + "p95": 2770.5920934677124, + "p99": 2776.703953742981 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, + "dispatchLogicalBytes": 2487009280, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, "stragglerRank": 4, "correct": true, "samplesPooled": 600, @@ -17862,28 +17245,28 @@ ] }, { - "id": "cx-5fc48052", - "identity": 
"b300|deepep|4096|8|128|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||dc27c5e0894e569", - "colorKey": "b300_c9569580", - "comparisonKey": "789db7396b5cd7a2", + "id": "cx-42f87ef0", + "identity": "gb200|deepep|v1|4096|8|128|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||03f98832f76b043", + "colorKey": "gb200_b0118480", + "comparisonKey": "047ebfa2d1bec960", "schemaVersion": 3, - "generatedAt": "2026-06-27T11:14:23.346610+00:00", + "generatedAt": "2026-06-29T13:59:42.464730+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_02", - "sku": "b300", + "publicationStatus": "comparable-experimental", + "runner": "gb200-nv_1", + "sku": "gb200", "backend": "deepep", "phase": "prefill", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", + "measurementContract": "runtime-visible-v1", + "topologyClass": "gb200-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep · bf16", + "label": "GB200 EP8 · deepep · fp8", "model": "Qwen3.5", "shape": { "hidden": 4096, @@ -17894,15 +17277,16 @@ "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, - "dispatchDtype": "bf16", + "dispatchDtype": "fp8", + "kernelGeneration": "v1", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1351, + "achievedFraction": 0.1316, "configuredUnits": 20, - "deviceUnits": 148, + "deviceUnits": 152, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -17910,59 +17294,59 @@ }, "placement": { "kind": "packed", - "nodes": 1, + "nodes": 2, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "dc27c5e0894e569", - "workloadId": 
"set:6:76d8142d69406335", - "workloadSource": "canonical-serialized", + "traceSignature": "03f98832f76b043", + "workloadId": null, + "workloadSource": "seeded-runtime", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", + "backendVersion": "1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28287508460", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28287508460", - "createdAt": "2026-06-27T11:14:23.346610+00:00", - "sha": "df7fddee0a275156d4c72fa006cd2b73bce72613" + "id": "28374335449", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449", + "createdAt": "2026-06-29T13:08:20Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 82.56000280380249, - "p90": 85.02399921417236, - "p95": 88.16000074148178, - "p99": 96.3520035147667 + "p50": 342.848002910614, + "p90": 364.3839955329895, + "p95": 600.7999777793884, + "p99": 661.1199975013733 }, "combine": { - "p50": 91.48799628019333, - "p90": 93.9520001411438, - "p95": 94.55999732017517, - "p99": 102.94400155544281 + "p50": 100.28800368309021, + "p90": 337.8880023956299, + "p95": 369.1520094871521, + "p99": 395.6480026245117 }, "roundtrip": { - "p50": 158.39999914169312, - "p90": 166.24000668525696, - "p95": 167.80799627304077, - "p99": 184.4799965620041 + "p50": 414.8479998111725, + "p90": 432.44799971580505, + "p95": 678.4960031509399, + "p99": 714.9760127067566 }, "isolatedSum": { - "p50": 174.04799908399582, - "p90": 178.97599935531616, - "p95": 182.71999806165695, - "p99": 199.2960050702095 + "p50": 443.1360065937042, + "p90": 702.2719979286194, + "p95": 969.9519872665405, + "p99": 1056.768000125885 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 44564480, + "dispatchLogicalBytes": 22282240, "combineLogicalBytes": 44564480, 
"fanoutMean": 5.3125, "recvTokensMax": 699, - "stragglerRank": 5, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 @@ -17971,35 +17355,35 @@ "tokensPerRank": 256, "globalTokens": 2048, "dispatch": { - "p50": 124.70400333404541, - "p90": 127.23200023174286, - "p95": 128.7360042333603, - "p99": 135.83999872207642 + "p50": 349.69601035118103, + "p90": 376.51199102401733, + "p95": 667.680025100708, + "p99": 707.1999907493591 }, "combine": { - "p50": 128.48000228405, - "p90": 130.5920034646988, - "p95": 131.45600259304047, - "p99": 141.02399349212646 + "p50": 139.8719996213913, + "p90": 378.9440095424652, + "p95": 420.54399847984314, + "p99": 448.1920003890991 }, "roundtrip": { - "p50": 231.6800057888031, - "p90": 237.95199394226074, - "p95": 239.29600417613983, - "p99": 251.52000784873962 + "p50": 466.2719964981079, + "p90": 480.6720018386841, + "p95": 703.1999826431274, + "p99": 811.3279938697815 }, "isolatedSum": { - "p50": 253.1840056180954, - "p90": 257.82400369644165, - "p95": 260.19200682640076, - "p99": 276.8639922142029 + "p50": 489.5680099725723, + "p90": 755.4560005664825, + "p95": 1088.2240235805511, + "p99": 1155.3919911384583 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 89726976, + "dispatchLogicalBytes": 44863488, "combineLogicalBytes": 89726976, "fanoutMean": 5.34814453125, "recvTokensMax": 1385, - "stragglerRank": 7, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 @@ -18008,35 +17392,35 @@ "tokensPerRank": 512, "globalTokens": 4096, "dispatch": { - "p50": 174.17599260807037, - "p90": 177.21599340438843, - "p95": 179.07199263572693, - "p99": 195.0400024652481 + "p50": 422.94400930404663, + "p90": 439.61599469184875, + "p95": 456.7680060863495, + "p99": 782.7200293540955 }, "combine": { - "p50": 191.64800643920898, - "p90": 201.02399587631226, - "p95": 201.56799256801605, - "p99": 213.6639952659607 + "p50": 208.12800526618958, + "p90": 442.1440064907074, + "p95": 485.8880043029785, + "p99": 
526.3360142707825 }, "roundtrip": { - "p50": 346.8480110168457, - "p90": 351.26399993896484, - "p95": 352.86399722099304, - "p99": 362.39999532699585 + "p50": 609.9200248718262, + "p90": 626.0160207748413, + "p95": 633.8239908218384, + "p99": 962.3680114746094 }, "isolatedSum": { - "p50": 365.82399904727936, - "p90": 378.2399892807007, - "p95": 380.639985203743, - "p99": 408.7039977312088 + "p50": 631.0720145702362, + "p90": 881.7600011825562, + "p95": 942.656010389328, + "p99": 1309.056043624878 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 179503104, + "dispatchLogicalBytes": 89751552, "combineLogicalBytes": 179503104, "fanoutMean": 5.349609375, "recvTokensMax": 2772, - "stragglerRank": 5, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 @@ -18045,35 +17429,35 @@ "tokensPerRank": 1024, "globalTokens": 8192, "dispatch": { - "p50": 289.34401273727417, - "p90": 292.4480140209198, - "p95": 293.88800263404846, - "p99": 305.34398555755615 + "p50": 580.8320045471191, + "p90": 595.9039926528931, + "p95": 824.7680068016052, + "p99": 938.1440281867981 }, "combine": { - "p50": 389.1200125217438, - "p90": 398.5919952392578, - "p95": 400.9599983692169, - "p99": 410.1119935512543 + "p50": 403.26398611068726, + "p90": 656.4159989356995, + "p95": 708.4479928016663, + "p99": 752.1920204162598 }, "roundtrip": { - "p50": 597.5040197372437, - "p90": 608.1600189208984, - "p95": 612.7039790153503, - "p99": 631.8399906158447 + "p50": 972.8639721870422, + "p90": 1011.4239454269409, + "p95": 1198.0479955673218, + "p99": 1805.3120374679565 }, "isolatedSum": { - "p50": 678.464025259018, - "p90": 691.0400092601776, - "p95": 694.8480010032654, - "p99": 715.4559791088104 + "p50": 984.0959906578064, + "p90": 1252.3199915885925, + "p95": 1533.2159996032715, + "p99": 1690.3360486030579 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 359022592, + "dispatchLogicalBytes": 179511296, "combineLogicalBytes": 359022592, "fanoutMean": 5.349853515625, 
"recvTokensMax": 5558, - "stragglerRank": 7, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 @@ -18082,35 +17466,35 @@ "tokensPerRank": 2048, "globalTokens": 16384, "dispatch": { - "p50": 520.6720232963562, - "p90": 525.439977645874, - "p95": 530.9439897537231, - "p99": 536.0640287399292 + "p50": 897.3760008811951, + "p90": 911.5200042724609, + "p95": 921.1840033531189, + "p99": 1252.511978149414 }, "combine": { - "p50": 754.9759745597839, - "p90": 765.7920122146606, - "p95": 766.9119834899902, - "p99": 778.6880135536194 + "p50": 813.2479786872864, + "p90": 1091.9359922409058, + "p95": 1143.1039571762085, + "p99": 1172.320008277893 }, "roundtrip": { - "p50": 1255.5840015411377, - "p90": 1263.7759447097778, - "p95": 1268.1920528411865, - "p99": 1274.8479843139648 + "p50": 1658.1120491027832, + "p90": 1689.6320581436157, + "p95": 1707.5200080871582, + "p99": 1989.1200065612793 }, "isolatedSum": { - "p50": 1275.6479978561401, - "p90": 1291.2319898605347, - "p95": 1297.8559732437134, - "p99": 1314.7520422935486 + "p50": 1710.6239795684814, + "p90": 2003.4559965133667, + "p95": 2064.2879605293274, + "p99": 2424.831986427307 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 716111872, + "dispatchLogicalBytes": 358055936, "combineLogicalBytes": 716111872, "fanoutMean": 5.33544921875, "recvTokensMax": 10982, - "stragglerRank": 7, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 @@ -18119,31 +17503,31 @@ "tokensPerRank": 4096, "globalTokens": 32768, "dispatch": { - "p50": 979.5200228691101, - "p90": 990.1760220527649, - "p95": 992.0960068702698, - "p99": 1001.5039443969727 + "p50": 1547.2320318222046, + "p90": 1564.2240047454834, + "p95": 1583.3920240402222, + "p99": 1904.960036277771 }, "combine": { - "p50": 1442.304015159607, - "p90": 1454.1120529174805, - "p95": 1455.1680088043213, - "p99": 1493.7599897384644 + "p50": 1536.8000268936157, + "p90": 1578.5280466079712, + "p95": 1848.512053489685, + "p99": 
1890.239953994751 }, "roundtrip": { - "p50": 2391.200065612793, - "p90": 2402.9760360717773, - "p95": 2407.7439308166504, - "p99": 2476.6080379486084 + "p50": 3048.896074295044, + "p90": 3067.8720474243164, + "p95": 3086.6239070892334, + "p99": 3380.000114440918 }, "isolatedSum": { - "p50": 2421.824038028717, - "p90": 2444.2880749702454, - "p95": 2447.264015674591, - "p99": 2495.263934135437 + "p50": 3084.0320587158203, + "p90": 3142.7520513534546, + "p95": 3431.904077529907, + "p99": 3795.199990272522 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1432395776, + "dispatchLogicalBytes": 716197888, "combineLogicalBytes": 1432395776, "fanoutMean": 5.336090087890625, "recvTokensMax": 21939, @@ -18155,16 +17539,16 @@ ] }, { - "id": "cx-65c7aa3e", - "identity": "b300|deepep|4096|8|128|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||dc27c5e0894e569", - "colorKey": "b300_307ed708", - "comparisonKey": "691973c29c59446c", + "id": "cx-e69d7792", + "identity": "gb200|deepep|v1|5120|8|160|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||a9df48e6438e77a", + "colorKey": "gb200_b0118480", + "comparisonKey": "003150d36349a329", "schemaVersion": 3, - "generatedAt": "2026-06-27T09:52:08.477764+00:00", + "generatedAt": "2026-06-29T13:39:30.760982+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_06", - "sku": "b300", + "publicationStatus": "comparable-experimental", + "runner": "gb200-nv_0", + "sku": "gb200", "backend": "deepep", "phase": "prefill", "mode": "normal", @@ -18172,30 +17556,31 @@ "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "runtime-visible-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", + "topologyClass": "gb200-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep · bf16", - "model": "Qwen3.5", + "label": "GB200 EP8 · deepep · fp8", + "model": "shape 5120/8/160", "shape": 
{ - "hidden": 4096, + "hidden": 5120, "topk": 8, - "experts": 128, + "experts": 160, "routing": "uniform", "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, - "dispatchDtype": "bf16", + "dispatchDtype": "fp8", + "kernelGeneration": "v1", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1351, + "achievedFraction": 0.1316, "configuredUnits": 20, - "deviceUnits": 148, + "deviceUnits": 152, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -18203,59 +17588,59 @@ }, "placement": { "kind": "packed", - "nodes": 1, + "nodes": 2, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "dc27c5e0894e569", - "workloadId": "set:6:76d8142d69406335", - "workloadSource": "canonical-serialized", + "traceSignature": "a9df48e6438e77a", + "workloadId": null, + "workloadSource": "seeded-runtime", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", + "backendVersion": "1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28285702163", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285702163", - "createdAt": "2026-06-27T09:52:08.477764+00:00", - "sha": "149586650dbed5b7579537347e9489d5b41543c1" + "id": "28374321542", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374321542", + "createdAt": "2026-06-29T13:08:07Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 82.68799632787704, - "p90": 86.30400151014328, - "p95": 89.66399729251862, - "p99": 95.29600292444229 + "p50": 386.27201318740845, + "p90": 416.6719913482666, + "p95": 425.24799704551697, + "p99": 439.8080110549927 }, "combine": { - "p50": 
92.22400188446045, - "p90": 94.43199634552002, - "p95": 101.6319990158081, - "p99": 103.96800190210342 + "p50": 106.88000172376633, + "p90": 111.26399785280228, + "p95": 113.66400122642517, + "p99": 117.27999895811081 }, "roundtrip": { - "p50": 159.9999964237213, - "p90": 167.90400445461273, - "p95": 170.49600183963776, - "p99": 177.12000012397766 + "p50": 466.97598695755005, + "p90": 492.2559857368469, + "p95": 498.49599599838257, + "p99": 509.3439817428589 }, "isolatedSum": { - "p50": 174.9119982123375, - "p90": 180.7359978556633, - "p95": 191.29599630832672, - "p99": 199.26400482654572 + "p50": 493.1520149111748, + "p90": 527.9359892010689, + "p95": 538.9119982719421, + "p99": 557.0880100131035 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 44564480, - "combineLogicalBytes": 44564480, - "fanoutMean": 5.3125, + "dispatchLogicalBytes": 27837440, + "combineLogicalBytes": 55674880, + "fanoutMean": 5.3095703125, "recvTokensMax": 699, - "stragglerRank": 7, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 @@ -18264,35 +17649,35 @@ "tokensPerRank": 256, "globalTokens": 2048, "dispatch": { - "p50": 123.29600006341934, - "p90": 127.00800597667694, - "p95": 128.22400033473969, - "p99": 140.03199338912964 + "p50": 381.6959857940674, + "p90": 408.6399972438812, + "p95": 415.96800088882446, + "p99": 430.4960072040558 }, "combine": { - "p50": 127.9039978981018, - "p90": 129.82399761676788, - "p95": 131.9359987974167, - "p99": 143.42400431632996 + "p50": 146.04799449443817, + "p90": 151.74399316310883, + "p95": 154.08000349998474, + "p99": 158.01599621772766 }, "roundtrip": { - "p50": 229.5999974012375, - "p90": 235.83999276161194, - "p95": 237.34399676322937, - "p99": 241.60000681877136 + "p50": 511.23201847076416, + "p90": 531.7440032958984, + "p95": 538.4640097618103, + "p99": 548.1600165367126 }, "isolatedSum": { - "p50": 251.19999796152115, - "p90": 256.8320035934448, - "p95": 260.1599991321564, - "p99": 283.4559977054596 + "p50": 
527.7439802885056, + "p90": 560.38399040699, + "p95": 570.0480043888092, + "p99": 588.5120034217834 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 89726976, - "combineLogicalBytes": 89726976, - "fanoutMean": 5.34814453125, - "recvTokensMax": 1385, - "stragglerRank": 4, + "dispatchLogicalBytes": 55552000, + "combineLogicalBytes": 111104000, + "fanoutMean": 5.2978515625, + "recvTokensMax": 1387, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 @@ -18301,35 +17686,35 @@ "tokensPerRank": 512, "globalTokens": 4096, "dispatch": { - "p50": 173.75999689102173, - "p90": 177.18400061130524, - "p95": 178.6240041255951, - "p99": 186.97600066661835 + "p50": 473.7600088119507, + "p90": 490.01601338386536, + "p95": 495.9680140018463, + "p99": 504.5120120048523 }, "combine": { - "p50": 191.64800643920898, - "p90": 200.3519982099533, - "p95": 200.8640021085739, - "p99": 212.3199999332428 + "p50": 221.82400524616241, + "p90": 228.19200158119202, + "p95": 230.24000227451324, + "p99": 236.54399812221527 }, "roundtrip": { - "p50": 345.7599878311157, - "p90": 350.816011428833, - "p95": 352.6400029659271, - "p99": 360.1279854774475 + "p50": 696.3199973106384, + "p90": 716.2240147590637, + "p95": 720.9600210189819, + "p99": 728.6400198936462 }, "isolatedSum": { - "p50": 365.4080033302307, - "p90": 377.53599882125854, - "p95": 379.488006234169, - "p99": 399.29600059986115 + "p50": 695.5840140581131, + "p90": 718.2080149650574, + "p95": 726.2080162763596, + "p99": 741.0560101270676 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 179503104, - "combineLogicalBytes": 179503104, - "fanoutMean": 5.349609375, - "recvTokensMax": 2772, - "stragglerRank": 4, + "dispatchLogicalBytes": 111549440, + "combineLogicalBytes": 223098880, + "fanoutMean": 5.319091796875, + "recvTokensMax": 2762, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 @@ -18338,35 +17723,35 @@ "tokensPerRank": 1024, "globalTokens": 8192, "dispatch": { - "p50": 
289.0560030937195, - "p90": 292.86399483680725, - "p95": 294.1119968891144, - "p99": 311.71199679374695 + "p50": 666.3359999656677, + "p90": 688.7999773025513, + "p95": 693.7280297279358, + "p99": 700.8000016212463 }, "combine": { - "p50": 397.599995136261, - "p90": 408.9280068874359, - "p95": 410.0160002708435, - "p99": 421.7279851436615 + "p50": 473.7600088119507, + "p90": 480.25599122047424, + "p95": 482.4639856815338, + "p99": 487.4880015850067 }, "roundtrip": { - "p50": 594.3359732627869, - "p90": 600.6079912185669, - "p95": 604.4480204582214, - "p99": 610.5920076370239 + "p50": 1111.4879846572876, + "p90": 1132.2239637374878, + "p95": 1135.8400583267212, + "p99": 1144.9600458145142 }, "isolatedSum": { - "p50": 686.6559982299805, - "p90": 701.7920017242432, - "p95": 704.1279971599579, - "p99": 733.4399819374084 + "p50": 1140.0960087776184, + "p90": 1169.0559685230255, + "p95": 1176.1920154094696, + "p99": 1188.288003206253 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 359022592, - "combineLogicalBytes": 359022592, - "fanoutMean": 5.349853515625, - "recvTokensMax": 5558, - "stragglerRank": 4, + "dispatchLogicalBytes": 223365120, + "combineLogicalBytes": 446730240, + "fanoutMean": 5.325439453125, + "recvTokensMax": 5518, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -18375,35 +17760,35 @@ "tokensPerRank": 2048, "globalTokens": 16384, "dispatch": { - "p50": 521.3119983673096, - "p90": 528.544008731842, - "p95": 534.0480208396912, - "p99": 546.8479990959167 + "p50": 1043.4880256652832, + "p90": 1062.656044960022, + "p95": 1066.9759511947632, + "p99": 1077.8239965438843 }, "combine": { - "p50": 755.2000284194946, - "p90": 765.887975692749, - "p95": 766.6559815406799, - "p99": 781.5039753913879 + "p50": 840.0959968566895, + "p90": 846.015989780426, + "p95": 847.711980342865, + "p99": 851.6479730606079 }, "roundtrip": { - "p50": 1255.0400495529175, - "p90": 1264.8320198059082, - "p95": 1271.3279724121094, - "p99": 
1316.3199424743652 + "p50": 1844.256043434143, + "p90": 1863.935947418213, + "p95": 1870.911955833435, + "p99": 1884.384036064148 }, "isolatedSum": { - "p50": 1276.5120267868042, - "p90": 1294.431984424591, - "p95": 1300.704002380371, - "p99": 1328.3519744873047 + "p50": 1883.5840225219727, + "p90": 1908.672034740448, + "p95": 1914.6879315376282, + "p99": 1929.4719696044922 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 716111872, - "combineLogicalBytes": 716111872, - "fanoutMean": 5.33544921875, - "recvTokensMax": 10982, - "stragglerRank": 7, + "dispatchLogicalBytes": 446817280, + "combineLogicalBytes": 893634560, + "fanoutMean": 5.32647705078125, + "recvTokensMax": 11032, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -18412,35 +17797,35 @@ "tokensPerRank": 4096, "globalTokens": 32768, "dispatch": { - "p50": 980.0000190734863, - "p90": 991.0719990730286, - "p95": 995.0399994850159, - "p99": 1015.1040554046631 + "p50": 1834.496021270752, + "p90": 1857.9519987106323, + "p95": 1866.495966911316, + "p99": 2043.9679622650146 }, "combine": { - "p50": 1441.856026649475, - "p90": 1453.5679817199707, - "p95": 1456.9599628448486, - "p99": 1492.5119876861572 + "p50": 1586.2400531768799, + "p90": 1593.1520462036133, + "p95": 1595.296025276184, + "p99": 1600.7360219955444 }, "roundtrip": { - "p50": 2390.6240463256836, - "p90": 2406.9759845733643, - "p95": 2415.616035461426, - "p99": 2474.3359088897705 + "p50": 3375.391960144043, + "p90": 3391.871929168701, + "p95": 3397.887945175171, + "p99": 3405.280113220215 }, "isolatedSum": { - "p50": 2421.8560457229614, - "p90": 2444.6399807929993, - "p95": 2451.9999623298645, - "p99": 2507.6160430908203 + "p50": 3420.736074447632, + "p90": 3451.1040449142456, + "p95": 3461.7919921875, + "p99": 3644.703984260559 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1432395776, - "combineLogicalBytes": 1432395776, - "fanoutMean": 5.336090087890625, - "recvTokensMax": 21939, - "stragglerRank": 6, + 
"dispatchLogicalBytes": 893132800, + "combineLogicalBytes": 1786265600, + "fanoutMean": 5.323486328125, + "recvTokensMax": 21895, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 @@ -18448,16 +17833,16 @@ ] }, { - "id": "cx-ec7ecdcc", - "identity": "b300|deepep|5120|8|160|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||0c022a63bbcbf42", - "colorKey": "b300_307ed708", - "comparisonKey": "03e634138c74f76f", + "id": "cx-e1f3cb9e", + "identity": "gb200|deepep|v1|6144|8|256|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||b9896b0a7ca9901", + "colorKey": "gb200_b0118480", + "comparisonKey": "a99dfa04a87e0b18", "schemaVersion": 3, - "generatedAt": "2026-06-27T09:52:35.993019+00:00", + "generatedAt": "2026-06-29T14:03:20.332386+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_13", - "sku": "b300", + "publicationStatus": "comparable-experimental", + "runner": "gb200-nv_1", + "sku": "gb200", "backend": "deepep", "phase": "prefill", "mode": "normal", @@ -18465,30 +17850,31 @@ "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "runtime-visible-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", + "topologyClass": "gb200-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep · bf16", - "model": "shape 5120/8/160", + "label": "GB200 EP8 · deepep · fp8", + "model": "MiniMax-M3", "shape": { - "hidden": 5120, + "hidden": 6144, "topk": 8, - "experts": 160, + "experts": 256, "routing": "uniform", "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, - "dispatchDtype": "bf16", + "dispatchDtype": "fp8", + "kernelGeneration": "v1", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1351, + "achievedFraction": 0.1316, "configuredUnits": 20, - "deviceUnits": 148, + 
"deviceUnits": 152, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -18496,59 +17882,59 @@ }, "placement": { "kind": "packed", - "nodes": 1, + "nodes": 2, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "0c022a63bbcbf42", - "workloadId": "set:6:28c0c09b13ff0acf", - "workloadSource": "canonical-serialized", + "traceSignature": "b9896b0a7ca9901", + "workloadId": null, + "workloadSource": "seeded-runtime", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", + "backendVersion": "1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28285713494", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285713494", - "createdAt": "2026-06-27T09:52:35.993019+00:00", - "sha": "149586650dbed5b7579537347e9489d5b41543c1" + "id": "28374335449", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449", + "createdAt": "2026-06-29T13:08:20Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 92.51199662685394, - "p90": 98.81599992513657, - "p95": 100.12800246477127, - "p99": 117.50400066375732 + "p50": 415.8720076084137, + "p90": 446.5920031070709, + "p95": 601.9840240478516, + "p99": 737.6000285148621 }, "combine": { - "p50": 103.13600301742554, - "p90": 104.22399640083313, - "p95": 104.96000200510025, - "p99": 114.01599645614624 + "p50": 114.68800157308578, + "p90": 122.6240023970604, + "p95": 171.39199376106262, + "p99": 422.6880073547363 }, "roundtrip": { - "p50": 176.60799622535706, - "p90": 182.8799992799759, - "p95": 184.92799997329712, - "p99": 195.5520063638687 + "p50": 508.7360143661499, + "p90": 540.6079888343811, + "p95": 742.0799732208252, + "p99": 809.9200129508972 }, "isolatedSum": { - "p50": 195.64799964427948, - 
"p90": 203.0399963259697, - "p95": 205.08800446987152, - "p99": 231.51999711990356 + "p50": 530.5600091814995, + "p90": 569.2160055041313, + "p95": 773.3760178089142, + "p99": 1160.2880358695984 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 55674880, - "combineLogicalBytes": 55674880, - "fanoutMean": 5.3095703125, - "recvTokensMax": 699, - "stragglerRank": 7, + "dispatchLogicalBytes": 33288192, + "combineLogicalBytes": 66576384, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 @@ -18557,35 +17943,35 @@ "tokensPerRank": 256, "globalTokens": 2048, "dispatch": { - "p50": 119.52000111341476, - "p90": 124.95999783277512, - "p95": 126.46399438381195, - "p99": 136.25599443912506 + "p50": 402.3999869823456, + "p90": 447.29599356651306, + "p95": 700.1280188560486, + "p99": 777.7919769287109 }, "combine": { - "p50": 139.96799290180206, - "p90": 141.37600362300873, - "p95": 142.7839994430542, - "p99": 151.48800611495972 + "p50": 158.01599621772766, + "p90": 401.5679955482483, + "p95": 440.5440092086792, + "p99": 486.4000082015991 }, "roundtrip": { - "p50": 244.54399943351746, - "p90": 249.4720071554184, - "p95": 251.10399723052979, - "p99": 258.08000564575195 + "p50": 540.0959849357605, + "p90": 579.2959928512573, + "p95": 596.992015838623, + "p99": 850.9119749069214 }, "isolatedSum": { - "p50": 259.4879940152168, - "p90": 266.33600145578384, - "p95": 269.24799382686615, - "p99": 287.7440005540848 + "p50": 560.4159832000732, + "p90": 848.8639891147614, + "p95": 1140.6720280647278, + "p99": 1264.19198513031 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 111104000, - "combineLogicalBytes": 111104000, - "fanoutMean": 5.2978515625, - "recvTokensMax": 1387, - "stragglerRank": 7, + "dispatchLogicalBytes": 66809856, + "combineLogicalBytes": 133619712, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 @@ 
-18594,35 +17980,35 @@ "tokensPerRank": 512, "globalTokens": 4096, "dispatch": { - "p50": 185.85599958896637, - "p90": 189.05599415302277, - "p95": 191.48799777030945, - "p99": 201.7280012369156 + "p50": 528.4799933433533, + "p90": 556.5760135650635, + "p95": 668.0319905281067, + "p99": 878.5600066184998 }, "combine": { - "p50": 214.62400257587433, - "p90": 224.48000311851501, - "p95": 225.43999552726746, - "p99": 236.4799976348877 + "p50": 261.9839906692505, + "p90": 510.55997610092163, + "p95": 548.5759973526001, + "p99": 573.4080076217651 }, "roundtrip": { - "p50": 372.76801466941833, - "p90": 379.2319893836975, - "p95": 381.632000207901, - "p99": 400.9599983692169 + "p50": 754.144012928009, + "p90": 774.4960188865662, + "p95": 1018.6560153961182, + "p99": 1087.9679918289185 }, "isolatedSum": { - "p50": 400.4800021648407, - "p90": 413.5359972715378, - "p95": 416.9279932975769, - "p99": 438.2079988718033 + "p50": 790.4639840126038, + "p90": 1067.135989665985, + "p95": 1216.6079878807068, + "p99": 1451.968014240265 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 223098880, - "combineLogicalBytes": 223098880, - "fanoutMean": 5.319091796875, - "recvTokensMax": 2762, - "stragglerRank": 5, + "dispatchLogicalBytes": 133828608, + "combineLogicalBytes": 267657216, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 @@ -18631,35 +18017,35 @@ "tokensPerRank": 1024, "globalTokens": 8192, "dispatch": { - "p50": 303.0720055103302, - "p90": 310.65601110458374, - "p95": 313.1519854068756, - "p99": 327.84000039100647 + "p50": 750.2719759941101, + "p90": 798.7840175628662, + "p95": 878.3680200576782, + "p99": 1141.759991645813 }, "combine": { - "p50": 436.2240135669708, - "p90": 445.47200202941895, - "p95": 445.8880126476288, - "p99": 458.9439928531647 + "p50": 475.19999742507935, + "p90": 747.6159930229187, + "p95": 801.0879755020142, + "p99": 840.9600257873535 }, "roundtrip": { - "p50": 
699.4240283966064, - "p90": 707.9359889030457, - "p95": 712.2560143470764, - "p99": 739.520013332367 + "p50": 1190.783977508545, + "p90": 1414.1440391540527, + "p95": 1574.6560096740723, + "p99": 5944.064140319824 }, "isolatedSum": { - "p50": 739.296019077301, - "p90": 756.1280131340027, - "p95": 759.0399980545044, - "p99": 786.7839932441711 + "p50": 1225.4719734191895, + "p90": 1546.400010585785, + "p95": 1679.4559955596924, + "p99": 1982.7200174331665 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 446730240, - "combineLogicalBytes": 446730240, - "fanoutMean": 5.325439453125, - "recvTokensMax": 5518, - "stragglerRank": 5, + "dispatchLogicalBytes": 267190272, + "combineLogicalBytes": 534380544, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 @@ -18668,35 +18054,35 @@ "tokensPerRank": 2048, "globalTokens": 16384, "dispatch": { - "p50": 548.9280223846436, - "p90": 558.0160021781921, - "p95": 559.7119927406311, - "p99": 571.5199708938599 + "p50": 1186.3679885864258, + "p90": 1213.215947151184, + "p95": 1304.7679662704468, + "p99": 1598.9760160446167 }, "combine": { - "p50": 779.3279886245728, - "p90": 790.4639840126038, - "p95": 791.263997554779, - "p99": 803.9360046386719 + "p50": 857.7600121498108, + "p90": 1133.6959600448608, + "p95": 1182.8479766845703, + "p99": 1223.9680290222168 }, "roundtrip": { - "p50": 1311.1679553985596, - "p90": 1321.3759660720825, - "p95": 1328.3519744873047, - "p99": 1356.0960292816162 + "p50": 2003.2639503479004, + "p90": 2130.496025085449, + "p95": 2245.8879947662354, + "p99": 2302.9119968414307 }, "isolatedSum": { - "p50": 1328.2560110092163, - "p90": 1348.479986190796, - "p95": 1350.9759902954102, - "p99": 1375.4559755325317 + "p50": 2044.1280007362366, + "p90": 2346.911907196045, + "p95": 2487.615942955017, + "p99": 2822.9440450668335 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 893634560, - "combineLogicalBytes": 893634560, - 
"fanoutMean": 5.32647705078125, - "recvTokensMax": 11032, - "stragglerRank": 4, + "dispatchLogicalBytes": 533059584, + "combineLogicalBytes": 1066119168, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 @@ -18705,35 +18091,35 @@ "tokensPerRank": 4096, "globalTokens": 32768, "dispatch": { - "p50": 1032.3200225830078, - "p90": 1042.688012123108, - "p95": 1046.6879606246948, - "p99": 1057.919979095459 + "p50": 2106.2400341033936, + "p90": 2129.6958923339844, + "p95": 2328.831911087036, + "p99": 2491.6160106658936 }, "combine": { - "p50": 1477.4080514907837, - "p90": 1481.4079999923706, - "p95": 1490.9759759902954, - "p99": 1538.9440059661865 + "p50": 1610.0159883499146, + "p90": 1913.599967956543, + "p95": 1943.8079595565796, + "p99": 1971.168041229248 }, "roundtrip": { - "p50": 2480.6079864501953, - "p90": 2492.9919242858887, - "p95": 2498.624086380005, - "p99": 2541.7280197143555 + "p50": 3669.4719791412354, + "p90": 3683.3600997924805, + "p95": 3691.3599967956543, + "p99": 3996.0319995880127 }, "isolatedSum": { - "p50": 2509.7280740737915, - "p90": 2524.0960121154785, - "p95": 2537.6639366149902, - "p99": 2596.8639850616455 + "p50": 3716.256022453308, + "p90": 4043.2958602905273, + "p95": 4272.639870643616, + "p99": 4462.784051895142 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1786265600, - "combineLogicalBytes": 1786265600, - "fanoutMean": 5.323486328125, - "recvTokensMax": 21895, - "stragglerRank": 7, + "dispatchLogicalBytes": 1065861120, + "combineLogicalBytes": 2131722240, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 @@ -18741,16 +18127,16 @@ ] }, { - "id": "cx-99771256", - "identity": "b300|deepep|6144|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", - "colorKey": "b300_c9569580", - "comparisonKey": "f9f9af4879f1b5f6", + "id": 
"cx-b9c2ee85", + "identity": "gb200|deepep|v1|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||b9896b0a7ca9901", + "colorKey": "gb200_6d63c708", + "comparisonKey": "7d3b869c7fd78b55", "schemaVersion": 3, - "generatedAt": "2026-06-27T11:13:49.871789+00:00", + "generatedAt": "2026-06-29T13:50:26.724922+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_06", - "sku": "b300", + "publicationStatus": "comparable-experimental", + "runner": "gb200-nv_0", + "sku": "gb200", "backend": "deepep", "phase": "prefill", "mode": "normal", @@ -18758,14 +18144,14 @@ "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", + "topologyClass": "gb200-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep · bf16", - "model": "MiniMax-M3", + "label": "GB200 EP8 · deepep · fp8", + "model": "DeepSeek-V3/V4", "shape": { - "hidden": 6144, + "hidden": 7168, "topk": 8, "experts": 256, "routing": "uniform", @@ -18773,15 +18159,16 @@ "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, - "dispatchDtype": "bf16", + "dispatchDtype": "fp8", + "kernelGeneration": "v1", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1351, + "achievedFraction": 0.1316, "configuredUnits": 20, - "deviceUnits": 148, + "deviceUnits": 152, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -18789,59 +18176,59 @@ }, "placement": { "kind": "packed", - "nodes": 1, + "nodes": 2, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "64d989e2e2a6b31", - "workloadId": "set:6:9f5e1e005a35e937", - "workloadSource": "canonical-serialized", + "traceSignature": "b9896b0a7ca9901", + "workloadId": null, + "workloadSource": 
"seeded-runtime", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", + "backendVersion": "1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28287497246", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28287497246", - "createdAt": "2026-06-27T11:13:49.871789+00:00", - "sha": "df7fddee0a275156d4c72fa006cd2b73bce72613" + "id": "28374335449", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449", + "createdAt": "2026-06-29T13:08:20Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 99.29600358009338, - "p90": 102.14400291442871, - "p95": 102.9760017991066, - "p99": 110.55999994277954 + "p50": 112.06399649381638, + "p90": 119.61600184440613, + "p95": 122.75200337171555, + "p99": 128.7360042333603 }, "combine": { - "p50": 105.69600015878677, - "p90": 114.20799791812897, - "p95": 114.62400108575821, - "p99": 128.83199751377106 + "p50": 119.74400281906128, + "p90": 124.86399710178375, + "p95": 128.09599936008453, + "p99": 136.73600554466248 }, "roundtrip": { - "p50": 184.57600474357605, - "p90": 188.83199989795685, - "p95": 190.17599523067474, - "p99": 198.08000326156616 + "p50": 275.39199590682983, + "p90": 286.24001145362854, + "p95": 289.792001247406, + "p99": 294.3040132522583 }, "isolatedSum": { - "p50": 204.99200373888016, - "p90": 216.35200083255768, - "p95": 217.6000028848648, - "p99": 239.3919974565506 + "p50": 231.80799931287766, + "p90": 244.47999894618988, + "p95": 250.84800273180008, + "p99": 265.47200977802277 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 66576384, - "combineLogicalBytes": 66576384, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, "fanoutMean": 5.291015625, "recvTokensMax": 723, - "stragglerRank": 4, + "stragglerRank": 3, 
"correct": true, "samplesPooled": 600, "trials": 3 @@ -18850,35 +18237,35 @@ "tokensPerRank": 256, "globalTokens": 2048, "dispatch": { - "p50": 132.38400220870972, - "p90": 137.472003698349, - "p95": 139.42399621009827, - "p99": 147.20000326633453 + "p50": 143.93599331378937, + "p90": 150.84800124168396, + "p95": 153.28000485897064, + "p99": 157.98400342464447 }, "combine": { - "p50": 150.14399588108063, - "p90": 151.61600708961487, - "p95": 151.7760008573532, - "p99": 154.11199629306793 + "p50": 161.98399662971497, + "p90": 166.78400337696075, + "p95": 168.83200407028198, + "p99": 172.7360039949417 }, "roundtrip": { - "p50": 259.93600487709045, - "p90": 264.0640139579773, - "p95": 265.1520073413849, - "p99": 282.81599283218384 + "p50": 363.072007894516, + "p90": 370.9760010242462, + "p95": 374.1759955883026, + "p99": 381.3439905643463 }, "isolatedSum": { - "p50": 282.52799808979034, - "p90": 289.08801078796387, - "p95": 291.1999970674515, - "p99": 301.31199955940247 + "p50": 305.91998994350433, + "p90": 317.6320046186447, + "p95": 322.1120089292526, + "p99": 330.7200074195862 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 133619712, - "combineLogicalBytes": 133619712, + "dispatchLogicalBytes": 77944832, + "combineLogicalBytes": 155889664, "fanoutMean": 5.3095703125, "recvTokensMax": 1422, - "stragglerRank": 0, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 @@ -18887,35 +18274,35 @@ "tokensPerRank": 512, "globalTokens": 4096, "dispatch": { - "p50": 197.37599790096283, - "p90": 201.34399831295013, - "p95": 202.36800611019135, - "p99": 210.40000021457672 + "p50": 199.39200580120087, + "p90": 207.07200467586517, + "p95": 210.4959934949875, + "p99": 215.39199352264404 }, "combine": { - "p50": 238.81599307060242, - "p90": 248.79999458789825, - "p95": 249.85599517822266, - "p99": 255.74401021003723 + "p50": 284.5439910888672, + "p90": 290.20801186561584, + "p95": 292.1600043773651, + "p99": 296.03201150894165 }, "roundtrip": { - "p50": 
410.4959964752197, - "p90": 417.7919924259186, - "p95": 420.8959937095642, - "p99": 438.01599740982056 + "p50": 594.8479771614075, + "p90": 603.2639741897583, + "p95": 605.4080128669739, + "p99": 613.3120059967041 }, "isolatedSum": { - "p50": 436.19199097156525, - "p90": 450.1439929008484, - "p95": 452.224001288414, - "p99": 466.14401042461395 + "p50": 483.93599689006805, + "p90": 497.280016541481, + "p95": 502.6559978723526, + "p99": 511.4240050315857 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 267657216, - "combineLogicalBytes": 267657216, + "dispatchLogicalBytes": 156133376, + "combineLogicalBytes": 312266752, "fanoutMean": 5.31787109375, "recvTokensMax": 2779, - "stragglerRank": 7, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 @@ -18924,35 +18311,35 @@ "tokensPerRank": 1024, "globalTokens": 8192, "dispatch": { - "p50": 319.42400336265564, - "p90": 327.5519907474518, - "p95": 330.24001121520996, - "p99": 346.94400429725647 + "p50": 313.6959969997406, + "p90": 321.50399684906006, + "p95": 324.44798946380615, + "p99": 334.01599526405334 }, "combine": { - "p50": 444.89601254463196, - "p90": 447.61601090431213, - "p95": 449.0880072116852, - "p99": 458.3680033683777 + "p50": 489.56799507141113, + "p90": 495.87199091911316, + "p95": 497.8240132331848, + "p99": 502.78401374816895 }, "roundtrip": { - "p50": 742.464005947113, - "p90": 748.960018157959, - "p95": 751.6160011291504, - "p99": 762.1440291404724 + "p50": 1021.6319561004639, + "p90": 1028.607964515686, + "p95": 1031.008005142212, + "p99": 1035.1040363311768 }, "isolatedSum": { - "p50": 764.3200159072876, - "p90": 775.1680016517639, - "p95": 779.3280184268951, - "p99": 805.3120076656342 + "p50": 803.2639920711517, + "p90": 817.3759877681732, + "p95": 822.272002696991, + "p99": 836.8000090122223 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 534380544, - "combineLogicalBytes": 534380544, + "dispatchLogicalBytes": 311721984, + "combineLogicalBytes": 623443968, 
"fanoutMean": 5.30859375, "recvTokensMax": 5505, - "stragglerRank": 4, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 @@ -18961,35 +18348,35 @@ "tokensPerRank": 2048, "globalTokens": 16384, "dispatch": { - "p50": 568.2880282402039, - "p90": 572.9600191116333, - "p95": 574.176013469696, - "p99": 603.4560203552246 + "p50": 559.008002281189, + "p90": 567.903995513916, + "p95": 571.3919997215271, + "p99": 579.8079967498779 }, "combine": { - "p50": 802.4640083312988, - "p90": 813.7279748916626, - "p95": 814.9759769439697, - "p99": 830.847978591919 + "p50": 875.1999735832214, + "p90": 881.7920088768005, + "p95": 884.2880129814148, + "p99": 888.0320191383362 }, "roundtrip": { - "p50": 1348.5759496688843, - "p90": 1358.5599660873413, - "p95": 1367.3280477523804, - "p99": 1390.0799751281738 + "p50": 1877.8879642486572, + "p90": 1887.8079652786255, + "p95": 1891.1360502243042, + "p99": 1899.3279933929443 }, "isolatedSum": { - "p50": 1370.7520365715027, - "p90": 1386.687994003296, - "p95": 1389.1519904136658, - "p99": 1434.3039989471436 + "p50": 1434.2079758644104, + "p90": 1449.6960043907166, + "p95": 1455.680012702942, + "p99": 1467.840015888214 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1066119168, - "combineLogicalBytes": 1066119168, + "dispatchLogicalBytes": 621902848, + "combineLogicalBytes": 1243805696, "fanoutMean": 5.29547119140625, "recvTokensMax": 10952, - "stragglerRank": 4, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 @@ -18998,35 +18385,35 @@ "tokensPerRank": 4096, "globalTokens": 32768, "dispatch": { - "p50": 1055.3920269012451, - "p90": 1064.5760297775269, - "p95": 1068.1920051574707, - "p99": 1080.191969871521 + "p50": 1040.1279926300049, + "p90": 1051.967978477478, + "p95": 1054.5599460601807, + "p99": 1064.2880201339722 }, "combine": { - "p50": 1502.8799772262573, - "p90": 1514.464020729065, - "p95": 1516.8319940567017, - "p99": 1539.6159887313843 + "p50": 1631.55198097229, + "p90": 
1638.1440162658691, + "p95": 1640.0320529937744, + "p99": 1649.3760347366333 }, "roundtrip": { - "p50": 2540.4160022735596, - "p90": 2552.6719093322754, - "p95": 2560.512065887451, - "p99": 2638.6559009552 + "p50": 3564.192056655884, + "p90": 3574.78404045105, + "p95": 3577.2159099578857, + "p99": 3582.304000854492 }, "isolatedSum": { - "p50": 2558.2720041275024, - "p90": 2579.040050506592, - "p95": 2585.0239992141724, - "p99": 2619.8079586029053 + "p50": 2671.679973602295, + "p90": 2690.111994743347, + "p95": 2694.591999053955, + "p99": 2713.6640548706055 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2131722240, - "combineLogicalBytes": 2131722240, + "dispatchLogicalBytes": 1243504640, + "combineLogicalBytes": 2487009280, "fanoutMean": 5.294189453125, "recvTokensMax": 21781, - "stragglerRank": 4, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 @@ -19034,16 +18421,16 @@ ] }, { - "id": "cx-46706f1e", - "identity": "b300|deepep|6144|8|256|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", - "colorKey": "b300_307ed708", - "comparisonKey": "b477f7e33cf027ec", + "id": "cx-e6a97375", + "identity": "gb200|deepep|v1|7168|8|256|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||b9896b0a7ca9901", + "colorKey": "gb200_b0118480", + "comparisonKey": "e07c5ac7fb8068b5", "schemaVersion": 3, - "generatedAt": "2026-06-27T09:53:05.143387+00:00", + "generatedAt": "2026-06-29T13:52:17.589100+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_05", - "sku": "b300", + "publicationStatus": "comparable-experimental", + "runner": "gb200-nv_0", + "sku": "gb200", "backend": "deepep", "phase": "prefill", "mode": "normal", @@ -19051,14 +18438,14 @@ "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "runtime-visible-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", + "topologyClass": "gb200-nvl72-mnnvl", + 
"transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep · bf16", - "model": "MiniMax-M3", + "label": "GB200 EP8 · deepep · fp8", + "model": "DeepSeek-V3/V4", "shape": { - "hidden": 6144, + "hidden": 7168, "topk": 8, "experts": 256, "routing": "uniform", @@ -19066,15 +18453,16 @@ "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, - "dispatchDtype": "bf16", + "dispatchDtype": "fp8", + "kernelGeneration": "v1", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1351, + "achievedFraction": 0.1316, "configuredUnits": 20, - "deviceUnits": 148, + "deviceUnits": 152, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -19082,59 +18470,59 @@ }, "placement": { "kind": "packed", - "nodes": 1, + "nodes": 2, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "64d989e2e2a6b31", - "workloadId": "set:6:9f5e1e005a35e937", - "workloadSource": "canonical-serialized", + "traceSignature": "b9896b0a7ca9901", + "workloadId": null, + "workloadSource": "seeded-runtime", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", + "backendVersion": "1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28285723416", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285723416", - "createdAt": "2026-06-27T09:53:05.143387+00:00", - "sha": "149586650dbed5b7579537347e9489d5b41543c1" + "id": "28374335449", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449", + "createdAt": "2026-06-29T13:08:20Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 99.35999661684036, - "p90": 101.72799974679947, - "p95": 102.59199887514114, - "p99": 
109.0560033917427 + "p50": 341.0559892654419, + "p90": 360.3839874267578, + "p95": 365.56801199913025, + "p99": 375.64799189567566 }, "combine": { - "p50": 104.8320010304451, - "p90": 113.88800293207169, - "p95": 114.20799791812897, - "p99": 117.34399944543839 + "p50": 118.8800036907196, + "p90": 124.86399710178375, + "p95": 127.10399925708771, + "p99": 131.20000064373016 }, "roundtrip": { - "p50": 185.15199422836304, - "p90": 189.28000330924988, - "p95": 191.23199582099915, - "p99": 221.95200622081757 + "p50": 435.61598658561707, + "p90": 448.41599464416504, + "p95": 452.32000946998596, + "p99": 458.5280120372772 }, "isolatedSum": { - "p50": 204.19199764728546, - "p90": 215.61600267887115, - "p95": 216.7999967932701, - "p99": 226.4000028371811 + "p50": 459.9359929561615, + "p90": 485.24798452854156, + "p95": 492.67201125621796, + "p99": 506.8479925394058 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 66576384, - "combineLogicalBytes": 66576384, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, "fanoutMean": 5.291015625, "recvTokensMax": 723, - "stragglerRank": 2, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 @@ -19143,35 +18531,35 @@ "tokensPerRank": 256, "globalTokens": 2048, "dispatch": { - "p50": 128.54400277137756, - "p90": 136.00000739097595, - "p95": 137.66400516033173, - "p99": 154.08000349998474 + "p50": 376.2879967689514, + "p90": 386.55999302864075, + "p95": 388.7360095977783, + "p99": 393.7920033931732 }, "combine": { - "p50": 142.94399321079254, - "p90": 152.0639955997467, - "p95": 152.41600573062897, - "p99": 176.35199427604675 + "p50": 161.21600568294525, + "p90": 166.27199947834015, + "p95": 168.12799870967865, + "p99": 173.34400117397308 }, "roundtrip": { - "p50": 259.64799523353577, - "p90": 263.5200023651123, - "p95": 265.9519910812378, - "p99": 286.1120104789734 + "p50": 527.2960066795349, + "p90": 537.8559827804565, + "p95": 540.3839945793152, + "p99": 544.3519949913025 }, "isolatedSum": 
{ - "p50": 271.4879959821701, - "p90": 288.06400299072266, - "p95": 290.0800108909607, - "p99": 330.4319977760315 + "p50": 537.5040024518967, + "p90": 552.8319925069809, + "p95": 556.864008307457, + "p99": 567.1360045671463 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 133619712, - "combineLogicalBytes": 133619712, + "dispatchLogicalBytes": 77944832, + "combineLogicalBytes": 155889664, "fanoutMean": 5.3095703125, "recvTokensMax": 1422, - "stragglerRank": 2, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 @@ -19180,35 +18568,35 @@ "tokensPerRank": 512, "globalTokens": 4096, "dispatch": { - "p50": 196.57599925994873, - "p90": 200.3519982099533, - "p95": 201.63199305534363, - "p99": 223.23200106620789 + "p50": 495.61598896980286, + "p90": 506.1119794845581, + "p95": 509.5360279083252, + "p99": 516.0959959030151 }, "combine": { - "p50": 239.45599794387817, - "p90": 249.34400618076324, - "p95": 250.11199712753296, - "p99": 262.4320089817047 + "p50": 283.3600044250488, + "p90": 289.0239953994751, + "p95": 290.8799946308136, + "p99": 295.1360046863556 }, "roundtrip": { - "p50": 409.40800309181213, - "p90": 418.17599534988403, - "p95": 426.144003868103, - "p99": 449.7919976711273 + "p50": 754.5920014381409, + "p90": 763.9999985694885, + "p95": 765.9839987754822, + "p99": 773.9840149879456 }, "isolatedSum": { - "p50": 436.0319972038269, - "p90": 449.69600439071655, - "p95": 451.7439901828766, - "p99": 485.6640100479126 + "p50": 778.9759933948517, + "p90": 795.1359748840332, + "p95": 800.4160225391388, + "p99": 811.2320005893707 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 267657216, - "combineLogicalBytes": 267657216, + "dispatchLogicalBytes": 156133376, + "combineLogicalBytes": 312266752, "fanoutMean": 5.31787109375, "recvTokensMax": 2779, - "stragglerRank": 5, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 @@ -19217,35 +18605,35 @@ "tokensPerRank": 1024, "globalTokens": 8192, "dispatch": { - "p50": 
316.0000145435333, - "p90": 321.6319978237152, - "p95": 326.55999064445496, - "p99": 339.1680121421814 + "p50": 731.935977935791, + "p90": 743.4239983558655, + "p95": 746.4960217475891, + "p99": 752.3199915885925 }, "combine": { - "p50": 445.15201449394226, - "p90": 446.78398966789246, - "p95": 448.60801100730896, - "p99": 472.03201055526733 + "p50": 489.0879988670349, + "p90": 494.30400133132935, + "p95": 496.44801020622253, + "p99": 499.35999512672424 }, "roundtrip": { - "p50": 743.2000041007996, - "p90": 750.0799894332886, - "p95": 757.5039863586426, - "p99": 775.7120132446289 + "p50": 1187.7119541168213, + "p90": 1199.77605342865, + "p95": 1202.623963356018, + "p99": 1210.6239795684814 }, "isolatedSum": { - "p50": 761.1520290374756, - "p90": 768.4159874916077, - "p95": 775.1680016517639, - "p99": 811.2000226974487 + "p50": 1221.023976802826, + "p90": 1237.7279996871948, + "p95": 1242.9440319538116, + "p99": 1251.6799867153168 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 534380544, - "combineLogicalBytes": 534380544, + "dispatchLogicalBytes": 311721984, + "combineLogicalBytes": 623443968, "fanoutMean": 5.30859375, "recvTokensMax": 5505, - "stragglerRank": 7, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 @@ -19254,35 +18642,35 @@ "tokensPerRank": 2048, "globalTokens": 16384, "dispatch": { - "p50": 567.0719742774963, - "p90": 570.9760189056396, - "p95": 573.2799768447876, - "p99": 593.5360193252563 + "p50": 1204.8319578170776, + "p90": 1216.8960571289062, + "p95": 1223.3599424362183, + "p99": 1430.3359985351562 }, "combine": { - "p50": 801.7920255661011, - "p90": 805.8239817619324, - "p95": 815.1040077209473, - "p99": 850.6879806518555 + "p50": 878.3680200576782, + "p90": 885.1839900016785, + "p95": 887.2640132904053, + "p99": 891.9360041618347 }, "roundtrip": { - "p50": 1346.336007118225, - "p90": 1356.7359447479248, - "p95": 1364.0960454940796, - "p99": 1429.535984992981 + "p50": 2039.9041175842285, + "p90": 
2050.784111022949, + "p95": 2054.7521114349365, + "p99": 2062.3679161071777 }, "isolatedSum": { - "p50": 1368.8639998435974, - "p90": 1376.800000667572, - "p95": 1388.3839845657349, - "p99": 1444.2239999771118 + "p50": 2083.199977874756, + "p90": 2102.0800471305847, + "p95": 2110.6239557266235, + "p99": 2322.272002696991 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1066119168, - "combineLogicalBytes": 1066119168, + "dispatchLogicalBytes": 621902848, + "combineLogicalBytes": 1243805696, "fanoutMean": 5.29547119140625, "recvTokensMax": 10952, - "stragglerRank": 7, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 @@ -19291,35 +18679,35 @@ "tokensPerRank": 4096, "globalTokens": 32768, "dispatch": { - "p50": 1061.4080429077148, - "p90": 1067.039966583252, - "p95": 1075.32799243927, - "p99": 1103.9040088653564 + "p50": 2268.8639163970947, + "p90": 2281.4719676971436, + "p95": 2284.0960025787354, + "p99": 2294.3360805511475 }, "combine": { - "p50": 1503.2000541687012, - "p90": 1515.2640342712402, - "p95": 1526.9759893417358, - "p99": 1554.8160076141357 + "p50": 1629.0240287780762, + "p90": 1636.1600160598755, + "p95": 1638.8800144195557, + "p99": 1645.8239555358887 }, "roundtrip": { - "p50": 2543.2960987091064, - "p90": 2558.880090713501, - "p95": 2570.847988128662, - "p99": 2619.1680431365967 + "p50": 3866.5599822998047, + "p90": 3876.192092895508, + "p95": 3881.216049194336, + "p99": 3893.2158946990967 }, "isolatedSum": { - "p50": 2564.608097076416, - "p90": 2582.304000854492, - "p95": 2602.303981781006, - "p99": 2658.720016479492 + "p50": 3897.887945175171, + "p90": 3917.631983757019, + "p95": 3922.976016998291, + "p99": 3940.160036087036 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2131722240, - "combineLogicalBytes": 2131722240, + "dispatchLogicalBytes": 1243504640, + "combineLogicalBytes": 2487009280, "fanoutMean": 5.294189453125, "recvTokensMax": 21781, - "stragglerRank": 4, + "stragglerRank": 3, "correct": true, 
"samplesPooled": 600, "trials": 3 @@ -19327,47 +18715,48 @@ ] }, { - "id": "cx-238797ce", - "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||157ca81687ddb63", - "colorKey": "b300_c9569580", - "comparisonKey": "c4fbb2dad9521e3e", + "id": "cx-54bf03e2", + "identity": "gb200|deepep|v1|7168|8|384|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||fc08bf2f8d42ed8", + "colorKey": "gb200_b0118480", + "comparisonKey": "1434b75a5e7d7c2d", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:57:38.465863+00:00", + "generatedAt": "2026-06-29T13:53:16.382279+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_13", - "sku": "b300", + "publicationStatus": "comparable-experimental", + "runner": "gb200-nv_0", + "sku": "gb200", "backend": "deepep", "phase": "prefill", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", + "measurementContract": "runtime-visible-v1", + "topologyClass": "gb200-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep · bf16", - "model": "DeepSeek-V3/V4", + "label": "GB200 EP8 · deepep · fp8", + "model": "Kimi-K2", "shape": { "hidden": 7168, "topk": 8, - "experts": 256, + "experts": 384, "routing": "uniform", "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, - "dispatchDtype": "bf16", + "dispatchDtype": "fp8", + "kernelGeneration": "v1", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1351, + "achievedFraction": 0.1316, "configuredUnits": 20, - "deviceUnits": 148, + "deviceUnits": 152, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -19375,132 +18764,243 @@ 
}, "placement": { "kind": "packed", - "nodes": 1, + "nodes": 2, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "157ca81687ddb63", - "workloadId": "set:3:a426d66e479dc893", - "workloadSource": "canonical-serialized", + "traceSignature": "fc08bf2f8d42ed8", + "workloadId": null, + "workloadSource": "seeded-runtime", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", + "backendVersion": "1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271869301", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271869301", - "createdAt": "2026-06-26T23:57:38.465863+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28374335449", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449", + "createdAt": "2026-06-29T13:08:20Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 93.66399794816971, - "p90": 99.42399710416794, - "p95": 101.24800354242325, - "p99": 112.15999722480774 + "p50": 354.6240031719208, + "p90": 374.1439878940582, + "p95": 378.2080113887787, + "p99": 386.46399974823 }, "combine": { - "p50": 115.7120019197464, - "p90": 116.54400080442429, - "p95": 117.47200042009354, - "p99": 128.7039965391159 + "p50": 121.5360015630722, + "p90": 125.95200538635254, + "p95": 128.38399410247803, + "p99": 133.12000036239624 }, "roundtrip": { - "p50": 195.3279972076416, - "p90": 199.072003364563, - "p95": 200.57600736618042, - "p99": 214.1440063714981 + "p50": 448.96000623703003, + "p90": 460.7999920845032, + "p95": 465.11998772621155, + "p99": 484.25599932670593 }, "isolatedSum": { - "p50": 209.3759998679161, - "p90": 215.96799790859222, - "p95": 218.72000396251678, - "p99": 240.86399376392365 + "p50": 476.160004734993, + "p90": 500.09599328041077, + 
"p95": 506.5920054912567, + "p99": 519.5840001106262 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 4, + "dispatchLogicalBytes": 38757376, + "combineLogicalBytes": 77514752, + "fanoutMean": 5.2802734375, + "recvTokensMax": 707, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 512, - "globalTokens": 4096, + "tokensPerRank": 256, + "globalTokens": 2048, "dispatch": { - "p50": 193.7599927186966, - "p90": 200.3519982099533, - "p95": 202.94399559497833, - "p99": 209.75999534130096 + "p50": 383.39200615882874, + "p90": 394.5919871330261, + "p95": 398.0160057544708, + "p99": 404.06399965286255 }, "combine": { - "p50": 272.92799949645996, - "p90": 275.04000067710876, - "p95": 275.6800055503845, - "p99": 289.4720137119293 + "p50": 164.44799304008484, + "p90": 170.17599940299988, + "p95": 172.44799435138702, + "p99": 175.87199807167053 }, "roundtrip": { - "p50": 434.5279932022095, - "p90": 444.95999813079834, - "p95": 448.1920003890991, - "p99": 461.37601137161255 + "p50": 534.3040227890015, + "p90": 545.4080104827881, + "p95": 548.6400127410889, + "p99": 564.2240047454834 }, "isolatedSum": { - "p50": 466.68799221515656, - "p90": 475.3919988870621, - "p95": 478.62400114536285, - "p99": 499.2320090532303 + "p50": 547.8399991989136, + "p90": 564.767986536026, + "p95": 570.4640001058578, + "p99": 579.9359977245331 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 312266752, - "combineLogicalBytes": 312266752, - "fanoutMean": 5.31787109375, - "recvTokensMax": 2779, + "dispatchLogicalBytes": 77285376, + "combineLogicalBytes": 154570752, + "fanoutMean": 5.2646484375, + "recvTokensMax": 1391, "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2048, - "globalTokens": 16384, + "tokensPerRank": 512, + "globalTokens": 4096, "dispatch": { - "p50": 577.6960253715515, 
- "p90": 582.6879739761353, - "p95": 584.6400260925293, - "p99": 595.7120060920715 + "p50": 501.8240213394165, + "p90": 511.4240050315857, + "p95": 514.0159726142883, + "p99": 518.6880230903625 }, "combine": { - "p50": 818.336009979248, - "p90": 828.4479975700378, - "p95": 838.3679986000061, - "p99": 852.6399731636047 + "p50": 285.95200181007385, + "p90": 291.7119860649109, + "p95": 293.8239872455597, + "p99": 298.2720136642456 }, "roundtrip": { - "p50": 1377.7920007705688, - "p90": 1387.3920440673828, - "p95": 1397.2480297088623, - "p99": 1410.4640483856201 + "p50": 761.4719867706299, + "p90": 772.159993648529, + "p95": 774.8159766197205, + "p99": 784.4799757003784 }, "isolatedSum": { - "p50": 1396.0320353507996, - "p90": 1411.135971546173, - "p95": 1423.0080246925354, - "p99": 1448.3519792556763 + "p50": 787.7760231494904, + "p90": 803.1359910964966, + "p95": 807.839959859848, + "p99": 816.9600367546082 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1243805696, - "combineLogicalBytes": 1243805696, - "fanoutMean": 5.29547119140625, - "recvTokensMax": 10952, + "dispatchLogicalBytes": 154886144, + "combineLogicalBytes": 309772288, + "fanoutMean": 5.275390625, + "recvTokensMax": 2754, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 739.2320036888123, + "p90": 750.976026058197, + "p95": 754.8159956932068, + "p99": 763.7119889259338 + }, + "combine": { + "p50": 492.48000979423523, + "p90": 498.52800369262695, + "p95": 501.0240077972412, + "p99": 504.83202934265137 + }, + "roundtrip": { + "p50": 1193.503975868225, + "p90": 1203.2320499420166, + "p95": 1207.0399522781372, + "p99": 1216.1279916763306 + }, + "isolatedSum": { + "p50": 1231.7120134830475, + "p90": 1249.504029750824, + "p95": 1255.840003490448, + "p99": 1268.5440182685852 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 309750784, + "combineLogicalBytes": 619501568, + 
"fanoutMean": 5.2750244140625, + "recvTokensMax": 5469, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 1204.3520212173462, + "p90": 1216.5440320968628, + "p95": 1221.1840152740479, + "p99": 1232.7040433883667 + }, + "combine": { + "p50": 869.0879940986633, + "p90": 875.648021697998, + "p95": 877.1839737892151, + "p99": 883.7760090827942 + }, + "roundtrip": { + "p50": 2034.1439247131348, + "p90": 2047.1038818359375, + "p95": 2052.8318881988525, + "p99": 2060.7359409332275 + }, + "isolatedSum": { + "p50": 2073.4400153160095, + "p90": 2092.192053794861, + "p95": 2098.367989063263, + "p99": 2116.480052471161 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 619687936, + "combineLogicalBytes": 1239375872, + "fanoutMean": 5.276611328125, + "recvTokensMax": 10883, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 2272.2880840301514, + "p90": 2281.6319465637207, + "p95": 2285.183906555176, + "p99": 2294.3038940429688 + }, + "combine": { + "p50": 1623.968005180359, + "p90": 1630.784034729004, + "p95": 1633.3119869232178, + "p99": 1637.8240585327148 + }, + "roundtrip": { + "p50": 3862.9438877105713, + "p90": 3873.2481002807617, + "p95": 3877.2799968719482, + "p99": 3888.0960941314697 + }, + "isolatedSum": { + "p50": 3896.2560892105103, + "p90": 3912.4159812927246, + "p95": 3918.4958934783936, + "p99": 3932.1279525756836 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1239834624, + "combineLogicalBytes": 2479669248, + "fanoutMean": 5.278564453125, + "recvTokensMax": 21730, "stragglerRank": 4, "correct": true, "samplesPooled": 600, @@ -19509,28 +19009,28 @@ ] }, { - "id": "cx-20a284d3", - "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", - 
"colorKey": "b300_c9569580", - "comparisonKey": "0484fdcbaa6c315c", + "id": "cx-0cf4ef81", + "identity": "gb200|deepep|v1|7168|8|256|fp8|normal|cached-layout-comm-only-v1|uniform|8|prefill|normal|none|none|0|tuned||b9896b0a7ca9901", + "colorKey": "gb200_7e970144", + "comparisonKey": "e8f405c383a7484e", "schemaVersion": 3, - "generatedAt": "2026-06-27T10:26:05.756924+00:00", + "generatedAt": "2026-06-29T13:51:18.445065+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_15", - "sku": "b300", + "publicationStatus": "comparable-experimental", + "runner": "gb200-nv_0", + "sku": "gb200", "backend": "deepep", "phase": "prefill", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", + "measurementContract": "cached-layout-comm-only-v1", + "topologyClass": "gb200-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep · bf16", + "label": "GB200 EP8 · deepep · fp8 [cl]", "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, @@ -19541,15 +19041,16 @@ "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, - "dispatchDtype": "bf16", + "dispatchDtype": "fp8", + "kernelGeneration": "v1", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1351, + "achievedFraction": 0.1316, "configuredUnits": 20, - "deviceUnits": 148, + "deviceUnits": 152, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -19557,59 +19058,59 @@ }, "placement": { "kind": "packed", - "nodes": 1, + "nodes": 2, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "64d989e2e2a6b31", - "workloadId": "set:6:a426d66e479dc893", - "workloadSource": "canonical-serialized", + "traceSignature": "b9896b0a7ca9901", + 
"workloadId": null, + "workloadSource": "seeded-runtime", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, - "backendVersion": "2.0.0+af9a040", + "backendVersion": "1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28286434915", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28286434915", - "createdAt": "2026-06-27T10:26:05.756924+00:00", - "sha": "91c7acf59a5e524f37742922ec67721d86a03f6b" + "id": "28374335449", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449", + "createdAt": "2026-06-29T13:08:20Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 94.33600306510925, - "p90": 98.33600372076035, - "p95": 101.40799731016159, - "p99": 131.1040073633194 + "p50": 95.51999717950821, + "p90": 102.75200009346008, + "p95": 105.8880016207695, + "p99": 121.79200351238251 }, "combine": { - "p50": 115.99999666213989, - "p90": 117.47200042009354, - "p95": 118.6240017414093, - "p99": 131.071999669075 + "p50": 119.1679984331131, + "p90": 124.22399967908859, + "p95": 126.52799487113953, + "p99": 130.2720010280609 }, "roundtrip": { - "p50": 194.4960057735443, - "p90": 200.70399343967438, - "p95": 203.3279985189438, - "p99": 237.34399676322937 + "p50": 258.0159902572632, + "p90": 269.0240144729614, + "p95": 271.07200026512146, + "p99": 281.2800109386444 }, "isolatedSum": { - "p50": 210.33599972724915, - "p90": 215.80800414085388, - "p95": 220.0319990515709, - "p99": 262.1760070323944 + "p50": 214.6879956126213, + "p90": 226.97599977254868, + "p95": 232.41599649190903, + "p99": 252.06400454044342 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, + "dispatchLogicalBytes": 38836224, "combineLogicalBytes": 77672448, "fanoutMean": 5.291015625, "recvTokensMax": 723, - "stragglerRank": 7, + "stragglerRank": 2, 
"correct": true, "samplesPooled": 600, "trials": 3 @@ -19618,35 +19119,35 @@ "tokensPerRank": 256, "globalTokens": 2048, "dispatch": { - "p50": 136.63999736309052, - "p90": 140.1599943637848, - "p95": 141.53599739074707, - "p99": 167.32800006866455 + "p50": 127.87200510501862, + "p90": 133.59999656677246, + "p95": 136.73600554466248, + "p99": 140.3840035200119 }, "combine": { - "p50": 156.70399367809296, - "p90": 165.02399742603302, - "p95": 165.6319946050644, - "p99": 177.50400304794312 + "p50": 161.6639941930771, + "p90": 166.97600483894348, + "p95": 169.0559983253479, + "p99": 172.06400632858276 }, "roundtrip": { - "p50": 273.21600914001465, - "p90": 279.4240117073059, - "p95": 281.2480032444, - "p99": 292.4160063266754 + "p50": 347.03999757766724, + "p90": 354.52800989151, + "p95": 356.79998993873596, + "p99": 360.8640134334564 }, "isolatedSum": { - "p50": 293.3439910411835, - "p90": 305.1839917898178, - "p95": 307.16799199581146, - "p99": 344.83200311660767 + "p50": 289.5359992980957, + "p90": 300.57600140571594, + "p95": 305.7920038700104, + "p99": 312.44800984859467 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 155889664, + "dispatchLogicalBytes": 77944832, "combineLogicalBytes": 155889664, "fanoutMean": 5.3095703125, "recvTokensMax": 1422, - "stragglerRank": 7, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 @@ -19655,35 +19156,35 @@ "tokensPerRank": 512, "globalTokens": 4096, "dispatch": { - "p50": 194.4960057735443, - "p90": 202.81599462032318, - "p95": 204.16000485420227, - "p99": 231.455996632576 + "p50": 183.29599499702454, + "p90": 189.5039975643158, + "p95": 192.3840045928955, + "p99": 198.88000190258026 }, "combine": { - "p50": 266.59199595451355, - "p90": 275.519996881485, - "p95": 277.3759961128235, - "p99": 302.3679852485657 + "p50": 286.1439883708954, + "p90": 292.7039861679077, + "p95": 294.624000787735, + "p99": 298.46400022506714 }, "roundtrip": { - "p50": 437.6319944858551, - "p90": 447.9359984397888, - 
"p95": 454.0480077266693, - "p99": 517.6960229873657 + "p50": 574.4320154190063, + "p90": 581.3440084457397, + "p95": 584.6719741821289, + "p99": 589.8240208625793 }, "isolatedSum": { - "p50": 461.08800172805786, - "p90": 478.33599150180817, - "p95": 481.53600096702576, - "p99": 533.8239818811417 + "p50": 469.4399833679199, + "p90": 482.2079837322235, + "p95": 487.0080053806305, + "p99": 497.3440021276474 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 312266752, + "dispatchLogicalBytes": 156133376, "combineLogicalBytes": 312266752, "fanoutMean": 5.31787109375, "recvTokensMax": 2779, - "stragglerRank": 7, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 @@ -19692,35 +19193,35 @@ "tokensPerRank": 1024, "globalTokens": 8192, "dispatch": { - "p50": 326.55999064445496, - "p90": 330.3360044956207, - "p95": 334.49599146842957, - "p99": 353.15200686454773 + "p50": 296.51200771331787, + "p90": 304.3839931488037, + "p95": 306.71998858451843, + "p99": 313.728004693985 }, "combine": { - "p50": 459.3279957771301, - "p90": 462.72000670433044, - "p95": 471.0400104522705, - "p99": 533.5680246353149 + "p50": 488.6400103569031, + "p90": 494.81600522994995, + "p95": 496.12799286842346, + "p99": 500.3200173377991 }, "roundtrip": { - "p50": 764.9279832839966, - "p90": 773.1519937515259, - "p95": 777.1520018577576, - "p99": 811.0399842262268 + "p50": 1004.4480562210083, + "p90": 1011.3279819488525, + "p95": 1014.6239995956421, + "p99": 1019.1359519958496 }, "isolatedSum": { - "p50": 785.8879864215851, - "p90": 793.0560111999512, - "p95": 805.5360019207001, - "p99": 886.7200314998627 + "p50": 785.152018070221, + "p90": 799.1999983787537, + "p95": 802.8479814529419, + "p99": 814.0480220317841 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 623443968, + "dispatchLogicalBytes": 311721984, "combineLogicalBytes": 623443968, "fanoutMean": 5.30859375, "recvTokensMax": 5505, - "stragglerRank": 7, + "stragglerRank": 2, "correct": true, "samplesPooled": 
600, "trials": 3 @@ -19729,35 +19230,35 @@ "tokensPerRank": 2048, "globalTokens": 16384, "dispatch": { - "p50": 574.9120116233826, - "p90": 586.7840051651001, - "p95": 597.0879793167114, - "p99": 678.4639954566956 + "p50": 540.3519868850708, + "p90": 550.4639744758606, + "p95": 553.0239939689636, + "p99": 558.8160157203674 }, "combine": { - "p50": 818.2399868965149, - "p90": 828.7360072135925, - "p95": 832.7360153198242, - "p99": 879.8080086708069 + "p50": 878.7840008735657, + "p90": 919.2320108413696, + "p95": 928.76797914505, + "p99": 971.2960124015808 }, "roundtrip": { - "p50": 1376.1279582977295, - "p90": 1384.7039937973022, - "p95": 1398.1120586395264, - "p99": 1485.0879907608032 + "p50": 1854.2720079421997, + "p90": 1866.528034210205, + "p95": 1895.7760334014893, + "p99": 1940.8960342407227 }, "isolatedSum": { - "p50": 1393.1519985198975, - "p90": 1415.5200123786926, - "p95": 1429.8239946365356, - "p99": 1558.2720041275024 + "p50": 1419.1359877586365, + "p90": 1469.6959853172302, + "p95": 1481.7919731140137, + "p99": 1530.1120281219482 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1243805696, + "dispatchLogicalBytes": 621902848, "combineLogicalBytes": 1243805696, "fanoutMean": 5.29547119140625, "recvTokensMax": 10952, - "stragglerRank": 6, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 @@ -19766,35 +19267,35 @@ "tokensPerRank": 4096, "globalTokens": 32768, "dispatch": { - "p50": 1068.3519840240479, - "p90": 1078.0800580978394, - "p95": 1086.4640474319458, - "p99": 1142.624020576477 + "p50": 1003.2639503479004, + "p90": 1012.5119686126709, + "p95": 1016.0959959030151, + "p99": 1022.5600004196167 }, "combine": { - "p50": 1529.47199344635, - "p90": 1541.3119792938232, - "p95": 1551.8079996109009, - "p99": 1614.9120330810547 + "p50": 1630.9759616851807, + "p90": 1637.1840238571167, + "p95": 1639.456033706665, + "p99": 1643.5199975967407 }, "roundtrip": { - "p50": 2586.5280628204346, - "p90": 2602.7839183807373, - "p95": 
2617.6319122314453, - "p99": 2691.5199756622314 + "p50": 3531.071901321411, + "p90": 3541.50390625, + "p95": 3545.4719066619873, + "p99": 3552.608013153076 }, "isolatedSum": { - "p50": 2597.823977470398, - "p90": 2619.3920373916626, - "p95": 2638.2720470428467, - "p99": 2757.5360536575317 + "p50": 2634.239912033081, + "p90": 2649.6959924697876, + "p95": 2655.55202960968, + "p99": 2666.0799980163574 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2487009280, + "dispatchLogicalBytes": 1243504640, "combineLogicalBytes": 2487009280, "fanoutMean": 5.294189453125, "recvTokensMax": 21781, - "stragglerRank": 6, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 @@ -19802,47 +19303,48 @@ ] }, { - "id": "cx-330e7a0b", - "identity": "b300|deepep|7168|8|256|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", - "colorKey": "b300_307ed708", - "comparisonKey": "669ed990dbfd00e2", + "id": "cx-e1708e07", + "identity": "gb300|deepep|v1|4096|8|128|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ebe68878aa18bb0", + "colorKey": "gb300_b97bfb88", + "comparisonKey": "c86d940414a55991", "schemaVersion": 3, - "generatedAt": "2026-06-27T09:51:13.255714+00:00", + "generatedAt": "2026-06-29T14:03:30.906721+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_11", - "sku": "b300", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", "backend": "deepep", - "phase": "prefill", + "phase": "decode", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "runtime-visible-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep · bf16", - "model": "DeepSeek-V3/V4", + "label": "GB300 EP8 · deepep · bf16", + "model": "Qwen3.5", 
"shape": { - "hidden": 7168, + "hidden": 4096, "topk": 8, - "experts": 256, + "experts": 128, "routing": "uniform", "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, "dispatchDtype": "bf16", + "kernelGeneration": "v1", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1351, + "achievedFraction": 0.1316, "configuredUnits": 20, - "deviceUnits": 148, + "deviceUnits": 152, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -19850,244 +19352,318 @@ }, "placement": { "kind": "packed", - "nodes": 1, + "nodes": 2, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "64d989e2e2a6b31", - "workloadId": "set:6:a426d66e479dc893", - "workloadSource": "canonical-serialized", + "traceSignature": "ebe68878aa18bb0", + "workloadId": null, + "workloadSource": "seeded-runtime", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", + "backendVersion": "1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28285680003", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285680003", - "createdAt": "2026-06-27T09:51:13.255714+00:00", - "sha": "149586650dbed5b7579537347e9489d5b41543c1" + "id": "28374342409", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409", + "createdAt": "2026-06-29T13:08:27Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 1, + "globalTokens": 8, "dispatch": { - "p50": 93.44000369310379, - "p90": 96.54399752616882, - "p95": 100.09600222110748, - "p99": 102.94400155544281 + "p50": 95.8079993724823, + "p90": 110.36799848079681, + "p95": 115.42399972677231, + "p99": 124.95999783277512 }, 
"combine": { - "p50": 115.26399850845337, - "p90": 116.09599739313126, - "p95": 117.34399944543839, - "p99": 127.77599692344666 + "p50": 71.19999825954437, + "p90": 77.88799703121185, + "p95": 81.56800270080566, + "p99": 84.6719965338707 }, "roundtrip": { - "p50": 192.06400215625763, - "p90": 198.7520009279251, - "p95": 199.71199333667755, - "p99": 215.68000316619873 + "p50": 142.56000518798828, + "p90": 155.68000078201294, + "p95": 160.8320027589798, + "p99": 169.95200514793396 }, "isolatedSum": { - "p50": 208.70400220155716, - "p90": 212.63999491930008, - "p95": 217.44000166654587, - "p99": 230.71999847888947 + "p50": 167.00799763202667, + "p90": 188.25599551200867, + "p95": 196.99200242757797, + "p99": 209.6319943666458 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 4, + "dispatchLogicalBytes": 344064, + "combineLogicalBytes": 344064, + "fanoutMean": 5.25, + "recvTokensMax": 6, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 256, - "globalTokens": 2048, + "tokensPerRank": 2, + "globalTokens": 16, "dispatch": { - "p50": 136.54400408267975, - "p90": 139.29599523544312, - "p95": 141.12000167369843, - "p99": 151.10400319099426 + "p50": 96.57599776983261, + "p90": 112.03200370073318, + "p95": 116.64000153541565, + "p99": 138.14400136470795 }, "combine": { - "p50": 162.9440039396286, - "p90": 164.60800170898438, - "p95": 165.18400609493256, - "p99": 178.52799594402313 + "p50": 72.54400104284286, + "p90": 79.83999699354172, + "p95": 82.17599987983704, + "p99": 85.66399663686752 }, "roundtrip": { - "p50": 271.84000611305237, - "p90": 277.75999903678894, - "p95": 280.0639867782593, - "p99": 295.48799991607666 + "p50": 144.54400539398193, + "p90": 157.3760062456131, + "p95": 161.15200519561768, + "p99": 173.8239973783493 }, "isolatedSum": { - "p50": 299.48800802230835, - "p90": 303.9039969444275, - 
"p95": 306.304007768631, - "p99": 329.6319991350174 + "p50": 169.11999881267548, + "p90": 191.8720006942749, + "p95": 198.81600141525269, + "p99": 223.80799800157547 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 155889664, - "combineLogicalBytes": 155889664, - "fanoutMean": 5.3095703125, - "recvTokensMax": 1422, - "stragglerRank": 4, + "dispatchLogicalBytes": 704512, + "combineLogicalBytes": 704512, + "fanoutMean": 5.375, + "recvTokensMax": 12, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 512, - "globalTokens": 4096, + "tokensPerRank": 4, + "globalTokens": 32, "dispatch": { - "p50": 192.7040070295334, - "p90": 198.7520009279251, - "p95": 200.95999538898468, - "p99": 214.27200734615326 + "p50": 96.22400254011154, + "p90": 111.26399785280228, + "p95": 117.08799749612808, + "p99": 128.67200374603271 }, "combine": { - "p50": 264.8960053920746, - "p90": 274.27199482917786, - "p95": 274.87999200820923, - "p99": 286.3039970397949 + "p50": 72.51200079917908, + "p90": 80.48000186681747, + "p95": 83.90399813652039, + "p99": 97.31200337409973 }, "roundtrip": { - "p50": 443.36000084877014, - "p90": 448.86401295661926, - "p95": 453.0560076236725, - "p99": 460.640013217926 + "p50": 145.88800072669983, + "p90": 159.67999398708344, + "p95": 165.21599888801575, + "p99": 186.52799725532532 }, "isolatedSum": { - "p50": 457.60001242160797, - "p90": 473.02399575710297, - "p95": 475.8399873971939, - "p99": 500.5760043859482 + "p50": 168.73600333929062, + "p90": 191.74399971961975, + "p95": 200.99199563264847, + "p99": 225.98400712013245 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 312266752, - "combineLogicalBytes": 312266752, - "fanoutMean": 5.31787109375, - "recvTokensMax": 2779, - "stragglerRank": 4, + "dispatchLogicalBytes": 1384448, + "combineLogicalBytes": 1384448, + "fanoutMean": 5.28125, + "recvTokensMax": 26, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 
1024, - "globalTokens": 8192, + "tokensPerRank": 8, + "globalTokens": 64, "dispatch": { - "p50": 326.84800028800964, - "p90": 329.75998520851135, - "p95": 331.36001229286194, - "p99": 340.9599959850311 + "p50": 97.47199714183807, + "p90": 113.21599781513214, + "p95": 118.6240017414093, + "p99": 129.2160004377365 }, "combine": { - "p50": 458.97600054740906, - "p90": 462.46400475502014, - "p95": 470.335990190506, - "p99": 474.36800599098206 + "p50": 74.81600344181061, + "p90": 82.07999914884567, + "p95": 83.29600095748901, + "p99": 94.04800087213516 }, "roundtrip": { - "p50": 764.2880082130432, - "p90": 772.0639705657959, - "p95": 773.5360264778137, - "p99": 783.8079929351807 + "p50": 146.7200070619583, + "p90": 159.29600596427917, + "p95": 163.455992937088, + "p99": 177.05599963665009 }, "isolatedSum": { - "p50": 785.8240008354187, - "p90": 792.2239899635315, - "p95": 801.6960024833679, - "p99": 815.3280019760132 + "p50": 172.28800058364868, + "p90": 195.2959969639778, + "p95": 201.92000269889832, + "p99": 223.26400130987167 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 623443968, - "combineLogicalBytes": 623443968, - "fanoutMean": 5.30859375, - "recvTokensMax": 5505, - "stragglerRank": 4, + "dispatchLogicalBytes": 2744320, + "combineLogicalBytes": 2744320, + "fanoutMean": 5.234375, + "recvTokensMax": 49, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2048, - "globalTokens": 16384, + "tokensPerRank": 16, + "globalTokens": 128, "dispatch": { - "p50": 575.9680271148682, - "p90": 583.1040143966675, - "p95": 584.6719741821289, - "p99": 595.4880118370056 + "p50": 97.24800288677216, + "p90": 112.06399649381638, + "p95": 117.72800236940384, + "p99": 128.86400520801544 }, "combine": { - "p50": 817.6640272140503, - "p90": 827.7760148048401, - "p95": 828.2240033149719, - "p99": 840.1280045509338 + "p50": 77.504001557827, + "p90": 82.94399827718735, + "p95": 85.21600067615509, + "p99": 94.81599926948547 }, 
"roundtrip": { - "p50": 1376.7679929733276, - "p90": 1384.5759630203247, - "p95": 1390.3679847717285, - "p99": 1429.6319484710693 + "p50": 147.90399372577667, + "p90": 160.47999262809753, + "p95": 164.48000073432922, + "p99": 176.4480024576187 }, "isolatedSum": { - "p50": 1393.6320543289185, - "p90": 1410.8800292015076, - "p95": 1412.8959774971008, - "p99": 1435.6160163879395 + "p50": 174.75200444459915, + "p90": 195.00799477100372, + "p95": 202.94400304555893, + "p99": 223.68000447750092 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1243805696, - "combineLogicalBytes": 1243805696, - "fanoutMean": 5.29547119140625, - "recvTokensMax": 10952, - "stragglerRank": 6, + "dispatchLogicalBytes": 5464064, + "combineLogicalBytes": 5464064, + "fanoutMean": 5.2109375, + "recvTokensMax": 94, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 4096, - "globalTokens": 32768, + "tokensPerRank": 32, + "globalTokens": 256, "dispatch": { - "p50": 1069.6320533752441, - "p90": 1077.6959657669067, - "p95": 1080.1600217819214, - "p99": 1091.4560556411743 + "p50": 98.78399968147278, + "p90": 111.35999858379364, + "p95": 117.95199662446976, + "p99": 131.9359987974167 }, "combine": { - "p50": 1529.0240049362183, - "p90": 1540.2239561080933, - "p95": 1541.0560369491577, - "p99": 1551.5199899673462 + "p50": 80.32000064849854, + "p90": 84.95999872684479, + "p95": 86.87999844551086, + "p99": 95.16800194978714 }, "roundtrip": { - "p50": 2583.616018295288, - "p90": 2593.696117401123, - "p95": 2599.3599891662598, - "p99": 2626.4960765838623 + "p50": 150.78400075435638, + "p90": 160.89600324630737, + "p95": 166.52800142765045, + "p99": 181.60000443458557 }, "isolatedSum": { - "p50": 2598.6560583114624, - "p90": 2617.919921875, - "p95": 2621.216058731079, - "p99": 2642.9760456085205 + "p50": 179.1040003299713, + "p90": 196.31999731063843, + "p95": 204.83199506998062, + "p99": 227.10400074720383 }, "roundtripMeasured": true, - 
"dispatchLogicalBytes": 2487009280, - "combineLogicalBytes": 2487009280, - "fanoutMean": 5.294189453125, - "recvTokensMax": 21781, - "stragglerRank": 6, + "dispatchLogicalBytes": 11124736, + "combineLogicalBytes": 11124736, + "fanoutMean": 5.3046875, + "recvTokensMax": 186, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 104.00000214576721, + "p90": 115.35999923944473, + "p95": 122.20799922943115, + "p99": 134.8479986190796 + }, + "combine": { + "p50": 91.96799993515015, + "p90": 96.92800045013428, + "p95": 102.49599814414978, + "p99": 109.63200032711029 + }, + "roundtrip": { + "p50": 165.98400473594666, + "p90": 176.32000148296356, + "p95": 181.34400248527527, + "p99": 194.30400431156158 + }, + "isolatedSum": { + "p50": 195.96800208091736, + "p90": 212.287999689579, + "p95": 224.70399737358093, + "p99": 244.47999894618988 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22192128, + "combineLogicalBytes": 22192128, + "fanoutMean": 5.291015625, + "recvTokensMax": 358, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 116.5120005607605, + "p90": 125.11999905109406, + "p95": 129.2160004377365, + "p99": 139.71200585365295 + }, + "combine": { + "p50": 107.16799646615982, + "p90": 112.0000034570694, + "p95": 117.0559972524643, + "p99": 126.43200159072876 + }, + "roundtrip": { + "p50": 194.75199282169342, + "p90": 203.96800339221954, + "p95": 207.07200467586517, + "p99": 217.66400337219238 + }, + "isolatedSum": { + "p50": 223.67999702692032, + "p90": 237.12000250816345, + "p95": 246.2719976902008, + "p99": 266.1440074443817 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 44564480, + "combineLogicalBytes": 44564480, + "fanoutMean": 5.3125, + "recvTokensMax": 699, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 
3 @@ -20095,47 +19671,48 @@ ] }, { - "id": "cx-d4f1db50", - "identity": "b300|deepep|7168|8|384|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||cd50548525dafdf", - "colorKey": "b300_c9569580", - "comparisonKey": "70142fedc425dd51", + "id": "cx-9e8c8650", + "identity": "gb300|deepep|v1|5120|8|160|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||13e2b193b87a112", + "colorKey": "gb300_b97bfb88", + "comparisonKey": "94583a6ef392e3d0", "schemaVersion": 3, - "generatedAt": "2026-06-27T11:14:26.079004+00:00", + "generatedAt": "2026-06-29T14:07:58.912744+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_15", - "sku": "b300", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", "backend": "deepep", - "phase": "prefill", + "phase": "decode", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", + "measurementContract": "runtime-visible-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep · bf16", - "model": "Kimi-K2", + "label": "GB300 EP8 · deepep · bf16", + "model": "shape 5120/8/160", "shape": { - "hidden": 7168, + "hidden": 5120, "topk": 8, - "experts": 384, + "experts": 160, "routing": "uniform", "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, "dispatchDtype": "bf16", + "kernelGeneration": "v1", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1351, + "achievedFraction": 0.1316, "configuredUnits": 20, - "deviceUnits": 148, + "deviceUnits": 152, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -20143,292 +19720,367 @@ }, "placement": 
{ "kind": "packed", - "nodes": 1, + "nodes": 2, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "cd50548525dafdf", - "workloadId": "set:6:b23bc0c4b6402c69", - "workloadSource": "canonical-serialized", + "traceSignature": "13e2b193b87a112", + "workloadId": null, + "workloadSource": "seeded-runtime", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", + "backendVersion": "1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28287503016", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28287503016", - "createdAt": "2026-06-27T11:14:26.079004+00:00", - "sha": "df7fddee0a275156d4c72fa006cd2b73bce72613" + "id": "28374342409", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409", + "createdAt": "2026-06-29T13:08:27Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 1, + "globalTokens": 8, "dispatch": { - "p50": 1799.6480464935303, - "p90": 2024.2879390716553, - "p95": 2855.3919792175293, - "p99": 3412.2560024261475 + "p50": 96.22400254011154, + "p90": 111.23199760913849, + "p95": 117.76000261306763, + "p99": 156.67200088500977 }, "combine": { - "p50": 1812.8000497817993, - "p90": 1949.5359659194946, - "p95": 2620.09596824646, - "p99": 2830.048084259033 + "p50": 72.89600372314453, + "p90": 82.17599987983704, + "p95": 85.11999994516373, + "p99": 129.72800433635712 }, "roundtrip": { - "p50": 1900.1920223236084, - "p90": 2016.5760517120361, - "p95": 2611.488103866577, - "p99": 3049.344062805176 + "p50": 146.84799313545227, + "p90": 160.60799360275269, + "p95": 170.71999609470367, + "p99": 226.8799990415573 }, "isolatedSum": { - "p50": 3612.4480962753296, - "p90": 3973.82390499115, - "p95": 5475.487947463989, - "p99": 6242.304086685181 + "p50": 
169.12000626325607, + "p90": 193.40799748897552, + "p95": 202.88000255823135, + "p99": 286.4000052213669 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77514752, - "combineLogicalBytes": 77514752, - "fanoutMean": 5.2802734375, - "recvTokensMax": 707, - "stragglerRank": 7, + "dispatchLogicalBytes": 430080, + "combineLogicalBytes": 430080, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 256, - "globalTokens": 2048, + "tokensPerRank": 2, + "globalTokens": 16, "dispatch": { - "p50": 1876.1919736862183, - "p90": 2189.120054244995, - "p95": 2922.816038131714, - "p99": 3402.240037918091 + "p50": 98.11200201511383, + "p90": 113.53600025177002, + "p95": 123.19999933242798, + "p99": 170.30400037765503 }, "combine": { - "p50": 1860.6079816818237, - "p90": 1970.52800655365, - "p95": 2403.167963027954, - "p99": 2977.8881072998047 + "p50": 72.9919970035553, + "p90": 81.40800148248672, + "p95": 83.64800363779068, + "p99": 120.83200365304947 }, "roundtrip": { - "p50": 1979.2640209197998, - "p90": 2097.536087036133, - "p95": 2794.1761016845703, - "p99": 3157.9198837280273 + "p50": 147.10399508476257, + "p90": 161.76000237464905, + "p95": 176.64000391960144, + "p99": 214.08000588417053 }, "isolatedSum": { - "p50": 3736.799955368042, - "p90": 4159.648060798645, - "p95": 5325.984001159668, - "p99": 6380.1281452178955 + "p50": 171.10399901866913, + "p90": 194.94400173425674, + "p95": 206.84800297021866, + "p99": 291.1360040307045 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 154570752, - "combineLogicalBytes": 154570752, - "fanoutMean": 5.2646484375, - "recvTokensMax": 1391, - "stragglerRank": 4, + "dispatchLogicalBytes": 880640, + "combineLogicalBytes": 880640, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 512, - "globalTokens": 4096, + "tokensPerRank": 4, + "globalTokens": 32, 
"dispatch": { - "p50": 1976.0639667510986, - "p90": 2366.368055343628, - "p95": 2979.0399074554443, - "p99": 3521.440029144287 + "p50": 97.59999811649323, + "p90": 112.31999844312668, + "p95": 119.10399794578552, + "p99": 146.36799693107605 }, "combine": { - "p50": 1994.1760301589966, - "p90": 2153.6319255828857, - "p95": 2808.351993560791, - "p99": 3210.304021835327 + "p50": 75.00799745321274, + "p90": 82.78399705886841, + "p95": 86.91199868917465, + "p99": 116.31999909877777 }, "roundtrip": { - "p50": 2184.7360134124756, - "p90": 2389.280080795288, - "p95": 3086.7199897766113, - "p99": 3524.319887161255 + "p50": 149.1519957780838, + "p90": 160.38399934768677, + "p95": 166.04800522327423, + "p99": 194.33599710464478 }, "isolatedSum": { - "p50": 3970.239996910095, - "p90": 4519.999980926514, - "p95": 5787.391901016235, - "p99": 6731.744050979614 + "p50": 172.60799556970596, + "p90": 195.1039955019951, + "p95": 206.01599663496017, + "p99": 262.6879960298538 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 309772288, - "combineLogicalBytes": 309772288, - "fanoutMean": 5.275390625, - "recvTokensMax": 2754, - "stragglerRank": 7, + "dispatchLogicalBytes": 1740800, + "combineLogicalBytes": 1740800, + "fanoutMean": 5.3125, + "recvTokensMax": 25, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 1024, - "globalTokens": 8192, + "tokensPerRank": 8, + "globalTokens": 64, "dispatch": { - "p50": 2102.2400856018066, - "p90": 2479.5520305633545, - "p95": 3182.1439266204834, - "p99": 4024.6081352233887 + "p50": 98.55999797582626, + "p90": 113.37599903345108, + "p95": 121.18399888277054, + "p99": 164.15999829769135 }, "combine": { - "p50": 2238.5919094085693, - "p90": 2511.5840435028076, - "p95": 3066.6239261627197, - "p99": 3605.247974395752 + "p50": 80.44800162315369, + "p90": 84.86399799585342, + "p95": 91.45600348711014, + "p99": 121.88799679279327 }, "roundtrip": { - "p50": 2536.7679595947266, - "p90": 2645.951986312866, - 
"p95": 3478.5280227661133, - "p99": 4007.6160430908203 + "p50": 151.32799744606018, + "p90": 164.57599401474, + "p95": 175.90400576591492, + "p99": 238.5919988155365 }, "isolatedSum": { - "p50": 4340.831995010376, - "p90": 4991.136074066162, - "p95": 6248.767852783203, - "p99": 7629.856109619141 + "p50": 179.00799959897995, + "p90": 198.2399970293045, + "p95": 212.64000236988068, + "p99": 286.0479950904846 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 619501568, - "combineLogicalBytes": 619501568, - "fanoutMean": 5.2750244140625, - "recvTokensMax": 5469, + "dispatchLogicalBytes": 3471360, + "combineLogicalBytes": 3471360, + "fanoutMean": 5.296875, + "recvTokensMax": 50, "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2048, - "globalTokens": 16384, + "tokensPerRank": 16, + "globalTokens": 128, "dispatch": { - "p50": 2352.7679443359375, - "p90": 2601.088047027588, - "p95": 3376.3840198516846, - "p99": 4238.1439208984375 + "p50": 98.33600372076035, + "p90": 111.93600296974182, + "p95": 117.60000139474869, + "p99": 157.4079990386963 }, "combine": { - "p50": 2585.2479934692383, - "p90": 2841.9840335845947, - "p95": 3667.9999828338623, - "p99": 4010.7522010803223 + "p50": 80.89599758386612, + "p90": 85.1840004324913, + "p95": 89.79199826717377, + "p99": 119.10399794578552 }, "roundtrip": { - "p50": 3136.607885360718, - "p90": 3412.1599197387695, - "p95": 4064.095973968506, - "p99": 6203.680038452148 + "p50": 152.96000242233276, + "p90": 166.6560024023056, + "p95": 172.28800058364868, + "p99": 221.50400280952454 }, "isolatedSum": { - "p50": 4938.015937805176, - "p90": 5443.072080612183, - "p95": 7044.384002685547, - "p99": 8248.89612197876 + "p50": 179.23200130462646, + "p90": 197.12000340223312, + "p95": 207.39199966192245, + "p99": 276.5119969844818 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1239375872, - "combineLogicalBytes": 1239375872, - "fanoutMean": 5.276611328125, - "recvTokensMax": 10883, - 
"stragglerRank": 4, + "dispatchLogicalBytes": 6912000, + "combineLogicalBytes": 6912000, + "fanoutMean": 5.2734375, + "recvTokensMax": 93, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 4096, - "globalTokens": 32768, + "tokensPerRank": 32, + "globalTokens": 256, "dispatch": { - "p50": 2850.5918979644775, - "p90": 3381.5360069274902, - "p95": 3976.288080215454, - "p99": 5621.503829956055 + "p50": 98.78399968147278, + "p90": 113.21599781513214, + "p95": 124.54400211572647, + "p99": 163.07200491428375 }, "combine": { - "p50": 3287.7440452575684, - "p90": 3433.759927749634, - "p95": 3676.8319606781006, - "p99": 4466.11213684082 + "p50": 83.36000144481659, + "p90": 91.87199920415878, + "p95": 95.36000341176987, + "p99": 141.15199446678162 }, "roundtrip": { - "p50": 4338.784217834473, - "p90": 4467.199802398682, - "p95": 4870.207786560059, - "p99": 5583.968162536621 + "p50": 154.88000214099884, + "p90": 168.38400065898895, + "p95": 187.42400407791138, + "p99": 240.54400622844696 }, "isolatedSum": { - "p50": 6138.335943222046, - "p90": 6815.295934677124, - "p95": 7653.120040893555, - "p99": 10087.615966796875 + "p50": 182.14400112628937, + "p90": 205.08799701929092, + "p95": 219.90400552749634, + "p99": 304.22399938106537 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2479669248, - "combineLogicalBytes": 2479669248, - "fanoutMean": 5.278564453125, - "recvTokensMax": 21730, - "stragglerRank": 4, + "dispatchLogicalBytes": 13977600, + "combineLogicalBytes": 13977600, + "fanoutMean": 5.33203125, + "recvTokensMax": 179, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 - } - ] + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 108.89600217342377, + "p90": 119.77600306272507, + "p95": 126.78399682044983, + "p99": 179.71199750900269 + }, + "combine": { + "p50": 96.83199971914291, + "p90": 105.8880016207695, + "p95": 108.99200290441513, + "p99": 150.91200172901154 + }, + 
"roundtrip": { + "p50": 176.12800002098083, + "p90": 187.96800076961517, + "p95": 194.4960057735443, + "p99": 237.34399676322937 + }, + "isolatedSum": { + "p50": 205.72800189256668, + "p90": 225.66400468349457, + "p95": 235.77599972486496, + "p99": 330.6239992380142 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 27975680, + "combineLogicalBytes": 27975680, + "fanoutMean": 5.3359375, + "recvTokensMax": 355, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 122.81599640846252, + "p90": 132.89600610733032, + "p95": 138.8159990310669, + "p99": 169.11999881267548 + }, + "combine": { + "p50": 117.0559972524643, + "p90": 121.88799679279327, + "p95": 129.40800189971924, + "p99": 154.6880006790161 + }, + "roundtrip": { + "p50": 209.60000157356262, + "p90": 220.7999974489212, + "p95": 225.66400468349457, + "p99": 258.432000875473 + }, + "isolatedSum": { + "p50": 239.87199366092682, + "p90": 254.7840029001236, + "p95": 268.22400093078613, + "p99": 323.8079994916916 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 55674880, + "combineLogicalBytes": 55674880, + "fanoutMean": 5.3095703125, + "recvTokensMax": 699, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] }, { - "id": "cx-0ef62f98", - "identity": "b300|deepep|7168|8|384|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||cd50548525dafdf", - "colorKey": "b300_307ed708", - "comparisonKey": "6ef04ab36d1b6989", + "id": "cx-7c993840", + "identity": "gb300|deepep|v1|6144|8|256|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||74444524b5db510", + "colorKey": "gb300_b97bfb88", + "comparisonKey": "1c929d1cf59e66d3", "schemaVersion": 3, - "generatedAt": "2026-06-27T09:51:40.258532+00:00", + "generatedAt": "2026-06-29T14:12:18.029743+00:00", "status": "valid", - "publicationStatus": "official", - "runner": 
"b300-nv_08", - "sku": "b300", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", "backend": "deepep", - "phase": "prefill", + "phase": "decode", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "runtime-visible-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep · bf16", - "model": "Kimi-K2", + "label": "GB300 EP8 · deepep · bf16", + "model": "MiniMax-M3", "shape": { - "hidden": 7168, + "hidden": 6144, "topk": 8, - "experts": 384, + "experts": 256, "routing": "uniform", "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, "dispatchDtype": "bf16", + "kernelGeneration": "v1", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1351, + "achievedFraction": 0.1316, "configuredUnits": 20, - "deviceUnits": 148, + "deviceUnits": 152, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -20436,537 +20088,318 @@ }, "placement": { "kind": "packed", - "nodes": 1, + "nodes": 2, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "cd50548525dafdf", - "workloadId": "set:6:b23bc0c4b6402c69", - "workloadSource": "canonical-serialized", + "traceSignature": "74444524b5db510", + "workloadId": null, + "workloadSource": "seeded-runtime", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", + "backendVersion": "1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28285690957", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285690957", - "createdAt": 
"2026-06-27T09:51:40.258532+00:00", - "sha": "149586650dbed5b7579537347e9489d5b41543c1" + "id": "28374342409", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409", + "createdAt": "2026-06-29T13:08:27Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 94.14400160312653, - "p90": 98.4639972448349, - "p95": 102.94400155544281, - "p99": 110.91200262308121 - }, - "combine": { - "p50": 115.26399850845337, - "p90": 116.12799763679504, - "p95": 117.60000139474869, - "p99": 127.23200023174286 - }, - "roundtrip": { - "p50": 192.86400079727173, - "p90": 199.45600628852844, - "p95": 202.07999646663666, - "p99": 214.78399634361267 - }, - "isolatedSum": { - "p50": 209.4080001115799, - "p90": 214.59199488162994, - "p95": 220.5440029501915, - "p99": 238.14400285482407 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 77514752, - "combineLogicalBytes": 77514752, - "fanoutMean": 5.2802734375, - "recvTokensMax": 707, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 256, - "globalTokens": 2048, - "dispatch": { - "p50": 131.74399733543396, - "p90": 138.7840062379837, - "p95": 141.184002161026, - "p99": 154.4319987297058 - }, - "combine": { - "p50": 161.85599565505981, - "p90": 164.2560064792633, - "p95": 164.99200463294983, - "p99": 175.04000663757324 - }, - "roundtrip": { - "p50": 276.5760123729706, - "p90": 284.31999683380127, - "p95": 288.4159982204437, - "p99": 299.80799555778503 - }, - "isolatedSum": { - "p50": 293.5999929904938, - "p90": 303.040012717247, - "p95": 306.17600679397583, - "p99": 329.47200536727905 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 154570752, - "combineLogicalBytes": 154570752, - "fanoutMean": 5.2646484375, - "recvTokensMax": 1391, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 512, - 
"globalTokens": 4096, - "dispatch": { - "p50": 192.60799884796143, - "p90": 199.52000677585602, - "p95": 202.43200659751892, - "p99": 214.23999965190887 - }, - "combine": { - "p50": 265.28000831604004, - "p90": 274.4640111923218, - "p95": 275.1680016517639, - "p99": 287.1679961681366 - }, - "roundtrip": { - "p50": 434.7200095653534, - "p90": 443.3920085430145, - "p95": 447.1360146999359, - "p99": 463.00798654556274 - }, - "isolatedSum": { - "p50": 457.88800716400146, - "p90": 473.9840179681778, - "p95": 477.60000824928284, - "p99": 501.40799582004547 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 309772288, - "combineLogicalBytes": 309772288, - "fanoutMean": 5.275390625, - "recvTokensMax": 2754, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 1024, - "globalTokens": 8192, + "tokensPerRank": 1, + "globalTokens": 8, "dispatch": { - "p50": 326.9760012626648, - "p90": 330.27198910713196, - "p95": 331.36001229286194, - "p99": 341.8239951133728 + "p50": 102.55999863147736, + "p90": 120.64000219106674, + "p95": 138.84800672531128, + "p99": 175.4560023546219 }, "combine": { - "p50": 458.3039879798889, - "p90": 462.3039960861206, - "p95": 470.2720046043396, - "p99": 482.7840030193329 + "p50": 80.54400235414505, + "p90": 85.82399785518646, + "p95": 94.11200135946274, + "p99": 132.60799646377563 }, "roundtrip": { - "p50": 764.2560005187988, - "p90": 772.1920013427734, - "p95": 775.4560112953186, - "p99": 788.320004940033 + "p50": 155.39200603961945, + "p90": 169.8240041732788, + "p95": 194.0159946680069, + "p99": 232.70399868488312 }, "isolatedSum": { - "p50": 785.2799892425537, - "p90": 792.5759851932526, - "p95": 801.6320168972015, - "p99": 824.6079981327057 + "p50": 183.1040009856224, + "p90": 206.4640000462532, + "p95": 232.96000808477402, + "p99": 308.0639988183975 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 619501568, - "combineLogicalBytes": 619501568, - "fanoutMean": 5.2750244140625, - 
"recvTokensMax": 5469, + "dispatchLogicalBytes": 540672, + "combineLogicalBytes": 540672, + "fanoutMean": 5.5, + "recvTokensMax": 7, "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2048, - "globalTokens": 16384, + "tokensPerRank": 2, + "globalTokens": 16, "dispatch": { - "p50": 567.9680109024048, - "p90": 572.2560286521912, - "p95": 577.9520273208618, - "p99": 588.7359976768494 + "p50": 103.84000092744827, + "p90": 117.50400066375732, + "p95": 127.23200023174286, + "p99": 161.95200383663177 }, "combine": { - "p50": 807.4560165405273, - "p90": 816.864013671875, - "p95": 826.2720108032227, - "p99": 877.1520256996155 + "p50": 80.9599980711937, + "p90": 85.28000116348267, + "p95": 91.26400202512741, + "p99": 131.71200454235077 }, "roundtrip": { - "p50": 1359.0079545974731, - "p90": 1367.6799535751343, - "p95": 1373.7280368804932, - "p99": 1425.5039691925049 + "p50": 157.82399475574493, + "p90": 169.66399550437927, + "p95": 174.72000420093536, + "p99": 218.4000015258789 }, "isolatedSum": { - "p50": 1375.4240274429321, - "p90": 1389.1200423240662, - "p95": 1404.2240381240845, - "p99": 1465.8880233764648 + "p50": 184.79999899864197, + "p90": 202.78400182724, + "p95": 218.49600225687027, + "p99": 293.66400837898254 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1239375872, - "combineLogicalBytes": 1239375872, - "fanoutMean": 5.276611328125, - "recvTokensMax": 10883, - "stragglerRank": 4, + "dispatchLogicalBytes": 1056768, + "combineLogicalBytes": 1056768, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 4096, - "globalTokens": 32768, + "tokensPerRank": 4, + "globalTokens": 32, "dispatch": { - "p50": 1064.0640258789062, - "p90": 1069.1200494766235, - "p95": 1075.103998184204, - "p99": 1101.088047027588 + "p50": 103.61599922180176, + "p90": 115.32799899578094, + "p95": 122.04799801111221, + "p99": 142.5279974937439 }, 
"combine": { - "p50": 1516.2559747695923, - "p90": 1527.4560451507568, - "p95": 1529.4400453567505, - "p99": 1576.3520002365112 + "p50": 82.24000036716461, + "p90": 85.82399785518646, + "p95": 89.75999802350998, + "p99": 97.21600264310837 }, "roundtrip": { - "p50": 2562.78395652771, - "p90": 2572.5440979003906, - "p95": 2577.984094619751, - "p99": 2608.351945877075 + "p50": 159.10400450229645, + "p90": 170.81600427627563, + "p95": 175.26400089263916, + "p99": 192.3840045928955 }, "isolatedSum": { - "p50": 2580.3200006484985, - "p90": 2596.5760946273804, - "p95": 2604.5440435409546, - "p99": 2677.440047264099 + "p50": 185.85599958896637, + "p90": 201.1519968509674, + "p95": 211.8079960346222, + "p99": 239.74400013685226 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2479669248, - "combineLogicalBytes": 2479669248, - "fanoutMean": 5.278564453125, - "recvTokensMax": 21730, + "dispatchLogicalBytes": 2125824, + "combineLogicalBytes": 2125824, + "fanoutMean": 5.40625, + "recvTokensMax": 29, "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 - } - ] - }, - { - "id": "cx-1f1575ee", - "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|prefill|normal|none|none|0|tuned||0a3064a2af0dd39", - "colorKey": "b300_77566238", - "comparisonKey": "89f8d104edbb2508", - "schemaVersion": 3, - "generatedAt": "2026-06-27T09:48:40.157886+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_16", - "sku": "b300", - "backend": "deepep", - "phase": "prefill", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "B300 EP8 · deepep · bf16 · balanced", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "balanced", - "routingLabel": 
"balanced", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1351, - "configuredUnits": 20, - "deviceUnits": 148, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "0a3064a2af0dd39", - "workloadId": "set:6:2dad1a73ff872905", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28285615307", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285615307", - "createdAt": "2026-06-27T09:48:40.157886+00:00", - "sha": "149586650dbed5b7579537347e9489d5b41543c1" - }, - "rows": [ - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 1811.2640380859375, - "p90": 2052.7360439300537, - "p95": 2767.9359912872314, - "p99": 3486.1440658569336 - }, - "combine": { - "p50": 1848.6720323562622, - "p90": 1981.9200038909912, - "p95": 2632.8959465026855, - "p99": 3014.080047607422 - }, - "roundtrip": { - "p50": 1926.3039827346802, - "p90": 2019.2639827728271, - "p95": 2607.0079803466797, - "p99": 3037.4081134796143 - }, - "isolatedSum": { - "p50": 3659.9360704421997, - "p90": 4034.656047821045, - "p95": 5400.831937789917, - "p99": 6500.2241134643555 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 117440512, - "combineLogicalBytes": 117440512, - "fanoutMean": 8, - "recvTokensMax": 1024, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 }, { - "tokensPerRank": 
256, - "globalTokens": 2048, + "tokensPerRank": 8, + "globalTokens": 64, "dispatch": { - "p50": 1909.9199771881104, - "p90": 2291.3599014282227, - "p95": 2951.96795463562, - "p99": 4049.7918128967285 + "p50": 104.41599786281586, + "p90": 119.29599940776825, + "p95": 129.43999469280243, + "p99": 184.7359985113144 }, "combine": { - "p50": 1909.9839925765991, - "p90": 2116.7359352111816, - "p95": 2735.680103302002, - "p99": 3026.4639854431152 + "p50": 83.71199667453766, + "p90": 92.67199784517288, + "p95": 94.84799951314926, + "p99": 107.80800133943558 }, "roundtrip": { - "p50": 2060.3199005126953, - "p90": 2157.792091369629, - "p95": 2832.7999114990234, - "p99": 3228.3198833465576 + "p50": 161.0880047082901, + "p90": 172.70399630069733, + "p95": 182.43199586868286, + "p99": 230.04800081253052 }, "isolatedSum": { - "p50": 3819.9039697647095, - "p90": 4408.095836639404, - "p95": 5687.648057937622, - "p99": 7076.255798339844 + "p50": 188.12799453735352, + "p90": 211.96799725294113, + "p95": 224.2879942059517, + "p99": 292.54399985074997 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 234881024, - "combineLogicalBytes": 234881024, - "fanoutMean": 8, - "recvTokensMax": 2048, - "stragglerRank": 6, + "dispatchLogicalBytes": 4263936, + "combineLogicalBytes": 4263936, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 512, - "globalTokens": 4096, + "tokensPerRank": 16, + "globalTokens": 128, "dispatch": { - "p50": 2026.7200469970703, - "p90": 2262.399911880493, - "p95": 2992.89608001709, - "p99": 3506.0160160064697 + "p50": 102.59199887514114, + "p90": 116.28799885511398, + "p95": 122.94399738311768, + "p99": 169.0559983253479 }, "combine": { - "p50": 2108.9279651641846, - "p90": 2252.255916595459, - "p95": 2964.672088623047, - "p99": 3763.808012008667 + "p50": 84.41600203514099, + "p90": 92.8959995508194, + "p95": 95.23200243711472, + "p99": 119.03999745845795 }, 
"roundtrip": { - "p50": 2335.0400924682617, - "p90": 2459.1360092163086, - "p95": 3039.2000675201416, - "p99": 3627.135992050171 + "p50": 161.85599565505981, + "p90": 174.112007021904, + "p95": 179.9039989709854, + "p99": 247.51999974250793 }, "isolatedSum": { - "p50": 4135.648012161255, - "p90": 4514.655828475952, - "p95": 5957.568168640137, - "p99": 7269.824028015137 + "p50": 187.00800091028214, + "p90": 209.18399840593338, + "p95": 218.1759998202324, + "p99": 288.09599578380585 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 469762048, - "combineLogicalBytes": 469762048, - "fanoutMean": 8, - "recvTokensMax": 4096, - "stragglerRank": 7, + "dispatchLogicalBytes": 8503296, + "combineLogicalBytes": 8503296, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 1024, - "globalTokens": 8192, + "tokensPerRank": 32, + "globalTokens": 256, "dispatch": { - "p50": 2215.167999267578, - "p90": 2474.047899246216, - "p95": 2963.9999866485596, - "p99": 3755.0079822540283 + "p50": 103.93600165843964, + "p90": 116.7680025100708, + "p95": 121.0239976644516, + "p99": 162.04799711704254 }, "combine": { - "p50": 2386.8160247802734, - "p90": 2521.951913833618, - "p95": 3310.7199668884277, - "p99": 3616.895914077759 + "p50": 88.0960002541542, + "p90": 95.8079993724823, + "p95": 98.75199943780899, + "p99": 142.43200421333313 }, "roundtrip": { - "p50": 2777.695894241333, - "p90": 2873.3439445495605, - "p95": 3295.2001094818115, - "p99": 4089.024066925049 + "p50": 166.24000668525696, + "p90": 177.98399925231934, + "p95": 183.20000171661377, + "p99": 235.55199801921844 }, "isolatedSum": { - "p50": 4601.984024047852, - "p90": 4995.999813079834, - "p95": 6274.719953536987, - "p99": 7371.903896331787 + "p50": 192.03200191259384, + "p90": 212.5760018825531, + "p95": 219.7759971022606, + "p99": 304.48000133037567 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 939524096, - 
"combineLogicalBytes": 939524096, - "fanoutMean": 8, - "recvTokensMax": 8192, - "stragglerRank": 4, + "dispatchLogicalBytes": 16908288, + "combineLogicalBytes": 16908288, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2048, - "globalTokens": 16384, + "tokensPerRank": 64, + "globalTokens": 512, "dispatch": { - "p50": 2534.015893936157, - "p90": 2614.207983016968, - "p95": 3331.199884414673, - "p99": 3946.6240406036377 + "p50": 113.88800293207169, + "p90": 121.40800058841705, + "p95": 124.51200187206268, + "p99": 134.0160071849823 }, "combine": { - "p50": 2894.8159217834473, - "p90": 2969.0239429473877, - "p95": 3296.128034591675, - "p99": 4143.392086029053 + "p50": 102.88000106811523, + "p90": 108.47999900579453, + "p95": 109.69600081443787, + "p99": 116.89600348472595 }, "roundtrip": { - "p50": 3649.6639251708984, - "p90": 3799.5200157165527, - "p95": 4219.871997833252, - "p99": 4852.320194244385 + "p50": 186.39999628067017, + "p90": 195.71200013160706, + "p95": 198.5280066728592, + "p99": 208.8959962129593 }, "isolatedSum": { - "p50": 5428.8318157196045, - "p90": 5583.2319259643555, - "p95": 6627.327919006348, - "p99": 8090.01612663269 + "p50": 216.76800400018692, + "p90": 229.88799959421158, + "p95": 234.20800268650055, + "p99": 250.91201066970825 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1879048192, - "combineLogicalBytes": 1879048192, - "fanoutMean": 8, - "recvTokensMax": 16384, - "stragglerRank": 6, + "dispatchLogicalBytes": 33423360, + "combineLogicalBytes": 33423360, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 4096, - "globalTokens": 32768, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 3252.351999282837, - "p90": 3331.104040145874, - "p95": 3698.4639167785645, - "p99": 4560.927867889404 + "p50": 125.791996717453, + "p90": 
133.59999656677246, + "p95": 136.1600011587143, + "p99": 144.6080058813095 }, "combine": { - "p50": 3938.591957092285, - "p90": 4131.968021392822, - "p95": 4414.432048797607, - "p99": 5301.055908203125 + "p50": 122.23999947309494, + "p90": 130.62399625778198, + "p95": 132.35199451446533, + "p99": 137.88799941539764 }, "roundtrip": { - "p50": 5385.6000900268555, - "p90": 5495.0079917907715, - "p95": 6258.880138397217, - "p99": 6821.216106414795 + "p50": 218.4319943189621, + "p90": 226.78400576114655, + "p95": 229.44000363349915, + "p99": 236.32000386714935 }, "isolatedSum": { - "p50": 7190.943956375122, - "p90": 7463.072061538696, - "p95": 8112.895965576172, - "p99": 9861.98377609253 + "p50": 248.03199619054794, + "p90": 264.22399282455444, + "p95": 268.5119956731796, + "p99": 282.49600529670715 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 3758096384, - "combineLogicalBytes": 3758096384, - "fanoutMean": 8, - "recvTokensMax": 32768, - "stragglerRank": 7, + "dispatchLogicalBytes": 66576384, + "combineLogicalBytes": 66576384, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -20974,47 +20407,48 @@ ] }, { - "id": "cx-a989dada", - "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|prefill|normal|none|none|0|tuned||9e6ac678a09f7f8", - "colorKey": "b300_77566238", - "comparisonKey": "0cdc743c580a47d3", + "id": "cx-07f80259", + "identity": "gb300|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||00df46ebb2988d7", + "colorKey": "gb300_74218200", + "comparisonKey": "771769a5e7987ff5", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:58:19.169974+00:00", + "generatedAt": "2026-06-29T13:43:34.234497+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_01", - "sku": "b300", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", "backend": 
"deepep", - "phase": "prefill", + "phase": "decode", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep · bf16 · balanced", + "label": "GB300 EP8 · deepep · bf16", "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, "experts": 256, - "routing": "balanced", - "routingLabel": "balanced", + "routing": "uniform", + "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, "dispatchDtype": "bf16", + "kernelGeneration": "v1", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1351, + "achievedFraction": 0.1316, "configuredUnits": 20, - "deviceUnits": 148, + "deviceUnits": 152, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -21022,133 +20456,96 @@ }, "placement": { "kind": "packed", - "nodes": 1, + "nodes": 2, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "9e6ac678a09f7f8", - "workloadId": "set:3:2dad1a73ff872905", - "workloadSource": "canonical-serialized", + "traceSignature": "00df46ebb2988d7", + "workloadId": null, + "workloadSource": "seeded-runtime", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", + "backendVersion": "1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271876366", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271876366", - "createdAt": "2026-06-26T23:58:19.169974+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28374342409", + "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409", + "createdAt": "2026-06-29T13:08:27Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 1816.2239789962769, - "p90": 2297.152042388916, - "p95": 2896.320104598999, - "p99": 3506.6559314727783 - }, - "combine": { - "p50": 1859.1680526733398, - "p90": 2047.4560260772705, - "p95": 2707.1681022644043, - "p99": 3027.2960662841797 - }, - "roundtrip": { - "p50": 1932.8960180282593, - "p90": 2138.335943222046, - "p95": 2772.9599475860596, - "p99": 3193.279981613159 - }, - "isolatedSum": { - "p50": 3675.3920316696167, - "p90": 4344.6080684661865, - "p95": 5603.488206863403, - "p99": 6533.951997756958 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 117440512, - "combineLogicalBytes": 117440512, - "fanoutMean": 8, - "recvTokensMax": 1024, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 512, - "globalTokens": 4096, + "tokensPerRank": 8, + "globalTokens": 64, "dispatch": { - "p50": 2029.6320915222168, - "p90": 2355.0078868865967, - "p95": 3023.6799716949463, - "p99": 3532.543897628784 + "p50": 93.9520001411438, + "p90": 107.42399841547012, + "p95": 111.55200004577637, + "p99": 120.35199999809265 }, "combine": { - "p50": 2128.671884536743, - "p90": 2460.576057434082, - "p95": 3003.5200119018555, - "p99": 3345.4079627990723 + "p50": 83.23200047016144, + "p90": 89.91999924182892, + "p95": 92.03200042247772, + "p99": 97.88800030946732 }, "roundtrip": { - "p50": 2337.8241062164307, - "p90": 2708.159923553467, - "p95": 3375.744104385376, - "p99": 3673.952102661133 + "p50": 155.35999834537506, + "p90": 164.0319973230362, + "p95": 167.1680063009262, + "p99": 173.95199835300446 }, "isolatedSum": { - "p50": 4158.30397605896, - "p90": 4815.583944320679, - "p95": 6027.199983596802, - "p99": 6877.951860427856 + "p50": 177.18400061130524, + "p90": 
197.34399765729904, + "p95": 203.5840004682541, + "p99": 218.24000030755997 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 469762048, - "combineLogicalBytes": 469762048, - "fanoutMean": 8, - "recvTokensMax": 4096, - "stragglerRank": 4, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2048, - "globalTokens": 16384, + "tokensPerRank": 64, + "globalTokens": 512, "dispatch": { - "p50": 2545.1838970184326, - "p90": 2883.19993019104, - "p95": 3424.1280555725098, - "p99": 3852.544069290161 + "p50": 110.20799726247787, + "p90": 118.17599833011627, + "p95": 121.24799937009811, + "p99": 127.77599692344666 }, "combine": { - "p50": 2903.520107269287, - "p90": 3124.959945678711, - "p95": 3718.2400226593018, - "p99": 4377.791881561279 + "p50": 105.05600273609161, + "p90": 109.24799740314484, + "p95": 111.1999973654747, + "p99": 117.76000261306763 }, "roundtrip": { - "p50": 3660.6719493865967, - "p90": 3928.3199310302734, - "p95": 4631.743907928467, - "p99": 5148.064136505127 + "p50": 185.92000007629395, + "p90": 193.08799505233765, + "p95": 196.6399997472763, + "p99": 203.64800095558167 }, "isolatedSum": { - "p50": 5448.70400428772, - "p90": 6008.159875869751, - "p95": 7142.3680782318115, - "p99": 8230.33595085144 + "p50": 215.2639999985695, + "p90": 227.4239957332611, + "p95": 232.44799673557281, + "p99": 245.53599953651428 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1879048192, - "combineLogicalBytes": 1879048192, - "fanoutMean": 8, - "recvTokensMax": 16384, - "stragglerRank": 4, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -21156,474 +20553,367 @@ ] }, { - "id": "cx-092ff174", - "identity": 
"b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|8|prefill|normal|none|none|0|tuned||7aa44c7b86748b9", - "colorKey": "b300_a314501b", - "comparisonKey": "c51826952291f0ba", + "id": "cx-7324ba0b", + "identity": "gb300|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||74444524b5db510", + "colorKey": "gb300_74218200", + "comparisonKey": "771769a5e7987ff5", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:57:58.409823+00:00", + "generatedAt": "2026-06-29T13:37:36.702477+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_14", - "sku": "b300", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", "backend": "deepep", - "phase": "prefill", + "phase": "decode", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep · bf16 · balanced-rank-local", + "label": "GB300 EP8 · deepep · bf16", "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, "experts": 256, - "routing": "balanced-rank-local", - "routingLabel": "balanced-rank-local", + "routing": "uniform", + "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, "dispatchDtype": "bf16", + "kernelGeneration": "v1", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1351, + "achievedFraction": 0.1316, "configuredUnits": 20, - "deviceUnits": 148, + "deviceUnits": 152, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, "paretoEligible": false }, "placement": { - "kind": "packed", - "nodes": 1, + "kind": "adversarial", + "nodes": 
2, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "7aa44c7b86748b9", - "workloadId": "set:3:388ff74baef05c72", - "workloadSource": "canonical-serialized", + "traceSignature": "74444524b5db510", + "workloadId": null, + "workloadSource": "seeded-runtime", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", + "backendVersion": "1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271883343", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271883343", - "createdAt": "2026-06-26T23:57:58.409823+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28374342409", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409", + "createdAt": "2026-06-29T13:08:27Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 1, + "globalTokens": 8, "dispatch": { - "p50": 69.37599927186966, - "p90": 71.03999704122543, - "p95": 73.37599992752075, - "p99": 81.69600367546082 + "p50": 98.84800016880035, + "p90": 110.72000116109848, + "p95": 117.34399944543839, + "p99": 128.89599800109863 }, "combine": { - "p50": 67.61600077152252, - "p90": 69.60000097751617, - "p95": 77.02399790287018, - "p99": 83.39200168848038 + "p50": 82.40000158548355, + "p90": 89.34400230646133, + "p95": 92.70399808883667, + "p99": 98.75199943780899 }, "roundtrip": { - "p50": 119.93599683046341, - "p90": 126.01600587368011, - "p95": 128.48000228405, - "p99": 135.55200397968292 + "p50": 155.93600273132324, + "p90": 167.07199811935425, + "p95": 170.1440066099167, + "p99": 179.1040003299713 }, "isolatedSum": { - "p50": 136.99200004339218, - "p90": 140.6399980187416, - "p95": 150.39999783039093, - "p99": 165.0880053639412 + "p50": 181.2480017542839, + "p90": 200.06400346755981, + "p95": 
210.04799753427505, + "p99": 227.64799743890762 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 14680064, - "combineLogicalBytes": 14680064, - "fanoutMean": 1, - "recvTokensMax": 128, - "stragglerRank": 4, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 512, - "globalTokens": 4096, + "tokensPerRank": 2, + "globalTokens": 16, "dispatch": { - "p50": 93.98400038480759, - "p90": 98.68799895048141, - "p95": 100.28800368309021, - "p99": 105.72800040245056 + "p50": 98.24000298976898, + "p90": 110.6560006737709, + "p95": 115.87200313806534, + "p99": 126.14400684833527 }, "combine": { - "p50": 115.52000045776367, - "p90": 116.5120005607605, - "p95": 116.73600226640701, - "p99": 123.48800152540207 + "p50": 82.46400207281113, + "p90": 87.67999708652496, + "p95": 92.51199662685394, + "p99": 98.14400225877762 }, "roundtrip": { - "p50": 193.08799505233765, - "p90": 197.88800179958344, - "p95": 198.59200716018677, - "p99": 204.0960043668747 + "p50": 155.8080017566681, + "p90": 165.95199704170227, + "p95": 169.27999258041382, + "p99": 177.76000499725342 }, "isolatedSum": { - "p50": 209.50400084257126, - "p90": 215.1999995112419, - "p95": 217.02400594949722, - "p99": 229.21600192785263 + "p50": 180.7040050625801, + "p90": 198.33599776029587, + "p95": 208.38399976491928, + "p99": 224.28800910711288 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 58720256, - "combineLogicalBytes": 58720256, - "fanoutMean": 1, - "recvTokensMax": 512, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2048, - "globalTokens": 16384, - "dispatch": { - "p50": 197.37599790096283, - "p90": 199.96799528598785, - "p95": 200.80000162124634, - "p99": 207.10399746894836 - }, - 
"combine": { - "p50": 248.1600046157837, - "p90": 249.9839961528778, - "p95": 250.68798661231995, - "p99": 253.79198789596558 - }, - "roundtrip": { - "p50": 429.8880100250244, - "p90": 434.30399894714355, - "p95": 436.2879991531372, - "p99": 442.84799695014954 - }, - "isolatedSum": { - "p50": 445.5360025167465, - "p90": 449.95199143886566, - "p95": 451.4879882335663, - "p99": 460.89598536491394 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 234881024, - "combineLogicalBytes": 234881024, - "fanoutMean": 1, - "recvTokensMax": 2048, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-91ac2845", - "identity": "b300|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|balanced+eplb|8|prefill|normal|none|none|0|tuned||df54a9510825f71", - "colorKey": "b300_592e9a16", - "comparisonKey": "0a480d3d40419b1c", - "schemaVersion": 3, - "generatedAt": "2026-06-27T09:48:29.790713+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_03", - "sku": "b300", - "backend": "deepep", - "phase": "prefill", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "B300 EP8 · deepep · bf16 · balanced+eplb", - "model": "DeepSeek-V3 (EPLB physical)", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 288, - "routing": "balanced", - "routingLabel": "balanced+eplb", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": true, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1351, - "configuredUnits": 20, - "deviceUnits": 148, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false 
- }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "df54a9510825f71", - "workloadId": "set:6:2dad1a73ff872905", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": 1, - "eplbImbalanceAfter": 1, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28285617940", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285617940", - "createdAt": "2026-06-27T09:48:29.790713+00:00", - "sha": "149586650dbed5b7579537347e9489d5b41543c1" - }, - "rows": [ - { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 4, + "globalTokens": 32, "dispatch": { - "p50": 84.25600081682205, - "p90": 88.16000074148178, - "p95": 89.21600133180618, - "p99": 96.03200107812881 + "p50": 98.59199821949005, + "p90": 111.96800321340561, + "p95": 118.75200271606445, + "p99": 129.2160004377365 }, "combine": { - "p50": 82.2720006108284, - "p90": 90.71999788284302, - "p95": 90.97599983215332, - "p99": 102.49599814414978 + "p50": 84.54400300979614, + "p90": 90.94399958848953, + "p95": 93.50399672985077, + "p99": 99.87200051546097 }, "roundtrip": { - "p50": 146.40000462532043, - "p90": 149.1200029850006, - "p95": 150.68799257278442, - "p99": 157.31200575828552 + "p50": 158.91200304031372, + "p90": 170.6559956073761, + "p95": 173.47200214862823, + "p99": 182.5920045375824 }, "isolatedSum": { - "p50": 166.52800142765045, - "p90": 178.8799986243248, - "p95": 180.1920011639595, - "p99": 198.5279992222786 + "p50": 183.1360012292862, + "p90": 202.91200280189514, + "p95": 212.25599944591522, + "p99": 229.08800095319748 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 29360128, - "combineLogicalBytes": 29360128, - "fanoutMean": 2, - "recvTokensMax": 384, - "stragglerRank": 7, + "dispatchLogicalBytes": 2480128, + 
"combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 256, - "globalTokens": 2048, + "tokensPerRank": 8, + "globalTokens": 64, "dispatch": { - "p50": 96.12800180912018, - "p90": 98.59199821949005, - "p95": 100.44799745082855, - "p99": 120.12799829244614 + "p50": 99.93600100278854, + "p90": 111.29599809646606, + "p95": 116.99199676513672, + "p99": 128.4160017967224 }, "combine": { - "p50": 104.92800176143646, - "p90": 113.92000317573547, - "p95": 114.43199962377548, - "p99": 116.38399958610535 + "p50": 85.02399921417236, + "p90": 92.22400188446045, + "p95": 94.65599805116653, + "p99": 99.64799880981445 }, "roundtrip": { - "p50": 184.28799510002136, - "p90": 191.74399971961975, - "p95": 194.14399564266205, - "p99": 206.01600408554077 + "p50": 160.99199652671814, + "p90": 170.59199512004852, + "p95": 174.55999553203583, + "p99": 186.91200017929077 }, "isolatedSum": { - "p50": 201.05600357055664, - "p90": 212.51200139522552, - "p95": 214.87999707460403, - "p99": 236.51199787855148 + "p50": 184.9600002169609, + "p90": 203.5199999809265, + "p95": 211.64799481630325, + "p99": 228.06400060653687 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 58720256, - "combineLogicalBytes": 58720256, - "fanoutMean": 2, - "recvTokensMax": 768, - "stragglerRank": 7, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 512, - "globalTokens": 4096, + "tokensPerRank": 16, + "globalTokens": 128, "dispatch": { - "p50": 131.55199587345123, - "p90": 138.91200721263885, - "p95": 140.19200205802917, - "p99": 149.85600113868713 + "p50": 99.58399832248688, + "p90": 111.29599809646606, + "p95": 114.52800035476685, + "p99": 123.07199835777283 }, "combine": { - "p50": 142.65599846839905, - "p90": 
151.90400183200836, - "p95": 152.41600573062897, - "p99": 164.09599781036377 + "p50": 87.0399996638298, + "p90": 94.11200135946274, + "p95": 96.76799923181534, + "p99": 101.50399804115295 }, "roundtrip": { - "p50": 258.59200954437256, - "p90": 264.6400034427643, - "p95": 268.38400959968567, - "p99": 282.943993806839 + "p50": 162.33600676059723, + "p90": 172.28800058364868, + "p95": 175.84000527858734, + "p99": 188.31999599933624 }, "isolatedSum": { - "p50": 274.2079943418503, - "p90": 290.8160090446472, - "p95": 292.60800778865814, - "p99": 313.9519989490509 + "p50": 186.62399798631668, + "p90": 205.4079994559288, + "p95": 211.29599958658218, + "p99": 224.57599639892578 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 117440512, - "combineLogicalBytes": 117440512, - "fanoutMean": 2, - "recvTokensMax": 1536, - "stragglerRank": 7, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 1024, - "globalTokens": 8192, + "tokensPerRank": 32, + "globalTokens": 256, "dispatch": { - "p50": 199.8399943113327, - "p90": 207.10399746894836, - "p95": 214.36800062656403, - "p99": 236.4799976348877 + "p50": 103.07200253009796, + "p90": 114.62400108575821, + "p95": 118.94399672746658, + "p99": 134.0479999780655 }, "combine": { - "p50": 262.36799359321594, - "p90": 262.9759907722473, - "p95": 263.35999369621277, - "p99": 272.5119888782501 + "p50": 93.05600076913834, + "p90": 98.2080027461052, + "p95": 100.22400319576263, + "p99": 107.84000158309937 }, "roundtrip": { - "p50": 435.5199933052063, - "p90": 441.9200122356415, - "p95": 445.4079866409302, - "p99": 463.29599618911743 + "p50": 167.29600727558136, + "p90": 178.81600558757782, + "p95": 182.3039948940277, + "p99": 192.09599494934082 }, "isolatedSum": { - "p50": 462.20798790454865, - "p90": 470.0799882411957, - "p95": 477.7279943227768, - "p99": 508.9919865131378 + 
"p50": 196.1280032992363, + "p90": 212.8320038318634, + "p95": 219.16799992322922, + "p99": 241.88800156116486 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 234881024, - "combineLogicalBytes": 234881024, - "fanoutMean": 2, - "recvTokensMax": 3072, - "stragglerRank": 4, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2048, - "globalTokens": 16384, + "tokensPerRank": 64, + "globalTokens": 512, "dispatch": { - "p50": 345.7599878311157, - "p90": 352.7039885520935, - "p95": 355.9040129184723, - "p99": 390.3999924659729 + "p50": 112.8000020980835, + "p90": 122.46400117874146, + "p95": 125.31200051307678, + "p99": 132.79999792575836 }, "combine": { - "p50": 459.55199003219604, - "p90": 462.911993265152, - "p95": 470.8159863948822, - "p99": 483.6159944534302 + "p50": 106.46399855613708, + "p90": 113.24799805879593, + "p95": 117.79200285673141, + "p99": 129.98400628566742 }, "roundtrip": { - "p50": 786.9439721107483, - "p90": 792.8640246391296, - "p95": 797.5040078163147, - "p99": 829.7920227050781 + "p50": 190.33600389957428, + "p90": 199.35999810695648, + "p95": 202.01599597930908, + "p99": 218.62399578094482 }, "isolatedSum": { - "p50": 805.3119778633118, - "p90": 815.6159818172455, - "p95": 826.7199993133545, - "p99": 874.0159869194031 + "p50": 219.26400065422058, + "p90": 235.71199923753738, + "p95": 243.1040033698082, + "p99": 262.7840042114258 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 469762048, - "combineLogicalBytes": 469762048, - "fanoutMean": 2, - "recvTokensMax": 6144, - "stragglerRank": 6, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 4096, - "globalTokens": 32768, + "tokensPerRank": 128, + "globalTokens": 
1024, "dispatch": { - "p50": 648.5120058059692, - "p90": 655.6479930877686, - "p95": 660.5439782142639, - "p99": 667.7119731903076 + "p50": 128.38399410247803, + "p90": 137.95199990272522, + "p95": 141.50400459766388, + "p99": 148.51200580596924 }, "combine": { - "p50": 828.0959725379944, - "p90": 838.4320139884949, - "p95": 840.6400084495544, - "p99": 855.0400137901306 + "p50": 126.08000636100769, + "p90": 132.79999792575836, + "p95": 134.5600038766861, + "p99": 145.47200500965118 }, "roundtrip": { - "p50": 1455.3279876708984, - "p90": 1466.5919542312622, - "p95": 1471.0079431533813, - "p99": 1482.4320077896118 + "p50": 226.4000028371811, + "p90": 236.03199422359467, + "p95": 239.96800184249878, + "p99": 246.20799720287323 }, "isolatedSum": { - "p50": 1476.6079783439636, - "p90": 1494.0800070762634, - "p95": 1501.1839866638184, - "p99": 1522.7519869804382 + "p50": 254.46400046348572, + "p90": 270.7519978284836, + "p95": 276.06400847435, + "p99": 293.9840108156204 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 939524096, - "combineLogicalBytes": 939524096, - "fanoutMean": 2, - "recvTokensMax": 12288, - "stragglerRank": 6, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 @@ -21631,47 +20921,48 @@ ] }, { - "id": "cx-eac6e215", - "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|prefill|normal|none|none|0|tuned||38fd0bcf7109c32", - "colorKey": "b300_5b993222", - "comparisonKey": "d3d6cc25fee96bc7", + "id": "cx-0d0d8f23", + "identity": "gb300|deepep|v1|7168|8|256|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||74444524b5db510", + "colorKey": "gb300_b97bfb88", + "comparisonKey": "61b32b843c8fbec1", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:58:52.035249+00:00", + "generatedAt": "2026-06-29T13:47:31.111489+00:00", "status": "valid", - 
"publicationStatus": "official", - "runner": "b300-nv_09", - "sku": "b300", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", "backend": "deepep", - "phase": "prefill", + "phase": "decode", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", + "measurementContract": "runtime-visible-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep · bf16 · hotspot-single", + "label": "GB300 EP8 · deepep · bf16", "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, "experts": 256, - "routing": "hotspot-single", - "routingLabel": "hotspot-single", + "routing": "uniform", + "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, "dispatchDtype": "bf16", + "kernelGeneration": "v1", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1351, + "achievedFraction": 0.1316, "configuredUnits": 20, - "deviceUnits": 148, + "deviceUnits": 152, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -21679,426 +20970,318 @@ }, "placement": { "kind": "packed", - "nodes": 1, + "nodes": 2, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "38fd0bcf7109c32", - "workloadId": "set:3:b952d4a43d688b50", - "workloadSource": "canonical-serialized", + "traceSignature": "74444524b5db510", + "workloadId": null, + "workloadSource": "seeded-runtime", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", + "backendVersion": "1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": 
"28271903494", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271903494", - "createdAt": "2026-06-26T23:58:52.035249+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28374342409", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409", + "createdAt": "2026-06-29T13:08:27Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 1, + "globalTokens": 8, "dispatch": { - "p50": 106.08000308275223, - "p90": 107.87200182676315, - "p95": 109.15199667215347, - "p99": 120.28799951076508 + "p50": 89.02399986982346, + "p90": 103.29599678516388, + "p95": 107.64800012111664, + "p99": 119.39200013875961 }, "combine": { - "p50": 127.83999741077423, - "p90": 129.85600531101227, - "p95": 130.97600638866425, - "p99": 139.5840048789978 + "p50": 79.55200225114822, + "p90": 84.70399677753448, + "p95": 87.36000210046768, + "p99": 95.16800194978714 }, "roundtrip": { - "p50": 219.39200162887573, - "p90": 224.16000068187714, - "p95": 225.055992603302, - "p99": 235.35999655723572 + "p50": 149.85600113868713, + "p90": 162.4639928340912, + "p95": 166.81599617004395, + "p99": 180.35200238227844 }, "isolatedSum": { - "p50": 233.92000049352646, - "p90": 237.72800713777542, - "p95": 240.12800306081772, - "p99": 259.8720043897629 + "p50": 168.57600212097168, + "p90": 187.99999356269836, + "p95": 195.00800222158432, + "p99": 214.56000208854675 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 78102528, - "combineLogicalBytes": 78102528, - "fanoutMean": 5.3203125, - "recvTokensMax": 1024, - "stragglerRank": 7, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 512, - "globalTokens": 4096, + "tokensPerRank": 2, + "globalTokens": 16, "dispatch": { - "p50": 217.95199811458588, - "p90": 
224.03199970722198, - "p95": 228.83200645446777, - "p99": 252.70399451255798 + "p50": 89.85599875450134, + "p90": 103.29599678516388, + "p95": 109.31199789047241, + "p99": 124.70400333404541 }, "combine": { - "p50": 336.38399839401245, - "p90": 338.49599957466125, - "p95": 339.9040102958679, - "p99": 348.4160006046295 + "p50": 81.34400099515915, + "p90": 86.14400029182434, + "p95": 89.63199704885483, + "p99": 95.64799815416336 }, "roundtrip": { - "p50": 535.8399748802185, - "p90": 546.0159778594971, - "p95": 551.3280034065247, - "p99": 558.3680272102356 + "p50": 148.95999431610107, + "p90": 160.22400557994843, + "p95": 163.42400014400482, + "p99": 171.48800194263458 }, "isolatedSum": { - "p50": 554.3359965085983, - "p90": 562.5279992818832, - "p95": 568.7360167503357, - "p99": 601.1199951171875 + "p50": 171.1999997496605, + "p90": 189.43999707698822, + "p95": 198.94399493932724, + "p99": 220.35200148820877 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 311091200, - "combineLogicalBytes": 311091200, - "fanoutMean": 5.2978515625, - "recvTokensMax": 4096, - "stragglerRank": 7, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2048, - "globalTokens": 16384, - "dispatch": { - "p50": 685.0559711456299, - "p90": 694.5599913597107, - "p95": 696.3199973106384, - "p99": 705.3760290145874 - }, - "combine": { - "p50": 1085.4400396347046, - "p90": 1086.3360166549683, - "p95": 1087.6480340957642, - "p99": 1096.7680215835571 - }, - "roundtrip": { - "p50": 1752.511978149414, - "p90": 1760.3199481964111, - "p95": 1762.0480060577393, - "p99": 1772.6080417633057 - }, - "isolatedSum": { - "p50": 1770.4960107803345, - "p90": 1780.896008014679, - "p95": 1783.9680314064026, - "p99": 1802.1440505981445 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1241511936, - "combineLogicalBytes": 1241511936, - 
"fanoutMean": 5.28570556640625, - "recvTokensMax": 16384, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-b38b286e", - "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|prefill|normal|none|none|0|tuned||bfbb64a166e9f1c", - "colorKey": "b300_5b993222", - "comparisonKey": "acefe503588b8e8a", - "schemaVersion": 3, - "generatedAt": "2026-06-27T09:50:40.107682+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_13", - "sku": "b300", - "backend": "deepep", - "phase": "prefill", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "B300 EP8 · deepep · bf16 · hotspot-single", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "hotspot-single", - "routingLabel": "hotspot-single", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1351, - "configuredUnits": 20, - "deviceUnits": 148, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "bfbb64a166e9f1c", - "workloadId": "set:6:b952d4a43d688b50", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { 
- "id": "28285666343", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285666343", - "createdAt": "2026-06-27T09:50:40.107682+00:00", - "sha": "149586650dbed5b7579537347e9489d5b41543c1" - }, - "rows": [ - { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 4, + "globalTokens": 32, "dispatch": { - "p50": 104.12800312042236, - "p90": 105.79200088977814, - "p95": 106.65600001811981, - "p99": 124.79999661445618 + "p50": 90.08000046014786, + "p90": 105.43999820947647, + "p95": 111.26399785280228, + "p99": 127.26399302482605 }, "combine": { - "p50": 128.9599984884262, - "p90": 138.59200477600098, - "p95": 139.42399621009827, - "p99": 144.16000247001648 + "p50": 82.71999657154083, + "p90": 87.2960016131401, + "p95": 91.0400003194809, + "p99": 97.08800166845322 }, "roundtrip": { - "p50": 217.3759937286377, - "p90": 224.0000069141388, - "p95": 225.055992603302, - "p99": 228.89600694179535 + "p50": 151.74399316310883, + "p90": 164.76799547672272, + "p95": 169.72799599170685, + "p99": 210.14399826526642 }, "isolatedSum": { - "p50": 233.08800160884857, - "p90": 244.3840056657791, - "p95": 246.07999622821808, - "p99": 268.95999908447266 + "p50": 172.7999970316887, + "p90": 192.73599982261658, + "p95": 202.30399817228317, + "p99": 224.35199469327927 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 78102528, - "combineLogicalBytes": 78102528, - "fanoutMean": 5.3203125, - "recvTokensMax": 1024, - "stragglerRank": 5, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 256, - "globalTokens": 2048, + "tokensPerRank": 8, + "globalTokens": 64, "dispatch": { - "p50": 141.76000654697418, - "p90": 143.61600577831268, - "p95": 145.53600549697876, - "p99": 164.44799304008484 + "p50": 92.41600334644318, + "p90": 105.56799918413162, + "p95": 109.6000000834465, + "p99": 120.86399644613266 
}, "combine": { - "p50": 188.38399648666382, - "p90": 190.17599523067474, - "p95": 192.00000166893005, - "p99": 201.9840031862259 + "p50": 83.99999886751175, + "p90": 88.03199976682663, + "p95": 92.38400310277939, + "p99": 98.14400225877762 }, "roundtrip": { - "p50": 318.11198592185974, - "p90": 323.64800572395325, - "p95": 325.0240087509155, - "p99": 335.3919982910156 + "p50": 154.30399775505066, + "p90": 166.75199568271637, + "p95": 170.17599940299988, + "p99": 179.9039989709854 }, "isolatedSum": { - "p50": 330.144003033638, - "p90": 333.7920010089874, - "p95": 337.5360071659088, - "p99": 366.43199622631073 + "p50": 176.41600221395493, + "p90": 193.59999895095825, + "p95": 201.9840031862259, + "p99": 219.00799870491028 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 156090368, - "combineLogicalBytes": 156090368, - "fanoutMean": 5.31640625, - "recvTokensMax": 2048, - "stragglerRank": 5, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 512, - "globalTokens": 4096, + "tokensPerRank": 16, + "globalTokens": 128, "dispatch": { - "p50": 216.8319970369339, - "p90": 220.0320065021515, - "p95": 223.32799434661865, - "p99": 231.29600286483765 + "p50": 93.53599697351456, + "p90": 105.85600137710571, + "p95": 110.11199653148651, + "p99": 121.21599912643433 }, "combine": { - "p50": 336.5760147571564, - "p90": 338.20798993110657, - "p95": 339.6799862384796, - "p99": 351.23199224472046 + "p50": 85.63199639320374, + "p90": 92.79999881982803, + "p95": 96.41599655151367, + "p99": 102.4319976568222 }, "roundtrip": { - "p50": 534.6879959106445, - "p90": 541.5040254592896, - "p95": 543.8399910926819, - "p99": 547.327995300293 + "p50": 156.92800283432007, + "p90": 169.3120002746582, + "p95": 173.92000555992126, + "p99": 184.7040057182312 }, "isolatedSum": { - "p50": 553.4080117940903, - "p90": 558.239996433258, - 
"p95": 563.0079805850983, - "p99": 582.5279951095581 + "p50": 179.1679933667183, + "p90": 198.65600019693375, + "p95": 206.52799308300018, + "p99": 223.64799678325653 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 311091200, - "combineLogicalBytes": 311091200, - "fanoutMean": 5.2978515625, - "recvTokensMax": 4096, - "stragglerRank": 5, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 1024, - "globalTokens": 8192, + "tokensPerRank": 32, + "globalTokens": 256, "dispatch": { - "p50": 369.1520094871521, - "p90": 377.1199882030487, - "p95": 378.62399220466614, - "p99": 388.35200667381287 + "p50": 98.4639972448349, + "p90": 108.86400192975998, + "p95": 112.76800185441971, + "p99": 120.67200243473053 }, "combine": { - "p50": 580.5119872093201, - "p90": 582.1120142936707, - "p95": 582.5920104980469, - "p99": 585.3760242462158 + "p50": 89.15200084447861, + "p90": 96.6079980134964, + "p95": 98.9760011434555, + "p99": 104.89600151777267 }, "roundtrip": { - "p50": 939.1679763793945, - "p90": 944.2880153656006, - "p95": 945.9840059280396, - "p99": 958.079993724823 + "p50": 161.98399662971497, + "p90": 172.7360039949417, + "p95": 176.60799622535706, + "p99": 186.3359957933426 }, "isolatedSum": { - "p50": 949.6639966964722, - "p90": 959.2320024967194, - "p95": 961.216002702713, - "p99": 973.7280309200287 + "p50": 187.6159980893135, + "p90": 205.47199994325638, + "p95": 211.7440029978752, + "p99": 225.5680039525032 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 620648448, - "combineLogicalBytes": 620648448, - "fanoutMean": 5.2847900390625, - "recvTokensMax": 8192, - "stragglerRank": 5, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 
2048, - "globalTokens": 16384, + "tokensPerRank": 64, + "globalTokens": 512, "dispatch": { - "p50": 684.1279864311218, - "p90": 693.3119893074036, - "p95": 694.6560144424438, - "p99": 802.5919795036316 + "p50": 110.6560006737709, + "p90": 119.23199892044067, + "p95": 123.23199957609177, + "p99": 131.99999928474426 }, "combine": { - "p50": 1085.15202999115, - "p90": 1086.7520570755005, - "p95": 1087.3279571533203, - "p99": 1098.9760160446167 + "p50": 104.63999956846237, + "p90": 110.33599823713303, + "p95": 112.5440001487732, + "p99": 121.50400131940842 }, "roundtrip": { - "p50": 1750.656008720398, - "p90": 1759.071946144104, - "p95": 1762.7840042114258, - "p99": 1789.2800569534302 + "p50": 186.11200153827667, + "p90": 193.85600090026855, + "p95": 197.85599410533905, + "p99": 204.22400534152985 }, "isolatedSum": { - "p50": 1769.2800164222717, - "p90": 1780.064046382904, - "p95": 1781.9839715957642, - "p99": 1901.5679955482483 + "p50": 215.29600024223328, + "p90": 229.5679971575737, + "p95": 235.77599972486496, + "p99": 253.50400060415268 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1241511936, - "combineLogicalBytes": 1241511936, - "fanoutMean": 5.28570556640625, - "recvTokensMax": 16384, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 4096, - "globalTokens": 32768, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 1323.3599662780762, - "p90": 1332.1599960327148, - "p95": 1336.5440368652344, - "p99": 1345.3439474105835 + "p50": 124.22399967908859, + "p90": 132.9919993877411, + "p95": 136.09600067138672, + "p99": 145.21600306034088 }, "combine": { - "p50": 2080.22403717041, - "p90": 2082.0159912109375, - "p95": 2084.0959548950195, - "p99": 2094.655990600586 + "p50": 123.4240010380745, + "p90": 131.26400113105774, + "p95": 134.20799374580383, + "p99": 136.89599931240082 }, 
"roundtrip": { - "p50": 3382.688045501709, - "p90": 3391.9999599456787, - "p95": 3396.4478969573975, - "p99": 3412.480115890503 + "p50": 221.98399901390076, + "p90": 231.58399760723114, + "p95": 234.9119931459427, + "p99": 247.99999594688416 }, "isolatedSum": { - "p50": 3403.5840034484863, - "p90": 3414.1759872436523, - "p95": 3420.639991760254, - "p99": 3439.9999380111694 + "p50": 247.6480007171631, + "p90": 264.2560005187988, + "p95": 270.30399441719055, + "p99": 282.1120023727417 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2484242432, - "combineLogicalBytes": 2484242432, - "fanoutMean": 5.288299560546875, - "recvTokensMax": 32768, - "stragglerRank": 7, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 @@ -22106,47 +21289,48 @@ ] }, { - "id": "cx-6ace94e5", - "identity": "b300|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|hotspot-single+eplb|8|prefill|normal|none|none|0|tuned||29ae5ace13636f8", - "colorKey": "b300_39a5906c", - "comparisonKey": "4191eeca9b95da96", + "id": "cx-cb8753e8", + "identity": "gb300|deepep|v1|7168|8|384|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||6d507ec2ec8998f", + "colorKey": "gb300_b97bfb88", + "comparisonKey": "e0f3959bcbc3fc9a", "schemaVersion": 3, - "generatedAt": "2026-06-27T09:50:47.306052+00:00", + "generatedAt": "2026-06-29T13:57:22.452311+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_16", - "sku": "b300", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", "backend": "deepep", - "phase": "prefill", + "phase": "decode", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", + 
"measurementContract": "runtime-visible-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep · bf16 · hotspot-single+eplb", - "model": "DeepSeek-V3 (EPLB physical)", + "label": "GB300 EP8 · deepep · bf16", + "model": "Kimi-K2", "shape": { "hidden": 7168, "topk": 8, - "experts": 288, - "routing": "hotspot-single", - "routingLabel": "hotspot-single+eplb", + "experts": 384, + "routing": "uniform", + "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", - "eplbEnabled": true, + "eplbEnabled": false, "dispatchDtype": "bf16", + "kernelGeneration": "v1", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1351, + "achievedFraction": 0.1316, "configuredUnits": 20, - "deviceUnits": 148, + "deviceUnits": 152, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -22154,244 +21338,318 @@ }, "placement": { "kind": "packed", - "nodes": 1, + "nodes": 2, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "29ae5ace13636f8", - "workloadId": "set:6:b952d4a43d688b50", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": 1.8466796875, - "eplbImbalanceAfter": 1.0002700343276514, - "backendVersion": "1.2.1", + "traceSignature": "6d507ec2ec8998f", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28285668831", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285668831", - "createdAt": "2026-06-27T09:50:47.306052+00:00", - "sha": "149586650dbed5b7579537347e9489d5b41543c1" + "id": "28374342409", + "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409", + "createdAt": "2026-06-29T13:08:27Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 1, + "globalTokens": 8, "dispatch": { - "p50": 95.74399888515472, - "p90": 99.84000027179718, - "p95": 101.31199657917023, - "p99": 107.42399841547012 + "p50": 106.59199953079224, + "p90": 121.15199863910675, + "p95": 125.47199428081512, + "p99": 157.3439985513687 }, "combine": { - "p50": 115.26399850845337, - "p90": 116.35199934244156, - "p95": 117.5680011510849, - "p99": 131.77600502967834 + "p50": 82.91199803352356, + "p90": 87.26400136947632, + "p95": 92.3520028591156, + "p99": 99.58399832248688 }, "roundtrip": { - "p50": 194.14399564266205, - "p90": 199.52000677585602, - "p95": 200.54399967193604, - "p99": 206.68800175189972 + "p50": 162.9440039396286, + "p90": 173.7920045852661, + "p95": 178.0800074338913, + "p99": 187.23200261592865 }, "isolatedSum": { - "p50": 211.0079973936081, - "p90": 216.19199961423874, - "p95": 218.87999773025513, - "p99": 239.20000344514847 + "p50": 189.5039975643158, + "p90": 208.41600000858307, + "p95": 217.82399713993073, + "p99": 256.9279968738556 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77701120, - "combineLogicalBytes": 77701120, - "fanoutMean": 5.29296875, - "recvTokensMax": 697, - "stragglerRank": 4, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 256, - "globalTokens": 2048, + "tokensPerRank": 2, + "globalTokens": 16, "dispatch": { - "p50": 134.5919966697693, - "p90": 139.8400068283081, - "p95": 141.34399592876434, - "p99": 147.77599275112152 + "p50": 106.46399855613708, + "p90": 119.77600306272507, + "p95": 126.62400305271149, + "p99": 169.5680022239685 }, "combine": { - "p50": 155.87200224399567, - "p90": 
165.27999937534332, - "p95": 170.43200135231018, - "p99": 176.7680048942566 + "p50": 84.60800349712372, + "p90": 92.70399808883667, + "p95": 96.83199971914291, + "p99": 122.27199971675873 }, "roundtrip": { - "p50": 273.27999472618103, - "p90": 280.5120050907135, - "p95": 281.72799944877625, - "p99": 288.35201263427734 + "p50": 166.9120043516159, + "p90": 178.1120002269745, + "p95": 182.6239973306656, + "p99": 194.87999379634857 }, "isolatedSum": { - "p50": 290.46399891376495, - "p90": 305.1200062036514, - "p95": 311.7759972810745, - "p99": 324.5439976453781 + "p50": 191.0720020532608, + "p90": 212.48000115156174, + "p95": 223.4560027718544, + "p99": 291.84000194072723 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 155187200, - "combineLogicalBytes": 155187200, - "fanoutMean": 5.28564453125, - "recvTokensMax": 1372, - "stragglerRank": 6, + "dispatchLogicalBytes": 1218560, + "combineLogicalBytes": 1218560, + "fanoutMean": 5.3125, + "recvTokensMax": 14, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 512, - "globalTokens": 4096, + "tokensPerRank": 4, + "globalTokens": 32, "dispatch": { - "p50": 194.46399807929993, - "p90": 200.83199441432953, - "p95": 203.64800095558167, - "p99": 213.24799954891205 + "p50": 107.71200060844421, + "p90": 122.52800166606903, + "p95": 128.7039965391159, + "p99": 157.50400722026825 }, "combine": { - "p50": 265.3760015964508, - "p90": 274.3679881095886, - "p95": 274.84801411628723, - "p99": 277.75999903678894 + "p50": 85.05599945783615, + "p90": 93.82399916648865, + "p95": 96.89600020647049, + "p99": 120.89599668979645 }, "roundtrip": { - "p50": 444.19199228286743, - "p90": 448.67199659347534, - "p95": 450.27199387550354, - "p99": 476.0960042476654 + "p50": 165.40800034999847, + "p90": 177.15199291706085, + "p95": 181.92000687122345, + "p99": 244.57600712776184 }, "isolatedSum": { - "p50": 459.83999967575073, - "p90": 475.19998252391815, - "p95": 478.4960150718689, - "p99": 
491.007998585701 + "p50": 192.76800006628036, + "p90": 216.35200083255768, + "p95": 225.5999967455864, + "p99": 278.4000039100647 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 311162880, - "combineLogicalBytes": 311162880, - "fanoutMean": 5.299072265625, - "recvTokensMax": 2761, - "stragglerRank": 7, + "dispatchLogicalBytes": 2408448, + "combineLogicalBytes": 2408448, + "fanoutMean": 5.25, + "recvTokensMax": 26, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 1024, - "globalTokens": 8192, + "tokensPerRank": 8, + "globalTokens": 64, "dispatch": { - "p50": 326.6560137271881, - "p90": 330.3999900817871, - "p95": 331.29599690437317, - "p99": 342.8800106048584 + "p50": 109.02400314807892, + "p90": 121.18399888277054, + "p95": 125.56800246238708, + "p99": 141.31200313568115 }, "combine": { - "p50": 461.88798546791077, - "p90": 470.94398736953735, - "p95": 471.45599126815796, - "p99": 483.2639992237091 + "p50": 89.91999924182892, + "p90": 96.6079980134964, + "p95": 98.49599748849869, + "p99": 108.09600353240967 }, "roundtrip": { - "p50": 770.4960107803345, - "p90": 775.3599882125854, - "p95": 777.5999903678894, - "p99": 795.9039807319641 + "p50": 168.73599588871002, + "p90": 180.89599907398224, + "p95": 185.37600338459015, + "p99": 204.48000729084015 }, "isolatedSum": { - "p50": 788.5439991950989, - "p90": 801.3439774513245, - "p95": 802.7519881725311, - "p99": 826.1440098285675 + "p50": 198.94400238990784, + "p90": 217.79199689626694, + "p95": 224.06399995088577, + "p99": 249.40800666809082 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 619974656, - "combineLogicalBytes": 619974656, - "fanoutMean": 5.279052734375, - "recvTokensMax": 5481, - "stragglerRank": 7, + "dispatchLogicalBytes": 4831232, + "combineLogicalBytes": 4831232, + "fanoutMean": 5.265625, + "recvTokensMax": 48, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2048, - "globalTokens": 16384, + 
"tokensPerRank": 16, + "globalTokens": 128, "dispatch": { - "p50": 570.1760053634644, - "p90": 577.567994594574, - "p95": 579.5199871063232, - "p99": 643.8400149345398 + "p50": 107.13600367307663, + "p90": 119.71200257539749, + "p95": 124.79999661445618, + "p99": 148.3200043439865 }, "combine": { - "p50": 815.8400058746338, - "p90": 826.5600204467773, - "p95": 827.5840282440186, - "p99": 830.8159708976746 + "p50": 91.80799871683121, + "p90": 97.05600142478943, + "p95": 99.74399954080582, + "p99": 116.19199812412262 }, "roundtrip": { - "p50": 1370.9759712219238, - "p90": 1381.0559511184692, - "p95": 1383.8720321655273, - "p99": 1396.672010421753 + "p50": 169.5680022239685, + "p90": 180.09600043296814, + "p95": 184.64000523090363, + "p99": 193.24800372123718 }, "isolatedSum": { - "p50": 1386.0160112380981, - "p90": 1404.1280150413513, - "p95": 1407.1040153503418, - "p99": 1474.6559858322144 + "p50": 198.94400238990784, + "p90": 216.76800400018692, + "p95": 224.543996155262, + "p99": 264.51200246810913 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1240020992, - "combineLogicalBytes": 1240020992, - "fanoutMean": 5.27935791015625, - "recvTokensMax": 10883, - "stragglerRank": 5, + "dispatchLogicalBytes": 9848832, + "combineLogicalBytes": 9848832, + "fanoutMean": 5.3671875, + "recvTokensMax": 91, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 4096, - "globalTokens": 32768, + "tokensPerRank": 32, + "globalTokens": 256, "dispatch": { - "p50": 1064.7039413452148, - "p90": 1068.4479475021362, - "p95": 1071.8079805374146, - "p99": 1093.3760404586792 + "p50": 108.0000028014183, + "p90": 120.31999975442886, + "p95": 125.82400441169739, + "p99": 133.82400572299957 }, "combine": { - "p50": 1526.2080430984497, - "p90": 1530.56001663208, - "p95": 1539.3919944763184, - "p99": 1604.8959493637085 + "p50": 95.83999961614609, + "p90": 100.5759984254837, + "p95": 106.46399855613708, + "p99": 133.7919980287552 }, "roundtrip": { - 
"p50": 2567.7759647369385, - "p90": 2580.415964126587, - "p95": 2587.8400802612305, - "p99": 2656.8961143493652 + "p50": 178.1120002269745, + "p90": 189.08800184726715, + "p95": 193.53599846363068, + "p99": 213.21600675582886 }, "isolatedSum": { - "p50": 2590.9119844436646, - "p90": 2599.0079641342163, - "p95": 2611.199975013733, - "p99": 2698.2719898223877 + "p50": 203.8400024175644, + "p90": 220.89599817991257, + "p95": 232.28800296783447, + "p99": 267.61600375175476 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2480414720, - "combineLogicalBytes": 2480414720, - "fanoutMean": 5.2801513671875, - "recvTokensMax": 21702, - "stragglerRank": 4, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 19496960, + "fanoutMean": 5.3125, + "recvTokensMax": 178, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 119.35999989509583, + "p90": 130.5599957704544, + "p95": 135.3919953107834, + "p99": 178.81600558757782 + }, + "combine": { + "p50": 109.50399935245514, + "p90": 117.76000261306763, + "p95": 120.60800194740295, + "p99": 131.67999684810638 + }, + "roundtrip": { + "p50": 197.40800559520721, + "p90": 206.84799551963806, + "p95": 210.81599593162537, + "p99": 228.15999388694763 + }, + "isolatedSum": { + "p50": 228.86399924755096, + "p90": 248.31999838352203, + "p95": 255.99999725818634, + "p99": 310.4960024356842 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 38836224, + "fanoutMean": 5.291015625, + "recvTokensMax": 372, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 134.8160058259964, + "p90": 146.7839926481247, + "p95": 156.8640023469925, + "p99": 189.5039975643158 + }, + "combine": { + "p50": 131.32800161838531, + "p90": 136.1279934644699, + "p95": 141.02399349212646, + "p99": 
171.32799327373505 + }, + "roundtrip": { + "p50": 233.95200073719025, + "p90": 242.97599494457245, + "p95": 247.3279982805252, + "p99": 270.52798867225647 + }, + "isolatedSum": { + "p50": 266.1440074443817, + "p90": 282.9119861125946, + "p95": 297.88799583911896, + "p99": 360.83199083805084 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77514752, + "combineLogicalBytes": 77514752, + "fanoutMean": 5.2802734375, + "recvTokensMax": 707, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 @@ -22399,47 +21657,48 @@ ] }, { - "id": "cx-f0a8ca82", - "identity": "b300|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|uniform+eplb|8|prefill|normal|none|none|0|tuned||2225dbbdab9bf2d", - "colorKey": "b300_e3d449ce", - "comparisonKey": "5a2fc26356c2c7bc", + "id": "cx-ea3485e1", + "identity": "gb300|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|decode|normal|none|none|0|tuned||af0b2d2a9119979", + "colorKey": "gb300_d4c8afb8", + "comparisonKey": "947bc78137c317bf", "schemaVersion": 3, - "generatedAt": "2026-06-27T09:47:59.202782+00:00", + "generatedAt": "2026-06-29T13:41:20.865867+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_10", - "sku": "b300", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", "backend": "deepep", - "phase": "prefill", + "phase": "decode", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep · bf16 · uniform+eplb", - "model": "DeepSeek-V3 (EPLB physical)", + "label": "GB300 EP8 · deepep · bf16 · balanced", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, - "experts": 288, - "routing": "uniform", - "routingLabel": 
"uniform+eplb", + "experts": 256, + "routing": "balanced", + "routingLabel": "balanced", "routingStep": 0, "unevenTokens": "none", - "eplbEnabled": true, + "eplbEnabled": false, "dispatchDtype": "bf16", + "kernelGeneration": "v1", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1351, + "achievedFraction": 0.1316, "configuredUnits": 20, - "deviceUnits": 148, + "deviceUnits": 152, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -22447,244 +21706,170 @@ }, "placement": { "kind": "packed", - "nodes": 1, + "nodes": 2, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "2225dbbdab9bf2d", - "workloadId": "set:6:a426d66e479dc893", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": 1.006072998046875, - "eplbImbalanceAfter": 1.0000152587890625, - "backendVersion": "1.2.1", + "traceSignature": "af0b2d2a9119979", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28285607618", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285607618", - "createdAt": "2026-06-27T09:47:59.202782+00:00", - "sha": "149586650dbed5b7579537347e9489d5b41543c1" + "id": "28374342409", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409", + "createdAt": "2026-06-29T13:08:27Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 94.46399658918381, - "p90": 100.35199671983719, - "p95": 103.2319962978363, - "p99": 128.4160017967224 - }, - "combine": { - "p50": 115.03999680280685, - "p90": 115.80800265073776, - "p95": 
116.7680025100708, - "p99": 120.99199742078781 - }, - "roundtrip": { - "p50": 193.4400051832199, - "p90": 200.1280039548874, - "p95": 201.9840031862259, - "p99": 223.1999933719635 - }, - "isolatedSum": { - "p50": 209.50399339199066, - "p90": 216.15999937057495, - "p95": 219.9999988079071, - "p99": 249.40799921751022 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 77041664, - "combineLogicalBytes": 77041664, - "fanoutMean": 5.248046875, - "recvTokensMax": 686, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 256, - "globalTokens": 2048, - "dispatch": { - "p50": 135.3919953107834, - "p90": 139.80799913406372, - "p95": 141.4719969034195, - "p99": 152.6080071926117 - }, - "combine": { - "p50": 153.9199948310852, - "p90": 163.7440025806427, - "p95": 164.22399878501892, - "p99": 176.67199671268463 - }, - "roundtrip": { - "p50": 270.4319953918457, - "p90": 275.4560112953186, - "p95": 277.47198939323425, - "p99": 282.4000120162964 - }, - "isolatedSum": { - "p50": 289.3119901418686, - "p90": 303.5520017147064, - "p95": 305.6959956884384, - "p99": 329.2800039052963 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 154542080, - "combineLogicalBytes": 154542080, - "fanoutMean": 5.263671875, - "recvTokensMax": 1365, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 512, - "globalTokens": 4096, + "tokensPerRank": 1, + "globalTokens": 8, "dispatch": { - "p50": 195.0719952583313, - "p90": 202.91200280189514, - "p95": 204.76800203323364, - "p99": 211.5519940853119 + "p50": 93.28000247478485, + "p90": 106.9440022110939, + "p95": 113.21599781513214, + "p99": 137.43999600410461 }, "combine": { - "p50": 273.75999093055725, - "p90": 275.4560112953186, - "p95": 276.70401334762573, - "p99": 286.8480086326599 + "p50": 82.56000280380249, + "p90": 86.81599795818329, + "p95": 91.00800007581711, + "p99": 94.55999732017517 }, "roundtrip": { - "p50": 
438.33601474761963, - "p90": 447.6799964904785, - "p95": 457.2800099849701, - "p99": 516.0959959030151 + "p50": 152.12799608707428, + "p90": 164.73600268363953, + "p95": 168.12799870967865, + "p99": 181.34400248527527 }, "isolatedSum": { - "p50": 468.83198618888855, - "p90": 478.36801409721375, - "p95": 481.4720153808594, - "p99": 498.4000027179718 + "p50": 175.84000527858734, + "p90": 193.7600001692772, + "p95": 204.22399789094925, + "p99": 231.99999332427979 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 310589440, - "combineLogicalBytes": 310589440, - "fanoutMean": 5.289306640625, - "recvTokensMax": 2746, - "stragglerRank": 7, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 8, + "recvTokensMax": 8, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 1024, - "globalTokens": 8192, + "tokensPerRank": 8, + "globalTokens": 64, "dispatch": { - "p50": 325.408011674881, - "p90": 328.99200916290283, - "p95": 330.2080035209656, - "p99": 342.0160114765167 + "p50": 96.12800180912018, + "p90": 108.70400071144104, + "p95": 112.2559979557991, + "p99": 135.68000495433807 }, "combine": { - "p50": 459.48800444602966, - "p90": 470.46399116516113, - "p95": 470.94398736953735, - "p99": 482.87999629974365 + "p50": 85.95199882984161, + "p90": 93.88799965381622, + "p95": 95.51999717950821, + "p99": 106.46399855613708 }, "roundtrip": { - "p50": 764.959990978241, - "p90": 773.792028427124, - "p95": 783.456027507782, - "p99": 817.8880214691162 + "p50": 158.59200060367584, + "p90": 168.16000640392303, + "p95": 171.74400389194489, + "p99": 180.92800676822662 }, "isolatedSum": { - "p50": 784.8960161209106, - "p90": 799.456000328064, - "p95": 801.1519908905029, - "p99": 824.8960077762604 + "p50": 182.0800006389618, + "p90": 202.59200036525726, + "p95": 207.7759951353073, + "p99": 242.14400351047516 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 619171840, - "combineLogicalBytes": 619171840, - 
"fanoutMean": 5.272216796875, - "recvTokensMax": 5467, - "stragglerRank": 4, + "dispatchLogicalBytes": 7340032, + "combineLogicalBytes": 7340032, + "fanoutMean": 8, + "recvTokensMax": 64, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2048, - "globalTokens": 16384, + "tokensPerRank": 32, + "globalTokens": 256, "dispatch": { - "p50": 568.9600110054016, - "p90": 572.8960037231445, - "p95": 575.8079886436462, - "p99": 665.9200191497803 + "p50": 103.4879982471466, + "p90": 114.3999993801117, + "p95": 118.46400052309036, + "p99": 127.80800461769104 }, "combine": { - "p50": 814.0159845352173, - "p90": 815.6480193138123, - "p95": 817.8880214691162, - "p99": 888.8959884643555 + "p50": 93.82399916648865, + "p90": 98.01600128412247, + "p95": 103.7760004401207, + "p99": 111.26399785280228 }, "roundtrip": { - "p50": 1359.7760200500488, - "p90": 1370.0480461120605, - "p95": 1375.8080005645752, - "p99": 1418.239951133728 + "p50": 169.50400173664093, + "p90": 178.01600694656372, + "p95": 182.01600015163422, + "p99": 187.26399540901184 }, "isolatedSum": { - "p50": 1382.975995540619, - "p90": 1388.5440230369568, - "p95": 1393.6960101127625, - "p99": 1554.8160076141357 + "p50": 197.31199741363525, + "p90": 212.41600066423416, + "p95": 222.24000096321106, + "p99": 239.07200247049332 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1238945792, - "combineLogicalBytes": 1238945792, - "fanoutMean": 5.2747802734375, - "recvTokensMax": 10913, - "stragglerRank": 6, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 8, + "recvTokensMax": 256, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 4096, - "globalTokens": 32768, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 1064.2880201339722, - "p90": 1069.823980331421, - "p95": 1076.5119791030884, - "p99": 1097.6639986038208 + "p50": 138.40000331401825, + "p90": 148.54399859905243, + 
"p95": 152.3520052433014, + "p99": 162.08000481128693 }, "combine": { - "p50": 1516.8960094451904, - "p90": 1527.9040336608887, - "p95": 1529.8240184783936, - "p99": 1575.8399963378906 + "p50": 144.76799964904785, + "p90": 153.3759981393814, + "p95": 155.2319973707199, + "p99": 158.55999290943146 }, "roundtrip": { - "p50": 2567.840099334717, - "p90": 2580.9600353240967, - "p95": 2591.4878845214844, - "p99": 2632.960081100464 + "p50": 255.48800826072693, + "p90": 264.3199861049652, + "p95": 267.520010471344, + "p99": 273.6319899559021 }, "isolatedSum": { - "p50": 2581.1840295791626, - "p90": 2597.7280139923096, - "p95": 2606.335997581482, - "p99": 2673.5039949417114 + "p50": 283.1680029630661, + "p90": 301.91999673843384, + "p95": 307.5840026140213, + "p99": 320.6399977207184 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2481747968, - "combineLogicalBytes": 2481747968, - "fanoutMean": 5.282989501953125, - "recvTokensMax": 21789, - "stragglerRank": 7, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 8, + "recvTokensMax": 1024, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 @@ -22692,47 +21877,48 @@ ] }, { - "id": "cx-4cb883eb", - "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|prefill|normal|none|none|0|tuned||4caecd33bedf786", - "colorKey": "b300_8d2811e3", - "comparisonKey": "c2361bc487e04e6e", + "id": "cx-8068f2a4", + "identity": "gb300|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|8|decode|normal|none|none|0|tuned||f0bc700e9998f70", + "colorKey": "gb300_f163949b", + "comparisonKey": "13efb5d3604f8176", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:58:36.475166+00:00", + "generatedAt": "2026-06-29T13:43:17.120318+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_15", - "sku": "b300", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", "backend": 
"deepep", - "phase": "prefill", + "phase": "decode", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep · bf16 · zipf", + "label": "GB300 EP8 · deepep · bf16 · balanced-rank-local", "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, "experts": 256, - "routing": "zipf", - "routingLabel": "zipf", + "routing": "balanced-rank-local", + "routingLabel": "balanced-rank-local", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, "dispatchDtype": "bf16", + "kernelGeneration": "v1", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1351, + "achievedFraction": 0.1316, "configuredUnits": 20, - "deviceUnits": 148, + "deviceUnits": 152, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -22740,133 +21926,170 @@ }, "placement": { "kind": "packed", - "nodes": 1, + "nodes": 2, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "4caecd33bedf786", - "workloadId": "set:3:830e36e88869e222", - "workloadSource": "canonical-serialized", + "traceSignature": "f0bc700e9998f70", + "workloadId": null, + "workloadSource": "seeded-runtime", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", + "backendVersion": "1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271889990", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271889990", - "createdAt": "2026-06-26T23:58:36.475166+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": 
"28374342409", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409", + "createdAt": "2026-06-29T13:08:27Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 1, + "globalTokens": 8, "dispatch": { - "p50": 103.87200117111206, - "p90": 106.88000172376633, - "p95": 109.3439981341362, - "p99": 126.62400305271149 + "p50": 110.68800091743469, + "p90": 150.751993060112, + "p95": 157.31200575828552, + "p99": 168.60799491405487 }, "combine": { - "p50": 126.91199779510498, - "p90": 128.1919926404953, - "p95": 128.57599556446075, - "p99": 139.615997672081 + "p50": 71.29599899053574, + "p90": 105.59999942779541, + "p95": 115.9679964184761, + "p99": 147.32800424098969 }, "roundtrip": { - "p50": 209.6640020608902, - "p90": 213.95200490951538, - "p95": 215.488001704216, - "p99": 220.47999501228333 + "p50": 144.99199390411377, + "p90": 185.31200289726257, + "p95": 193.66399943828583, + "p99": 216.95999801158905 }, "isolatedSum": { - "p50": 230.78399896621704, - "p90": 235.07199436426163, - "p95": 237.91999369859695, - "p99": 266.2400007247925 + "p50": 181.98399990797043, + "p90": 256.3519924879074, + "p95": 273.2800021767616, + "p99": 315.93599915504456 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 49946624, - "combineLogicalBytes": 49946624, - "fanoutMean": 3.40234375, - "recvTokensMax": 1022, - "stragglerRank": 4, + "dispatchLogicalBytes": 114688, + "combineLogicalBytes": 114688, + "fanoutMean": 1, + "recvTokensMax": 4, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 512, - "globalTokens": 4096, + "tokensPerRank": 8, + "globalTokens": 64, "dispatch": { - "p50": 204.73599433898926, - "p90": 212.44800090789795, - "p95": 213.98399770259857, - "p99": 221.02400660514832 + "p50": 109.95200276374817, + "p90": 151.32799744606018, + "p95": 158.01599621772766, + "p99": 171.39199376106262 }, "combine": { - "p50": 
325.28001070022583, - "p90": 336.41600608825684, - "p95": 336.70398592948914, - "p99": 340.4799997806549 + "p50": 78.91199737787247, + "p90": 109.31199789047241, + "p95": 119.64800208806992, + "p99": 140.1280015707016 }, "roundtrip": { - "p50": 510.528028011322, - "p90": 517.087996006012, - "p95": 519.1680192947388, - "p99": 526.4639854431152 + "p50": 149.1840034723282, + "p90": 187.3600035905838, + "p95": 195.6160068511963, + "p99": 217.75999665260315 }, "isolatedSum": { - "p50": 530.0160050392151, - "p90": 548.8640069961548, - "p95": 550.6879836320877, - "p99": 561.5040063858032 + "p50": 188.86400014162064, + "p90": 260.6399953365326, + "p95": 277.6639983057976, + "p99": 311.5199953317642 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 201678848, - "combineLogicalBytes": 201678848, - "fanoutMean": 3.4345703125, - "recvTokensMax": 4094, - "stragglerRank": 6, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 1, + "recvTokensMax": 8, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2048, - "globalTokens": 16384, + "tokensPerRank": 32, + "globalTokens": 256, "dispatch": { - "p50": 648.1919884681702, - "p90": 659.0080261230469, - "p95": 662.6240015029907, - "p99": 672.5760102272034 + "p50": 129.98400628566742, + "p90": 154.27200496196747, + "p95": 159.39199924468994, + "p99": 175.9359985589981 }, "combine": { - "p50": 1063.8400316238403, - "p90": 1073.248028755188, - "p95": 1073.6639499664307, - "p99": 1096.60804271698 + "p50": 85.40800213813782, + "p90": 122.17599898576736, + "p95": 139.80799913406372, + "p99": 144.67200636863708 }, "roundtrip": { - "p50": 1698.815941810608, - "p90": 1708.1600427627563, - "p95": 1712.4799489974976, - "p99": 1786.7519855499268 + "p50": 169.72799599170685, + "p90": 203.87199521064758, + "p95": 213.76000344753265, + "p99": 228.5120040178299 }, "isolatedSum": { - "p50": 1712.0320200920105, - "p90": 1732.2560548782349, - "p95": 1736.2879514694214, - 
"p99": 1769.1840529441833 + "p50": 215.39200842380524, + "p90": 276.44800394773483, + "p95": 299.19999837875366, + "p99": 320.6080049276352 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 808822784, - "combineLogicalBytes": 808822784, - "fanoutMean": 3.44354248046875, - "recvTokensMax": 16380, - "stragglerRank": 6, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 1, + "recvTokensMax": 32, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 119.4240003824234, + "p90": 152.19199657440186, + "p95": 156.41599893569946, + "p99": 169.72799599170685 + }, + "combine": { + "p50": 90.55999666452408, + "p90": 122.78400361537933, + "p95": 137.28000223636627, + "p99": 153.50399911403656 + }, + "roundtrip": { + "p50": 163.71199488639832, + "p90": 192.25600361824036, + "p95": 204.48000729084015, + "p99": 222.04799950122833 + }, + "isolatedSum": { + "p50": 209.98399704694748, + "p90": 274.9760001897812, + "p95": 293.69600117206573, + "p99": 323.2319951057434 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 1, + "recvTokensMax": 128, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 @@ -22874,47 +22097,48 @@ ] }, { - "id": "cx-2d848061", - "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|prefill|normal|none|none|0|tuned||b5217e990b95f86", - "colorKey": "b300_8d2811e3", - "comparisonKey": "572a75005556e63b", + "id": "cx-e180de44", + "identity": "gb300|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|balanced+eplb|8|decode|normal|none|none|0|tuned||0456df9778e5c0f", + "colorKey": "gb300_c93e2296", + "comparisonKey": "657a9fa446798c99", "schemaVersion": 3, - "generatedAt": "2026-06-27T09:48:48.610470+00:00", + "generatedAt": "2026-06-29T13:38:59.609788+00:00", "status": "valid", - "publicationStatus": 
"official", - "runner": "b300-nv_06", - "sku": "b300", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", "backend": "deepep", - "phase": "prefill", + "phase": "decode", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep · bf16 · zipf", - "model": "DeepSeek-V3/V4", + "label": "GB300 EP8 · deepep · bf16 · balanced+eplb", + "model": "DeepSeek-V3 (EPLB physical)", "shape": { "hidden": 7168, "topk": 8, - "experts": 256, - "routing": "zipf", - "routingLabel": "zipf", + "experts": 288, + "routing": "balanced", + "routingLabel": "balanced+eplb", "routingStep": 0, "unevenTokens": "none", - "eplbEnabled": false, + "eplbEnabled": true, "dispatchDtype": "bf16", + "kernelGeneration": "v1", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1351, + "achievedFraction": 0.1316, "configuredUnits": 20, - "deviceUnits": 148, + "deviceUnits": 152, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -22922,244 +22146,318 @@ }, "placement": { "kind": "packed", - "nodes": 1, + "nodes": 2, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "b5217e990b95f86", - "workloadId": "set:6:830e36e88869e222", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", + "traceSignature": "0456df9778e5c0f", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1, + "eplbImbalanceAfter": 1, + "backendVersion": "1.1.0+814e508", "imageDigest": 
"sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28285625501", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285625501", - "createdAt": "2026-06-27T09:48:48.610470+00:00", - "sha": "149586650dbed5b7579537347e9489d5b41543c1" + "id": "28374342409", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409", + "createdAt": "2026-06-29T13:08:27Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 1, + "globalTokens": 8, "dispatch": { - "p50": 100.19200295209885, - "p90": 104.2879968881607, - "p95": 107.35999792814255, - "p99": 113.69600147008896 + "p50": 92.0960009098053, + "p90": 109.98400300741196, + "p95": 123.74400347471237, + "p99": 163.80800306797028 }, "combine": { - "p50": 118.43200027942657, - "p90": 127.03999876976013, - "p95": 127.51999497413635, - "p99": 129.2479932308197 + "p50": 71.10399752855301, + "p90": 78.52800190448761, + "p95": 83.61600339412689, + "p99": 118.01599711179733 }, "roundtrip": { - "p50": 207.58399367332458, - "p90": 212.54399418830872, - "p95": 213.82400393486023, - "p99": 217.0879989862442 + "p50": 141.6960060596466, + "p90": 160.64000129699707, + "p95": 192.9280012845993, + "p99": 237.31200397014618 }, "isolatedSum": { - "p50": 218.62400323152542, - "p90": 231.32799565792084, - "p95": 234.8799929022789, - "p99": 242.94399470090866 + "p50": 163.1999984383583, + "p90": 188.51200491189957, + "p95": 207.36000686883926, + "p99": 281.8240001797676 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 49946624, - "combineLogicalBytes": 49946624, - "fanoutMean": 3.40234375, - "recvTokensMax": 1022, + "dispatchLogicalBytes": 229376, + "combineLogicalBytes": 229376, + "fanoutMean": 2, + "recvTokensMax": 3, "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 256, - "globalTokens": 2048, + 
"tokensPerRank": 2, + "globalTokens": 16, "dispatch": { - "p50": 130.40000200271606, - "p90": 137.40800321102142, - "p95": 138.84800672531128, - "p99": 147.93600142002106 + "p50": 92.67199784517288, + "p90": 109.02400314807892, + "p95": 121.40800058841705, + "p99": 166.20799899101257 }, "combine": { - "p50": 176.28799378871918, - "p90": 178.3359944820404, - "p95": 179.87200617790222, - "p99": 189.91999328136444 + "p50": 72.41600006818771, + "p90": 80.79999685287476, + "p95": 83.61600339412689, + "p99": 94.11200135946274 }, "roundtrip": { - "p50": 294.5280075073242, - "p90": 299.77598786354065, - "p95": 301.56800150871277, - "p99": 312.22400069236755 + "p50": 145.37599682807922, + "p90": 161.76000237464905, + "p95": 179.967999458313, + "p99": 224.57599639892578 }, "isolatedSum": { - "p50": 306.68799579143524, - "p90": 315.74399769306183, - "p95": 318.7200129032135, - "p99": 337.8559947013855 + "p50": 165.0879979133606, + "p90": 189.82400000095367, + "p95": 205.02400398254395, + "p99": 260.3200003504753 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 100509696, - "combineLogicalBytes": 100509696, - "fanoutMean": 3.42333984375, - "recvTokensMax": 2046, + "dispatchLogicalBytes": 458752, + "combineLogicalBytes": 458752, + "fanoutMean": 2, + "recvTokensMax": 6, "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 512, - "globalTokens": 4096, + "tokensPerRank": 4, + "globalTokens": 32, "dispatch": { - "p50": 207.13600516319275, - "p90": 211.776003241539, - "p95": 213.24799954891205, - "p99": 220.99199891090393 + "p50": 92.57599711418152, + "p90": 105.95200210809708, + "p95": 110.55999994277954, + "p99": 130.91200590133667 }, "combine": { - "p50": 324.8960077762604, - "p90": 334.9440097808838, - "p95": 335.61599254608154, - "p99": 338.46399188041687 + "p50": 73.27999919652939, + "p90": 83.45600217580795, + "p95": 92.12800115346909, + "p99": 132.64000415802002 }, "roundtrip": { - "p50": 504.12797927856445, - "p90": 
511.03997230529785, - "p95": 513.2480263710022, - "p99": 517.5359845161438 + "p50": 145.50399780273438, + "p90": 158.720001578331, + "p95": 166.59200191497803, + "p99": 232.16000199317932 }, "isolatedSum": { - "p50": 532.0320129394531, - "p90": 546.7200130224228, - "p95": 548.8639920949936, - "p99": 559.4559907913208 + "p50": 165.8559963107109, + "p90": 189.40800428390503, + "p95": 202.68800109624863, + "p99": 263.5520100593567 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 201678848, - "combineLogicalBytes": 201678848, - "fanoutMean": 3.4345703125, - "recvTokensMax": 4094, - "stragglerRank": 7, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 2, + "recvTokensMax": 12, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 1024, - "globalTokens": 8192, + "tokensPerRank": 8, + "globalTokens": 64, "dispatch": { - "p50": 347.6479947566986, - "p90": 353.08799147605896, - "p95": 354.8479974269867, - "p99": 364.4160032272339 + "p50": 95.67999839782715, + "p90": 114.20799791812897, + "p95": 131.74399733543396, + "p99": 174.20800030231476 }, "combine": { - "p50": 582.751989364624, - "p90": 592.8320288658142, - "p95": 593.4399962425232, - "p99": 599.7120141983032 + "p50": 75.23199915885925, + "p90": 84.57600325345993, + "p95": 94.94400024414062, + "p99": 132.89600610733032 }, "roundtrip": { - "p50": 909.4719886779785, - "p90": 917.248010635376, - "p95": 919.2320108413696, - "p99": 935.0079894065857 + "p50": 147.32800424098969, + "p90": 164.44799304008484, + "p95": 190.46400487422943, + "p99": 226.623997092247 }, "isolatedSum": { - "p50": 930.3999841213226, - "p90": 945.9200203418732, - "p95": 948.2879936695099, - "p99": 964.1280174255371 + "p50": 170.9119975566864, + "p90": 198.7840011715889, + "p95": 226.68799757957458, + "p99": 307.1040064096451 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 405035008, - "combineLogicalBytes": 405035008, - "fanoutMean": 3.4488525390625, - 
"recvTokensMax": 8189, - "stragglerRank": 7, + "dispatchLogicalBytes": 1835008, + "combineLogicalBytes": 1835008, + "fanoutMean": 2, + "recvTokensMax": 24, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2048, - "globalTokens": 16384, + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 95.74399888515472, + "p90": 111.51999980211258, + "p95": 126.01600587368011, + "p99": 166.04800522327423 + }, + "combine": { + "p50": 78.72000336647034, + "p90": 85.28000116348267, + "p95": 94.24000233411789, + "p99": 134.68800485134125 + }, + "roundtrip": { + "p50": 148.0959951877594, + "p90": 163.35999965667725, + "p95": 186.27199530601501, + "p99": 248.09600412845612 + }, + "isolatedSum": { + "p50": 174.46400225162506, + "p90": 196.80000096559525, + "p95": 220.256008207798, + "p99": 300.7360100746155 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 2, + "recvTokensMax": 48, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, "dispatch": { - "p50": 641.152024269104, - "p90": 652.0000100135803, - "p95": 655.2960276603699, - "p99": 747.6480007171631 + "p50": 95.77599912881851, + "p90": 107.39199817180634, + "p95": 113.18399757146835, + "p99": 154.14400398731232 }, "combine": { - "p50": 1062.0479583740234, - "p90": 1072.0640420913696, - "p95": 1072.6079940795898, - "p99": 1096.5440273284912 + "p50": 81.24800026416779, + "p90": 86.71999722719193, + "p95": 96.25600278377533, + "p99": 136.09600067138672 }, "roundtrip": { - "p50": 1689.9199485778809, - "p90": 1699.0079879760742, - "p95": 1702.5599479675293, - "p99": 1800.9920120239258 + "p50": 149.08799529075623, + "p90": 160.44799983501434, + "p95": 163.7759953737259, + "p99": 207.87200331687927 }, "isolatedSum": { - "p50": 1703.1999826431274, - "p90": 1724.06405210495, - "p95": 1727.9040217399597, - "p99": 
1844.1920280456543 + "p50": 177.0239993929863, + "p90": 194.11199539899826, + "p95": 209.44000035524368, + "p99": 290.24000465869904 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 808822784, - "combineLogicalBytes": 808822784, - "fanoutMean": 3.44354248046875, - "recvTokensMax": 16380, - "stragglerRank": 5, + "dispatchLogicalBytes": 7340032, + "combineLogicalBytes": 7340032, + "fanoutMean": 2, + "recvTokensMax": 96, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 4096, - "globalTokens": 32768, + "tokensPerRank": 64, + "globalTokens": 512, "dispatch": { - "p50": 1252.0320415496826, - "p90": 1263.424038887024, - "p95": 1268.7360048294067, - "p99": 1281.2479734420776 + "p50": 99.07200187444687, + "p90": 113.02399635314941, + "p95": 124.76799637079239, + "p99": 159.16800498962402 }, "combine": { - "p50": 2043.8721179962158, - "p90": 2046.015977859497, - "p95": 2054.464101791382, - "p99": 2093.503952026367 + "p50": 82.8159973025322, + "p90": 89.40800279378891, + "p95": 94.11200135946274, + "p99": 130.65600395202637 }, "roundtrip": { - "p50": 3286.976099014282, - "p90": 3298.5599040985107, - "p95": 3302.432060241699, - "p99": 3373.823881149292 + "p50": 153.50399911403656, + "p90": 169.76000368595123, + "p95": 184.25600230693817, + "p99": 212.64000236988068 }, "isolatedSum": { - "p50": 3295.9041595458984, - "p90": 3309.440016746521, - "p95": 3323.2001066207886, - "p99": 3374.751925468445 + "p50": 181.88799917697906, + "p90": 202.43199914693832, + "p95": 218.87999773025513, + "p99": 289.8240089416504 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1619795968, - "combineLogicalBytes": 1619795968, - "fanoutMean": 3.4481201171875, - "recvTokensMax": 32761, - "stragglerRank": 5, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 2, + "recvTokensMax": 192, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + 
"globalTokens": 1024, + "dispatch": { + "p50": 104.92800176143646, + "p90": 116.15999788045883, + "p95": 120.2239990234375, + "p99": 151.7760008573532 + }, + "combine": { + "p50": 96.67199850082397, + "p90": 104.92800176143646, + "p95": 107.71200060844421, + "p99": 122.43200093507767 + }, + "roundtrip": { + "p50": 176.03200674057007, + "p90": 186.3040030002594, + "p95": 190.17599523067474, + "p99": 249.05599653720856 + }, + "isolatedSum": { + "p50": 201.60000026226044, + "p90": 221.0879996418953, + "p95": 227.9359996318817, + "p99": 274.2080017924309 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 2, + "recvTokensMax": 384, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 @@ -23167,47 +22465,48 @@ ] }, { - "id": "cx-f7ec6aaf", - "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|prefill|normal|none|none|0|tuned||3dd868cb33839a3", - "colorKey": "b300_2e44c039", - "comparisonKey": "b198376a27b75c7f", + "id": "cx-61745319", + "identity": "gb300|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|decode|normal|none|none|0|tuned||d0eaac3a0f0ae8c", + "colorKey": "gb300_440d13a2", + "comparisonKey": "aa2d44f964843de7", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:58:40.218743+00:00", + "generatedAt": "2026-06-29T13:47:56.420171+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_11", - "sku": "b300", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", "backend": "deepep", - "phase": "prefill", + "phase": "decode", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · 
deepep · bf16 · zipf-heavy", + "label": "GB300 EP8 · deepep · bf16 · hotspot-single", "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, "experts": 256, - "routing": "zipf-heavy", - "routingLabel": "zipf-heavy", + "routing": "hotspot-single", + "routingLabel": "hotspot-single", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, "dispatchDtype": "bf16", + "kernelGeneration": "v1", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1351, + "achievedFraction": 0.1316, "configuredUnits": 20, - "deviceUnits": 148, + "deviceUnits": 152, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -23215,133 +22514,170 @@ }, "placement": { "kind": "packed", - "nodes": 1, + "nodes": 2, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "3dd868cb33839a3", - "workloadId": "set:3:1ca614e23cc66be1", - "workloadSource": "canonical-serialized", + "traceSignature": "d0eaac3a0f0ae8c", + "workloadId": null, + "workloadSource": "seeded-runtime", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", + "backendVersion": "1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271897134", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271897134", - "createdAt": "2026-06-26T23:58:40.218743+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28374342409", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409", + "createdAt": "2026-06-29T13:08:27Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 1, + "globalTokens": 8, "dispatch": { - "p50": 92.3520028591156, - "p90": 95.90400010347366, - "p95": 98.78399968147278, - 
"p99": 113.34399878978729 + "p50": 95.51999717950821, + "p90": 110.36799848079681, + "p95": 114.9120032787323, + "p99": 127.96799838542938 }, "combine": { - "p50": 116.19199812412262, - "p90": 120.2239990234375, - "p95": 126.39999389648438, - "p99": 127.68000364303589 + "p50": 80.1599994301796, + "p90": 85.05599945783615, + "p95": 88.83199840784073, + "p99": 94.84799951314926 }, "roundtrip": { - "p50": 194.5279985666275, - "p90": 202.43200659751892, - "p95": 204.22400534152985, - "p99": 214.23999965190887 + "p50": 149.3760049343109, + "p90": 161.0880047082901, + "p95": 165.56799411773682, + "p99": 176.15999281406403 }, "isolatedSum": { - "p50": 208.54400098323822, - "p90": 216.12799912691116, - "p95": 225.18399357795715, - "p99": 241.02400243282318 + "p50": 175.6799966096878, + "p90": 195.42399793863297, + "p95": 203.74400168657303, + "p99": 222.81599789857864 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 22650880, - "combineLogicalBytes": 22650880, - "fanoutMean": 1.54296875, - "recvTokensMax": 1024, - "stragglerRank": 7, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 512, - "globalTokens": 4096, + "tokensPerRank": 8, + "globalTokens": 64, "dispatch": { - "p50": 180.09600043296814, - "p90": 188.6720061302185, - "p95": 190.46400487422943, - "p99": 204.83200252056122 + "p50": 98.4639972448349, + "p90": 113.47199976444244, + "p95": 118.75200271606445, + "p99": 128.31999361515045 }, "combine": { - "p50": 302.94400453567505, - "p90": 311.42398715019226, - "p95": 311.67998909950256, - "p99": 315.16799330711365 + "p50": 84.06399935483932, + "p90": 92.67199784517288, + "p95": 95.48799693584442, + "p99": 107.29599744081497 }, "roundtrip": { - "p50": 473.1520116329193, - "p90": 481.6960096359253, - "p95": 485.0560128688812, - "p99": 493.696004152298 + "p50": 157.1200042963028, + "p90": 169.91999745368958, 
+ "p95": 174.46400225162506, + "p99": 187.71199882030487 }, "isolatedSum": { - "p50": 483.0400049686432, - "p90": 500.09599328041077, - "p95": 502.143993973732, - "p99": 519.9999958276749 + "p50": 182.52799659967422, + "p90": 206.14399760961533, + "p95": 214.23999965190887, + "p99": 235.61599105596542 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 91521024, - "combineLogicalBytes": 91521024, - "fanoutMean": 1.55859375, - "recvTokensMax": 4096, - "stragglerRank": 7, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 64, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2048, - "globalTokens": 16384, + "tokensPerRank": 32, + "globalTokens": 256, "dispatch": { - "p50": 570.0479745864868, - "p90": 580.4160237312317, - "p95": 583.7439894676208, - "p99": 621.0560202598572 + "p50": 101.95200145244598, + "p90": 114.9120032787323, + "p95": 119.87199634313583, + "p99": 123.90399724245071 }, "combine": { - "p50": 1098.7199544906616, - "p90": 1109.1840267181396, - "p95": 1109.663963317871, - "p99": 1124.4159936904907 + "p50": 92.73599833250046, + "p90": 98.68799895048141, + "p95": 104.22399640083313, + "p99": 115.42399972677231 }, "roundtrip": { - "p50": 1622.8159666061401, - "p90": 1629.3760538101196, - "p95": 1632.2239637374878, - "p99": 1643.3279514312744 + "p50": 165.40800034999847, + "p90": 177.69600450992584, + "p95": 182.14400112628937, + "p99": 190.11199474334717 }, "isolatedSum": { - "p50": 1668.7679290771484, - "p90": 1689.6000504493713, - "p95": 1693.407952785492, - "p99": 1745.472013950348 + "p50": 194.68799978494644, + "p90": 213.60000222921371, + "p95": 224.09599274396896, + "p99": 239.32799696922302 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 368062464, - "combineLogicalBytes": 368062464, - "fanoutMean": 1.5670166015625, - "recvTokensMax": 16384, - "stragglerRank": 7, + "dispatchLogicalBytes": 19525632, + "combineLogicalBytes": 
19525632, + "fanoutMean": 5.3203125, + "recvTokensMax": 256, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 131.77600502967834, + "p90": 142.04800128936768, + "p95": 146.14400267601013, + "p99": 155.13600409030914 + }, + "combine": { + "p50": 139.3280029296875, + "p90": 146.33600413799286, + "p95": 147.96799421310425, + "p99": 158.1760048866272 + }, + "roundtrip": { + "p50": 245.34399807453156, + "p90": 254.62400913238525, + "p95": 258.5600018501282, + "p99": 269.0559923648834 + }, + "isolatedSum": { + "p50": 271.10400795936584, + "p90": 288.38400542736053, + "p95": 294.1119968891144, + "p99": 313.31200897693634 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1024, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 @@ -23349,47 +22685,48 @@ ] }, { - "id": "cx-3f3c8c0f", - "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|prefill|normal|none|none|0|tuned||bbcd1d9d8d1e4fe", - "colorKey": "b300_2e44c039", - "comparisonKey": "5c8a1b2520d6dc6d", + "id": "cx-419170bd", + "identity": "gb300|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|hotspot-single+eplb|8|decode|normal|none|none|0|tuned||5793a02d08aaa9c", + "colorKey": "gb300_87f4d4ec", + "comparisonKey": "fd229a6aff63668c", "schemaVersion": 3, - "generatedAt": "2026-06-27T09:50:12.421760+00:00", + "generatedAt": "2026-06-29T13:55:00.278129+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_04", - "sku": "b300", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", "backend": "deepep", - "phase": "prefill", + "phase": "decode", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": 
"layout-and-dispatch-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep · bf16 · zipf-heavy", - "model": "DeepSeek-V3/V4", + "label": "GB300 EP8 · deepep · bf16 · hotspot-single+eplb", + "model": "DeepSeek-V3 (EPLB physical)", "shape": { "hidden": 7168, "topk": 8, - "experts": 256, - "routing": "zipf-heavy", - "routingLabel": "zipf-heavy", + "experts": 288, + "routing": "hotspot-single", + "routingLabel": "hotspot-single+eplb", "routingStep": 0, "unevenTokens": "none", - "eplbEnabled": false, + "eplbEnabled": true, "dispatchDtype": "bf16", + "kernelGeneration": "v1", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1351, + "achievedFraction": 0.1316, "configuredUnits": 20, - "deviceUnits": 148, + "deviceUnits": 152, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -23397,244 +22734,318 @@ }, "placement": { "kind": "packed", - "nodes": 1, + "nodes": 2, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "bbcd1d9d8d1e4fe", - "workloadId": "set:6:1ca614e23cc66be1", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", + "traceSignature": "5793a02d08aaa9c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.830078125, + "eplbImbalanceAfter": 1.0007595486111112, + "backendVersion": "1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28285656632", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285656632", - "createdAt": "2026-06-27T09:50:12.421760+00:00", - "sha": "149586650dbed5b7579537347e9489d5b41543c1" + "id": 
"28374342409", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409", + "createdAt": "2026-06-29T13:08:27Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 1, + "globalTokens": 8, "dispatch": { - "p50": 93.47199648618698, - "p90": 95.67999839782715, - "p95": 98.65599870681763, - "p99": 132.1599930524826 + "p50": 98.04800152778625, + "p90": 109.02400314807892, + "p95": 113.02399635314941, + "p99": 145.02400159835815 }, "combine": { - "p50": 116.83200299739838, - "p90": 126.30400061607361, - "p95": 126.88000500202179, - "p99": 138.047993183136 + "p50": 80.64000308513641, + "p90": 85.4400023818016, + "p95": 90.7519981265068, + "p99": 95.71199864149094 }, "roundtrip": { - "p50": 196.51199877262115, - "p90": 204.25599813461304, - "p95": 207.5520008802414, - "p99": 222.71999716758728 + "p50": 153.43999862670898, + "p90": 163.7440025806427, + "p95": 167.13599860668182, + "p99": 178.6240041255951 }, "isolatedSum": { - "p50": 210.30399948358536, - "p90": 221.98399901390076, - "p95": 225.53600370883942, - "p99": 270.2079862356186 + "p50": 178.68800461292267, + "p90": 194.46400552988052, + "p95": 203.77599447965622, + "p99": 240.7360002398491 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 22650880, - "combineLogicalBytes": 22650880, - "fanoutMean": 1.54296875, - "recvTokensMax": 1024, - "stragglerRank": 7, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 256, - "globalTokens": 2048, + "tokensPerRank": 2, + "globalTokens": 16, "dispatch": { - "p50": 127.6479959487915, - "p90": 131.1040073633194, - "p95": 133.95200669765472, - "p99": 144.57599818706512 + "p50": 97.59999811649323, + "p90": 108.5439994931221, + "p95": 112.5440001487732, + "p99": 121.11999839544296 }, "combine": { - "p50": 
174.55999553203583, - "p90": 176.7680048942566, - "p95": 177.279993891716, - "p99": 179.32799458503723 + "p50": 81.53600245714188, + "p90": 88.79999816417694, + "p95": 92.47999638319016, + "p99": 101.6639992594719 }, "roundtrip": { - "p50": 283.29598903656006, - "p90": 288.12798857688904, - "p95": 290.1439964771271, - "p99": 312.73600459098816 + "p50": 154.62400019168854, + "p90": 164.92800414562225, + "p95": 168.09600591659546, + "p99": 176.35199427604675 }, "isolatedSum": { - "p50": 302.20799148082733, - "p90": 307.872012257576, - "p95": 311.2320005893707, - "p99": 323.90399277210236 + "p50": 179.1360005736351, + "p90": 197.34399765729904, + "p95": 205.02399653196335, + "p99": 222.78399765491486 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 45688832, - "combineLogicalBytes": 45688832, - "fanoutMean": 1.55615234375, - "recvTokensMax": 2048, - "stragglerRank": 7, + "dispatchLogicalBytes": 1175552, + "combineLogicalBytes": 1175552, + "fanoutMean": 5.125, + "recvTokensMax": 12, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 512, - "globalTokens": 4096, + "tokensPerRank": 4, + "globalTokens": 32, "dispatch": { - "p50": 186.65599822998047, - "p90": 191.16799533367157, - "p95": 194.97600197792053, - "p99": 212.44800090789795 + "p50": 98.81599992513657, + "p90": 110.97600311040878, + "p95": 115.84000289440155, + "p99": 136.28800213336945 }, "combine": { - "p50": 311.3279938697815, - "p90": 313.24800848960876, - "p95": 314.7520124912262, - "p99": 326.911985874176 + "p50": 82.17599987983704, + "p90": 87.87199854850769, + "p95": 90.36800265312195, + "p99": 96.00000083446503 }, "roundtrip": { - "p50": 479.74398732185364, - "p90": 486.7520034313202, - "p95": 488.8960123062134, - "p99": 497.79200553894043 + "p50": 156.92800283432007, + "p90": 167.04000532627106, + "p95": 170.43200135231018, + "p99": 186.88000738620758 }, "isolatedSum": { - "p50": 497.98399209976196, - "p90": 504.41600382328033, - "p95": 
509.72801446914673, - "p99": 539.359986782074 + "p50": 180.9919998049736, + "p90": 198.84800165891647, + "p95": 206.2080055475235, + "p99": 232.28800296783447 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 91521024, - "combineLogicalBytes": 91521024, - "fanoutMean": 1.55859375, - "recvTokensMax": 4096, - "stragglerRank": 7, + "dispatchLogicalBytes": 2451456, + "combineLogicalBytes": 2451456, + "fanoutMean": 5.34375, + "recvTokensMax": 23, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 1024, - "globalTokens": 8192, + "tokensPerRank": 8, + "globalTokens": 64, "dispatch": { - "p50": 311.42398715019226, - "p90": 318.62398982048035, - "p95": 323.13600182533264, - "p99": 337.15200424194336 + "p50": 98.24000298976898, + "p90": 108.0000028014183, + "p95": 111.51999980211258, + "p99": 117.76000261306763 }, "combine": { - "p50": 583.6480259895325, - "p90": 594.3679809570312, - "p95": 596.671998500824, - "p99": 632.6079964637756 + "p50": 83.93599838018417, + "p90": 91.39200299978256, + "p95": 93.47199648618698, + "p99": 103.29599678516388 }, "roundtrip": { - "p50": 887.4239921569824, - "p90": 891.9680118560791, - "p95": 893.6960101127625, - "p99": 918.4960126876831 + "p50": 159.67999398708344, + "p90": 169.53599452972412, + "p95": 172.67200350761414, + "p99": 178.65599691867828 }, "isolatedSum": { - "p50": 895.0720131397247, - "p90": 912.9919707775116, - "p95": 919.8080003261566, - "p99": 969.760000705719 + "p50": 182.17600136995316, + "p90": 199.39200580120087, + "p95": 204.99199628829956, + "p99": 221.0559993982315 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 183916544, - "combineLogicalBytes": 183916544, - "fanoutMean": 1.5660400390625, - "recvTokensMax": 8192, + "dispatchLogicalBytes": 4730880, + "combineLogicalBytes": 4730880, + "fanoutMean": 5.15625, + "recvTokensMax": 44, "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2048, - "globalTokens": 16384, + 
"tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 98.59199821949005, + "p90": 109.21599715948105, + "p95": 111.77600175142288, + "p99": 117.50400066375732 + }, + "combine": { + "p50": 85.82399785518646, + "p90": 92.73599833250046, + "p95": 94.01600062847137, + "p99": 99.04000163078308 + }, + "roundtrip": { + "p50": 162.1759980916977, + "p90": 171.74400389194489, + "p95": 175.48799514770508, + "p99": 185.95199286937714 + }, + "isolatedSum": { + "p50": 184.4159960746765, + "p90": 201.9519954919815, + "p95": 205.79200237989426, + "p99": 216.5440022945404 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9691136, + "combineLogicalBytes": 9691136, + "fanoutMean": 5.28125, + "recvTokensMax": 88, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, "dispatch": { - "p50": 568.2560205459595, - "p90": 577.0559906959534, - "p95": 585.6639742851257, - "p99": 622.0160126686096 + "p50": 103.20000350475311, + "p90": 112.38399893045425, + "p95": 115.77600240707397, + "p99": 126.20800733566284 }, "combine": { - "p50": 1099.1679430007935, - "p90": 1110.0800037384033, - "p95": 1111.1040115356445, - "p99": 1136.8639469146729 + "p50": 92.76799857616425, + "p90": 96.79999947547913, + "p95": 101.34399682283401, + "p99": 105.24799674749374 }, "roundtrip": { - "p50": 1613.2479906082153, - "p90": 1620.7040548324585, - "p95": 1624.2239475250244, - "p99": 1674.720048904419 + "p50": 166.97600483894348, + "p90": 175.55199563503265, + "p95": 179.36000227928162, + "p99": 195.77600061893463 }, "isolatedSum": { - "p50": 1667.423963546753, - "p90": 1687.1359944343567, - "p95": 1696.7679858207703, - "p99": 1758.8799595832825 + "p50": 195.96800208091736, + "p90": 209.18399840593338, + "p95": 217.119999229908, + "p99": 231.45600408315659 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 368062464, - "combineLogicalBytes": 368062464, - "fanoutMean": 1.5670166015625, - "recvTokensMax": 
16384, - "stragglerRank": 7, + "dispatchLogicalBytes": 19568640, + "combineLogicalBytes": 19568640, + "fanoutMean": 5.33203125, + "recvTokensMax": 179, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 4096, - "globalTokens": 32768, + "tokensPerRank": 64, + "globalTokens": 512, "dispatch": { - "p50": 1112.2239828109741, - "p90": 1126.8800497055054, - "p95": 1135.7439756393433, - "p99": 1233.247995376587 + "p50": 112.28799819946289, + "p90": 121.2799996137619, + "p95": 125.44000148773193, + "p99": 132.1280002593994 }, "combine": { - "p50": 2068.864107131958, - "p90": 2072.096109390259, - "p95": 2080.4800987243652, - "p99": 2143.2321071624756 + "p50": 106.4319983124733, + "p90": 113.56800049543381, + "p95": 116.19199812412262, + "p99": 122.27199971675873 }, "roundtrip": { - "p50": 3127.5839805603027, - "p90": 3139.359951019287, - "p95": 3147.6480960845947, - "p99": 3192.70396232605 + "p50": 189.88800048828125, + "p90": 199.5840072631836, + "p95": 203.64800095558167, + "p99": 214.4320011138916 }, "isolatedSum": { - "p50": 3181.088089942932, - "p90": 3198.976159095764, - "p95": 3216.2240743637085, - "p99": 3376.4801025390625 + "p50": 218.7199965119362, + "p90": 234.8480001091957, + "p95": 241.63199961185455, + "p99": 254.39999997615814 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 734720000, - "combineLogicalBytes": 734720000, - "fanoutMean": 1.56402587890625, - "recvTokensMax": 32768, - "stragglerRank": 0, + "dispatchLogicalBytes": 38750208, + "combineLogicalBytes": 38750208, + "fanoutMean": 5.279296875, + "recvTokensMax": 348, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 128.67200374603271, + "p90": 137.5039964914322, + "p95": 141.12000167369843, + "p99": 153.47200632095337 + }, + "combine": { + "p50": 127.68000364303589, + "p90": 131.96800649166107, + "p95": 133.56800377368927, + "p99": 
139.77600634098053 + }, + "roundtrip": { + "p50": 226.1440008878708, + "p90": 234.0800017118454, + "p95": 237.7600073814392, + "p99": 245.15199661254883 + }, + "isolatedSum": { + "p50": 256.3520073890686, + "p90": 269.47200298309326, + "p95": 274.6880054473877, + "p99": 293.2480126619339 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77342720, + "combineLogicalBytes": 77342720, + "fanoutMean": 5.2685546875, + "recvTokensMax": 687, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -23642,47 +23053,48 @@ ] }, { - "id": "cx-861c4f52", - "identity": "b300|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-heavy+eplb|8|prefill|normal|none|none|0|tuned||46855e7fa6754eb", - "colorKey": "b300_6d2e4735", - "comparisonKey": "e4e20084a0948dac", + "id": "cx-c886abc0", + "identity": "gb300|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|uniform+eplb|8|decode|normal|none|none|0|tuned||a572344820478f0", + "colorKey": "gb300_8b7def4e", + "comparisonKey": "8adbe858ea6e1f63", "schemaVersion": 3, - "generatedAt": "2026-06-27T09:50:16.626677+00:00", + "generatedAt": "2026-06-29T13:36:56.578195+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_10", - "sku": "b300", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", "backend": "deepep", - "phase": "prefill", + "phase": "decode", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep · bf16 · zipf-heavy+eplb", + "label": "GB300 EP8 · deepep · bf16 · uniform+eplb", "model": "DeepSeek-V3 (EPLB physical)", "shape": { "hidden": 7168, "topk": 8, "experts": 288, - "routing": "zipf-heavy", - "routingLabel": "zipf-heavy+eplb", + 
"routing": "uniform", + "routingLabel": "uniform+eplb", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": true, "dispatchDtype": "bf16", + "kernelGeneration": "v1", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1351, + "achievedFraction": 0.1316, "configuredUnits": 20, - "deviceUnits": 148, + "deviceUnits": 152, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -23690,244 +23102,318 @@ }, "placement": { "kind": "packed", - "nodes": 1, + "nodes": 2, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "46855e7fa6754eb", - "workloadId": "set:6:1ca614e23cc66be1", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": 7.38995361328125, - "eplbImbalanceAfter": 1.0000210716610862, - "backendVersion": "1.2.1", + "traceSignature": "a572344820478f0", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.078125, + "eplbImbalanceAfter": 1.00048828125, + "backendVersion": "1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28285658973", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285658973", - "createdAt": "2026-06-27T09:50:16.626677+00:00", - "sha": "149586650dbed5b7579537347e9489d5b41543c1" + "id": "28374342409", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409", + "createdAt": "2026-06-29T13:08:27Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 1, + "globalTokens": 8, "dispatch": { - "p50": 94.30400282144547, - "p90": 96.03200107812881, - "p95": 97.88800030946732, - "p99": 106.55999928712845 + "p50": 95.04000097513199, + "p90": 109.27999764680862, + "p95": 115.1999980211258, + "p99": 
145.08800208568573 }, "combine": { - "p50": 114.75200206041336, - "p90": 115.77600240707397, - "p95": 116.54400080442429, - "p99": 125.98399817943573 + "p50": 78.36800068616867, + "p90": 83.52000266313553, + "p95": 85.66399663686752, + "p99": 93.56799721717834 }, "roundtrip": { - "p50": 192.25600361824036, - "p90": 196.1279958486557, - "p95": 198.11199605464935, - "p99": 216.19200706481934 + "p50": 147.39200472831726, + "p90": 158.55999290943146, + "p95": 162.27200627326965, + "p99": 170.56000232696533 }, "isolatedSum": { - "p50": 209.05600488185883, - "p90": 211.8080034852028, - "p95": 214.4320011138916, - "p99": 232.54399746656418 + "p50": 173.40800166130066, + "p90": 192.80000030994415, + "p95": 200.86399465799332, + "p99": 238.65599930286407 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 79206400, - "combineLogicalBytes": 79206400, - "fanoutMean": 5.3955078125, - "recvTokensMax": 713, - "stragglerRank": 7, + "dispatchLogicalBytes": 516096, + "combineLogicalBytes": 516096, + "fanoutMean": 4.5, + "recvTokensMax": 6, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 256, - "globalTokens": 2048, + "tokensPerRank": 2, + "globalTokens": 16, "dispatch": { - "p50": 132.83200562000275, - "p90": 138.49599659442902, - "p95": 140.6400054693222, - "p99": 152.6080071926117 + "p50": 97.24800288677216, + "p90": 111.61600053310394, + "p95": 117.08799749612808, + "p99": 130.8480054140091 }, "combine": { - "p50": 155.20000457763672, - "p90": 163.83999586105347, - "p95": 164.19200599193573, - "p99": 166.9439971446991 + "p50": 79.74400371313095, + "p90": 84.3840017914772, + "p95": 86.2400010228157, + "p99": 95.67999839782715 }, "roundtrip": { - "p50": 272.4800109863281, - "p90": 279.87200021743774, - "p95": 287.1040105819702, - "p99": 306.5919876098633 + "p50": 149.75999295711517, + "p90": 161.3759994506836, + "p95": 164.000004529953, + "p99": 171.48800194263458 }, "isolatedSum": { - "p50": 288.03201019763947, - "p90": 
302.3359924554825, - "p95": 304.83201146125793, - "p99": 319.5520043373108 + "p50": 176.9920065999031, + "p90": 196.00000232458115, + "p95": 203.3279985189438, + "p99": 226.52800381183624 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 159330304, - "combineLogicalBytes": 159330304, - "fanoutMean": 5.4267578125, - "recvTokensMax": 1436, + "dispatchLogicalBytes": 1089536, + "combineLogicalBytes": 1089536, + "fanoutMean": 4.75, + "recvTokensMax": 11, "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 512, - "globalTokens": 4096, + "tokensPerRank": 4, + "globalTokens": 32, "dispatch": { - "p50": 192.671999335289, - "p90": 199.48799908161163, - "p95": 201.05600357055664, - "p99": 214.27200734615326 + "p50": 97.37599641084671, + "p90": 109.24799740314484, + "p95": 113.92000317573547, + "p99": 123.58400225639343 }, "combine": { - "p50": 274.2080092430115, - "p90": 277.5680124759674, - "p95": 285.95200181007385, - "p99": 298.335999250412 + "p50": 82.84799754619598, + "p90": 86.84799820184708, + "p95": 91.13600105047226, + "p99": 98.01600128412247 }, "roundtrip": { - "p50": 444.0639913082123, - "p90": 448.63998889923096, - "p95": 450.9119987487793, - "p99": 470.91200947761536 + "p50": 154.11199629306793, + "p90": 165.18400609493256, + "p95": 168.5120016336441, + "p99": 175.4560023546219 }, "isolatedSum": { - "p50": 466.8800085783005, - "p90": 477.05601155757904, - "p95": 487.0080053806305, - "p99": 512.6080065965652 + "p50": 180.2239939570427, + "p90": 196.0959956049919, + "p95": 205.05600422620773, + "p99": 221.6000035405159 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 319535104, - "combineLogicalBytes": 319535104, - "fanoutMean": 5.441650390625, - "recvTokensMax": 2897, - "stragglerRank": 4, + "dispatchLogicalBytes": 2207744, + "combineLogicalBytes": 2207744, + "fanoutMean": 4.8125, + "recvTokensMax": 23, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 1024, - 
"globalTokens": 8192, + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 96.73599898815155, + "p90": 109.21599715948105, + "p95": 113.76000195741653, + "p99": 125.50400197505951 + }, + "combine": { + "p50": 83.39200168848038, + "p90": 88.35200220346451, + "p95": 94.01600062847137, + "p99": 97.02400118112564 + }, + "roundtrip": { + "p50": 155.7759940624237, + "p90": 167.07199811935425, + "p95": 171.26399278640747, + "p99": 178.78399789333344 + }, + "isolatedSum": { + "p50": 180.12800067663193, + "p90": 197.56799936294556, + "p95": 207.7760025858879, + "p99": 222.52800315618515 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4558848, + "combineLogicalBytes": 4558848, + "fanoutMean": 4.96875, + "recvTokensMax": 46, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, "dispatch": { - "p50": 326.84800028800964, - "p90": 330.6240141391754, - "p95": 334.3679904937744, - "p99": 394.3359851837158 + "p50": 96.70399874448776, + "p90": 108.31999778747559, + "p95": 112.83200234174728, + "p99": 127.80800461769104 }, "combine": { - "p50": 469.63199973106384, - "p90": 471.1039960384369, - "p95": 472.7039933204651, - "p99": 483.13599824905396 + "p50": 83.74399691820145, + "p90": 91.10400080680847, + "p95": 94.04800087213516, + "p99": 97.53599762916565 }, "roundtrip": { - "p50": 772.4480032920837, - "p90": 781.7919850349426, - "p95": 785.2159738540649, - "p99": 801.2480139732361 + "p50": 156.99200332164764, + "p90": 168.89600455760956, + "p95": 172.992005944252, + "p99": 182.0800006389618 }, "isolatedSum": { - "p50": 796.4800000190735, - "p90": 801.7280101776123, - "p95": 807.0719838142395, - "p99": 877.4719834327698 + "p50": 180.4479956626892, + "p90": 199.42399859428406, + "p95": 206.88000321388245, + "p99": 225.3440022468567 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 638410752, - "combineLogicalBytes": 638410752, - "fanoutMean": 5.43603515625, - 
"recvTokensMax": 5815, - "stragglerRank": 4, + "dispatchLogicalBytes": 9347072, + "combineLogicalBytes": 9347072, + "fanoutMean": 5.09375, + "recvTokensMax": 86, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2048, - "globalTokens": 16384, + "tokensPerRank": 32, + "globalTokens": 256, "dispatch": { - "p50": 581.6959738731384, - "p90": 585.9519839286804, - "p95": 589.792013168335, - "p99": 671.8400120735168 + "p50": 99.48799759149551, + "p90": 110.04800349473953, + "p95": 113.66400122642517, + "p99": 123.71200323104858 }, "combine": { - "p50": 828.2240033149719, - "p90": 838.8159871101379, - "p95": 839.6160006523132, - "p99": 850.8480191230774 + "p50": 90.55999666452408, + "p90": 96.25600278377533, + "p95": 98.11200201511383, + "p99": 105.0880029797554 }, "roundtrip": { - "p50": 1393.4400081634521, - "p90": 1402.4319648742676, - "p95": 1406.6879749298096, - "p99": 1428.1920194625854 + "p50": 164.38399255275726, + "p90": 173.40800166130066, + "p95": 176.67199671268463, + "p99": 183.9040070772171 }, "isolatedSum": { - "p50": 1409.9199771881104, - "p90": 1424.7679710388184, - "p95": 1429.4080138206482, - "p99": 1522.6880311965942 + "p50": 190.0479942560196, + "p90": 206.30400627851486, + "p95": 211.776003241539, + "p99": 228.80000621080399 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1275144192, - "combineLogicalBytes": 1275144192, - "fanoutMean": 5.42889404296875, - "recvTokensMax": 11606, + "dispatchLogicalBytes": 18995200, + "combineLogicalBytes": 18995200, + "fanoutMean": 5.17578125, + "recvTokensMax": 178, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 110.944002866745, + "p90": 118.46400052309036, + "p95": 121.5360015630722, + "p99": 127.83999741077423 + }, + "combine": { + "p50": 106.6880002617836, + "p90": 111.26399785280228, + "p95": 115.52000045776367, + "p99": 120.19199877977371 + }, + 
"roundtrip": { + "p50": 188.1600022315979, + "p90": 198.36799800395966, + "p95": 201.88799500465393, + "p99": 214.62400257587433 + }, + "isolatedSum": { + "p50": 217.6320031285286, + "p90": 229.72799837589264, + "p95": 237.05600202083588, + "p99": 248.03199619054794 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38291456, + "combineLogicalBytes": 38291456, + "fanoutMean": 5.216796875, + "recvTokensMax": 348, "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 4096, - "globalTokens": 32768, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 1084.3839645385742, - "p90": 1092.7679538726807, - "p95": 1101.5679836273193, - "p99": 1113.6959791183472 + "p50": 127.00800597667694, + "p90": 135.68000495433807, + "p95": 140.6400054693222, + "p99": 147.48799800872803 }, "combine": { - "p50": 1567.4560070037842, - "p90": 1576.8959522247314, - "p95": 1578.976035118103, - "p99": 1629.3120384216309 + "p50": 124.09599870443344, + "p90": 132.51200318336487, + "p95": 133.95200669765472, + "p99": 138.84800672531128 }, "roundtrip": { - "p50": 2638.4639739990234, - "p90": 2648.47993850708, - "p95": 2653.088092803955, - "p99": 2690.3679370880127 + "p50": 224.2880016565323, + "p90": 231.9359928369522, + "p95": 235.29599606990814, + "p99": 241.18399620056152 }, "isolatedSum": { - "p50": 2651.8399715423584, - "p90": 2669.663906097412, - "p95": 2680.5440187454224, - "p99": 2743.008017539978 + "p50": 251.10400468111038, + "p90": 268.19200813770294, + "p95": 274.59201216697693, + "p99": 286.3360047340393 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2546374656, - "combineLogicalBytes": 2546374656, - "fanoutMean": 5.420562744140625, - "recvTokensMax": 23170, - "stragglerRank": 4, + "dispatchLogicalBytes": 77113344, + "combineLogicalBytes": 77113344, + "fanoutMean": 5.2529296875, + "recvTokensMax": 685, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -23935,292 +23421,367 @@ ] }, { - 
"id": "cx-cae00445", - "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-mild|8|prefill|normal|none|none|0|tuned||cf93f8f6b52e428", - "colorKey": "b300_7ab35d34", - "comparisonKey": "d9d28463325111a5", + "id": "cx-e3eecced", + "identity": "gb300|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|decode|normal|none|none|0|tuned||1093cd76c9cd2db", + "colorKey": "gb300_b3a88763", + "comparisonKey": "1521f576cce519c9", "schemaVersion": 3, - "generatedAt": "2026-06-27T09:49:16.226066+00:00", + "generatedAt": "2026-06-29T13:40:26.333111+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_11", - "sku": "b300", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", "backend": "deepep", - "phase": "prefill", + "phase": "decode", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep · bf16 · zipf-mild", + "label": "GB300 EP8 · deepep · bf16 · zipf", "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, "experts": 256, - "routing": "zipf-mild", - "routingLabel": "zipf-mild", + "routing": "zipf", + "routingLabel": "zipf", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, "dispatchDtype": "bf16", + "kernelGeneration": "v1", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1351, + "achievedFraction": 0.1316, "configuredUnits": 20, - "deviceUnits": 148, + "deviceUnits": 152, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, "paretoEligible": false }, "placement": { - "kind": "packed", - "nodes": 1, + "kind": "adversarial", + "nodes": 2, 
"gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "cf93f8f6b52e428", - "workloadId": "set:6:a224603e5a1640b8", - "workloadSource": "canonical-serialized", + "traceSignature": "1093cd76c9cd2db", + "workloadId": null, + "workloadSource": "seeded-runtime", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", + "backendVersion": "1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28285635254", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285635254", - "createdAt": "2026-06-27T09:49:16.226066+00:00", - "sha": "149586650dbed5b7579537347e9489d5b41543c1" + "id": "28374342409", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409", + "createdAt": "2026-06-29T13:08:27Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 1, + "globalTokens": 8, "dispatch": { - "p50": 102.78400033712387, - "p90": 104.80000078678131, - "p95": 105.66399991512299, - "p99": 110.23999750614166 + "p50": 94.40000355243683, + "p90": 106.39999806880951, + "p95": 112.64000087976456, + "p99": 123.00799787044525 }, "combine": { - "p50": 126.65599584579468, - "p90": 128.09599936008453, - "p95": 128.89599800109863, - "p99": 141.85599982738495 + "p50": 81.53600245714188, + "p90": 85.11999994516373, + "p95": 91.71199798583984, + "p99": 96.03200107812881 }, "roundtrip": { - "p50": 205.85599541664124, - "p90": 213.15200626850128, - "p95": 215.55200219154358, - "p99": 228.15999388694763 + "p50": 149.59999918937683, + "p90": 160.3199988603592, + "p95": 165.18400609493256, + "p99": 179.32799458503723 }, "isolatedSum": { - "p50": 229.43999618291855, - "p90": 232.89600014686584, - "p95": 234.55999791622162, - "p99": 252.0959973335266 + "p50": 175.9360060095787, + "p90": 191.51999801397324, + "p95": 
204.3519988656044, + "p99": 219.03999894857407 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 70160384, - "combineLogicalBytes": 70160384, - "fanoutMean": 4.779296875, - "recvTokensMax": 987, - "stragglerRank": 4, + "dispatchLogicalBytes": 444416, + "combineLogicalBytes": 444416, + "fanoutMean": 3.875, + "recvTokensMax": 8, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 256, - "globalTokens": 2048, + "tokensPerRank": 2, + "globalTokens": 16, "dispatch": { - "p50": 139.8719996213913, - "p90": 141.63200557231903, - "p95": 143.23200285434723, - "p99": 152.8320014476776 + "p50": 95.20000219345093, + "p90": 107.26399719715118, + "p95": 111.455999314785, + "p99": 117.34399944543839 }, "combine": { - "p50": 176.9919991493225, - "p90": 186.8479996919632, - "p95": 187.96800076961517, - "p99": 201.05600357055664 + "p50": 78.65600287914276, + "p90": 84.25600081682205, + "p95": 86.17600053548813, + "p99": 94.01600062847137 }, "roundtrip": { - "p50": 305.5039942264557, - "p90": 311.2640082836151, - "p95": 312.1599853038788, - "p99": 315.8400058746338 + "p50": 150.7200002670288, + "p90": 162.08000481128693, + "p95": 164.92800414562225, + "p99": 174.97600615024567 }, "isolatedSum": { - "p50": 316.8639987707138, - "p90": 328.4800052642822, - "p95": 331.2000036239624, - "p99": 353.88800501823425 + "p50": 173.8560050725937, + "p90": 191.51999801397324, + "p95": 197.63199985027313, + "p99": 211.36000007390976 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 140879872, - "combineLogicalBytes": 140879872, - "fanoutMean": 4.79833984375, - "recvTokensMax": 1972, - "stragglerRank": 4, + "dispatchLogicalBytes": 845824, + "combineLogicalBytes": 845824, + "fanoutMean": 3.6875, + "recvTokensMax": 16, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 512, - "globalTokens": 4096, + "tokensPerRank": 4, + "globalTokens": 32, "dispatch": { - "p50": 213.6639952659607, - "p90": 
221.88800573349, - "p95": 222.91199862957, - "p99": 224.63999688625336 + "p50": 97.37599641084671, + "p90": 109.76000130176544, + "p95": 113.8560026884079, + "p99": 125.5359947681427 }, "combine": { - "p50": 326.4960050582886, - "p90": 335.55200695991516, - "p95": 336.8639945983887, - "p99": 396.9919979572296 + "p50": 81.727996468544, + "p90": 87.16800063848495, + "p95": 93.66399794816971, + "p99": 105.02400249242783 }, "roundtrip": { - "p50": 522.2079753875732, - "p90": 529.8879742622375, - "p95": 531.4239859580994, - "p99": 539.2640233039856 + "p50": 152.38399803638458, + "p90": 164.8319959640503, + "p95": 168.12799870967865, + "p99": 175.90400576591492 }, "isolatedSum": { - "p50": 540.1600003242493, - "p90": 557.4400126934052, - "p95": 559.7759932279587, - "p99": 621.631994843483 + "p50": 179.10399287939072, + "p90": 196.9280019402504, + "p95": 207.5200006365776, + "p99": 230.55999726057053 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 282333184, - "combineLogicalBytes": 282333184, - "fanoutMean": 4.80810546875, - "recvTokensMax": 3936, - "stragglerRank": 4, + "dispatchLogicalBytes": 1691648, + "combineLogicalBytes": 1691648, + "fanoutMean": 3.6875, + "recvTokensMax": 32, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 1024, - "globalTokens": 8192, + "tokensPerRank": 8, + "globalTokens": 64, "dispatch": { - "p50": 355.6160032749176, - "p90": 362.9760146141052, - "p95": 364.8639917373657, - "p99": 368.3519959449768 + "p50": 98.1760025024414, + "p90": 108.38399827480316, + "p95": 112.70400136709213, + "p99": 125.95200538635254 }, "combine": { - "p50": 569.4720149040222, - "p90": 572.7360248565674, - "p95": 580.7039737701416, - "p99": 594.1759943962097 + "p50": 83.03999900817871, + "p90": 86.91199868917465, + "p95": 92.3520028591156, + "p99": 97.18400239944458 }, "roundtrip": { - "p50": 920.2880263328552, - "p90": 929.0239810943604, - "p95": 936.7679953575134, - "p99": 955.5839896202087 + "p50": 
155.10399639606476, + "p90": 167.1680063009262, + "p95": 170.30400037765503, + "p99": 176.64000391960144 }, "isolatedSum": { - "p50": 925.0880181789398, - "p90": 935.7120394706726, - "p95": 945.5679655075073, - "p99": 962.5279903411865 + "p50": 181.21600151062012, + "p90": 195.2959969639778, + "p95": 205.05600422620773, + "p99": 223.13600778579712 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 566716416, - "combineLogicalBytes": 566716416, - "fanoutMean": 4.8255615234375, - "recvTokensMax": 7855, - "stragglerRank": 4, + "dispatchLogicalBytes": 3354624, + "combineLogicalBytes": 3354624, + "fanoutMean": 3.65625, + "recvTokensMax": 64, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2048, - "globalTokens": 16384, + "tokensPerRank": 16, + "globalTokens": 128, "dispatch": { - "p50": 658.6880087852478, - "p90": 667.9679751396179, - "p95": 669.2479848861694, - "p99": 679.6159744262695 + "p50": 97.95200079679489, + "p90": 109.02400314807892, + "p95": 112.22399771213531, + "p99": 118.01599711179733 }, "combine": { - "p50": 1048.1280088424683, - "p90": 1052.191972732544, - "p95": 1061.1519813537598, - "p99": 1171.712040901184 + "p50": 83.29600095748901, + "p90": 91.36000275611877, + "p95": 94.01600062847137, + "p99": 98.9760011434555 }, "roundtrip": { - "p50": 1691.648006439209, - "p90": 1700.1279592514038, - "p95": 1703.5839557647705, - "p99": 1764.7039890289307 + "p50": 156.00000321865082, + "p90": 168.5120016336441, + "p95": 172.06400632858276, + "p99": 178.3359944820404 }, "isolatedSum": { - "p50": 1706.816017627716, - "p90": 1720.1599478721619, - "p95": 1730.3999662399292, - "p99": 1851.3280153274536 + "p50": 181.2480017542839, + "p90": 200.3840059041977, + "p95": 206.2399983406067, + "p99": 216.99199825525284 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1132285952, - "combineLogicalBytes": 1132285952, - "fanoutMean": 4.8206787109375, - "recvTokensMax": 15694, - "stragglerRank": 0, + 
"dispatchLogicalBytes": 6537216, + "combineLogicalBytes": 6537216, + "fanoutMean": 3.5625, + "recvTokensMax": 127, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 4096, - "globalTokens": 32768, + "tokensPerRank": 32, + "globalTokens": 256, "dispatch": { - "p50": 1285.375952720642, - "p90": 1298.6559867858887, - "p95": 1301.2160062789917, - "p99": 1428.063988685608 + "p50": 100.03200173377991, + "p90": 111.55200004577637, + "p95": 115.42399972677231, + "p99": 124.1919994354248 }, "combine": { - "p50": 2018.496036529541, - "p90": 2022.7839946746826, - "p95": 2031.3599109649658, - "p99": 2082.4639797210693 + "p50": 85.82399785518646, + "p90": 94.81599926948547, + "p95": 96.76799923181534, + "p99": 106.81600123643875 }, "roundtrip": { - "p50": 3294.048070907593, - "p90": 3308.799982070923, - "p95": 3315.9360885620117, - "p99": 3368.2239055633545 + "p50": 162.11199760437012, + "p90": 172.19200730323792, + "p95": 175.20000040531158, + "p99": 180.57599663734436 }, "isolatedSum": { - "p50": 3303.871989250183, - "p90": 3321.4399814605713, - "p95": 3332.5759172439575, - "p99": 3510.5279684066772 + "p50": 185.85599958896637, + "p90": 206.36799931526184, + "p95": 212.19199895858765, + "p99": 231.00800067186356 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2267840512, - "combineLogicalBytes": 2267840512, - "fanoutMean": 4.82763671875, - "recvTokensMax": 31357, - "stragglerRank": 4, + "dispatchLogicalBytes": 12859392, + "combineLogicalBytes": 12859392, + "fanoutMean": 3.50390625, + "recvTokensMax": 255, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 110.36799848079681, + "p90": 119.93599683046341, + "p95": 122.46400117874146, + "p99": 131.74399733543396 + }, + "combine": { + "p50": 103.58399897813797, + "p90": 108.76800119876862, + "p95": 110.75200140476227, + "p99": 119.90399658679962 + }, + "roundtrip": { + 
"p50": 187.29600310325623, + "p90": 197.4399983882904, + "p95": 201.88799500465393, + "p99": 228.19200158119202 + }, + "isolatedSum": { + "p50": 213.95199745893478, + "p90": 228.70399802923203, + "p95": 233.21600258350372, + "p99": 251.64799392223358 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 25145344, + "combineLogicalBytes": 25145344, + "fanoutMean": 3.42578125, + "recvTokensMax": 510, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 128.60800325870514, + "p90": 138.68799805641174, + "p95": 141.9840008020401, + "p99": 149.1519957780838 + }, + "combine": { + "p50": 134.0160071849823, + "p90": 142.91200041770935, + "p95": 144.57599818706512, + "p99": 154.84799444675446 + }, + "roundtrip": { + "p50": 235.00800132751465, + "p90": 243.20000410079956, + "p95": 245.9840029478073, + "p99": 251.64800882339478 + }, + "isolatedSum": { + "p50": 262.62401044368744, + "p90": 281.5999984741211, + "p95": 286.5599989891052, + "p99": 303.99999022483826 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 @@ -24228,47 +23789,48 @@ ] }, { - "id": "cx-17599843", - "identity": "b300|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-mild+eplb|8|prefill|normal|none|none|0|tuned||27ddc85ded0add9", - "colorKey": "b300_5e3d915a", - "comparisonKey": "0397aa2abeee044f", + "id": "cx-c0b8c5b4", + "identity": "gb300|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|decode|normal|none|none|0|tuned||7eace9164e82cd6", + "colorKey": "gb300_961589b9", + "comparisonKey": "484727a851531c1a", "schemaVersion": 3, - "generatedAt": "2026-06-27T09:49:19.827351+00:00", + "generatedAt": "2026-06-29T13:46:03.238873+00:00", "status": "valid", - "publicationStatus": "official", - 
"runner": "b300-nv_05", - "sku": "b300", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", "backend": "deepep", - "phase": "prefill", + "phase": "decode", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep · bf16 · zipf-mild+eplb", - "model": "DeepSeek-V3 (EPLB physical)", + "label": "GB300 EP8 · deepep · bf16 · zipf-heavy", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, - "experts": 288, - "routing": "zipf-mild", - "routingLabel": "zipf-mild+eplb", + "experts": 256, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy", "routingStep": 0, "unevenTokens": "none", - "eplbEnabled": true, + "eplbEnabled": false, "dispatchDtype": "bf16", + "kernelGeneration": "v1", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1351, + "achievedFraction": 0.1316, "configuredUnits": 20, - "deviceUnits": 148, + "deviceUnits": 152, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -24276,292 +23838,219 @@ }, "placement": { "kind": "packed", - "nodes": 1, + "nodes": 2, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "27ddc85ded0add9", - "workloadId": "set:6:a224603e5a1640b8", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": 2.545684814453125, - "eplbImbalanceAfter": 1.0001495361328125, - "backendVersion": "1.2.1", + "traceSignature": "7eace9164e82cd6", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.1.0+814e508", "imageDigest": 
"sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28285637742", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285637742", - "createdAt": "2026-06-27T09:49:19.827351+00:00", - "sha": "149586650dbed5b7579537347e9489d5b41543c1" + "id": "28374342409", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409", + "createdAt": "2026-06-29T13:08:27Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 1, + "globalTokens": 8, "dispatch": { - "p50": 94.94400024414062, - "p90": 96.89600020647049, - "p95": 99.16800260543823, - "p99": 108.99200290441513 + "p50": 91.39200299978256, + "p90": 108.76800119876862, + "p95": 117.63200163841248, + "p99": 187.99999356269836 }, "combine": { - "p50": 115.4559999704361, - "p90": 116.80000275373459, - "p95": 117.76000261306763, - "p99": 127.6479959487915 + "p50": 70.23999840021133, + "p90": 74.94399696588516, + "p95": 81.11999928951263, + "p99": 110.46399921178818 }, "roundtrip": { - "p50": 193.6960071325302, - "p90": 199.2959976196289, - "p95": 201.75999402999878, - "p99": 233.11999440193176 + "p50": 139.16799426078796, + "p90": 152.67199277877808, + "p95": 158.33599865436554, + "p99": 228.70400547981262 }, "isolatedSum": { - "p50": 210.40000021457672, - "p90": 213.69600296020508, - "p95": 216.92800521850586, - "p99": 236.63999885320663 + "p50": 161.6320013999939, + "p90": 183.71199816465378, + "p95": 198.7520009279251, + "p99": 298.46399277448654 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 78159872, - "combineLogicalBytes": 78159872, - "fanoutMean": 5.32421875, - "recvTokensMax": 702, + "dispatchLogicalBytes": 172032, + "combineLogicalBytes": 172032, + "fanoutMean": 1.5, + "recvTokensMax": 8, "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 256, - "globalTokens": 2048, 
+ "tokensPerRank": 8, + "globalTokens": 64, "dispatch": { - "p50": 132.25600123405457, - "p90": 138.62399756908417, - "p95": 140.06400108337402, - "p99": 148.28799664974213 + "p50": 96.67199850082397, + "p90": 112.96000331640244, + "p95": 138.11199367046356, + "p99": 177.91999876499176 }, "combine": { - "p50": 163.4880006313324, - "p90": 164.73600268363953, - "p95": 165.53600132465363, - "p99": 188.48000466823578 + "p50": 73.88799637556076, + "p90": 83.23200047016144, + "p95": 85.37600189447403, + "p99": 131.9359987974167 }, "roundtrip": { - "p50": 273.3759880065918, - "p90": 280.89600801467896, - "p95": 283.4239900112152, - "p99": 295.0400114059448 + "p50": 146.7519998550415, + "p90": 161.3440066576004, + "p95": 187.00799345970154, + "p99": 256.0960054397583 }, "isolatedSum": { - "p50": 295.74400186538696, - "p90": 303.3600002527237, - "p95": 305.60000240802765, - "p99": 336.7680013179779 + "p50": 170.55999487638474, + "p90": 196.19200378656387, + "p95": 223.4879955649376, + "p99": 309.85599756240845 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 156563456, - "combineLogicalBytes": 156563456, - "fanoutMean": 5.33251953125, - "recvTokensMax": 1393, - "stragglerRank": 4, + "dispatchLogicalBytes": 1376256, + "combineLogicalBytes": 1376256, + "fanoutMean": 1.5, + "recvTokensMax": 64, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 512, - "globalTokens": 4096, + "tokensPerRank": 32, + "globalTokens": 256, "dispatch": { - "p50": 193.63200664520264, - "p90": 196.83200120925903, - "p95": 202.55999267101288, - "p99": 238.27199637889862 + "p50": 100.44799745082855, + "p90": 114.04799669981003, + "p95": 135.16800105571747, + "p99": 194.36800479888916 }, "combine": { - "p50": 264.384001493454, - "p90": 274.2399871349335, - "p95": 274.9119997024536, - "p99": 299.6160089969635 + "p50": 82.87999778985977, + "p90": 90.17600119113922, + "p95": 94.4959968328476, + "p99": 146.04799449443817 }, "roundtrip": { - "p50": 
442.78401136398315, - "p90": 448.4800100326538, - "p95": 453.8559913635254, - "p99": 481.1199903488159 + "p50": 155.87200224399567, + "p90": 170.59199512004852, + "p95": 186.17600202560425, + "p99": 261.4400088787079 }, "isolatedSum": { - "p50": 458.0160081386566, - "p90": 471.0719883441925, - "p95": 477.4719923734665, - "p99": 537.8880053758621 + "p50": 183.32799524068832, + "p90": 204.22399789094925, + "p95": 229.66399788856506, + "p99": 340.41599929332733 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 312410112, - "combineLogicalBytes": 312410112, - "fanoutMean": 5.3203125, - "recvTokensMax": 2773, - "stragglerRank": 4, + "dispatchLogicalBytes": 5533696, + "combineLogicalBytes": 5533696, + "fanoutMean": 1.5078125, + "recvTokensMax": 256, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 1024, - "globalTokens": 8192, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 326.33599638938904, - "p90": 329.21600341796875, - "p95": 330.1120102405548, - "p99": 349.08801317214966 + "p50": 123.45600128173828, + "p90": 139.8719996213913, + "p95": 160.70400178432465, + "p99": 219.32800114154816 }, "combine": { - "p50": 459.26401019096375, - "p90": 470.43201327323914, - "p95": 471.48799896240234, - "p99": 483.68000984191895 + "p50": 130.40000200271606, + "p90": 135.3279948234558, + "p95": 142.20799505710602, + "p99": 169.76000368595123 }, "roundtrip": { - "p50": 768.5440182685852, - "p90": 775.3919959068298, - "p95": 784.3199968338013, - "p99": 826.6239762306213 + "p50": 225.21600127220154, + "p90": 238.78400027751923, + "p95": 248.35200607776642, + "p99": 308.7039887905121 }, "isolatedSum": { - "p50": 785.6000065803528, - "p90": 799.6480166912079, - "p95": 801.6000092029572, - "p99": 832.7680230140686 + "p50": 253.85600328445435, + "p90": 275.1999944448471, + "p95": 302.91199684143066, + "p99": 389.0880048274994 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 622712832, - 
"combineLogicalBytes": 622712832, - "fanoutMean": 5.3023681640625, - "recvTokensMax": 5498, + "dispatchLogicalBytes": 22650880, + "combineLogicalBytes": 22650880, + "fanoutMean": 1.54296875, + "recvTokensMax": 1024, "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 - }, - { - "tokensPerRank": 2048, - "globalTokens": 16384, - "dispatch": { - "p50": 565.6960010528564, - "p90": 575.6480097770691, - "p95": 583.6799740791321, - "p99": 610.4320287704468 - }, - "combine": { - "p50": 815.7439827919006, - "p90": 827.8719782829285, - "p95": 830.6559920310974, - "p99": 852.6080250740051 - }, - "roundtrip": { - "p50": 1371.8719482421875, - "p90": 1386.7199420928955, - "p95": 1397.7919816970825, - "p99": 1450.4319429397583 - }, - "isolatedSum": { - "p50": 1381.439983844757, - "p90": 1403.5199880599976, - "p95": 1414.3359661102295, - "p99": 1463.040053844452 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1245038592, - "combineLogicalBytes": 1245038592, - "fanoutMean": 5.30072021484375, - "recvTokensMax": 10955, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4096, - "globalTokens": 32768, - "dispatch": { - "p50": 1069.599986076355, - "p90": 1080.8639526367188, - "p95": 1088.8639688491821, - "p99": 1109.0879440307617 - }, - "combine": { - "p50": 1531.3600301742554, - "p90": 1552.8000593185425, - "p95": 1564.2240047454834, - "p99": 1616.8960332870483 - }, - "roundtrip": { - "p50": 2586.0159397125244, - "p90": 2608.6719036102295, - "p95": 2621.151924133301, - "p99": 2671.7441082000732 - }, - "isolatedSum": { - "p50": 2600.9600162506104, - "p90": 2633.6640119552612, - "p95": 2653.0879735946655, - "p99": 2725.98397731781 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2489460736, - "combineLogicalBytes": 2489460736, - "fanoutMean": 5.299407958984375, - "recvTokensMax": 21864, - "stragglerRank": 3, - "correct": true, - "samplesPooled": 600, - "trials": 3 } ] }, { - "id": 
"cx-4c124953", - "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-moderate|8|prefill|normal|none|none|0|tuned||b5217e990b95f86", - "colorKey": "b300_fdf55523", - "comparisonKey": "61f6ca66d0cc490b", + "id": "cx-33e9cd0d", + "identity": "gb300|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-heavy+eplb|8|decode|normal|none|none|0|tuned||39778bd75f046da", + "colorKey": "gb300_db9a43b5", + "comparisonKey": "d24055c7960098e6", "schemaVersion": 3, - "generatedAt": "2026-06-27T09:49:50.578369+00:00", + "generatedAt": "2026-06-29T13:52:52.053215+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_07", - "sku": "b300", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", "backend": "deepep", - "phase": "prefill", + "phase": "decode", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep · bf16 · zipf-moderate", - "model": "DeepSeek-V3/V4", + "label": "GB300 EP8 · deepep · bf16 · zipf-heavy+eplb", + "model": "DeepSeek-V3 (EPLB physical)", "shape": { "hidden": 7168, "topk": 8, - "experts": 256, - "routing": "zipf-moderate", - "routingLabel": "zipf-moderate", + "experts": 288, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy+eplb", "routingStep": 0, "unevenTokens": "none", - "eplbEnabled": false, + "eplbEnabled": true, "dispatchDtype": "bf16", + "kernelGeneration": "v1", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1351, + "achievedFraction": 0.1316, "configuredUnits": 20, - "deviceUnits": 148, + "deviceUnits": 152, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", 
"fixedKernel": false, @@ -24569,244 +24058,318 @@ }, "placement": { "kind": "packed", - "nodes": 1, + "nodes": 2, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "b5217e990b95f86", - "workloadId": "set:6:6709a02c31933a9f", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", + "traceSignature": "39778bd75f046da", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 7.40625, + "eplbImbalanceAfter": 1.0004417782738093, + "backendVersion": "1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28285646148", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285646148", - "createdAt": "2026-06-27T09:49:50.578369+00:00", - "sha": "149586650dbed5b7579537347e9489d5b41543c1" + "id": "28374342409", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409", + "createdAt": "2026-06-29T13:08:27Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 1, + "globalTokens": 8, "dispatch": { - "p50": 100.73599964380264, - "p90": 103.16800326108932, - "p95": 105.12000322341919, - "p99": 109.53599959611893 + "p50": 98.49599748849869, + "p90": 111.64800077676773, + "p95": 115.03999680280685, + "p99": 126.91199779510498 }, "combine": { - "p50": 126.20800733566284, - "p90": 127.71199643611908, - "p95": 128.25599312782288, - "p99": 138.65600526332855 + "p50": 76.51200145483017, + "p90": 84.70399677753448, + "p95": 86.27200126647949, + "p99": 92.32000261545181 }, "roundtrip": { - "p50": 208.3200067281723, - "p90": 212.70400285720825, - "p95": 213.50400149822235, - "p99": 231.04000091552734 + "p50": 154.2080044746399, + "p90": 165.0560051202774, + "p95": 169.11999881267548, + "p99": 225.24799406528473 }, 
"isolatedSum": { - "p50": 226.94400697946548, - "p90": 230.8799996972084, - "p95": 233.37599635124207, - "p99": 248.19200485944748 + "p50": 175.00799894332886, + "p90": 196.35199755430222, + "p95": 201.31199806928635, + "p99": 219.2320004105568 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 49946624, - "combineLogicalBytes": 49946624, - "fanoutMean": 3.40234375, - "recvTokensMax": 1022, - "stragglerRank": 6, + "dispatchLogicalBytes": 559104, + "combineLogicalBytes": 559104, + "fanoutMean": 4.875, + "recvTokensMax": 6, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 256, - "globalTokens": 2048, + "tokensPerRank": 2, + "globalTokens": 16, "dispatch": { - "p50": 135.93600690364838, - "p90": 139.39200341701508, - "p95": 140.83200693130493, - "p99": 151.7760008573532 + "p50": 98.39999675750732, + "p90": 111.64800077676773, + "p95": 116.83200299739838, + "p99": 131.9040060043335 }, "combine": { - "p50": 176.86399817466736, - "p90": 179.07199263572693, - "p95": 180.03199994564056, - "p99": 189.63199853897095 + "p50": 81.88799768686295, + "p90": 86.84799820184708, + "p95": 90.71999788284302, + "p99": 97.02400118112564 }, "roundtrip": { - "p50": 297.63200879096985, - "p90": 303.3599853515625, - "p95": 305.63199520111084, - "p99": 315.71200489997864 + "p50": 156.22399747371674, + "p90": 167.64800250530243, + "p95": 170.49600183963776, + "p99": 186.94399297237396 }, "isolatedSum": { - "p50": 312.80000507831573, - "p90": 318.463996052742, - "p95": 320.8640068769455, - "p99": 341.40799939632416 + "p50": 180.28799444437027, + "p90": 198.4959989786148, + "p95": 207.5520008802414, + "p99": 228.92800718545914 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 100509696, - "combineLogicalBytes": 100509696, - "fanoutMean": 3.42333984375, - "recvTokensMax": 2046, - "stragglerRank": 3, + "dispatchLogicalBytes": 1175552, + "combineLogicalBytes": 1175552, + "fanoutMean": 5.125, + "recvTokensMax": 12, + "stragglerRank": 5, 
"correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 512, - "globalTokens": 4096, + "tokensPerRank": 4, + "globalTokens": 32, "dispatch": { - "p50": 202.4639993906021, - "p90": 210.207998752594, - "p95": 211.2320065498352, - "p99": 216.76799654960632 + "p50": 99.45599734783173, + "p90": 111.77600175142288, + "p95": 116.99199676513672, + "p99": 135.83999872207642 }, "combine": { - "p50": 325.1520097255707, - "p90": 335.07201075553894, - "p95": 335.7760012149811, - "p99": 359.23200845718384 + "p50": 84.03199911117554, + "p90": 87.67999708652496, + "p95": 92.51199662685394, + "p99": 100.96000134944916 }, "roundtrip": { - "p50": 506.84797763824463, - "p90": 513.5999917984009, - "p95": 517.7599787712097, - "p99": 538.4640097618103 + "p50": 160.64000129699707, + "p90": 170.78399658203125, + "p95": 175.04000663757324, + "p99": 184.35199558734894 }, "isolatedSum": { - "p50": 527.6160091161728, - "p90": 545.2800095081329, - "p95": 547.0080077648163, - "p99": 576.0000050067902 + "p50": 183.48799645900726, + "p90": 199.45599883794785, + "p95": 209.50399339199066, + "p99": 236.80000007152557 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 201678848, - "combineLogicalBytes": 201678848, - "fanoutMean": 3.4345703125, - "recvTokensMax": 4094, - "stragglerRank": 7, + "dispatchLogicalBytes": 2465792, + "combineLogicalBytes": 2465792, + "fanoutMean": 5.375, + "recvTokensMax": 25, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 1024, - "globalTokens": 8192, + "tokensPerRank": 8, + "globalTokens": 64, "dispatch": { - "p50": 347.55200147628784, - "p90": 352.9599905014038, - "p95": 354.0799915790558, - "p99": 366.2079870700836 + "p50": 100.35199671983719, + "p90": 112.92800307273865, + "p95": 116.41599982976913, + "p99": 124.95999783277512 }, "combine": { - "p50": 582.6560258865356, - "p90": 592.3839807510376, - "p95": 593.4720039367676, - "p99": 617.0560121536255 + "p50": 85.11999994516373, + "p90": 
90.14400094747543, + "p95": 94.7519987821579, + "p99": 106.08000308275223 }, "roundtrip": { - "p50": 910.431981086731, - "p90": 917.8879857063293, - "p95": 920.0000166893005, - "p99": 955.6159973144531 + "p50": 162.30399906635284, + "p90": 172.2559928894043, + "p95": 175.84000527858734, + "p99": 182.75199830532074 }, "isolatedSum": { - "p50": 930.2080273628235, - "p90": 945.3439712524414, - "p95": 947.5519955158234, - "p99": 983.2639992237091 + "p50": 185.47199666500092, + "p90": 203.07200402021408, + "p95": 211.16799861192703, + "p99": 231.04000091552734 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 405035008, - "combineLogicalBytes": 405035008, - "fanoutMean": 3.4488525390625, - "recvTokensMax": 8189, - "stragglerRank": 7, + "dispatchLogicalBytes": 4988928, + "combineLogicalBytes": 4988928, + "fanoutMean": 5.4375, + "recvTokensMax": 47, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2048, - "globalTokens": 16384, + "tokensPerRank": 16, + "globalTokens": 128, "dispatch": { - "p50": 640.7679915428162, - "p90": 647.4559903144836, - "p95": 654.7520160675049, - "p99": 680.7680130004883 + "p50": 102.52799838781357, + "p90": 113.79200220108032, + "p95": 118.07999759912491, + "p99": 128.80000472068787 }, "combine": { - "p50": 1063.1359815597534, - "p90": 1072.8960037231445, - "p95": 1073.6639499664307, - "p99": 1096.384048461914 + "p50": 86.11200004816055, + "p90": 94.30400282144547, + "p95": 97.08800166845322, + "p99": 108.25599730014801 }, "roundtrip": { - "p50": 1693.8879489898682, - "p90": 1702.7519941329956, - "p95": 1707.0399522781372, - "p99": 1791.648030281067 + "p50": 163.2000058889389, + "p90": 172.83199727535248, + "p95": 176.256000995636, + "p99": 183.1039935350418 }, "isolatedSum": { - "p50": 1703.9039731025696, - "p90": 1720.3519940376282, - "p95": 1728.4159660339355, - "p99": 1777.1520614624023 + "p50": 188.63999843597412, + "p90": 208.0960050225258, + "p95": 215.16799926757812, + "p99": 
237.05600202083588 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 808822784, - "combineLogicalBytes": 808822784, - "fanoutMean": 3.44354248046875, - "recvTokensMax": 16380, + "dispatchLogicalBytes": 9791488, + "combineLogicalBytes": 9791488, + "fanoutMean": 5.3359375, + "recvTokensMax": 94, "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 4096, - "globalTokens": 32768, + "tokensPerRank": 32, + "globalTokens": 256, "dispatch": { - "p50": 1251.9999742507935, - "p90": 1263.10396194458, - "p95": 1265.504002571106, - "p99": 1327.9999494552612 + "p50": 104.92800176143646, + "p90": 115.87200313806534, + "p95": 122.65600264072418, + "p99": 137.11999356746674 }, "combine": { - "p50": 2043.5841083526611, - "p90": 2046.623945236206, - "p95": 2055.6159019470215, - "p99": 2118.272066116333 + "p50": 92.03200042247772, + "p90": 97.69599884748459, + "p95": 99.32799637317657, + "p99": 105.18400371074677 }, "roundtrip": { - "p50": 3285.952091217041, - "p90": 3299.0078926086426, - "p95": 3308.896064758301, - "p99": 3355.7119369506836 + "p50": 168.67199540138245, + "p90": 179.71199750900269, + "p95": 183.3920031785965, + "p99": 199.39200580120087 }, "isolatedSum": { - "p50": 3295.5840826034546, - "p90": 3309.727907180786, - "p95": 3321.1199045181274, - "p99": 3446.2720155715942 + "p50": 196.96000218391418, + "p90": 213.56800198554993, + "p95": 221.98399901390076, + "p99": 242.3039972782135 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1619795968, - "combineLogicalBytes": 1619795968, - "fanoutMean": 3.4481201171875, - "recvTokensMax": 32761, - "stragglerRank": 3, + "dispatchLogicalBytes": 19410944, + "combineLogicalBytes": 19410944, + "fanoutMean": 5.2890625, + "recvTokensMax": 178, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 112.73600161075592, + "p90": 122.04799801111221, + "p95": 127.26399302482605, + 
"p99": 146.40000462532043 + }, + "combine": { + "p50": 107.19999670982361, + "p90": 111.80800199508667, + "p95": 114.81600254774094, + "p99": 120.95999717712402 + }, + "roundtrip": { + "p50": 189.79200720787048, + "p90": 197.56799936294556, + "p95": 200.95999538898468, + "p99": 207.8399956226349 + }, + "isolatedSum": { + "p50": 219.93599832057953, + "p90": 233.85600000619888, + "p95": 242.079995572567, + "p99": 267.36000180244446 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38678528, + "combineLogicalBytes": 38678528, + "fanoutMean": 5.26953125, + "recvTokensMax": 360, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 129.5360028743744, + "p90": 137.7280056476593, + "p95": 141.15199446678162, + "p99": 153.3759981393814 + }, + "combine": { + "p50": 124.25599992275238, + "p90": 132.1599930524826, + "p95": 134.14399325847626, + "p99": 137.63199746608734 + }, + "roundtrip": { + "p50": 226.55999660491943, + "p90": 236.25600337982178, + "p95": 239.3600046634674, + "p99": 247.3279982805252 + }, + "isolatedSum": { + "p50": 253.79200279712677, + "p90": 269.8879987001419, + "p95": 275.2959877252579, + "p99": 291.00799560546875 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77285376, + "combineLogicalBytes": 77285376, + "fanoutMean": 5.2646484375, + "recvTokensMax": 704, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -24814,47 +24377,48 @@ ] }, { - "id": "cx-5c56d46f", - "identity": "b300|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-moderate+eplb|8|prefill|normal|none|none|0|tuned||2b57a75d27f5b39", - "colorKey": "b300_4eade0db", - "comparisonKey": "0fc5df79c3e0429b", + "id": "cx-f55a7c17", + "identity": "gb300|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-mild|8|decode|normal|none|none|0|tuned||a3b13bb200bb717", + "colorKey": "gb300_15a35db4", + "comparisonKey": "2d8b83ad658760e4", 
"schemaVersion": 3, - "generatedAt": "2026-06-27T09:49:49.297184+00:00", + "generatedAt": "2026-06-29T13:43:00.976664+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_08", - "sku": "b300", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", "backend": "deepep", - "phase": "prefill", + "phase": "decode", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep · bf16 · zipf-moderate+eplb", - "model": "DeepSeek-V3 (EPLB physical)", + "label": "GB300 EP8 · deepep · bf16 · zipf-mild", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, - "experts": 288, - "routing": "zipf-moderate", - "routingLabel": "zipf-moderate+eplb", + "experts": 256, + "routing": "zipf-mild", + "routingLabel": "zipf-mild", "routingStep": 0, "unevenTokens": "none", - "eplbEnabled": true, + "eplbEnabled": false, "dispatchDtype": "bf16", + "kernelGeneration": "v1", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1351, + "achievedFraction": 0.1316, "configuredUnits": 20, - "deviceUnits": 148, + "deviceUnits": 152, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -24862,244 +24426,318 @@ }, "placement": { "kind": "packed", - "nodes": 1, + "nodes": 2, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "2b57a75d27f5b39", - "workloadId": "set:6:6709a02c31933a9f", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": 4.895263671875, - "eplbImbalanceAfter": 1.0000902811686199, - "backendVersion": "1.2.1", + "traceSignature": "a3b13bb200bb717", + 
"workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28285648797", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285648797", - "createdAt": "2026-06-27T09:49:49.297184+00:00", - "sha": "149586650dbed5b7579537347e9489d5b41543c1" + "id": "28374342409", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409", + "createdAt": "2026-06-29T13:08:27Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 1, + "globalTokens": 8, "dispatch": { - "p50": 95.0080007314682, - "p90": 97.59999811649323, - "p95": 99.61599856615067, - "p99": 111.16799712181091 + "p50": 90.04800021648407, + "p90": 101.40799731016159, + "p95": 105.24799674749374, + "p99": 114.14399743080139 }, "combine": { - "p50": 115.29599875211716, - "p90": 116.95999652147293, - "p95": 118.8800036907196, - "p99": 139.52000439167023 + "p50": 79.48800176382065, + "p90": 83.77599716186523, + "p95": 86.01599931716919, + "p99": 100.09600222110748 }, "roundtrip": { - "p50": 193.24800372123718, - "p90": 199.42399859428406, - "p95": 200.70399343967438, - "p99": 229.08799350261688 + "p50": 146.464005112648, + "p90": 156.89599514007568, + "p95": 161.72799468040466, + "p99": 172.2240000963211 }, "isolatedSum": { - "p50": 210.30399948358536, - "p90": 214.55999463796616, - "p95": 218.49600225687027, - "p99": 250.68800151348114 + "p50": 169.53600198030472, + "p90": 185.18399447202682, + "p95": 191.26399606466293, + "p99": 214.23999965190887 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77385728, - "combineLogicalBytes": 77385728, - "fanoutMean": 5.271484375, - "recvTokensMax": 691, - "stragglerRank": 4, + "dispatchLogicalBytes": 587776, + 
"combineLogicalBytes": 587776, + "fanoutMean": 5.125, + "recvTokensMax": 8, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 256, - "globalTokens": 2048, + "tokensPerRank": 2, + "globalTokens": 16, "dispatch": { - "p50": 137.56799697875977, - "p90": 140.35199582576752, - "p95": 142.2400027513504, - "p99": 159.58400070667267 + "p50": 91.16800129413605, + "p90": 101.9200012087822, + "p95": 106.6880002617836, + "p99": 114.33599889278412 }, "combine": { - "p50": 154.7520011663437, - "p90": 163.93600404262543, - "p95": 164.32000696659088, - "p99": 166.4000004529953 + "p50": 81.7599967122078, + "p90": 86.46400272846222, + "p95": 93.02400052547455, + "p99": 96.38399630784988 }, "roundtrip": { - "p50": 272.2559869289398, - "p90": 278.01600098609924, - "p95": 280.64000606536865, - "p99": 293.66400837898254 + "p50": 148.99200201034546, + "p90": 158.11200439929962, + "p95": 161.56800091266632, + "p99": 169.08800601959229 }, "isolatedSum": { - "p50": 292.31999814510345, - "p90": 304.28799986839294, - "p95": 306.5600097179413, - "p99": 325.98400115966797 + "p50": 172.92799800634384, + "p90": 188.38400393724442, + "p95": 199.71200078725815, + "p99": 210.719995200634 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 155172864, - "combineLogicalBytes": 155172864, - "fanoutMean": 5.28515625, - "recvTokensMax": 1378, - "stragglerRank": 4, + "dispatchLogicalBytes": 1103872, + "combineLogicalBytes": 1103872, + "fanoutMean": 4.8125, + "recvTokensMax": 16, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 512, - "globalTokens": 4096, + "tokensPerRank": 4, + "globalTokens": 32, "dispatch": { - "p50": 193.4719979763031, - "p90": 199.23199713230133, - "p95": 200.54399967193604, - "p99": 207.48800039291382 + "p50": 92.16000139713287, + "p90": 104.76800054311752, + "p95": 109.3439981341362, + "p99": 124.67200309038162 }, "combine": { - "p50": 265.79201221466064, - "p90": 274.52799677848816, - 
"p95": 274.9119997024536, - "p99": 285.8879864215851 + "p50": 81.91999793052673, + "p90": 87.67999708652496, + "p95": 91.90399944782257, + "p99": 96.3520035147667 }, "roundtrip": { - "p50": 444.2239999771118, - "p90": 450.5600035190582, - "p95": 459.3920111656189, - "p99": 474.016010761261 + "p50": 150.59199929237366, + "p90": 162.59199380874634, + "p95": 166.33599996566772, + "p99": 178.6240041255951 }, "isolatedSum": { - "p50": 459.26401019096375, - "p90": 473.7599939107895, - "p95": 475.45599937438965, - "p99": 493.3759868144989 + "p50": 174.0799993276596, + "p90": 192.4479976296425, + "p95": 201.24799758195877, + "p99": 221.02400660514832 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 310546432, - "combineLogicalBytes": 310546432, - "fanoutMean": 5.28857421875, - "recvTokensMax": 2745, - "stragglerRank": 4, + "dispatchLogicalBytes": 2250752, + "combineLogicalBytes": 2250752, + "fanoutMean": 4.90625, + "recvTokensMax": 31, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 1024, - "globalTokens": 8192, + "tokensPerRank": 8, + "globalTokens": 64, "dispatch": { - "p50": 325.0240087509155, - "p90": 329.8240005970001, - "p95": 331.0079872608185, - "p99": 350.14399886131287 + "p50": 93.47199648618698, + "p90": 103.93600165843964, + "p95": 108.0000028014183, + "p99": 115.90400338172913 }, "combine": { - "p50": 457.7920138835907, - "p90": 459.4239890575409, - "p95": 461.95200085639954, - "p99": 473.66398572921753 + "p50": 82.87999778985977, + "p90": 90.7519981265068, + "p95": 93.21600198745728, + "p99": 100.89600086212158 }, "roundtrip": { - "p50": 760.479986667633, - "p90": 767.7119970321655, - "p95": 772.2240090370178, - "p99": 781.9520235061646 + "p50": 153.6960005760193, + "p90": 163.5199934244156, + "p95": 168.03200542926788, + "p99": 176.32000148296356 }, "isolatedSum": { - "p50": 782.8160226345062, - "p90": 789.247989654541, - "p95": 792.959988117218, - "p99": 823.8079845905304 + "p50": 176.35199427604675, + 
"p90": 194.68799978494644, + "p95": 201.21600478887558, + "p99": 216.8000042438507 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 620619776, - "combineLogicalBytes": 620619776, - "fanoutMean": 5.2845458984375, - "recvTokensMax": 5526, - "stragglerRank": 4, + "dispatchLogicalBytes": 4472832, + "combineLogicalBytes": 4472832, + "fanoutMean": 4.875, + "recvTokensMax": 62, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2048, - "globalTokens": 16384, + "tokensPerRank": 16, + "globalTokens": 128, "dispatch": { - "p50": 574.9120116233826, - "p90": 582.2719931602478, - "p95": 583.9999914169312, - "p99": 640.3840184211731 + "p50": 94.30400282144547, + "p90": 105.47199845314026, + "p95": 109.02400314807892, + "p99": 117.0239970088005 }, "combine": { - "p50": 830.016016960144, - "p90": 839.9360179901123, - "p95": 840.287983417511, - "p99": 852.4479866027832 + "p50": 84.44800227880478, + "p90": 91.45600348711014, + "p95": 93.28000247478485, + "p99": 98.88000041246414 }, "roundtrip": { - "p50": 1387.8079652786255, - "p90": 1396.7679738998413, - "p95": 1398.9759683609009, - "p99": 1455.1680088043213 + "p50": 155.2640050649643, + "p90": 166.4000004529953, + "p95": 169.40799355506897, + "p99": 179.6800047159195 }, "isolatedSum": { - "p50": 1404.9280285835266, - "p90": 1422.20801115036, - "p95": 1424.2879748344421, - "p99": 1492.8320050239563 + "p50": 178.75200510025024, + "p90": 196.9280019402504, + "p95": 202.30400562286377, + "p99": 215.90399742126465 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1239175168, - "combineLogicalBytes": 1239175168, - "fanoutMean": 5.2757568359375, - "recvTokensMax": 11165, - "stragglerRank": 4, + "dispatchLogicalBytes": 8888320, + "combineLogicalBytes": 8888320, + "fanoutMean": 4.84375, + "recvTokensMax": 124, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 4096, - "globalTokens": 32768, + "tokensPerRank": 32, + "globalTokens": 256, 
"dispatch": { - "p50": 1065.2480125427246, - "p90": 1072.6079940795898, - "p95": 1079.2319774627686, - "p99": 1102.720022201538 + "p50": 99.07200187444687, + "p90": 109.6000000834465, + "p95": 113.21599781513214, + "p99": 132.4159950017929 }, "combine": { - "p50": 1539.5840406417847, - "p90": 1542.464017868042, - "p95": 1552.2559881210327, - "p99": 1614.7840023040771 + "p50": 93.24800223112106, + "p90": 97.08800166845322, + "p95": 101.6319990158081, + "p99": 108.67200046777725 }, "roundtrip": { - "p50": 2586.3358974456787, - "p90": 2598.720073699951, - "p95": 2605.4399013519287, - "p99": 2665.247917175293 + "p50": 163.455992937088, + "p90": 172.12800681591034, + "p95": 175.00799894332886, + "p99": 187.16800212860107 }, "isolatedSum": { - "p50": 2604.8320531845093, - "p90": 2615.072011947632, - "p95": 2631.4879655838013, - "p99": 2717.5040245056152 + "p50": 192.32000410556793, + "p90": 206.68800175189972, + "p95": 214.84799683094025, + "p99": 241.08799546957016 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2481604608, - "combineLogicalBytes": 2481604608, - "fanoutMean": 5.282684326171875, - "recvTokensMax": 22165, - "stragglerRank": 4, + "dispatchLogicalBytes": 17733632, + "combineLogicalBytes": 17733632, + "fanoutMean": 4.83203125, + "recvTokensMax": 248, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 110.20799726247787, + "p90": 117.66400188207626, + "p95": 121.0239976644516, + "p99": 127.13600695133209 + }, + "combine": { + "p50": 105.85600137710571, + "p90": 110.01600325107574, + "p95": 115.13599753379822, + "p99": 120.54400146007538 + }, + "roundtrip": { + "p50": 187.83999979496002, + "p90": 196.31999731063843, + "p95": 199.48799908161163, + "p99": 208.639994263649 + }, + "isolatedSum": { + "p50": 216.0639986395836, + "p90": 227.680005133152, + "p95": 236.15999519824982, + "p99": 247.68000841140747 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 35424256, + "combineLogicalBytes": 35424256, + "fanoutMean": 4.826171875, + "recvTokensMax": 492, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 128.51199507713318, + "p90": 136.99199259281158, + "p95": 139.42399621009827, + "p99": 148.28799664974213 + }, + "combine": { + "p50": 134.20799374580383, + "p90": 142.46399700641632, + "p95": 144.51199769973755, + "p99": 151.90400183200836 + }, + "roundtrip": { + "p50": 238.5600060224533, + "p90": 246.2719976902008, + "p95": 249.63200092315674, + "p99": 260.1599991321564 + }, + "isolatedSum": { + "p50": 262.719988822937, + "p90": 279.4559895992279, + "p95": 283.9359939098358, + "p99": 300.1919984817505 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 70160384, + "combineLogicalBytes": 70160384, + "fanoutMean": 4.779296875, + "recvTokensMax": 987, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 @@ -25107,47 +24745,48 @@ ] }, { - "id": "cx-fb4f7eef", - "identity": "b300|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|prefill|normal|none|none|0|tuned||2b57a75d27f5b39", - "colorKey": "b300_f1ea991b", - "comparisonKey": "c5288b3181a71a36", + "id": "cx-9d14c709", + "identity": "gb300|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-mild+eplb|8|decode|normal|none|none|0|tuned||ab982093c4eac2b", + "colorKey": "gb300_46b172da", + "comparisonKey": "23a6c8c598f2838f", "schemaVersion": 3, - "generatedAt": "2026-06-27T09:48:56.789691+00:00", + "generatedAt": "2026-06-29T13:43:59.087832+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_13", - "sku": "b300", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", "backend": "deepep", - "phase": "prefill", + "phase": "decode", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": 
"standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep · bf16 · zipf+eplb", + "label": "GB300 EP8 · deepep · bf16 · zipf-mild+eplb", "model": "DeepSeek-V3 (EPLB physical)", "shape": { "hidden": 7168, "topk": 8, "experts": 288, - "routing": "zipf", - "routingLabel": "zipf+eplb", + "routing": "zipf-mild", + "routingLabel": "zipf-mild+eplb", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": true, "dispatchDtype": "bf16", + "kernelGeneration": "v1", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1351, + "achievedFraction": 0.1316, "configuredUnits": 20, - "deviceUnits": 148, + "deviceUnits": 152, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -25155,243 +24794,317 @@ }, "placement": { "kind": "packed", - "nodes": 1, + "nodes": 2, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "2b57a75d27f5b39", - "workloadId": "set:6:830e36e88869e222", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": 4.895263671875, - "eplbImbalanceAfter": 1.0000902811686199, - "backendVersion": "1.2.1", + "traceSignature": "ab982093c4eac2b", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 2.61328125, + "eplbImbalanceAfter": 1.0009114583333334, + "backendVersion": "1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28285627928", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285627928", - "createdAt": "2026-06-27T09:48:56.789691+00:00", - "sha": "149586650dbed5b7579537347e9489d5b41543c1" + "id": "28374342409", + "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409", + "createdAt": "2026-06-29T13:08:27Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 1, + "globalTokens": 8, "dispatch": { - "p50": 94.43199634552002, - "p90": 96.73599898815155, - "p95": 99.0080013871193, - "p99": 112.5119999051094 + "p50": 98.36799651384354, + "p90": 110.52799969911575, + "p95": 113.6000007390976, + "p99": 123.48800152540207 }, "combine": { - "p50": 115.35999923944473, - "p90": 116.22399836778641, - "p95": 117.37599968910217, - "p99": 128.4160017967224 + "p50": 81.24800026416779, + "p90": 85.15200018882751, + "p95": 87.96799927949905, + "p99": 99.58399832248688 }, "roundtrip": { - "p50": 195.71200013160706, - "p90": 200.51200687885284, - "p95": 201.31200551986694, - "p99": 211.61599457263947 + "p50": 154.88000214099884, + "p90": 166.33599996566772, + "p95": 170.9119975566864, + "p99": 182.43199586868286 }, "isolatedSum": { - "p50": 209.79199558496475, - "p90": 212.95999735593796, - "p95": 216.38400107622147, - "p99": 240.92800170183182 + "p50": 179.61599677801132, + "p90": 195.67999988794327, + "p95": 201.56800001859665, + "p99": 223.07199984788895 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77385728, - "combineLogicalBytes": 77385728, - "fanoutMean": 5.271484375, - "recvTokensMax": 691, - "stragglerRank": 4, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 7, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 256, - "globalTokens": 2048, + "tokensPerRank": 2, + "globalTokens": 16, "dispatch": { - "p50": 137.40800321102142, - "p90": 139.90400731563568, - "p95": 140.83200693130493, - "p99": 150.176003575325 - }, - "combine": { - "p50": 153.85599434375763, - "p90": 163.2319986820221, - "p95": 163.7440025806427, - "p99": 175.6799966096878 + "p50": 101.15200281143188, + "p90": 
114.01599645614624, + "p95": 119.48800086975098, + "p99": 142.59199798107147 + }, + "combine": { + "p50": 83.8719978928566, + "p90": 89.37600255012512, + "p95": 92.92799979448318, + "p99": 98.65599870681763 }, "roundtrip": { - "p50": 272.09600806236267, - "p90": 277.15200185775757, - "p95": 278.4639894962311, - "p99": 286.46400570869446 + "p50": 158.24000537395477, + "p90": 171.10399901866913, + "p95": 174.8799979686737, + "p99": 182.72000551223755 }, "isolatedSum": { - "p50": 291.26399755477905, - "p90": 303.1360059976578, - "p95": 304.57600951194763, - "p99": 325.8560001850128 + "p50": 185.02400070428848, + "p90": 203.39199900627136, + "p95": 212.41600066423416, + "p99": 241.2479966878891 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 155172864, - "combineLogicalBytes": 155172864, - "fanoutMean": 5.28515625, - "recvTokensMax": 1378, - "stragglerRank": 4, + "dispatchLogicalBytes": 1189888, + "combineLogicalBytes": 1189888, + "fanoutMean": 5.1875, + "recvTokensMax": 12, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 512, - "globalTokens": 4096, + "tokensPerRank": 4, + "globalTokens": 32, "dispatch": { - "p50": 193.53599846363068, - "p90": 200.8959949016571, - "p95": 202.2079974412918, - "p99": 214.84799683094025 + "p50": 99.5199978351593, + "p90": 112.03200370073318, + "p95": 116.38399958610535, + "p99": 130.3039938211441 }, "combine": { - "p50": 265.1839852333069, - "p90": 274.1760015487671, - "p95": 274.78399872779846, - "p99": 279.04000878334045 + "p50": 84.03199911117554, + "p90": 91.16800129413605, + "p95": 93.91999989748001, + "p99": 103.84000092744827 }, "roundtrip": { - "p50": 440.8000111579895, - "p90": 447.7440118789673, - "p95": 449.15199279785156, - "p99": 459.03998613357544 + "p50": 159.58400070667267, + "p90": 172.60800302028656, + "p95": 175.74399709701538, + "p99": 189.2479956150055 }, "isolatedSum": { - "p50": 458.71998369693756, - "p90": 475.0719964504242, - "p95": 476.99199616909027, - 
"p99": 493.8880056142807 + "p50": 183.55199694633484, + "p90": 203.20000499486923, + "p95": 210.30399948358536, + "p99": 234.14399474859238 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 310546432, - "combineLogicalBytes": 310546432, - "fanoutMean": 5.28857421875, - "recvTokensMax": 2745, - "stragglerRank": 4, + "dispatchLogicalBytes": 2408448, + "combineLogicalBytes": 2408448, + "fanoutMean": 5.25, + "recvTokensMax": 23, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 1024, - "globalTokens": 8192, + "tokensPerRank": 8, + "globalTokens": 64, "dispatch": { - "p50": 325.6959915161133, - "p90": 330.3680121898651, - "p95": 331.84000849723816, - "p99": 342.72000193595886 + "p50": 100.09600222110748, + "p90": 112.92800307273865, + "p95": 118.27199906110764, + "p99": 129.56799566745758 }, "combine": { - "p50": 450.3360092639923, - "p90": 459.3920111656189, - "p95": 460.4479968547821, - "p99": 472.6080000400543 + "p50": 84.95999872684479, + "p90": 93.72799843549728, + "p95": 96.47999703884125, + "p99": 103.35999727249146 }, "roundtrip": { - "p50": 759.4239711761475, - "p90": 766.2720084190369, - "p95": 770.3679800033569, - "p99": 786.6560220718384 + "p50": 162.49600052833557, + "p90": 172.2559928894043, + "p95": 175.64800381660461, + "p99": 183.61599743366241 }, "isolatedSum": { - "p50": 776.0320007801056, - "p90": 789.760023355484, - "p95": 792.2880053520203, - "p99": 815.3280019760132 + "p50": 185.05600094795227, + "p90": 206.65600150823593, + "p95": 214.75199609994888, + "p99": 232.92799293994904 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 620619776, - "combineLogicalBytes": 620619776, - "fanoutMean": 5.2845458984375, - "recvTokensMax": 5526, - "stragglerRank": 4, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 47, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2048, - "globalTokens": 
16384, + "tokensPerRank": 16, + "globalTokens": 128, "dispatch": { - "p50": 574.4640231132507, - "p90": 581.9839835166931, - "p95": 583.9359760284424, - "p99": 595.9039926528931 + "p50": 100.70399940013885, + "p90": 112.89600282907486, + "p95": 116.64000153541565, + "p99": 131.071999669075 }, "combine": { - "p50": 828.9600014686584, - "p90": 839.9680256843567, - "p95": 840.4160141944885, - "p99": 851.9359827041626 + "p50": 86.04799956083298, + "p90": 94.71999853849411, + "p95": 96.38399630784988, + "p99": 100.99200159311295 }, "roundtrip": { - "p50": 1387.0079517364502, - "p90": 1396.83198928833, - "p95": 1399.5200395584106, - "p99": 1415.1999950408936 + "p50": 164.99200463294983, + "p90": 175.77600479125977, + "p95": 179.71199750900269, + "p99": 195.51999866962433 }, "isolatedSum": { - "p50": 1403.4240245819092, - "p90": 1421.9520092010498, - "p95": 1424.351990222931, - "p99": 1447.8399753570557 + "p50": 186.75199896097183, + "p90": 207.61600136756897, + "p95": 213.02399784326553, + "p99": 232.06400126218796 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1239175168, - "combineLogicalBytes": 1239175168, - "fanoutMean": 5.2757568359375, - "recvTokensMax": 11165, + "dispatchLogicalBytes": 9605120, + "combineLogicalBytes": 9605120, + "fanoutMean": 5.234375, + "recvTokensMax": 93, "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 4096, - "globalTokens": 32768, + "tokensPerRank": 32, + "globalTokens": 256, "dispatch": { - "p50": 1063.904047012329, - "p90": 1069.216012954712, - "p95": 1072.543978691101, - "p99": 1081.1200141906738 + "p50": 103.74400019645691, + "p90": 114.72000181674957, + "p95": 117.88800358772278, + "p99": 125.5359947681427 }, "combine": { - "p50": 1530.303955078125, - "p90": 1540.4800176620483, - "p95": 1541.9520139694214, - "p99": 1576.799988746643 + "p50": 94.27200257778168, + "p90": 98.81599992513657, + "p95": 102.62399911880493, + "p99": 115.39199948310852 }, "roundtrip": { - "p50": 
2580.832004547119, - "p90": 2592.2560691833496, - "p95": 2598.8481044769287, - "p99": 2691.8399333953857 + "p50": 170.23999989032745, + "p90": 180.7360053062439, + "p95": 184.12800133228302, + "p99": 192.89599359035492 }, "isolatedSum": { - "p50": 2594.208002090454, - "p90": 2609.6960306167603, - "p95": 2614.4959926605225, - "p99": 2657.920002937317 + "p50": 198.0160027742386, + "p90": 213.53600174188614, + "p95": 220.5120027065277, + "p99": 240.92799425125122 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2481604608, - "combineLogicalBytes": 2481604608, - "fanoutMean": 5.282684326171875, - "recvTokensMax": 22165, + "dispatchLogicalBytes": 19367936, + "combineLogicalBytes": 19367936, + "fanoutMean": 5.27734375, + "recvTokensMax": 182, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 113.27999830245972, + "p90": 122.01599776744843, + "p95": 124.60800260305405, + "p99": 134.5279961824417 + }, + "combine": { + "p50": 108.03200304508209, + "p90": 111.90400272607803, + "p95": 116.41599982976913, + "p99": 121.95199728012085 + }, + "roundtrip": { + "p50": 191.00800156593323, + "p90": 200.95999538898468, + "p95": 204.79999482631683, + "p99": 214.20800685882568 + }, + "isolatedSum": { + "p50": 221.3120013475418, + "p90": 233.92000049352646, + "p95": 241.02400243282318, + "p99": 256.47999346256256 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38535168, + "combineLogicalBytes": 38535168, + "fanoutMean": 5.25, + "recvTokensMax": 358, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 129.18399274349213, + "p90": 138.047993183136, + "p95": 141.4400041103363, + "p99": 150.04800260066986 + }, + "combine": { + "p50": 123.90399724245071, + "p90": 132.6719969511032, + "p95": 134.46399569511414, + "p99": 142.0159935951233 + }, + "roundtrip": { + 
"p50": 227.7120053768158, + "p90": 236.35199666023254, + "p95": 238.8480007648468, + "p99": 244.32000517845154 + }, + "isolatedSum": { + "p50": 253.08798998594284, + "p90": 270.7199901342392, + "p95": 275.90399980545044, + "p99": 292.06399619579315 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 76869632, + "combineLogicalBytes": 76869632, + "fanoutMean": 5.236328125, + "recvTokensMax": 688, "stragglerRank": 6, "correct": true, "samplesPooled": 600, @@ -25400,878 +25113,735 @@ ] }, { - "id": "cx-e7727ce9", - "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|normalized|0.18|64d989e2e2a6b31", - "colorKey": "b300_c1ad910f", - "comparisonKey": "9532205a80f3d757", + "id": "cx-e40c9223", + "identity": "gb300|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-moderate|8|decode|normal|none|none|0|tuned||1093cd76c9cd2db", + "colorKey": "gb300_6e04dda3", + "comparisonKey": "a225bda519f2d24b", "schemaVersion": 3, - "generatedAt": "2026-06-26T17:38:48.516779+00:00", + "generatedAt": "2026-06-29T13:47:11.009762+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_15", - "sku": "b300", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", "backend": "deepep", - "phase": "prefill", + "phase": "decode", "mode": "normal", - "resourceMode": "normalized", - "suite": "resource-constrained", + "resourceMode": "tuned", + "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep · bf16 (norm)", + "label": "GB300 EP8 · deepep · bf16 · zipf-moderate", "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", + "routing": 
"zipf-moderate", + "routingLabel": "zipf-moderate", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, "dispatchDtype": "bf16", + "kernelGeneration": "v1", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { - "requestedFraction": 0.18, - "achievedFraction": 0.1824, - "configuredUnits": 27, - "deviceUnits": 148, - "resourceClass": "resource-constrained", - "conformanceClass": "resource-conforming", + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", "fixedKernel": false, - "paretoEligible": true + "paretoEligible": false }, "placement": { "kind": "packed", - "nodes": 1, + "nodes": 2, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "64d989e2e2a6b31", - "workloadId": "set:6:a426d66e479dc893", - "workloadSource": "canonical-serialized", + "traceSignature": "1093cd76c9cd2db", + "workloadId": null, + "workloadSource": "seeded-runtime", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", + "backendVersion": "1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28254469772", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254469772", - "createdAt": "2026-06-26T17:38:48.516779+00:00", - "sha": "60dec7d70f554e252fec87709e2be52752947db1" + "id": "28374342409", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409", + "createdAt": "2026-06-29T13:08:27Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 1, + "globalTokens": 8, "dispatch": { - "p50": 94.11200135946274, - "p90": 98.9760011434555, - "p95": 100.54399818181992, - "p99": 116.44800007343292 + "p50": 91.93599969148636, + "p90": 
106.33599758148193, + "p95": 110.944002866745, + "p99": 121.0239976644516 }, "combine": { - "p50": 115.1999980211258, - "p90": 115.9679964184761, - "p95": 116.89600348472595, - "p99": 129.02399897575378 + "p50": 77.2479996085167, + "p90": 83.39200168848038, + "p95": 85.24800091981888, + "p99": 94.7519987821579 }, "roundtrip": { - "p50": 193.2159960269928, - "p90": 198.43199849128723, - "p95": 199.8080015182495, - "p99": 217.50399470329285 + "p50": 148.51200580596924, + "p90": 160.16000509262085, + "p95": 163.7440025806427, + "p99": 172.89599776268005 }, "isolatedSum": { - "p50": 209.31199938058853, - "p90": 214.9439975619316, - "p95": 217.44000166654587, - "p99": 245.4719990491867 + "p50": 169.18399930000305, + "p90": 189.7279992699623, + "p95": 196.19200378656387, + "p99": 215.7759964466095 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 4, + "dispatchLogicalBytes": 444416, + "combineLogicalBytes": 444416, + "fanoutMean": 3.875, + "recvTokensMax": 8, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 256, - "globalTokens": 2048, + "tokensPerRank": 2, + "globalTokens": 16, "dispatch": { - "p50": 135.42400300502777, - "p90": 138.75199854373932, - "p95": 141.184002161026, - "p99": 151.0079950094223 + "p50": 92.25600212812424, + "p90": 105.05600273609161, + "p95": 109.79200154542923, + "p99": 123.00799787044525 }, "combine": { - "p50": 154.59200739860535, - "p90": 163.90399634838104, - "p95": 164.5440012216568, - "p99": 176.54399573802948 + "p50": 80.19199967384338, + "p90": 83.96799862384796, + "p95": 86.33600175380707, + "p99": 95.74399888515472 }, "roundtrip": { - "p50": 271.67999744415283, - "p90": 277.6319980621338, - "p95": 280.70399165153503, - "p99": 291.3599908351898 + "p50": 147.77599275112152, + "p90": 159.36000645160675, + "p95": 164.38399255275726, + "p99": 175.23199319839478 }, 
"isolatedSum": { - "p50": 290.0160104036331, - "p90": 302.65599489212036, - "p95": 305.7280033826828, - "p99": 327.5519907474518 + "p50": 172.44800180196762, + "p90": 189.02400135993958, + "p95": 196.1280032992363, + "p99": 218.75199675559998 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 155889664, - "combineLogicalBytes": 155889664, - "fanoutMean": 5.3095703125, - "recvTokensMax": 1422, - "stragglerRank": 4, + "dispatchLogicalBytes": 845824, + "combineLogicalBytes": 845824, + "fanoutMean": 3.6875, + "recvTokensMax": 16, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 512, - "globalTokens": 4096, + "tokensPerRank": 4, + "globalTokens": 32, "dispatch": { - "p50": 193.24800372123718, - "p90": 199.61600005626678, - "p95": 200.80000162124634, - "p99": 206.68800175189972 + "p50": 94.33600306510925, + "p90": 107.2319969534874, + "p95": 113.47199976444244, + "p99": 124.7360035777092 }, "combine": { - "p50": 265.8880054950714, - "p90": 274.59201216697693, - "p95": 275.2000093460083, - "p99": 286.78399324417114 + "p50": 80.99199831485748, + "p90": 84.95999872684479, + "p95": 89.31200206279755, + "p99": 120.12799829244614 }, "roundtrip": { - "p50": 442.59199500083923, - "p90": 448.96000623703003, - "p95": 455.00800013542175, - "p99": 461.40798926353455 + "p50": 150.07999539375305, + "p90": 161.0880047082901, + "p95": 164.19200599193573, + "p99": 173.98400604724884 }, "isolatedSum": { - "p50": 459.1360092163086, - "p90": 474.2080122232437, - "p95": 476.00001096725464, - "p99": 493.47199499607086 + "p50": 175.32800137996674, + "p90": 192.19199568033218, + "p95": 202.78400182724, + "p99": 244.86400187015533 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 312266752, - "combineLogicalBytes": 312266752, - "fanoutMean": 5.31787109375, - "recvTokensMax": 2779, - "stragglerRank": 7, + "dispatchLogicalBytes": 1691648, + "combineLogicalBytes": 1691648, + "fanoutMean": 3.6875, + "recvTokensMax": 32, + "stragglerRank": 1, 
"correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 1024, - "globalTokens": 8192, + "tokensPerRank": 8, + "globalTokens": 64, "dispatch": { - "p50": 326.2079954147339, - "p90": 329.75998520851135, - "p95": 331.6799998283386, - "p99": 341.6000008583069 + "p50": 95.74399888515472, + "p90": 107.35999792814255, + "p95": 112.44799941778183, + "p99": 123.96799772977829 }, "combine": { - "p50": 457.66401290893555, - "p90": 459.77601408958435, - "p95": 469.760000705719, - "p99": 473.7600088119507 + "p50": 82.40000158548355, + "p90": 87.0399996638298, + "p95": 92.76799857616425, + "p99": 97.37599641084671 }, "roundtrip": { - "p50": 762.5920176506042, - "p90": 771.7440128326416, - "p95": 774.2080092430115, - "p99": 789.6320223808289 + "p50": 151.48800611495972, + "p90": 164.000004529953, + "p95": 167.9999977350235, + "p99": 179.00800704956055 }, "isolatedSum": { - "p50": 783.8720083236694, - "p90": 789.5359992980957, - "p95": 801.4400005340576, - "p99": 815.3600096702576 + "p50": 178.14400047063828, + "p90": 194.39999759197235, + "p95": 205.21599799394608, + "p99": 221.343994140625 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 623443968, - "combineLogicalBytes": 623443968, - "fanoutMean": 5.30859375, - "recvTokensMax": 5505, - "stragglerRank": 7, + "dispatchLogicalBytes": 3354624, + "combineLogicalBytes": 3354624, + "fanoutMean": 3.65625, + "recvTokensMax": 64, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2048, - "globalTokens": 16384, + "tokensPerRank": 16, + "globalTokens": 128, "dispatch": { - "p50": 577.1200060844421, - "p90": 582.5920104980469, - "p95": 583.5520029067993, - "p99": 591.2960171699524 + "p50": 96.3200032711029, + "p90": 108.15999656915665, + "p95": 113.34399878978729, + "p99": 134.5600038766861 }, "combine": { - "p50": 817.2799944877625, - "p90": 828.4159898757935, - "p95": 831.8719863891602, - "p99": 913.4079813957214 + "p50": 83.45600217580795, + "p90": 
87.71199733018875, + "p95": 92.6079973578453, + "p99": 97.08800166845322 }, "roundtrip": { - "p50": 1376.9279718399048, - "p90": 1386.9119882583618, - "p95": 1392.7680253982544, - "p99": 1453.8240432739258 + "p50": 155.61600029468536, + "p90": 167.04000532627106, + "p95": 171.36000096797943, + "p99": 181.69599771499634 }, "isolatedSum": { - "p50": 1394.4000005722046, - "p90": 1411.0080003738403, - "p95": 1415.4239892959595, - "p99": 1504.7039985656738 + "p50": 179.77600544691086, + "p90": 195.8719938993454, + "p95": 205.9519961476326, + "p99": 231.6480055451393 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1243805696, - "combineLogicalBytes": 1243805696, - "fanoutMean": 5.29547119140625, - "recvTokensMax": 10952, - "stragglerRank": 7, + "dispatchLogicalBytes": 6537216, + "combineLogicalBytes": 6537216, + "fanoutMean": 3.5625, + "recvTokensMax": 127, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 4096, - "globalTokens": 32768, + "tokensPerRank": 32, + "globalTokens": 256, "dispatch": { - "p50": 1069.5040225982666, - "p90": 1078.0160427093506, - "p95": 1080.2559852600098, - "p99": 1090.880036354065 + "p50": 97.21600264310837, + "p90": 107.42399841547012, + "p95": 110.81600189208984, + "p99": 119.32799965143204 }, "combine": { - "p50": 1528.8959741592407, - "p90": 1540.4479503631592, - "p95": 1542.688012123108, - "p99": 1554.751992225647 + "p50": 87.52000331878662, + "p90": 95.90400010347366, + "p95": 97.47199714183807, + "p99": 105.6319996714592 }, "roundtrip": { - "p50": 2581.9520950317383, - "p90": 2594.6240425109863, - "p95": 2602.303981781006, - "p99": 2637.9199028015137 + "p50": 162.08000481128693, + "p90": 172.57599532604218, + "p95": 176.09600722789764, + "p99": 182.01600015163422 }, "isolatedSum": { - "p50": 2598.3999967575073, - "p90": 2618.4639930725098, - "p95": 2622.9439973831177, - "p99": 2645.632028579712 + "p50": 184.736005961895, + "p90": 203.3279985189438, + "p95": 208.28799903392792, + 
"p99": 224.95999932289124 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2487009280, - "combineLogicalBytes": 2487009280, - "fanoutMean": 5.294189453125, - "recvTokensMax": 21781, + "dispatchLogicalBytes": 12859392, + "combineLogicalBytes": 12859392, + "fanoutMean": 3.50390625, + "recvTokensMax": 255, "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 104.47999835014343, + "p90": 113.69600147008896, + "p95": 117.11999773979187, + "p99": 125.08800625801086 + }, + "combine": { + "p50": 103.07200253009796, + "p90": 108.76800119876862, + "p95": 110.36799848079681, + "p99": 120.99199742078781 + }, + "roundtrip": { + "p50": 186.52799725532532, + "p90": 194.7840005159378, + "p95": 197.37599790096283, + "p99": 208.99200439453125 + }, + "isolatedSum": { + "p50": 207.5520008802414, + "p90": 222.46400266885757, + "p95": 227.48799622058868, + "p99": 246.08000367879868 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 25145344, + "combineLogicalBytes": 25145344, + "fanoutMean": 3.42578125, + "recvTokensMax": 510, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 125.47199428081512, + "p90": 134.11200046539307, + "p95": 136.63999736309052, + "p99": 144.44799721240997 + }, + "combine": { + "p50": 133.760005235672, + "p90": 142.14399456977844, + "p95": 144.896000623703, + "p99": 150.87999403476715 + }, + "roundtrip": { + "p50": 236.60799860954285, + "p90": 243.96799504756927, + "p95": 247.26399779319763, + "p99": 255.0719976425171 + }, + "isolatedSum": { + "p50": 259.2319995164871, + "p90": 276.2559950351715, + "p95": 281.5359979867935, + "p99": 295.3279912471771 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 7, + "correct": 
true, + "samplesPooled": 600, + "trials": 3 } ] }, { - "id": "cx-5fd5a06c", - "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|prefill|normal|none|none|0|normalized|0.18|0a3064a2af0dd39", - "colorKey": "b300_0622d929", - "comparisonKey": "8c83b99af9d27709", + "id": "cx-3ab662a4", + "identity": "gb300|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-moderate+eplb|8|decode|normal|none|none|0|tuned||431e44245dd1524", + "colorKey": "gb300_04de5a5b", + "comparisonKey": "3a5f0bb6e0d0b96c", "schemaVersion": 3, - "generatedAt": "2026-06-26T18:11:00.153293+00:00", + "generatedAt": "2026-06-29T13:48:08.393602+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_10", - "sku": "b300", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", "backend": "deepep", - "phase": "prefill", + "phase": "decode", "mode": "normal", - "resourceMode": "normalized", - "suite": "resource-constrained", + "resourceMode": "tuned", + "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep · bf16 (norm) · balanced", - "model": "DeepSeek-V3/V4", + "label": "GB300 EP8 · deepep · bf16 · zipf-moderate+eplb", + "model": "DeepSeek-V3 (EPLB physical)", "shape": { "hidden": 7168, "topk": 8, - "experts": 256, - "routing": "balanced", - "routingLabel": "balanced", + "experts": 288, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate+eplb", "routingStep": 0, "unevenTokens": "none", - "eplbEnabled": false, + "eplbEnabled": true, "dispatchDtype": "bf16", + "kernelGeneration": "v1", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { - "requestedFraction": 0.18, - "achievedFraction": 0.1824, - "configuredUnits": 27, - 
"deviceUnits": 148, - "resourceClass": "resource-constrained", - "conformanceClass": "resource-conforming", + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", "fixedKernel": false, - "paretoEligible": true + "paretoEligible": false }, "placement": { "kind": "packed", - "nodes": 1, + "nodes": 2, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "0a3064a2af0dd39", - "workloadId": "set:6:2dad1a73ff872905", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", + "traceSignature": "431e44245dd1524", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.927734375, + "eplbImbalanceAfter": 1.0006103515625, + "backendVersion": "1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28254508907", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254508907", - "createdAt": "2026-06-26T18:11:00.153293+00:00", - "sha": "60dec7d70f554e252fec87709e2be52752947db1" + "id": "28374342409", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409", + "createdAt": "2026-06-29T13:08:27Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 1, + "globalTokens": 8, "dispatch": { - "p50": 105.79200088977814, - "p90": 108.83200168609619, - "p95": 111.00800335407257, - "p99": 118.9119964838028 + "p50": 96.6079980134964, + "p90": 120.19199877977371, + "p95": 128.09599936008453, + "p99": 145.28000354766846 }, "combine": { - "p50": 130.0159990787506, - "p90": 139.20000195503235, - "p95": 139.74399864673615, - "p99": 150.84800124168396 + "p50": 80.4160013794899, + "p90": 
84.76799726486206, + "p95": 86.97599917650223, + "p99": 96.3520035147667 }, "roundtrip": { - "p50": 228.38400304317474, - "p90": 234.65600609779358, - "p95": 235.61599850654602, - "p99": 252.28801369667053 + "p50": 151.13599598407745, + "p90": 162.84799575805664, + "p95": 167.61599481105804, + "p99": 174.97600615024567 }, "isolatedSum": { - "p50": 235.80799996852875, - "p90": 248.03200364112854, - "p95": 250.75200200080872, - "p99": 269.75999772548676 + "p50": 177.0239993929863, + "p90": 204.95999604463577, + "p95": 215.07199853658676, + "p99": 241.63200706243515 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 117440512, - "combineLogicalBytes": 117440512, - "fanoutMean": 8, - "recvTokensMax": 1024, - "stragglerRank": 7, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 5.375, + "recvTokensMax": 7, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 256, - "globalTokens": 2048, + "tokensPerRank": 2, + "globalTokens": 16, "dispatch": { - "p50": 159.36000645160675, - "p90": 162.56000101566315, - "p95": 163.90399634838104, - "p99": 170.59199512004852 + "p50": 93.53599697351456, + "p90": 108.57599973678589, + "p95": 112.5440001487732, + "p99": 122.17599898576736 }, "combine": { - "p50": 201.34399831295013, - "p90": 203.96800339221954, - "p95": 211.45600080490112, - "p99": 224.86400604248047 + "p50": 82.8159973025322, + "p90": 88.3840024471283, + "p95": 94.91200000047684, + "p99": 96.99200093746185 }, "roundtrip": { - "p50": 334.879994392395, - "p90": 340.03201127052307, - "p95": 342.0479893684387, - "p99": 360.28799414634705 + "p50": 152.79999375343323, + "p90": 172.28800058364868, + "p95": 193.82399320602417, + "p99": 212.6079946756363 }, "isolatedSum": { - "p50": 360.7040047645569, - "p90": 366.5280044078827, - "p95": 375.35999715328217, - "p99": 395.456001162529 + "p50": 176.35199427604675, + "p90": 196.96000218391418, + "p95": 207.45600014925003, + "p99": 219.16799992322922 }, 
"roundtripMeasured": true, - "dispatchLogicalBytes": 234881024, - "combineLogicalBytes": 234881024, - "fanoutMean": 8, - "recvTokensMax": 2048, - "stragglerRank": 7, + "dispatchLogicalBytes": 1204224, + "combineLogicalBytes": 1204224, + "fanoutMean": 5.25, + "recvTokensMax": 14, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 512, - "globalTokens": 4096, + "tokensPerRank": 4, + "globalTokens": 32, "dispatch": { - "p50": 232.67200589179993, - "p90": 240.76800048351288, - "p95": 244.60799992084503, - "p99": 252.22399830818176 + "p50": 93.79199892282486, + "p90": 107.42399841547012, + "p95": 110.01600325107574, + "p99": 119.35999989509583 }, "combine": { - "p50": 338.01600337028503, - "p90": 347.8719890117645, - "p95": 348.7040102481842, - "p99": 361.407995223999 + "p50": 83.03999900817871, + "p90": 87.71199733018875, + "p95": 93.79199892282486, + "p99": 99.32799637317657 }, "roundtrip": { - "p50": 553.9519786834717, - "p90": 560.2239966392517, - "p95": 564.3839836120605, - "p99": 589.8879766464233 + "p50": 154.27200496196747, + "p90": 167.4560010433197, + "p95": 171.10399901866913, + "p99": 189.82400000095367 }, "isolatedSum": { - "p50": 570.688009262085, - "p90": 588.6399894952774, - "p95": 593.3120101690292, - "p99": 613.6319935321808 + "p50": 176.83199793100357, + "p90": 195.13599574565887, + "p95": 203.8080021739006, + "p99": 218.6879962682724 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 469762048, - "combineLogicalBytes": 469762048, - "fanoutMean": 8, - "recvTokensMax": 4096, - "stragglerRank": 7, + "dispatchLogicalBytes": 2394112, + "combineLogicalBytes": 2394112, + "fanoutMean": 5.21875, + "recvTokensMax": 24, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 1024, - "globalTokens": 8192, + "tokensPerRank": 8, + "globalTokens": 64, "dispatch": { - "p50": 409.5360040664673, - "p90": 415.0719940662384, - "p95": 416.76801443099976, - "p99": 
433.50398540496826 + "p50": 97.18400239944458, + "p90": 109.8880022764206, + "p95": 113.40799927711487, + "p99": 124.32000041007996 }, "combine": { - "p50": 594.3359732627869, - "p90": 599.7120141983032, - "p95": 606.2399744987488, - "p99": 619.2640066146851 + "p50": 84.76799726486206, + "p90": 93.91999989748001, + "p95": 96.09600156545639, + "p99": 104.44799810647964 }, "roundtrip": { - "p50": 986.1119985580444, - "p90": 993.5680031776428, - "p95": 998.8160133361816, - "p99": 1015.8400535583496 + "p50": 158.27199816703796, + "p90": 171.03999853134155, + "p95": 175.35999417304993, + "p99": 183.67999792099 }, "isolatedSum": { - "p50": 1003.8719773292542, - "p90": 1014.7840082645416, - "p95": 1023.0079889297485, - "p99": 1052.7679920196533 + "p50": 181.95199966430664, + "p90": 203.8080021739006, + "p95": 209.50400084257126, + "p99": 228.7679985165596 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 939524096, - "combineLogicalBytes": 939524096, - "fanoutMean": 8, - "recvTokensMax": 8192, - "stragglerRank": 7, + "dispatchLogicalBytes": 4630528, + "combineLogicalBytes": 4630528, + "fanoutMean": 5.046875, + "recvTokensMax": 45, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2048, - "globalTokens": 16384, + "tokensPerRank": 16, + "globalTokens": 128, "dispatch": { - "p50": 756.384015083313, - "p90": 767.3280239105225, - "p95": 769.6639895439148, - "p99": 787.7439856529236 + "p50": 97.69599884748459, + "p90": 109.92000252008438, + "p95": 114.75200206041336, + "p99": 137.11999356746674 }, "combine": { - "p50": 1112.671971321106, - "p90": 1122.8480339050293, - "p95": 1133.6640119552612, - "p99": 1208.4800004959106 + "p50": 85.08799970149994, + "p90": 92.44800359010696, + "p95": 95.13600170612335, + "p99": 105.98400235176086 }, "roundtrip": { - "p50": 1856.0960292816162, - "p90": 1870.6879615783691, - "p95": 1877.087950706482, - "p99": 1941.5040016174316 + "p50": 159.61599349975586, + "p90": 171.58399522304535, + 
"p95": 174.94399845600128, + "p99": 185.08799374103546 }, "isolatedSum": { - "p50": 1869.055986404419, - "p90": 1890.1760578155518, - "p95": 1903.328001499176, - "p99": 1996.2239861488342 + "p50": 182.78399854898453, + "p90": 202.36800611019135, + "p95": 209.8880037665367, + "p99": 243.1039959192276 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1879048192, - "combineLogicalBytes": 1879048192, - "fanoutMean": 8, - "recvTokensMax": 16384, + "dispatchLogicalBytes": 9447424, + "combineLogicalBytes": 9447424, + "fanoutMean": 5.1484375, + "recvTokensMax": 91, "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 4096, - "globalTokens": 32768, - "dispatch": { - "p50": 1458.8799476623535, - "p90": 1475.0720262527466, - "p95": 1481.4079999923706, - "p99": 1536.8640422821045 - }, - "combine": { - "p50": 2142.047882080078, - "p90": 2154.560089111328, - "p95": 2158.9438915252686, - "p99": 2215.9039974212646 - }, - "roundtrip": { - "p50": 3584.160089492798, - "p90": 3605.760097503662, - "p95": 3613.152027130127, - "p99": 3669.503927230835 - }, - "isolatedSum": { - "p50": 3600.9278297424316, - "p90": 3629.6321153640747, - "p95": 3640.351891517639, - "p99": 3752.768039703369 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 3758096384, - "combineLogicalBytes": 3758096384, - "fanoutMean": 8, - "recvTokensMax": 32768, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-6620cae5", - "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|prefill|normal|none|none|0|normalized|0.18|b5217e990b95f86", - "colorKey": "b300_01ab5b1a", - "comparisonKey": "5702bf02b3927f32", - "schemaVersion": 3, - "generatedAt": "2026-06-26T23:38:15.541333+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_06", - "sku": "b300", - "backend": "deepep", - "phase": "prefill", - "mode": "normal", - "resourceMode": "normalized", - "suite": 
"resource-constrained", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "B300 EP8 · deepep · bf16 (norm) · zipf", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "zipf", - "routingLabel": "zipf", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": 0.18, - "achievedFraction": 0.1824, - "configuredUnits": 27, - "deviceUnits": 148, - "resourceClass": "resource-constrained", - "conformanceClass": "resource-conforming", - "fixedKernel": false, - "paretoEligible": true - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "b5217e990b95f86", - "workloadId": "set:6:830e36e88869e222", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28271231753", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271231753", - "createdAt": "2026-06-26T23:38:15.541333+00:00", - "sha": "ee4ffe77871d0200cb4a78c96d3ae9f692e9af02" - }, - "rows": [ - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 101.3759970664978, - "p90": 104.76800054311752, - "p95": 106.01600259542465, - "p99": 111.90400272607803 - }, - "combine": { - "p50": 126.11199915409088, - "p90": 127.3919939994812, - "p95": 127.83999741077423, - "p99": 129.18399274349213 - }, - "roundtrip": { - "p50": 207.8080028295517, - "p90": 212.6079946756363, - "p95": 213.69600296020508, - "p99": 
224.2559939622879 - }, - "isolatedSum": { - "p50": 227.48799622058868, - "p90": 232.15999454259872, - "p95": 233.85600000619888, - "p99": 241.08799546957016 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 49946624, - "combineLogicalBytes": 49946624, - "fanoutMean": 3.40234375, - "recvTokensMax": 1022, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 256, - "globalTokens": 2048, - "dispatch": { - "p50": 136.31999492645264, - "p90": 139.80799913406372, - "p95": 140.86399972438812, - "p99": 150.43200552463531 - }, - "combine": { - "p50": 176.35199427604675, - "p90": 178.78399789333344, - "p95": 180.03199994564056, - "p99": 188.60800564289093 - }, - "roundtrip": { - "p50": 297.5679934024811, - "p90": 303.45600843429565, - "p95": 306.46398663520813, - "p99": 319.2960023880005 - }, - "isolatedSum": { - "p50": 312.6719892024994, - "p90": 318.59199702739716, - "p95": 320.8959996700287, - "p99": 339.04001116752625 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 100509696, - "combineLogicalBytes": 100509696, - "fanoutMean": 3.42333984375, - "recvTokensMax": 2046, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 512, - "globalTokens": 4096, - "dispatch": { - "p50": 203.90400290489197, - "p90": 211.58400177955627, - "p95": 212.51200139522552, - "p99": 223.32799434661865 - }, - "combine": { - "p50": 325.1839876174927, - "p90": 335.55200695991516, - "p95": 335.80800890922546, - "p99": 337.8559947013855 - }, - "roundtrip": { - "p50": 506.20800256729126, - "p90": 514.4960284233093, - "p95": 519.7759866714478, - "p99": 534.0160131454468 - }, - "isolatedSum": { - "p50": 529.0879905223846, - "p90": 547.1360087394714, - "p95": 548.320010304451, - "p99": 561.1839890480042 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 201678848, - "combineLogicalBytes": 201678848, - "fanoutMean": 3.4345703125, - "recvTokensMax": 4094, - 
"stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 1024, - "globalTokens": 8192, + "tokensPerRank": 32, + "globalTokens": 256, "dispatch": { - "p50": 348.86398911476135, - "p90": 353.40800881385803, - "p95": 354.7520041465759, - "p99": 364.22398686408997 + "p50": 101.6319990158081, + "p90": 112.96000331640244, + "p95": 116.44800007343292, + "p99": 121.85599654912949 }, "combine": { - "p50": 582.4000239372253, - "p90": 585.9519839286804, - "p95": 593.0879712104797, - "p99": 594.5919752120972 + "p50": 93.02400052547455, + "p90": 97.50399738550186, + "p95": 100.63999891281128, + "p99": 108.0000028014183 }, "roundtrip": { - "p50": 909.5680117607117, - "p90": 917.2160029411316, - "p95": 918.5600280761719, - "p99": 924.127995967865 + "p50": 163.93600404262543, + "p90": 175.55199563503265, + "p95": 179.71199750900269, + "p99": 188.48000466823578 }, "isolatedSum": { - "p50": 931.2640130519867, - "p90": 939.3599927425385, - "p95": 947.8399753570557, - "p99": 958.8159620761871 + "p50": 194.65599954128265, + "p90": 210.4640007019043, + "p95": 217.0879989862442, + "p99": 229.8559993505478 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 405035008, - "combineLogicalBytes": 405035008, - "fanoutMean": 3.4488525390625, - "recvTokensMax": 8189, - "stragglerRank": 7, + "dispatchLogicalBytes": 19023872, + "combineLogicalBytes": 19023872, + "fanoutMean": 5.18359375, + "recvTokensMax": 178, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2048, - "globalTokens": 16384, + "tokensPerRank": 64, + "globalTokens": 512, "dispatch": { - "p50": 641.8560147285461, - "p90": 648.639976978302, - "p95": 655.135989189148, - "p99": 660.256028175354 + "p50": 111.23199760913849, + "p90": 121.85599654912949, + "p95": 126.8479973077774, + "p99": 160.35200655460358 }, "combine": { - "p50": 1062.7520084381104, - "p90": 1072.7039575576782, - "p95": 1073.4080076217651, - "p99": 1076.5119791030884 + 
"p50": 107.58399963378906, + "p90": 115.39199948310852, + "p95": 118.8800036907196, + "p99": 122.52800166606903 }, "roundtrip": { - "p50": 1693.343997001648, - "p90": 1700.6080150604248, - "p95": 1702.847957611084, - "p99": 1706.6559791564941 + "p50": 189.15200233459473, + "p90": 197.66399264335632, + "p95": 201.1519968509674, + "p99": 207.61600136756897 }, "isolatedSum": { - "p50": 1704.6080231666565, - "p90": 1721.3439345359802, - "p95": 1728.543996810913, - "p99": 1736.7680072784424 + "p50": 218.81599724292755, + "p90": 237.247996032238, + "p95": 245.728000998497, + "p99": 282.8800082206726 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 808822784, - "combineLogicalBytes": 808822784, - "fanoutMean": 3.44354248046875, - "recvTokensMax": 16380, - "stragglerRank": 5, + "dispatchLogicalBytes": 38148096, + "combineLogicalBytes": 38148096, + "fanoutMean": 5.197265625, + "recvTokensMax": 350, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 4096, - "globalTokens": 32768, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 1252.1920204162598, - "p90": 1262.719988822937, - "p95": 1264.7360563278198, - "p99": 1276.8640518188477 + "p50": 126.46399438381195, + "p90": 136.9280070066452, + "p95": 140.00000059604645, + "p99": 148.03199470043182 }, "combine": { - "p50": 2043.4560775756836, - "p90": 2045.151948928833, - "p95": 2047.1999645233154, - "p99": 2067.392110824585 + "p50": 124.03199821710587, + "p90": 132.7359974384308, + "p95": 133.63200426101685, + "p99": 135.5839967727661 }, "roundtrip": { - "p50": 3284.6720218658447, - "p90": 3295.1040267944336, - "p95": 3299.0400791168213, - "p99": 3313.3440017700195 + "p50": 225.92000663280487, + "p90": 235.20000278949738, + "p95": 240.31999707221985, + "p99": 262.81601190567017 }, "isolatedSum": { - "p50": 3295.6480979919434, - "p90": 3307.87193775177, - "p95": 3311.9360208511353, - "p99": 3344.2561626434326 + "p50": 250.49599260091782, + "p90": 
269.664004445076, + "p95": 273.6320048570633, + "p99": 283.61599147319794 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1619795968, - "combineLogicalBytes": 1619795968, - "fanoutMean": 3.4481201171875, - "recvTokensMax": 32761, - "stragglerRank": 7, + "dispatchLogicalBytes": 76955648, + "combineLogicalBytes": 76955648, + "fanoutMean": 5.2421875, + "recvTokensMax": 687, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 @@ -26279,28 +25849,28 @@ ] }, { - "id": "cx-9b7dbfc5", - "identity": "b300|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|prefill|normal|none|none|0|normalized|0.18|2b57a75d27f5b39", - "colorKey": "b300_085c12d4", - "comparisonKey": "afb8d29f702ca3c1", + "id": "cx-48c02d24", + "identity": "gb300|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|decode|normal|none|none|0|tuned||431e44245dd1524", + "colorKey": "gb300_8cda999b", + "comparisonKey": "f43e80b5c2df2021", "schemaVersion": 3, - "generatedAt": "2026-06-26T18:21:45.459593+00:00", + "generatedAt": "2026-06-29T13:40:58.816700+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_16", - "sku": "b300", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", "backend": "deepep", - "phase": "prefill", + "phase": "decode", "mode": "normal", - "resourceMode": "normalized", - "suite": "resource-constrained", + "resourceMode": "tuned", + "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep · bf16 (norm) · zipf+eplb", + "label": "GB300 EP8 · deepep · bf16 · zipf+eplb", "model": "DeepSeek-V3 (EPLB physical)", "shape": { "hidden": 7168, @@ -26312,288 +25882,363 @@ "unevenTokens": "none", "eplbEnabled": true, "dispatchDtype": "bf16", 
+ "kernelGeneration": "v1", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { - "requestedFraction": 0.18, - "achievedFraction": 0.1824, - "configuredUnits": 27, - "deviceUnits": 148, - "resourceClass": "resource-constrained", - "conformanceClass": "resource-conforming", + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", "fixedKernel": false, - "paretoEligible": true + "paretoEligible": false }, "placement": { "kind": "packed", - "nodes": 1, + "nodes": 2, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "2b57a75d27f5b39", - "workloadId": "set:6:830e36e88869e222", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": 4.895263671875, - "eplbImbalanceAfter": 1.0000902811686199, - "backendVersion": "1.2.1", + "traceSignature": "431e44245dd1524", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.927734375, + "eplbImbalanceAfter": 1.0006103515625, + "backendVersion": "1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28255311146", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28255311146", - "createdAt": "2026-06-26T18:21:45.459593+00:00", - "sha": "36d3eb6c3c7386d3220c873d305410219c5c0f17" + "id": "28374342409", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409", + "createdAt": "2026-06-29T13:08:27Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 1, + "globalTokens": 8, "dispatch": { - "p50": 93.28000247478485, - "p90": 96.16000205278397, - "p95": 98.78399968147278, - "p99": 129.2479932308197 + "p50": 88.60799670219421, + "p90": 101.6319990158081, + "p95": 
104.8320010304451, + "p99": 119.74400281906128 }, "combine": { - "p50": 114.94400352239609, - "p90": 115.55200070142746, - "p95": 115.93600362539291, - "p99": 126.3359934091568 + "p50": 74.11199808120728, + "p90": 81.91999793052673, + "p95": 83.64800363779068, + "p99": 92.32000261545181 }, "roundtrip": { - "p50": 195.6160068511963, - "p90": 199.42399859428406, - "p95": 200.83199441432953, - "p99": 215.16799926757812 + "p50": 144.3520039319992, + "p90": 154.27200496196747, + "p95": 158.2079976797104, + "p99": 168.2880073785782 }, "isolatedSum": { - "p50": 208.22400599718094, - "p90": 211.71200275421143, - "p95": 214.7200033068657, - "p99": 255.5839866399765 + "p50": 162.7199947834015, + "p90": 183.55199694633484, + "p95": 188.48000466823578, + "p99": 212.0640054345131 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77385728, - "combineLogicalBytes": 77385728, - "fanoutMean": 5.271484375, - "recvTokensMax": 691, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 5.375, + "recvTokensMax": 7, "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 256, - "globalTokens": 2048, + "tokensPerRank": 2, + "globalTokens": 16, "dispatch": { - "p50": 136.25599443912506, - "p90": 139.00800049304962, - "p95": 141.50400459766388, - "p99": 155.03999590873718 + "p50": 89.37600255012512, + "p90": 105.12000322341919, + "p95": 113.63200098276138, + "p99": 151.99999511241913 }, "combine": { - "p50": 153.72799336910248, - "p90": 163.2319986820221, - "p95": 163.80800306797028, - "p99": 167.67999529838562 + "p50": 80.64000308513641, + "p90": 85.4400023818016, + "p95": 92.86399930715561, + "p99": 119.6800023317337 }, "roundtrip": { - "p50": 269.9199914932251, - "p90": 275.64799785614014, - "p95": 276.92800760269165, - "p99": 291.77600145339966 + "p50": 148.22399616241455, + "p90": 164.63999450206757, + "p95": 176.79999768733978, + "p99": 213.6639952659607 }, "isolatedSum": { - "p50": 289.98398780822754, - "p90": 
302.2399991750717, - "p95": 305.31200766563416, - "p99": 322.7199912071228 + "p50": 170.01600563526154, + "p90": 190.5600056052208, + "p95": 206.496000289917, + "p99": 271.67999744415283 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 155172864, - "combineLogicalBytes": 155172864, - "fanoutMean": 5.28515625, - "recvTokensMax": 1378, - "stragglerRank": 7, + "dispatchLogicalBytes": 1204224, + "combineLogicalBytes": 1204224, + "fanoutMean": 5.25, + "recvTokensMax": 14, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 512, - "globalTokens": 4096, + "tokensPerRank": 4, + "globalTokens": 32, "dispatch": { - "p50": 192.671999335289, - "p90": 200.095996260643, - "p95": 201.1840045452118, - "p99": 211.99999749660492 + "p50": 88.76799792051315, + "p90": 100.89600086212158, + "p95": 105.24799674749374, + "p99": 112.73600161075592 }, "combine": { - "p50": 264.70398902893066, - "p90": 274.2399871349335, - "p95": 274.9119997024536, - "p99": 286.3999903202057 + "p50": 80.92799782752991, + "p90": 84.86399799585342, + "p95": 87.5839963555336, + "p99": 96.89600020647049 }, "roundtrip": { - "p50": 439.7439956665039, - "p90": 445.279985666275, - "p95": 447.519987821579, - "p99": 459.9039852619171 + "p50": 147.90399372577667, + "p90": 158.01599621772766, + "p95": 161.8880033493042, + "p99": 186.49600446224213 }, "isolatedSum": { - "p50": 457.37598836421967, - "p90": 474.3359833955765, - "p95": 476.0960042476654, - "p99": 498.3999878168106 + "p50": 169.69599574804306, + "p90": 185.759998857975, + "p95": 192.83199310302734, + "p99": 209.6320018172264 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 310546432, - "combineLogicalBytes": 310546432, - "fanoutMean": 5.28857421875, - "recvTokensMax": 2745, + "dispatchLogicalBytes": 2394112, + "combineLogicalBytes": 2394112, + "fanoutMean": 5.21875, + "recvTokensMax": 24, "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 1024, - 
"globalTokens": 8192, + "tokensPerRank": 8, + "globalTokens": 64, "dispatch": { - "p50": 319.90399956703186, - "p90": 325.8560001850128, - "p95": 327.1999955177307, - "p99": 333.44000577926636 + "p50": 90.65599739551544, + "p90": 104.92800176143646, + "p95": 109.50399935245514, + "p99": 151.90400183200836 }, "combine": { - "p50": 450.78399777412415, - "p90": 458.8800072669983, - "p95": 459.77601408958435, - "p99": 482.87999629974365 + "p50": 83.10399949550629, + "p90": 88.35200220346451, + "p95": 94.52799707651138, + "p99": 108.22399705648422 }, "roundtrip": { - "p50": 756.1600208282471, - "p90": 761.5039944648743, - "p95": 763.5840177536011, - "p99": 783.5519909858704 + "p50": 151.8400013446808, + "p90": 162.33600676059723, + "p95": 167.84000396728516, + "p99": 185.15199422836304 }, "isolatedSum": { - "p50": 770.687997341156, - "p90": 784.7360074520111, - "p95": 786.9760096073151, - "p99": 816.32000207901 + "p50": 173.75999689102173, + "p90": 193.28000396490097, + "p95": 204.03199642896652, + "p99": 260.1279988884926 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 620619776, - "combineLogicalBytes": 620619776, - "fanoutMean": 5.2845458984375, - "recvTokensMax": 5526, + "dispatchLogicalBytes": 4630528, + "combineLogicalBytes": 4630528, + "fanoutMean": 5.046875, + "recvTokensMax": 45, "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2048, - "globalTokens": 16384, + "tokensPerRank": 16, + "globalTokens": 128, "dispatch": { - "p50": 573.0559825897217, - "p90": 581.6959738731384, - "p95": 583.7119817733765, - "p99": 671.4879870414734 + "p50": 93.47199648618698, + "p90": 104.12800312042236, + "p95": 108.25599730014801, + "p99": 117.50400066375732 }, "combine": { - "p50": 827.4880051612854, - "p90": 838.6240005493164, - "p95": 839.9040102958679, - "p99": 863.4560108184814 + "p50": 83.96799862384796, + "p90": 88.70399743318558, + "p95": 93.75999867916107, + "p99": 97.95200079679489 }, "roundtrip": { - "p50": 
1382.9760551452637, - "p90": 1392.9920196533203, - "p95": 1396.8960046768188, - "p99": 1428.1599521636963 + "p50": 154.2080044746399, + "p90": 165.56799411773682, + "p95": 169.18399930000305, + "p99": 176.86399817466736 }, "isolatedSum": { - "p50": 1400.543987751007, - "p90": 1420.3199744224548, - "p95": 1423.6159920692444, - "p99": 1534.9439978599548 + "p50": 177.43999511003494, + "p90": 192.83200055360794, + "p95": 202.01599597930908, + "p99": 215.45600146055222 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1239175168, - "combineLogicalBytes": 1239175168, - "fanoutMean": 5.2757568359375, - "recvTokensMax": 11165, - "stragglerRank": 4, + "dispatchLogicalBytes": 9447424, + "combineLogicalBytes": 9447424, + "fanoutMean": 5.1484375, + "recvTokensMax": 91, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 4096, - "globalTokens": 32768, + "tokensPerRank": 32, + "globalTokens": 256, "dispatch": { - "p50": 1061.8879795074463, - "p90": 1068.7040090560913, - "p95": 1075.9040117263794, - "p99": 1094.048023223877 + "p50": 98.1760025024414, + "p90": 111.61600053310394, + "p95": 123.32800030708313, + "p99": 153.85599434375763 }, "combine": { - "p50": 1530.2079916000366, - "p90": 1540.7040119171143, - "p95": 1551.2640476226807, - "p99": 1662.6559495925903 + "p50": 91.00800007581711, + "p90": 96.73599898815155, + "p95": 104.51199859380722, + "p99": 131.3599944114685 }, "roundtrip": { - "p50": 2579.9999237060547, - "p90": 2593.7600135803223, - "p95": 2600.543975830078, - "p99": 2645.440101623535 + "p50": 161.43999993801117, + "p90": 174.20800030231476, + "p95": 183.16799402236938, + "p99": 232.44799673557281 }, "isolatedSum": { - "p50": 2592.095971107483, - "p90": 2609.4080209732056, - "p95": 2627.16805934906, - "p99": 2756.7039728164673 + "p50": 189.18400257825851, + "p90": 208.3519995212555, + "p95": 227.83999890089035, + "p99": 285.21598875522614 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2481604608, - 
"combineLogicalBytes": 2481604608, - "fanoutMean": 5.282684326171875, - "recvTokensMax": 22165, + "dispatchLogicalBytes": 19023872, + "combineLogicalBytes": 19023872, + "fanoutMean": 5.18359375, + "recvTokensMax": 178, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 108.60799998044968, + "p90": 114.3999993801117, + "p95": 117.8240031003952, + "p99": 126.11199915409088 + }, + "combine": { + "p50": 106.175996363163, + "p90": 109.82400178909302, + "p95": 112.73600161075592, + "p99": 119.80800330638885 + }, + "roundtrip": { + "p50": 185.72799861431122, + "p90": 193.27999651432037, + "p95": 196.73599302768707, + "p99": 210.11200547218323 + }, + "isolatedSum": { + "p50": 214.78399634361267, + "p90": 224.2240011692047, + "p95": 230.56000471115112, + "p99": 245.92000246047974 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38148096, + "combineLogicalBytes": 38148096, + "fanoutMean": 5.197265625, + "recvTokensMax": 350, "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 123.07199835777283, + "p90": 132.79999792575836, + "p95": 137.9839926958084, + "p99": 175.7120043039322 + }, + "combine": { + "p50": 122.72000312805176, + "p90": 131.48799538612366, + "p95": 133.91999900341034, + "p99": 151.8079936504364 + }, + "roundtrip": { + "p50": 223.00800681114197, + "p90": 230.6559979915619, + "p95": 234.49599742889404, + "p99": 251.71199440956116 + }, + "isolatedSum": { + "p50": 245.79200148582458, + "p90": 264.287993311882, + "p95": 271.90399169921875, + "p99": 327.5199979543686 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 76955648, + "combineLogicalBytes": 76955648, + "fanoutMean": 5.2421875, + "recvTokensMax": 687, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 } ] }, { - "id": "cx-07a9b9e5", - "identity": 
"b300|deepep|7168|8|256|bf16|normal|cached-layout-comm-only-v1|uniform|8|prefill|normal|none|none|0|normalized|0.18|64d989e2e2a6b31", - "colorKey": "b300_63f1354f", - "comparisonKey": "e1e888fe005f12d0", + "id": "cx-99af315f", + "identity": "gb300|deepep|v1|7168|8|256|bf16|normal|cached-layout-comm-only-v1|uniform|8|decode|normal|none|none|0|tuned||74444524b5db510", + "colorKey": "gb300_20de545c", + "comparisonKey": "fcd0e10182ca372c", "schemaVersion": 3, - "generatedAt": "2026-06-26T17:43:21.918392+00:00", + "generatedAt": "2026-06-29T13:45:32.504465+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_01", - "sku": "b300", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", "backend": "deepep", - "phase": "prefill", + "phase": "decode", "mode": "normal", - "resourceMode": "normalized", - "suite": "resource-constrained", + "resourceMode": "tuned", + "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "cached-layout-comm-only-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep · bf16 (norm) [cl]", + "label": "GB300 EP8 · deepep · bf16 [cl]", "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, @@ -26605,259 +26250,334 @@ "unevenTokens": "none", "eplbEnabled": false, "dispatchDtype": "bf16", + "kernelGeneration": "v1", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { - "requestedFraction": 0.18, - "achievedFraction": 0.1824, - "configuredUnits": 27, - "deviceUnits": 148, - "resourceClass": "resource-constrained", - "conformanceClass": "resource-conforming", + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", "fixedKernel": false, - "paretoEligible": true + 
"paretoEligible": false }, "placement": { "kind": "packed", - "nodes": 1, + "nodes": 2, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "64d989e2e2a6b31", - "workloadId": "set:6:a426d66e479dc893", - "workloadSource": "canonical-serialized", + "traceSignature": "74444524b5db510", + "workloadId": null, + "workloadSource": "seeded-runtime", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", + "backendVersion": "1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28254489726", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254489726", - "createdAt": "2026-06-26T17:43:21.918392+00:00", - "sha": "60dec7d70f554e252fec87709e2be52752947db1" + "id": "28374342409", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409", + "createdAt": "2026-06-29T13:08:27Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 1, + "globalTokens": 8, "dispatch": { - "p50": 86.43200248479843, - "p90": 88.95999938249588, - "p95": 91.58399701118469, - "p99": 99.55199807882309 + "p50": 76.09599828720093, + "p90": 89.79199826717377, + "p95": 93.82399916648865, + "p99": 100.03200173377991 }, "combine": { - "p50": 115.35999923944473, - "p90": 116.03199690580368, - "p95": 116.38399958610535, - "p99": 121.56800180673599 + "p50": 82.0159986615181, + "p90": 87.52000331878662, + "p95": 93.72799843549728, + "p99": 97.18400239944458 }, "roundtrip": { - "p50": 186.8479996919632, - "p90": 192.47999787330627, - "p95": 193.31200420856476, - "p99": 215.45599400997162 + "p50": 137.11999356746674, + "p90": 149.47199821472168, + "p95": 152.8639942407608, + "p99": 170.30400037765503 }, "isolatedSum": { - "p50": 201.79200172424316, - "p90": 204.99199628829956, - "p95": 207.96799659729004, - "p99": 
221.11999988555908 + "p50": 158.11199694871902, + "p90": 177.3120015859604, + "p95": 187.55199760198593, + "p99": 197.2160041332245 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 4, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 256, - "globalTokens": 2048, + "tokensPerRank": 2, + "globalTokens": 16, "dispatch": { - "p50": 128.60800325870514, - "p90": 131.48799538612366, - "p95": 132.79999792575836, - "p99": 147.20000326633453 + "p50": 78.59200239181519, + "p90": 91.90399944782257, + "p95": 98.49599748849869, + "p99": 109.24799740314484 }, "combine": { - "p50": 156.19200468063354, - "p90": 164.48000073432922, - "p95": 164.76799547672272, - "p99": 167.71200299263 + "p50": 82.84799754619598, + "p90": 87.93599903583527, + "p95": 92.32000261545181, + "p99": 96.92800045013428 }, "roundtrip": { - "p50": 264.8000121116638, - "p90": 271.232008934021, - "p95": 274.6239900588989, - "p99": 307.20001459121704 + "p50": 140.9599930047989, + "p90": 152.3520052433014, + "p95": 156.44800662994385, + "p99": 162.78399527072906 }, "isolatedSum": { - "p50": 284.8000079393387, - "p90": 295.9679961204529, - "p95": 297.5679934024811, - "p99": 314.91200625896454 + "p50": 161.43999993801117, + "p90": 179.83999848365784, + "p95": 190.8160001039505, + "p99": 206.1759978532791 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 155889664, - "combineLogicalBytes": 155889664, - "fanoutMean": 5.3095703125, - "recvTokensMax": 1422, - "stragglerRank": 4, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 512, - "globalTokens": 4096, + "tokensPerRank": 4, + 
"globalTokens": 32, "dispatch": { - "p50": 186.46399676799774, - "p90": 192.86400079727173, - "p95": 195.360004901886, - "p99": 208.3200067281723 + "p50": 80.09599894285202, + "p90": 93.59999746084213, + "p95": 99.93600100278854, + "p99": 114.97599631547928 }, "combine": { - "p50": 266.6879892349243, - "p90": 274.78399872779846, - "p95": 275.2639949321747, - "p99": 287.1359884738922 + "p50": 83.00799876451492, + "p90": 90.11200070381165, + "p95": 94.7519987821579, + "p99": 104.3199971318245 }, "roundtrip": { - "p50": 437.4080002307892, - "p90": 442.30398535728455, - "p95": 445.6320106983185, - "p99": 468.51199865341187 + "p50": 140.76800644397736, + "p90": 152.73599326610565, + "p95": 155.7759940624237, + "p99": 162.84799575805664 }, "isolatedSum": { - "p50": 453.15198600292206, - "p90": 467.6479995250702, - "p95": 470.62399983406067, - "p99": 495.4559952020645 + "p50": 163.10399770736694, + "p90": 183.71199816465378, + "p95": 194.68799978494644, + "p99": 219.29599344730377 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 312266752, - "combineLogicalBytes": 312266752, - "fanoutMean": 5.31787109375, - "recvTokensMax": 2779, - "stragglerRank": 4, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 1024, - "globalTokens": 8192, + "tokensPerRank": 8, + "globalTokens": 64, "dispatch": { - "p50": 316.0319924354553, - "p90": 319.16800141334534, - "p95": 320.99199295043945, - "p99": 330.01598715782166 - }, - "combine": { - "p50": 458.8479995727539, - "p90": 461.66399121284485, - "p95": 470.20798921585083, - "p99": 483.39200019836426 + "p50": 80.92799782752991, + "p90": 91.61599725484848, + "p95": 95.77599912881851, + "p99": 104.38399761915207 + }, + "combine": { + "p50": 84.3840017914772, + "p90": 91.61599725484848, + "p95": 93.9520001411438, + "p99": 97.21600264310837 }, "roundtrip": { - "p50": 
752.0639896392822, - "p90": 761.3440155982971, - "p95": 763.6799812316895, - "p99": 787.6480221748352 + "p50": 143.45599710941315, + "p90": 155.2640050649643, + "p95": 159.10400450229645, + "p99": 169.66399550437927 }, "isolatedSum": { - "p50": 774.8799920082092, - "p90": 780.8319926261902, - "p95": 791.1999821662903, - "p99": 813.4079873561859 + "p50": 165.3119996190071, + "p90": 183.23199450969696, + "p95": 189.7279992699623, + "p99": 201.60000026226044 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 623443968, - "combineLogicalBytes": 623443968, - "fanoutMean": 5.30859375, - "recvTokensMax": 5505, - "stragglerRank": 4, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2048, - "globalTokens": 16384, + "tokensPerRank": 16, + "globalTokens": 128, "dispatch": { - "p50": 557.2800040245056, - "p90": 565.0240182876587, - "p95": 566.3679838180542, - "p99": 600.0319719314575 + "p50": 81.28000050783157, + "p90": 91.23200178146362, + "p95": 94.08000111579895, + "p99": 107.64800012111664 }, "combine": { - "p50": 817.4399733543396, - "p90": 827.8399705886841, - "p95": 832.0639729499817, - "p99": 854.3999791145325 + "p50": 86.14400029182434, + "p90": 94.01600062847137, + "p95": 96.22400254011154, + "p99": 103.87200117111206 }, "roundtrip": { - "p50": 1359.328031539917, - "p90": 1370.911955833435, - "p95": 1380.5760145187378, - "p99": 1444.640040397644 + "p50": 145.63199877738953, + "p90": 156.99200332164764, + "p95": 160.7999950647354, + "p99": 173.47200214862823 }, "isolatedSum": { - "p50": 1374.7199773788452, - "p90": 1392.8639888763428, - "p95": 1398.431956768036, - "p99": 1454.43195104599 + "p50": 167.42400079965591, + "p90": 185.248002409935, + "p95": 190.3040036559105, + "p99": 211.5200012922287 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1243805696, - "combineLogicalBytes": 1243805696, - 
"fanoutMean": 5.29547119140625, - "recvTokensMax": 10952, - "stragglerRank": 4, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 4096, - "globalTokens": 32768, + "tokensPerRank": 32, + "globalTokens": 256, "dispatch": { - "p50": 1037.4079942703247, - "p90": 1044.800043106079, - "p95": 1047.4879741668701, - "p99": 1074.3039846420288 + "p50": 85.50400286912918, + "p90": 95.32800316810608, + "p95": 98.62399846315384, + "p99": 109.0560033917427 }, "combine": { - "p50": 1529.6319723129272, - "p90": 1541.375994682312, - "p95": 1552.0639419555664, - "p99": 1577.1199464797974 + "p50": 93.05600076913834, + "p90": 98.04800152778625, + "p95": 100.41599720716476, + "p99": 107.744000852108 }, "roundtrip": { - "p50": 2550.9119033813477, - "p90": 2564.2240047454834, - "p95": 2571.199893951416, - "p99": 2613.2800579071045 + "p50": 149.56800639629364, + "p90": 161.15200519561768, + "p95": 164.41600024700165, + "p99": 173.47200214862823 }, "isolatedSum": { - "p50": 2567.039966583252, - "p90": 2586.176037788391, - "p95": 2599.5519161224365, - "p99": 2651.423931121826 + "p50": 178.56000363826752, + "p90": 193.37600469589233, + "p95": 199.0399956703186, + "p99": 216.8000042438507 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2487009280, - "combineLogicalBytes": 2487009280, - "fanoutMean": 5.294189453125, - "recvTokensMax": 21781, - "stragglerRank": 4, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 97.05600142478943, + "p90": 107.26399719715118, + "p95": 111.35999858379364, + "p99": 125.88800489902496 + }, + "combine": { + "p50": 106.81600123643875, + "p90": 112.06399649381638, + "p95": 
116.44800007343292, + "p99": 120.4800009727478 + }, + "roundtrip": { + "p50": 174.17599260807037, + "p90": 182.97599256038666, + "p95": 186.24000251293182, + "p99": 202.04800367355347 + }, + "isolatedSum": { + "p50": 203.87200266122818, + "p90": 219.32799369096756, + "p95": 227.80799865722656, + "p99": 246.36800587177277 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 110.52799969911575, + "p90": 118.65600198507309, + "p95": 122.079998254776, + "p99": 128.35200130939484 + }, + "combine": { + "p50": 124.60800260305405, + "p90": 132.32000172138214, + "p95": 134.65599715709686, + "p99": 141.95199310779572 + }, + "roundtrip": { + "p50": 208.8319957256317, + "p90": 217.3440009355545, + "p95": 220.22399306297302, + "p99": 224.89599883556366 + }, + "isolatedSum": { + "p50": 235.1360023021698, + "p90": 250.97600370645523, + "p95": 256.73599541187286, + "p99": 270.30399441719055 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 @@ -26865,28 +26585,28 @@ ] }, { - "id": "cx-179c0247", - "identity": "b300|deepep|7168|8|256|bf16|normal|cached-layout-comm-only-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", - "colorKey": "b300_33311fdc", - "comparisonKey": "6deb8b087f7b728f", + "id": "cx-bd5b38a4", + "identity": "gb300|deepep|v1|7168|8|256|bf16|ll|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||74444524b5db510", + "colorKey": "gb300_8d02a479", + "comparisonKey": "661dd1b497fcaeac", "schemaVersion": 3, - "generatedAt": "2026-06-27T09:46:35.384079+00:00", + "generatedAt": "2026-06-29T13:51:03.850938+00:00", 
"status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_09", - "sku": "b300", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", "backend": "deepep", - "phase": "prefill", - "mode": "normal", + "phase": "decode", + "mode": "ll", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "cached-layout-comm-only-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep · bf16 [cl]", + "label": "GB300 EP8 · deepep · bf16 LL", "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, @@ -26898,259 +26618,334 @@ "unevenTokens": "none", "eplbEnabled": false, "dispatchDtype": "bf16", + "kernelGeneration": "v1", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1351, - "configuredUnits": 20, - "deviceUnits": 148, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, "paretoEligible": false }, "placement": { "kind": "packed", - "nodes": 1, + "nodes": 2, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "64d989e2e2a6b31", - "workloadId": "set:6:a426d66e479dc893", - "workloadSource": "canonical-serialized", + "traceSignature": "74444524b5db510", + "workloadId": null, + "workloadSource": "seeded-runtime", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", + "backendVersion": "1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": 
"SemiAnalysisAI/InferenceX", "run": { - "id": "28285576352", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285576352", - "createdAt": "2026-06-27T09:46:35.384079+00:00", - "sha": "149586650dbed5b7579537347e9489d5b41543c1" + "id": "28374342409", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409", + "createdAt": "2026-06-29T13:08:27Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 1, + "globalTokens": 8, "dispatch": { - "p50": 87.26400136947632, - "p90": 88.8959988951683, - "p95": 91.20000153779984, - "p99": 106.08000308275223 + "p50": 61.76000088453293, + "p90": 65.98400324583054, + "p95": 67.32799857854843, + "p99": 73.27999919652939 }, "combine": { - "p50": 115.55200070142746, - "p90": 116.80000275373459, - "p95": 117.60000139474869, - "p99": 140.32000303268433 + "p50": 60.095999389886856, + "p90": 63.77600133419037, + "p95": 68.70400160551071, + "p99": 72.86400347948074 }, "roundtrip": { - "p50": 186.17600202560425, - "p90": 192.76799261569977, - "p95": 193.82399320602417, - "p99": 217.75999665260315 + "p50": 93.79199892282486, + "p90": 99.7759997844696, + "p95": 101.18400305509567, + "p99": 104.44799810647964 }, "isolatedSum": { - "p50": 202.81600207090378, - "p90": 205.6960016489029, - "p95": 208.80000293254852, - "p99": 246.40000611543655 + "p50": 121.85600027441978, + "p90": 129.7600045800209, + "p95": 136.03200018405914, + "p99": 146.14400267601013 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 7, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 14, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 256, - "globalTokens": 2048, + "tokensPerRank": 2, + "globalTokens": 16, "dispatch": { - "p50": 
130.20800054073334, - "p90": 132.35199451446533, - "p95": 133.34399461746216, - "p99": 138.7840062379837 + "p50": 64.09599632024765, + "p90": 68.35199892520905, + "p95": 73.15199822187424, + "p99": 77.91999727487564 }, "combine": { - "p50": 155.45600652694702, - "p90": 164.51199352741241, - "p95": 164.8319959640503, - "p99": 176.83200538158417 + "p50": 60.63999980688095, + "p90": 69.11999732255936, + "p95": 70.49600034952164, + "p99": 74.17599856853485 }, "roundtrip": { - "p50": 266.4639949798584, - "p90": 271.61601185798645, - "p95": 274.59201216697693, - "p99": 283.3600044250488 + "p50": 98.43199700117111, + "p90": 102.30399668216705, + "p95": 103.32799702882767, + "p99": 106.62399977445602 }, "isolatedSum": { - "p50": 285.66400706768036, - "p90": 296.86398804187775, - "p95": 298.17599058151245, - "p99": 315.61601161956787 + "p50": 124.7359961271286, + "p90": 137.4719962477684, + "p95": 143.64799857139587, + "p99": 152.0959958434105 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 155889664, - "combineLogicalBytes": 155889664, - "fanoutMean": 5.3095703125, - "recvTokensMax": 1422, - "stragglerRank": 1, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 21, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 512, - "globalTokens": 4096, + "tokensPerRank": 4, + "globalTokens": 32, "dispatch": { - "p50": 186.81600689888, - "p90": 192.83199310302734, - "p95": 195.0400024652481, - "p99": 202.7519941329956 + "p50": 65.0240033864975, + "p90": 74.07999783754349, + "p95": 75.71200281381607, + "p99": 78.97599786520004 }, "combine": { - "p50": 274.52799677848816, - "p90": 275.64799785614014, - "p95": 276.5760123729706, - "p99": 286.624014377594 + "p50": 62.17600032687187, + "p90": 71.52000069618225, + "p95": 72.35199958086014, + "p99": 75.03999769687653 }, "roundtrip": { - "p50": 440.064013004303, - "p90": 445.3119933605194, - "p95": 451.61598920822144, - "p99": 
459.77601408958435 + "p50": 101.79200023412704, + "p90": 106.175996363163, + "p95": 108.22399705648422, + "p99": 114.14399743080139 }, "isolatedSum": { - "p50": 461.34400367736816, - "p90": 468.4799909591675, - "p95": 471.6160148382187, - "p99": 489.3760085105896 + "p50": 127.20000371336937, + "p90": 145.59999853372574, + "p95": 148.0640023946762, + "p99": 154.01599556207657 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 312266752, - "combineLogicalBytes": 312266752, - "fanoutMean": 5.31787109375, - "recvTokensMax": 2779, - "stragglerRank": 7, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 39, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 1024, - "globalTokens": 8192, + "tokensPerRank": 8, + "globalTokens": 64, "dispatch": { - "p50": 316.25598669052124, - "p90": 319.68000531196594, - "p95": 321.1199939250946, - "p99": 329.120010137558 + "p50": 67.77600198984146, + "p90": 76.73600316047668, + "p95": 77.79199630022049, + "p99": 80.60800284147263 }, "combine": { - "p50": 459.00800824165344, - "p90": 461.0239863395691, - "p95": 462.5920057296753, - "p99": 473.66398572921753 + "p50": 69.023996591568, + "p90": 72.92799651622772, + "p95": 74.01599735021591, + "p99": 80.44800162315369 }, "roundtrip": { - "p50": 752.5119781494141, - "p90": 760.9919905662537, - "p95": 763.3919715881348, - "p99": 770.4640030860901 + "p50": 110.01600325107574, + "p90": 113.79200220108032, + "p95": 115.48800021409988, + "p99": 120.12799829244614 }, "isolatedSum": { - "p50": 775.2639949321747, - "p90": 780.703991651535, - "p95": 783.7119996547699, - "p99": 802.7839958667755 + "p50": 136.79999858140945, + "p90": 149.6639996767044, + "p95": 151.8079936504364, + "p99": 161.0560044646263 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 623443968, - "combineLogicalBytes": 623443968, - "fanoutMean": 5.30859375, - "recvTokensMax": 5505, - "stragglerRank": 0, + 
"dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 74, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2048, - "globalTokens": 16384, + "tokensPerRank": 16, + "globalTokens": 128, "dispatch": { - "p50": 558.5920214653015, - "p90": 565.3759837150574, - "p95": 566.9119954109192, - "p99": 578.7839889526367 + "p50": 76.64000242948532, + "p90": 79.8719972372055, + "p95": 81.4720019698143, + "p99": 88.67199718952179 }, "combine": { - "p50": 819.0079927444458, - "p90": 828.4800052642822, - "p95": 830.9760093688965, - "p99": 844.8960185050964 + "p50": 74.5600014925003, + "p90": 83.26400071382523, + "p95": 84.54400300979614, + "p99": 87.90399879217148 }, "roundtrip": { - "p50": 1360.640048980713, - "p90": 1367.583990097046, - "p95": 1372.320055961609, - "p99": 1414.1119718551636 + "p50": 127.3919939994812, + "p90": 132.47999548912048, + "p95": 136.25599443912506, + "p99": 144.19199526309967 }, "isolatedSum": { - "p50": 1377.6000142097473, - "p90": 1393.8559889793396, - "p95": 1397.8880047798157, - "p99": 1423.6800074577332 + "p50": 151.20000392198563, + "p90": 163.13599795103073, + "p95": 166.01600497961044, + "p99": 176.57599598169327 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1243805696, - "combineLogicalBytes": 1243805696, - "fanoutMean": 5.29547119140625, - "recvTokensMax": 10952, - "stragglerRank": 6, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 145, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 4096, - "globalTokens": 32768, + "tokensPerRank": 32, + "globalTokens": 256, "dispatch": { - "p50": 1036.255955696106, - "p90": 1045.151948928833, - "p95": 1047.584056854248, - "p99": 1086.5919589996338 + "p50": 104.51199859380722, + "p90": 112.5119999051094, + "p95": 113.8560026884079, + "p99": 116.89600348472595 }, "combine": { - "p50": 
1528.480052947998, - "p90": 1540.544033050537, - "p95": 1543.2319641113281, - "p99": 1555.2959442138672 + "p50": 105.34399747848511, + "p90": 110.88000237941742, + "p95": 111.68000102043152, + "p99": 113.24799805879593 }, "roundtrip": { - "p50": 2546.976089477539, - "p90": 2557.1839809417725, - "p95": 2563.4560585021973, - "p99": 2601.2799739837646 + "p50": 187.1040016412735, + "p90": 191.71200692653656, + "p95": 193.53599846363068, + "p99": 199.77599382400513 }, "isolatedSum": { - "p50": 2564.736008644104, - "p90": 2585.69598197937, - "p95": 2590.816020965576, - "p99": 2641.887903213501 + "p50": 209.85599607229233, + "p90": 223.39200228452682, + "p95": 225.53600370883942, + "p99": 230.14400154352188 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2487009280, - "combineLogicalBytes": 2487009280, - "fanoutMean": 5.294189453125, - "recvTokensMax": 21781, - "stragglerRank": 6, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 287, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 187.80800700187683, + "p90": 198.71999323368073, + "p95": 200.6399929523468, + "p99": 204.6079933643341 + }, + "combine": { + "p50": 193.15199553966522, + "p90": 315.5840039253235, + "p95": 317.50398874282837, + "p99": 320.3200101852417 + }, + "roundtrip": { + "p50": 344.2560136318207, + "p90": 355.0400137901306, + "p95": 360.28799414634705, + "p99": 370.2720105648041 + }, + "isolatedSum": { + "p50": 380.96000254154205, + "p90": 514.3039971590042, + "p95": 518.1439816951752, + "p99": 524.9280035495758 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 564, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 
363.0400002002716, + "p90": 542.2400236129761, + "p95": 544.6400046348572, + "p99": 555.1360249519348 + }, + "combine": { + "p50": 535.3279709815979, + "p90": 541.5359735488892, + "p95": 543.008029460907, + "p99": 549.7919917106628 + }, + "roundtrip": { + "p50": 585.5039954185486, + "p90": 642.9759860038757, + "p95": 653.760015964508, + "p99": 675.7760047912598 + }, + "isolatedSum": { + "p50": 898.3679711818695, + "p90": 1083.7759971618652, + "p95": 1087.6480340957642, + "p99": 1104.9280166625977 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 1104, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -27158,321 +26953,396 @@ ] }, { - "id": "cx-d90a63c5", - "identity": "b300|deepep|4096|8|128|fp8|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||dc27c5e0894e569", - "colorKey": "b300_d6fd14c3", - "comparisonKey": "0a8b502bd3614965", + "id": "cx-7b4b7034", + "identity": "gb300|deepep|v1|7168|8|256|bf16|ll|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||74444524b5db510", + "colorKey": "gb300_99f44a59", + "comparisonKey": "b22da9163d34e85f", "schemaVersion": 3, - "generatedAt": "2026-06-27T11:14:26.432170+00:00", + "generatedAt": "2026-06-29T13:52:16.884650+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_01", - "sku": "b300", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", "backend": "deepep", - "phase": "prefill", - "mode": "normal", + "phase": "decode", + "mode": "ll", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", + "measurementContract": "runtime-visible-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "B300 
EP8 · deepep · fp8", - "model": "Qwen3.5", + "label": "GB300 EP8 · deepep · bf16 LL", + "model": "DeepSeek-V3/V4", "shape": { - "hidden": 4096, + "hidden": 7168, "topk": 8, - "experts": 128, + "experts": 256, "routing": "uniform", "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, - "dispatchDtype": "fp8", + "dispatchDtype": "bf16", + "kernelGeneration": "v1", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1351, - "configuredUnits": 20, - "deviceUnits": 148, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, "paretoEligible": false }, "placement": { "kind": "packed", - "nodes": 1, + "nodes": 2, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "dc27c5e0894e569", - "workloadId": "set:6:76d8142d69406335", - "workloadSource": "canonical-serialized", + "traceSignature": "74444524b5db510", + "workloadId": null, + "workloadSource": "seeded-runtime", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", + "backendVersion": "1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28287509502", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28287509502", - "createdAt": "2026-06-27T11:14:26.432170+00:00", - "sha": "df7fddee0a275156d4c72fa006cd2b73bce72613" + "id": "28374342409", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409", + "createdAt": "2026-06-29T13:08:27Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 1, + 
"globalTokens": 8, "dispatch": { - "p50": 80.60800284147263, - "p90": 83.45600217580795, - "p95": 84.6719965338707, - "p99": 93.56799721717834 + "p50": 61.344001442193985, + "p90": 64.64000046253204, + "p95": 66.11199676990509, + "p99": 73.18399846553802 }, "combine": { - "p50": 89.82399851083755, - "p90": 92.12800115346909, - "p95": 93.40800344944, - "p99": 99.84000027179718 + "p50": 59.61599946022034, + "p90": 67.16799736022949, + "p95": 69.31199878454208, + "p99": 71.58400118350983 }, "roundtrip": { - "p50": 183.4239959716797, - "p90": 186.46399676799774, - "p95": 187.68000602722168, - "p99": 198.17599654197693 + "p50": 92.44800359010696, + "p90": 98.7199991941452, + "p95": 100.09600222110748, + "p99": 104.35199737548828 }, "isolatedSum": { - "p50": 170.43200135231018, - "p90": 175.58400332927704, - "p95": 178.0799999833107, - "p99": 193.40799748897552 + "p50": 120.96000090241432, + "p90": 131.80799782276154, + "p95": 135.42399555444717, + "p99": 144.76799964904785 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 22282240, - "combineLogicalBytes": 44564480, - "fanoutMean": 5.3125, - "recvTokensMax": 699, - "stragglerRank": 7, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 14, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 256, - "globalTokens": 2048, + "tokensPerRank": 2, + "globalTokens": 16, "dispatch": { - "p50": 108.15999656915665, - "p90": 112.19199746847153, - "p95": 116.60800129175186, - "p99": 134.62400436401367 + "p50": 63.64800035953522, + "p90": 67.32799857854843, + "p95": 71.87200337648392, + "p99": 77.08799839019775 }, "combine": { - "p50": 124.92799758911133, - "p90": 128.4479945898056, - "p95": 129.98400628566742, - "p99": 134.91199910640717 + "p50": 60.06399914622307, + "p90": 69.023996591568, + "p95": 70.04799693822861, + "p99": 72.06399738788605 }, "roundtrip": { - "p50": 259.99999046325684, - "p90": 264.3519937992096, - "p95": 
266.07999205589294, - "p99": 281.6320061683655 + "p50": 97.50399738550186, + "p90": 102.01600193977356, + "p95": 104.16000336408615, + "p99": 111.00800335407257 }, "isolatedSum": { - "p50": 233.08799415826797, - "p90": 240.63999205827713, - "p95": 246.59200757741928, - "p99": 269.53600347042084 + "p50": 123.71199950575829, + "p90": 136.35199517011642, + "p95": 141.92000031471252, + "p99": 149.1519957780838 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 44863488, - "combineLogicalBytes": 89726976, - "fanoutMean": 5.34814453125, - "recvTokensMax": 1385, - "stragglerRank": 7, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 21, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 512, - "globalTokens": 4096, + "tokensPerRank": 4, + "globalTokens": 32, "dispatch": { - "p50": 152.79999375343323, - "p90": 157.02399611473083, - "p95": 159.2639982700348, - "p99": 168.16000640392303 + "p50": 64.25599753856659, + "p90": 71.07199728488922, + "p95": 74.52800124883652, + "p99": 77.40800082683563 }, "combine": { - "p50": 189.60000574588776, - "p90": 192.7040070295334, - "p95": 194.46399807929993, - "p99": 207.71199464797974 + "p50": 61.47199869155884, + "p90": 70.88000327348709, + "p95": 71.84000313282013, + "p99": 74.11199808120728 }, "roundtrip": { - "p50": 395.04000544548035, - "p90": 400.2879858016968, - "p95": 402.3360013961792, - "p99": 415.6799912452698 + "p50": 101.02400183677673, + "p90": 105.21599650382996, + "p95": 107.00800269842148, + "p99": 113.0559965968132 }, "isolatedSum": { - "p50": 342.399999499321, - "p90": 349.7280031442642, - "p95": 353.7279963493347, - "p99": 375.87200105190277 + "p50": 125.72799623012543, + "p90": 141.9520005583763, + "p95": 146.36800438165665, + "p99": 151.5199989080429 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 89751552, - "combineLogicalBytes": 179503104, - "fanoutMean": 5.349609375, - "recvTokensMax": 2772, - 
"stragglerRank": 4, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 39, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 1024, - "globalTokens": 8192, + "tokensPerRank": 8, + "globalTokens": 64, "dispatch": { - "p50": 245.92000246047974, - "p90": 251.3599991798401, - "p95": 252.99200415611267, - "p99": 264.0640139579773 + "p50": 66.43199920654297, + "p90": 75.68000257015228, + "p95": 77.40800082683563, + "p99": 87.00799942016602 }, "combine": { - "p50": 390.75198769569397, - "p90": 398.9120125770569, - "p95": 401.66398882865906, - "p99": 409.0240001678467 + "p50": 68.89600306749344, + "p90": 73.21599870920181, + "p95": 74.14399832487106, + "p99": 76.06399804353714 }, "roundtrip": { - "p50": 774.0160226821899, - "p90": 781.9200158119202, - "p95": 786.4320278167725, - "p99": 796.3839769363403 + "p50": 110.3999987244606, + "p90": 114.23999816179276, + "p95": 116.28799885511398, + "p99": 123.96799772977829 }, "isolatedSum": { - "p50": 636.6719901561737, - "p90": 650.272011756897, - "p95": 654.6559929847717, - "p99": 673.088014125824 + "p50": 135.3280022740364, + "p90": 148.8960012793541, + "p95": 151.5519991517067, + "p99": 163.07199746370316 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 179511296, - "combineLogicalBytes": 359022592, - "fanoutMean": 5.349853515625, - "recvTokensMax": 5558, - "stragglerRank": 7, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 74, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2048, - "globalTokens": 16384, + "tokensPerRank": 16, + "globalTokens": 128, "dispatch": { - "p50": 438.1119906902313, - "p90": 448.09600710868835, - "p95": 452.4799883365631, - "p99": 461.63201332092285 + "p50": 75.80800354480743, + "p90": 78.17599922418594, + "p95": 79.1039988398552, + "p99": 86.04799956083298 }, "combine": { - 
"p50": 750.6240010261536, - "p90": 756.4160227775574, - "p95": 758.2399845123291, - "p99": 767.0400142669678 + "p50": 72.64000177383423, + "p90": 81.53600245714188, + "p95": 83.10399949550629, + "p99": 84.6719965338707 }, "roundtrip": { - "p50": 1456.3839435577393, - "p90": 1466.4959907531738, - "p95": 1470.3359603881836, - "p99": 1482.3039770126343 + "p50": 128.7039965391159, + "p90": 136.1279934644699, + "p95": 137.69599795341492, + "p99": 141.82400703430176 }, "isolatedSum": { - "p50": 1188.735991716385, - "p90": 1204.5120298862457, - "p95": 1210.7199728488922, - "p99": 1228.6720275878906 + "p50": 148.44800531864166, + "p90": 159.71200168132782, + "p95": 162.20799833536148, + "p99": 170.71999609470367 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 358055936, - "combineLogicalBytes": 716111872, - "fanoutMean": 5.33544921875, - "recvTokensMax": 10982, - "stragglerRank": 4, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 145, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 4096, - "globalTokens": 32768, + "tokensPerRank": 32, + "globalTokens": 256, "dispatch": { - "p50": 854.6559810638428, - "p90": 867.7120208740234, - "p95": 873.2159733772278, - "p99": 887.8080248832703 + "p50": 103.58399897813797, + "p90": 112.8000020980835, + "p95": 114.30399864912033, + "p99": 124.41600114107132 }, "combine": { - "p50": 1436.5119934082031, - "p90": 1444.5120096206665, - "p95": 1448.3519792556763, - "p99": 1471.9359874725342 + "p50": 101.43999755382538, + "p90": 109.11999642848969, + "p95": 110.3999987244606, + "p99": 120.06399780511856 }, "roundtrip": { - "p50": 2809.664011001587, - "p90": 2821.1519718170166, - "p95": 2827.1679878234863, - "p99": 2873.1839656829834 + "p50": 186.81600689888, + "p90": 192.83199310302734, + "p95": 196.76800072193146, + "p99": 202.78400182724 }, "isolatedSum": { - "p50": 2291.167974472046, - "p90": 2312.22403049469, - "p95": 
2321.567952632904, - "p99": 2359.7440123558044 + "p50": 205.02399653196335, + "p90": 221.91999852657318, + "p95": 224.70399737358093, + "p99": 244.47999894618988 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 716197888, - "combineLogicalBytes": 1432395776, - "fanoutMean": 5.336090087890625, - "recvTokensMax": 21939, - "stragglerRank": 7, - "correct": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 287, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 186.49600446224213, + "p90": 195.96800208091736, + "p95": 199.3280053138733, + "p99": 204.92799580097198 + }, + "combine": { + "p50": 183.58400464057922, + "p90": 196.83200120925903, + "p95": 200.03199577331543, + "p99": 208.064004778862 + }, + "roundtrip": { + "p50": 342.72000193595886, + "p90": 358.17599296569824, + "p95": 361.63198947906494, + "p99": 370.33599615097046 + }, + "isolatedSum": { + "p50": 370.08000910282135, + "p90": 392.8000032901764, + "p95": 399.3600010871887, + "p99": 412.992000579834 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 564, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 337.2479975223541, + "p90": 365.34398794174194, + "p95": 374.752014875412, + "p99": 394.81601119041443 + }, + "combine": { + "p50": 529.2800068855286, + "p90": 540.6079888343811, + "p95": 541.9840216636658, + "p99": 545.2160239219666 + }, + "roundtrip": { + "p50": 585.9519839286804, + "p90": 642.3360109329224, + "p95": 655.9680104255676, + "p99": 683.1039786338806 + }, + "isolatedSum": { + "p50": 866.5280044078827, + "p90": 905.951976776123, + "p95": 916.7360365390778, + "p99": 940.032035112381 + }, + "roundtripMeasured": 
true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 1104, + "stragglerRank": 4, + "correct": true, "samplesPooled": 600, "trials": 3 } ] }, { - "id": "cx-acd7c4ed", - "identity": "b300|deepep|4096|8|128|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||dc27c5e0894e569", - "colorKey": "b300_c4c63f07", - "comparisonKey": "31714ccd7ce96f8f", + "id": "cx-1a41c2ea", + "identity": "gb300|deepep|v1|4096|8|128|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ebe68878aa18bb0", + "colorKey": "gb300_b1bd5887", + "comparisonKey": "100b396b86e03573", "schemaVersion": 3, - "generatedAt": "2026-06-27T09:52:16.802838+00:00", + "generatedAt": "2026-06-29T14:01:21.648224+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_07", - "sku": "b300", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", "backend": "deepep", - "phase": "prefill", + "phase": "decode", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "runtime-visible-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep · fp8", + "label": "GB300 EP8 · deepep · fp8", "model": "Qwen3.5", "shape": { "hidden": 4096, @@ -27484,14 +27354,15 @@ "unevenTokens": "none", "eplbEnabled": false, "dispatchDtype": "fp8", + "kernelGeneration": "v1", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1351, + "achievedFraction": 0.1316, "configuredUnits": 20, - "deviceUnits": 148, + "deviceUnits": 152, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -27499,244 +27370,318 @@ }, "placement": { "kind": "packed", - 
"nodes": 1, + "nodes": 2, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "dc27c5e0894e569", - "workloadId": "set:6:76d8142d69406335", - "workloadSource": "canonical-serialized", + "traceSignature": "ebe68878aa18bb0", + "workloadId": null, + "workloadSource": "seeded-runtime", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", + "backendVersion": "1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28285696261", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285696261", - "createdAt": "2026-06-27T09:52:16.802838+00:00", - "sha": "149586650dbed5b7579537347e9489d5b41543c1" + "id": "28374342409", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409", + "createdAt": "2026-06-29T13:08:27Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 1, + "globalTokens": 8, "dispatch": { - "p50": 1875.615954399109, - "p90": 2675.9040355682373, - "p95": 2804.8319816589355, - "p99": 3161.2160205841064 + "p50": 408.8959991931915, + "p90": 433.1839978694916, + "p95": 441.18401408195496, + "p99": 458.624005317688 }, "combine": { - "p50": 1791.424036026001, - "p90": 2183.648109436035, - "p95": 2710.495948791504, - "p99": 2984.6720695495605 + "p50": 65.72800129652023, + "p90": 71.52000069618225, + "p95": 74.17599856853485, + "p99": 82.91199803352356 }, "roundtrip": { - "p50": 1945.6959962844849, - "p90": 2103.775978088379, - "p95": 2727.839946746826, - "p99": 3128.959894180298 + "p50": 453.66400480270386, + "p90": 478.36801409721375, + "p95": 484.8639965057373, + "p99": 517.6960229873657 }, "isolatedSum": { - "p50": 3667.03999042511, - "p90": 4859.5521450042725, - "p95": 5515.327930450439, - "p99": 6145.888090133667 + "p50": 474.62400048971176, + "p90": 
504.7039985656738, + "p95": 515.3600126504898, + "p99": 541.5360033512115 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 22282240, - "combineLogicalBytes": 44564480, - "fanoutMean": 5.3125, - "recvTokensMax": 699, - "stragglerRank": 4, + "dispatchLogicalBytes": 172032, + "combineLogicalBytes": 344064, + "fanoutMean": 5.25, + "recvTokensMax": 6, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 256, - "globalTokens": 2048, + "tokensPerRank": 2, + "globalTokens": 16, "dispatch": { - "p50": 1994.4000244140625, - "p90": 2822.2079277038574, - "p95": 3089.344024658203, - "p99": 4134.687900543213 + "p50": 402.5599956512451, + "p90": 429.6639859676361, + "p95": 439.9360120296478, + "p99": 457.63200521469116 }, "combine": { - "p50": 1834.3039751052856, - "p90": 2468.640089035034, - "p95": 2714.9438858032227, - "p99": 3004.672050476074 + "p50": 67.61600077152252, + "p90": 72.80000299215317, + "p95": 76.38400048017502, + "p99": 82.62400329113007 }, "roundtrip": { - "p50": 2093.0240154266357, - "p90": 2329.024076461792, - "p95": 2922.7840900421143, - "p99": 3284.0960025787354 + "p50": 440.12799859046936, + "p90": 471.2640047073364, + "p95": 478.84801030158997, + "p99": 491.456001996994 }, "isolatedSum": { - "p50": 3828.703999519348, - "p90": 5290.848016738892, - "p95": 5804.287910461426, - "p99": 7139.359951019287 + "p50": 470.17599642276764, + "p90": 502.4639889597893, + "p95": 516.3200125098228, + "p99": 540.2560085058212 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 44863488, - "combineLogicalBytes": 89726976, - "fanoutMean": 5.34814453125, - "recvTokensMax": 1385, - "stragglerRank": 5, + "dispatchLogicalBytes": 352256, + "combineLogicalBytes": 704512, + "fanoutMean": 5.375, + "recvTokensMax": 12, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 512, - "globalTokens": 4096, + "tokensPerRank": 4, + "globalTokens": 32, "dispatch": { - "p50": 2082.495927810669, - 
"p90": 2575.5200386047363, - "p95": 3061.8879795074463, - "p99": 3882.4000358581543 + "p50": 399.7119963169098, + "p90": 434.84801054000854, + "p95": 495.2000081539154, + "p99": 603.8399934768677 }, "combine": { - "p50": 1895.7120180130005, - "p90": 2081.5999507904053, - "p95": 2722.0799922943115, - "p99": 3054.0480613708496 + "p50": 68.09599697589874, + "p90": 74.94399696588516, + "p95": 79.83999699354172, + "p99": 108.70400071144104 }, "roundtrip": { - "p50": 2248.447895050049, - "p90": 2507.391929626465, - "p95": 3178.4000396728516, - "p99": 3517.632007598877 + "p50": 439.9679899215698, + "p90": 469.08798813819885, + "p95": 509.8239779472351, + "p99": 644.5440053939819 }, "isolatedSum": { - "p50": 3978.2079458236694, - "p90": 4657.119989395142, - "p95": 5783.967971801758, - "p99": 6936.448097229004 + "p50": 467.80799329280853, + "p90": 509.7920075058937, + "p95": 575.0400051474571, + "p99": 712.5439941883087 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 89751552, - "combineLogicalBytes": 179503104, - "fanoutMean": 5.349609375, - "recvTokensMax": 2772, - "stragglerRank": 5, + "dispatchLogicalBytes": 692224, + "combineLogicalBytes": 1384448, + "fanoutMean": 5.28125, + "recvTokensMax": 26, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 1024, - "globalTokens": 8192, + "tokensPerRank": 8, + "globalTokens": 64, "dispatch": { - "p50": 2266.335964202881, - "p90": 2846.816062927246, - "p95": 3325.5679607391357, - "p99": 3900.8638858795166 + "p50": 410.6239974498749, + "p90": 436.8000030517578, + "p95": 446.4319944381714, + "p99": 454.46398854255676 }, "combine": { - "p50": 2154.8800468444824, - "p90": 2735.584020614624, - "p95": 3072.096109390259, - "p99": 3418.11203956604 + "p50": 70.52800059318542, + "p90": 76.19199901819229, + "p95": 78.68800312280655, + "p99": 89.28000181913376 }, "roundtrip": { - "p50": 2644.864082336426, - "p90": 3269.08802986145, - "p95": 3706.2718868255615, - "p99": 6074.7199058532715 + 
"p50": 451.29600167274475, + "p90": 476.25601291656494, + "p95": 482.40000009536743, + "p99": 499.7119903564453 }, "isolatedSum": { - "p50": 4421.216011047363, - "p90": 5582.40008354187, - "p95": 6397.6640701293945, - "p99": 7318.975925445557 + "p50": 481.1519980430603, + "p90": 512.9920020699501, + "p95": 525.1199975609779, + "p99": 543.7439903616905 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 179511296, - "combineLogicalBytes": 359022592, - "fanoutMean": 5.349853515625, - "recvTokensMax": 5558, - "stragglerRank": 5, + "dispatchLogicalBytes": 1372160, + "combineLogicalBytes": 2744320, + "fanoutMean": 5.234375, + "recvTokensMax": 49, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2048, - "globalTokens": 16384, + "tokensPerRank": 16, + "globalTokens": 128, "dispatch": { - "p50": 2653.8240909576416, - "p90": 2906.4319133758545, - "p95": 3221.951961517334, - "p99": 3759.3278884887695 + "p50": 407.99999237060547, + "p90": 434.112012386322, + "p95": 444.38400864601135, + "p99": 462.72000670433044 }, "combine": { - "p50": 2523.5838890075684, - "p90": 2799.743890762329, - "p95": 3378.0479431152344, - "p99": 3780.8001041412354 + "p50": 72.35199958086014, + "p90": 77.34400033950806, + "p95": 79.93599772453308, + "p99": 86.84799820184708 }, "roundtrip": { - "p50": 3350.048065185547, - "p90": 3693.056106567383, - "p95": 4236.576080322266, - "p99": 4646.240234375 + "p50": 450.46401023864746, + "p90": 475.13601183891296, + "p95": 482.7519953250885, + "p99": 500.19198656082153 }, "isolatedSum": { - "p50": 5177.40797996521, - "p90": 5706.175804138184, - "p95": 6599.999904632568, - "p99": 7540.127992630005 + "p50": 480.3519919514656, + "p90": 511.4560127258301, + "p95": 524.3200063705444, + "p99": 549.5680049061775 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 358055936, - "combineLogicalBytes": 716111872, - "fanoutMean": 5.33544921875, - "recvTokensMax": 10982, - "stragglerRank": 6, + "dispatchLogicalBytes": 
2732032, + "combineLogicalBytes": 5464064, + "fanoutMean": 5.2109375, + "recvTokensMax": 94, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 4096, - "globalTokens": 32768, + "tokensPerRank": 32, + "globalTokens": 256, "dispatch": { - "p50": 3389.280080795288, - "p90": 3919.840097427368, - "p95": 4479.1998863220215, - "p99": 6919.424057006836 + "p50": 401.6000032424927, + "p90": 423.8719940185547, + "p95": 436.352014541626, + "p99": 457.92001485824585 }, "combine": { - "p50": 3219.4879055023193, - "p90": 3500.704050064087, - "p95": 4088.6402130126953, - "p99": 4587.488174438477 + "p50": 73.98399710655212, + "p90": 78.87999713420868, + "p95": 81.15199953317642, + "p99": 87.87199854850769 }, "roundtrip": { - "p50": 4788.127899169922, - "p90": 4992.767810821533, - "p95": 5423.679828643799, - "p99": 6249.695777893066 + "p50": 444.8640048503876, + "p90": 467.7119851112366, + "p95": 476.73600912094116, + "p99": 497.47198820114136 }, "isolatedSum": { - "p50": 6608.767986297607, - "p90": 7420.544147491455, - "p95": 8567.840099334717, - "p99": 11506.912231445312 + "p50": 475.5840003490448, + "p90": 502.75199115276337, + "p95": 517.5040140748024, + "p99": 545.7920134067535 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 716197888, - "combineLogicalBytes": 1432395776, - "fanoutMean": 5.336090087890625, - "recvTokensMax": 21939, - "stragglerRank": 7, + "dispatchLogicalBytes": 5562368, + "combineLogicalBytes": 11124736, + "fanoutMean": 5.3046875, + "recvTokensMax": 186, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 404.5119881629944, + "p90": 432.96000361442566, + "p95": 440.41600823402405, + "p99": 459.77601408958435 + }, + "combine": { + "p50": 85.88799834251404, + "p90": 92.86399930715561, + "p95": 97.08800166845322, + "p99": 130.94399869441986 + }, + "roundtrip": { + "p50": 451.61598920822144, + "p90": 
477.88798809051514, + "p95": 501.0560154914856, + "p99": 640.2559876441956 + }, + "isolatedSum": { + "p50": 490.3999865055084, + "p90": 525.8240029215813, + "p95": 537.5040099024773, + "p99": 590.7200127840042 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 11096064, + "combineLogicalBytes": 22192128, + "fanoutMean": 5.291015625, + "recvTokensMax": 358, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 403.29599380493164, + "p90": 433.53599309921265, + "p95": 451.9039988517761, + "p99": 577.0879983901978 + }, + "combine": { + "p50": 101.9200012087822, + "p90": 108.83200168609619, + "p95": 112.64000087976456, + "p99": 144.44799721240997 + }, + "roundtrip": { + "p50": 468.83198618888855, + "p90": 489.8880124092102, + "p95": 501.4079809188843, + "p99": 646.4959979057312 + }, + "isolatedSum": { + "p50": 505.21599501371384, + "p90": 542.3679947853088, + "p95": 564.5439997315407, + "p99": 721.5359956026077 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22282240, + "combineLogicalBytes": 44564480, + "fanoutMean": 5.3125, + "recvTokensMax": 699, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 @@ -27744,28 +27689,28 @@ ] }, { - "id": "cx-a725beb5", - "identity": "b300|deepep|5120|8|160|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||0c022a63bbcbf42", - "colorKey": "b300_c4c63f07", - "comparisonKey": "9a5b239287748a0a", + "id": "cx-214b01d3", + "identity": "gb300|deepep|v1|5120|8|160|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||13e2b193b87a112", + "colorKey": "gb300_b1bd5887", + "comparisonKey": "6b1e52df2e686455", "schemaVersion": 3, - "generatedAt": "2026-06-27T09:52:25.584381+00:00", + "generatedAt": "2026-06-29T14:05:39.896445+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_12", - "sku": "b300", + "publicationStatus": 
"comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", "backend": "deepep", - "phase": "prefill", + "phase": "decode", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "runtime-visible-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep · fp8", + "label": "GB300 EP8 · deepep · fp8", "model": "shape 5120/8/160", "shape": { "hidden": 5120, @@ -27777,14 +27722,15 @@ "unevenTokens": "none", "eplbEnabled": false, "dispatchDtype": "fp8", + "kernelGeneration": "v1", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1351, + "achievedFraction": 0.1316, "configuredUnits": 20, - "deviceUnits": 148, + "deviceUnits": 152, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -27792,244 +27738,318 @@ }, "placement": { "kind": "packed", - "nodes": 1, + "nodes": 2, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "0c022a63bbcbf42", - "workloadId": "set:6:28c0c09b13ff0acf", - "workloadSource": "canonical-serialized", + "traceSignature": "13e2b193b87a112", + "workloadId": null, + "workloadSource": "seeded-runtime", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", + "backendVersion": "1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28285707789", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285707789", - "createdAt": "2026-06-27T09:52:25.584381+00:00", - "sha": "149586650dbed5b7579537347e9489d5b41543c1" + "id": "28374342409", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409", + 
"createdAt": "2026-06-29T13:08:27Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 1, + "globalTokens": 8, "dispatch": { - "p50": 155.20000457763672, - "p90": 159.2320054769516, - "p95": 161.9199961423874, - "p99": 180.83199858665466 + "p50": 432.5439929962158, + "p90": 458.3680033683777, + "p95": 472.896009683609, + "p99": 526.4000296592712 }, "combine": { - "p50": 95.74399888515472, - "p90": 98.14400225877762, - "p95": 99.10400211811066, - "p99": 110.36799848079681 + "p50": 69.56800073385239, + "p90": 78.68800312280655, + "p95": 82.20800012350082, + "p99": 96.92800045013428 }, "roundtrip": { - "p50": 242.5920069217682, - "p90": 246.20799720287323, - "p95": 248.28800559043884, - "p99": 264.0959918498993 + "p50": 474.2079973220825, + "p90": 498.6560046672821, + "p95": 505.3759813308716, + "p99": 537.2160077095032 }, "isolatedSum": { - "p50": 250.94400346279144, - "p90": 257.3760077357292, - "p95": 261.02399826049805, - "p99": 291.1999970674515 + "p50": 502.1119937300682, + "p90": 537.0560064911842, + "p95": 555.1040098071098, + "p99": 623.3280301094055 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 27837440, - "combineLogicalBytes": 55674880, - "fanoutMean": 5.3095703125, - "recvTokensMax": 699, - "stragglerRank": 4, + "dispatchLogicalBytes": 215040, + "combineLogicalBytes": 430080, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 256, - "globalTokens": 2048, + "tokensPerRank": 2, + "globalTokens": 16, "dispatch": { - "p50": 201.1519968509674, - "p90": 206.2080055475235, - "p95": 208.00000429153442, - "p99": 217.69599616527557 + "p50": 433.9520037174225, + "p90": 462.4960124492645, + "p95": 474.3039906024933, + "p99": 519.7759866714478 }, "combine": { - "p50": 131.84000551700592, - "p90": 134.65599715709686, - "p95": 135.77599823474884, - "p99": 142.46399700641632 + "p50": 
70.27199864387512, + "p90": 77.08799839019775, + "p95": 81.82399719953537, + "p99": 91.90399944782257 }, "roundtrip": { - "p50": 328.3199965953827, - "p90": 334.56000685691833, - "p95": 336.8000090122223, - "p99": 351.77600383758545 + "p50": 475.6479859352112, + "p90": 499.61599707603455, + "p95": 515.7120227813721, + "p99": 560.7360005378723 }, "isolatedSum": { - "p50": 332.9920023679733, - "p90": 340.86400270462036, - "p95": 343.77600252628326, - "p99": 360.1599931716919 + "p50": 504.2240023612976, + "p90": 539.5840108394623, + "p95": 556.1279878020287, + "p99": 611.6799861192703 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 55552000, - "combineLogicalBytes": 111104000, - "fanoutMean": 5.2978515625, - "recvTokensMax": 1387, - "stragglerRank": 4, + "dispatchLogicalBytes": 440320, + "combineLogicalBytes": 880640, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 512, - "globalTokens": 4096, + "tokensPerRank": 4, + "globalTokens": 32, "dispatch": { - "p50": 302.94400453567505, - "p90": 307.2640001773834, - "p95": 309.82398986816406, - "p99": 326.07999444007874 + "p50": 431.3279986381531, + "p90": 458.49600434303284, + "p95": 470.14400362968445, + "p99": 529.9199819564819 }, "combine": { - "p50": 206.4639925956726, - "p90": 211.71200275421143, - "p95": 213.24799954891205, - "p99": 225.8879989385605 + "p50": 72.41600006818771, + "p90": 79.42400127649307, + "p95": 84.1279998421669, + "p99": 115.167997777462 }, "roundtrip": { - "p50": 523.4879851341248, - "p90": 529.8240184783936, - "p95": 533.3120226860046, - "p99": 555.6480288505554 + "p50": 474.43199157714844, + "p90": 499.07198548316956, + "p95": 509.7600221633911, + "p99": 560.2239966392517 }, "isolatedSum": { - "p50": 509.40799713134766, - "p90": 518.9760029315948, - "p95": 523.0719894170761, - "p99": 551.9679933786392 + "p50": 503.7439987063408, + "p90": 537.9200056195259, + "p95": 554.2720034718513, + "p99": 
645.0879797339439 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 111549440, - "combineLogicalBytes": 223098880, - "fanoutMean": 5.319091796875, - "recvTokensMax": 2762, - "stragglerRank": 4, + "dispatchLogicalBytes": 870400, + "combineLogicalBytes": 1740800, + "fanoutMean": 5.3125, + "recvTokensMax": 25, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 1024, - "globalTokens": 8192, + "tokensPerRank": 8, + "globalTokens": 64, "dispatch": { - "p50": 526.5600085258484, - "p90": 532.480001449585, - "p95": 534.8799824714661, - "p99": 544.8639988899231 + "p50": 434.9760115146637, + "p90": 463.55199813842773, + "p95": 471.1039960384369, + "p99": 509.0559720993042 }, "combine": { - "p50": 429.8880100250244, - "p90": 435.232013463974, - "p95": 437.855988740921, - "p99": 454.0480077266693 + "p50": 74.43200051784515, + "p90": 82.59200304746628, + "p95": 86.56000345945358, + "p99": 113.24799805879593 }, "roundtrip": { - "p50": 936.2559914588928, - "p90": 944.0320134162903, - "p95": 946.6879963874817, - "p99": 960.096001625061 + "p50": 475.0080108642578, + "p90": 496.41600251197815, + "p95": 506.20800256729126, + "p99": 553.056001663208 }, "isolatedSum": { - "p50": 956.4480185508728, - "p90": 967.712014913559, - "p95": 972.7359712123871, - "p99": 998.9120066165924 + "p50": 509.40801203250885, + "p90": 546.144001185894, + "p95": 557.6639994978905, + "p99": 622.3039701581001 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 223365120, - "combineLogicalBytes": 446730240, - "fanoutMean": 5.325439453125, - "recvTokensMax": 5518, - "stragglerRank": 4, + "dispatchLogicalBytes": 1735680, + "combineLogicalBytes": 3471360, + "fanoutMean": 5.296875, + "recvTokensMax": 50, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2048, - "globalTokens": 16384, + "tokensPerRank": 16, + "globalTokens": 128, "dispatch": { - "p50": 967.9359793663025, - "p90": 977.728009223938, - "p95": 
980.7999730110168, - "p99": 989.5679950714111 + "p50": 432.2560131549835, + "p90": 459.7119987010956, + "p95": 472.76800870895386, + "p99": 531.4239859580994 }, "combine": { - "p50": 777.8559923171997, - "p90": 783.9679718017578, - "p95": 787.1999740600586, - "p99": 800.000011920929 + "p50": 75.6160020828247, + "p90": 82.43200182914734, + "p95": 85.37600189447403, + "p99": 93.85599941015244 }, "roundtrip": { - "p50": 1729.024052619934, - "p90": 1740.5760288238525, - "p95": 1744.0320253372192, - "p99": 1758.9759826660156 + "p50": 472.9920029640198, + "p90": 491.9680058956146, + "p95": 498.9440143108368, + "p99": 548.192024230957 }, "isolatedSum": { - "p50": 1745.7919716835022, - "p90": 1761.6959810256958, - "p95": 1767.9999470710754, - "p99": 1789.56800699234 + "p50": 507.8720152378082, + "p90": 542.1440005302429, + "p95": 558.1440106034279, + "p99": 625.2799853682518 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 446817280, - "combineLogicalBytes": 893634560, - "fanoutMean": 5.32647705078125, - "recvTokensMax": 11032, - "stragglerRank": 4, + "dispatchLogicalBytes": 3456000, + "combineLogicalBytes": 6912000, + "fanoutMean": 5.2734375, + "recvTokensMax": 93, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 4096, - "globalTokens": 32768, + "tokensPerRank": 32, + "globalTokens": 256, "dispatch": { - "p50": 1878.9119720458984, - "p90": 1891.3919925689697, - "p95": 1897.055983543396, - "p99": 1933.2799911499023 + "p50": 430.88001012802124, + "p90": 466.0159945487976, + "p95": 493.151992559433, + "p99": 558.9439868927002 }, "combine": { - "p50": 1474.8159646987915, - "p90": 1484.8320484161377, - "p95": 1491.3280010223389, - "p99": 1509.2159509658813 + "p50": 79.1039988398552, + "p90": 86.84799820184708, + "p95": 92.51199662685394, + "p99": 112.70400136709213 }, "roundtrip": { - "p50": 3333.631992340088, - "p90": 3347.424030303955, - "p95": 3355.1039695739746, - "p99": 3383.3279609680176 + "p50": 476.063996553421, + 
"p90": 496.3200092315674, + "p95": 506.335973739624, + "p99": 553.8560152053833 }, "isolatedSum": { - "p50": 3353.72793674469, - "p90": 3376.2240409851074, - "p95": 3388.383984565735, - "p99": 3442.4959421157837 + "p50": 509.98400896787643, + "p90": 552.8639927506447, + "p95": 585.6639891862869, + "p99": 671.6479882597923 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 893132800, - "combineLogicalBytes": 1786265600, - "fanoutMean": 5.323486328125, - "recvTokensMax": 21895, - "stragglerRank": 4, + "dispatchLogicalBytes": 6988800, + "combineLogicalBytes": 13977600, + "fanoutMean": 5.33203125, + "recvTokensMax": 179, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 430.88001012802124, + "p90": 456.1919867992401, + "p95": 464.54399824142456, + "p99": 488.6400103569031 + }, + "combine": { + "p50": 93.98400038480759, + "p90": 101.27999633550644, + "p95": 104.67199981212616, + "p99": 110.944002866745 + }, + "roundtrip": { + "p50": 486.55998706817627, + "p90": 509.0240240097046, + "p95": 521.5680003166199, + "p99": 577.5359869003296 + }, + "isolatedSum": { + "p50": 524.8640105128288, + "p90": 557.4719831347466, + "p95": 569.2159980535507, + "p99": 599.5840132236481 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13987840, + "combineLogicalBytes": 27975680, + "fanoutMean": 5.3359375, + "recvTokensMax": 355, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 433.98401141166687, + "p90": 461.5359902381897, + "p95": 473.4399914741516, + "p99": 519.5840001106262 + }, + "combine": { + "p50": 110.49599945545197, + "p90": 118.33599954843521, + "p95": 121.15199863910675, + "p99": 134.8479986190796 + }, + "roundtrip": { + "p50": 508.54402780532837, + "p90": 526.9759893417358, + "p95": 534.8160266876221, + "p99": 572.2560286521912 + }, + 
"isolatedSum": { + "p50": 544.4800108671188, + "p90": 579.8719897866249, + "p95": 594.5919901132584, + "p99": 654.4319987297058 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 27837440, + "combineLogicalBytes": 55674880, + "fanoutMean": 5.3095703125, + "recvTokensMax": 699, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 @@ -28037,28 +28057,28 @@ ] }, { - "id": "cx-a5fb5961", - "identity": "b300|deepep|6144|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", - "colorKey": "b300_d6fd14c3", - "comparisonKey": "6214ef692f2daf2b", + "id": "cx-5095ae79", + "identity": "gb300|deepep|v1|6144|8|256|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||74444524b5db510", + "colorKey": "gb300_b1bd5887", + "comparisonKey": "e0aaecfc18971490", "schemaVersion": 3, - "generatedAt": "2026-06-27T11:14:24.890661+00:00", + "generatedAt": "2026-06-29T14:10:09.168558+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_03", - "sku": "b300", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", "backend": "deepep", - "phase": "prefill", + "phase": "decode", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", + "measurementContract": "runtime-visible-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep · fp8", + "label": "GB300 EP8 · deepep · fp8", "model": "MiniMax-M3", "shape": { "hidden": 6144, @@ -28070,14 +28090,15 @@ "unevenTokens": "none", "eplbEnabled": false, "dispatchDtype": "fp8", + "kernelGeneration": "v1", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1351, + 
"achievedFraction": 0.1316, "configuredUnits": 20, - "deviceUnits": 148, + "deviceUnits": 152, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -28085,276 +28106,350 @@ }, "placement": { "kind": "packed", - "nodes": 1, + "nodes": 2, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "64d989e2e2a6b31", - "workloadId": "set:6:9f5e1e005a35e937", - "workloadSource": "canonical-serialized", + "traceSignature": "74444524b5db510", + "workloadId": null, + "workloadSource": "seeded-runtime", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", + "backendVersion": "1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28287498289", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28287498289", - "createdAt": "2026-06-27T11:14:24.890661+00:00", - "sha": "df7fddee0a275156d4c72fa006cd2b73bce72613" + "id": "28374342409", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409", + "createdAt": "2026-06-29T13:08:27Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 1, + "globalTokens": 8, "dispatch": { - "p50": 81.18399977684021, - "p90": 83.83999764919281, - "p95": 84.95999872684479, - "p99": 91.90399944782257 + "p50": 427.67998576164246, + "p90": 451.84001326560974, + "p95": 460.1280093193054, + "p99": 497.24799394607544 }, "combine": { - "p50": 102.27199643850327, - "p90": 105.40799796581268, - "p95": 106.36799782514572, - "p99": 112.99200356006622 + "p50": 71.3919997215271, + "p90": 76.12799853086472, + "p95": 79.19999957084656, + "p99": 83.00799876451492 }, "roundtrip": { - "p50": 204.96000349521637, - "p90": 208.41600000858307, - "p95": 210.30400693416595, - "p99": 227.743998169899 + "p50": 471.52000665664673, + "p90": 
494.1120147705078, + "p95": 502.1119713783264, + "p99": 534.8160266876221 }, "isolatedSum": { - "p50": 183.45599621534348, - "p90": 189.2479956150055, - "p95": 191.3279965519905, - "p99": 204.8960030078888 + "p50": 499.07198548316956, + "p90": 527.9680117964745, + "p95": 539.328008890152, + "p99": 580.2559927105904 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 33288192, - "combineLogicalBytes": 66576384, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 6, + "dispatchLogicalBytes": 270336, + "combineLogicalBytes": 540672, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 256, - "globalTokens": 2048, + "tokensPerRank": 2, + "globalTokens": 16, "dispatch": { - "p50": 113.40799927711487, - "p90": 116.70400202274323, - "p95": 118.6240017414093, - "p99": 128.80000472068787 + "p50": 431.90398812294006, + "p90": 452.9919922351837, + "p95": 459.6799910068512, + "p99": 501.9199848175049 }, "combine": { - "p50": 140.9599930047989, - "p90": 143.93599331378937, - "p95": 145.31199634075165, - "p99": 153.4080058336258 + "p50": 70.68800181150436, + "p90": 76.4160007238388, + "p95": 79.00799810886383, + "p99": 88.3840024471283 }, "roundtrip": { - "p50": 306.0159981250763, - "p90": 310.8159899711609, - "p95": 313.2160007953644, - "p99": 340.5759930610657 + "p50": 478.5600006580353, + "p90": 510.591983795166, + "p95": 535.5200171470642, + "p99": 614.687979221344 }, "isolatedSum": { - "p50": 254.36799228191376, - "p90": 260.6399953365326, - "p95": 263.93599808216095, - "p99": 282.20801055431366 + "p50": 502.5919899344444, + "p90": 529.4079929590225, + "p95": 538.687989115715, + "p99": 590.3039872646332 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 66809856, - "combineLogicalBytes": 133619712, - "fanoutMean": 5.3095703125, - "recvTokensMax": 1422, - "stragglerRank": 4, + "dispatchLogicalBytes": 528384, + "combineLogicalBytes": 1056768, + "fanoutMean": 
5.375, + "recvTokensMax": 13, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 512, - "globalTokens": 4096, + "tokensPerRank": 4, + "globalTokens": 32, "dispatch": { - "p50": 173.43999445438385, - "p90": 178.30400168895721, - "p95": 179.967999458313, - "p99": 184.54399704933167 + "p50": 431.4880073070526, + "p90": 456.959992647171, + "p95": 466.2080109119415, + "p99": 506.6559910774231 }, "combine": { - "p50": 239.04000222682953, - "p90": 245.27999758720398, - "p95": 247.23200500011444, - "p99": 258.59200954437256 + "p50": 73.31199944019318, + "p90": 79.03999835252762, + "p95": 83.03999900817871, + "p99": 87.26400136947632 }, "roundtrip": { - "p50": 515.4240131378174, - "p90": 521.5039849281311, - "p95": 525.0880122184753, - "p99": 547.4560260772705 + "p50": 477.728009223938, + "p90": 497.72799015045166, + "p95": 505.21600246429443, + "p99": 524.5440006256104 }, "isolatedSum": { - "p50": 412.4799966812134, - "p90": 423.5839992761612, - "p95": 427.20000445842743, - "p99": 443.1360065937042 + "p50": 504.8000067472458, + "p90": 535.9999909996986, + "p95": 549.2480099201202, + "p99": 593.9199924468994 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 133828608, - "combineLogicalBytes": 267657216, - "fanoutMean": 5.31787109375, - "recvTokensMax": 2779, - "stragglerRank": 4, + "dispatchLogicalBytes": 1062912, + "combineLogicalBytes": 2125824, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 1024, - "globalTokens": 8192, + "tokensPerRank": 8, + "globalTokens": 64, "dispatch": { - "p50": 290.52799940109253, - "p90": 296.35199904441833, - "p95": 299.1040050983429, - "p99": 307.93601274490356 + "p50": 427.90400981903076, + "p90": 453.18400859832764, + "p95": 460.4479968547821, + "p99": 525.3440141677856 }, "combine": { - "p50": 438.6560022830963, - "p90": 443.4239864349365, - "p95": 447.00801372528076, - "p99": 
467.3919975757599 + "p50": 74.8480036854744, + "p90": 81.44000172615051, + "p95": 86.36800199747086, + "p99": 95.83999961614609 }, "roundtrip": { - "p50": 922.2720265388489, - "p90": 931.007981300354, - "p95": 934.719979763031, - "p99": 978.7840247154236 + "p50": 472.28801250457764, + "p90": 500.8000135421753, + "p95": 514.4000053405762, + "p99": 534.9119901657104 }, "isolatedSum": { - "p50": 729.1840016841888, - "p90": 739.7759854793549, - "p95": 746.1120188236237, - "p99": 775.3280103206635 + "p50": 502.75201350450516, + "p90": 534.6240103244781, + "p95": 546.815998852253, + "p99": 621.1840137839317 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 267190272, - "combineLogicalBytes": 534380544, - "fanoutMean": 5.30859375, - "recvTokensMax": 5505, - "stragglerRank": 4, + "dispatchLogicalBytes": 2131968, + "combineLogicalBytes": 4263936, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2048, - "globalTokens": 16384, + "tokensPerRank": 16, + "globalTokens": 128, "dispatch": { - "p50": 532.3519706726074, - "p90": 539.2640233039856, - "p95": 543.5519814491272, - "p99": 568.0000185966492 + "p50": 428.19198966026306, + "p90": 454.912006855011, + "p95": 463.4239971637726, + "p99": 491.90399050712585 }, "combine": { - "p50": 796.064019203186, - "p90": 802.4960160255432, - "p95": 809.5679879188538, - "p99": 841.3119912147522 + "p50": 76.51200145483017, + "p90": 82.94399827718735, + "p95": 85.95199882984161, + "p99": 92.3520028591156 }, "roundtrip": { - "p50": 1721.9840288162231, - "p90": 1732.4479818344116, - "p95": 1739.743947982788, - "p99": 1767.4560546875 + "p50": 472.351998090744, + "p90": 494.4640100002289, + "p95": 501.8879771232605, + "p99": 524.9599814414978 }, "isolatedSum": { - "p50": 1328.4159898757935, - "p90": 1341.7600393295288, - "p95": 1353.119969367981, - "p99": 1409.3120098114014 + "p50": 504.70399111509323, + "p90": 537.8560051321983, + "p95": 
549.3759959936142, + "p99": 584.2559933662415 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 533059584, - "combineLogicalBytes": 1066119168, - "fanoutMean": 5.29547119140625, - "recvTokensMax": 10952, - "stragglerRank": 5, + "dispatchLogicalBytes": 4251648, + "combineLogicalBytes": 8503296, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 4096, - "globalTokens": 32768, + "tokensPerRank": 32, + "globalTokens": 256, "dispatch": { - "p50": 1001.7600059509277, - "p90": 1012.6080513000488, - "p95": 1018.2720422744751, - "p99": 1040.3200387954712 + "p50": 432.6080083847046, + "p90": 458.9439928531647, + "p95": 469.08798813819885, + "p99": 504.57602739334106 }, "combine": { - "p50": 1498.3359575271606, - "p90": 1507.5520277023315, - "p95": 1513.983964920044, - "p99": 1537.0559692382812 + "p50": 80.64000308513641, + "p90": 86.43200248479843, + "p95": 91.10400080680847, + "p99": 96.70399874448776 }, "roundtrip": { - "p50": 3295.1040267944336, - "p90": 3310.1439476013184, - "p95": 3322.4000930786133, - "p99": 3358.4959506988525 + "p50": 472.0959961414337, + "p90": 492.15999245643616, + "p95": 499.7439980506897, + "p99": 512.2560262680054 }, "isolatedSum": { - "p50": 2500.0959634780884, - "p90": 2520.1600790023804, - "p95": 2532.256007194519, - "p99": 2577.3760080337524 + "p50": 513.248011469841, + "p90": 545.3759953379631, + "p95": 560.1919889450073, + "p99": 601.2800261378288 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1065861120, - "combineLogicalBytes": 2131722240, - "fanoutMean": 5.294189453125, - "recvTokensMax": 21781, - "stragglerRank": 5, + "dispatchLogicalBytes": 8454144, + "combineLogicalBytes": 16908288, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 - } - ] - }, + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 430.7839870452881, + "p90": 
454.43201065063477, + "p95": 462.20800280570984, + "p99": 486.33599281311035 + }, + "combine": { + "p50": 94.14400160312653, + "p90": 100.41599720716476, + "p95": 103.74400019645691, + "p99": 108.92800241708755 + }, + "roundtrip": { + "p50": 484.3519926071167, + "p90": 501.21599435806274, + "p95": 507.32797384262085, + "p99": 533.951997756958 + }, + "isolatedSum": { + "p50": 524.9279886484146, + "p90": 554.8480078577995, + "p95": 565.9520030021667, + "p99": 595.2639952301979 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 16711680, + "combineLogicalBytes": 33423360, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 433.27999114990234, + "p90": 455.1039934158325, + "p95": 462.94400095939636, + "p99": 492.2240078449249 + }, + "combine": { + "p50": 114.68800157308578, + "p90": 121.15199863910675, + "p95": 124.03199821710587, + "p99": 128.63999605178833 + }, + "roundtrip": { + "p50": 505.2800178527832, + "p90": 527.679979801178, + "p95": 535.1359844207764, + "p99": 585.312008857727 + }, + "isolatedSum": { + "p50": 547.9679927229881, + "p90": 576.2559920549393, + "p95": 586.9759991765022, + "p99": 620.8640038967133 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 33288192, + "combineLogicalBytes": 66576384, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, { - "id": "cx-fba134bd", - "identity": "b300|deepep|6144|8|256|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", - "colorKey": "b300_c4c63f07", - "comparisonKey": "690e54d4fc20f43e", + "id": "cx-d75d6ecc", + "identity": "gb300|deepep|v1|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||74444524b5db510", + "colorKey": "gb300_c4ac4643", + "comparisonKey": 
"817cf09679b30bf0", "schemaVersion": 3, - "generatedAt": "2026-06-27T09:52:55.540924+00:00", + "generatedAt": "2026-06-29T13:49:31.371479+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_11", - "sku": "b300", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", "backend": "deepep", - "phase": "prefill", + "phase": "decode", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "runtime-visible-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep · fp8", - "model": "MiniMax-M3", + "label": "GB300 EP8 · deepep · fp8", + "model": "DeepSeek-V3/V4", "shape": { - "hidden": 6144, + "hidden": 7168, "topk": 8, "experts": 256, "routing": "uniform", @@ -28363,14 +28458,15 @@ "unevenTokens": "none", "eplbEnabled": false, "dispatchDtype": "fp8", + "kernelGeneration": "v1", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1351, + "achievedFraction": 0.1316, "configuredUnits": 20, - "deviceUnits": 148, + "deviceUnits": 152, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -28378,244 +28474,318 @@ }, "placement": { "kind": "packed", - "nodes": 1, + "nodes": 2, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "64d989e2e2a6b31", - "workloadId": "set:6:9f5e1e005a35e937", - "workloadSource": "canonical-serialized", + "traceSignature": "74444524b5db510", + "workloadId": null, + "workloadSource": "seeded-runtime", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", + "backendVersion": "1.1.0+814e508", "imageDigest": 
"sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28285718802", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285718802", - "createdAt": "2026-06-27T09:52:55.540924+00:00", - "sha": "149586650dbed5b7579537347e9489d5b41543c1" + "id": "28374342409", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409", + "createdAt": "2026-06-29T13:08:27Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 1, + "globalTokens": 8, "dispatch": { - "p50": 156.67200088500977, - "p90": 160.863995552063, - "p95": 164.48000073432922, - "p99": 179.03999984264374 + "p50": 94.78399902582169, + "p90": 109.31199789047241, + "p95": 112.70400136709213, + "p99": 122.94399738311768 }, "combine": { - "p50": 101.6639992594719, - "p90": 103.67999970912933, - "p95": 104.3199971318245, - "p99": 107.26399719715118 + "p50": 73.05599749088287, + "p90": 80.03199845552444, + "p95": 83.42400193214417, + "p99": 89.66399729251862 }, "roundtrip": { - "p50": 251.3599991798401, - "p90": 255.23200631141663, - "p95": 258.87998938560486, - "p99": 285.7919931411743 + "p50": 249.34400618076324, + "p90": 270.30399441719055, + "p95": 275.90399980545044, + "p99": 288.8000011444092 }, "isolatedSum": { - "p50": 258.33600014448166, - "p90": 264.5439952611923, - "p95": 268.7999978661537, - "p99": 286.3039970397949 + "p50": 167.83999651670456, + "p90": 189.34399634599686, + "p95": 196.1280032992363, + "p99": 212.6079946756363 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 33288192, - "combineLogicalBytes": 66576384, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 4, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 256, - 
"globalTokens": 2048, + "tokensPerRank": 2, + "globalTokens": 16, "dispatch": { - "p50": 210.40000021457672, - "p90": 215.07200598716736, - "p95": 218.9439982175827, - "p99": 230.880007147789 + "p50": 96.25600278377533, + "p90": 109.15199667215347, + "p95": 114.68800157308578, + "p99": 125.34399330615997 }, "combine": { - "p50": 140.86399972438812, - "p90": 144.6080058813095, - "p95": 145.7280069589615, - "p99": 171.29600048065186 + "p50": 75.80800354480743, + "p90": 81.727996468544, + "p95": 84.25600081682205, + "p99": 91.87199920415878 }, "roundtrip": { - "p50": 349.95201230049133, - "p90": 354.2720079421997, - "p95": 357.91999101638794, - "p99": 378.62399220466614 + "p50": 253.4399926662445, + "p90": 272.3200023174286, + "p95": 277.536004781723, + "p99": 288.60801458358765 }, "isolatedSum": { - "p50": 351.26399993896484, - "p90": 359.68001186847687, - "p95": 364.6720051765442, - "p99": 402.17600762844086 + "p50": 172.06400632858276, + "p90": 190.87999314069748, + "p95": 198.94400238990784, + "p99": 217.21599251031876 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 66809856, - "combineLogicalBytes": 133619712, - "fanoutMean": 5.3095703125, - "recvTokensMax": 1422, - "stragglerRank": 4, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 512, - "globalTokens": 4096, + "tokensPerRank": 4, + "globalTokens": 32, "dispatch": { - "p50": 335.58401465415955, - "p90": 340.12800455093384, - "p95": 341.15201234817505, - "p99": 352.28800773620605 + "p50": 94.14400160312653, + "p90": 108.89600217342377, + "p95": 114.27199840545654, + "p99": 136.4479959011078 }, "combine": { - "p50": 239.1359955072403, - "p90": 245.02399563789368, - "p95": 247.13599681854248, - "p99": 252.70399451255798 + "p50": 74.68800246715546, + "p90": 80.60800284147263, + "p95": 84.16000008583069, + "p99": 87.07199990749359 }, "roundtrip": { 
- "p50": 575.872004032135, - "p90": 582.8160047531128, - "p95": 585.4079723358154, - "p99": 596.6399908065796 + "p50": 248.28800559043884, + "p90": 267.10399985313416, + "p95": 275.6800055503845, + "p99": 288.7359857559204 }, "isolatedSum": { - "p50": 574.7200101613998, - "p90": 585.1520001888275, - "p95": 588.2880091667175, - "p99": 604.992002248764 + "p50": 168.83200407028198, + "p90": 189.5040050148964, + "p95": 198.43199849128723, + "p99": 223.51999580860138 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 133828608, - "combineLogicalBytes": 267657216, - "fanoutMean": 5.31787109375, - "recvTokensMax": 2779, - "stragglerRank": 7, + "dispatchLogicalBytes": 1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 1024, - "globalTokens": 8192, + "tokensPerRank": 8, + "globalTokens": 64, "dispatch": { - "p50": 584.1599702835083, - "p90": 589.8879766464233, - "p95": 592.2240018844604, - "p99": 601.472020149231 + "p50": 94.84799951314926, + "p90": 107.07200318574905, + "p95": 111.55200004577637, + "p99": 122.72000312805176 }, "combine": { - "p50": 437.5999867916107, - "p90": 441.8880045413971, - "p95": 445.43999433517456, - "p99": 456.7039906978607 + "p50": 77.56800204515457, + "p90": 83.45600217580795, + "p95": 86.27200126647949, + "p99": 91.2960022687912 }, "roundtrip": { - "p50": 1006.943941116333, - "p90": 1015.2640342712402, - "p95": 1019.10400390625, - "p99": 1030.9120416641235 + "p50": 251.23199820518494, + "p90": 268.92799139022827, + "p95": 274.3360102176666, + "p99": 284.2879891395569 }, "isolatedSum": { - "p50": 1021.759957075119, - "p90": 1031.7759811878204, - "p95": 1037.663996219635, - "p99": 1058.1760108470917 + "p50": 172.41600155830383, + "p90": 190.528005361557, + "p95": 197.82400131225586, + "p99": 214.01600539684296 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 267190272, - "combineLogicalBytes": 
534380544, - "fanoutMean": 5.30859375, - "recvTokensMax": 5505, - "stragglerRank": 4, + "dispatchLogicalBytes": 2487296, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2048, - "globalTokens": 16384, + "tokensPerRank": 16, + "globalTokens": 128, "dispatch": { - "p50": 1078.8160562515259, - "p90": 1086.2720012664795, - "p95": 1088.8639688491821, - "p99": 1102.6240587234497 + "p50": 95.90400010347366, + "p90": 108.73600095510483, + "p95": 114.04799669981003, + "p99": 122.52800166606903 }, "combine": { - "p50": 797.0240116119385, - "p90": 804.7360181808472, - "p95": 809.9200129508972, - "p99": 828.2560110092163 + "p50": 79.26400005817413, + "p90": 85.40800213813782, + "p95": 89.1840010881424, + "p99": 94.87999975681305 }, "roundtrip": { - "p50": 1859.071969985962, - "p90": 1870.6560134887695, - "p95": 1876.1919736862183, - "p99": 1887.3920440673828 + "p50": 251.67998671531677, + "p90": 271.93599939346313, + "p95": 277.47198939323425, + "p99": 294.97599601745605 }, "isolatedSum": { - "p50": 1875.8400678634644, - "p90": 1891.0080194473267, - "p95": 1898.7839818000793, - "p99": 1930.880069732666 + "p50": 175.1680001616478, + "p90": 194.14400309324265, + "p95": 203.23199778795242, + "p99": 217.40800142288208 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 533059584, - "combineLogicalBytes": 1066119168, - "fanoutMean": 5.29547119140625, - "recvTokensMax": 10952, - "stragglerRank": 4, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 4096, - "globalTokens": 32768, + "tokensPerRank": 32, + "globalTokens": 256, "dispatch": { - "p50": 2078.239917755127, - "p90": 2087.264060974121, - "p95": 2091.3920402526855, - "p99": 2107.840061187744 + "p50": 95.83999961614609, + "p90": 
108.5439994931221, + "p95": 113.24799805879593, + "p99": 121.56800180673599 }, "combine": { - "p50": 1500.2559423446655, - "p90": 1509.8240375518799, - "p95": 1514.6880149841309, - "p99": 1528.4160375595093 + "p50": 83.77599716186523, + "p90": 90.2400016784668, + "p95": 92.99200028181076, + "p99": 99.29600358009338 }, "roundtrip": { - "p50": 3560.703992843628, - "p90": 3572.9920864105225, - "p95": 3578.847885131836, - "p99": 3600.7680892944336 + "p50": 259.16799902915955, + "p90": 278.52800488471985, + "p95": 283.4239900112152, + "p99": 288.5119915008545 }, "isolatedSum": { - "p50": 3578.4958600997925, - "p90": 3597.088098526001, - "p95": 3606.0800552368164, - "p99": 3636.2560987472534 + "p50": 179.61599677801132, + "p90": 198.7840011715889, + "p95": 206.2399983406067, + "p99": 220.86400538682938 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1065861120, - "combineLogicalBytes": 2131722240, - "fanoutMean": 5.294189453125, - "recvTokensMax": 21781, - "stragglerRank": 4, + "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 99.61599856615067, + "p90": 111.64800077676773, + "p95": 114.94400352239609, + "p99": 127.29600071907043 + }, + "combine": { + "p50": 98.36799651384354, + "p90": 105.05600273609161, + "p95": 107.80800133943558, + "p99": 112.57600039243698 + }, + "roundtrip": { + "p50": 271.7440128326416, + "p90": 290.336012840271, + "p95": 294.14400458335876, + "p99": 308.8639974594116 + }, + "isolatedSum": { + "p50": 197.9839950799942, + "p90": 216.70400351285934, + "p95": 222.75200486183167, + "p99": 239.87200111150742 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + 
"trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 110.78400164842606, + "p90": 119.64800208806992, + "p95": 122.78400361537933, + "p99": 135.5839967727661 + }, + "combine": { + "p50": 118.23999881744385, + "p90": 124.12799894809723, + "p95": 127.10399925708771, + "p99": 132.09599256515503 + }, + "roundtrip": { + "p50": 294.65600848197937, + "p90": 311.2959861755371, + "p95": 316.5439963340759, + "p99": 326.3680040836334 + }, + "isolatedSum": { + "p50": 229.0240004658699, + "p90": 243.77600103616714, + "p95": 249.88800287246704, + "p99": 267.67998933792114 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 @@ -28623,28 +28793,28 @@ ] }, { - "id": "cx-67e5feea", - "identity": "b300|deepep|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", - "colorKey": "b300_d6fd14c3", - "comparisonKey": "ff71982761f18df0", + "id": "cx-7733ba4c", + "identity": "gb300|deepep|v1|7168|8|256|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||74444524b5db510", + "colorKey": "gb300_b1bd5887", + "comparisonKey": "44ee0b05a8b4a1e8", "schemaVersion": 3, - "generatedAt": "2026-06-27T10:26:31.663724+00:00", + "generatedAt": "2026-06-29T13:53:40.208752+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_14", - "sku": "b300", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", "backend": "deepep", - "phase": "prefill", + "phase": "decode", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", + "measurementContract": "runtime-visible-v1", + "topologyClass": 
"gb300-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep · fp8", + "label": "GB300 EP8 · deepep · fp8", "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, @@ -28656,14 +28826,15 @@ "unevenTokens": "none", "eplbEnabled": false, "dispatchDtype": "fp8", + "kernelGeneration": "v1", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1351, + "achievedFraction": 0.1316, "configuredUnits": 20, - "deviceUnits": 148, + "deviceUnits": 152, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -28671,244 +28842,318 @@ }, "placement": { "kind": "packed", - "nodes": 1, + "nodes": 2, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "64d989e2e2a6b31", - "workloadId": "set:6:a426d66e479dc893", - "workloadSource": "canonical-serialized", + "traceSignature": "74444524b5db510", + "workloadId": null, + "workloadSource": "seeded-runtime", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, - "backendVersion": "2.0.0+af9a040", + "backendVersion": "1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28286436120", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28286436120", - "createdAt": "2026-06-27T10:26:31.663724+00:00", - "sha": "91c7acf59a5e524f37742922ec67721d86a03f6b" + "id": "28374342409", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409", + "createdAt": "2026-06-29T13:08:27Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 1, + "globalTokens": 8, "dispatch": { - "p50": 1799.5840311050415, - "p90": 2587.9039764404297, - "p95": 2896.159887313843, - "p99": 3459.968090057373 + "p50": 437.50399351119995, + "p90": 
466.1119878292084, + "p95": 474.8480021953583, + "p99": 494.1120147705078 }, "combine": { - "p50": 1817.7920579910278, - "p90": 2162.816047668457, - "p95": 2672.192096710205, - "p99": 2924.3199825286865 + "p50": 77.02399790287018, + "p90": 82.97599852085114, + "p95": 85.69599688053131, + "p99": 88.92799913883209 }, "roundtrip": { - "p50": 1977.4080514907837, - "p90": 2173.4719276428223, - "p95": 2860.5120182037354, - "p99": 3130.8159828186035 + "p50": 484.8960041999817, + "p90": 512.9280090332031, + "p95": 523.4240293502808, + "p99": 548.6720204353333 }, "isolatedSum": { - "p50": 3617.3760890960693, - "p90": 4750.720024108887, - "p95": 5568.351984024048, - "p99": 6384.28807258606 + "p50": 514.5279914140701, + "p90": 549.0879863500595, + "p95": 560.5439990758896, + "p99": 583.0400139093399 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 38836224, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 7, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 256, - "globalTokens": 2048, + "tokensPerRank": 2, + "globalTokens": 16, "dispatch": { - "p50": 1846.0479974746704, - "p90": 2604.5761108398438, - "p95": 2895.456075668335, - "p99": 3439.487934112549 + "p50": 449.5680034160614, + "p90": 479.99998927116394, + "p95": 490.1440143585205, + "p99": 515.3279900550842 }, "combine": { - "p50": 1870.6239461898804, - "p90": 2174.5920181274414, - "p95": 2705.2159309387207, - "p99": 3008.8319778442383 + "p50": 78.14399898052216, + "p90": 86.496002972126, + "p95": 91.48799628019333, + "p99": 133.31200182437897 }, "roundtrip": { - "p50": 2121.920108795166, - "p90": 2273.087978363037, - "p95": 2978.7840843200684, - "p99": 3390.048027038574 + "p50": 495.10401487350464, + "p90": 525.1200199127197, + "p95": 533.8559746742249, + "p99": 566.4960145950317 }, "isolatedSum": { - 
"p50": 3716.671943664551, - "p90": 4779.168128967285, - "p95": 5600.672006607056, - "p99": 6448.319911956787 + "p50": 527.7120023965836, + "p90": 566.49599224329, + "p95": 581.6320106387138, + "p99": 648.6399918794632 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77944832, - "combineLogicalBytes": 155889664, - "fanoutMean": 5.3095703125, - "recvTokensMax": 1422, - "stragglerRank": 7, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 512, - "globalTokens": 4096, + "tokensPerRank": 4, + "globalTokens": 32, "dispatch": { - "p50": 1958.3040475845337, - "p90": 2819.5838928222656, - "p95": 3096.895933151245, - "p99": 5452.991962432861 + "p50": 440.5759871006012, + "p90": 470.43201327323914, + "p95": 480.9280037879944, + "p99": 524.5760083198547 }, "combine": { - "p50": 1994.7839975357056, - "p90": 2250.5600452423096, - "p95": 2893.791913986206, - "p99": 3337.984085083008 + "p50": 78.78399640321732, + "p90": 85.79199761152267, + "p95": 88.60799670219421, + "p99": 95.29600292444229 }, "roundtrip": { - "p50": 2347.584009170532, - "p90": 2880.44810295105, - "p95": 3284.991979598999, - "p99": 3777.6639461517334 + "p50": 491.2959933280945, + "p90": 518.9759731292725, + "p95": 524.7359871864319, + "p99": 553.2799959182739 }, "isolatedSum": { - "p50": 3953.0880451202393, - "p90": 5070.143938064575, - "p95": 5990.687847137451, - "p99": 8790.97604751587 + "p50": 519.3599835038185, + "p90": 556.2240108847618, + "p95": 569.5360004901886, + "p99": 619.872011244297 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 156133376, - "combineLogicalBytes": 312266752, - "fanoutMean": 5.31787109375, - "recvTokensMax": 2779, + "dispatchLogicalBytes": 1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - 
"tokensPerRank": 1024, - "globalTokens": 8192, + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 443.83999705314636, + "p90": 471.6480076313019, + "p95": 480.6079864501953, + "p99": 500.19198656082153 + }, + "combine": { + "p50": 81.37600123882294, + "p90": 88.28800171613693, + "p95": 92.6399976015091, + "p99": 108.92800241708755 + }, + "roundtrip": { + "p50": 490.84800481796265, + "p90": 514.303982257843, + "p95": 522.5279927253723, + "p99": 539.2320156097412 + }, + "isolatedSum": { + "p50": 525.2159982919693, + "p90": 559.9360093474388, + "p95": 573.2479840517044, + "p99": 609.1199889779091 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487296, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, "dispatch": { - "p50": 2067.199945449829, - "p90": 2887.7758979797363, - "p95": 3118.6559200286865, - "p99": 3810.5599880218506 + "p50": 438.2399916648865, + "p90": 472.6719856262207, + "p95": 483.90400409698486, + "p99": 500.0320076942444 }, "combine": { - "p50": 2245.4400062561035, - "p90": 2792.095899581909, - "p95": 3188.5440349578857, - "p99": 3587.552070617676 + "p50": 83.83999764919281, + "p90": 90.36800265312195, + "p95": 94.40000355243683, + "p99": 98.49599748849869 }, "roundtrip": { - "p50": 2770.080089569092, - "p90": 2971.872091293335, - "p95": 3523.7441062927246, - "p99": 3988.640069961548 + "p50": 487.45599389076233, + "p90": 519.8400020599365, + "p95": 528.2559990882874, + "p99": 541.0240292549133 }, "isolatedSum": { - "p50": 4312.639951705933, - "p90": 5679.8717975616455, - "p95": 6307.199954986572, - "p99": 7398.112058639526 + "p50": 522.0799893140793, + "p90": 563.0399882793427, + "p95": 578.3040076494217, + "p99": 598.5280051827431 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 311721984, - "combineLogicalBytes": 623443968, - "fanoutMean": 
5.30859375, - "recvTokensMax": 5505, - "stragglerRank": 4, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2048, - "globalTokens": 16384, + "tokensPerRank": 32, + "globalTokens": 256, "dispatch": { - "p50": 2318.943977355957, - "p90": 2845.599889755249, - "p95": 3288.3200645446777, - "p99": 3567.9359436035156 + "p50": 443.36000084877014, + "p90": 471.5520143508911, + "p95": 478.4959852695465, + "p99": 490.3999865055084 }, "combine": { - "p50": 2601.759910583496, - "p90": 2804.192066192627, - "p95": 3261.3439559936523, - "p99": 3862.2400760650635 + "p50": 87.77599781751633, + "p90": 94.33600306510925, + "p95": 96.73599898815155, + "p99": 104.99200224876404 }, "roundtrip": { - "p50": 3612.5121116638184, - "p90": 4097.760200500488, - "p95": 4626.783847808838, - "p99": 6537.69588470459 + "p50": 496.832013130188, + "p90": 522.271990776062, + "p95": 527.9039740562439, + "p99": 549.3760108947754 }, "isolatedSum": { - "p50": 4920.703887939453, - "p90": 5649.791955947876, - "p95": 6549.66402053833, - "p99": 7430.176019668579 + "p50": 531.1359986662865, + "p90": 565.8880174160004, + "p95": 575.2319842576981, + "p99": 595.3919887542725 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 621902848, - "combineLogicalBytes": 1243805696, - "fanoutMean": 5.29547119140625, - "recvTokensMax": 10952, - "stragglerRank": 6, + "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 4096, - "globalTokens": 32768, + "tokensPerRank": 64, + "globalTokens": 512, "dispatch": { - "p50": 2815.4239654541016, - "p90": 3583.904027938843, - "p95": 3803.584098815918, - "p99": 4226.624011993408 + "p50": 442.49600172042847, + "p90": 474.2720127105713, + "p95": 480.3520143032074, + 
"p99": 500.5760192871094 }, "combine": { - "p50": 3305.2799701690674, - "p90": 3407.8400135040283, - "p95": 3562.688112258911, - "p99": 4382.976055145264 + "p50": 102.39999741315842, + "p90": 109.21599715948105, + "p95": 113.27999830245972, + "p99": 117.72800236940384 }, "roundtrip": { - "p50": 5279.6478271484375, - "p90": 5909.920215606689, - "p95": 6326.015949249268, - "p99": 6807.90376663208 + "p50": 505.9199929237366, + "p90": 532.3839783668518, + "p95": 538.8799905776978, + "p99": 555.2319884300232 }, "isolatedSum": { - "p50": 6120.703935623169, - "p90": 6991.744041442871, - "p95": 7366.272211074829, - "p99": 8609.600067138672 + "p50": 544.8959991335869, + "p90": 583.4880098700523, + "p95": 593.6320126056671, + "p99": 618.3040216565132 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1243504640, - "combineLogicalBytes": 2487009280, - "fanoutMean": 5.294189453125, - "recvTokensMax": 21781, - "stragglerRank": 7, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 443.83999705314636, + "p90": 475.67999362945557, + "p95": 483.5839867591858, + "p99": 507.423996925354 + }, + "combine": { + "p50": 122.56000190973282, + "p90": 128.22400033473969, + "p95": 130.5920034646988, + "p99": 135.0719928741455 + }, + "roundtrip": { + "p50": 527.9359817504883, + "p90": 553.4719824790955, + "p95": 560.4479908943176, + "p99": 581.1840295791626 + }, + "isolatedSum": { + "p50": 566.3999989628792, + "p90": 603.9039939641953, + "p95": 614.1759902238846, + "p99": 642.4959897994995 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -28916,47 +29161,48 @@ ] }, { - "id": 
"cx-45b4616a", - "identity": "b300|deepep|7168|8|256|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", - "colorKey": "b300_c4c63f07", - "comparisonKey": "56fe7b02fd8e6b1a", + "id": "cx-8eb16503", + "identity": "gb300|deepep|v1|7168|8|384|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||6d507ec2ec8998f", + "colorKey": "gb300_b1bd5887", + "comparisonKey": "b2acbf95773921f9", "schemaVersion": 3, - "generatedAt": "2026-06-27T09:51:04.276703+00:00", + "generatedAt": "2026-06-29T13:55:48.622903+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_15", - "sku": "b300", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", "backend": "deepep", - "phase": "prefill", + "phase": "decode", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "runtime-visible-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep · fp8", - "model": "DeepSeek-V3/V4", + "label": "GB300 EP8 · deepep · fp8", + "model": "Kimi-K2", "shape": { "hidden": 7168, "topk": 8, - "experts": 256, + "experts": 384, "routing": "uniform", "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, "dispatchDtype": "fp8", + "kernelGeneration": "v1", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1351, + "achievedFraction": 0.1316, "configuredUnits": 20, - "deviceUnits": 148, + "deviceUnits": 152, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -28964,244 +29210,318 @@ }, "placement": { "kind": "packed", - "nodes": 1, + "nodes": 2, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - 
"traceSignature": "64d989e2e2a6b31", - "workloadId": "set:6:a426d66e479dc893", - "workloadSource": "canonical-serialized", + "traceSignature": "6d507ec2ec8998f", + "workloadId": null, + "workloadSource": "seeded-runtime", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", + "backendVersion": "1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28285674665", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285674665", - "createdAt": "2026-06-27T09:51:04.276703+00:00", - "sha": "149586650dbed5b7579537347e9489d5b41543c1" + "id": "28374342409", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409", + "createdAt": "2026-06-29T13:08:27Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 1, + "globalTokens": 8, "dispatch": { - "p50": 158.01599621772766, - "p90": 162.6559942960739, - "p95": 164.48000073432922, - "p99": 177.88800597190857 + "p50": 442.30398535728455, + "p90": 470.335990190506, + "p95": 489.9199903011322, + "p99": 534.1439843177795 }, "combine": { - "p50": 108.35199803113937, - "p90": 110.43199896812439, - "p95": 111.455999314785, - "p99": 118.56000125408173 + "p50": 73.56800138950348, + "p90": 80.6720033288002, + "p95": 85.69599688053131, + "p99": 125.02400577068329 }, "roundtrip": { - "p50": 259.39199328422546, - "p90": 266.36800169944763, - "p95": 268.22400093078613, - "p99": 283.55199098587036 + "p50": 482.04800486564636, + "p90": 516.0639882087708, + "p95": 532.6399803161621, + "p99": 585.8240127563477 }, "isolatedSum": { - "p50": 266.36799424886703, - "p90": 273.0879932641983, - "p95": 275.9360000491142, - "p99": 296.4480072259903 + "p50": 515.871986746788, + "p90": 551.0079935193062, + "p95": 575.6159871816635, + "p99": 659.1679900884628 }, "roundtripMeasured": true, - 
"dispatchLogicalBytes": 38836224, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, + "dispatchLogicalBytes": 301056, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 8, "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 256, - "globalTokens": 2048, + "tokensPerRank": 2, + "globalTokens": 16, "dispatch": { - "p50": 219.32800114154816, - "p90": 222.52799570560455, - "p95": 223.55200350284576, - "p99": 230.335995554924 + "p50": 446.9760060310364, + "p90": 470.97599506378174, + "p95": 484.25599932670593, + "p99": 524.6719717979431 }, "combine": { - "p50": 152.28800475597382, - "p90": 155.74400126934052, - "p95": 157.4079990386963, - "p99": 167.64800250530243 + "p50": 75.32799988985062, + "p90": 84.03199911117554, + "p95": 88.25600147247314, + "p99": 120.35199999809265 }, "roundtrip": { - "p50": 371.36000394821167, - "p90": 375.5199909210205, - "p95": 376.99198722839355, - "p99": 389.0239894390106 + "p50": 489.79198932647705, + "p90": 513.375997543335, + "p95": 520.9919810295105, + "p99": 565.8559799194336 }, "isolatedSum": { - "p50": 371.616005897522, - "p90": 378.27199697494507, - "p95": 380.96000254154205, - "p99": 397.98399806022644 + "p50": 522.304005920887, + "p90": 555.0079941749573, + "p95": 572.5120007991791, + "p99": 645.0239717960358 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77944832, - "combineLogicalBytes": 155889664, - "fanoutMean": 5.3095703125, - "recvTokensMax": 1422, - "stragglerRank": 7, + "dispatchLogicalBytes": 609280, + "combineLogicalBytes": 1218560, + "fanoutMean": 5.3125, + "recvTokensMax": 14, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 512, - "globalTokens": 4096, + "tokensPerRank": 4, + "globalTokens": 32, "dispatch": { - "p50": 360.9600067138672, - "p90": 365.1840090751648, - "p95": 367.23199486732483, - "p99": 394.9120044708252 + "p50": 426.2720048427582, + "p90": 
454.367995262146, + "p95": 460.57599782943726, + "p99": 502.52801179885864 }, "combine": { - "p50": 264.5759880542755, - "p90": 268.7680125236511, - "p95": 271.232008934021, - "p99": 281.76000714302063 + "p50": 75.42400062084198, + "p90": 83.03999900817871, + "p95": 87.13600039482117, + "p99": 133.18400084972382 }, "roundtrip": { - "p50": 614.7840023040771, - "p90": 620.9279894828796, - "p95": 623.6799955368042, - "p99": 633.4720253944397 + "p50": 470.2399969100952, + "p90": 500.5760192871094, + "p95": 513.4080052375793, + "p99": 566.9119954109192 }, "isolatedSum": { - "p50": 625.5359947681427, - "p90": 633.9520215988159, - "p95": 638.4640038013458, - "p99": 676.6720116138458 + "p50": 501.69600546360016, + "p90": 537.4079942703247, + "p95": 547.7119982242584, + "p99": 635.7120126485825 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 156133376, - "combineLogicalBytes": 312266752, - "fanoutMean": 5.31787109375, - "recvTokensMax": 2779, - "stragglerRank": 7, + "dispatchLogicalBytes": 1204224, + "combineLogicalBytes": 2408448, + "fanoutMean": 5.25, + "recvTokensMax": 26, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 1024, - "globalTokens": 8192, + "tokensPerRank": 8, + "globalTokens": 64, "dispatch": { - "p50": 627.9360055923462, - "p90": 633.5999965667725, - "p95": 635.8720064163208, - "p99": 650.111973285675 + "p50": 447.1360146999359, + "p90": 469.05601024627686, + "p95": 477.4399995803833, + "p99": 518.1440114974976 }, "combine": { - "p50": 453.0239999294281, - "p90": 457.66401290893555, - "p95": 460.31999588012695, - "p99": 473.56799244880676 + "p50": 79.32800054550171, + "p90": 86.46400272846222, + "p95": 89.37600255012512, + "p99": 105.27999699115753 }, "roundtrip": { - "p50": 1066.5600299835205, - "p90": 1073.6639499664307, - "p95": 1077.5359869003296, - "p99": 1090.1119709014893 + "p50": 490.1440143585205, + "p90": 511.74402236938477, + "p95": 549.6000051498413, + "p99": 588.0320072174072 }, 
"isolatedSum": { - "p50": 1080.9600055217743, - "p90": 1091.264009475708, - "p95": 1096.1920022964478, - "p99": 1123.6799657344818 + "p50": 526.4640152454376, + "p90": 555.5200129747391, + "p95": 566.8160021305084, + "p99": 623.4240084886551 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 311721984, - "combineLogicalBytes": 623443968, - "fanoutMean": 5.30859375, - "recvTokensMax": 5505, - "stragglerRank": 5, + "dispatchLogicalBytes": 2415616, + "combineLogicalBytes": 4831232, + "fanoutMean": 5.265625, + "recvTokensMax": 48, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2048, - "globalTokens": 16384, + "tokensPerRank": 16, + "globalTokens": 128, "dispatch": { - "p50": 1180.3840398788452, - "p90": 1187.1999502182007, - "p95": 1190.4640197753906, - "p99": 1241.3439750671387 + "p50": 439.7760033607483, + "p90": 462.5599980354309, + "p95": 470.2399969100952, + "p99": 483.0079972743988 }, "combine": { - "p50": 815.3600096702576, - "p90": 822.5280046463013, - "p95": 825.8879780769348, - "p99": 834.2080116271973 + "p50": 80.54400235414505, + "p90": 86.91199868917465, + "p95": 91.39200299978256, + "p99": 125.98399817943573 }, "roundtrip": { - "p50": 1978.0479669570923, - "p90": 1988.8639450073242, - "p95": 1993.8240051269531, - "p99": 2242.1441078186035 + "p50": 485.56798696517944, + "p90": 505.0240159034729, + "p95": 513.1840109825134, + "p99": 553.2799959182739 }, "isolatedSum": { - "p50": 1995.7440495491028, - "p90": 2009.727954864502, - "p95": 2016.3519978523254, - "p99": 2075.551986694336 + "p50": 520.3200057148933, + "p90": 549.4719967246056, + "p95": 561.6319999098778, + "p99": 608.9919954538345 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 621902848, - "combineLogicalBytes": 1243805696, - "fanoutMean": 5.29547119140625, - "recvTokensMax": 10952, + "dispatchLogicalBytes": 4924416, + "combineLogicalBytes": 9848832, + "fanoutMean": 5.3671875, + "recvTokensMax": 91, "stragglerRank": 7, "correct": true, 
"samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 4096, - "globalTokens": 32768, + "tokensPerRank": 32, + "globalTokens": 256, "dispatch": { - "p50": 2262.399911880493, - "p90": 2271.775960922241, - "p95": 2276.8959999084473, - "p99": 2323.2638835906982 + "p50": 438.62399458885193, + "p90": 462.43199706077576, + "p95": 473.31199049949646, + "p99": 510.24001836776733 }, "combine": { - "p50": 1527.232050895691, - "p90": 1535.6800556182861, - "p95": 1539.29603099823, - "p99": 1596.2879657745361 + "p50": 85.75999736785889, + "p90": 92.25600212812424, + "p95": 98.62399846315384, + "p99": 112.22399771213531 }, "roundtrip": { - "p50": 3780.895948410034, - "p90": 3792.6719188690186, - "p95": 3798.464059829712, - "p99": 3837.4719619750977 + "p50": 490.6559884548187, + "p90": 509.7600221633911, + "p95": 517.087996006012, + "p99": 546.7519760131836 }, "isolatedSum": { - "p50": 3789.631962776184, - "p90": 3807.4560165405273, - "p95": 3816.1920309066772, - "p99": 3919.5518493652344 + "p50": 524.3839919567108, + "p90": 554.6879991889, + "p95": 571.9359889626503, + "p99": 622.4640160799026 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1243504640, - "combineLogicalBytes": 2487009280, - "fanoutMean": 5.294189453125, - "recvTokensMax": 21781, - "stragglerRank": 5, + "dispatchLogicalBytes": 9748480, + "combineLogicalBytes": 19496960, + "fanoutMean": 5.3125, + "recvTokensMax": 178, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 439.6800100803375, + "p90": 462.5599980354309, + "p95": 470.14400362968445, + "p99": 541.920006275177 + }, + "combine": { + "p50": 100.12800246477127, + "p90": 108.64000022411346, + "p95": 110.81600189208984, + "p99": 137.92000710964203 + }, + "roundtrip": { + "p50": 502.6879906654358, + "p90": 521.7919945716858, + "p95": 529.151976108551, + "p99": 575.7120251655579 + }, + "isolatedSum": { + "p50": 539.8080125451088, + "p90": 
571.1999982595444, + "p95": 580.9600055217743, + "p99": 679.840013384819 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19418112, + "combineLogicalBytes": 38836224, + "fanoutMean": 5.291015625, + "recvTokensMax": 372, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 438.7199878692627, + "p90": 461.91999316215515, + "p95": 477.05599665641785, + "p99": 517.3439979553223 + }, + "combine": { + "p50": 120.09599804878235, + "p90": 128.7039965391159, + "p95": 133.34399461746216, + "p99": 160.863995552063 + }, + "roundtrip": { + "p50": 527.1360278129578, + "p90": 547.2319722175598, + "p95": 557.5680136680603, + "p99": 616.927981376648 + }, + "isolatedSum": { + "p50": 558.815985918045, + "p90": 590.6239897012711, + "p95": 610.39999127388, + "p99": 678.2079935073853 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38757376, + "combineLogicalBytes": 77514752, + "fanoutMean": 5.2802734375, + "recvTokensMax": 707, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -29209,47 +29529,48 @@ ] }, { - "id": "cx-d208a3bd", - "identity": "b300|deepep|7168|8|384|fp8|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||cd50548525dafdf", - "colorKey": "b300_d6fd14c3", - "comparisonKey": "5ca15c20f75abaa9", + "id": "cx-f16587a8", + "identity": "gb300|deepep|v1|7168|8|256|fp8|normal|cached-layout-comm-only-v1|uniform|8|decode|normal|none|none|0|tuned||74444524b5db510", + "colorKey": "gb300_b1b733fb", + "comparisonKey": "1ce91864f23d9173", "schemaVersion": 3, - "generatedAt": "2026-06-27T11:14:09.340656+00:00", + "generatedAt": "2026-06-29T13:51:35.154469+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_07", - "sku": "b300", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", "backend": "deepep", - "phase": "prefill", + "phase": "decode", 
"mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", + "measurementContract": "cached-layout-comm-only-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep · fp8", - "model": "Kimi-K2", + "label": "GB300 EP8 · deepep · fp8 [cl]", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, - "experts": 384, + "experts": 256, "routing": "uniform", "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, "dispatchDtype": "fp8", + "kernelGeneration": "v1", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1351, + "achievedFraction": 0.1316, "configuredUnits": 20, - "deviceUnits": 148, + "deviceUnits": 152, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -29257,244 +29578,318 @@ }, "placement": { "kind": "packed", - "nodes": 1, + "nodes": 2, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "cd50548525dafdf", - "workloadId": "set:6:b23bc0c4b6402c69", - "workloadSource": "canonical-serialized", + "traceSignature": "74444524b5db510", + "workloadId": null, + "workloadSource": "seeded-runtime", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", + "backendVersion": "1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28287503879", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28287503879", - "createdAt": "2026-06-27T11:14:09.340656+00:00", - "sha": "df7fddee0a275156d4c72fa006cd2b73bce72613" + "id": "28374342409", + "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409", + "createdAt": "2026-06-29T13:08:27Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 1, + "globalTokens": 8, "dispatch": { - "p50": 86.27200126647949, - "p90": 89.12000060081482, - "p95": 90.30400216579437, - "p99": 98.30400347709656 + "p50": 78.23999971151352, + "p90": 109.24799740314484, + "p95": 128.4479945898056, + "p99": 150.62400698661804 }, "combine": { - "p50": 108.86400192975998, - "p90": 110.97600311040878, - "p95": 112.2559979557991, - "p99": 117.76000261306763 + "p50": 75.45600086450577, + "p90": 111.64800077676773, + "p95": 116.31999909877777, + "p99": 137.85600662231445 }, "roundtrip": { - "p50": 221.18400037288666, - "p90": 224.99200701713562, - "p95": 226.68799757957458, - "p99": 240.12799561023712 + "p50": 230.04800081253052, + "p90": 263.0079984664917, + "p95": 269.567996263504, + "p99": 295.199990272522 }, "isolatedSum": { - "p50": 195.13600319623947, - "p90": 200.0960037112236, - "p95": 202.56000012159348, - "p99": 216.06400609016418 + "p50": 153.6960005760193, + "p90": 220.89599817991257, + "p95": 244.76799368858337, + "p99": 288.4800136089325 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 38757376, - "combineLogicalBytes": 77514752, - "fanoutMean": 5.2802734375, - "recvTokensMax": 707, - "stragglerRank": 4, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 256, - "globalTokens": 2048, + "tokensPerRank": 2, + "globalTokens": 16, "dispatch": { - "p50": 118.04799735546112, - "p90": 121.08799815177917, - "p95": 122.3360002040863, - "p99": 129.85600531101227 + "p50": 88.44800293445587, + "p90": 124.60800260305405, + "p95": 130.68799674510956, + "p99": 139.8719996213913 }, "combine": { - "p50": 157.05600380897522, - "p90": 
161.15200519561768, - "p95": 161.95200383663177, - "p99": 169.8240041732788 + "p50": 77.82399654388428, + "p90": 114.94400352239609, + "p95": 131.9040060043335, + "p99": 143.74400675296783 }, "roundtrip": { - "p50": 329.3440043926239, - "p90": 333.5680067539215, - "p95": 335.32801270484924, - "p99": 343.58400106430054 + "p50": 230.56000471115112, + "p90": 258.432000875473, + "p95": 270.27198672294617, + "p99": 287.9999876022339 }, "isolatedSum": { - "p50": 275.10400116443634, - "p90": 282.24000334739685, - "p95": 284.2880040407181, - "p99": 299.6800094842911 + "p50": 166.27199947834015, + "p90": 239.55200612545013, + "p95": 262.59200274944305, + "p99": 283.61600637435913 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77285376, - "combineLogicalBytes": 154570752, - "fanoutMean": 5.2646484375, - "recvTokensMax": 1391, - "stragglerRank": 4, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 512, - "globalTokens": 4096, + "tokensPerRank": 4, + "globalTokens": 32, "dispatch": { - "p50": 176.92799866199493, - "p90": 180.67200481891632, - "p95": 182.46400356292725, - "p99": 189.60000574588776 + "p50": 81.56800270080566, + "p90": 114.43199962377548, + "p95": 130.3360015153885, + "p99": 151.5520066022873 + }, + "combine": { + "p50": 76.09599828720093, + "p90": 96.67199850082397, + "p95": 118.1119978427887, + "p99": 140.9280002117157 + }, + "roundtrip": { + "p50": 231.455996632576, + "p90": 267.2959864139557, + "p95": 275.64799785614014, + "p99": 290.6560003757477 + }, + "isolatedSum": { + "p50": 157.6640009880066, + "p90": 211.10399812459946, + "p95": 248.44799935817719, + "p99": 292.480006814003 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, 
+ "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 78.36800068616867, + "p90": 105.3759977221489, + "p95": 122.49600142240524, + "p99": 148.00000190734863 }, "combine": { - "p50": 266.975998878479, - "p90": 271.87201380729675, - "p95": 273.6319899559021, - "p99": 285.3119969367981 + "p50": 78.015998005867, + "p90": 105.76000064611435, + "p95": 120.57600170373917, + "p99": 147.039994597435 }, "roundtrip": { - "p50": 550.2079725265503, - "p90": 556.6719770431519, - "p95": 559.328019618988, - "p99": 570.8479881286621 + "p50": 227.26400196552277, + "p90": 254.5279860496521, + "p95": 263.4879946708679, + "p99": 288.38399052619934 }, "isolatedSum": { - "p50": 443.90399754047394, - "p90": 452.5440186262131, - "p95": 456.09599351882935, - "p99": 474.91200268268585 + "p50": 156.38399869203568, + "p90": 211.13599836826324, + "p95": 243.0720031261444, + "p99": 295.03999650478363 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 154886144, - "combineLogicalBytes": 309772288, - "fanoutMean": 5.275390625, - "recvTokensMax": 2754, - "stragglerRank": 7, + "dispatchLogicalBytes": 2487296, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 1024, - "globalTokens": 8192, + "tokensPerRank": 16, + "globalTokens": 128, "dispatch": { - "p50": 299.26401376724243, - "p90": 304.57600951194763, - "p95": 306.40000104904175, - "p99": 312.9279911518097 + "p50": 80.48000186681747, + "p90": 118.81600320339203, + "p95": 131.8719983100891, + "p99": 171.74400389194489 }, "combine": { - "p50": 455.9360146522522, - "p90": 462.0479941368103, - "p95": 467.6479995250702, - "p99": 488.5759949684143 + "p50": 81.44000172615051, + "p90": 115.74400216341019, + "p95": 128.25599312782288, + "p99": 149.47199821472168 }, "roundtrip": { - "p50": 977.5360226631165, - "p90": 984.0959906578064, - "p95": 988.3840084075928, - "p99": 
1000.1920461654663 + "p50": 231.1359941959381, + "p90": 265.82399010658264, + "p95": 274.2399871349335, + "p99": 290.75199365615845 }, "isolatedSum": { - "p50": 755.2000284194946, - "p90": 766.6240036487579, - "p95": 774.0480005741119, - "p99": 801.503986120224 + "p50": 161.920003592968, + "p90": 234.56000536680222, + "p95": 260.127991437912, + "p99": 321.21600210666656 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 309750784, - "combineLogicalBytes": 619501568, - "fanoutMean": 5.2750244140625, - "recvTokensMax": 5469, - "stragglerRank": 7, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2048, - "globalTokens": 16384, + "tokensPerRank": 32, + "globalTokens": 256, "dispatch": { - "p50": 535.0720286369324, - "p90": 539.3919944763184, - "p95": 542.3679947853088, - "p99": 553.6320209503174 + "p50": 83.0719992518425, + "p90": 118.367999792099, + "p95": 134.14399325847626, + "p99": 146.81600034236908 }, "combine": { - "p50": 812.1280074119568, - "p90": 818.4319734573364, - "p95": 821.120023727417, - "p99": 830.8799862861633 + "p50": 85.4720026254654, + "p90": 123.52000176906586, + "p95": 145.31199634075165, + "p99": 153.24799716472626 }, "roundtrip": { - "p50": 1807.520031929016, - "p90": 1816.864013671875, - "p95": 1821.1840391159058, - "p99": 1864.832043647766 + "p50": 234.40000414848328, + "p90": 270.04799246788025, + "p95": 280.86400032043457, + "p99": 294.94398832321167 }, "isolatedSum": { - "p50": 1347.2000360488892, - "p90": 1357.8239679336548, - "p95": 1363.4880185127258, - "p99": 1384.5120072364807 + "p50": 168.5440018773079, + "p90": 241.88800156116486, + "p95": 279.4559895992279, + "p99": 300.06399750709534 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 619687936, - "combineLogicalBytes": 1239375872, - "fanoutMean": 5.276611328125, - "recvTokensMax": 10883, - "stragglerRank": 7, 
+ "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 4096, - "globalTokens": 32768, + "tokensPerRank": 64, + "globalTokens": 512, "dispatch": { - "p50": 1011.9999647140503, - "p90": 1018.9759731292725, - "p95": 1022.5919485092163, - "p99": 1036.6719961166382 + "p50": 84.99199897050858, + "p90": 112.03200370073318, + "p95": 129.37599420547485, + "p99": 150.81599354743958 }, "combine": { - "p50": 1512.671947479248, - "p90": 1519.5200443267822, - "p95": 1524.0000486373901, - "p99": 1541.6959524154663 + "p50": 101.34399682283401, + "p90": 148.03199470043182, + "p95": 153.79199385643005, + "p99": 163.2319986820221 }, "roundtrip": { - "p50": 3455.4879665374756, - "p90": 3466.2721157073975, - "p95": 3470.144033432007, - "p99": 3507.744073867798 + "p50": 254.55999374389648, + "p90": 292.7680015563965, + "p95": 306.68801069259644, + "p99": 338.75200152397156 }, "isolatedSum": { - "p50": 2524.6719121932983, - "p90": 2538.4960174560547, - "p95": 2546.5919971466064, - "p99": 2578.3679485321045 + "p50": 186.3359957933426, + "p90": 260.063998401165, + "p95": 283.1679880619049, + "p99": 314.04799222946167 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1239834624, - "combineLogicalBytes": 2479669248, - "fanoutMean": 5.278564453125, - "recvTokensMax": 21730, - "stragglerRank": 4, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 97.9200005531311, + "p90": 147.8399932384491, + "p95": 152.19199657440186, + "p99": 160.8320027589798 + }, + "combine": { + "p50": 123.03999811410904, + "p90": 149.98400211334229, + "p95": 158.36800634860992, + "p99": 185.47199666500092 + }, + "roundtrip": { + 
"p50": 272.96000719070435, + "p90": 310.4639947414398, + "p95": 322.81601428985596, + "p99": 345.34400701522827 + }, + "isolatedSum": { + "p50": 220.95999866724014, + "p90": 297.8239953517914, + "p95": 310.5600029230118, + "p99": 346.3039994239807 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 @@ -29502,292 +29897,367 @@ ] }, { - "id": "cx-252efc4d", - "identity": "b300|deepep|7168|8|384|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||cd50548525dafdf", - "colorKey": "b300_c4c63f07", - "comparisonKey": "d0265daf2fea0a3e", + "id": "cx-d1adb2c7", + "identity": "gb300|deepep|v1|7168|8|256|fp8|ll|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||74444524b5db510", + "colorKey": "gb300_4ebffb62", + "comparisonKey": "b12bc00db050e57a", "schemaVersion": 3, - "generatedAt": "2026-06-27T09:51:32.842462+00:00", + "generatedAt": "2026-06-29T13:53:46.301476+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_17", - "sku": "b300", + "publicationStatus": "diagnostic", + "runner": "gb300-8x", + "sku": "gb300", "backend": "deepep", - "phase": "prefill", - "mode": "normal", + "phase": "decode", + "mode": "ll", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "runtime-visible-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep · fp8", - "model": "Kimi-K2", + "label": "GB300 EP8 · deepep · fp8 LL", + "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, - "experts": 384, + "experts": 256, "routing": "uniform", "routingLabel": "uniform", "routingStep": 0, "unevenTokens": 
"none", "eplbEnabled": false, "dispatchDtype": "fp8", + "kernelGeneration": "v1", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1351, - "configuredUnits": 20, - "deviceUnits": 148, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, "paretoEligible": false }, "placement": { "kind": "packed", - "nodes": 1, + "nodes": 2, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "cd50548525dafdf", - "workloadId": "set:6:b23bc0c4b6402c69", - "workloadSource": "canonical-serialized", + "traceSignature": "74444524b5db510", + "workloadId": null, + "workloadSource": "seeded-runtime", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", + "backendVersion": "1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28285685489", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285685489", - "createdAt": "2026-06-27T09:51:32.842462+00:00", - "sha": "149586650dbed5b7579537347e9489d5b41543c1" + "id": "28374342409", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409", + "createdAt": "2026-06-29T13:08:27Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 1, + "globalTokens": 8, "dispatch": { - "p50": 164.86400365829468, - "p90": 169.855996966362, - "p95": 173.69599640369415, - "p99": 187.26399540901184 + "p50": 58.27200040221214, + "p90": 63.90400230884552, + "p95": 67.32799857854843, + "p99": 103.7760004401207 }, "combine": { - "p50": 108.2879975438118, - "p90": 
110.68800091743469, - "p95": 112.15999722480774, - "p99": 124.64000284671783 + "p50": 73.11999797821045, + "p90": 79.68000322580338, + "p95": 83.67999643087387, + "p99": 113.02399635314941 }, "roundtrip": { - "p50": 267.1999931335449, - "p90": 272.0000147819519, - "p95": 274.7200131416321, - "p99": 301.472008228302 + "p50": 1549.1520166397095, + "p90": 1555.7760000228882, + "p95": 1558.4640502929688, + "p99": 1581.6960334777832 }, "isolatedSum": { - "p50": 273.1520012021065, - "p90": 280.5439978837967, - "p95": 285.8559936285019, - "p99": 311.9039982557297 + "p50": 131.3919983804226, + "p90": 143.5840055346489, + "p95": 151.0079950094223, + "p99": 216.7999967932701 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 38757376, - "combineLogicalBytes": 77514752, - "fanoutMean": 5.2802734375, - "recvTokensMax": 707, - "stragglerRank": 4, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 14, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 256, - "globalTokens": 2048, + "tokensPerRank": 2, + "globalTokens": 16, "dispatch": { - "p50": 221.95200622081757, - "p90": 225.0880002975464, - "p95": 227.84000635147095, - "p99": 246.11200392246246 + "p50": 60.7680007815361, + "p90": 66.30399823188782, + "p95": 67.90400296449661, + "p99": 71.29599899053574 }, "combine": { - "p50": 153.3759981393814, - "p90": 157.0879966020584, - "p95": 158.33599865436554, - "p99": 163.5199934244156 + "p50": 73.53600114583969, + "p90": 79.8719972372055, + "p95": 81.37600123882294, + "p99": 88.57599645853043 }, "roundtrip": { - "p50": 374.87998604774475, - "p90": 379.61599230766296, - "p95": 385.72800159454346, - "p99": 410.2720022201538 + "p50": 1551.6159534454346, + "p90": 1555.999994277954, + "p95": 1557.6319694519043, + "p99": 1560.6399774551392 }, "isolatedSum": { - "p50": 375.328004360199, - "p90": 382.1759968996048, - "p95": 386.1760050058365, - "p99": 409.63199734687805 + "p50": 
134.3040019273758, + "p90": 146.17599546909332, + "p95": 149.28000420331955, + "p99": 159.87199544906616 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77285376, - "combineLogicalBytes": 154570752, - "fanoutMean": 5.2646484375, - "recvTokensMax": 1391, - "stragglerRank": 6, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 21, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 512, - "globalTokens": 4096, + "tokensPerRank": 4, + "globalTokens": 32, "dispatch": { - "p50": 358.271986246109, - "p90": 362.43200302124023, - "p95": 364.8639917373657, - "p99": 389.44000005722046 + "p50": 61.59999966621399, + "p90": 67.19999760389328, + "p95": 69.18399780988693, + "p99": 72.09599763154984 }, "combine": { - "p50": 265.4399871826172, - "p90": 270.6559896469116, - "p95": 273.8560140132904, - "p99": 306.68801069259644 + "p50": 73.85600358247757, + "p90": 80.57600259780884, + "p95": 82.59200304746628, + "p99": 86.7839977145195 }, "roundtrip": { - "p50": 616.159975528717, - "p90": 622.8799819946289, - "p95": 628.063976764679, - "p99": 656.4800143241882 + "p50": 1556.2880039215088, + "p90": 1561.2800121307373, + "p95": 1564.3199682235718, + "p99": 1581.503987312317 }, "isolatedSum": { - "p50": 623.7119734287262, - "p90": 633.0879926681519, - "p95": 638.7200057506561, - "p99": 696.1280107498169 + "p50": 135.45600324869156, + "p90": 147.77600020170212, + "p95": 151.7760008573532, + "p99": 158.87999534606934 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 154886144, - "combineLogicalBytes": 309772288, - "fanoutMean": 5.275390625, - "recvTokensMax": 2754, - "stragglerRank": 4, + "dispatchLogicalBytes": 1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 39, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 1024, - "globalTokens": 8192, + "tokensPerRank": 8, + "globalTokens": 64, 
"dispatch": { - "p50": 629.1840076446533, - "p90": 634.1760158538818, - "p95": 637.2799873352051, - "p99": 658.3679914474487 + "p50": 64.19199705123901, + "p90": 70.11199742555618, + "p95": 72.25599884986877, + "p99": 75.83999633789062 }, "combine": { - "p50": 454.912006855011, - "p90": 460.1280093193054, - "p95": 465.2479887008667, - "p99": 487.61600255966187 + "p50": 82.24000036716461, + "p90": 90.04800021648407, + "p95": 91.93599969148636, + "p99": 96.57599776983261 }, "roundtrip": { - "p50": 1072.5760459899902, - "p90": 1080.7360410690308, - "p95": 1090.3040170669556, - "p99": 1124.351978302002 + "p50": 1561.7599487304688, + "p90": 1566.4960145950317, + "p95": 1568.2239532470703, + "p99": 1570.7520246505737 }, "isolatedSum": { - "p50": 1084.0960144996643, - "p90": 1094.3040251731873, - "p95": 1102.5279760360718, - "p99": 1145.9839940071106 + "p50": 146.43199741840363, + "p90": 160.15999764204025, + "p95": 164.19199854135513, + "p99": 172.41599410772324 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 309750784, - "combineLogicalBytes": 619501568, - "fanoutMean": 5.2750244140625, - "recvTokensMax": 5469, - "stragglerRank": 6, + "dispatchLogicalBytes": 2487296, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 74, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2048, - "globalTokens": 16384, + "tokensPerRank": 16, + "globalTokens": 128, "dispatch": { - "p50": 1168.992042541504, - "p90": 1176.31995677948, - "p95": 1185.5360269546509, - "p99": 1203.6160230636597 + "p50": 68.25599819421768, + "p90": 73.82400333881378, + "p95": 75.71200281381607, + "p99": 78.87999713420868 }, "combine": { - "p50": 810.2719783782959, - "p90": 818.943977355957, - "p95": 826.1759877204895, - "p99": 878.6560297012329 + "p50": 89.79199826717377, + "p90": 97.59999811649323, + "p95": 100.54399818181992, + "p99": 109.56799983978271 }, "roundtrip": { - "p50": 1966.6880369186401, - "p90": 1979.6799421310425, - 
"p95": 1991.487979888916, - "p99": 2013.6001110076904 + "p50": 1575.32799243927, + "p90": 1579.8399448394775, + "p95": 1582.3999643325806, + "p99": 1598.6239910125732 }, "isolatedSum": { - "p50": 1979.2640209197998, - "p90": 1995.263934135437, - "p95": 2011.7120146751404, - "p99": 2082.2720527648926 + "p50": 158.04799646139145, + "p90": 171.424001455307, + "p95": 176.256000995636, + "p99": 188.4479969739914 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 619687936, - "combineLogicalBytes": 1239375872, - "fanoutMean": 5.276611328125, - "recvTokensMax": 10883, - "stragglerRank": 4, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 145, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 4096, - "globalTokens": 32768, + "tokensPerRank": 32, + "globalTokens": 256, "dispatch": { - "p50": 2255.136013031006, - "p90": 2265.471935272217, - "p95": 2275.2959728240967, - "p99": 2326.7838954925537 + "p50": 92.47999638319016, + "p90": 96.99200093746185, + "p95": 98.2080027461052, + "p99": 101.50399804115295 }, "combine": { - "p50": 1510.5600357055664, - "p90": 1526.144027709961, - "p95": 1534.656047821045, - "p99": 1569.7920322418213 + "p50": 111.42399907112122, + "p90": 114.97599631547928, + "p95": 115.99999666213989, + "p99": 118.78400295972824 }, "roundtrip": { - "p50": 3753.2479763031006, - "p90": 3776.5119075775146, - "p95": 3788.383960723877, - "p99": 3816.6720867156982 + "p50": 1624.7680187225342, + "p90": 1630.3999423980713, + "p95": 1632.5119733810425, + "p99": 1636.1600160598755 }, "isolatedSum": { - "p50": 3765.6960487365723, - "p90": 3791.6159629821777, - "p95": 3809.9520206451416, - "p99": 3896.575927734375 + "p50": 203.90399545431137, + "p90": 211.96799725294113, + "p95": 214.2079994082451, + "p99": 220.2880010008812 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1239834624, - "combineLogicalBytes": 2479669248, - "fanoutMean": 5.278564453125, 
- "recvTokensMax": 21730, - "stragglerRank": 6, + "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 287, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 152.12799608707428, + "p90": 168.35199296474457, + "p95": 173.2800006866455, + "p99": 187.71199882030487 + }, + "combine": { + "p50": 191.8720006942749, + "p90": 202.59200036525726, + "p95": 206.56000077724457, + "p99": 214.49600160121918 + }, + "roundtrip": { + "p50": 1760.4479789733887, + "p90": 1772.4159955978394, + "p95": 1775.264024734497, + "p99": 1783.4559679031372 + }, + "isolatedSum": { + "p50": 343.9999967813492, + "p90": 370.94399333000183, + "p95": 379.8400014638901, + "p99": 402.20800042152405 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 564, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 252.83199548721313, + "p90": 273.9520072937012, + "p95": 282.6240062713623, + "p99": 301.85601115226746 + }, + "combine": { + "p50": 332.63999223709106, + "p90": 359.20000076293945, + "p95": 367.2960102558136, + "p99": 384.768009185791 + }, + "roundtrip": { + "p50": 2012.7360820770264, + "p90": 2045.7279682159424, + "p95": 2053.7281036376953, + "p99": 2084.736108779907 + }, + "isolatedSum": { + "p50": 585.4719877243042, + "p90": 633.1520080566406, + "p95": 649.9200165271759, + "p99": 686.6240203380585 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 1104, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 @@ -29795,28 +30265,28 @@ ] }, { - "id": "cx-c8d1506e", - "identity": 
"b300|deepep|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|normalized|0.18|64d989e2e2a6b31", - "colorKey": "b300_eee29686", - "comparisonKey": "efab2d3670b24be2", + "id": "cx-ef83f327", + "identity": "gb300|deepep|v1|7168|8|256|fp8|ll|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||74444524b5db510", + "colorKey": "gb300_b8af531e", + "comparisonKey": "71503b9e265e42a9", "schemaVersion": 3, - "generatedAt": "2026-06-26T17:42:54.702578+00:00", + "generatedAt": "2026-06-29T13:55:19.539361+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_15", - "sku": "b300", + "publicationStatus": "diagnostic", + "runner": "gb300-8x", + "sku": "gb300", "backend": "deepep", - "phase": "prefill", - "mode": "normal", - "resourceMode": "normalized", - "suite": "resource-constrained", + "phase": "decode", + "mode": "ll", + "resourceMode": "tuned", + "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", + "measurementContract": "runtime-visible-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep · fp8 (norm)", + "label": "GB300 EP8 · deepep · fp8 LL", "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, @@ -29828,259 +30298,334 @@ "unevenTokens": "none", "eplbEnabled": false, "dispatchDtype": "fp8", + "kernelGeneration": "v1", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { - "requestedFraction": 0.18, - "achievedFraction": 0.1824, - "configuredUnits": 27, - "deviceUnits": 148, - "resourceClass": "resource-constrained", - "conformanceClass": "resource-conforming", - "fixedKernel": false, - "paretoEligible": true + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": 
"not-applicable", + "fixedKernel": true, + "paretoEligible": false }, "placement": { "kind": "packed", - "nodes": 1, + "nodes": 2, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "64d989e2e2a6b31", - "workloadId": "set:6:a426d66e479dc893", - "workloadSource": "canonical-serialized", + "traceSignature": "74444524b5db510", + "workloadId": null, + "workloadSource": "seeded-runtime", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", + "backendVersion": "1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28254479346", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254479346", - "createdAt": "2026-06-26T17:42:54.702578+00:00", - "sha": "60dec7d70f554e252fec87709e2be52752947db1" + "id": "28374342409", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409", + "createdAt": "2026-06-29T13:08:27Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 1, + "globalTokens": 8, "dispatch": { - "p50": 83.45600217580795, - "p90": 86.14400029182434, - "p95": 87.2960016131401, - "p99": 102.08000242710114 + "p50": 58.111999183893204, + "p90": 71.84000313282013, + "p95": 78.015998005867, + "p99": 89.72799777984619 }, "combine": { - "p50": 108.38399827480316, - "p90": 110.75200140476227, - "p95": 111.61600053310394, - "p99": 114.9120032787323 + "p50": 75.96799731254578, + "p90": 83.03999900817871, + "p95": 86.04799956083298, + "p99": 90.84799885749817 }, "roundtrip": { - "p50": 218.33600103855133, - "p90": 221.6320037841797, - "p95": 222.84799814224243, - "p99": 235.23199558258057 + "p50": 1549.6000051498413, + "p90": 1556.5439462661743, + "p95": 1560.703992843628, + "p99": 1568.0639743804932 }, "isolatedSum": { - "p50": 191.84000045061111, - "p90": 196.8960016965866, - 
"p95": 198.91200214624405, - "p99": 216.99200570583344 + "p50": 134.07999649643898, + "p90": 154.88000214099884, + "p95": 164.06399756669998, + "p99": 180.57599663734436 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 38836224, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 4, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 14, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 256, - "globalTokens": 2048, + "tokensPerRank": 2, + "globalTokens": 16, "dispatch": { - "p50": 116.70400202274323, - "p90": 119.64800208806992, - "p95": 121.15199863910675, - "p99": 135.3600025177002 + "p50": 60.447998344898224, + "p90": 66.3359984755516, + "p95": 68.2239979505539, + "p99": 75.48800110816956 }, "combine": { - "p50": 155.29599785804749, - "p90": 167.4560010433197, - "p95": 176.60799622535706, - "p99": 184.1599941253662 + "p50": 77.2479996085167, + "p90": 85.56800335645676, + "p95": 87.71199733018875, + "p99": 94.27200257778168 }, "roundtrip": { - "p50": 324.47999715805054, - "p90": 328.19199562072754, - "p95": 330.04799485206604, - "p99": 345.40799260139465 + "p50": 1554.1759729385376, + "p90": 1560.6720447540283, + "p95": 1563.7120008468628, + "p99": 1572.5120306015015 }, "isolatedSum": { - "p50": 271.9999998807907, - "p90": 287.1040031313896, - "p95": 297.7599948644638, - "p99": 319.5199966430664 + "p50": 137.69599795341492, + "p90": 151.90400183200836, + "p95": 155.93599528074265, + "p99": 169.76000368595123 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77944832, - "combineLogicalBytes": 155889664, - "fanoutMean": 5.3095703125, - "recvTokensMax": 1422, - "stragglerRank": 4, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 21, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 512, - 
"globalTokens": 4096, + "tokensPerRank": 4, + "globalTokens": 32, "dispatch": { - "p50": 177.7919977903366, - "p90": 182.27200210094452, - "p95": 183.9040070772171, - "p99": 191.103994846344 + "p50": 63.77600133419037, + "p90": 78.84799689054489, + "p95": 94.91200000047684, + "p99": 107.93600231409073 }, "combine": { - "p50": 267.520010471344, - "p90": 270.81599831581116, - "p95": 272.0640003681183, - "p99": 275.4879891872406 + "p50": 78.65600287914276, + "p90": 87.90399879217148, + "p95": 90.84799885749817, + "p99": 95.32800316810608 }, "roundtrip": { - "p50": 550.8480072021484, - "p90": 556.9599866867065, - "p95": 560.2560043334961, - "p99": 578.3360004425049 + "p50": 1556.0640096664429, + "p90": 1562.399983406067, + "p95": 1564.6400451660156, + "p99": 1574.1440057754517 }, "isolatedSum": { - "p50": 445.3120082616806, - "p90": 453.0880004167557, - "p95": 455.9680074453354, - "p99": 466.5919840335846 + "p50": 142.43200421333313, + "p90": 166.75199568271637, + "p95": 185.759998857975, + "p99": 203.2640054821968 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 156133376, - "combineLogicalBytes": 312266752, - "fanoutMean": 5.31787109375, - "recvTokensMax": 2779, - "stragglerRank": 7, + "dispatchLogicalBytes": 1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 39, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 1024, - "globalTokens": 8192, + "tokensPerRank": 8, + "globalTokens": 64, "dispatch": { - "p50": 298.0160117149353, - "p90": 302.4959862232208, - "p95": 304.4799864292145, - "p99": 319.07200813293457 + "p50": 67.77600198984146, + "p90": 79.83999699354172, + "p95": 87.99999952316284, + "p99": 112.86400258541107 }, "combine": { - "p50": 452.1920084953308, - "p90": 456.6720128059387, - "p95": 458.624005317688, - "p99": 467.9360091686249 + "p50": 83.45600217580795, + "p90": 93.08800101280212, + "p95": 95.87199985980988, + "p99": 100.80000013113022 }, "roundtrip": { - "p50": 
976.5759706497192, - "p90": 983.8719964027405, - "p95": 991.5199875831604, - "p99": 1023.3279466629028 + "p50": 1566.1439895629883, + "p90": 1593.6000347137451, + "p95": 1602.560043334961, + "p99": 1620.2880144119263 }, "isolatedSum": { - "p50": 750.2080202102661, - "p90": 759.1679990291595, - "p95": 763.1039917469025, - "p99": 787.0080173015594 + "p50": 151.2320041656494, + "p90": 172.92799800634384, + "p95": 183.87199938297272, + "p99": 213.6640027165413 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 311721984, - "combineLogicalBytes": 623443968, - "fanoutMean": 5.30859375, - "recvTokensMax": 5505, - "stragglerRank": 4, + "dispatchLogicalBytes": 2487296, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 74, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2048, - "globalTokens": 16384, + "tokensPerRank": 16, + "globalTokens": 128, "dispatch": { - "p50": 541.4720177650452, - "p90": 546.7519760131836, - "p95": 549.4080185890198, - "p99": 557.7920079231262 + "p50": 69.08799707889557, + "p90": 75.26399940252304, + "p95": 77.98399776220322, + "p99": 290.8799946308136 }, "combine": { - "p50": 814.7199749946594, - "p90": 820.8320140838623, - "p95": 824.0640163421631, - "p99": 847.2959995269775 + "p50": 86.11200004816055, + "p90": 92.6079973578453, + "p95": 95.77599912881851, + "p99": 100.92800110578537 }, "roundtrip": { - "p50": 1818.0160522460938, - "p90": 1827.712059020996, - "p95": 1832.0000171661377, - "p99": 1889.5679712295532 + "p50": 1577.9839754104614, + "p90": 1589.311957359314, + "p95": 1604.383945465088, + "p99": 1625.3440380096436 }, "isolatedSum": { - "p50": 1356.1919927597046, - "p90": 1367.583990097046, - "p95": 1373.4720349311829, - "p99": 1405.0880074501038 + "p50": 155.19999712705612, + "p90": 167.87199676036835, + "p95": 173.75999689102173, + "p99": 391.80799573659897 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 621902848, - "combineLogicalBytes": 
1243805696, - "fanoutMean": 5.29547119140625, - "recvTokensMax": 10952, - "stragglerRank": 4, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 145, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 4096, - "globalTokens": 32768, + "tokensPerRank": 32, + "globalTokens": 256, "dispatch": { - "p50": 1019.6160078048706, - "p90": 1027.9040336608887, - "p95": 1031.391978263855, - "p99": 1045.2799797058105 + "p50": 92.54399687051773, + "p90": 98.14400225877762, + "p95": 101.08800232410431, + "p99": 138.11199367046356 }, "combine": { - "p50": 1529.4400453567505, - "p90": 1537.2480154037476, - "p95": 1540.8639907836914, - "p99": 1614.6240234375 + "p50": 111.93600296974182, + "p90": 118.49600076675415, + "p95": 121.2799996137619, + "p99": 125.37600100040436 }, "roundtrip": { - "p50": 3477.3120880126953, - "p90": 3490.272045135498, - "p95": 3495.3598976135254, - "p99": 3531.3920974731445 + "p50": 1626.3359785079956, + "p90": 1636.0960006713867, + "p95": 1640.5760049819946, + "p99": 1658.400058746338 }, "isolatedSum": { - "p50": 2549.056053161621, - "p90": 2565.1520490646362, - "p95": 2572.2559690475464, - "p99": 2659.9040031433105 + "p50": 204.47999984025955, + "p90": 216.64000302553177, + "p95": 222.3680019378662, + "p99": 263.4879946708679 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1243504640, - "combineLogicalBytes": 2487009280, - "fanoutMean": 5.294189453125, - "recvTokensMax": 21781, - "stragglerRank": 4, + "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 287, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 151.19999647140503, + "p90": 165.75999557971954, + "p95": 170.27199268341064, + "p99": 181.0240000486374 + }, + "combine": { + "p50": 191.26400351524353, + "p90": 
201.53599977493286, + "p95": 205.37599921226501, + "p99": 212.8320038318634 + }, + "roundtrip": { + "p50": 1757.9200267791748, + "p90": 1771.2639570236206, + "p95": 1776.0319709777832, + "p99": 1783.2000255584717 + }, + "isolatedSum": { + "p50": 342.46399998664856, + "p90": 367.2959953546524, + "p95": 375.64799189567566, + "p99": 393.8560038805008 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 564, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 257.9199969768524, + "p90": 279.9359858036041, + "p95": 285.21600365638733, + "p99": 299.80799555778503 + }, + "combine": { + "p50": 336.2880051136017, + "p90": 367.64800548553467, + "p95": 376.5760064125061, + "p99": 390.04799723625183 + }, + "roundtrip": { + "p50": 2014.3039226531982, + "p90": 2045.85599899292, + "p95": 2056.9920539855957, + "p99": 2080.3520679473877 + }, + "isolatedSum": { + "p50": 594.2080020904541, + "p90": 647.5839912891388, + "p95": 661.7920100688934, + "p99": 689.8559927940369 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 1104, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 @@ -30088,107 +30633,108 @@ ] }, { - "id": "cx-9971d342", - "identity": "b300|deepep|7168|8|256|fp8|normal|cached-layout-comm-only-v1|uniform|8|prefill|normal|none|none|0|normalized|0.18|64d989e2e2a6b31", - "colorKey": "b300_84b10b26", - "comparisonKey": "1c850249e23e1e8c", + "id": "cx-7e3a1c52", + "identity": "gb300|deepep|v1|4096|8|128|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||03f98832f76b043", + "colorKey": "gb300_b97bfb88", + "comparisonKey": "c1d0b67251736b2c", "schemaVersion": 3, - "generatedAt": "2026-06-26T18:09:25.013454+00:00", + "generatedAt": 
"2026-06-29T14:04:35.790815+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_15", - "sku": "b300", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", "backend": "deepep", "phase": "prefill", "mode": "normal", - "resourceMode": "normalized", - "suite": "resource-constrained", + "resourceMode": "tuned", + "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "cached-layout-comm-only-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", + "measurementContract": "runtime-visible-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep · fp8 (norm) [cl]", - "model": "DeepSeek-V3/V4", + "label": "GB300 EP8 · deepep · bf16", + "model": "Qwen3.5", "shape": { - "hidden": 7168, + "hidden": 4096, "topk": 8, - "experts": 256, + "experts": 128, "routing": "uniform", "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, - "dispatchDtype": "fp8", + "dispatchDtype": "bf16", + "kernelGeneration": "v1", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { - "requestedFraction": 0.18, - "achievedFraction": 0.1824, - "configuredUnits": 27, - "deviceUnits": 148, - "resourceClass": "resource-constrained", - "conformanceClass": "resource-conforming", + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", "fixedKernel": false, - "paretoEligible": true + "paretoEligible": false }, "placement": { "kind": "packed", - "nodes": 1, + "nodes": 2, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "64d989e2e2a6b31", - "workloadId": "set:6:a426d66e479dc893", - "workloadSource": "canonical-serialized", + "traceSignature": "03f98832f76b043", + "workloadId": null, + 
"workloadSource": "seeded-runtime", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", + "backendVersion": "1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28254499301", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254499301", - "createdAt": "2026-06-26T18:09:25.013454+00:00", - "sha": "60dec7d70f554e252fec87709e2be52752947db1" + "id": "28374342409", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409", + "createdAt": "2026-06-29T13:08:27Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 77.02399790287018, - "p90": 79.48800176382065, - "p95": 80.89599758386612, - "p99": 85.28000116348267 + "p50": 115.93600362539291, + "p90": 125.56800246238708, + "p95": 129.31199371814728, + "p99": 139.42399621009827 }, "combine": { - "p50": 108.5439994931221, - "p90": 111.29599809646606, - "p95": 112.35199868679047, - "p99": 124.41600114107132 + "p50": 106.9440022110939, + "p90": 111.93600296974182, + "p95": 117.11999773979187, + "p99": 121.31199985742569 }, "roundtrip": { - "p50": 211.74399554729462, - "p90": 214.4320011138916, - "p95": 216.0000056028366, - "p99": 233.15200209617615 + "p50": 193.37600469589233, + "p90": 202.55999267101288, + "p95": 205.05599677562714, + "p99": 213.47199380397797 }, "isolatedSum": { - "p50": 185.56799739599228, - "p90": 190.7839998602867, - "p95": 193.24799627065659, - "p99": 209.69600230455399 + "p50": 222.88000583648682, + "p90": 237.5040054321289, + "p95": 246.43199145793915, + "p99": 260.73599606752396 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 38836224, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 5, + "dispatchLogicalBytes": 44564480, + "combineLogicalBytes": 44564480, + 
"fanoutMean": 5.3125, + "recvTokensMax": 699, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 @@ -30197,35 +30743,35 @@ "tokensPerRank": 256, "globalTokens": 2048, "dispatch": { - "p50": 110.1439967751503, - "p90": 113.53600025177002, - "p95": 115.90400338172913, - "p99": 132.6719969511032 + "p50": 145.82400023937225, + "p90": 155.2640050649643, + "p95": 158.55999290943146, + "p99": 168.2240068912506 }, "combine": { - "p50": 153.3759981393814, - "p90": 157.60000050067902, - "p95": 159.32799875736237, - "p99": 173.69599640369415 + "p50": 143.8080072402954, + "p90": 148.51200580596924, + "p95": 154.11199629306793, + "p99": 158.01599621772766 }, "roundtrip": { - "p50": 318.30400228500366, - "p90": 322.52800464630127, - "p95": 325.408011674881, - "p99": 346.49598598480225 + "p50": 260.5440020561218, + "p90": 269.1200077533722, + "p95": 272.352010011673, + "p99": 280.5120050907135 }, "isolatedSum": { - "p50": 263.5199949145317, - "p90": 271.13600075244904, - "p95": 275.2320021390915, - "p99": 306.36799335479736 + "p50": 289.63200747966766, + "p90": 303.77601087093353, + "p95": 312.6719892024994, + "p99": 326.24000310897827 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77944832, - "combineLogicalBytes": 155889664, - "fanoutMean": 5.3095703125, - "recvTokensMax": 1422, - "stragglerRank": 4, + "dispatchLogicalBytes": 89726976, + "combineLogicalBytes": 89726976, + "fanoutMean": 5.34814453125, + "recvTokensMax": 1385, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 @@ -30234,35 +30780,35 @@ "tokensPerRank": 512, "globalTokens": 4096, "dispatch": { - "p50": 171.26399278640747, - "p90": 176.15999281406403, - "p95": 178.6240041255951, - "p99": 194.815993309021 + "p50": 198.68800044059753, + "p90": 217.0879989862442, + "p95": 230.335995554924, + "p99": 242.68800020217896 }, "combine": { - "p50": 268.2879865169525, - "p90": 273.0560004711151, - "p95": 275.64799785614014, - "p99": 283.58399868011475 + "p50": 
214.75200355052948, + "p90": 228.5120040178299, + "p95": 233.21600258350372, + "p99": 253.6959946155548 }, "roundtrip": { - "p50": 543.7120199203491, - "p90": 550.6880283355713, - "p95": 554.1120171546936, - "p99": 576.0639905929565 + "p50": 381.632000207901, + "p90": 396.4479863643646, + "p95": 404.4159948825836, + "p99": 415.3600037097931 }, "isolatedSum": { - "p50": 439.55197930336, - "p90": 449.21599328517914, - "p95": 454.27200198173523, - "p99": 478.39999198913574 + "p50": 413.440003991127, + "p90": 445.6000030040741, + "p95": 463.55199813842773, + "p99": 496.38399481773376 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 156133376, - "combineLogicalBytes": 312266752, - "fanoutMean": 5.31787109375, - "recvTokensMax": 2779, - "stragglerRank": 5, + "dispatchLogicalBytes": 179503104, + "combineLogicalBytes": 179503104, + "fanoutMean": 5.349609375, + "recvTokensMax": 2772, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 @@ -30271,35 +30817,35 @@ "tokensPerRank": 1024, "globalTokens": 8192, "dispatch": { - "p50": 288.92800211906433, - "p90": 294.0160036087036, - "p95": 297.791987657547, - "p99": 315.3280019760132 + "p50": 296.79998755455017, + "p90": 312.9599988460541, + "p95": 322.7519989013672, + "p99": 346.8480110168457 }, "combine": { - "p50": 452.09598541259766, - "p90": 457.37600326538086, - "p95": 461.7280066013336, - "p99": 471.74400091171265 + "p50": 379.96798753738403, + "p90": 390.0800049304962, + "p95": 391.64799451828003, + "p99": 400.57599544525146 }, "roundtrip": { - "p50": 967.1040177345276, - "p90": 974.62397813797, - "p95": 977.5360226631165, - "p99": 995.6160187721252 + "p50": 616.5760159492493, + "p90": 625.5360245704651, + "p95": 629.9840211868286, + "p99": 637.8239989280701 }, "isolatedSum": { - "p50": 741.023987531662, - "p90": 751.3920068740845, - "p95": 759.5199942588806, - "p99": 787.0720028877258 + "p50": 676.7679750919342, + "p90": 703.0400037765503, + "p95": 714.3999934196472, + "p99": 747.4240064620972 
}, "roundtripMeasured": true, - "dispatchLogicalBytes": 311721984, - "combineLogicalBytes": 623443968, - "fanoutMean": 5.30859375, - "recvTokensMax": 5505, - "stragglerRank": 4, + "dispatchLogicalBytes": 359022592, + "combineLogicalBytes": 359022592, + "fanoutMean": 5.349853515625, + "recvTokensMax": 5558, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 @@ -30308,35 +30854,35 @@ "tokensPerRank": 2048, "globalTokens": 16384, "dispatch": { - "p50": 523.3920216560364, - "p90": 529.2800068855286, - "p95": 533.3439707756042, - "p99": 550.1120090484619 + "p50": 505.7600140571594, + "p90": 518.559992313385, + "p95": 523.6160159111023, + "p99": 532.0000052452087 }, "combine": { - "p50": 816.32000207901, - "p90": 824.9599933624268, - "p95": 831.1359882354736, - "p99": 855.135977268219 + "p50": 800.5120158195496, + "p90": 810.5279803276062, + "p95": 811.743974685669, + "p99": 820.4159736633301 }, "roundtrip": { - "p50": 1800.096035003662, - "p90": 1811.743974685669, - "p95": 1825.7919549942017, - "p99": 1866.8160438537598 + "p50": 1271.83997631073, + "p90": 1283.2319736480713, + "p95": 1286.5279912948608, + "p99": 1292.9279804229736 }, "isolatedSum": { - "p50": 1339.7120237350464, - "p90": 1354.2400002479553, - "p95": 1364.4799590110779, - "p99": 1405.247986316681 + "p50": 1306.272029876709, + "p90": 1329.0879726409912, + "p95": 1335.3599905967712, + "p99": 1352.4159789085388 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 621902848, - "combineLogicalBytes": 1243805696, - "fanoutMean": 5.29547119140625, - "recvTokensMax": 10952, - "stragglerRank": 5, + "dispatchLogicalBytes": 716111872, + "combineLogicalBytes": 716111872, + "fanoutMean": 5.33544921875, + "recvTokensMax": 10982, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 @@ -30345,35 +30891,35 @@ "tokensPerRank": 4096, "globalTokens": 32768, "dispatch": { - "p50": 984.9280118942261, - "p90": 992.2239780426025, - "p95": 996.5760111808777, - "p99": 
1026.9759893417358 + "p50": 932.4480295181274, + "p90": 948.2880234718323, + "p95": 953.8879990577698, + "p99": 972.2239971160889 }, "combine": { - "p50": 1529.312014579773, - "p90": 1539.1039848327637, - "p95": 1548.0320453643799, - "p99": 1564.3839836120605 + "p50": 1506.4959526062012, + "p90": 1514.3359899520874, + "p95": 1520.3839540481567, + "p99": 1524.8960256576538 }, "roundtrip": { - "p50": 3440.864086151123, - "p90": 3457.6640129089355, - "p95": 3468.832015991211, - "p99": 3514.2080783843994 + "p50": 2408.128023147583, + "p90": 2428.4160137176514, + "p95": 2437.8559589385986, + "p99": 2452.7359008789062 }, "isolatedSum": { - "p50": 2514.240026473999, - "p90": 2531.327962875366, - "p95": 2544.6080565452576, - "p99": 2591.3599729537964 + "p50": 2438.9439821243286, + "p90": 2462.6240134239197, + "p95": 2474.2719531059265, + "p99": 2497.1200227737427 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1243504640, - "combineLogicalBytes": 2487009280, - "fanoutMean": 5.294189453125, - "recvTokensMax": 21781, - "stragglerRank": 4, + "dispatchLogicalBytes": 1432395776, + "combineLogicalBytes": 1432395776, + "fanoutMean": 5.336090087890625, + "recvTokensMax": 21939, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 @@ -30381,47 +30927,48 @@ ] }, { - "id": "cx-3feaa006", - "identity": "b300|deepep|7168|8|256|fp8|normal|cached-layout-comm-only-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", - "colorKey": "b300_49e66a7b", - "comparisonKey": "5b68240330e760fc", + "id": "cx-0457a436", + "identity": "gb300|deepep|v1|5120|8|160|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||a9df48e6438e77a", + "colorKey": "gb300_b97bfb88", + "comparisonKey": "e008e386a7e2bc41", "schemaVersion": 3, - "generatedAt": "2026-06-27T09:47:42.062998+00:00", + "generatedAt": "2026-06-29T14:09:04.614868+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_11", - "sku": "b300", + 
"publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", "backend": "deepep", "phase": "prefill", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "cached-layout-comm-only-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", + "measurementContract": "runtime-visible-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep · fp8 [cl]", - "model": "DeepSeek-V3/V4", + "label": "GB300 EP8 · deepep · bf16", + "model": "shape 5120/8/160", "shape": { - "hidden": 7168, + "hidden": 5120, "topk": 8, - "experts": 256, + "experts": 160, "routing": "uniform", "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, - "dispatchDtype": "fp8", + "dispatchDtype": "bf16", + "kernelGeneration": "v1", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1351, + "achievedFraction": 0.1316, "configuredUnits": 20, - "deviceUnits": 148, + "deviceUnits": 152, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -30429,59 +30976,59 @@ }, "placement": { "kind": "packed", - "nodes": 1, + "nodes": 2, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "64d989e2e2a6b31", - "workloadId": "set:6:a426d66e479dc893", - "workloadSource": "canonical-serialized", + "traceSignature": "a9df48e6438e77a", + "workloadId": null, + "workloadSource": "seeded-runtime", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", + "backendVersion": "1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28285593016", - "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285593016", - "createdAt": "2026-06-27T09:47:42.062998+00:00", - "sha": "149586650dbed5b7579537347e9489d5b41543c1" + "id": "28374342409", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409", + "createdAt": "2026-06-29T13:08:27Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 76.9599974155426, - "p90": 79.64800298213959, - "p95": 80.92799782752991, - "p99": 92.3520028591156 + "p50": 117.34399944543839, + "p90": 126.5919953584671, + "p95": 130.3360015153885, + "p99": 138.62399756908417 }, "combine": { - "p50": 108.51199924945831, - "p90": 111.42399907112122, - "p95": 112.41599917411804, - "p99": 122.40000069141388 + "p50": 113.53600025177002, + "p90": 120.7360029220581, + "p95": 122.81599640846252, + "p99": 132.83200562000275 }, "roundtrip": { - "p50": 210.78400313854218, - "p90": 213.85599672794342, - "p95": 216.5759950876236, - "p99": 232.9919934272766 + "p50": 203.45599949359894, + "p90": 211.35999262332916, + "p95": 214.27200734615326, + "p99": 223.1680005788803 }, "isolatedSum": { - "p50": 185.47199666500092, - "p90": 191.0720020532608, - "p95": 193.34399700164795, - "p99": 214.75200355052948 + "p50": 230.8799996972084, + "p90": 247.3279982805252, + "p95": 253.151997923851, + "p99": 271.4560031890869 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 38836224, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 4, + "dispatchLogicalBytes": 55674880, + "combineLogicalBytes": 55674880, + "fanoutMean": 5.3095703125, + "recvTokensMax": 699, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -30490,35 +31037,35 @@ "tokensPerRank": 256, "globalTokens": 2048, "dispatch": { - "p50": 109.95200276374817, - "p90": 113.43999952077866, - "p95": 115.26399850845337, - "p99": 126.62400305271149 + "p50": 
148.3200043439865, + "p90": 169.40799355506897, + "p95": 187.00799345970154, + "p99": 236.25600337982178 }, "combine": { - "p50": 154.91199493408203, - "p90": 159.04000401496887, - "p95": 161.18399798870087, - "p99": 169.88800466060638 + "p50": 150.68799257278442, + "p90": 172.89599776268005, + "p95": 190.0479942560196, + "p99": 229.69600558280945 }, "roundtrip": { - "p50": 318.30400228500366, - "p90": 323.3279883861542, - "p95": 325.21599531173706, - "p99": 336.70398592948914 + "p50": 270.687997341156, + "p90": 281.3439965248108, + "p95": 305.34398555755615, + "p99": 322.2079873085022 }, "isolatedSum": { - "p50": 264.8639976978302, - "p90": 272.4800035357475, - "p95": 276.44799649715424, - "p99": 296.51200771331787 + "p50": 299.00799691677094, + "p90": 342.303991317749, + "p95": 377.05598771572113, + "p99": 465.9520089626312 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77944832, - "combineLogicalBytes": 155889664, - "fanoutMean": 5.3095703125, - "recvTokensMax": 1422, - "stragglerRank": 7, + "dispatchLogicalBytes": 111104000, + "combineLogicalBytes": 111104000, + "fanoutMean": 5.2978515625, + "recvTokensMax": 1387, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -30527,35 +31074,35 @@ "tokensPerRank": 512, "globalTokens": 4096, "dispatch": { - "p50": 172.09599912166595, - "p90": 176.38400197029114, - "p95": 178.56000363826752, - "p99": 194.07999515533447 + "p50": 204.54399287700653, + "p90": 227.32800245285034, + "p95": 239.1359955072403, + "p99": 334.27199721336365 }, "combine": { - "p50": 265.79201221466064, - "p90": 270.112007856369, - "p95": 272.0319926738739, - "p99": 286.655992269516 + "p50": 241.18399620056152, + "p90": 248.09600412845612, + "p95": 256.19199872016907, + "p99": 278.78400683403015 }, "roundtrip": { - "p50": 542.7200198173523, - "p90": 548.8640069961548, - "p95": 551.967978477478, - "p99": 560.8000159263611 + "p50": 399.4559943675995, + "p90": 425.79200863838196, + "p95": 449.3440091609955, + "p99": 
484.3200147151947 }, "isolatedSum": { - "p50": 437.8880113363266, - "p90": 446.49600982666016, - "p95": 450.5919963121414, - "p99": 480.73598742485046 + "p50": 445.72798907756805, + "p90": 475.42400658130646, + "p95": 495.32799422740936, + "p99": 613.0560040473938 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 156133376, - "combineLogicalBytes": 312266752, - "fanoutMean": 5.31787109375, - "recvTokensMax": 2779, - "stragglerRank": 7, + "dispatchLogicalBytes": 223098880, + "combineLogicalBytes": 223098880, + "fanoutMean": 5.319091796875, + "recvTokensMax": 2762, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -30564,35 +31111,35 @@ "tokensPerRank": 1024, "globalTokens": 8192, "dispatch": { - "p50": 286.8160009384155, - "p90": 291.9040024280548, - "p95": 294.65600848197937, - "p99": 305.184006690979 + "p50": 317.7280128002167, + "p90": 331.4560055732727, + "p95": 345.7280099391937, + "p99": 370.2720105648041 }, "combine": { - "p50": 452.2559940814972, - "p90": 458.0160081386566, - "p95": 460.7360064983368, - "p99": 470.5919921398163 + "p50": 465.88799357414246, + "p90": 487.5839948654175, + "p95": 502.560019493103, + "p99": 514.9120092391968 }, "roundtrip": { - "p50": 964.1919732093811, - "p90": 972.320020198822, - "p95": 979.5200228691101, - "p99": 989.8560047149658 + "p50": 714.3039703369141, + "p90": 724.2559790611267, + "p95": 728.16002368927, + "p99": 738.0800247192383 }, "isolatedSum": { - "p50": 739.0719950199127, - "p90": 749.9200105667114, - "p95": 755.3920149803162, - "p99": 775.7759988307953 + "p50": 783.6160063743591, + "p90": 819.0400004386902, + "p95": 848.2880294322968, + "p99": 885.1840198040009 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 311721984, - "combineLogicalBytes": 623443968, - "fanoutMean": 5.30859375, - "recvTokensMax": 5505, - "stragglerRank": 7, + "dispatchLogicalBytes": 446730240, + "combineLogicalBytes": 446730240, + "fanoutMean": 5.325439453125, + "recvTokensMax": 5518, + "stragglerRank": 
5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -30601,35 +31148,35 @@ "tokensPerRank": 2048, "globalTokens": 16384, "dispatch": { - "p50": 523.1040120124817, - "p90": 529.7920107841492, - "p95": 532.2239995002747, - "p99": 541.5999889373779 + "p50": 551.967978477478, + "p90": 561.3440275192261, + "p95": 564.3200278282166, + "p99": 576.1920213699341 }, "combine": { - "p50": 814.2399787902832, - "p90": 822.5280046463013, - "p95": 828.8639783859253, - "p99": 839.9360179901123 + "p50": 835.8719944953918, + "p90": 845.9839820861816, + "p95": 848.3840227127075, + "p99": 855.8719754219055 }, "roundtrip": { - "p50": 1798.4319925308228, - "p90": 1811.8720054626465, - "p95": 1817.9839849472046, - "p99": 1887.3599767684937 + "p50": 1357.0560216903687, + "p90": 1370.3999519348145, + "p95": 1381.8880319595337, + "p99": 1400.7680416107178 }, "isolatedSum": { - "p50": 1337.343990802765, - "p90": 1352.3200154304504, - "p95": 1361.0879778862, - "p99": 1381.5360069274902 + "p50": 1387.8399729728699, + "p90": 1407.3280096054077, + "p95": 1412.704050540924, + "p99": 1432.0639967918396 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 621902848, - "combineLogicalBytes": 1243805696, - "fanoutMean": 5.29547119140625, - "recvTokensMax": 10952, - "stragglerRank": 4, + "dispatchLogicalBytes": 893634560, + "combineLogicalBytes": 893634560, + "fanoutMean": 5.32647705078125, + "recvTokensMax": 11032, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -30638,35 +31185,35 @@ "tokensPerRank": 4096, "globalTokens": 32768, "dispatch": { - "p50": 989.2160296440125, - "p90": 998.3360171318054, - "p95": 1003.7120580673218, - "p99": 1014.8160457611084 + "p50": 1023.0720043182373, + "p90": 1034.5280170440674, + "p95": 1038.4960174560547, + "p99": 1047.1359491348267 }, "combine": { - "p50": 1527.8079509735107, - "p90": 1537.376046180725, - "p95": 1542.8800582885742, - "p99": 1555.7119846343994 + "p50": 1569.85604763031, + "p90": 1575.7440328598022, + "p95": 
1580.8639526367188, + "p99": 1587.2960090637207 }, "roundtrip": { - "p50": 3446.592092514038, - "p90": 3460.416078567505, - "p95": 3467.77606010437, - "p99": 3511.4240646362305 + "p50": 2552.4160861968994, + "p90": 2566.272020339966, + "p95": 2571.7759132385254, + "p99": 2589.53595161438 }, "isolatedSum": { - "p50": 2517.023980617523, - "p90": 2535.7120633125305, - "p95": 2546.592116355896, - "p99": 2570.528030395508 + "p50": 2592.9280519485474, + "p90": 2610.2720499038696, + "p95": 2619.3599700927734, + "p99": 2634.4319581985474 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1243504640, - "combineLogicalBytes": 2487009280, - "fanoutMean": 5.294189453125, - "recvTokensMax": 21781, - "stragglerRank": 7, + "dispatchLogicalBytes": 1786265600, + "combineLogicalBytes": 1786265600, + "fanoutMean": 5.323486328125, + "recvTokensMax": 21895, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -30674,31 +31221,31 @@ ] }, { - "id": "cx-7cddf11f", - "identity": "b300|deepep-hybrid|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", - "colorKey": "b300_9bdf2cf9", - "comparisonKey": "6cb3f1841938f6d9", + "id": "cx-529dcc68", + "identity": "gb300|deepep|v1|6144|8|256|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||b9896b0a7ca9901", + "colorKey": "gb300_b97bfb88", + "comparisonKey": "0958f6765b1be546", "schemaVersion": 3, - "generatedAt": "2026-06-28T02:32:30.085872+00:00", + "generatedAt": "2026-06-29T14:13:22.285501+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_09", - "sku": "b300", - "backend": "deepep-hybrid", - "phase": "decode", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - 
"topologyClass": "b300-nvlink-island", - "transport": "nvlink", + "measurementContract": "runtime-visible-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep-hybrid · bf16", - "model": "DeepSeek-V3/V4", + "label": "GB300 EP8 · deepep · bf16", + "model": "MiniMax-M3", "shape": { - "hidden": 7168, + "hidden": 6144, "topk": 8, "experts": 256, "routing": "uniform", @@ -30707,333 +31254,260 @@ "unevenTokens": "none", "eplbEnabled": false, "dispatchDtype": "bf16", + "kernelGeneration": "v1", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": null, - "configuredUnits": null, - "deviceUnits": 148, - "resourceClass": "fixed-kernel", - "conformanceClass": "not-applicable", - "fixedKernel": true, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, "paretoEligible": false }, "placement": { "kind": "packed", - "nodes": 1, + "nodes": 2, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "ac583971f94b176", - "workloadId": "set:8:d8d49658059863f2", - "workloadSource": "canonical-serialized", + "traceSignature": "b9896b0a7ca9901", + "workloadId": null, + "workloadSource": "seeded-runtime", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, - "backendVersion": "hybrid-e0a5b1d", + "backendVersion": "1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28308873989", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28308873989", - "createdAt": "2026-06-28T02:32:30.085872+00:00", - "sha": "02ef8d2d9b6fd7519504810daae202e88ee66360" + "id": "28374342409", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409", + 
"createdAt": "2026-06-29T13:08:27Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 119.07199770212173, - "p90": 122.17599898576736, - "p95": 123.36000055074692, - "p99": 126.94400548934937 - }, - "combine": { - "p50": 36.51199862360954, - "p90": 38.015998899936676, - "p95": 38.816001266241074, - "p99": 39.872001856565475 - }, - "roundtrip": { - "p50": 151.48800611495972, - "p90": 155.8080017566681, - "p95": 157.98400342464447, - "p99": 166.52800142765045 - }, - "isolatedSum": { - "p50": 155.58399632573128, - "p90": 160.19199788570404, - "p95": 162.176001816988, - "p99": 166.81600734591484 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 630784, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 122.20799922943115, - "p90": 125.34399330615997, - "p95": 126.75200402736664, - "p99": 132.54399597644806 + "p50": 122.11199849843979, + "p90": 133.760005235672, + "p95": 147.10399508476257, + "p99": 180.83199858665466 }, "combine": { - "p50": 37.63199970126152, - "p90": 39.5519994199276, - "p95": 39.84000161290169, - "p99": 49.984000623226166 + "p50": 119.90399658679962, + "p90": 128.92800569534302, + "p95": 132.79999792575836, + "p99": 160.76800227165222 }, "roundtrip": { - "p50": 154.55999970436096, - "p90": 157.72800147533417, - "p95": 158.65600109100342, - "p99": 166.143998503685 + "p50": 212.64000236988068, + "p90": 223.07200729846954, + "p95": 232.54400491714478, + "p99": 283.03998708724976 }, "isolatedSum": { - "p50": 159.83999893069267, - "p90": 164.89599272608757, - "p95": 166.59200564026833, - "p99": 182.52799659967422 + "p50": 242.0159950852394, + "p90": 262.688010931015, + "p95": 279.90399301052094, + "p99": 341.6000008583069 }, 
"roundtripMeasured": true, - "dispatchLogicalBytes": 1232896, - "combineLogicalBytes": 1232896, - "fanoutMean": 5.375, - "recvTokensMax": 13, - "stragglerRank": 7, + "dispatchLogicalBytes": 66576384, + "combineLogicalBytes": 66576384, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 4, - "globalTokens": 32, + "tokensPerRank": 256, + "globalTokens": 2048, "dispatch": { - "p50": 125.02400577068329, - "p90": 128.76799702644348, - "p95": 130.11200726032257, - "p99": 150.39999783039093 + "p50": 156.38400614261627, + "p90": 166.59200191497803, + "p95": 170.33599317073822, + "p99": 182.72000551223755 }, "combine": { - "p50": 39.99999910593033, - "p90": 41.88799858093262, - "p95": 42.65600070357323, - "p99": 50.97600072622299 + "p50": 159.9999964237213, + "p90": 168.32000017166138, + "p95": 170.1440066099167, + "p99": 184.1920018196106 }, "roundtrip": { - "p50": 159.67999398708344, - "p90": 162.88000345230103, - "p95": 163.96799683570862, - "p99": 178.5919964313507 + "p50": 288.7679934501648, + "p90": 298.0160117149353, + "p95": 302.14399099349976, + "p99": 324.0320086479187 }, "isolatedSum": { - "p50": 165.02400487661362, - "p90": 170.6559956073761, - "p95": 172.7680079638958, - "p99": 201.37599855661392 + "p50": 316.3840025663376, + "p90": 334.9120020866394, + "p95": 340.4799997806549, + "p99": 366.91200733184814 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2480128, - "combineLogicalBytes": 2480128, - "fanoutMean": 5.40625, - "recvTokensMax": 29, - "stragglerRank": 7, + "dispatchLogicalBytes": 133619712, + "combineLogicalBytes": 133619712, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 512, + "globalTokens": 4096, "dispatch": { - "p50": 130.49599528312683, - "p90": 133.7279975414276, - "p95": 135.04000008106232, - 
"p99": 140.1599943637848 + "p50": 221.69600427150726, + "p90": 230.81600666046143, + "p95": 234.65600609779358, + "p99": 257.9840123653412 }, "combine": { - "p50": 40.863998234272, - "p90": 42.367998510599136, - "p95": 43.327998369932175, - "p99": 46.30399867892265 + "p50": 257.4079930782318, + "p90": 265.53601026535034, + "p95": 267.5839960575104, + "p99": 280.09599447250366 }, "roundtrip": { - "p50": 167.4560010433197, - "p90": 170.78399658203125, - "p95": 172.19200730323792, - "p99": 184.51200425624847 + "p50": 426.30401253700256, + "p90": 436.0960125923157, + "p95": 439.7439956665039, + "p99": 474.8480021953583 }, "isolatedSum": { - "p50": 171.35999351739883, - "p90": 176.09599605202675, - "p95": 178.3679984509945, - "p99": 186.46399304270744 + "p50": 479.1039973497391, + "p90": 496.35201692581177, + "p95": 502.24000215530396, + "p99": 538.0800068378448 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 7, + "dispatchLogicalBytes": 267657216, + "combineLogicalBytes": 267657216, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 16, - "globalTokens": 128, + "tokensPerRank": 1024, + "globalTokens": 8192, "dispatch": { - "p50": 142.65599846839905, - "p90": 145.6959992647171, - "p95": 147.35999703407288, - "p99": 153.60000729560852 + "p50": 344.0319895744324, + "p90": 352.80001163482666, + "p95": 355.74400424957275, + "p99": 374.55999851226807 }, "combine": { - "p50": 44.79999840259552, - "p90": 46.720001846551895, - "p95": 47.42399975657463, - "p99": 48.54400083422661 + "p50": 472.4479913711548, + "p90": 478.36801409721375, + "p95": 481.59998655319214, + "p99": 518.9759731292725 }, "roundtrip": { - "p50": 185.47199666500092, - "p90": 188.960000872612, - "p95": 191.8720006942749, - "p99": 211.29600703716278 + "p50": 779.9040079116821, + "p90": 
790.5279994010925, + "p95": 797.3759770393372, + "p99": 827.1039724349976 }, "isolatedSum": { - "p50": 187.45599687099457, - "p90": 192.416001111269, - "p95": 194.7839967906475, - "p99": 202.14400812983513 + "p50": 816.4799809455872, + "p90": 831.1680257320404, + "p95": 837.3439908027649, + "p99": 893.5359716415405 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 9920512, - "combineLogicalBytes": 9920512, - "fanoutMean": 5.40625, - "recvTokensMax": 92, - "stragglerRank": 7, + "dispatchLogicalBytes": 534380544, + "combineLogicalBytes": 534380544, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 32, - "globalTokens": 256, + "tokensPerRank": 2048, + "globalTokens": 16384, "dispatch": { - "p50": 163.83999586105347, - "p90": 166.52800142765045, - "p95": 167.64800250530243, - "p99": 172.41600155830383 + "p50": 600.1920104026794, + "p90": 609.0559959411621, + "p95": 613.9839887619019, + "p99": 639.9359703063965 }, "combine": { - "p50": 44.544000178575516, - "p90": 46.33599892258644, - "p95": 46.751998364925385, - "p99": 54.336000233888626 + "p50": 850.2399921417236, + "p90": 860.0000143051147, + "p95": 861.5999817848206, + "p99": 882.0160031318665 }, "roundtrip": { - "p50": 207.7759951353073, - "p90": 211.13599836826324, - "p95": 213.02400529384613, - "p99": 223.4240025281906 + "p50": 1419.6159839630127, + "p90": 1429.5680522918701, + "p95": 1432.6080083847046, + "p99": 1440.832018852234 }, "isolatedSum": { - "p50": 208.38399603962898, - "p90": 212.8640003502369, - "p95": 214.4000008702278, - "p99": 226.75200179219246 + "p50": 1450.432002544403, + "p90": 1469.0560102462769, + "p95": 1475.5839705467224, + "p99": 1521.951973438263 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19726336, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 182, - "stragglerRank": 7, + "dispatchLogicalBytes": 1066119168, + "combineLogicalBytes": 
1066119168, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 64, - "globalTokens": 512, + "tokensPerRank": 4096, + "globalTokens": 32768, "dispatch": { - "p50": 210.9760046005249, - "p90": 214.65599536895752, - "p95": 216.12800657749176, - "p99": 231.455996632576 + "p50": 1100.000023841858, + "p90": 1110.0159883499146, + "p95": 1115.231990814209, + "p99": 1144.0320014953613 }, "combine": { - "p50": 48.51200059056282, - "p90": 50.27199909090996, - "p95": 50.84799975156784, - "p99": 52.41600051522255 + "p50": 1596.5759754180908, + "p90": 1604.9920320510864, + "p95": 1608.3199977874756, + "p99": 1615.5200004577637 }, "roundtrip": { - "p50": 253.76001000404358, - "p90": 258.0159902572632, - "p95": 260.09601354599, - "p99": 319.2639946937561 + "p50": 2659.872055053711, + "p90": 2672.640085220337, + "p95": 2677.2799491882324, + "p99": 2690.0479793548584 }, "isolatedSum": { - "p50": 259.4880051910877, - "p90": 264.9279944598675, - "p95": 266.9760063290596, - "p99": 283.87199714779854 + "p50": 2696.5759992599487, + "p90": 2715.008020401001, + "p95": 2723.5519886016846, + "p99": 2759.552001953125 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 38993920, - "combineLogicalBytes": 38993920, - "fanoutMean": 5.3125, - "recvTokensMax": 367, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 209.6640020608902, - "p90": 212.79999613761902, - "p95": 214.33599293231964, - "p99": 221.69600427150726 - }, - "combine": { - "p50": 56.352000683546066, - "p90": 59.007998555898666, - "p95": 59.967998415231705, - "p99": 61.24800071120262 - }, - "roundtrip": { - "p50": 261.7279887199402, - "p90": 264.70398902893066, - "p95": 266.2079930305481, - "p99": 278.8800001144409 - }, - "isolatedSum": { - "p50": 266.01600274443626, - "p90": 271.8079946935177, - "p95": 
274.30399134755135, - "p99": 282.9440049827099 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 7, + "dispatchLogicalBytes": 2131722240, + "combineLogicalBytes": 2131722240, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 @@ -31041,28 +31515,28 @@ ] }, { - "id": "cx-4a0e300c", - "identity": "b300|deepep-hybrid|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", - "colorKey": "b300_9bdf2cf9", - "comparisonKey": "e35b7ffee4d4fef7", + "id": "cx-e8b502a1", + "identity": "gb300|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||b9896b0a7ca9901", + "colorKey": "gb300_74218200", + "comparisonKey": "2870a44c1f8d758b", "schemaVersion": 3, - "generatedAt": "2026-06-28T02:33:00.515887+00:00", + "generatedAt": "2026-06-29T13:58:42.564954+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_03", - "sku": "b300", - "backend": "deepep-hybrid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", "phase": "prefill", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · deepep-hybrid · bf16", + "label": "GB300 EP8 · deepep · bf16", "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, @@ -31074,74 +31548,75 @@ "unevenTokens": "none", "eplbEnabled": false, "dispatchDtype": "bf16", + "kernelGeneration": "v1", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { 
"requestedFraction": null, - "achievedFraction": null, - "configuredUnits": null, - "deviceUnits": 148, - "resourceClass": "fixed-kernel", - "conformanceClass": "not-applicable", - "fixedKernel": true, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, "paretoEligible": false }, "placement": { - "kind": "packed", - "nodes": 1, + "kind": "adversarial", + "nodes": 2, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "64d989e2e2a6b31", - "workloadId": "set:6:a426d66e479dc893", - "workloadSource": "canonical-serialized", + "traceSignature": "b9896b0a7ca9901", + "workloadId": null, + "workloadSource": "seeded-runtime", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, - "backendVersion": "hybrid-e0a5b1d", + "backendVersion": "1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28308873989", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28308873989", - "createdAt": "2026-06-28T02:33:00.515887+00:00", - "sha": "02ef8d2d9b6fd7519504810daae202e88ee66360" + "id": "28374342409", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409", + "createdAt": "2026-06-29T13:08:27Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 207.7759951353073, - "p90": 211.2639993429184, - "p95": 212.96000480651855, - "p99": 220.15999257564545 + "p50": 125.31200051307678, + "p90": 141.59999787807465, + "p95": 155.13600409030914, + "p99": 183.87199938297272 }, "combine": { - "p50": 57.88800120353699, - "p90": 59.23200026154518, - "p95": 59.99999865889549, - "p99": 67.84000247716904 + "p50": 126.5919953584671, + "p90": 144.28800344467163, + "p95": 156.00000321865082, + "p99": 
190.91199338436127 }, "roundtrip": { - "p50": 260.44800877571106, - "p90": 263.61599564552307, - "p95": 266.36800169944763, - "p99": 274.7200131416321 + "p50": 223.7119972705841, + "p90": 243.20000410079956, + "p95": 256.51198625564575, + "p99": 284.35200452804565 }, "isolatedSum": { - "p50": 265.6639963388443, - "p90": 270.4959996044636, - "p95": 272.96000346541405, - "p99": 287.9999950528145 + "p50": 251.90399587154388, + "p90": 285.8880013227463, + "p95": 311.13600730895996, + "p99": 374.783992767334 }, "roundtripMeasured": true, "dispatchLogicalBytes": 77672448, "combineLogicalBytes": 77672448, "fanoutMean": 5.291015625, "recvTokensMax": 723, - "stragglerRank": 6, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -31150,35 +31625,35 @@ "tokensPerRank": 256, "globalTokens": 2048, "dispatch": { - "p50": 216.44799411296844, - "p90": 220.60799598693848, - "p95": 222.33599424362183, - "p99": 248.73599410057068 + "p50": 161.82400286197662, + "p90": 170.97599804401398, + "p95": 174.52800273895264, + "p99": 207.16799795627594 }, "combine": { - "p50": 74.23999905586243, - "p90": 76.38400048017502, - "p95": 76.80000364780426, - "p99": 87.20000088214874 + "p50": 167.87199676036835, + "p90": 172.8000044822693, + "p95": 179.1040003299713, + "p99": 211.99999749660492 }, "roundtrip": { - "p50": 283.87200832366943, - "p90": 287.32800483703613, - "p95": 289.5039916038513, - "p99": 305.27999997138977 + "p50": 298.43199253082275, + "p90": 307.71198868751526, + "p95": 315.90399146080017, + "p99": 358.4960103034973 }, "isolatedSum": { - "p50": 290.6879931688309, - "p90": 296.9919964671135, - "p95": 299.1359978914261, - "p99": 335.9359949827194 + "p50": 329.69599962234497, + "p90": 343.77600252628326, + "p95": 353.63200306892395, + "p99": 419.16799545288086 }, "roundtripMeasured": true, "dispatchLogicalBytes": 155889664, "combineLogicalBytes": 155889664, "fanoutMean": 5.3095703125, "recvTokensMax": 1422, - "stragglerRank": 4, + "stragglerRank": 5, 
"correct": true, "samplesPooled": 600, "trials": 3 @@ -31187,35 +31662,35 @@ "tokensPerRank": 512, "globalTokens": 4096, "dispatch": { - "p50": 226.3679951429367, - "p90": 230.24000227451324, - "p95": 231.77599906921387, - "p99": 250.2399981021881 + "p50": 231.87200725078583, + "p90": 259.8719894886017, + "p95": 273.3120024204254, + "p99": 294.5599853992462 }, "combine": { - "p50": 107.64800012111664, - "p90": 109.79200154542923, - "p95": 110.62400043010712, - "p99": 118.75200271606445 + "p50": 282.9119861125946, + "p90": 296.7039942741394, + "p95": 315.90399146080017, + "p99": 331.36001229286194 }, "roundtrip": { - "p50": 329.47200536727905, - "p90": 333.6319923400879, - "p95": 335.6800079345703, - "p99": 362.2719943523407 + "p50": 458.75200629234314, + "p90": 481.7599952220917, + "p95": 489.6959960460663, + "p99": 510.43200492858887 }, "isolatedSum": { - "p50": 334.01599526405334, - "p90": 340.0320038199425, - "p95": 342.399999499321, - "p99": 368.99200081825256 + "p50": 514.7839933633804, + "p90": 556.5759837627411, + "p95": 589.2159938812256, + "p99": 625.9199976921082 }, "roundtripMeasured": true, "dispatchLogicalBytes": 312266752, "combineLogicalBytes": 312266752, "fanoutMean": 5.31787109375, "recvTokensMax": 2779, - "stragglerRank": 6, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -31224,35 +31699,35 @@ "tokensPerRank": 1024, "globalTokens": 8192, "dispatch": { - "p50": 286.01598739624023, - "p90": 289.34401273727417, - "p95": 290.5600070953369, - "p99": 304.3519854545593 + "p50": 361.85601353645325, + "p90": 388.0319893360138, + "p95": 397.5040018558502, + "p99": 415.3920114040375 }, "combine": { - "p50": 185.15199422836304, - "p90": 187.8719925880432, - "p95": 188.54400515556335, - "p99": 190.8160001039505 + "p50": 489.1520142555237, + "p90": 513.696014881134, + "p95": 525.4719853401184, + "p99": 546.3359951972961 }, "roundtrip": { - "p50": 467.0400023460388, - "p90": 471.48799896240234, - "p95": 473.91998767852783, - "p99": 
505.3759813308716 + "p50": 815.936028957367, + "p90": 848.6400246620178, + "p95": 862.2400164604187, + "p99": 877.9839873313904 }, "isolatedSum": { - "p50": 471.16798162460327, - "p90": 477.2160053253174, - "p95": 479.10401225090027, - "p99": 495.1679855585098 + "p50": 851.0080277919769, + "p90": 901.7280042171478, + "p95": 922.9759871959686, + "p99": 961.7280066013336 }, "roundtripMeasured": true, "dispatchLogicalBytes": 623443968, "combineLogicalBytes": 623443968, "fanoutMean": 5.30859375, "recvTokensMax": 5505, - "stragglerRank": 4, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -31261,35 +31736,35 @@ "tokensPerRank": 2048, "globalTokens": 16384, "dispatch": { - "p50": 458.40001106262207, - "p90": 461.60000562667847, - "p95": 463.1040096282959, - "p99": 475.93599557876587 + "p50": 615.6479716300964, + "p90": 625.823974609375, + "p95": 631.0719847679138, + "p99": 670.9439754486084 }, "combine": { - "p50": 320.76799869537354, - "p90": 323.93598556518555, - "p95": 325.0240087509155, - "p99": 332.5760066509247 + "p50": 870.464026927948, + "p90": 877.7920007705688, + "p95": 882.8160166740417, + "p99": 902.4959802627563 }, "roundtrip": { - "p50": 776.095986366272, - "p90": 781.216025352478, - "p95": 785.9200239181519, - "p99": 819.5199966430664 + "p50": 1457.4400186538696, + "p90": 1474.176049232483, + "p95": 1483.7440252304077, + "p99": 1505.5359601974487 }, "isolatedSum": { - "p50": 779.1680097579956, - "p90": 785.535991191864, - "p95": 788.1280183792114, - "p99": 808.5120022296906 + "p50": 1486.1119985580444, + "p90": 1503.6159753799438, + "p95": 1513.8880014419556, + "p99": 1573.4399557113647 }, "roundtripMeasured": true, "dispatchLogicalBytes": 1243805696, "combineLogicalBytes": 1243805696, "fanoutMean": 5.29547119140625, "recvTokensMax": 10952, - "stragglerRank": 4, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -31298,35 +31773,35 @@ "tokensPerRank": 4096, "globalTokens": 32768, "dispatch": { - "p50": 
741.0560250282288, - "p90": 745.2800273895264, - "p95": 748.8639950752258, - "p99": 812.4160170555115 + "p50": 1129.7600269317627, + "p90": 1146.5599536895752, + "p95": 1158.9759588241577, + "p99": 1175.9040355682373 }, "combine": { - "p50": 593.1199789047241, - "p90": 595.3599810600281, - "p95": 596.1599946022034, - "p99": 601.5999913215637 + "p50": 1619.104027748108, + "p90": 1635.6159448623657, + "p95": 1645.18404006958, + "p99": 1662.5280380249023 }, "roundtrip": { - "p50": 1334.2399597167969, - "p90": 1338.528037071228, - "p95": 1340.000033378601, - "p99": 1418.4319972991943 + "p50": 2731.6160202026367, + "p90": 2748.863935470581, + "p95": 2757.9519748687744, + "p99": 2776.8640518188477 }, "isolatedSum": { - "p50": 1334.1760039329529, - "p90": 1340.6400084495544, - "p95": 1345.0239896774292, - "p99": 1414.0160083770752 + "p50": 2748.8640546798706, + "p90": 2782.175898551941, + "p95": 2804.159998893738, + "p99": 2838.4320735931396 }, "roundtripMeasured": true, "dispatchLogicalBytes": 2487009280, "combineLogicalBytes": 2487009280, "fanoutMean": 5.294189453125, "recvTokensMax": 21781, - "stragglerRank": 4, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -31334,28 +31809,28 @@ ] }, { - "id": "cx-6136a9d3", - "identity": "b300|flashinfer|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|fp8|none|0|tuned||ac583971f94b176", - "colorKey": "b300_5ec8473f", - "comparisonKey": "be2ec236ee21b030", + "id": "cx-ba4423d9", + "identity": "gb300|deepep|v1|7168|8|256|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||b9896b0a7ca9901", + "colorKey": "gb300_b97bfb88", + "comparisonKey": "02cbe60d1402a41c", "schemaVersion": 3, - "generatedAt": "2026-06-28T05:40:56.109359+00:00", + "generatedAt": "2026-06-29T13:48:34.462312+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_07", - "sku": "b300", - "backend": "flashinfer", - "phase": "decode", + "publicationStatus": 
"comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", + "measurementContract": "runtime-visible-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · flashinfer · bf16", + "label": "GB300 EP8 · deepep · bf16", "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, @@ -31367,333 +31842,260 @@ "unevenTokens": "none", "eplbEnabled": false, "dispatchDtype": "bf16", + "kernelGeneration": "v1", "activationProfile": "normal", - "combineQuantMode": "fp8" + "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": null, - "configuredUnits": null, - "deviceUnits": 148, - "resourceClass": "fixed-kernel", - "conformanceClass": "not-applicable", - "fixedKernel": true, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, "paretoEligible": false }, "placement": { "kind": "packed", - "nodes": 1, + "nodes": 2, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "ac583971f94b176", - "workloadId": "set:8:d8d49658059863f2", - "workloadSource": "canonical-serialized", + "traceSignature": "b9896b0a7ca9901", + "workloadId": null, + "workloadSource": "seeded-runtime", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, - "backendVersion": null, + "backendVersion": "1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28312753674", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28312753674", - "createdAt": 
"2026-06-28T05:40:56.109359+00:00", - "sha": "85273c67789913421295080d1d06daacdc027a4a" + "id": "28374342409", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409", + "createdAt": "2026-06-29T13:08:27Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 54.59199845790863, - "p90": 56.60799890756607, - "p95": 57.472001761198044, - "p99": 65.66400080919266 - }, - "combine": { - "p50": 54.59199845790863, - "p90": 56.60799890756607, - "p95": 57.472001761198044, - "p99": 65.66400080919266 - }, - "roundtrip": { - "p50": 54.59199845790863, - "p90": 56.60799890756607, - "p95": 57.472001761198044, - "p99": 65.66400080919266 - }, - "isolatedSum": { - "p50": 109.18399691581726, - "p90": 113.21599781513214, - "p95": 114.94400352239609, - "p99": 131.32800161838531 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 630784, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 8, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 55.36000058054924, - "p90": 57.440001517534256, - "p95": 58.9120015501976, - "p99": 68.15999746322632 - }, - "combine": { - "p50": 55.36000058054924, - "p90": 57.440001517534256, - "p95": 58.9120015501976, - "p99": 68.15999746322632 - }, - "roundtrip": { - "p50": 55.36000058054924, - "p90": 57.440001517534256, - "p95": 58.9120015501976, - "p99": 68.15999746322632 - }, - "isolatedSum": { - "p50": 110.72000116109848, - "p90": 114.88000303506851, - "p95": 117.8240031003952, - "p99": 136.31999492645264 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1232896, - "combineLogicalBytes": 1232896, - "fanoutMean": 5.375, - "recvTokensMax": 8, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, + "tokensPerRank": 128, + 
"globalTokens": 1024, "dispatch": { - "p50": 57.18399956822395, - "p90": 59.74400043487549, - "p95": 60.736000537872314, - "p99": 74.23999905586243 + "p50": 123.77600371837616, + "p90": 134.36800241470337, + "p95": 155.2640050649643, + "p99": 180.63999712467194 }, "combine": { - "p50": 57.18399956822395, - "p90": 59.74400043487549, - "p95": 60.736000537872314, - "p99": 74.23999905586243 + "p50": 123.64800274372101, + "p90": 131.96800649166107, + "p95": 139.93600010871887, + "p99": 164.99200463294983 }, "roundtrip": { - "p50": 57.18399956822395, - "p90": 59.74400043487549, - "p95": 60.736000537872314, - "p99": 74.23999905586243 + "p50": 221.53599560260773, + "p90": 233.15200209617615, + "p95": 243.00800263881683, + "p99": 297.4399924278259 }, "isolatedSum": { - "p50": 114.3679991364479, - "p90": 119.48800086975098, - "p95": 121.47200107574463, - "p99": 148.47999811172485 + "p50": 247.42400646209717, + "p90": 266.33600890636444, + "p95": 295.20000517368317, + "p99": 345.63200175762177 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2480128, - "combineLogicalBytes": 2480128, - "fanoutMean": 5.40625, - "recvTokensMax": 8, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 256, + "globalTokens": 2048, "dispatch": { - "p50": 57.50399827957153, - "p90": 60.06399914622307, - "p95": 61.85600161552429, - "p99": 68.7360018491745 + "p50": 160.47999262809753, + "p90": 168.16000640392303, + "p95": 170.6240028142929, + "p99": 176.09600722789764 }, "combine": { - "p50": 57.50399827957153, - "p90": 60.06399914622307, - "p95": 61.85600161552429, - "p99": 68.7360018491745 + "p50": 166.6879951953888, + "p90": 171.36000096797943, + "p95": 173.0239987373352, + "p99": 180.31999468803406 }, "roundtrip": { - "p50": 57.50399827957153, - "p90": 60.06399914622307, - "p95": 
61.85600161552429, - "p99": 68.7360018491745 + "p50": 297.0240116119385, + "p90": 305.08801341056824, + "p95": 307.9040050506592, + "p99": 314.6879971027374 }, "isolatedSum": { - "p50": 115.00799655914307, - "p90": 120.12799829244614, - "p95": 123.71200323104858, - "p99": 137.472003698349 + "p50": 327.1679878234863, + "p90": 339.52000737190247, + "p95": 343.6480015516281, + "p99": 356.4160019159317 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 8, + "dispatchLogicalBytes": 155889664, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 16, - "globalTokens": 128, + "tokensPerRank": 512, + "globalTokens": 4096, "dispatch": { - "p50": 58.20799991488457, - "p90": 60.35200133919716, - "p95": 61.824001371860504, - "p99": 74.17599856853485 + "p50": 227.55199670791626, + "p90": 237.95199394226074, + "p95": 241.15200340747833, + "p99": 287.03999519348145 }, "combine": { - "p50": 58.20799991488457, - "p90": 60.35200133919716, - "p95": 61.824001371860504, - "p99": 74.17599856853485 + "p50": 284.41599011421204, + "p90": 292.1600043773651, + "p95": 294.20799016952515, + "p99": 301.85601115226746 }, "roundtrip": { - "p50": 58.20799991488457, - "p90": 60.35200133919716, - "p95": 61.824001371860504, - "p99": 74.17599856853485 + "p50": 461.8239998817444, + "p90": 471.2960124015808, + "p95": 474.8159945011139, + "p99": 501.5680193901062 }, "isolatedSum": { - "p50": 116.41599982976913, - "p90": 120.70400267839432, - "p95": 123.64800274372101, - "p99": 148.3519971370697 + "p50": 511.9679868221283, + "p90": 530.1119983196259, + "p95": 535.3599935770035, + "p99": 588.8960063457489 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 9920512, - "combineLogicalBytes": 9920512, - "fanoutMean": 5.40625, - "recvTokensMax": 8, - "stragglerRank": 7, + 
"dispatchLogicalBytes": 312266752, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 32, - "globalTokens": 256, + "tokensPerRank": 1024, + "globalTokens": 8192, "dispatch": { - "p50": 59.328000992536545, - "p90": 61.59999966621399, - "p95": 62.72000074386597, - "p99": 75.6160020828247 + "p50": 360.54399609565735, + "p90": 367.8080141544342, + "p95": 371.13600969314575, + "p99": 404.1920006275177 }, "combine": { - "p50": 59.328000992536545, - "p90": 61.59999966621399, - "p95": 62.72000074386597, - "p99": 75.6160020828247 + "p50": 487.87200450897217, + "p90": 493.6639964580536, + "p95": 496.44801020622253, + "p99": 506.1759948730469 }, "roundtrip": { - "p50": 59.328000992536545, - "p90": 61.59999966621399, - "p95": 62.72000074386597, - "p99": 75.6160020828247 + "p50": 816.1600232124329, + "p90": 824.2560029029846, + "p95": 827.072024345398, + "p99": 852.895975112915 }, "isolatedSum": { - "p50": 118.65600198507309, - "p90": 123.19999933242798, - "p95": 125.44000148773193, - "p99": 151.2320041656494 + "p50": 848.4160006046295, + "p90": 861.4720106124878, + "p95": 867.5840198993683, + "p99": 910.3679955005646 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19726336, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 8, - "stragglerRank": 6, + "dispatchLogicalBytes": 623443968, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 64, - "globalTokens": 512, + "tokensPerRank": 2048, + "globalTokens": 16384, "dispatch": { - "p50": 62.78400123119354, - "p90": 65.21599739789963, - "p95": 66.17599725723267, - "p99": 76.67200267314911 + "p50": 615.1040196418762, + "p90": 624.9600052833557, + "p95": 628.9920210838318, + "p99": 655.4880142211914 }, "combine": { - "p50": 
62.78400123119354, - "p90": 65.21599739789963, - "p95": 66.17599725723267, - "p99": 76.67200267314911 + "p50": 870.6560134887695, + "p90": 879.4559836387634, + "p95": 884.447991847992, + "p99": 908.6080193519592 }, "roundtrip": { - "p50": 62.78400123119354, - "p90": 65.21599739789963, - "p95": 66.17599725723267, - "p99": 76.67200267314911 + "p50": 1458.240032196045, + "p90": 1469.9840545654297, + "p95": 1476.0639667510986, + "p99": 1511.8720531463623 }, "isolatedSum": { - "p50": 125.56800246238708, - "p90": 130.43199479579926, - "p95": 132.35199451446533, - "p99": 153.34400534629822 + "p50": 1485.7600331306458, + "p90": 1504.4159889221191, + "p95": 1513.4400129318237, + "p99": 1564.0960335731506 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 38993920, - "combineLogicalBytes": 38993920, - "fanoutMean": 5.3125, - "recvTokensMax": 8, - "stragglerRank": 7, + "dispatchLogicalBytes": 1243805696, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 4096, + "globalTokens": 32768, "dispatch": { - "p50": 71.07199728488922, - "p90": 72.9919970035553, - "p95": 73.63200187683105, - "p99": 87.36000210046768 + "p50": 1133.0879926681519, + "p90": 1140.6079530715942, + "p95": 1143.5840129852295, + "p99": 1149.1520404815674 }, "combine": { - "p50": 71.07199728488922, - "p90": 72.9919970035553, - "p95": 73.63200187683105, - "p99": 87.36000210046768 + "p50": 1619.264006614685, + "p90": 1625.440001487732, + "p95": 1628.0319690704346, + "p99": 1635.6480121612549 }, "roundtrip": { - "p50": 71.07199728488922, - "p90": 72.9919970035553, - "p95": 73.63200187683105, - "p99": 87.36000210046768 + "p50": 2728.7039756774902, + "p90": 2738.1439208984375, + "p95": 2741.1201000213623, + "p99": 2750.080108642578 }, "isolatedSum": { - "p50": 142.14399456977844, - "p90": 145.9839940071106, - "p95": 
147.2640037536621, - "p99": 174.72000420093536 + "p50": 2752.351999282837, + "p90": 2766.047954559326, + "p95": 2771.615982055664, + "p99": 2784.8000526428223 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 8, - "stragglerRank": 7, + "dispatchLogicalBytes": 2487009280, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 @@ -31701,366 +32103,476 @@ ] }, { - "id": "cx-4e6a4685", - "identity": "b300|flashinfer|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", - "colorKey": "b300_5ec8473f", - "comparisonKey": "0f567db5f9c07223", + "id": "cx-fdc9a7c6", + "identity": "gb300|deepep|v1|7168|8|384|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||fc08bf2f8d42ed8", + "colorKey": "gb300_b97bfb88", + "comparisonKey": "1dc982986bf98728", "schemaVersion": 3, - "generatedAt": "2026-06-27T17:26:58.425220+00:00", + "generatedAt": "2026-06-29T13:18:54.160154+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_07", - "sku": "b300", - "backend": "flashinfer", - "phase": "decode", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", + "measurementContract": "runtime-visible-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · flashinfer · bf16", - "model": "DeepSeek-V3/V4", + "label": "GB300 EP8 · deepep · bf16", + "model": "Kimi-K2", "shape": { "hidden": 7168, "topk": 8, - "experts": 256, + 
"experts": 384, "routing": "uniform", "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, "dispatchDtype": "bf16", + "kernelGeneration": "v1", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": null, - "configuredUnits": null, - "deviceUnits": 148, - "resourceClass": "fixed-kernel", - "conformanceClass": "not-applicable", - "fixedKernel": true, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, "paretoEligible": false }, "placement": { "kind": "packed", - "nodes": 1, + "nodes": 2, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "ac583971f94b176", - "workloadId": "set:8:d8d49658059863f2", - "workloadSource": "canonical-serialized", + "traceSignature": "fc08bf2f8d42ed8", + "workloadId": null, + "workloadSource": "seeded-runtime", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, - "backendVersion": null, + "backendVersion": "1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28296434249", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28296434249", - "createdAt": "2026-06-27T17:26:58.425220+00:00", - "sha": "2ebeba9134a8c84f7a80ac87742d57f7cdf1cf18" + "id": "28374328313", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374328313", + "createdAt": "2026-06-29T13:08:14Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { - "tokensPerRank": 1, - "globalTokens": 8, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 48.576001077890396, - "p90": 49.855999648571014, - "p95": 50.81599950790405, - "p99": 64.4799992442131 + "p50": 124.41600114107132, + "p90": 131.3599944114685, + "p95": 
134.49600338935852, + "p99": 152.48000621795654 }, "combine": { - "p50": 48.576001077890396, - "p90": 49.855999648571014, - "p95": 50.81599950790405, - "p99": 64.4799992442131 + "p50": 122.49600142240524, + "p90": 131.48799538612366, + "p95": 133.02400708198547, + "p99": 143.71199905872345 }, "roundtrip": { - "p50": 48.576001077890396, - "p90": 49.855999648571014, - "p95": 50.81599950790405, - "p99": 64.4799992442131 + "p50": 223.7119972705841, + "p90": 232.06399381160736, + "p95": 234.46400463581085, + "p99": 241.31199717521667 }, "isolatedSum": { - "p50": 97.15200215578079, - "p90": 99.71199929714203, - "p95": 101.6319990158081, - "p99": 128.9599984884262 + "p50": 246.91200256347656, + "p90": 262.84798979759216, + "p95": 267.520010471344, + "p99": 296.19200527668 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 630784, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 8, - "stragglerRank": 7, + "dispatchLogicalBytes": 77514752, + "combineLogicalBytes": 77514752, + "fanoutMean": 5.2802734375, + "recvTokensMax": 707, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2, - "globalTokens": 16, + "tokensPerRank": 256, + "globalTokens": 2048, "dispatch": { - "p50": 49.44000020623207, - "p90": 50.912000238895416, - "p95": 51.552001386880875, - "p99": 56.543998420238495 + "p50": 163.29599916934967, + "p90": 180.57599663734436, + "p95": 196.60800695419312, + "p99": 224.67200458049774 }, "combine": { - "p50": 49.44000020623207, - "p90": 50.912000238895416, - "p95": 51.552001386880875, - "p99": 56.543998420238495 + "p50": 169.0240055322647, + "p90": 201.7280012369156, + "p95": 208.44799280166626, + "p99": 229.5359969139099 }, "roundtrip": { - "p50": 49.44000020623207, - "p90": 50.912000238895416, - "p95": 51.552001386880875, - "p99": 56.543998420238495 + "p50": 301.56800150871277, + "p90": 311.39200925827026, + "p95": 314.59200382232666, + "p99": 321.5999901294708 }, "isolatedSum": { - "p50": 
98.88000041246414, - "p90": 101.82400047779083, - "p95": 103.10400277376175, - "p99": 113.08799684047699 + "p50": 332.3200047016144, + "p90": 382.30399787425995, + "p95": 405.0559997558594, + "p99": 454.20800149440765 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1232896, - "combineLogicalBytes": 1232896, - "fanoutMean": 5.375, - "recvTokensMax": 8, - "stragglerRank": 7, + "dispatchLogicalBytes": 154570752, + "combineLogicalBytes": 154570752, + "fanoutMean": 5.2646484375, + "recvTokensMax": 1391, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 4, - "globalTokens": 32, + "tokensPerRank": 512, + "globalTokens": 4096, "dispatch": { - "p50": 51.552001386880875, - "p90": 53.888000547885895, - "p95": 54.976001381874084, - "p99": 66.72000139951706 + "p50": 229.63200509548187, + "p90": 245.88799476623535, + "p95": 262.1760070323944, + "p99": 281.3760042190552 }, "combine": { - "p50": 51.552001386880875, - "p90": 53.888000547885895, - "p95": 54.976001381874084, - "p99": 66.72000139951706 + "p50": 289.63199257850647, + "p90": 312.25600838661194, + "p95": 320.67200541496277, + "p99": 339.55198526382446 }, "roundtrip": { - "p50": 51.552001386880875, - "p90": 53.888000547885895, - "p95": 54.976001381874084, - "p99": 66.72000139951706 + "p50": 465.05600214004517, + "p90": 485.9200119972229, + "p95": 495.712012052536, + "p99": 518.2399749755859 }, "isolatedSum": { - "p50": 103.10400277376175, - "p90": 107.77600109577179, - "p95": 109.95200276374817, - "p99": 133.44000279903412 + "p50": 519.2639976739883, + "p90": 558.1440031528473, + "p95": 582.8480124473572, + "p99": 620.9279894828796 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2480128, - "combineLogicalBytes": 2480128, - "fanoutMean": 5.40625, - "recvTokensMax": 8, - "stragglerRank": 6, + "dispatchLogicalBytes": 309772288, + "combineLogicalBytes": 309772288, + "fanoutMean": 5.275390625, + "recvTokensMax": 2754, + "stragglerRank": 1, "correct": true, 
"samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 1024, + "globalTokens": 8192, "dispatch": { - "p50": 51.7439991235733, - "p90": 54.43200096487999, - "p95": 55.07199838757515, - "p99": 61.664000153541565 + "p50": 359.42399501800537, + "p90": 385.50400733947754, + "p95": 399.29598569869995, + "p99": 414.0160083770752 }, "combine": { - "p50": 51.7439991235733, - "p90": 54.43200096487999, - "p95": 55.07199838757515, - "p99": 61.664000153541565 + "p50": 490.01601338386536, + "p90": 505.7920217514038, + "p95": 515.8079862594604, + "p99": 541.8239831924438 }, "roundtrip": { - "p50": 51.7439991235733, - "p90": 54.43200096487999, - "p95": 55.07199838757515, - "p99": 61.664000153541565 + "p50": 817.8240060806274, + "p90": 834.272027015686, + "p95": 856.000006198883, + "p99": 874.5920062065125 }, "isolatedSum": { - "p50": 103.4879982471466, - "p90": 108.86400192975998, - "p95": 110.1439967751503, - "p99": 123.32800030708313 + "p50": 849.4400084018707, + "p90": 891.2960290908813, + "p95": 915.1039719581604, + "p99": 955.839991569519 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 8, - "stragglerRank": 6, + "dispatchLogicalBytes": 619501568, + "combineLogicalBytes": 619501568, + "fanoutMean": 5.2750244140625, + "recvTokensMax": 5469, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 16, - "globalTokens": 128, + "tokensPerRank": 2048, + "globalTokens": 16384, "dispatch": { - "p50": 54.207999259233475, - "p90": 56.0000017285347, - "p95": 56.703999638557434, - "p99": 63.13599646091461 + "p50": 608.4480285644531, + "p90": 615.231990814209, + "p95": 618.6239719390869, + "p99": 633.5359811782837 }, "combine": { - "p50": 54.207999259233475, - "p90": 56.0000017285347, - "p95": 56.703999638557434, - "p99": 63.13599646091461 + "p50": 857.7280044555664, + "p90": 862.8479838371277, + "p95": 
865.9200072288513, + "p99": 871.7439770698547 }, "roundtrip": { - "p50": 54.207999259233475, - "p90": 56.0000017285347, - "p95": 56.703999638557434, - "p99": 63.13599646091461 + "p50": 1434.0800046920776, + "p90": 1442.4959421157837, + "p95": 1445.7600116729736, + "p99": 1471.8719720840454 }, "isolatedSum": { - "p50": 108.41599851846695, - "p90": 112.0000034570694, - "p95": 113.40799927711487, - "p99": 126.27199292182922 + "p50": 1466.1760330200195, + "p90": 1478.0799746513367, + "p95": 1484.5439791679382, + "p99": 1505.2799582481384 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 9920512, - "combineLogicalBytes": 9920512, - "fanoutMean": 5.40625, - "recvTokensMax": 8, - "stragglerRank": 6, + "dispatchLogicalBytes": 1239375872, + "combineLogicalBytes": 1239375872, + "fanoutMean": 5.276611328125, + "recvTokensMax": 10883, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 32, - "globalTokens": 256, + "tokensPerRank": 4096, + "globalTokens": 32768, "dispatch": { - "p50": 54.976001381874084, - "p90": 57.312000542879105, - "p95": 58.04799869656563, - "p99": 60.7680007815361 + "p50": 1127.3280382156372, + "p90": 1133.7920427322388, + "p95": 1135.8720064163208, + "p99": 1139.4239664077759 }, "combine": { - "p50": 54.976001381874084, - "p90": 57.312000542879105, - "p95": 58.04799869656563, - "p99": 60.7680007815361 + "p50": 1611.4239692687988, + "p90": 1621.5039491653442, + "p95": 1624.1919994354248, + "p99": 1664.9600267410278 }, "roundtrip": { - "p50": 54.976001381874084, - "p90": 57.312000542879105, - "p95": 58.04799869656563, - "p99": 60.7680007815361 + "p50": 2713.792085647583, + "p90": 2722.9440212249756, + "p95": 2724.9600887298584, + "p99": 2731.4560413360596 }, "isolatedSum": { - "p50": 109.95200276374817, - "p90": 114.62400108575821, - "p95": 116.09599739313126, - "p99": 121.5360015630722 + "p50": 2738.752007484436, + "p90": 2755.295991897583, + "p95": 2760.0640058517456, + "p99": 2804.3839931488037 }, 
"roundtripMeasured": true, - "dispatchLogicalBytes": 19726336, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 8, - "stragglerRank": 5, + "dispatchLogicalBytes": 2479669248, + "combineLogicalBytes": 2479669248, + "fanoutMean": 5.278564453125, + "recvTokensMax": 21730, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-8c3cdccb", + "identity": "gb300|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|prefill|normal|none|none|0|tuned||8bf55e36863f028", + "colorKey": "gb300_d4c8afb8", + "comparisonKey": "7c5d1ae307d82bca", + "schemaVersion": 3, + "generatedAt": "2026-06-29T13:42:20.458799+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep · bf16 · balanced", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced", + "routingLabel": "balanced", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "8bf55e36863f028", + "workloadId": null, + "workloadSource": "seeded-runtime", + 
"eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28374342409", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409", + "createdAt": "2026-06-29T13:08:27Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 141.4400041103363, + "p90": 151.10400319099426, + "p95": 154.84799444675446, + "p99": 160.73599457740784 + }, + "combine": { + "p50": 146.36799693107605, + "p90": 156.19200468063354, + "p95": 158.11200439929962, + "p99": 167.39200055599213 + }, + "roundtrip": { + "p50": 259.2960000038147, + "p90": 268.3199942111969, + "p95": 271.4560031890869, + "p99": 277.24799513816833 + }, + "isolatedSum": { + "p50": 287.80800104141235, + "p90": 307.2960078716278, + "p95": 312.9599988460541, + "p99": 328.12799513339996 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 8, + "recvTokensMax": 1024, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 64, - "globalTokens": 512, + "tokensPerRank": 512, + "globalTokens": 4096, "dispatch": { - "p50": 59.29600074887276, - "p90": 61.47199869155884, - "p95": 62.912002205848694, - "p99": 81.31200075149536 + "p50": 279.07198667526245, + "p90": 288.9919877052307, + "p95": 292.4799919128418, + "p99": 327.90398597717285 }, "combine": { - "p50": 59.29600074887276, - "p90": 61.47199869155884, - "p95": 62.912002205848694, - "p99": 81.31200075149536 + "p50": 368.00000071525574, + "p90": 376.70400738716125, + "p95": 378.08001041412354, + "p99": 390.5920088291168 }, "roundtrip": { - "p50": 59.29600074887276, - "p90": 61.47199869155884, - "p95": 62.912002205848694, - "p99": 81.31200075149536 + "p50": 
606.4959764480591, + "p90": 617.0240044593811, + "p95": 620.8320260047913, + "p99": 659.6480011940002 }, "isolatedSum": { - "p50": 118.59200149774551, - "p90": 122.94399738311768, - "p95": 125.82400441169739, - "p99": 162.62400150299072 + "p50": 647.0719873905182, + "p90": 665.695995092392, + "p95": 670.5600023269653, + "p99": 718.4959948062897 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 38993920, - "combineLogicalBytes": 38993920, - "fanoutMean": 5.3125, - "recvTokensMax": 8, - "stragglerRank": 7, + "dispatchLogicalBytes": 469762048, + "combineLogicalBytes": 469762048, + "fanoutMean": 8, + "recvTokensMax": 4096, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 2048, + "globalTokens": 16384, "dispatch": { - "p50": 69.2799985408783, - "p90": 71.00799679756165, - "p95": 71.29599899053574, - "p99": 78.97599786520004 + "p50": 808.9600205421448, + "p90": 823.2319951057434, + "p95": 826.6559839248657, + "p99": 860.8959913253784 }, "combine": { - "p50": 69.2799985408783, - "p90": 71.00799679756165, - "p95": 71.29599899053574, - "p99": 78.97599786520004 + "p50": 1192.863941192627, + "p90": 1201.1200189590454, + "p95": 1202.623963356018, + "p99": 1207.6159715652466 }, "roundtrip": { - "p50": 69.2799985408783, - "p90": 71.00799679756165, - "p95": 71.29599899053574, - "p99": 78.97599786520004 + "p50": 1971.0719585418701, + "p90": 1987.712025642395, + "p95": 1993.183970451355, + "p99": 2004.2240619659424 }, "isolatedSum": { - "p50": 138.5599970817566, - "p90": 142.0159935951233, - "p95": 142.59199798107147, - "p99": 157.95199573040009 + "p50": 2001.8239617347717, + "p90": 2024.3520140647888, + "p95": 2029.2799472808838, + "p99": 2068.511962890625 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 8, - "stragglerRank": 4, + "dispatchLogicalBytes": 1879048192, + 
"combineLogicalBytes": 1879048192, + "fanoutMean": 8, + "recvTokensMax": 16384, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 @@ -32068,366 +32580,476 @@ ] }, { - "id": "cx-c4d51897", - "identity": "b300|flashinfer|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|nvfp4|none|0|tuned||ac583971f94b176", - "colorKey": "b300_5ec8473f", - "comparisonKey": "fcbe4c54041214ff", + "id": "cx-5f85a462", + "identity": "gb300|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|8|prefill|normal|none|none|0|tuned||7f1ea4cf569d12c", + "colorKey": "gb300_f163949b", + "comparisonKey": "255dfa9bd7173c73", "schemaVersion": 3, - "generatedAt": "2026-06-28T06:30:40.335883+00:00", + "generatedAt": "2026-06-29T13:44:11.777684+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_07", - "sku": "b300", - "backend": "flashinfer", - "phase": "decode", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · flashinfer · bf16", + "label": "GB300 EP8 · deepep · bf16 · balanced-rank-local", "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", + "routing": "balanced-rank-local", + "routingLabel": "balanced-rank-local", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, "dispatchDtype": "bf16", + "kernelGeneration": "v1", "activationProfile": "normal", - "combineQuantMode": "nvfp4" + "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": null, - 
"configuredUnits": null, - "deviceUnits": 148, - "resourceClass": "fixed-kernel", - "conformanceClass": "not-applicable", - "fixedKernel": true, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, "paretoEligible": false }, "placement": { "kind": "packed", - "nodes": 1, + "nodes": 2, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "ac583971f94b176", - "workloadId": "set:8:d8d49658059863f2", - "workloadSource": "canonical-serialized", + "traceSignature": "7f1ea4cf569d12c", + "workloadId": null, + "workloadSource": "seeded-runtime", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, - "backendVersion": null, + "backendVersion": "1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28313781903", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28313781903", - "createdAt": "2026-06-28T06:30:40.335883+00:00", - "sha": "0e61ac1009cdb939b811e283f71ad6306241d3dd" + "id": "28374342409", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409", + "createdAt": "2026-06-29T13:08:27Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { - "tokensPerRank": 1, - "globalTokens": 8, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 55.64799904823303, - "p90": 57.88800120353699, - "p95": 60.127999633550644, - "p99": 76.86399668455124 + "p50": 100.19200295209885, + "p90": 110.17599701881409, + "p95": 114.68800157308578, + "p99": 122.65600264072418 }, "combine": { - "p50": 55.64799904823303, - "p90": 57.88800120353699, - "p95": 60.127999633550644, - "p99": 76.86399668455124 + "p50": 81.91999793052673, + "p90": 85.9839990735054, + "p95": 92.28800237178802, + "p99": 96.3520035147667 }, "roundtrip": { - "p50": 55.64799904823303, - 
"p90": 57.88800120353699, - "p95": 60.127999633550644, - "p99": 76.86399668455124 + "p50": 155.29599785804749, + "p90": 171.36000096797943, + "p95": 193.95199418067932, + "p99": 208.28799903392792 }, "isolatedSum": { - "p50": 111.29599809646606, - "p90": 115.77600240707397, - "p95": 120.25599926710129, - "p99": 153.72799336910248 + "p50": 182.11200088262558, + "p90": 196.1599960923195, + "p95": 206.9760039448738, + "p99": 219.00800615549088 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 630784, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 8, - "stragglerRank": 7, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 1, + "recvTokensMax": 128, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2, - "globalTokens": 16, + "tokensPerRank": 512, + "globalTokens": 4096, "dispatch": { - "p50": 56.51199817657471, - "p90": 58.52799862623215, - "p95": 59.61599946022034, - "p99": 66.3679987192154 + "p50": 122.84799665212631, + "p90": 131.45600259304047, + "p95": 135.23200154304504, + "p99": 144.76799964904785 }, "combine": { - "p50": 56.51199817657471, - "p90": 58.52799862623215, - "p95": 59.61599946022034, - "p99": 66.3679987192154 + "p50": 124.12799894809723, + "p90": 131.8719983100891, + "p95": 133.85599851608276, + "p99": 146.91199362277985 }, "roundtrip": { - "p50": 56.51199817657471, - "p90": 58.52799862623215, - "p95": 59.61599946022034, - "p99": 66.3679987192154 + "p50": 220.60799598693848, + "p90": 228.4799963235855, + "p95": 232.60800540447235, + "p99": 247.23200500011444 }, "isolatedSum": { - "p50": 113.02399635314941, - "p90": 117.0559972524643, - "p95": 119.23199892044067, - "p99": 132.7359974384308 + "p50": 246.97599560022354, + "p90": 263.3280009031296, + "p95": 269.0880000591278, + "p99": 291.6799932718277 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1232896, - "combineLogicalBytes": 1232896, - "fanoutMean": 5.375, - "recvTokensMax": 
8, - "stragglerRank": 5, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 1, + "recvTokensMax": 512, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 4, - "globalTokens": 32, + "tokensPerRank": 2048, + "globalTokens": 16384, "dispatch": { - "p50": 57.5999990105629, - "p90": 59.67999994754791, - "p95": 60.92799827456474, - "p99": 67.1359971165657 + "p50": 210.36800742149353, + "p90": 219.7439968585968, + "p95": 222.20799326896667, + "p99": 242.2720044851303 }, "combine": { - "p50": 57.5999990105629, - "p90": 59.67999994754791, - "p95": 60.92799827456474, - "p99": 67.1359971165657 + "p50": 256.0960054397583, + "p90": 263.13599944114685, + "p95": 266.6560113430023, + "p99": 283.29598903656006 }, "roundtrip": { - "p50": 57.5999990105629, - "p90": 59.67999994754791, - "p95": 60.92799827456474, - "p99": 67.1359971165657 + "p50": 447.90399074554443, + "p90": 455.4559886455536, + "p95": 457.7600061893463, + "p99": 465.6960070133209 }, "isolatedSum": { - "p50": 115.1999980211258, - "p90": 119.35999989509583, - "p95": 121.85599654912949, - "p99": 134.2719942331314 + "p50": 466.46401286125183, + "p90": 482.87999629974365, + "p95": 488.864004611969, + "p99": 525.5679935216904 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2480128, - "combineLogicalBytes": 2480128, - "fanoutMean": 5.40625, - "recvTokensMax": 8, - "stragglerRank": 7, + "dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 1, + "recvTokensMax": 2048, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-ba690ae0", + "identity": "gb300|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|balanced+eplb|8|prefill|normal|none|none|0|tuned||7ac30b0a39b1405", + "colorKey": "gb300_c93e2296", + "comparisonKey": "2ca11a784293be10", + "schemaVersion": 3, + "generatedAt": "2026-06-29T13:40:00.700440+00:00", + "status": "valid", + 
"publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep · bf16 · balanced+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "balanced", + "routingLabel": "balanced+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "7ac30b0a39b1405", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1, + "eplbImbalanceAfter": 1, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28374342409", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409", + "createdAt": "2026-06-29T13:08:27Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 108.64000022411346, + "p90": 116.64000153541565, + "p95": 119.9679970741272, + "p99": 128.92800569534302 + }, + "combine": { + "p50": 96.70399874448776, + "p90": 
103.7760004401207, + "p95": 106.65600001811981, + "p99": 110.04800349473953 + }, + "roundtrip": { + "p50": 176.57600343227386, + "p90": 184.9920004606247, + "p95": 187.29600310325623, + "p99": 195.360004901886 + }, + "isolatedSum": { + "p50": 205.34399896860123, + "p90": 220.41600197553635, + "p95": 226.623997092247, + "p99": 238.97600919008255 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 2, + "recvTokensMax": 384, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 256, + "globalTokens": 2048, "dispatch": { - "p50": 57.82400071620941, - "p90": 59.87200140953064, - "p95": 60.7680007815361, - "p99": 70.0799971818924 + "p50": 125.63200294971466, + "p90": 133.85599851608276, + "p95": 137.7599984407425, + "p99": 141.92000031471252 }, "combine": { - "p50": 57.82400071620941, - "p90": 59.87200140953064, - "p95": 60.7680007815361, - "p99": 70.0799971818924 + "p50": 120.06399780511856, + "p90": 124.09599870443344, + "p95": 128.86400520801544, + "p99": 138.2399946451187 }, "roundtrip": { - "p50": 57.82400071620941, - "p90": 59.87200140953064, - "p95": 60.7680007815361, - "p99": 70.0799971818924 + "p50": 216.19200706481934, + "p90": 224.60800409317017, + "p95": 228.12800109386444, + "p99": 236.12800240516663 }, "isolatedSum": { - "p50": 115.64800143241882, - "p90": 119.74400281906128, - "p95": 121.5360015630722, - "p99": 140.1599943637848 + "p50": 245.69600075483322, + "p90": 257.9519972205162, + "p95": 266.62400364875793, + "p99": 280.15999495983124 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 8, - "stragglerRank": 7, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 2, + "recvTokensMax": 768, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 }, { - 
"tokensPerRank": 16, - "globalTokens": 128, + "tokensPerRank": 512, + "globalTokens": 4096, "dispatch": { - "p50": 59.39200147986412, - "p90": 61.63199990987778, - "p95": 63.35999816656113, - "p99": 71.48800045251846 + "p50": 161.21600568294525, + "p90": 170.01600563526154, + "p95": 172.41600155830383, + "p99": 177.37600207328796 }, "combine": { - "p50": 59.39200147986412, - "p90": 61.63199990987778, - "p95": 63.35999816656113, - "p99": 71.48800045251846 + "p50": 155.16799688339233, + "p90": 159.2320054769516, + "p95": 160.64000129699707, + "p99": 168.83200407028198 }, "roundtrip": { - "p50": 59.39200147986412, - "p90": 61.63199990987778, - "p95": 63.35999816656113, - "p99": 71.48800045251846 + "p50": 286.49601340293884, + "p90": 293.37599873542786, + "p95": 295.77600955963135, + "p99": 300.6080090999603 }, "isolatedSum": { - "p50": 118.78400295972824, - "p90": 123.26399981975555, - "p95": 126.71999633312225, - "p99": 142.97600090503693 + "p50": 316.3840025663376, + "p90": 329.24801111221313, + "p95": 333.0560028553009, + "p99": 346.20800614356995 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 9920512, - "combineLogicalBytes": 9920512, - "fanoutMean": 5.40625, - "recvTokensMax": 8, - "stragglerRank": 7, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 2, + "recvTokensMax": 1536, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 32, - "globalTokens": 256, + "tokensPerRank": 1024, + "globalTokens": 8192, "dispatch": { - "p50": 60.35200133919716, - "p90": 62.463998794555664, - "p95": 64.19199705123901, - "p99": 72.35199958086014 + "p50": 229.88800704479218, + "p90": 240.03200232982635, + "p95": 244.9920028448105, + "p99": 291.29600524902344 }, "combine": { - "p50": 60.35200133919716, - "p90": 62.463998794555664, - "p95": 64.19199705123901, - "p99": 72.35199958086014 + "p50": 280.95999360084534, + "p90": 287.4560058116913, + "p95": 291.00799560546875, + "p99": 
312.00000643730164 }, "roundtrip": { - "p50": 60.35200133919716, - "p90": 62.463998794555664, - "p95": 64.19199705123901, - "p99": 72.35199958086014 + "p50": 480.73598742485046, + "p90": 491.5519952774048, + "p95": 498.27200174331665, + "p99": 536.4159941673279 }, "isolatedSum": { - "p50": 120.70400267839432, - "p90": 124.92799758911133, - "p95": 128.38399410247803, - "p99": 144.70399916172028 + "p50": 510.8480006456375, + "p90": 527.4880081415176, + "p95": 535.9999984502792, + "p99": 603.2960116863251 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19726336, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 8, - "stragglerRank": 4, + "dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 2, + "recvTokensMax": 3072, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 64, - "globalTokens": 512, + "tokensPerRank": 2048, + "globalTokens": 16384, "dispatch": { - "p50": 62.97600269317627, - "p90": 65.76000154018402, - "p95": 66.14399701356888, - "p99": 75.03999769687653 + "p50": 368.3519959449768, + "p90": 378.36799025535583, + "p95": 381.056010723114, + "p99": 384.6080005168915 }, "combine": { - "p50": 62.97600269317627, - "p90": 65.76000154018402, - "p95": 66.14399701356888, - "p99": 75.03999769687653 + "p50": 491.32800102233887, + "p90": 495.87199091911316, + "p95": 499.87199902534485, + "p99": 507.1679949760437 }, "roundtrip": { - "p50": 62.97600269317627, - "p90": 65.76000154018402, - "p95": 66.14399701356888, - "p99": 75.03999769687653 + "p50": 830.3359746932983, + "p90": 837.1840119361877, + "p95": 839.3599987030029, + "p99": 843.8720107078552 }, "isolatedSum": { - "p50": 125.95200538635254, - "p90": 131.52000308036804, - "p95": 132.28799402713776, - "p99": 150.07999539375305 + "p50": 859.6799969673157, + "p90": 874.239981174469, + "p95": 880.9280097484589, + "p99": 891.7759954929352 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 38993920, - 
"combineLogicalBytes": 38993920, - "fanoutMean": 5.3125, - "recvTokensMax": 8, - "stragglerRank": 5, + "dispatchLogicalBytes": 469762048, + "combineLogicalBytes": 469762048, + "fanoutMean": 2, + "recvTokensMax": 6144, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 4096, + "globalTokens": 32768, "dispatch": { - "p50": 70.8480030298233, - "p90": 72.92799651622772, - "p95": 74.07999783754349, - "p99": 86.27200126647949 + "p50": 663.2639765739441, + "p90": 672.1280217170715, + "p95": 677.0560145378113, + "p99": 696.4160203933716 }, "combine": { - "p50": 70.8480030298233, - "p90": 72.92799651622772, - "p95": 74.07999783754349, - "p99": 86.27200126647949 + "p50": 886.5280151367188, + "p90": 895.2640295028687, + "p95": 897.9200124740601, + "p99": 910.0800156593323 }, "roundtrip": { - "p50": 70.8480030298233, - "p90": 72.92799651622772, - "p95": 74.07999783754349, - "p99": 86.27200126647949 + "p50": 1518.5279846191406, + "p90": 1526.8160104751587, + "p95": 1530.3360223770142, + "p99": 1536.7679595947266 }, "isolatedSum": { - "p50": 141.6960060596466, - "p90": 145.85599303245544, - "p95": 148.15999567508698, - "p99": 172.54400253295898 + "p50": 1549.7919917106628, + "p90": 1567.3920512199402, + "p95": 1574.9760270118713, + "p99": 1606.4960360527039 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 8, - "stragglerRank": 7, + "dispatchLogicalBytes": 939524096, + "combineLogicalBytes": 939524096, + "fanoutMean": 2, + "recvTokensMax": 12288, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 @@ -32435,366 +33057,476 @@ ] }, { - "id": "cx-30070070", - "identity": "b300|flashinfer|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", - "colorKey": "b300_a52edb56", - "comparisonKey": "46230412bf8dc722", + "id": 
"cx-c6f5f498", + "identity": "gb300|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|prefill|normal|none|none|0|tuned||eb906a106a6cb71", + "colorKey": "gb300_440d13a2", + "comparisonKey": "e5ffbcf044278e38", "schemaVersion": 3, - "generatedAt": "2026-06-28T01:38:11.748195+00:00", + "generatedAt": "2026-06-29T13:48:54.490576+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_12", - "sku": "b300", - "backend": "flashinfer", - "phase": "decode", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · flashinfer · fp8", + "label": "GB300 EP8 · deepep · bf16 · hotspot-single", "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", + "routing": "hotspot-single", + "routingLabel": "hotspot-single", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, - "dispatchDtype": "fp8", + "dispatchDtype": "bf16", + "kernelGeneration": "v1", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": null, - "configuredUnits": null, - "deviceUnits": 148, - "resourceClass": "fixed-kernel", - "conformanceClass": "not-applicable", - "fixedKernel": true, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, "paretoEligible": false }, "placement": { "kind": "packed", - "nodes": 1, + "nodes": 2, "gpusPerNode": 8, "scaleUpDomain": 8 }, 
"routingConsistent": true, - "traceSignature": "ac583971f94b176", - "workloadId": "set:8:d8d49658059863f2", - "workloadSource": "canonical-serialized", + "traceSignature": "eb906a106a6cb71", + "workloadId": null, + "workloadSource": "seeded-runtime", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, - "backendVersion": null, + "backendVersion": "1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28307775342", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28307775342", - "createdAt": "2026-06-28T01:38:11.748195+00:00", - "sha": "510fc17001789ae4f32b99b60b9a0a0aa53ab6b5" + "id": "28374342409", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409", + "createdAt": "2026-06-29T13:08:27Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { - "tokensPerRank": 1, - "globalTokens": 8, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 50.49600079655647, - "p90": 52.000001072883606, - "p95": 53.21599915623665, - "p99": 66.3359984755516 + "p50": 130.5920034646988, + "p90": 142.11200177669525, + "p95": 145.82400023937225, + "p99": 173.0239987373352 }, "combine": { - "p50": 50.49600079655647, - "p90": 52.000001072883606, - "p95": 53.21599915623665, - "p99": 66.3359984755516 + "p50": 139.3599957227707, + "p90": 146.7200070619583, + "p95": 154.14400398731232, + "p99": 182.20800161361694 }, "roundtrip": { - "p50": 50.49600079655647, - "p90": 52.000001072883606, - "p95": 53.21599915623665, - "p99": 66.3359984755516 + "p50": 245.60000002384186, + "p90": 254.84800338745117, + "p95": 259.2639923095703, + "p99": 282.0799946784973 }, "isolatedSum": { - "p50": 100.99200159311295, - "p90": 104.00000214576721, - "p95": 106.4319983124733, - "p99": 132.6719969511032 + "p50": 269.9519991874695, + "p90": 288.83200883865356, + "p95": 299.96800422668457, + "p99": 355.23200035095215 }, 
"roundtripMeasured": true, - "dispatchLogicalBytes": 315392, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 8, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1024, "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2, - "globalTokens": 16, + "tokensPerRank": 512, + "globalTokens": 4096, "dispatch": { - "p50": 53.21599915623665, - "p90": 55.23199960589409, - "p95": 56.12799897789955, - "p99": 60.32000109553337 + "p50": 255.13601303100586, + "p90": 284.8320007324219, + "p95": 298.0799973011017, + "p99": 321.4080035686493 }, "combine": { - "p50": 53.21599915623665, - "p90": 55.23199960589409, - "p95": 56.12799897789955, - "p99": 60.32000109553337 + "p50": 364.80000615119934, + "p90": 381.82398676872253, + "p95": 394.6560025215149, + "p99": 412.8960072994232 }, "roundtrip": { - "p50": 53.21599915623665, - "p90": 55.23199960589409, - "p95": 56.12799897789955, - "p99": 60.32000109553337 + "p50": 578.0479907989502, + "p90": 592.8959846496582, + "p95": 611.9359731674194, + "p99": 636.0960006713867 }, "isolatedSum": { - "p50": 106.4319983124733, - "p90": 110.46399921178818, - "p95": 112.2559979557991, - "p99": 120.64000219106674 + "p50": 619.9360191822052, + "p90": 666.6559875011444, + "p95": 692.7359998226166, + "p99": 734.3040108680725 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 616448, - "combineLogicalBytes": 1232896, - "fanoutMean": 5.375, - "recvTokensMax": 8, - "stragglerRank": 7, + "dispatchLogicalBytes": 311091200, + "combineLogicalBytes": 311091200, + "fanoutMean": 5.2978515625, + "recvTokensMax": 4096, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 4, - "globalTokens": 32, + "tokensPerRank": 2048, + "globalTokens": 16384, "dispatch": { - "p50": 54.976001381874084, - "p90": 57.40800127387047, - "p95": 59.20000001788139, - "p99": 64.96000289916992 + "p50": 
720.9920287132263, + "p90": 748.3199834823608, + "p95": 759.9359750747681, + "p99": 828.6719918251038 }, "combine": { - "p50": 54.976001381874084, - "p90": 57.40800127387047, - "p95": 59.20000001788139, - "p99": 64.96000289916992 + "p50": 1155.4239988327026, + "p90": 1177.183985710144, + "p95": 1192.479968070984, + "p99": 1226.5599966049194 }, "roundtrip": { - "p50": 54.976001381874084, - "p90": 57.40800127387047, - "p95": 59.20000001788139, - "p99": 64.96000289916992 + "p50": 1848.1279611587524, + "p90": 1863.8720512390137, + "p95": 1877.1840333938599, + "p99": 1896.83198928833 }, "isolatedSum": { - "p50": 109.95200276374817, - "p90": 114.81600254774094, - "p95": 118.40000003576279, - "p99": 129.92000579833984 + "p50": 1876.416027545929, + "p90": 1925.5039691925049, + "p95": 1952.415943145752, + "p99": 2055.231988430023 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1240064, - "combineLogicalBytes": 2480128, - "fanoutMean": 5.40625, - "recvTokensMax": 8, - "stragglerRank": 6, + "dispatchLogicalBytes": 1241511936, + "combineLogicalBytes": 1241511936, + "fanoutMean": 5.28570556640625, + "recvTokensMax": 16384, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-5f0c4166", + "identity": "gb300|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|hotspot-single+eplb|8|prefill|normal|none|none|0|tuned||6248b19ef786add", + "colorKey": "gb300_87f4d4ec", + "comparisonKey": "6a684c17508b8933", + "schemaVersion": 3, + "generatedAt": "2026-06-29T13:56:09.280253+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · 
deepep · bf16 · hotspot-single+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "hotspot-single", + "routingLabel": "hotspot-single+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "6248b19ef786add", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.8466796875, + "eplbImbalanceAfter": 1.0002700343276514, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28374342409", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409", + "createdAt": "2026-06-29T13:08:27Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 127.3919939994812, + "p90": 135.42400300502777, + "p95": 138.14400136470795, + "p99": 149.08799529075623 + }, + "combine": { + "p50": 126.17599964141846, + "p90": 131.9040060043335, + "p95": 133.44000279903412, + "p99": 144.25599575042725 + }, + "roundtrip": { + "p50": 225.8239984512329, + "p90": 234.9119931459427, + "p95": 238.01599442958832, + "p99": 244.28799748420715 + }, + "isolatedSum": { + "p50": 253.56799364089966, + "p90": 267.32800900936127, + "p95": 271.58400416374207, + "p99": 293.3439910411835 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 77701120, + "combineLogicalBytes": 77701120, + "fanoutMean": 5.29296875, + "recvTokensMax": 697, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 256, + "globalTokens": 2048, "dispatch": { - "p50": 54.9440011382103, - "p90": 58.111999183893204, - "p95": 59.20000001788139, - "p99": 68.86400282382965 + "p50": 164.70399498939514, + "p90": 173.66400361061096, + "p95": 176.60799622535706, + "p99": 182.20800161361694 }, "combine": { - "p50": 54.9440011382103, - "p90": 58.111999183893204, - "p95": 59.20000001788139, - "p99": 68.86400282382965 + "p50": 169.08800601959229, + "p90": 176.06399953365326, + "p95": 178.81600558757782, + "p99": 181.8239986896515 }, "roundtrip": { - "p50": 54.9440011382103, - "p90": 58.111999183893204, - "p95": 59.20000001788139, - "p99": 68.86400282382965 + "p50": 305.5039942264557, + "p90": 314.91199135780334, + "p95": 317.7280128002167, + "p99": 328.12801003456116 }, "isolatedSum": { - "p50": 109.8880022764206, - "p90": 116.22399836778641, - "p95": 118.40000003576279, - "p99": 137.7280056476593 + "p50": 333.7920010089874, + "p90": 349.7280031442642, + "p95": 355.4240018129349, + "p99": 364.03200030326843 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2487296, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 8, + "dispatchLogicalBytes": 155187200, + "combineLogicalBytes": 155187200, + "fanoutMean": 5.28564453125, + "recvTokensMax": 1372, "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 16, - "globalTokens": 128, + "tokensPerRank": 512, + "globalTokens": 4096, "dispatch": { - "p50": 56.0000017285347, - "p90": 58.33600088953972, - "p95": 59.167999774217606, - "p99": 64.15999680757523 + "p50": 231.9359928369522, + "p90": 239.74399268627167, + "p95": 242.91199445724487, + "p99": 254.88001108169556 }, "combine": { - "p50": 56.0000017285347, - "p90": 
58.33600088953972, - "p95": 59.167999774217606, - "p99": 64.15999680757523 + "p50": 284.86400842666626, + "p90": 292.7680015563965, + "p95": 294.5919930934906, + "p99": 303.0399978160858 }, "roundtrip": { - "p50": 56.0000017285347, - "p90": 58.33600088953972, - "p95": 59.167999774217606, - "p99": 64.15999680757523 + "p50": 462.8799855709076, + "p90": 471.8720018863678, + "p95": 475.1040041446686, + "p99": 479.93600368499756 }, "isolatedSum": { - "p50": 112.0000034570694, - "p90": 116.67200177907944, - "p95": 118.33599954843521, - "p99": 128.31999361515045 + "p50": 516.8000012636185, + "p90": 532.5119942426682, + "p95": 537.5039875507355, + "p99": 557.9200088977814 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4960256, - "combineLogicalBytes": 9920512, - "fanoutMean": 5.40625, - "recvTokensMax": 8, - "stragglerRank": 5, + "dispatchLogicalBytes": 311162880, + "combineLogicalBytes": 311162880, + "fanoutMean": 5.299072265625, + "recvTokensMax": 2761, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 32, - "globalTokens": 256, + "tokensPerRank": 1024, + "globalTokens": 8192, "dispatch": { - "p50": 56.76800012588501, - "p90": 58.94400179386139, - "p95": 60.15999987721443, - "p99": 69.69600170850754 + "p50": 362.65599727630615, + "p90": 372.8320002555847, + "p95": 377.3120045661926, + "p99": 408.2239866256714 }, "combine": { - "p50": 56.76800012588501, - "p90": 58.94400179386139, - "p95": 60.15999987721443, - "p99": 69.69600170850754 + "p50": 490.911990404129, + "p90": 499.7119903564453, + "p95": 501.8560290336609, + "p99": 520.7679867744446 }, "roundtrip": { - "p50": 56.76800012588501, - "p90": 58.94400179386139, - "p95": 60.15999987721443, - "p99": 69.69600170850754 + "p50": 823.8720297813416, + "p90": 832.1920037269592, + "p95": 836.9280099868774, + "p99": 854.8480272293091 }, "isolatedSum": { - "p50": 113.53600025177002, - "p90": 117.88800358772278, - "p95": 120.31999975442886, - "p99": 139.39200341701508 + 
"p50": 853.5679876804352, + "p90": 872.54399061203, + "p95": 879.1680335998535, + "p99": 928.991973400116 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 9863168, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 8, + "dispatchLogicalBytes": 619974656, + "combineLogicalBytes": 619974656, + "fanoutMean": 5.279052734375, + "recvTokensMax": 5481, "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 64, - "globalTokens": 512, + "tokensPerRank": 2048, + "globalTokens": 16384, "dispatch": { - "p50": 58.52799862623215, - "p90": 61.24800071120262, - "p95": 62.65600025653839, - "p99": 74.49600100517273 + "p50": 612.3200058937073, + "p90": 618.943989276886, + "p95": 622.3359704017639, + "p99": 626.6239881515503 }, "combine": { - "p50": 58.52799862623215, - "p90": 61.24800071120262, - "p95": 62.65600025653839, - "p99": 74.49600100517273 + "p50": 863.2000088691711, + "p90": 870.8480000495911, + "p95": 872.7359771728516, + "p99": 876.9919872283936 }, "roundtrip": { - "p50": 58.52799862623215, - "p90": 61.24800071120262, - "p95": 62.65600025653839, - "p99": 74.49600100517273 + "p50": 1445.4400539398193, + "p90": 1454.0159702301025, + "p95": 1455.8720588684082, + "p99": 1462.6879692077637 }, "isolatedSum": { - "p50": 117.0559972524643, - "p90": 122.49600142240524, - "p95": 125.31200051307678, - "p99": 148.99200201034546 + "p50": 1475.5200147628784, + "p90": 1489.791989326477, + "p95": 1495.0719475746155, + "p99": 1503.6159753799438 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19496960, - "combineLogicalBytes": 38993920, - "fanoutMean": 5.3125, - "recvTokensMax": 8, + "dispatchLogicalBytes": 1240020992, + "combineLogicalBytes": 1240020992, + "fanoutMean": 5.27935791015625, + "recvTokensMax": 10883, "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 4096, + "globalTokens": 32768, "dispatch": { - "p50": 
65.08799642324448, - "p90": 67.26399809122086, - "p95": 67.87200272083282, - "p99": 74.43200051784515 + "p50": 1125.440001487732, + "p90": 1132.256031036377, + "p95": 1134.6559524536133, + "p99": 1146.6879844665527 }, "combine": { - "p50": 65.08799642324448, - "p90": 67.26399809122086, - "p95": 67.87200272083282, - "p99": 74.43200051784515 + "p50": 1601.0240316390991, + "p90": 1609.8239421844482, + "p95": 1611.6479635238647, + "p99": 1616.5440082550049 }, "roundtrip": { - "p50": 65.08799642324448, - "p90": 67.26399809122086, - "p95": 67.87200272083282, - "p99": 74.43200051784515 + "p50": 2701.9519805908203, + "p90": 2711.711883544922, + "p95": 2714.303970336914, + "p99": 2723.871946334839 }, "isolatedSum": { - "p50": 130.17599284648895, - "p90": 134.5279961824417, - "p95": 135.74400544166565, - "p99": 148.8640010356903 + "p50": 2726.464033126831, + "p90": 2742.079973220825, + "p95": 2746.303915977478, + "p99": 2763.2319927215576 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 38836224, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 8, - "stragglerRank": 5, + "dispatchLogicalBytes": 2480414720, + "combineLogicalBytes": 2480414720, + "fanoutMean": 5.2801513671875, + "recvTokensMax": 21702, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -32802,366 +33534,293 @@ ] }, { - "id": "cx-9a73b5f5", - "identity": "b300|flashinfer|7168|8|256|mxfp8|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", - "colorKey": "b300_6af1abcd", - "comparisonKey": "227468e11845c947", + "id": "cx-5a5f4e18", + "identity": "gb300|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|uniform+eplb|8|prefill|normal|none|none|0|tuned||291e5ce62735286", + "colorKey": "gb300_8b7def4e", + "comparisonKey": "e430694c35257860", "schemaVersion": 3, - "generatedAt": "2026-06-28T01:38:16.371741+00:00", + "generatedAt": "2026-06-29T13:38:02.057307+00:00", "status": "valid", - "publicationStatus": 
"official", - "runner": "b300-nv_06", - "sku": "b300", - "backend": "flashinfer", - "phase": "decode", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · flashinfer · mxfp8", - "model": "DeepSeek-V3/V4", + "label": "GB300 EP8 · deepep · bf16 · uniform+eplb", + "model": "DeepSeek-V3 (EPLB physical)", "shape": { "hidden": 7168, "topk": 8, - "experts": 256, + "experts": 288, "routing": "uniform", - "routingLabel": "uniform", + "routingLabel": "uniform+eplb", "routingStep": 0, "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "mxfp8", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": null, - "configuredUnits": null, - "deviceUnits": 148, - "resourceClass": "fixed-kernel", - "conformanceClass": "not-applicable", - "fixedKernel": true, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, "paretoEligible": false }, "placement": { "kind": "packed", - "nodes": 1, + "nodes": 2, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "ac583971f94b176", - "workloadId": "set:8:d8d49658059863f2", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": null, + "traceSignature": "291e5ce62735286", + "workloadId": null, + "workloadSource": "seeded-runtime", + 
"eplbImbalanceBefore": 1.006072998046875, + "eplbImbalanceAfter": 1.0000152587890625, + "backendVersion": "1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28307776684", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28307776684", - "createdAt": "2026-06-28T01:38:16.371741+00:00", - "sha": "510fc17001789ae4f32b99b60b9a0a0aa53ab6b5" + "id": "28374342409", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409", + "createdAt": "2026-06-29T13:08:27Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { - "tokensPerRank": 1, - "globalTokens": 8, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 50.464000552892685, - "p90": 52.2879995405674, - "p95": 53.21599915623665, - "p99": 66.81600213050842 + "p50": 122.94399738311768, + "p90": 130.20800054073334, + "p95": 133.18400084972382, + "p99": 146.08000218868256 }, "combine": { - "p50": 50.464000552892685, - "p90": 52.2879995405674, - "p95": 53.21599915623665, - "p99": 66.81600213050842 + "p50": 122.49600142240524, + "p90": 131.42399489879608, + "p95": 132.7359974384308, + "p99": 143.90400052070618 }, "roundtrip": { - "p50": 50.464000552892685, - "p90": 52.2879995405674, - "p95": 53.21599915623665, - "p99": 66.81600213050842 + "p50": 220.35199403762817, + "p90": 228.83200645446777, + "p95": 231.83999955654144, + "p99": 239.29600417613983 }, "isolatedSum": { - "p50": 100.92800110578537, - "p90": 104.5759990811348, - "p95": 106.4319983124733, - "p99": 133.63200426101685 + "p50": 245.43999880552292, + "p90": 261.6319954395294, + "p95": 265.9199982881546, + "p99": 289.98400270938873 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 315392, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 8, - "stragglerRank": 6, + "dispatchLogicalBytes": 77041664, + "combineLogicalBytes": 77041664, + "fanoutMean": 
5.248046875, + "recvTokensMax": 686, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2, - "globalTokens": 16, + "tokensPerRank": 256, + "globalTokens": 2048, "dispatch": { - "p50": 51.90400034189224, - "p90": 53.568001836538315, - "p95": 54.46400120854378, - "p99": 58.36800113320351 + "p50": 160.41600704193115, + "p90": 168.99199783802032, + "p95": 171.61600291728973, + "p99": 179.80800569057465 }, "combine": { - "p50": 51.90400034189224, - "p90": 53.568001836538315, - "p95": 54.46400120854378, - "p99": 58.36800113320351 + "p50": 167.77600347995758, + "p90": 172.03199863433838, + "p95": 177.69600450992584, + "p99": 182.65600502490997 }, "roundtrip": { - "p50": 51.90400034189224, - "p90": 53.568001836538315, - "p95": 54.46400120854378, - "p99": 58.36800113320351 + "p50": 298.880010843277, + "p90": 306.8160116672516, + "p95": 310.016006231308, + "p99": 319.2319869995117 }, "isolatedSum": { - "p50": 103.80800068378448, - "p90": 107.13600367307663, - "p95": 108.92800241708755, - "p99": 116.73600226640701 + "p50": 328.19201052188873, + "p90": 341.0239964723587, + "p95": 349.3120074272156, + "p99": 362.4640107154846 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 616448, - "combineLogicalBytes": 1232896, - "fanoutMean": 5.375, - "recvTokensMax": 8, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 53.63199859857559, - "p90": 55.39200082421303, - "p95": 56.28800019621849, - "p99": 68.80000233650208 - }, - "combine": { - "p50": 53.63199859857559, - "p90": 55.39200082421303, - "p95": 56.28800019621849, - "p99": 68.80000233650208 - }, - "roundtrip": { - "p50": 53.63199859857559, - "p90": 55.39200082421303, - "p95": 56.28800019621849, - "p99": 68.80000233650208 - }, - "isolatedSum": { - "p50": 107.26399719715118, - "p90": 110.78400164842606, - "p95": 112.57600039243698, - "p99": 137.60000467300415 - }, - 
"roundtripMeasured": true, - "dispatchLogicalBytes": 1240064, - "combineLogicalBytes": 2480128, - "fanoutMean": 5.40625, - "recvTokensMax": 8, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 53.727999329566956, - "p90": 55.776000022888184, - "p95": 56.89600110054016, - "p99": 65.63200056552887 - }, - "combine": { - "p50": 53.727999329566956, - "p90": 55.776000022888184, - "p95": 56.89600110054016, - "p99": 65.63200056552887 - }, - "roundtrip": { - "p50": 53.727999329566956, - "p90": 55.776000022888184, - "p95": 56.89600110054016, - "p99": 65.63200056552887 - }, - "isolatedSum": { - "p50": 107.45599865913391, - "p90": 111.55200004577637, - "p95": 113.79200220108032, - "p99": 131.26400113105774 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2487296, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 8, - "stragglerRank": 7, + "dispatchLogicalBytes": 154542080, + "combineLogicalBytes": 154542080, + "fanoutMean": 5.263671875, + "recvTokensMax": 1365, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 16, - "globalTokens": 128, + "tokensPerRank": 512, + "globalTokens": 4096, "dispatch": { - "p50": 55.135998874902725, - "p90": 56.92800134420395, - "p95": 57.920001447200775, - "p99": 66.3359984755516 + "p50": 228.96000742912292, + "p90": 238.0480021238327, + "p95": 242.01600253582, + "p99": 250.71999430656433 }, "combine": { - "p50": 55.135998874902725, - "p90": 56.92800134420395, - "p95": 57.920001447200775, - "p99": 66.3359984755516 + "p50": 282.46399760246277, + "p90": 291.26399755477905, + "p95": 292.60799288749695, + "p99": 294.40000653266907 }, "roundtrip": { - "p50": 55.135998874902725, - "p90": 56.92800134420395, - "p95": 57.920001447200775, - "p99": 66.3359984755516 + "p50": 465.7599925994873, + "p90": 475.74400901794434, + "p95": 478.8160026073456, + "p99": 
491.90399050712585 }, "isolatedSum": { - "p50": 110.27199774980545, - "p90": 113.8560026884079, - "p95": 115.84000289440155, - "p99": 132.6719969511032 + "p50": 511.4240050315857, + "p90": 529.3119996786118, + "p95": 534.623995423317, + "p99": 545.1200008392334 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4960256, - "combineLogicalBytes": 9920512, - "fanoutMean": 5.40625, - "recvTokensMax": 8, + "dispatchLogicalBytes": 310589440, + "combineLogicalBytes": 310589440, + "fanoutMean": 5.289306640625, + "recvTokensMax": 2746, "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 32, - "globalTokens": 256, + "tokensPerRank": 1024, + "globalTokens": 8192, "dispatch": { - "p50": 55.55199831724167, - "p90": 57.37600103020668, - "p95": 58.04799869656563, - "p99": 64.51199948787689 + "p50": 355.0400137901306, + "p90": 362.4640107154846, + "p95": 364.54400420188904, + "p99": 369.24800276756287 }, "combine": { - "p50": 55.55199831724167, - "p90": 57.37600103020668, - "p95": 58.04799869656563, - "p99": 64.51199948787689 + "p50": 488.70399594306946, + "p90": 492.73601174354553, + "p95": 495.7759976387024, + "p99": 500.4799962043762 }, "roundtrip": { - "p50": 55.55199831724167, - "p90": 57.37600103020668, - "p95": 58.04799869656563, - "p99": 64.51199948787689 + "p50": 815.3280019760132, + "p90": 822.9759931564331, + "p95": 825.6000280380249, + "p99": 832.6399922370911 }, "isolatedSum": { - "p50": 111.10399663448334, - "p90": 114.75200206041336, - "p95": 116.09599739313126, - "p99": 129.02399897575378 + "p50": 843.7440097332001, + "p90": 855.2000224590302, + "p95": 860.3200018405914, + "p99": 869.7279989719391 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 9863168, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 8, - "stragglerRank": 7, + "dispatchLogicalBytes": 619171840, + "combineLogicalBytes": 619171840, + "fanoutMean": 5.272216796875, + "recvTokensMax": 5467, + "stragglerRank": 1, 
"correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 64, - "globalTokens": 512, + "tokensPerRank": 2048, + "globalTokens": 16384, "dispatch": { - "p50": 57.40800127387047, - "p90": 59.51999872922897, - "p95": 60.22400036454201, - "p99": 71.35999947786331 + "p50": 607.807993888855, + "p90": 615.1360273361206, + "p95": 617.2159910202026, + "p99": 620.9279894828796 }, "combine": { - "p50": 57.40800127387047, - "p90": 59.51999872922897, - "p95": 60.22400036454201, - "p99": 71.35999947786331 + "p50": 857.088029384613, + "p90": 861.2800240516663, + "p95": 862.2720241546631, + "p99": 871.1040019989014 }, "roundtrip": { - "p50": 57.40800127387047, - "p90": 59.51999872922897, - "p95": 60.22400036454201, - "p99": 71.35999947786331 + "p50": 1434.656023979187, + "p90": 1442.6239728927612, + "p95": 1445.95205783844, + "p99": 1453.4399509429932 }, "isolatedSum": { - "p50": 114.81600254774094, - "p90": 119.03999745845795, - "p95": 120.44800072908401, - "p99": 142.71999895572662 + "p50": 1464.896023273468, + "p90": 1476.4160513877869, + "p95": 1479.4880151748657, + "p99": 1492.031991481781 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19496960, - "combineLogicalBytes": 38993920, - "fanoutMean": 5.3125, - "recvTokensMax": 8, - "stragglerRank": 7, + "dispatchLogicalBytes": 1238945792, + "combineLogicalBytes": 1238945792, + "fanoutMean": 5.2747802734375, + "recvTokensMax": 10913, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 4096, + "globalTokens": 32768, "dispatch": { - "p50": 63.840001821517944, - "p90": 65.69600105285645, - "p95": 66.84800237417221, - "p99": 73.79200309515 + "p50": 1124.4479417800903, + "p90": 1131.8399906158447, + "p95": 1135.2640390396118, + "p99": 1138.7519836425781 }, "combine": { - "p50": 63.840001821517944, - "p90": 65.69600105285645, - "p95": 66.84800237417221, - "p99": 73.79200309515 + "p50": 1602.5279760360718, + "p90": 
1611.1359596252441, + "p95": 1612.8640174865723, + "p99": 1620.736002922058 }, "roundtrip": { - "p50": 63.840001821517944, - "p90": 65.69600105285645, - "p95": 66.84800237417221, - "p99": 73.79200309515 + "p50": 2700.9921073913574, + "p90": 2710.047960281372, + "p95": 2712.671995162964, + "p99": 2718.91188621521 }, "isolatedSum": { - "p50": 127.68000364303589, - "p90": 131.3920021057129, - "p95": 133.69600474834442, - "p99": 147.5840061903 + "p50": 2726.975917816162, + "p90": 2742.975950241089, + "p95": 2748.128056526184, + "p99": 2759.4879865646362 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 38836224, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 8, - "stragglerRank": 6, + "dispatchLogicalBytes": 2481747968, + "combineLogicalBytes": 2481747968, + "fanoutMean": 5.282989501953125, + "recvTokensMax": 21789, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 @@ -33169,366 +33828,293 @@ ] }, { - "id": "cx-1cb033e4", - "identity": "b300|flashinfer|7168|8|256|nvfp4|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", - "colorKey": "b300_f0247ae6", - "comparisonKey": "0025025816a64ee6", + "id": "cx-939b56bc", + "identity": "gb300|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|prefill|normal|none|none|0|tuned||bfb01c61bdf926e", + "colorKey": "gb300_b3a88763", + "comparisonKey": "92dc80df4affb401", "schemaVersion": 3, - "generatedAt": "2026-06-28T01:38:24.335990+00:00", + "generatedAt": "2026-06-29T14:00:52.364979+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_11", - "sku": "b300", - "backend": "flashinfer", - "phase": "decode", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", 
- "topologyClass": "b300-nvlink-island", - "transport": "nvlink", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · flashinfer · nvfp4", + "label": "GB300 EP8 · deepep · bf16 · zipf", "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", + "routing": "zipf", + "routingLabel": "zipf", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, - "dispatchDtype": "nvfp4", + "dispatchDtype": "bf16", + "kernelGeneration": "v1", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": null, - "configuredUnits": null, - "deviceUnits": 148, - "resourceClass": "fixed-kernel", - "conformanceClass": "not-applicable", - "fixedKernel": true, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, "paretoEligible": false }, "placement": { - "kind": "packed", - "nodes": 1, + "kind": "adversarial", + "nodes": 2, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "ac583971f94b176", - "workloadId": "set:8:d8d49658059863f2", - "workloadSource": "canonical-serialized", + "traceSignature": "bfb01c61bdf926e", + "workloadId": null, + "workloadSource": "seeded-runtime", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, - "backendVersion": null, + "backendVersion": "1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28307777849", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28307777849", - "createdAt": "2026-06-28T01:38:24.335990+00:00", - "sha": "510fc17001789ae4f32b99b60b9a0a0aa53ab6b5" + "id": "28374342409", + "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409", + "createdAt": "2026-06-29T13:08:27Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 1746.3040351867676, - "p90": 1957.1199417114258, - "p95": 2725.9199619293213, - "p99": 3350.719928741455 - }, - "combine": { - "p50": 1746.3040351867676, - "p90": 1957.1199417114258, - "p95": 2725.9199619293213, - "p99": 3350.719928741455 - }, - "roundtrip": { - "p50": 1746.3040351867676, - "p90": 1957.1199417114258, - "p95": 2725.9199619293213, - "p99": 3350.719928741455 - }, - "isolatedSum": { - "p50": 3492.608070373535, - "p90": 3914.2398834228516, - "p95": 5451.839923858643, - "p99": 6701.43985748291 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 157696, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 8, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 1747.3280429840088, - "p90": 2064.6719932556152, - "p95": 2780.8001041412354, - "p99": 3415.9998893737793 - }, - "combine": { - "p50": 1747.3280429840088, - "p90": 2064.6719932556152, - "p95": 2780.8001041412354, - "p99": 3415.9998893737793 - }, - "roundtrip": { - "p50": 1747.3280429840088, - "p90": 2064.6719932556152, - "p95": 2780.8001041412354, - "p99": 3415.9998893737793 - }, - "isolatedSum": { - "p50": 3494.6560859680176, - "p90": 4129.3439865112305, - "p95": 5561.600208282471, - "p99": 6831.999778747559 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 308224, - "combineLogicalBytes": 1232896, - "fanoutMean": 5.375, - "recvTokensMax": 8, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 1755.903959274292, - "p90": 1934.3680143356323, - "p95": 2666.5918827056885, 
- "p99": 3387.00795173645 + "p50": 125.47199428081512, + "p90": 134.20799374580383, + "p95": 138.91200721263885, + "p99": 150.4960060119629 }, "combine": { - "p50": 1755.903959274292, - "p90": 1934.3680143356323, - "p95": 2666.5918827056885, - "p99": 3387.00795173645 + "p50": 134.14399325847626, + "p90": 143.0719941854477, + "p95": 144.896000623703, + "p99": 148.95999431610107 }, "roundtrip": { - "p50": 1755.903959274292, - "p90": 1934.3680143356323, - "p95": 2666.5918827056885, - "p99": 3387.00795173645 + "p50": 234.8479926586151, + "p90": 242.08000302314758, + "p95": 244.7039932012558, + "p99": 250.65600872039795 }, "isolatedSum": { - "p50": 3511.807918548584, - "p90": 3868.7360286712646, - "p95": 5333.183765411377, - "p99": 6774.0159034729 + "p50": 259.6159875392914, + "p90": 277.2799879312515, + "p95": 283.80800783634186, + "p99": 299.45600032806396 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 620032, - "combineLogicalBytes": 2480128, - "fanoutMean": 5.40625, - "recvTokensMax": 8, - "stragglerRank": 4, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 256, + "globalTokens": 2048, "dispatch": { - "p50": 1760.9599828720093, - "p90": 2005.1519870758057, - "p95": 2768.415927886963, - "p99": 3292.3200130462646 + "p50": 161.02400422096252, + "p90": 184.38400328159332, + "p95": 192.47999787330627, + "p99": 212.47999370098114 }, "combine": { - "p50": 1760.9599828720093, - "p90": 2005.1519870758057, - "p95": 2768.415927886963, - "p99": 3292.3200130462646 + "p50": 184.1599941253662, + "p90": 206.01600408554077, + "p95": 230.78399896621704, + "p99": 240.9600019454956 }, "roundtrip": { - "p50": 1760.9599828720093, - "p90": 2005.1519870758057, - "p95": 2768.415927886963, - "p99": 3292.3200130462646 + "p50": 318.36798787117004, + "p90": 
339.3920063972473, + "p95": 351.00799798965454, + "p99": 374.65599179267883 }, "isolatedSum": { - "p50": 3521.9199657440186, - "p90": 4010.3039741516113, - "p95": 5536.831855773926, - "p99": 6584.640026092529 + "p50": 345.18399834632874, + "p90": 390.4000073671341, + "p95": 423.2639968395233, + "p99": 453.43999564647675 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1243648, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 8, - "stragglerRank": 7, + "dispatchLogicalBytes": 100509696, + "combineLogicalBytes": 100509696, + "fanoutMean": 3.42333984375, + "recvTokensMax": 2046, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 16, - "globalTokens": 128, + "tokensPerRank": 512, + "globalTokens": 4096, "dispatch": { - "p50": 1761.0559463500977, - "p90": 2489.1200065612793, - "p95": 2836.575984954834, - "p99": 4053.1201362609863 + "p50": 231.77599906921387, + "p90": 241.98399484157562, + "p95": 250.40000677108765, + "p99": 270.01601457595825 }, "combine": { - "p50": 1761.0559463500977, - "p90": 2489.1200065612793, - "p95": 2836.575984954834, - "p99": 4053.1201362609863 + "p50": 345.95200419425964, + "p90": 367.16800928115845, + "p95": 391.4879858493805, + "p99": 408.86399149894714 }, "roundtrip": { - "p50": 1761.0559463500977, - "p90": 2489.1200065612793, - "p95": 2836.575984954834, - "p99": 4053.1201362609863 + "p50": 544.6079969406128, + "p90": 556.6400289535522, + "p95": 564.0959739685059, + "p99": 585.6320261955261 }, "isolatedSum": { - "p50": 3522.1118927001953, - "p90": 4978.240013122559, - "p95": 5673.151969909668, - "p99": 8106.240272521973 + "p50": 577.7280032634735, + "p90": 609.1520041227341, + "p95": 641.8879926204681, + "p99": 678.8800060749054 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2480128, - "combineLogicalBytes": 9920512, - "fanoutMean": 5.40625, - "recvTokensMax": 8, - "stragglerRank": 4, + "dispatchLogicalBytes": 201678848, + "combineLogicalBytes": 
201678848, + "fanoutMean": 3.4345703125, + "recvTokensMax": 4094, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 32, - "globalTokens": 256, + "tokensPerRank": 1024, + "globalTokens": 8192, "dispatch": { - "p50": 1755.552053451538, - "p90": 1923.6479997634888, - "p95": 2723.328113555908, - "p99": 3401.18408203125 + "p50": 371.71199917793274, + "p90": 382.207989692688, + "p95": 386.3680064678192, + "p99": 416.3520038127899 }, "combine": { - "p50": 1755.552053451538, - "p90": 1923.6479997634888, - "p95": 2723.328113555908, - "p99": 3401.18408203125 + "p50": 624.5759725570679, + "p90": 629.6319961547852, + "p95": 633.9840292930603, + "p99": 647.9039788246155 }, "roundtrip": { - "p50": 1755.552053451538, - "p90": 1923.6479997634888, - "p95": 2723.328113555908, - "p99": 3401.18408203125 + "p50": 955.1039934158325, + "p90": 964.8640155792236, + "p95": 969.8240160942078, + "p99": 981.2800288200378 }, "isolatedSum": { - "p50": 3511.104106903076, - "p90": 3847.2959995269775, - "p95": 5446.656227111816, - "p99": 6802.3681640625 + "p50": 996.2879717350006, + "p90": 1011.8399858474731, + "p95": 1020.3520357608795, + "p99": 1064.2559826374054 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4931584, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 8, - "stragglerRank": 6, + "dispatchLogicalBytes": 405035008, + "combineLogicalBytes": 405035008, + "fanoutMean": 3.4488525390625, + "recvTokensMax": 8189, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 64, - "globalTokens": 512, + "tokensPerRank": 2048, + "globalTokens": 16384, "dispatch": { - "p50": 1759.071946144104, - "p90": 2116.3198947906494, - "p95": 2758.9120864868164, - "p99": 3519.9038982391357 + "p50": 686.240017414093, + "p90": 698.6240148544312, + "p95": 704.479992389679, + "p99": 723.6800193786621 }, "combine": { - "p50": 1759.071946144104, - "p90": 2116.3198947906494, - "p95": 
2758.9120864868164, - "p99": 3519.9038982391357 + "p50": 1130.6560039520264, + "p90": 1135.807991027832, + "p95": 1141.5679454803467, + "p99": 1145.9519863128662 }, "roundtrip": { - "p50": 1759.071946144104, - "p90": 2116.3198947906494, - "p95": 2758.9120864868164, - "p99": 3519.9038982391357 + "p50": 1787.4239683151245, + "p90": 1797.2160577774048, + "p95": 1799.839973449707, + "p99": 1810.304045677185 }, "isolatedSum": { - "p50": 3518.143892288208, - "p90": 4232.639789581299, - "p95": 5517.824172973633, - "p99": 7039.8077964782715 + "p50": 1816.8960213661194, + "p90": 1834.4320058822632, + "p95": 1846.0479378700256, + "p99": 1869.6320056915283 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 9748480, - "combineLogicalBytes": 38993920, - "fanoutMean": 5.3125, - "recvTokensMax": 8, - "stragglerRank": 4, + "dispatchLogicalBytes": 808822784, + "combineLogicalBytes": 808822784, + "fanoutMean": 3.44354248046875, + "recvTokensMax": 16380, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 4096, + "globalTokens": 32768, "dispatch": { - "p50": 1765.4720544815063, - "p90": 2013.4079456329346, - "p95": 2776.8959999084473, - "p99": 3300.992012023926 + "p50": 1334.8480463027954, + "p90": 1348.031997680664, + "p95": 1351.8719673156738, + "p99": 1359.231948852539 }, "combine": { - "p50": 1765.4720544815063, - "p90": 2013.4079456329346, - "p95": 2776.8959999084473, - "p99": 3300.992012023926 + "p50": 2164.1600131988525, + "p90": 2191.135883331299, + "p95": 2203.552007675171, + "p99": 2225.4080772399902 }, "roundtrip": { - "p50": 1765.4720544815063, - "p90": 2013.4079456329346, - "p95": 2776.8959999084473, - "p99": 3300.992012023926 + "p50": 3478.6880016326904, + "p90": 3500.6399154663086, + "p95": 3511.0080242156982, + "p99": 3533.087968826294 }, "isolatedSum": { - "p50": 3530.9441089630127, - "p90": 4026.815891265869, - "p95": 5553.7919998168945, - "p99": 6601.984024047852 + 
"p50": 3499.008059501648, + "p90": 3539.167881011963, + "p95": 3555.4239749908447, + "p99": 3584.6400260925293 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19418112, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 8, - "stragglerRank": 7, + "dispatchLogicalBytes": 1619795968, + "combineLogicalBytes": 1619795968, + "fanoutMean": 3.4481201171875, + "recvTokensMax": 32761, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 @@ -33536,291 +34122,364 @@ ] }, { - "id": "cx-207d8ef2", - "identity": "b300|flashinfer|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", - "colorKey": "b300_5ec8473f", - "comparisonKey": "01804e6d9a96754e", + "id": "cx-3904ac00", + "identity": "gb300|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|prefill|normal|none|none|0|tuned||e47f9de18e6cabe", + "colorKey": "gb300_b3a88763", + "comparisonKey": "92dc80df4affb401", "schemaVersion": 3, - "generatedAt": "2026-06-27T17:26:59.581224+00:00", + "generatedAt": "2026-06-29T13:45:08.590637+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_03", - "sku": "b300", - "backend": "flashinfer", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", "phase": "prefill", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · flashinfer · bf16", + "label": "GB300 EP8 · deepep · bf16 · zipf", "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", + "routing": "zipf", + "routingLabel": "zipf", "routingStep": 0, "unevenTokens": 
"none", "eplbEnabled": false, "dispatchDtype": "bf16", + "kernelGeneration": "v1", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": null, - "configuredUnits": null, - "deviceUnits": 148, - "resourceClass": "fixed-kernel", - "conformanceClass": "not-applicable", - "fixedKernel": true, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, "paretoEligible": false }, "placement": { "kind": "packed", - "nodes": 1, + "nodes": 2, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "64d989e2e2a6b31", - "workloadId": "set:6:a426d66e479dc893", - "workloadSource": "canonical-serialized", + "traceSignature": "e47f9de18e6cabe", + "workloadId": null, + "workloadSource": "seeded-runtime", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, - "backendVersion": null, + "backendVersion": "1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28296434249", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28296434249", - "createdAt": "2026-06-27T17:26:59.581224+00:00", - "sha": "2ebeba9134a8c84f7a80ac87742d57f7cdf1cf18" + "id": "28374342409", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409", + "createdAt": "2026-06-29T13:08:27Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 69.31199878454208, - "p90": 70.91200351715088, - "p95": 71.58400118350983, - "p99": 75.42400062084198 + "p50": 127.77599692344666, + "p90": 135.5839967727661, + "p95": 139.67999815940857, + "p99": 148.67199957370758 }, "combine": { - "p50": 69.31199878454208, - "p90": 70.91200351715088, - "p95": 71.58400118350983, - "p99": 
75.42400062084198 + "p50": 134.8479986190796, + "p90": 144.31999623775482, + "p95": 145.91999351978302, + "p99": 156.09599649906158 }, "roundtrip": { - "p50": 69.31199878454208, - "p90": 70.91200351715088, - "p95": 71.58400118350983, - "p99": 75.42400062084198 + "p50": 237.18400299549103, + "p90": 246.7840015888214, + "p95": 249.28000569343567, + "p99": 256.415992975235 }, "isolatedSum": { - "p50": 138.62399756908417, - "p90": 141.82400703430176, - "p95": 143.16800236701965, - "p99": 150.84800124168396 + "p50": 262.62399554252625, + "p90": 279.90399301052094, + "p95": 285.5999916791916, + "p99": 304.76799607276917 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 8, - "stragglerRank": 4, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 256, - "globalTokens": 2048, + "tokensPerRank": 512, + "globalTokens": 4096, "dispatch": { - "p50": 96.22400254011154, - "p90": 98.2080027461052, - "p95": 99.10400211811066, - "p99": 110.75200140476227 + "p50": 232.67200589179993, + "p90": 241.37599766254425, + "p95": 246.0159957408905, + "p99": 254.04798984527588 }, "combine": { - "p50": 96.22400254011154, - "p90": 98.2080027461052, - "p95": 99.10400211811066, - "p99": 110.75200140476227 + "p50": 347.7120101451874, + "p90": 356.25600814819336, + "p95": 358.3360016345978, + "p99": 368.1280016899109 }, "roundtrip": { - "p50": 96.22400254011154, - "p90": 98.2080027461052, - "p95": 99.10400211811066, - "p99": 110.75200140476227 + "p50": 542.1119928359985, + "p90": 551.9359707832336, + "p95": 553.9199709892273, + "p99": 563.3599758148193 }, "isolatedSum": { - "p50": 192.44800508022308, - "p90": 196.4160054922104, - "p95": 198.2080042362213, - "p99": 221.50400280952454 + "p50": 580.3840160369873, + "p90": 
597.6320058107376, + "p95": 604.3519973754883, + "p99": 622.1759915351868 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 155889664, - "combineLogicalBytes": 155889664, - "fanoutMean": 5.3095703125, - "recvTokensMax": 8, - "stragglerRank": 4, + "dispatchLogicalBytes": 201678848, + "combineLogicalBytes": 201678848, + "fanoutMean": 3.4345703125, + "recvTokensMax": 4094, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 512, - "globalTokens": 4096, + "tokensPerRank": 2048, + "globalTokens": 16384, "dispatch": { - "p50": 153.08800339698792, - "p90": 155.42399883270264, - "p95": 156.47999942302704, - "p99": 161.79199516773224 + "p50": 694.5279836654663, + "p90": 704.7680020332336, + "p95": 707.8080177307129, + "p99": 722.3680019378662 }, "combine": { - "p50": 153.08800339698792, - "p90": 155.42399883270264, - "p95": 156.47999942302704, - "p99": 161.79199516773224 + "p50": 1134.6240043640137, + "p90": 1143.1039571762085, + "p95": 1144.5759534835815, + "p99": 1158.944010734558 }, "roundtrip": { - "p50": 153.08800339698792, - "p90": 155.42399883270264, - "p95": 156.47999942302704, - "p99": 161.79199516773224 + "p50": 1801.3440370559692, + "p90": 1811.6159439086914, + "p95": 1815.4560327529907, + "p99": 1830.399990081787 }, "isolatedSum": { - "p50": 306.17600679397583, - "p90": 310.8479976654053, - "p95": 312.9599988460541, - "p99": 323.5839903354645 + "p50": 1829.15198802948, + "p90": 1847.8719592094421, + "p95": 1852.3839712142944, + "p99": 1881.3120126724243 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 312266752, - "combineLogicalBytes": 312266752, - "fanoutMean": 5.31787109375, - "recvTokensMax": 8, - "stragglerRank": 4, + "dispatchLogicalBytes": 808822784, + "combineLogicalBytes": 808822784, + "fanoutMean": 3.44354248046875, + "recvTokensMax": 16380, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 - }, + } + ] + }, + { + "id": "cx-6da8b67d", + "identity": 
"gb300|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|prefill|normal|none|none|0|tuned||9014f8b812bd39e", + "colorKey": "gb300_961589b9", + "comparisonKey": "796f3c416772b90a", + "schemaVersion": 3, + "generatedAt": "2026-06-29T13:47:00.191579+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep · bf16 · zipf-heavy", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "9014f8b812bd39e", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28374342409", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409", + "createdAt": "2026-06-29T13:08:27Z", + "sha": 
"38890f652c38d280794346b6b5b951b9b380a24a" + }, + "rows": [ { - "tokensPerRank": 1024, - "globalTokens": 8192, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 270.6240117549896, - "p90": 273.6319899559021, - "p95": 275.07200837135315, - "p99": 281.3119888305664 + "p50": 124.15999919176102, + "p90": 132.6719969511032, + "p95": 136.89599931240082, + "p99": 145.7280069589615 }, "combine": { - "p50": 270.6240117549896, - "p90": 273.6319899559021, - "p95": 275.07200837135315, - "p99": 281.3119888305664 + "p50": 130.14400005340576, + "p90": 134.97599959373474, + "p95": 139.23199474811554, + "p99": 145.47200500965118 }, "roundtrip": { - "p50": 270.6240117549896, - "p90": 273.6319899559021, - "p95": 275.07200837135315, - "p99": 281.3119888305664 + "p50": 227.00800001621246, + "p90": 238.36800456047058, + "p95": 241.82400107383728, + "p99": 252.00000405311584 }, "isolatedSum": { - "p50": 541.2480235099792, - "p90": 547.2639799118042, - "p95": 550.1440167427063, - "p99": 562.6239776611328 + "p50": 254.30399924516678, + "p90": 267.64799654483795, + "p95": 276.12799406051636, + "p99": 291.20001196861267 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 623443968, - "combineLogicalBytes": 623443968, - "fanoutMean": 5.30859375, - "recvTokensMax": 8, + "dispatchLogicalBytes": 22650880, + "combineLogicalBytes": 22650880, + "fanoutMean": 1.54296875, + "recvTokensMax": 1024, "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2048, - "globalTokens": 16384, + "tokensPerRank": 512, + "globalTokens": 4096, "dispatch": { - "p50": 498.6560046672821, - "p90": 501.6000270843506, - "p95": 502.6879906654358, - "p99": 510.3679895401001 + "p50": 212.19199895858765, + "p90": 220.47999501228333, + "p95": 222.88000583648682, + "p99": 228.12800109386444 }, "combine": { - "p50": 498.6560046672821, - "p90": 501.6000270843506, - "p95": 502.6879906654358, - "p99": 510.3679895401001 + "p50": 314.62401151657104, + "p90": 
324.6400058269501, + "p95": 327.7760148048401, + "p99": 331.6799998283386 }, "roundtrip": { - "p50": 498.6560046672821, - "p90": 501.6000270843506, - "p95": 502.6879906654358, - "p99": 510.3679895401001 + "p50": 499.55201148986816, + "p90": 511.23201847076416, + "p95": 515.1039958000183, + "p99": 522.4000215530396 }, "isolatedSum": { - "p50": 997.3120093345642, - "p90": 1003.2000541687012, - "p95": 1005.3759813308716, - "p99": 1020.7359790802002 + "p50": 526.8160104751587, + "p90": 545.1200008392334, + "p95": 550.6560206413269, + "p99": 559.8080009222031 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1243805696, - "combineLogicalBytes": 1243805696, - "fanoutMean": 5.29547119140625, - "recvTokensMax": 8, - "stragglerRank": 6, + "dispatchLogicalBytes": 91521024, + "combineLogicalBytes": 91521024, + "fanoutMean": 1.55859375, + "recvTokensMax": 4096, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 4096, - "globalTokens": 32768, + "tokensPerRank": 2048, + "globalTokens": 16384, "dispatch": { - "p50": 943.9679980278015, - "p90": 947.4560022354126, - "p95": 949.0879774093628, - "p99": 961.6640210151672 + "p50": 614.1120195388794, + "p90": 625.1199841499329, + "p95": 628.5439729690552, + "p99": 638.3360028266907 }, "combine": { - "p50": 943.9679980278015, - "p90": 947.4560022354126, - "p95": 949.0879774093628, - "p99": 961.6640210151672 + "p50": 1166.1440134048462, + "p90": 1170.9760427474976, + "p95": 1172.9919910430908, + "p99": 1179.5519590377808 }, "roundtrip": { - "p50": 943.9679980278015, - "p90": 947.4560022354126, - "p95": 949.0879774093628, - "p99": 961.6640210151672 + "p50": 1718.7199592590332, + "p90": 1729.0879487991333, + "p95": 1731.5839529037476, + "p99": 1738.976001739502 }, "isolatedSum": { - "p50": 1887.935996055603, - "p90": 1894.9120044708252, - "p95": 1898.1759548187256, - "p99": 1923.3280420303345 + "p50": 1780.2560329437256, + "p90": 1796.0960268974304, + "p95": 1801.535964012146, + "p99": 
1817.8879618644714 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2487009280, - "combineLogicalBytes": 2487009280, - "fanoutMean": 5.294189453125, - "recvTokensMax": 8, + "dispatchLogicalBytes": 368062464, + "combineLogicalBytes": 368062464, + "fanoutMean": 1.5670166015625, + "recvTokensMax": 16384, "stragglerRank": 4, "correct": true, "samplesPooled": 600, @@ -33829,107 +34488,108 @@ ] }, { - "id": "cx-ae942e6d", - "identity": "b300|flashinfer|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", - "colorKey": "b300_a52edb56", - "comparisonKey": "e30791951192637e", + "id": "cx-37cf5d77", + "identity": "gb300|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-heavy+eplb|8|prefill|normal|none|none|0|tuned||0e6b07a25691d72", + "colorKey": "gb300_db9a43b5", + "comparisonKey": "8cb163d8db9bc0c6", "schemaVersion": 3, - "generatedAt": "2026-06-28T01:38:12.335801+00:00", + "generatedAt": "2026-06-29T13:54:01.125432+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_15", - "sku": "b300", - "backend": "flashinfer", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", "phase": "prefill", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · flashinfer · fp8", - "model": "DeepSeek-V3/V4", + "label": "GB300 EP8 · deepep · bf16 · zipf-heavy+eplb", + "model": "DeepSeek-V3 (EPLB physical)", "shape": { "hidden": 7168, "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", + "experts": 288, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy+eplb", "routingStep": 0, "unevenTokens": "none", - "eplbEnabled": 
false, - "dispatchDtype": "fp8", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": null, - "configuredUnits": null, - "deviceUnits": 148, - "resourceClass": "fixed-kernel", - "conformanceClass": "not-applicable", - "fixedKernel": true, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, "paretoEligible": false }, "placement": { "kind": "packed", - "nodes": 1, + "nodes": 2, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "64d989e2e2a6b31", - "workloadId": "set:6:a426d66e479dc893", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": null, + "traceSignature": "0e6b07a25691d72", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 7.38995361328125, + "eplbImbalanceAfter": 1.0000210716610862, + "backendVersion": "1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28307775342", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28307775342", - "createdAt": "2026-06-28T01:38:12.335801+00:00", - "sha": "510fc17001789ae4f32b99b60b9a0a0aa53ab6b5" + "id": "28374342409", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409", + "createdAt": "2026-06-29T13:08:27Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 63.93600255250931, - "p90": 65.76000154018402, - "p95": 66.6240006685257, - "p99": 72.73600250482559 + "p50": 128.09599936008453, + "p90": 137.95199990272522, + "p95": 142.14399456977844, + "p99": 
166.46400094032288 }, "combine": { - "p50": 63.93600255250931, - "p90": 65.76000154018402, - "p95": 66.6240006685257, - "p99": 72.73600250482559 + "p50": 125.95200538635254, + "p90": 132.38400220870972, + "p95": 134.94400680065155, + "p99": 147.07200229167938 }, "roundtrip": { - "p50": 63.93600255250931, - "p90": 65.76000154018402, - "p95": 66.6240006685257, - "p99": 72.73600250482559 + "p50": 224.8000055551529, + "p90": 233.7920069694519, + "p95": 238.24000358581543, + "p99": 269.9199914932251 }, "isolatedSum": { - "p50": 127.87200510501862, - "p90": 131.52000308036804, - "p95": 133.2480013370514, - "p99": 145.47200500965118 + "p50": 254.04800474643707, + "p90": 270.33600211143494, + "p95": 277.08800137043, + "p99": 313.53600323200226 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 38836224, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 8, - "stragglerRank": 7, + "dispatchLogicalBytes": 79206400, + "combineLogicalBytes": 79206400, + "fanoutMean": 5.3955078125, + "recvTokensMax": 713, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -33938,35 +34598,35 @@ "tokensPerRank": 256, "globalTokens": 2048, "dispatch": { - "p50": 84.03199911117554, - "p90": 86.11200004816055, - "p95": 86.56000345945358, - "p99": 92.83199906349182 + "p50": 166.20799899101257, + "p90": 173.43999445438385, + "p95": 176.96000635623932, + "p99": 193.50400567054749 }, "combine": { - "p50": 84.03199911117554, - "p90": 86.11200004816055, - "p95": 86.56000345945358, - "p99": 92.83199906349182 + "p50": 169.0240055322647, + "p90": 175.77600479125977, + "p95": 179.58399653434753, + "p99": 203.8400024175644 }, "roundtrip": { - "p50": 84.03199911117554, - "p90": 86.11200004816055, - "p95": 86.56000345945358, - "p99": 92.83199906349182 + "p50": 304.80000376701355, + "p90": 313.82399797439575, + "p95": 317.8560137748718, + "p99": 350.71998834609985 }, "isolatedSum": { - "p50": 168.06399822235107, - "p90": 172.2240000963211, - "p95": 
173.12000691890717, - "p99": 185.66399812698364 + "p50": 335.2320045232773, + "p90": 349.2159992456436, + "p95": 356.54400289058685, + "p99": 397.3440080881119 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77944832, - "combineLogicalBytes": 155889664, - "fanoutMean": 5.3095703125, - "recvTokensMax": 8, - "stragglerRank": 7, + "dispatchLogicalBytes": 159330304, + "combineLogicalBytes": 159330304, + "fanoutMean": 5.4267578125, + "recvTokensMax": 1436, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -33975,35 +34635,35 @@ "tokensPerRank": 512, "globalTokens": 4096, "dispatch": { - "p50": 130.87999820709229, - "p90": 133.56800377368927, - "p95": 135.26399433612823, - "p99": 155.5200070142746 + "p50": 233.98399353027344, + "p90": 243.55199933052063, + "p95": 246.17600440979004, + "p99": 277.0560085773468 }, "combine": { - "p50": 130.87999820709229, - "p90": 133.56800377368927, - "p95": 135.26399433612823, - "p99": 155.5200070142746 + "p50": 293.5679852962494, + "p90": 300.86401104927063, + "p95": 304.4160008430481, + "p99": 320.73599100112915 }, "roundtrip": { - "p50": 130.87999820709229, - "p90": 133.56800377368927, - "p95": 135.26399433612823, - "p99": 155.5200070142746 + "p50": 469.5039987564087, + "p90": 478.2400131225586, + "p95": 483.99999737739563, + "p99": 497.72799015045166 }, "isolatedSum": { - "p50": 261.75999641418457, - "p90": 267.13600754737854, - "p95": 270.52798867225647, - "p99": 311.0400140285492 + "p50": 527.5519788265228, + "p90": 544.4160103797913, + "p95": 550.5920052528381, + "p99": 597.791999578476 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 156133376, - "combineLogicalBytes": 312266752, - "fanoutMean": 5.31787109375, - "recvTokensMax": 8, - "stragglerRank": 7, + "dispatchLogicalBytes": 319535104, + "combineLogicalBytes": 319535104, + "fanoutMean": 5.441650390625, + "recvTokensMax": 2897, + "stragglerRank": 5, "correct": true, "samplesPooled": 600, "trials": 3 @@ -34012,35 +34672,35 @@ 
"tokensPerRank": 1024, "globalTokens": 8192, "dispatch": { - "p50": 229.40799593925476, - "p90": 233.11999440193176, - "p95": 234.27200317382812, - "p99": 247.48800694942474 + "p50": 364.25599455833435, + "p90": 375.328004360199, + "p95": 380.95998764038086, + "p99": 411.23199462890625 }, "combine": { - "p50": 229.40799593925476, - "p90": 233.11999440193176, - "p95": 234.27200317382812, - "p99": 247.48800694942474 + "p50": 496.6079890727997, + "p90": 505.2480101585388, + "p95": 509.0879797935486, + "p99": 533.6959958076477 }, "roundtrip": { - "p50": 229.40799593925476, - "p90": 233.11999440193176, - "p95": 234.27200317382812, - "p99": 247.48800694942474 + "p50": 832.0320248603821, + "p90": 843.2000279426575, + "p95": 850.9439826011658, + "p99": 886.4319920539856 }, "isolatedSum": { - "p50": 458.8159918785095, - "p90": 466.2399888038635, - "p95": 468.54400634765625, - "p99": 494.9760138988495 + "p50": 860.863983631134, + "p90": 880.5760145187378, + "p95": 890.0479674339294, + "p99": 944.927990436554 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 311721984, - "combineLogicalBytes": 623443968, - "fanoutMean": 5.30859375, - "recvTokensMax": 8, - "stragglerRank": 5, + "dispatchLogicalBytes": 638410752, + "combineLogicalBytes": 638410752, + "fanoutMean": 5.43603515625, + "recvTokensMax": 5815, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -34049,35 +34709,35 @@ "tokensPerRank": 2048, "globalTokens": 16384, "dispatch": { - "p50": 404.992014169693, - "p90": 407.9360067844391, - "p95": 409.63199734687805, - "p99": 412.6720130443573 + "p50": 623.2640147209167, + "p90": 633.3119869232178, + "p95": 638.3039951324463, + "p99": 663.1680130958557 }, "combine": { - "p50": 404.992014169693, - "p90": 407.9360067844391, - "p95": 409.63199734687805, - "p99": 412.6720130443573 + "p50": 895.5519795417786, + "p90": 901.7599821090698, + "p95": 906.2079787254333, + "p99": 914.5280122756958 }, "roundtrip": { - "p50": 404.992014169693, - "p90": 
407.9360067844391, - "p95": 409.63199734687805, - "p99": 412.6720130443573 + "p50": 1488.2240295410156, + "p90": 1498.304009437561, + "p95": 1500.991940498352, + "p99": 1510.7200145721436 }, "isolatedSum": { - "p50": 809.984028339386, - "p90": 815.8720135688782, - "p95": 819.2639946937561, - "p99": 825.3440260887146 + "p50": 1518.8159942626953, + "p90": 1535.0719690322876, + "p95": 1544.5119738578796, + "p99": 1577.6960253715515 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 621902848, - "combineLogicalBytes": 1243805696, - "fanoutMean": 5.29547119140625, - "recvTokensMax": 8, - "stragglerRank": 7, + "dispatchLogicalBytes": 1275144192, + "combineLogicalBytes": 1275144192, + "fanoutMean": 5.42889404296875, + "recvTokensMax": 11606, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -34086,34 +34746,34 @@ "tokensPerRank": 4096, "globalTokens": 32768, "dispatch": { - "p50": 756.1280131340027, - "p90": 759.6480250358582, - "p95": 760.703980922699, - "p99": 774.944007396698 + "p50": 1152.1919965744019, + "p90": 1162.6240015029907, + "p95": 1166.7200326919556, + "p99": 1189.95201587677 }, "combine": { - "p50": 756.1280131340027, - "p90": 759.6480250358582, - "p95": 760.703980922699, - "p99": 774.944007396698 + "p50": 1676.7679452896118, + "p90": 1686.2080097198486, + "p95": 1689.2160177230835, + "p99": 1704.7679424285889 }, "roundtrip": { - "p50": 756.1280131340027, - "p90": 759.6480250358582, - "p95": 760.703980922699, - "p99": 774.944007396698 + "p50": 2801.2800216674805, + "p90": 2810.976028442383, + "p95": 2813.568115234375, + "p99": 2820.9919929504395 }, "isolatedSum": { - "p50": 1512.2560262680054, - "p90": 1519.2960500717163, - "p95": 1521.407961845398, - "p99": 1549.888014793396 + "p50": 2828.9599418640137, + "p90": 2848.8320112228394, + "p95": 2855.936050415039, + "p99": 2894.719958305359 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1243504640, - "combineLogicalBytes": 2487009280, - "fanoutMean": 5.294189453125, - 
"recvTokensMax": 8, + "dispatchLogicalBytes": 2546374656, + "combineLogicalBytes": 2546374656, + "fanoutMean": 5.420562744140625, + "recvTokensMax": 23170, "stragglerRank": 4, "correct": true, "samplesPooled": 600, @@ -34122,106 +34782,107 @@ ] }, { - "id": "cx-dede56e2", - "identity": "b300|flashinfer|7168|8|256|mxfp8|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", - "colorKey": "b300_6af1abcd", - "comparisonKey": "26534c8239f2bdd1", + "id": "cx-2c8de23f", + "identity": "gb300|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-mild|8|prefill|normal|none|none|0|tuned||a39eeb7c2dc6ca7", + "colorKey": "gb300_15a35db4", + "comparisonKey": "ce656c1689809360", "schemaVersion": 3, - "generatedAt": "2026-06-28T01:38:47.923344+00:00", + "generatedAt": "2026-06-29T13:45:06.518011+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_14", - "sku": "b300", - "backend": "flashinfer", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", "phase": "prefill", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · flashinfer · mxfp8", + "label": "GB300 EP8 · deepep · bf16 · zipf-mild", "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", + "routing": "zipf-mild", + "routingLabel": "zipf-mild", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, - "dispatchDtype": "mxfp8", + "dispatchDtype": "bf16", + "kernelGeneration": "v1", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": null, - 
"configuredUnits": null, - "deviceUnits": 148, - "resourceClass": "fixed-kernel", - "conformanceClass": "not-applicable", - "fixedKernel": true, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, "paretoEligible": false }, "placement": { "kind": "packed", - "nodes": 1, + "nodes": 2, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "64d989e2e2a6b31", - "workloadId": "set:6:a426d66e479dc893", - "workloadSource": "canonical-serialized", + "traceSignature": "a39eeb7c2dc6ca7", + "workloadId": null, + "workloadSource": "seeded-runtime", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, - "backendVersion": null, + "backendVersion": "1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28307776684", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28307776684", - "createdAt": "2026-06-28T01:38:47.923344+00:00", - "sha": "510fc17001789ae4f32b99b60b9a0a0aa53ab6b5" + "id": "28374342409", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409", + "createdAt": "2026-06-29T13:08:27Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 64.7680014371872, - "p90": 66.39999896287918, - "p95": 67.32799857854843, - "p99": 76.4160007238388 + "p50": 126.91199779510498, + "p90": 136.63999736309052, + "p95": 144.96000111103058, + "p99": 181.7599982023239 }, "combine": { - "p50": 64.7680014371872, - "p90": 66.39999896287918, - "p95": 67.32799857854843, - "p99": 76.4160007238388 + "p50": 133.69600474834442, + "p90": 151.0400027036667, + "p95": 164.57599401474, + "p99": 193.50400567054749 }, "roundtrip": { - "p50": 64.7680014371872, - "p90": 66.39999896287918, - "p95": 
67.32799857854843, - "p99": 76.4160007238388 + "p50": 237.34399676322937, + "p90": 254.94399666786194, + "p95": 272.6080119609833, + "p99": 294.49599981307983 }, "isolatedSum": { - "p50": 129.5360028743744, - "p90": 132.79999792575836, - "p95": 134.65599715709686, - "p99": 152.8320014476776 + "p50": 260.6080025434494, + "p90": 287.6800000667572, + "p95": 309.53599512577057, + "p99": 375.2640038728714 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 38836224, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 8, + "dispatchLogicalBytes": 70160384, + "combineLogicalBytes": 70160384, + "fanoutMean": 4.779296875, + "recvTokensMax": 987, "stragglerRank": 7, "correct": true, "samplesPooled": 600, @@ -34231,35 +34892,35 @@ "tokensPerRank": 256, "globalTokens": 2048, "dispatch": { - "p50": 84.28800106048584, - "p90": 85.40800213813782, - "p95": 86.07999980449677, - "p99": 86.91199868917465 + "p50": 165.24800658226013, + "p90": 173.88799786567688, + "p95": 176.67199671268463, + "p99": 182.5920045375824 }, "combine": { - "p50": 84.28800106048584, - "p90": 85.40800213813782, - "p95": 86.07999980449677, - "p99": 86.91199868917465 + "p50": 181.85600638389587, + "p90": 190.17599523067474, + "p95": 192.22399592399597, + "p99": 194.5279985666275 }, "roundtrip": { - "p50": 84.28800106048584, - "p90": 85.40800213813782, - "p95": 86.07999980449677, - "p99": 86.91199868917465 + "p50": 323.743999004364, + "p90": 331.64799213409424, + "p95": 335.2000117301941, + "p99": 340.60800075531006 }, "isolatedSum": { - "p50": 168.57600212097168, - "p90": 170.81600427627563, - "p95": 172.15999960899353, - "p99": 173.8239973783493 + "p50": 347.104012966156, + "p90": 364.0639930963516, + "p95": 368.8959926366806, + "p99": 377.1200031042099 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77944832, - "combineLogicalBytes": 155889664, - "fanoutMean": 5.3095703125, - "recvTokensMax": 8, - "stragglerRank": 6, + "dispatchLogicalBytes": 140879872, + 
"combineLogicalBytes": 140879872, + "fanoutMean": 4.79833984375, + "recvTokensMax": 1972, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -34268,34 +34929,34 @@ "tokensPerRank": 512, "globalTokens": 4096, "dispatch": { - "p50": 131.3920021057129, - "p90": 133.91999900341034, - "p95": 134.91199910640717, - "p99": 136.63999736309052 + "p50": 240.12799561023712, + "p90": 247.99999594688416, + "p95": 251.74400210380554, + "p99": 255.74401021003723 }, "combine": { - "p50": 131.3920021057129, - "p90": 133.91999900341034, - "p95": 134.91199910640717, - "p99": 136.63999736309052 + "p50": 351.99999809265137, + "p90": 356.3520014286041, + "p95": 361.7280125617981, + "p99": 372.8320002555847 }, "roundtrip": { - "p50": 131.3920021057129, - "p90": 133.91999900341034, - "p95": 134.91199910640717, - "p99": 136.63999736309052 + "p50": 553.1200170516968, + "p90": 562.8160238265991, + "p95": 566.2720203399658, + "p99": 577.6000022888184 }, "isolatedSum": { - "p50": 262.7840042114258, - "p90": 267.8399980068207, - "p95": 269.82399821281433, - "p99": 273.27999472618103 + "p50": 592.1279937028885, + "p90": 604.3519973754883, + "p95": 613.4720146656036, + "p99": 628.576010465622 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 156133376, - "combineLogicalBytes": 312266752, - "fanoutMean": 5.31787109375, - "recvTokensMax": 8, + "dispatchLogicalBytes": 282333184, + "combineLogicalBytes": 282333184, + "fanoutMean": 4.80810546875, + "recvTokensMax": 3936, "stragglerRank": 7, "correct": true, "samplesPooled": 600, @@ -34305,35 +34966,35 @@ "tokensPerRank": 1024, "globalTokens": 8192, "dispatch": { - "p50": 229.72799837589264, - "p90": 232.7679991722107, - "p95": 233.95200073719025, - "p99": 236.32000386714935 + "p50": 388.89598846435547, + "p90": 396.92801237106323, + "p95": 399.77601170539856, + "p99": 405.7919979095459 }, "combine": { - "p50": 229.72799837589264, - "p90": 232.7679991722107, - "p95": 233.95200073719025, - "p99": 236.32000386714935 + "p50": 
608.0960035324097, + "p90": 613.2479906082153, + "p95": 615.0720119476318, + "p99": 623.7760186195374 }, "roundtrip": { - "p50": 229.72799837589264, - "p90": 232.7679991722107, - "p95": 233.95200073719025, - "p99": 236.32000386714935 + "p50": 973.4399914741516, + "p90": 981.7600250244141, + "p95": 984.9920272827148, + "p99": 990.9759759902954 }, "isolatedSum": { - "p50": 459.4559967517853, - "p90": 465.5359983444214, - "p95": 467.9040014743805, - "p99": 472.6400077342987 + "p50": 996.9919919967651, + "p90": 1010.1760029792786, + "p95": 1014.8480236530304, + "p99": 1029.5680165290833 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 311721984, - "combineLogicalBytes": 623443968, - "fanoutMean": 5.30859375, - "recvTokensMax": 8, - "stragglerRank": 6, + "dispatchLogicalBytes": 566716416, + "combineLogicalBytes": 566716416, + "fanoutMean": 4.8255615234375, + "recvTokensMax": 7855, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -34342,35 +35003,35 @@ "tokensPerRank": 2048, "globalTokens": 16384, "dispatch": { - "p50": 404.7999978065491, - "p90": 407.1680009365082, - "p95": 407.99999237060547, - "p99": 411.3920032978058 + "p50": 692.2879815101624, + "p90": 703.0079960823059, + "p95": 705.7279944419861, + "p99": 715.008020401001 }, "combine": { - "p50": 404.7999978065491, - "p90": 407.1680009365082, - "p95": 407.99999237060547, - "p99": 411.3920032978058 + "p50": 1107.7120304107666, + "p90": 1117.8560256958008, + "p95": 1119.488000869751, + "p99": 1128.8319826126099 }, "roundtrip": { - "p50": 404.7999978065491, - "p90": 407.1680009365082, - "p95": 407.99999237060547, - "p99": 411.3920032978058 + "p50": 1777.1519422531128, + "p90": 1790.6240224838257, + "p95": 1798.1120347976685, + "p99": 1832.1280479431152 }, "isolatedSum": { - "p50": 809.5999956130981, - "p90": 814.3360018730164, - "p95": 815.9999847412109, - "p99": 822.7840065956116 + "p50": 1800.000011920929, + "p90": 1820.8640217781067, + "p95": 1825.215995311737, + "p99": 
1843.8400030136108 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 621902848, - "combineLogicalBytes": 1243805696, - "fanoutMean": 5.29547119140625, - "recvTokensMax": 8, - "stragglerRank": 4, + "dispatchLogicalBytes": 1132285952, + "combineLogicalBytes": 1132285952, + "fanoutMean": 4.8206787109375, + "recvTokensMax": 15694, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -34379,35 +35040,35 @@ "tokensPerRank": 4096, "globalTokens": 32768, "dispatch": { - "p50": 755.6480169296265, - "p90": 758.9439749717712, - "p95": 759.7439885139465, - "p99": 764.1919851303101 + "p50": 1350.208044052124, + "p90": 1361.791968345642, + "p95": 1364.7040128707886, + "p99": 1372.7359771728516 }, "combine": { - "p50": 755.6480169296265, - "p90": 758.9439749717712, - "p95": 759.7439885139465, - "p99": 764.1919851303101 + "p50": 2127.1679401397705, + "p90": 2136.8319988250732, + "p95": 2138.5281085968018, + "p99": 2141.2479877471924 }, "roundtrip": { - "p50": 755.6480169296265, - "p90": 758.9439749717712, - "p95": 759.7439885139465, - "p99": 764.1919851303101 + "p50": 3460.7040882110596, + "p90": 3473.088026046753, + "p95": 3476.736068725586, + "p99": 3485.503911972046 }, "isolatedSum": { - "p50": 1511.296033859253, - "p90": 1517.8879499435425, - "p95": 1519.487977027893, - "p99": 1528.3839702606201 + "p50": 3477.3759841918945, + "p90": 3498.6239671707153, + "p95": 3503.2321214675903, + "p99": 3513.983964920044 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1243504640, - "combineLogicalBytes": 2487009280, - "fanoutMean": 5.294189453125, - "recvTokensMax": 8, - "stragglerRank": 4, + "dispatchLogicalBytes": 2267840512, + "combineLogicalBytes": 2267840512, + "fanoutMean": 4.82763671875, + "recvTokensMax": 31357, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 @@ -34415,107 +35076,108 @@ ] }, { - "id": "cx-85dec801", - "identity": 
"b300|flashinfer|7168|8|256|nvfp4|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", - "colorKey": "b300_f0247ae6", - "comparisonKey": "eb4126aa6cf3bfca", + "id": "cx-6461e658", + "identity": "gb300|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-mild+eplb|8|prefill|normal|none|none|0|tuned||3eb2f0d7bdba0fe", + "colorKey": "gb300_46b172da", + "comparisonKey": "398178595fe92367", "schemaVersion": 3, - "generatedAt": "2026-06-28T01:38:25.905345+00:00", + "generatedAt": "2026-06-29T13:46:12.216671+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_04", - "sku": "b300", - "backend": "flashinfer", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", "phase": "prefill", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · flashinfer · nvfp4", - "model": "DeepSeek-V3/V4", + "label": "GB300 EP8 · deepep · bf16 · zipf-mild+eplb", + "model": "DeepSeek-V3 (EPLB physical)", "shape": { "hidden": 7168, "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", + "experts": 288, + "routing": "zipf-mild", + "routingLabel": "zipf-mild+eplb", "routingStep": 0, "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "nvfp4", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": null, - "configuredUnits": null, - "deviceUnits": 148, - "resourceClass": "fixed-kernel", - "conformanceClass": "not-applicable", - "fixedKernel": true, + "achievedFraction": 0.1316, + 
"configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, "paretoEligible": false }, "placement": { "kind": "packed", - "nodes": 1, + "nodes": 2, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "64d989e2e2a6b31", - "workloadId": "set:6:a426d66e479dc893", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": null, + "traceSignature": "3eb2f0d7bdba0fe", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 2.545684814453125, + "eplbImbalanceAfter": 1.0001495361328125, + "backendVersion": "1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28307777849", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28307777849", - "createdAt": "2026-06-28T01:38:25.905345+00:00", - "sha": "510fc17001789ae4f32b99b60b9a0a0aa53ab6b5" + "id": "28374342409", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409", + "createdAt": "2026-06-29T13:08:27Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 62.68800050020218, - "p90": 64.64000046253204, - "p95": 65.66400080919266, - "p99": 75.99999755620956 + "p50": 123.45600128173828, + "p90": 137.05599308013916, + "p95": 143.0719941854477, + "p99": 186.27199530601501 }, "combine": { - "p50": 62.68800050020218, - "p90": 64.64000046253204, - "p95": 65.66400080919266, - "p99": 75.99999755620956 + "p50": 128.00000607967377, + "p90": 150.36800503730774, + "p95": 155.39200603961945, + "p99": 177.72799730300903 }, "roundtrip": { - "p50": 62.68800050020218, - "p90": 64.64000046253204, - "p95": 65.66400080919266, - "p99": 75.99999755620956 + "p50": 222.01600670814514, + "p90": 
235.58400571346283, + "p95": 246.46399915218353, + "p99": 272.0640003681183 }, "isolatedSum": { - "p50": 125.37600100040436, - "p90": 129.2800009250641, - "p95": 131.32800161838531, - "p99": 151.99999511241913 + "p50": 251.45600736141205, + "p90": 287.4239981174469, + "p95": 298.46400022506714, + "p99": 363.99999260902405 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19418112, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 8, - "stragglerRank": 6, + "dispatchLogicalBytes": 78159872, + "combineLogicalBytes": 78159872, + "fanoutMean": 5.32421875, + "recvTokensMax": 702, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -34524,34 +35186,34 @@ "tokensPerRank": 256, "globalTokens": 2048, "dispatch": { - "p50": 78.36800068616867, - "p90": 80.25600016117096, - "p95": 81.05599880218506, - "p99": 84.95999872684479 + "p50": 160.99199652671814, + "p90": 170.33599317073822, + "p95": 173.15199971199036, + "p99": 181.85600638389587 }, "combine": { - "p50": 78.36800068616867, - "p90": 80.25600016117096, - "p95": 81.05599880218506, - "p99": 84.95999872684479 + "p50": 168.03200542926788, + "p90": 171.87200486660004, + "p95": 177.44000256061554, + "p99": 185.98400056362152 }, "roundtrip": { - "p50": 78.36800068616867, - "p90": 80.25600016117096, - "p95": 81.05599880218506, - "p99": 84.95999872684479 + "p50": 299.3920147418976, + "p90": 308.19201469421387, + "p95": 311.0400140285492, + "p99": 319.2319869995117 }, "isolatedSum": { - "p50": 156.73600137233734, - "p90": 160.51200032234192, - "p95": 162.11199760437012, - "p99": 169.91999745368958 + "p50": 329.024001955986, + "p90": 342.20799803733826, + "p95": 350.5920022726059, + "p99": 367.8400069475174 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 38972416, - "combineLogicalBytes": 155889664, - "fanoutMean": 5.3095703125, - "recvTokensMax": 8, + "dispatchLogicalBytes": 156563456, + "combineLogicalBytes": 156563456, + "fanoutMean": 5.33251953125, + 
"recvTokensMax": 1393, "stragglerRank": 7, "correct": true, "samplesPooled": 600, @@ -34561,35 +35223,35 @@ "tokensPerRank": 512, "globalTokens": 4096, "dispatch": { - "p50": 118.04799735546112, - "p90": 120.60800194740295, - "p95": 121.34400010108948, - "p99": 124.79999661445618 + "p50": 229.66399788856506, + "p90": 238.39999735355377, + "p95": 240.89600145816803, + "p99": 251.26400589942932 }, "combine": { - "p50": 118.04799735546112, - "p90": 120.60800194740295, - "p95": 121.34400010108948, - "p99": 124.79999661445618 + "p50": 286.52799129486084, + "p90": 293.11999678611755, + "p95": 295.00800371170044, + "p99": 302.4640083312988 }, "roundtrip": { - "p50": 118.04799735546112, - "p90": 120.60800194740295, - "p95": 121.34400010108948, - "p99": 124.79999661445618 + "p50": 466.97598695755005, + "p90": 476.73600912094116, + "p95": 480.70400953292847, + "p99": 490.27198553085327 }, "isolatedSum": { - "p50": 236.09599471092224, - "p90": 241.2160038948059, - "p95": 242.68800020217896, - "p99": 249.59999322891235 + "p50": 516.1919891834259, + "p90": 531.5199941396713, + "p95": 535.9040051698685, + "p99": 553.7280142307281 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 78066688, - "combineLogicalBytes": 312266752, - "fanoutMean": 5.31787109375, - "recvTokensMax": 8, - "stragglerRank": 4, + "dispatchLogicalBytes": 312410112, + "combineLogicalBytes": 312410112, + "fanoutMean": 5.3203125, + "recvTokensMax": 2773, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -34598,34 +35260,34 @@ "tokensPerRank": 1024, "globalTokens": 8192, "dispatch": { - "p50": 204.83200252056122, - "p90": 208.48000049591064, - "p95": 209.1200053691864, - "p99": 221.76000475883484 + "p50": 357.5040102005005, + "p90": 365.664005279541, + "p95": 367.8080141544342, + "p99": 374.4960129261017 }, "combine": { - "p50": 204.83200252056122, - "p90": 208.48000049591064, - "p95": 209.1200053691864, - "p99": 221.76000475883484 + "p50": 489.02401328086853, + "p90": 
494.30400133132935, + "p95": 498.9120066165924, + "p99": 502.20799446105957 }, "roundtrip": { - "p50": 204.83200252056122, - "p90": 208.48000049591064, - "p95": 209.1200053691864, - "p99": 221.76000475883484 + "p50": 816.7999982833862, + "p90": 826.304018497467, + "p95": 830.1759958267212, + "p99": 842.2719836235046 }, "isolatedSum": { - "p50": 409.66400504112244, - "p90": 416.9600009918213, - "p95": 418.2400107383728, - "p99": 443.5200095176697 + "p50": 846.528023481369, + "p90": 859.9680066108704, + "p95": 866.7200207710266, + "p99": 876.7040073871613 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 155860992, - "combineLogicalBytes": 623443968, - "fanoutMean": 5.30859375, - "recvTokensMax": 8, + "dispatchLogicalBytes": 622712832, + "combineLogicalBytes": 622712832, + "fanoutMean": 5.3023681640625, + "recvTokensMax": 5498, "stragglerRank": 7, "correct": true, "samplesPooled": 600, @@ -34635,35 +35297,35 @@ "tokensPerRank": 2048, "globalTokens": 16384, "dispatch": { - "p50": 359.51998829841614, - "p90": 362.5600039958954, - "p95": 363.77599835395813, - "p99": 371.16798758506775 + "p50": 613.53600025177, + "p90": 623.3599781990051, + "p95": 627.232015132904, + "p99": 638.1760239601135 }, "combine": { - "p50": 359.51998829841614, - "p90": 362.5600039958954, - "p95": 363.77599835395813, - "p99": 371.16798758506775 + "p50": 868.9280152320862, + "p90": 882.6239705085754, + "p95": 892.5759792327881, + "p99": 912.2560024261475 }, "roundtrip": { - "p50": 359.51998829841614, - "p90": 362.5600039958954, - "p95": 363.77599835395813, - "p99": 371.16798758506775 + "p50": 1452.1280527114868, + "p90": 1467.360019683838, + "p95": 1480.1280498504639, + "p99": 1500.8959770202637 }, "isolatedSum": { - "p50": 719.0399765968323, - "p90": 725.1200079917908, - "p95": 727.5519967079163, - "p99": 742.3359751701355 + "p50": 1482.4640154838562, + "p90": 1505.9839487075806, + "p95": 1519.8079943656921, + "p99": 1550.432026386261 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 
310951424, - "combineLogicalBytes": 1243805696, - "fanoutMean": 5.29547119140625, - "recvTokensMax": 8, - "stragglerRank": 5, + "dispatchLogicalBytes": 1245038592, + "combineLogicalBytes": 1245038592, + "fanoutMean": 5.30072021484375, + "recvTokensMax": 10955, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -34672,35 +35334,35 @@ "tokensPerRank": 4096, "globalTokens": 32768, "dispatch": { - "p50": 662.0479822158813, - "p90": 664.9919748306274, - "p95": 665.9520268440247, - "p99": 692.6400065422058 + "p50": 1130.4320096969604, + "p90": 1137.7919912338257, + "p95": 1140.9920454025269, + "p99": 1147.1680402755737 }, "combine": { - "p50": 662.0479822158813, - "p90": 664.9919748306274, - "p95": 665.9520268440247, - "p99": 692.6400065422058 + "p50": 1613.0239963531494, + "p90": 1622.1120357513428, + "p95": 1623.9999532699585, + "p99": 1631.9040060043335 }, "roundtrip": { - "p50": 662.0479822158813, - "p90": 664.9919748306274, - "p95": 665.9520268440247, - "p99": 692.6400065422058 + "p50": 2719.4559574127197, + "p90": 2728.447914123535, + "p95": 2731.3599586486816, + "p99": 2740.0639057159424 }, "isolatedSum": { - "p50": 1324.0959644317627, - "p90": 1329.9839496612549, - "p95": 1331.9040536880493, - "p99": 1385.2800130844116 + "p50": 2743.45600605011, + "p90": 2759.9040269851685, + "p95": 2764.9919986724854, + "p99": 2779.072046279907 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 621752320, - "combineLogicalBytes": 2487009280, - "fanoutMean": 5.294189453125, - "recvTokensMax": 8, - "stragglerRank": 6, + "dispatchLogicalBytes": 2489460736, + "combineLogicalBytes": 2489460736, + "fanoutMean": 5.299407958984375, + "recvTokensMax": 21864, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 @@ -34708,47 +35370,48 @@ ] }, { - "id": "cx-2fdde1de", - "identity": "b300|uccl|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", - "colorKey": "b300_e952b5c0", - 
"comparisonKey": "97ed86fe35a5b2af", + "id": "cx-a4c44aca", + "identity": "gb300|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-moderate|8|prefill|normal|none|none|0|tuned||bfb01c61bdf926e", + "colorKey": "gb300_6e04dda3", + "comparisonKey": "1dcefebf80b3425d", "schemaVersion": 3, - "generatedAt": "2026-06-27T17:36:18.907415+00:00", + "generatedAt": "2026-06-29T13:49:15.844464+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_16", - "sku": "b300", - "backend": "uccl", - "phase": "decode", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · uccl · bf16", + "label": "GB300 EP8 · deepep · bf16 · zipf-moderate", "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, "topk": 8, "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, "dispatchDtype": "bf16", + "kernelGeneration": "v1", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1351, + "achievedFraction": 0.1316, "configuredUnits": 20, - "deviceUnits": 148, + "deviceUnits": 152, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -34756,318 +35419,244 @@ }, "placement": { "kind": "packed", - "nodes": 1, + "nodes": 2, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "ac583971f94b176", - "workloadId": "set:8:d8d49658059863f2", - "workloadSource": 
"canonical-serialized", + "traceSignature": "bfb01c61bdf926e", + "workloadId": null, + "workloadSource": "seeded-runtime", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, - "backendVersion": null, + "backendVersion": "1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28296669967", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28296669967", - "createdAt": "2026-06-27T17:36:18.907415+00:00", - "sha": "cfa1ec56258b94b4a173844810a163a832bcb07e" + "id": "28374342409", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409", + "createdAt": "2026-06-29T13:08:27Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 81.24800026416779, - "p90": 87.45600283145905, - "p95": 88.41600269079208, - "p99": 92.99200028181076 - }, - "combine": { - "p50": 74.23999905586243, - "p90": 76.48000121116638, - "p95": 77.11999863386154, - "p99": 85.66399663686752 - }, - "roundtrip": { - "p50": 140.19200205802917, - "p90": 144.31999623775482, - "p95": 145.37599682807922, - "p99": 151.93599462509155 - }, - "isolatedSum": { - "p50": 155.4879993200302, - "p90": 163.93600404262543, - "p95": 165.53600132465363, - "p99": 178.65599691867828 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 630784, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 80.92799782752991, - "p90": 85.37600189447403, - "p95": 86.2400010228157, - "p99": 89.75999802350998 - }, - "combine": { - "p50": 74.30399954319, - "p90": 76.25599950551987, - "p95": 77.18399912118912, - "p99": 84.44800227880478 - }, - "roundtrip": { - "p50": 141.6960060596466, - "p90": 145.50399780273438, - "p95": 
146.7200070619583, - "p99": 162.27200627326965 - }, - "isolatedSum": { - "p50": 155.2319973707199, - "p90": 161.6320013999939, - "p95": 163.42400014400482, - "p99": 174.20800030231476 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1232896, - "combineLogicalBytes": 1232896, - "fanoutMean": 5.375, - "recvTokensMax": 13, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 81.40800148248672, - "p90": 84.44800227880478, - "p95": 87.77599781751633, - "p99": 107.04000294208527 + "p50": 122.20799922943115, + "p90": 128.25599312782288, + "p95": 132.76800513267517, + "p99": 143.51999759674072 }, "combine": { - "p50": 75.6480023264885, - "p90": 84.73599702119827, - "p95": 85.21600067615509, - "p99": 96.38399630784988 + "p50": 132.57600367069244, + "p90": 140.22399485111237, + "p95": 142.4960047006607, + "p99": 144.896000623703 }, "roundtrip": { - "p50": 138.2720023393631, - "p90": 141.66399836540222, - "p95": 143.51999759674072, - "p99": 149.21599626541138 + "p50": 232.4800044298172, + "p90": 239.55200612545013, + "p95": 242.23999679088593, + "p99": 250.07998943328857 }, "isolatedSum": { - "p50": 157.05600380897522, - "p90": 169.18399930000305, - "p95": 172.99199849367142, - "p99": 203.42399924993515 + "p50": 254.7840029001236, + "p90": 268.47998797893524, + "p95": 275.2640098333359, + "p99": 288.4159982204437 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2480128, - "combineLogicalBytes": 2480128, - "fanoutMean": 5.40625, - "recvTokensMax": 29, - "stragglerRank": 6, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 256, + "globalTokens": 2048, "dispatch": { - "p50": 81.40800148248672, - "p90": 
83.93599838018417, - "p95": 85.28000116348267, - "p99": 93.28000247478485 + "p50": 155.4879993200302, + "p90": 179.83999848365784, + "p95": 191.03999435901642, + "p99": 217.8879976272583 }, "combine": { - "p50": 84.54400300979614, - "p90": 85.66399663686752, - "p95": 86.01599931716919, - "p99": 88.70399743318558 + "p50": 181.60000443458557, + "p90": 204.70400154590607, + "p95": 230.68800568580627, + "p99": 242.5280064344406 }, "roundtrip": { - "p50": 138.84800672531128, - "p90": 143.99999380111694, - "p95": 147.0080018043518, - "p99": 160.22400557994843 + "p50": 316.0000145435333, + "p90": 329.5679986476898, + "p95": 340.4160141944885, + "p99": 372.48000502586365 }, "isolatedSum": { - "p50": 165.95200449228287, - "p90": 169.5999950170517, - "p95": 171.29600048065186, - "p99": 181.98399990797043 + "p50": 337.0880037546158, + "p90": 384.5440000295639, + "p95": 421.7280000448227, + "p99": 460.4160040616989 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 4, + "dispatchLogicalBytes": 100509696, + "combineLogicalBytes": 100509696, + "fanoutMean": 3.42333984375, + "recvTokensMax": 2046, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 16, - "globalTokens": 128, + "tokensPerRank": 512, + "globalTokens": 4096, "dispatch": { - "p50": 82.84799754619598, - "p90": 84.99199897050858, - "p95": 87.10400015115738, - "p99": 97.37599641084671 + "p50": 227.00800001621246, + "p90": 234.8479926586151, + "p95": 237.69600689411163, + "p99": 246.97600305080414 }, "combine": { - "p50": 84.95999872684479, - "p90": 85.88799834251404, - "p95": 86.2400010228157, - "p99": 92.22400188446045 + "p50": 342.75200963020325, + "p90": 357.85600543022156, + "p95": 372.25601077079773, + "p99": 393.2799994945526 }, "roundtrip": { - "p50": 140.25600254535675, - "p90": 147.20000326633453, - "p95": 148.5760062932968, - "p99": 155.07200360298157 
+ "p50": 544.0959930419922, + "p90": 556.8959712982178, + "p95": 565.7600164413452, + "p99": 583.1040143966675 }, "isolatedSum": { - "p50": 167.80799627304077, - "p90": 170.8799973130226, - "p95": 173.34400117397308, - "p99": 189.59999829530716 + "p50": 569.7600096464157, + "p90": 592.7039980888367, + "p95": 609.9520176649094, + "p99": 640.2560025453568 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 9920512, - "combineLogicalBytes": 9920512, - "fanoutMean": 5.40625, - "recvTokensMax": 92, - "stragglerRank": 5, + "dispatchLogicalBytes": 201678848, + "combineLogicalBytes": 201678848, + "fanoutMean": 3.4345703125, + "recvTokensMax": 4094, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 32, - "globalTokens": 256, + "tokensPerRank": 1024, + "globalTokens": 8192, "dispatch": { - "p50": 99.84000027179718, - "p90": 102.11200267076492, - "p95": 103.58399897813797, - "p99": 106.84800148010254 + "p50": 367.0080006122589, + "p90": 380.92800974845886, + "p95": 395.52000164985657, + "p99": 414.0479862689972 }, "combine": { - "p50": 85.85599809885025, - "p90": 87.26400136947632, - "p95": 88.48000317811966, - "p99": 96.41599655151367 + "p50": 622.8160262107849, + "p90": 648.7039923667908, + "p95": 659.2000126838684, + "p99": 677.40797996521 }, "roundtrip": { - "p50": 150.59199929237366, - "p90": 153.56799960136414, - "p95": 155.008003115654, - "p99": 163.93600404262543 + "p50": 954.4960260391235, + "p90": 977.4399995803833, + "p95": 991.5199875831604, + "p99": 1010.0480318069458 }, "isolatedSum": { - "p50": 185.69599837064743, - "p90": 189.37600404024124, - "p95": 192.06400215625763, - "p99": 203.2639980316162 + "p50": 989.8240268230438, + "p90": 1029.6320021152496, + "p95": 1054.720014333725, + "p99": 1091.4559662342072 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19726336, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 182, - "stragglerRank": 5, + "dispatchLogicalBytes": 405035008, 
+ "combineLogicalBytes": 405035008, + "fanoutMean": 3.4488525390625, + "recvTokensMax": 8189, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 64, - "globalTokens": 512, + "tokensPerRank": 2048, + "globalTokens": 16384, "dispatch": { - "p50": 111.10399663448334, - "p90": 114.30399864912033, - "p95": 115.35999923944473, - "p99": 118.97599697113037 + "p50": 683.135986328125, + "p90": 693.2799816131592, + "p95": 696.7039704322815, + "p99": 702.5920152664185 }, "combine": { - "p50": 99.04000163078308, - "p90": 102.24000364542007, - "p95": 109.0560033917427, - "p99": 112.35199868679047 + "p50": 1128.607988357544, + "p90": 1132.0960521697998, + "p95": 1133.3119869232178, + "p99": 1142.016053199768 }, "roundtrip": { - "p50": 177.2480010986328, - "p90": 184.35199558734894, - "p95": 186.27199530601501, - "p99": 194.36800479888916 + "p50": 1789.5679473876953, + "p90": 1799.8080253601074, + "p95": 1802.2719621658325, + "p99": 1810.2400302886963 }, "isolatedSum": { - "p50": 210.14399826526642, - "p90": 216.5440022945404, - "p95": 224.41600263118744, - "p99": 231.32799565792084 + "p50": 1811.743974685669, + "p90": 1825.376033782959, + "p95": 1830.0159573554993, + "p99": 1844.6080684661865 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 38993920, - "combineLogicalBytes": 38993920, - "fanoutMean": 5.3125, - "recvTokensMax": 367, - "stragglerRank": 6, + "dispatchLogicalBytes": 808822784, + "combineLogicalBytes": 808822784, + "fanoutMean": 3.44354248046875, + "recvTokensMax": 16380, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 4096, + "globalTokens": 32768, "dispatch": { - "p50": 124.51200187206268, - "p90": 127.3919939994812, - "p95": 128.54400277137756, - "p99": 138.7840062379837 + "p50": 1342.8479433059692, + "p90": 1354.207992553711, + "p95": 1358.1119775772095, + "p99": 1363.2639646530151 }, "combine": { - "p50": 
122.43200093507767, - "p90": 123.32800030708313, - "p95": 124.54400211572647, - "p99": 133.69600474834442 + "p50": 2158.720016479492, + "p90": 2175.8079528808594, + "p95": 2198.591947555542, + "p99": 2221.280097961426 }, "roundtrip": { - "p50": 212.2880071401596, - "p90": 216.25599265098572, - "p95": 218.46400201320648, - "p99": 222.46399521827698 + "p50": 3481.8880558013916, + "p90": 3498.975992202759, + "p95": 3513.8559341430664, + "p99": 3535.583972930908 }, "isolatedSum": { - "p50": 246.94400280714035, - "p90": 250.71999430656433, - "p95": 253.08800488710403, - "p99": 272.4800109863281 + "p50": 3501.5679597854614, + "p90": 3530.0159454345703, + "p95": 3556.7039251327515, + "p99": 3584.544062614441 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 7, + "dispatchLogicalBytes": 1619795968, + "combineLogicalBytes": 1619795968, + "fanoutMean": 3.4481201171875, + "recvTokensMax": 32761, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 @@ -35075,47 +35664,48 @@ ] }, { - "id": "cx-8d828593", - "identity": "b300|uccl|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", - "colorKey": "b300_e952b5c0", - "comparisonKey": "6e0e03618d466091", + "id": "cx-3622f171", + "identity": "gb300|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-moderate+eplb|8|prefill|normal|none|none|0|tuned||45b103b10fbcaef", + "colorKey": "gb300_04de5a5b", + "comparisonKey": "173fe7343d391895", "schemaVersion": 3, - "generatedAt": "2026-06-27T17:36:27.427420+00:00", + "generatedAt": "2026-06-29T13:50:21.485150+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "b300-nv_07", - "sku": "b300", - "backend": "uccl", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", "phase": "prefill", "mode": "normal", 
"resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "B300 EP8 · uccl · bf16", - "model": "DeepSeek-V3/V4", + "label": "GB300 EP8 · deepep · bf16 · zipf-moderate+eplb", + "model": "DeepSeek-V3 (EPLB physical)", "shape": { "hidden": 7168, "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", + "experts": 288, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate+eplb", "routingStep": 0, "unevenTokens": "none", - "eplbEnabled": false, + "eplbEnabled": true, "dispatchDtype": "bf16", + "kernelGeneration": "v1", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1351, + "achievedFraction": 0.1316, "configuredUnits": 20, - "deviceUnits": 148, + "deviceUnits": 152, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -35123,59 +35713,59 @@ }, "placement": { "kind": "packed", - "nodes": 1, + "nodes": 2, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "64d989e2e2a6b31", - "workloadId": "set:6:a426d66e479dc893", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": null, + "traceSignature": "45b103b10fbcaef", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.895263671875, + "eplbImbalanceAfter": 1.0000902811686199, + "backendVersion": "1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28296669967", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28296669967", - "createdAt": 
"2026-06-27T17:36:27.427420+00:00", - "sha": "cfa1ec56258b94b4a173844810a163a832bcb07e" + "id": "28374342409", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409", + "createdAt": "2026-06-29T13:08:27Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 125.2799928188324, - "p90": 127.9039978981018, - "p95": 128.9599984884262, - "p99": 135.51999628543854 + "p50": 123.83999675512314, + "p90": 131.48799538612366, + "p95": 135.29600203037262, + "p99": 144.99199390411377 }, "combine": { - "p50": 122.6240023970604, - "p90": 123.52000176906586, - "p95": 124.4800016283989, - "p99": 126.62400305271149 + "p50": 122.94399738311768, + "p90": 132.1599930524826, + "p95": 134.43200290203094, + "p99": 143.42400431632996 }, "roundtrip": { - "p50": 212.6079946756363, - "p90": 216.35200083255768, - "p95": 218.62399578094482, - "p99": 233.72800648212433 + "p50": 221.95200622081757, + "p90": 229.5680046081543, + "p95": 231.87200725078583, + "p99": 237.95199394226074 }, "isolatedSum": { - "p50": 247.9039952158928, - "p90": 251.42399966716766, - "p95": 253.4400001168251, - "p99": 262.14399933815 + "p50": 246.7839941382408, + "p90": 263.64798843860626, + "p95": 269.72800493240356, + "p99": 288.4159982204437 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 5, + "dispatchLogicalBytes": 77385728, + "combineLogicalBytes": 77385728, + "fanoutMean": 5.271484375, + "recvTokensMax": 691, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 @@ -35184,35 +35774,35 @@ "tokensPerRank": 256, "globalTokens": 2048, "dispatch": { - "p50": 157.3439985513687, - "p90": 162.9759967327118, - "p95": 164.12800550460815, - "p99": 174.43199455738068 + "p50": 161.69600188732147, + "p90": 170.46399414539337, + "p95": 173.40800166130066, + "p99": 177.7919977903366 
}, "combine": { - "p50": 160.38399934768677, - "p90": 169.98399794101715, - "p95": 170.81600427627563, - "p99": 174.6560037136078 + "p50": 167.1999990940094, + "p90": 170.75200378894806, + "p95": 172.06400632858276, + "p99": 180.09600043296814 }, "roundtrip": { - "p50": 290.5920147895813, - "p90": 297.5040078163147, - "p95": 300.927996635437, - "p99": 310.91201305389404 + "p50": 298.8480031490326, + "p90": 307.2640001773834, + "p95": 310.2079927921295, + "p99": 317.6319897174835 }, "isolatedSum": { - "p50": 317.7279978990555, - "p90": 332.95999467372894, - "p95": 334.9440097808838, - "p99": 349.08799827098846 + "p50": 328.8960009813309, + "p90": 341.21599793434143, + "p95": 345.4720079898834, + "p99": 357.88799822330475 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 155889664, - "combineLogicalBytes": 155889664, - "fanoutMean": 5.3095703125, - "recvTokensMax": 1422, - "stragglerRank": 7, + "dispatchLogicalBytes": 155172864, + "combineLogicalBytes": 155172864, + "fanoutMean": 5.28515625, + "recvTokensMax": 1378, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 @@ -35221,35 +35811,35 @@ "tokensPerRank": 512, "globalTokens": 4096, "dispatch": { - "p50": 222.3999947309494, - "p90": 224.95999932289124, - "p95": 226.30399465560913, - "p99": 234.68799889087677 + "p50": 232.09600150585175, + "p90": 255.10400533676147, + "p95": 262.2080147266388, + "p99": 287.84000873565674 }, "combine": { - "p50": 272.2240090370178, - "p90": 281.66401386260986, - "p95": 282.24000334739685, - "p99": 296.3840067386627 + "p50": 282.55999088287354, + "p90": 305.34398555755615, + "p95": 319.7759985923767, + "p99": 340.31999111175537 }, "roundtrip": { - "p50": 466.7840003967285, - "p90": 473.66398572921753, - "p95": 476.73600912094116, - "p99": 491.93599820137024 + "p50": 463.9680087566376, + "p90": 481.471985578537, + "p95": 493.9199984073639, + "p99": 512.3839974403381 }, "isolatedSum": { - "p50": 494.6240037679672, - "p90": 506.6240131855011, - "p95": 
508.543998003006, - "p99": 531.0720056295395 + "p50": 514.6559923887253, + "p90": 560.4479908943176, + "p95": 581.9840133190155, + "p99": 628.1599998474121 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 312266752, - "combineLogicalBytes": 312266752, - "fanoutMean": 5.31787109375, - "recvTokensMax": 2779, - "stragglerRank": 4, + "dispatchLogicalBytes": 310546432, + "combineLogicalBytes": 310546432, + "fanoutMean": 5.28857421875, + "recvTokensMax": 2745, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 @@ -35258,35 +35848,35 @@ "tokensPerRank": 1024, "globalTokens": 8192, "dispatch": { - "p50": 346.015989780426, - "p90": 350.0800132751465, - "p95": 351.6800105571747, - "p99": 360.76799035072327 + "p50": 353.0240058898926, + "p90": 360.4159951210022, + "p95": 363.647997379303, + "p99": 370.4639971256256 }, "combine": { - "p50": 466.2719964981079, - "p90": 469.5360064506531, - "p95": 477.63198614120483, - "p99": 491.8079972267151 + "p50": 481.31200671195984, + "p90": 489.439994096756, + "p95": 491.07199907302856, + "p99": 493.9520061016083 }, "roundtrip": { - "p50": 785.152018070221, - "p90": 791.1360263824463, - "p95": 795.2319979667664, - "p99": 807.9360127449036 + "p50": 807.0719838142395, + "p90": 815.0079846382141, + "p95": 816.9599771499634, + "p99": 822.2399950027466 }, "isolatedSum": { - "p50": 812.2879862785339, - "p90": 819.6160197257996, - "p95": 829.3119966983795, - "p99": 852.5759875774384 + "p50": 834.3360126018524, + "p90": 849.8559892177582, + "p95": 854.7199964523315, + "p99": 864.4160032272339 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 623443968, - "combineLogicalBytes": 623443968, - "fanoutMean": 5.30859375, - "recvTokensMax": 5505, - "stragglerRank": 4, + "dispatchLogicalBytes": 620619776, + "combineLogicalBytes": 620619776, + "fanoutMean": 5.2845458984375, + "recvTokensMax": 5526, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 @@ -35295,35 +35885,35 @@ "tokensPerRank": 2048, 
"globalTokens": 16384, "dispatch": { - "p50": 592.0000076293945, - "p90": 598.8479852676392, - "p95": 602.3359894752502, - "p99": 609.5679998397827 + "p50": 613.1200194358826, + "p90": 632.0000290870667, + "p95": 650.4639983177185, + "p99": 669.4080233573914 }, "combine": { - "p50": 826.9439935684204, - "p90": 835.9040021896362, - "p95": 838.1119966506958, - "p99": 860.6079816818237 + "p50": 874.6880292892456, + "p90": 885.4719996452332, + "p95": 890.7840251922607, + "p99": 911.2319946289062 }, "roundtrip": { - "p50": 1397.760033607483, - "p90": 1407.039999961853, - "p95": 1411.2639427185059, - "p99": 1424.3839979171753 + "p50": 1460.4159593582153, + "p90": 1475.6159782409668, + "p95": 1487.0400428771973, + "p99": 1511.6159915924072 }, "isolatedSum": { - "p50": 1418.944001197815, - "p90": 1434.7519874572754, - "p95": 1440.447986125946, - "p99": 1470.1759815216064 + "p50": 1487.8080487251282, + "p90": 1517.4720287322998, + "p95": 1541.2480235099792, + "p99": 1580.6400179862976 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1243805696, - "combineLogicalBytes": 1243805696, - "fanoutMean": 5.29547119140625, - "recvTokensMax": 10952, - "stragglerRank": 4, + "dispatchLogicalBytes": 1239175168, + "combineLogicalBytes": 1239175168, + "fanoutMean": 5.2757568359375, + "recvTokensMax": 11165, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 @@ -35332,35 +35922,35 @@ "tokensPerRank": 4096, "globalTokens": 32768, "dispatch": { - "p50": 1092.576026916504, - "p90": 1101.0559797286987, - "p95": 1106.4640283584595, - "p99": 1123.5840320587158 + "p50": 1129.3120384216309, + "p90": 1136.031985282898, + "p95": 1138.11194896698, + "p99": 1143.1679725646973 }, "combine": { - "p50": 1536.7679595947266, - "p90": 1549.2479801177979, - "p95": 1559.775948524475, - "p99": 1572.0000267028809 + "p50": 1624.2560148239136, + "p90": 1634.559988975525, + "p95": 1636.2240314483643, + "p99": 1642.2719955444336 }, "roundtrip": { - "p50": 2607.2959899902344, - "p90": 
2621.471881866455, - "p95": 2628.511905670166, - "p99": 2641.5040493011475 + "p50": 2725.600004196167, + "p90": 2734.1439723968506, + "p95": 2737.6959323883057, + "p99": 2742.1441078186035 }, "isolatedSum": { - "p50": 2629.3439865112305, - "p90": 2650.3039598464966, - "p95": 2666.2399768829346, - "p99": 2695.5840587615967 + "p50": 2753.5680532455444, + "p90": 2770.591974258423, + "p95": 2774.3359804153442, + "p99": 2785.439968109131 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2487009280, - "combineLogicalBytes": 2487009280, - "fanoutMean": 5.294189453125, - "recvTokensMax": 21781, - "stragglerRank": 6, + "dispatchLogicalBytes": 2481604608, + "combineLogicalBytes": 2481604608, + "fanoutMean": 5.282684326171875, + "recvTokensMax": 22165, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 @@ -35368,47 +35958,48 @@ ] }, { - "id": "cx-32323f85", - "identity": "h100|deepep|4096|8|128|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||75530960a30b452", - "colorKey": "h100_42947950", - "comparisonKey": "13b620ce9b7928e9", + "id": "cx-cf8cb8f1", + "identity": "gb300|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|prefill|normal|none|none|0|tuned||45b103b10fbcaef", + "colorKey": "gb300_8cda999b", + "comparisonKey": "f8887e85df9ef186", "schemaVersion": 3, - "generatedAt": "2026-06-27T11:16:12.750378+00:00", + "generatedAt": "2026-06-29T13:42:04.489904+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_00", - "sku": "h100", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", "backend": "deepep", - "phase": "decode", + "phase": "prefill", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", 
"worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · bf16", - "model": "Qwen3.5", + "label": "GB300 EP8 · deepep · bf16 · zipf+eplb", + "model": "DeepSeek-V3 (EPLB physical)", "shape": { - "hidden": 4096, + "hidden": 7168, "topk": 8, - "experts": 128, - "routing": "uniform", - "routingLabel": "uniform", + "experts": 288, + "routing": "zipf", + "routingLabel": "zipf+eplb", "routingStep": 0, "unevenTokens": "none", - "eplbEnabled": false, + "eplbEnabled": true, "dispatchDtype": "bf16", + "kernelGeneration": "v1", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1515, + "achievedFraction": 0.1316, "configuredUnits": 20, - "deviceUnits": 132, + "deviceUnits": 152, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -35416,318 +36007,244 @@ }, "placement": { "kind": "packed", - "nodes": 1, + "nodes": 2, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "75530960a30b452", - "workloadId": "set:8:d1b92539bddfb570", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", + "traceSignature": "45b103b10fbcaef", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.895263671875, + "eplbImbalanceAfter": 1.0000902811686199, + "backendVersion": "1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28287504962", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28287504962", - "createdAt": "2026-06-27T11:16:12.750378+00:00", - "sha": "df7fddee0a275156d4c72fa006cd2b73bce72613" + "id": "28374342409", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409", + "createdAt": "2026-06-29T13:08:27Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, 
"rows": [ { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 96.44799679517746, - "p90": 104.54399883747101, - "p95": 106.27199709415436, - "p99": 110.07999628782272 - }, - "combine": { - "p50": 71.32799923419952, - "p90": 73.34399968385696, - "p95": 73.88799637556076, - "p99": 79.68000322580338 - }, - "roundtrip": { - "p50": 136.80000603199005, - "p90": 143.74400675296783, - "p95": 145.50399780273438, - "p99": 150.78400075435638 - }, - "isolatedSum": { - "p50": 167.77599602937698, - "p90": 177.88799852132797, - "p95": 180.15999346971512, - "p99": 189.7599995136261 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 344064, - "combineLogicalBytes": 344064, - "fanoutMean": 5.25, - "recvTokensMax": 6, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 68.9919963479042, - "p90": 101.6319990158081, - "p95": 103.5199984908104, - "p99": 110.91200262308121 - }, - "combine": { - "p50": 63.45599889755249, - "p90": 72.95999675989151, - "p95": 73.27999919652939, - "p99": 78.87999713420868 - }, - "roundtrip": { - "p50": 116.28799885511398, - "p90": 142.2719955444336, - "p95": 144.57599818706512, - "p99": 150.43200552463531 - }, - "isolatedSum": { - "p50": 132.4479952454567, - "p90": 174.59199577569962, - "p95": 176.79999768733978, - "p99": 189.7919997572899 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 704512, - "combineLogicalBytes": 704512, - "fanoutMean": 5.375, - "recvTokensMax": 12, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 69.21599805355072, - "p90": 134.20799374580383, - "p95": 137.43999600410461, - "p99": 141.34399592876434 + "p50": 124.64000284671783, + "p90": 132.4480026960373, + "p95": 136.25599443912506, + "p99": 147.10399508476257 }, "combine": { - "p50": 
63.58399987220764, - "p90": 86.97599917650223, - "p95": 87.8399983048439, - "p99": 162.36799955368042 + "p50": 123.00799787044525, + "p90": 132.1599930524826, + "p95": 133.7279975414276, + "p99": 140.76800644397736 }, "roundtrip": { - "p50": 116.80000275373459, - "p90": 144.28800344467163, - "p95": 147.10399508476257, - "p99": 151.39199793338776 + "p50": 223.00800681114197, + "p90": 230.5919975042343, + "p95": 233.98399353027344, + "p99": 241.18399620056152 }, "isolatedSum": { - "p50": 132.79999792575836, - "p90": 221.18399292230606, - "p95": 225.27999430894852, - "p99": 303.71199548244476 + "p50": 247.6480007171631, + "p90": 264.6079957485199, + "p95": 269.9839919805527, + "p99": 287.87200152873993 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1384448, - "combineLogicalBytes": 1384448, - "fanoutMean": 5.28125, - "recvTokensMax": 26, - "stragglerRank": 5, + "dispatchLogicalBytes": 77385728, + "combineLogicalBytes": 77385728, + "fanoutMean": 5.271484375, + "recvTokensMax": 691, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 256, + "globalTokens": 2048, "dispatch": { - "p50": 83.52000266313553, - "p90": 104.60799932479858, - "p95": 113.21599781513214, - "p99": 352.54400968551636 + "p50": 162.01600432395935, + "p90": 169.37600076198578, + "p95": 172.03199863433838, + "p99": 182.11199343204498 }, "combine": { - "p50": 64.64000046253204, - "p90": 72.83200323581696, - "p95": 73.18399846553802, - "p99": 77.98399776220322 + "p50": 167.61599481105804, + "p90": 171.1679995059967, + "p95": 173.0239987373352, + "p99": 179.80800569057465 }, "roundtrip": { - "p50": 117.53600090742111, - "p90": 146.30399644374847, - "p95": 149.34399724006653, - "p99": 153.60000729560852 + "p50": 299.26401376724243, + "p90": 307.23199248313904, + "p95": 310.9759986400604, + "p99": 318.015992641449 }, "isolatedSum": { - "p50": 148.16000312566757, - "p90": 177.44000256061554, - "p95": 
186.39999628067017, - "p99": 430.5280074477196 + "p50": 329.6319991350174, + "p90": 340.5440002679825, + "p95": 345.0559973716736, + "p99": 361.91999912261963 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2744320, - "combineLogicalBytes": 2744320, - "fanoutMean": 5.234375, - "recvTokensMax": 49, - "stragglerRank": 3, + "dispatchLogicalBytes": 155172864, + "combineLogicalBytes": 155172864, + "fanoutMean": 5.28515625, + "recvTokensMax": 1378, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 16, - "globalTokens": 128, + "tokensPerRank": 512, + "globalTokens": 4096, "dispatch": { - "p50": 96.3200032711029, - "p90": 102.14400291442871, - "p95": 104.47999835014343, - "p99": 109.56799983978271 + "p50": 229.5359969139099, + "p90": 238.97600173950195, + "p95": 241.69600009918213, + "p99": 249.28000569343567 }, "combine": { - "p50": 71.80800288915634, - "p90": 74.11199808120728, - "p95": 78.8159966468811, - "p99": 80.19199967384338 + "p50": 281.2800109386444, + "p90": 287.4560058116913, + "p95": 290.6560003757477, + "p99": 294.14400458335876 }, "roundtrip": { - "p50": 143.71199905872345, - "p90": 151.39199793338776, - "p95": 153.02400290966034, - "p99": 157.95199573040009 + "p50": 463.3280038833618, + "p90": 472.22399711608887, + "p95": 475.8400022983551, + "p99": 481.440007686615 }, "isolatedSum": { - "p50": 168.12800616025925, - "p90": 176.256000995636, - "p95": 183.29599499702454, - "p99": 189.7599995136261 + "p50": 510.8160078525543, + "p90": 526.4320075511932, + "p95": 532.3520004749298, + "p99": 543.4240102767944 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 5464064, - "combineLogicalBytes": 5464064, - "fanoutMean": 5.2109375, - "recvTokensMax": 94, - "stragglerRank": 5, + "dispatchLogicalBytes": 310546432, + "combineLogicalBytes": 310546432, + "fanoutMean": 5.28857421875, + "recvTokensMax": 2745, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 32, - 
"globalTokens": 256, + "tokensPerRank": 1024, + "globalTokens": 8192, "dispatch": { - "p50": 78.11199873685837, - "p90": 98.65599870681763, - "p95": 103.32799702882767, - "p99": 114.3999993801117 + "p50": 352.1279990673065, + "p90": 360.25598645210266, + "p95": 363.0400002002716, + "p99": 368.73599886894226 }, "combine": { - "p50": 65.92000275850296, - "p90": 79.29600030183792, - "p95": 80.44800162315369, - "p99": 81.31200075149536 + "p50": 481.5039932727814, + "p90": 489.8560047149658, + "p95": 491.67999625205994, + "p99": 493.75998973846436 }, "roundtrip": { - "p50": 117.53600090742111, - "p90": 150.36800503730774, - "p95": 152.63999998569489, - "p99": 155.7759940624237 + "p50": 807.744026184082, + "p90": 815.0720000267029, + "p95": 818.015992641449, + "p99": 824.7680068016052 }, "isolatedSum": { - "p50": 144.03200149536133, - "p90": 177.95199900865555, - "p95": 183.77599865198135, - "p99": 195.71200013160706 + "p50": 833.6319923400879, + "p90": 850.1119911670685, + "p95": 854.7199964523315, + "p99": 862.4959886074066 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 11124736, - "combineLogicalBytes": 11124736, - "fanoutMean": 5.3046875, - "recvTokensMax": 186, - "stragglerRank": 5, + "dispatchLogicalBytes": 620619776, + "combineLogicalBytes": 620619776, + "fanoutMean": 5.2845458984375, + "recvTokensMax": 5526, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 64, - "globalTokens": 512, + "tokensPerRank": 2048, + "globalTokens": 16384, "dispatch": { - "p50": 88.35200220346451, - "p90": 136.1279934644699, - "p95": 138.91200721263885, - "p99": 147.2959965467453 + "p50": 611.6480231285095, + "p90": 620.2239990234375, + "p95": 622.5280165672302, + "p99": 630.6560039520264 }, "combine": { - "p50": 74.0479975938797, - "p90": 96.3200032711029, - "p95": 102.11200267076492, - "p99": 104.3199971318245 + "p50": 874.6560215950012, + "p90": 883.7440013885498, + "p95": 885.3759765625, + "p99": 891.327977180481 }, 
"roundtrip": { - "p50": 133.760005235672, - "p90": 191.16799533367157, - "p95": 192.73599982261658, - "p99": 197.9839950799942 + "p50": 1460.7360363006592, + "p90": 1469.5359468460083, + "p95": 1472.0959663391113, + "p99": 1478.335976600647 }, "isolatedSum": { - "p50": 162.3999997973442, - "p90": 232.44799673557281, - "p95": 241.02400988340378, - "p99": 251.6159936785698 + "p50": 1486.3040447235107, + "p90": 1503.9680004119873, + "p95": 1507.9039931297302, + "p99": 1521.9839811325073 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 22192128, - "combineLogicalBytes": 22192128, - "fanoutMean": 5.291015625, - "recvTokensMax": 358, - "stragglerRank": 1, + "dispatchLogicalBytes": 1239175168, + "combineLogicalBytes": 1239175168, + "fanoutMean": 5.2757568359375, + "recvTokensMax": 11165, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 4096, + "globalTokens": 32768, "dispatch": { - "p50": 95.83999961614609, - "p90": 113.6000007390976, - "p95": 117.88800358772278, - "p99": 121.79200351238251 + "p50": 1128.5760402679443, + "p90": 1135.424017906189, + "p95": 1137.1519565582275, + "p99": 1142.3360109329224 }, "combine": { - "p50": 88.28800171613693, - "p90": 96.16000205278397, - "p95": 96.6079980134964, - "p99": 104.09600287675858 + "p50": 1623.2000589370728, + "p90": 1631.6479444503784, + "p95": 1634.6880197525024, + "p99": 1638.2720470428467 }, "roundtrip": { - "p50": 159.42400693893433, - "p90": 173.3119934797287, - "p95": 175.135999917984, - "p99": 178.01600694656372 + "p50": 2724.6720790863037, + "p90": 2733.407974243164, + "p95": 2735.5520725250244, + "p99": 2739.936113357544 }, "isolatedSum": { - "p50": 184.12800133228302, - "p90": 209.76000279188156, - "p95": 214.49600160121918, - "p99": 225.88800638914108 + "p50": 2751.776099205017, + "p90": 2767.0719623565674, + "p95": 2771.83997631073, + "p99": 2780.608057975769 }, "roundtripMeasured": true, - 
"dispatchLogicalBytes": 44564480, - "combineLogicalBytes": 44564480, - "fanoutMean": 5.3125, - "recvTokensMax": 699, - "stragglerRank": 2, + "dispatchLogicalBytes": 2481604608, + "combineLogicalBytes": 2481604608, + "fanoutMean": 5.282684326171875, + "recvTokensMax": 22165, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 @@ -35735,47 +36252,48 @@ ] }, { - "id": "cx-1c34e3d1", - "identity": "h100|deepep|4096|8|128|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||75530960a30b452", - "colorKey": "h100_ff7906f8", - "comparisonKey": "ad5ebda2342035d4", + "id": "cx-eb796145", + "identity": "gb300|deepep|v1|7168|8|256|bf16|normal|cached-layout-comm-only-v1|uniform|8|prefill|normal|none|none|0|tuned||b9896b0a7ca9901", + "colorKey": "gb300_20de545c", + "comparisonKey": "c081f7cbf8991063", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:51:21.600015+00:00", + "generatedAt": "2026-06-29T13:46:36.139153+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_04", - "sku": "h100", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", "backend": "deepep", - "phase": "decode", + "phase": "prefill", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "runtime-visible-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", + "measurementContract": "cached-layout-comm-only-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · bf16", - "model": "Qwen3.5", + "label": "GB300 EP8 · deepep · bf16 [cl]", + "model": "DeepSeek-V3/V4", "shape": { - "hidden": 4096, + "hidden": 7168, "topk": 8, - "experts": 128, + "experts": 256, "routing": "uniform", "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, "dispatchDtype": "bf16", + "kernelGeneration": "v1", 
"activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1515, + "achievedFraction": 0.1316, "configuredUnits": 20, - "deviceUnits": 132, + "deviceUnits": 152, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -35783,318 +36301,244 @@ }, "placement": { "kind": "packed", - "nodes": 1, + "nodes": 2, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "75530960a30b452", - "workloadId": "set:8:d1b92539bddfb570", - "workloadSource": "canonical-serialized", + "traceSignature": "b9896b0a7ca9901", + "workloadId": null, + "workloadSource": "seeded-runtime", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", + "backendVersion": "1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271684428", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271684428", - "createdAt": "2026-06-26T23:51:21.600015+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28374342409", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409", + "createdAt": "2026-06-29T13:08:27Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { - "tokensPerRank": 1, - "globalTokens": 8, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 98.4639972448349, - "p90": 106.52799904346466, - "p95": 128.12800705432892, - "p99": 158.87999534606934 + "p50": 106.52799904346466, + "p90": 112.92800307273865, + "p95": 116.06399714946747, + "p99": 122.11199849843979 }, "combine": { - "p50": 66.52799993753433, - "p90": 73.34399968385696, - "p95": 81.34400099515915, - "p99": 91.96799993515015 + "p50": 122.20799922943115, + "p90": 129.85600531101227, + "p95": 132.1599930524826, + "p99": 135.13599336147308 }, "roundtrip": { - "p50": 
139.42399621009827, - "p90": 146.84799313545227, - "p95": 150.56000649929047, - "p99": 186.81600689888 + "p50": 205.56800067424774, + "p90": 211.16800606250763, + "p95": 213.28000724315643, + "p99": 217.66400337219238 }, "isolatedSum": { - "p50": 164.99199718236923, - "p90": 179.87199872732162, - "p95": 209.47200804948807, - "p99": 250.84799528121948 + "p50": 228.7359982728958, + "p90": 242.78400838375092, + "p95": 248.22399020195007, + "p99": 257.2479918599129 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 344064, - "combineLogicalBytes": 344064, - "fanoutMean": 5.25, - "recvTokensMax": 6, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 71.58400118350983, - "p90": 103.45599800348282, - "p95": 106.39999806880951, - "p99": 124.67200309038162 - }, - "combine": { - "p50": 64.06400352716446, - "p90": 72.92799651622772, - "p95": 73.31199944019318, - "p99": 74.43200051784515 - }, - "roundtrip": { - "p50": 117.53600090742111, - "p90": 144.41600441932678, - "p95": 147.71200716495514, - "p99": 173.5360026359558 - }, - "isolatedSum": { - "p50": 135.6480047106743, - "p90": 176.38399451971054, - "p95": 179.71199750900269, - "p99": 199.10400360822678 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 704512, - "combineLogicalBytes": 704512, - "fanoutMean": 5.375, - "recvTokensMax": 12, - "stragglerRank": 1, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, + "tokensPerRank": 256, + "globalTokens": 2048, "dispatch": { - "p50": 92.70399808883667, - "p90": 100.47999769449234, - "p95": 102.75200009346008, - "p99": 106.23999685049057 + "p50": 145.7280069589615, + "p90": 177.18400061130524, + "p95": 185.85599958896637, + "p99": 197.50399887561798 }, "combine": { - "p50": 66.01600348949432, 
- "p90": 72.38399982452393, - "p95": 72.86400347948074, - "p99": 75.6160020828247 + "p50": 168.7680035829544, + "p90": 190.08000195026398, + "p95": 205.56800067424774, + "p99": 230.49600422382355 }, "roundtrip": { - "p50": 134.33599472045898, - "p90": 143.77599954605103, - "p95": 146.08000218868256, - "p99": 149.82399344444275 + "p50": 283.6799919605255, + "p90": 305.7279884815216, + "p95": 316.3839876651764, + "p99": 342.8800106048584 }, "isolatedSum": { - "p50": 158.720001578331, - "p90": 172.86399751901627, - "p95": 175.61600357294083, - "p99": 181.85599893331528 + "p50": 314.4960105419159, + "p90": 367.2640025615692, + "p95": 391.4240002632141, + "p99": 428.0000030994415 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1384448, - "combineLogicalBytes": 1384448, - "fanoutMean": 5.28125, - "recvTokensMax": 26, + "dispatchLogicalBytes": 155889664, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 512, + "globalTokens": 4096, "dispatch": { - "p50": 97.75999933481216, - "p90": 105.8880016207695, - "p95": 129.66400384902954, - "p99": 177.44000256061554 + "p50": 213.24799954891205, + "p90": 221.44000232219696, + "p95": 223.80800545215607, + "p99": 230.5919975042343 }, "combine": { - "p50": 71.32799923419952, - "p90": 74.65600222349167, - "p95": 81.53600245714188, - "p99": 92.00000017881393 + "p50": 284.12801027297974, + "p90": 293.503999710083, + "p95": 295.48799991607666, + "p99": 302.4959862232208 }, "roundtrip": { - "p50": 140.99200069904327, - "p90": 149.6960073709488, - "p95": 159.19999778270721, - "p99": 189.43999707698822 + "p50": 451.35998725891113, + "p90": 479.0079891681671, + "p95": 489.21599984169006, + "p99": 509.66399908065796 }, "isolatedSum": { - "p50": 169.0879985690117, - "p90": 180.54400384426117, - "p95": 211.20000630617142, - "p99": 269.4400027394295 + "p50": 
497.3760098218918, + "p90": 514.94400203228, + "p95": 519.2960053682327, + "p99": 533.0879837274551 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2744320, - "combineLogicalBytes": 2744320, - "fanoutMean": 5.234375, - "recvTokensMax": 49, + "dispatchLogicalBytes": 312266752, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 16, - "globalTokens": 128, + "tokensPerRank": 1024, + "globalTokens": 8192, "dispatch": { - "p50": 97.56799787282944, - "p90": 100.99200159311295, - "p95": 104.3199971318245, - "p99": 107.42399841547012 + "p50": 344.7679877281189, + "p90": 366.87999963760376, + "p95": 396.12799882888794, + "p99": 411.74399852752686 }, "combine": { - "p50": 71.6480016708374, - "p90": 73.7600028514862, - "p95": 75.00799745321274, - "p99": 80.92799782752991 + "p50": 488.0320131778717, + "p90": 507.04002380371094, + "p95": 523.360013961792, + "p99": 549.4080185890198 }, "roundtrip": { - "p50": 142.68800616264343, - "p90": 150.30400454998016, - "p95": 154.2080044746399, - "p99": 156.09599649906158 + "p50": 800.927996635437, + "p90": 814.7519826889038, + "p95": 830.0480246543884, + "p99": 856.3839793205261 }, "isolatedSum": { - "p50": 169.21599954366684, - "p90": 174.75200444459915, - "p95": 179.32799458503723, - "p99": 188.35199624300003 + "p50": 832.8000009059906, + "p90": 873.9200234413147, + "p95": 919.4880127906799, + "p99": 961.1520171165466 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 5464064, - "combineLogicalBytes": 5464064, - "fanoutMean": 5.2109375, - "recvTokensMax": 94, + "dispatchLogicalBytes": 623443968, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 79.80799674987793, - "p90": 99.55199807882309, - "p95": 
101.27999633550644, - "p99": 106.08000308275223 - }, - "combine": { - "p50": 66.68800115585327, - "p90": 76.03199779987335, - "p95": 80.38400113582611, - "p99": 81.31200075149536 - }, - "roundtrip": { - "p50": 123.87199699878693, - "p90": 150.27199685573578, - "p95": 152.16000378131866, - "p99": 155.4879993200302 - }, - "isolatedSum": { - "p50": 146.4959979057312, - "p90": 175.58399587869644, - "p95": 181.66399747133255, - "p99": 187.3920038342476 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 11124736, - "combineLogicalBytes": 11124736, - "fanoutMean": 5.3046875, - "recvTokensMax": 186, - "stragglerRank": 1, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 64, - "globalTokens": 512, + "tokensPerRank": 2048, + "globalTokens": 16384, "dispatch": { - "p50": 100.28800368309021, - "p90": 107.96800255775452, - "p95": 109.47199910879135, - "p99": 119.90399658679962 + "p50": 599.8079776763916, + "p90": 618.4319853782654, + "p95": 636.4799737930298, + "p99": 653.9520025253296 }, "combine": { - "p50": 81.11999928951263, - "p90": 87.71199733018875, - "p95": 89.1840010881424, - "p99": 90.14400094747543 + "p50": 870.527982711792, + "p90": 875.8400082588196, + "p95": 880.6719779968262, + "p99": 887.55202293396 }, "roundtrip": { - "p50": 151.8079936504364, - "p90": 162.59199380874634, - "p95": 164.06400501728058, - "p99": 168.57600212097168 + "p50": 1442.3359632492065, + "p90": 1450.5599737167358, + "p95": 1453.7279605865479, + "p99": 1460.70396900177 }, "isolatedSum": { - "p50": 181.40800297260284, - "p90": 195.67999988794327, - "p95": 198.65600019693375, - "p99": 210.04799753427505 + "p50": 1470.3359603881836, + "p90": 1494.271993637085, + "p95": 1517.151951789856, + "p99": 1541.5040254592896 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 22192128, - "combineLogicalBytes": 22192128, - "fanoutMean": 5.291015625, - "recvTokensMax": 358, + "dispatchLogicalBytes": 1243805696, + "combineLogicalBytes": 1243805696, + 
"fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 4096, + "globalTokens": 32768, "dispatch": { - "p50": 105.69600015878677, - "p90": 115.99999666213989, - "p95": 118.1119978427887, - "p99": 124.83199685811996 + "p50": 1104.2560338974, + "p90": 1115.1039600372314, + "p95": 1133.6640119552612, + "p99": 1154.3680429458618 }, "combine": { - "p50": 88.3840024471283, - "p90": 97.4079966545105, - "p95": 97.88800030946732, - "p99": 100.38399696350098 + "p50": 1622.879981994629, + "p90": 1637.9200220108032, + "p95": 1651.2000560760498, + "p99": 1676.7679452896118 }, "roundtrip": { - "p50": 161.72799468040466, - "p90": 177.2480010986328, - "p95": 181.15200102329254, - "p99": 415.48800468444824 + "p50": 2707.711935043335, + "p90": 2723.3920097351074, + "p95": 2733.920097351074, + "p99": 2751.1041164398193 }, "isolatedSum": { - "p50": 194.08000260591507, - "p90": 213.4079933166504, - "p95": 215.999998152256, - "p99": 225.21599382162094 + "p50": 2727.136015892029, + "p90": 2753.0239820480347, + "p95": 2784.864068031311, + "p99": 2831.1359882354736 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 44564480, - "combineLogicalBytes": 44564480, - "fanoutMean": 5.3125, - "recvTokensMax": 699, - "stragglerRank": 4, + "dispatchLogicalBytes": 2487009280, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 6, "correct": true, "samplesPooled": 600, "trials": 3 @@ -36102,47 +36546,48 @@ ] }, { - "id": "cx-8988cd24", - "identity": "h100|deepep|5120|8|160|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||be1b44a963bd4ef", - "colorKey": "h100_ff7906f8", - "comparisonKey": "c91a22e0dde262e4", + "id": "cx-ddfe8a4d", + "identity": "gb300|deepep|v1|4096|8|128|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||03f98832f76b043", + 
"colorKey": "gb300_b1bd5887", + "comparisonKey": "ed367f24667806d7", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:51:51.137960+00:00", + "generatedAt": "2026-06-29T14:02:32.068698+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_18", - "sku": "h100", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", "backend": "deepep", - "phase": "decode", + "phase": "prefill", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "runtime-visible-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · bf16", - "model": "shape 5120/8/160", + "label": "GB300 EP8 · deepep · fp8", + "model": "Qwen3.5", "shape": { - "hidden": 5120, + "hidden": 4096, "topk": 8, - "experts": 160, + "experts": 128, "routing": "uniform", "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, - "dispatchDtype": "bf16", + "dispatchDtype": "fp8", + "kernelGeneration": "v1", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1515, + "achievedFraction": 0.1316, "configuredUnits": 20, - "deviceUnits": 132, + "deviceUnits": 152, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -36150,318 +36595,244 @@ }, "placement": { "kind": "packed", - "nodes": 1, + "nodes": 2, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "be1b44a963bd4ef", - "workloadId": "set:8:34e5874082f8ea8f", - "workloadSource": "canonical-serialized", + "traceSignature": "03f98832f76b043", + "workloadId": null, + "workloadSource": "seeded-runtime", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", + 
"backendVersion": "1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271699258", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271699258", - "createdAt": "2026-06-26T23:51:51.137960+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28374342409", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409", + "createdAt": "2026-06-29T13:08:27Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { - "tokensPerRank": 1, - "globalTokens": 8, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 95.551997423172, - "p90": 107.04000294208527, - "p95": 120.38400024175644, - "p99": 156.00000321865082 + "p50": 419.20000314712524, + "p90": 444.70399618148804, + "p95": 450.6239891052246, + "p99": 467.3919975757599 }, "combine": { - "p50": 71.19999825954437, - "p90": 78.84799689054489, - "p95": 81.15199953317642, - "p99": 97.56799787282944 + "p50": 100.89600086212158, + "p90": 107.29599744081497, + "p95": 111.455999314785, + "p99": 118.46400052309036 }, "roundtrip": { - "p50": 140.25600254535675, - "p90": 152.319997549057, - "p95": 169.8240041732788, - "p99": 207.68000185489655 + "p50": 488.0639910697937, + "p90": 510.8479857444763, + "p95": 516.3840055465698, + "p99": 548.1280088424683 }, "isolatedSum": { - "p50": 166.75199568271637, - "p90": 185.88799983263016, - "p95": 201.53599977493286, - "p99": 253.56800109148026 + "p50": 520.0960040092468, + "p90": 551.999993622303, + "p95": 562.0799884200096, + "p99": 585.8559980988503 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 430080, - "combineLogicalBytes": 430080, - "fanoutMean": 5.25, - "recvTokensMax": 8, - "stragglerRank": 5, + "dispatchLogicalBytes": 22282240, + "combineLogicalBytes": 44564480, + "fanoutMean": 5.3125, + "recvTokensMax": 699, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, 
"trials": 3 }, { - "tokensPerRank": 2, - "globalTokens": 16, + "tokensPerRank": 256, + "globalTokens": 2048, "dispatch": { - "p50": 68.4799998998642, - "p90": 104.12800312042236, - "p95": 121.69600278139114, - "p99": 155.13600409030914 + "p50": 408.32000970840454, + "p90": 433.9199960231781, + "p95": 440.5120015144348, + "p99": 452.2880017757416 }, "combine": { - "p50": 64.80000168085098, - "p90": 79.00799810886383, - "p95": 88.06400001049042, - "p99": 103.39199751615524 + "p50": 139.39200341701508, + "p90": 146.94400131702423, + "p95": 149.9200016260147, + "p99": 159.58400070667267 }, "roundtrip": { - "p50": 119.6800023317337, - "p90": 147.32800424098969, - "p95": 149.08799529075623, - "p99": 153.888002038002 + "p50": 522.3039984703064, + "p90": 547.9679703712463, + "p95": 554.0480017662048, + "p99": 570.2080130577087 }, "isolatedSum": { - "p50": 133.28000158071518, - "p90": 183.1360012292862, - "p95": 209.76000279188156, - "p99": 258.5280016064644 + "p50": 547.7120131254196, + "p90": 580.8639973402023, + "p95": 590.4320031404495, + "p99": 611.8720024824142 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 880640, - "combineLogicalBytes": 880640, - "fanoutMean": 5.375, - "recvTokensMax": 13, - "stragglerRank": 5, + "dispatchLogicalBytes": 44863488, + "combineLogicalBytes": 89726976, + "fanoutMean": 5.34814453125, + "recvTokensMax": 1385, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 4, - "globalTokens": 32, + "tokensPerRank": 512, + "globalTokens": 4096, "dispatch": { - "p50": 70.01599669456482, - "p90": 98.27200323343277, - "p95": 101.47199779748917, - "p99": 114.33599889278412 + "p50": 469.7279930114746, + "p90": 483.0079972743988, + "p95": 487.5200092792511, + "p99": 502.4319887161255 }, "combine": { - "p50": 65.08799642324448, - "p90": 78.8159966468811, - "p95": 79.23199981451035, - "p99": 85.95199882984161 + "p50": 208.15999805927277, + "p90": 214.20800685882568, + "p95": 217.056006193161, + "p99": 
224.2559939622879 }, "roundtrip": { - "p50": 119.03999745845795, - "p90": 149.98400211334229, - "p95": 151.8079936504364, - "p99": 158.33599865436554 + "p50": 659.0719819068909, + "p90": 674.5280027389526, + "p95": 680.832028388977, + "p99": 701.0560035705566 }, "isolatedSum": { - "p50": 135.1039931178093, - "p90": 177.08799988031387, - "p95": 180.7039976119995, - "p99": 200.28799772262573 + "p50": 677.8879910707474, + "p90": 697.2160041332245, + "p95": 704.5760154724121, + "p99": 726.6879826784134 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1740800, - "combineLogicalBytes": 1740800, - "fanoutMean": 5.3125, - "recvTokensMax": 25, - "stragglerRank": 5, + "dispatchLogicalBytes": 89751552, + "combineLogicalBytes": 179503104, + "fanoutMean": 5.349609375, + "recvTokensMax": 2772, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 1024, + "globalTokens": 8192, "dispatch": { - "p50": 70.23999840021133, - "p90": 97.79199957847595, - "p95": 102.01600193977356, - "p99": 116.67200177907944 + "p50": 624.2560148239136, + "p90": 642.4959897994995, + "p95": 648.3520269393921, + "p99": 666.1120057106018 }, "combine": { - "p50": 65.47199934720993, - "p90": 79.0719985961914, - "p95": 79.64800298213959, - "p99": 87.67999708652496 + "p50": 381.21598958969116, + "p90": 390.6880021095276, + "p95": 393.75999569892883, + "p99": 401.91999077796936 }, "roundtrip": { - "p50": 118.367999792099, - "p90": 150.4639983177185, - "p95": 155.68000078201294, - "p99": 188.25599551200867 + "p50": 1005.952000617981, + "p90": 1020.6079483032227, + "p95": 1025.439977645874, + "p99": 1037.984013557434 }, "isolatedSum": { - "p50": 135.71199774742126, - "p90": 176.86399817466736, - "p95": 181.66400492191315, - "p99": 204.3519988656044 + "p50": 1005.4720044136047, + "p90": 1033.183991909027, + "p95": 1042.112022638321, + "p99": 1068.0319964885712 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 
3471360, - "combineLogicalBytes": 3471360, - "fanoutMean": 5.296875, - "recvTokensMax": 50, - "stragglerRank": 5, + "dispatchLogicalBytes": 179511296, + "combineLogicalBytes": 359022592, + "fanoutMean": 5.349853515625, + "recvTokensMax": 5558, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 16, - "globalTokens": 128, + "tokensPerRank": 2048, + "globalTokens": 16384, "dispatch": { - "p50": 95.10400146245956, - "p90": 101.34399682283401, - "p95": 105.6319996714592, - "p99": 117.11999773979187 + "p50": 938.7199878692627, + "p90": 958.5599899291992, + "p95": 966.9439792633057, + "p99": 1001.4079809188843 }, "combine": { - "p50": 69.11999732255936, - "p90": 79.42400127649307, - "p95": 80.03199845552444, - "p99": 86.87999844551086 + "p50": 801.5360236167908, + "p90": 809.1840147972107, + "p95": 812.7040266990662, + "p99": 824.1919875144958 }, "roundtrip": { - "p50": 120.03199756145477, - "p90": 147.039994597435, - "p95": 149.72800016403198, - "p99": 158.55999290943146 + "p50": 1702.3040056228638, + "p90": 1729.9840450286865, + "p95": 1735.80801486969, + "p99": 1754.080057144165 }, "isolatedSum": { - "p50": 164.22399878501892, - "p90": 180.7679980993271, - "p95": 185.66399812698364, - "p99": 203.99999618530273 + "p50": 1740.2560114860535, + "p90": 1767.74400472641, + "p95": 1779.6480059623718, + "p99": 1825.5999684333801 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 6912000, - "combineLogicalBytes": 6912000, - "fanoutMean": 5.2734375, - "recvTokensMax": 93, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 77.47200131416321, - "p90": 103.16800326108932, - "p95": 109.72800105810165, - "p99": 237.37600445747375 - }, - "combine": { - "p50": 71.99999690055847, - "p90": 87.13600039482117, - "p95": 95.20000219345093, - "p99": 104.16000336408615 - }, - "roundtrip": { - "p50": 146.14400267601013, - "p90": 
166.52800142765045, - "p95": 171.1679995059967, - "p99": 366.0160005092621 - }, - "isolatedSum": { - "p50": 149.47199821472168, - "p90": 190.3040036559105, - "p95": 204.92800325155258, - "p99": 341.5360078215599 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 13977600, - "combineLogicalBytes": 13977600, - "fanoutMean": 5.33203125, - "recvTokensMax": 179, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 64, - "globalTokens": 512, - "dispatch": { - "p50": 92.67199784517288, - "p90": 111.04000359773636, - "p95": 113.79200220108032, - "p99": 126.68800354003906 - }, - "combine": { - "p50": 81.08799904584885, - "p90": 88.67199718952179, - "p95": 95.45599669218063, - "p99": 96.28800302743912 - }, - "roundtrip": { - "p50": 147.5840061903, - "p90": 168.96000504493713, - "p95": 170.9440052509308, - "p99": 174.9120056629181 - }, - "isolatedSum": { - "p50": 173.75999689102173, - "p90": 199.71200078725815, - "p95": 209.24799889326096, - "p99": 222.97600656747818 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 27975680, - "combineLogicalBytes": 27975680, - "fanoutMean": 5.3359375, - "recvTokensMax": 355, - "stragglerRank": 7, + "dispatchLogicalBytes": 358055936, + "combineLogicalBytes": 716111872, + "fanoutMean": 5.33544921875, + "recvTokensMax": 10982, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 4096, + "globalTokens": 32768, "dispatch": { - "p50": 105.15200346708298, - "p90": 120.7680031657219, - "p95": 122.68800288438797, - "p99": 131.29599392414093 + "p50": 1547.935962677002, + "p90": 1566.2399530410767, + "p95": 1573.9200115203857, + "p99": 1590.9119844436646 }, "combine": { - "p50": 95.90400010347366, - "p90": 104.67199981212616, - "p95": 112.60800063610077, - "p99": 267.5839960575104 + "p50": 1503.3600330352783, + "p90": 1512.5759840011597, + "p95": 1514.6559476852417, + "p99": 
1518.720030784607 }, "roundtrip": { - "p50": 173.0239987373352, - "p90": 194.17600333690643, - "p95": 195.90400159358978, - "p99": 308.351993560791 + "p50": 3009.4399452209473, + "p90": 3032.543897628784, + "p95": 3040.3521060943604, + "p99": 3060.2879524230957 }, "isolatedSum": { - "p50": 201.05600357055664, - "p90": 225.44000297784805, - "p95": 235.29600352048874, - "p99": 398.8799899816513 + "p50": 3051.2959957122803, + "p90": 3078.8159370422363, + "p95": 3088.5759592056274, + "p99": 3109.6320152282715 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 55674880, - "combineLogicalBytes": 55674880, - "fanoutMean": 5.3095703125, - "recvTokensMax": 699, - "stragglerRank": 2, + "dispatchLogicalBytes": 716197888, + "combineLogicalBytes": 1432395776, + "fanoutMean": 5.336090087890625, + "recvTokensMax": 21939, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 @@ -36469,47 +36840,48 @@ ] }, { - "id": "cx-1d6bf339", - "identity": "h100|deepep|6144|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", - "colorKey": "h100_42947950", - "comparisonKey": "4f849813bdf740d5", + "id": "cx-7de7dc87", + "identity": "gb300|deepep|v1|5120|8|160|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||a9df48e6438e77a", + "colorKey": "gb300_b1bd5887", + "comparisonKey": "6233cb31a6511067", "schemaVersion": 3, - "generatedAt": "2026-06-27T11:13:11.578821+00:00", + "generatedAt": "2026-06-29T14:06:51.104687+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_11", - "sku": "h100", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", "backend": "deepep", - "phase": "decode", + "phase": "prefill", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", + 
"measurementContract": "runtime-visible-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · bf16", - "model": "MiniMax-M3", + "label": "GB300 EP8 · deepep · fp8", + "model": "shape 5120/8/160", "shape": { - "hidden": 6144, + "hidden": 5120, "topk": 8, - "experts": 256, + "experts": 160, "routing": "uniform", "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, - "dispatchDtype": "bf16", + "dispatchDtype": "fp8", + "kernelGeneration": "v1", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1515, + "achievedFraction": 0.1316, "configuredUnits": 20, - "deviceUnits": 132, + "deviceUnits": 152, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -36517,318 +36889,244 @@ }, "placement": { "kind": "packed", - "nodes": 1, + "nodes": 2, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "ac583971f94b176", - "workloadId": "set:8:2e0df6a62cd0143e", - "workloadSource": "canonical-serialized", + "traceSignature": "a9df48e6438e77a", + "workloadId": null, + "workloadSource": "seeded-runtime", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", + "backendVersion": "1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28287492752", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28287492752", - "createdAt": "2026-06-27T11:13:11.578821+00:00", - "sha": "df7fddee0a275156d4c72fa006cd2b73bce72613" + "id": "28374342409", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409", + "createdAt": "2026-06-29T13:08:27Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { - "tokensPerRank": 1, - "globalTokens": 
8, - "dispatch": { - "p50": 96.47999703884125, - "p90": 105.56799918413162, - "p95": 109.15199667215347, - "p99": 129.66400384902954 - }, - "combine": { - "p50": 74.46400076150894, - "p90": 80.73599636554718, - "p95": 81.37600123882294, - "p99": 85.05599945783615 - }, - "roundtrip": { - "p50": 146.40000462532043, - "p90": 152.73599326610565, - "p95": 154.52800691127777, - "p99": 157.79200196266174 - }, - "isolatedSum": { - "p50": 170.9439978003502, - "p90": 186.3039955496788, - "p95": 190.5279979109764, - "p99": 214.7200033068657 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 540672, - "combineLogicalBytes": 540672, - "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 97.31200337409973, - "p90": 103.32799702882767, - "p95": 104.89600151777267, - "p99": 109.27999764680862 - }, - "combine": { - "p50": 74.8480036854744, - "p90": 80.28800040483475, - "p95": 81.69600367546082, - "p99": 86.33600175380707 - }, - "roundtrip": { - "p50": 144.16000247001648, - "p90": 152.19199657440186, - "p95": 154.52800691127777, - "p99": 164.8319959640503 - }, - "isolatedSum": { - "p50": 172.16000705957413, - "p90": 183.61599743366241, - "p95": 186.5920051932335, - "p99": 195.6159994006157 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1056768, - "combineLogicalBytes": 1056768, - "fanoutMean": 5.375, - "recvTokensMax": 13, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 95.96800059080124, - "p90": 102.39999741315842, - "p95": 104.8320010304451, - "p99": 110.88000237941742 + "p50": 426.62400007247925, + "p90": 460.7999920845032, + "p95": 468.9919948577881, + "p99": 490.81599712371826 }, "combine": { - "p50": 74.46400076150894, - "p90": 79.77599650621414, - "p95": 
81.216000020504, - "p99": 83.96799862384796 + "p50": 110.46399921178818, + "p90": 117.08799749612808, + "p95": 121.95199728012085, + "p99": 140.44800400733948 }, "roundtrip": { - "p50": 144.16000247001648, - "p90": 152.28800475597382, - "p95": 155.03999590873718, - "p99": 161.40800714492798 + "p50": 481.6319942474365, + "p90": 513.7919783592224, + "p95": 521.6320157051086, + "p99": 537.8559827804565 }, "isolatedSum": { - "p50": 170.43200135231018, - "p90": 182.17599391937256, - "p95": 186.0480010509491, - "p99": 194.84800100326538 + "p50": 537.0879992842674, + "p90": 577.8879895806313, + "p95": 590.9439921379089, + "p99": 631.2640011310577 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2125824, - "combineLogicalBytes": 2125824, - "fanoutMean": 5.40625, - "recvTokensMax": 29, - "stragglerRank": 7, + "dispatchLogicalBytes": 27837440, + "combineLogicalBytes": 55674880, + "fanoutMean": 5.3095703125, + "recvTokensMax": 699, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 256, + "globalTokens": 2048, "dispatch": { - "p50": 95.58399766683578, - "p90": 103.58399897813797, - "p95": 106.49599879980087, - "p99": 112.2559979557991 + "p50": 367.1039938926697, + "p90": 381.47199153900146, + "p95": 387.1999979019165, + "p99": 459.4239890575409 }, "combine": { - "p50": 75.6160020828247, - "p90": 81.60000294446945, - "p95": 82.65600353479385, - "p99": 87.00799942016602 + "p50": 144.67200636863708, + "p90": 150.94399452209473, + "p95": 154.08000349998474, + "p99": 159.71200168132782 }, "roundtrip": { - "p50": 147.2959965467453, - "p90": 154.23999726772308, - "p95": 156.44800662994385, - "p99": 160.73599457740784 + "p50": 487.96799778938293, + "p90": 501.72799825668335, + "p95": 506.0160160064697, + "p99": 574.1119980812073 }, "isolatedSum": { - "p50": 171.1999997496605, - "p90": 185.18400192260742, - "p95": 189.15200233459473, - "p99": 199.26399737596512 + "p50": 511.77600026130676, 
+ "p90": 532.4159860610962, + "p95": 541.2800014019012, + "p99": 619.1359907388687 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4263936, - "combineLogicalBytes": 4263936, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 7, + "dispatchLogicalBytes": 55552000, + "combineLogicalBytes": 111104000, + "fanoutMean": 5.2978515625, + "recvTokensMax": 1387, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 16, - "globalTokens": 128, + "tokensPerRank": 512, + "globalTokens": 4096, "dispatch": { - "p50": 96.0640013217926, - "p90": 102.30399668216705, - "p95": 105.59999942779541, - "p99": 112.47999966144562 + "p50": 450.655996799469, + "p90": 463.5840058326721, + "p95": 468.35198998451233, + "p99": 482.7840030193329 }, "combine": { - "p50": 78.68800312280655, - "p90": 81.95199817419052, - "p95": 83.71199667453766, - "p99": 89.47200328111649 + "p50": 221.18400037288666, + "p90": 227.2000014781952, + "p95": 229.44000363349915, + "p99": 233.2800030708313 }, "roundtrip": { - "p50": 149.05600249767303, - "p90": 155.42399883270264, - "p95": 158.84800255298615, - "p99": 165.6319946050644 + "p50": 660.9920263290405, + "p90": 672.4479794502258, + "p95": 676.9919991493225, + "p99": 694.0479874610901 }, "isolatedSum": { - "p50": 174.75200444459915, - "p90": 184.25599485635757, - "p95": 189.31199610233307, - "p99": 201.9520029425621 + "p50": 671.8399971723557, + "p90": 690.7840073108673, + "p95": 697.7919936180115, + "p99": 716.0640060901642 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 8503296, - "combineLogicalBytes": 8503296, - "fanoutMean": 5.40625, - "recvTokensMax": 92, - "stragglerRank": 7, + "dispatchLogicalBytes": 111549440, + "combineLogicalBytes": 223098880, + "fanoutMean": 5.319091796875, + "recvTokensMax": 2762, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 32, - "globalTokens": 256, + "tokensPerRank": 1024, + "globalTokens": 8192, 
"dispatch": { - "p50": 96.0640013217926, - "p90": 104.16000336408615, - "p95": 105.6319996714592, - "p99": 113.27999830245972 + "p50": 631.4240097999573, + "p90": 645.6320285797119, + "p95": 649.6959924697876, + "p99": 663.0399823188782 }, "combine": { - "p50": 82.71999657154083, - "p90": 87.99999952316284, - "p95": 88.99199962615967, - "p99": 91.20000153779984 + "p50": 459.77601408958435, + "p90": 465.5359983444214, + "p95": 467.2960042953491, + "p99": 473.7600088119507 }, "roundtrip": { - "p50": 150.27199685573578, - "p90": 159.8079949617386, - "p95": 162.08000481128693, - "p99": 168.92799735069275 + "p50": 1042.8160429000854, + "p90": 1055.7440519332886, + "p95": 1059.648036956787, + "p99": 1105.2160263061523 }, "isolatedSum": { - "p50": 178.78399789333344, - "p90": 192.160002887249, - "p95": 194.62399929761887, - "p99": 204.47999984025955 + "p50": 1091.2000238895416, + "p90": 1111.1680269241333, + "p95": 1116.9919967651367, + "p99": 1136.7999911308289 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 16908288, - "combineLogicalBytes": 16908288, - "fanoutMean": 5.375, - "recvTokensMax": 182, - "stragglerRank": 7, + "dispatchLogicalBytes": 223365120, + "combineLogicalBytes": 446730240, + "fanoutMean": 5.325439453125, + "recvTokensMax": 5518, + "stragglerRank": 4, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 64, - "globalTokens": 512, + "tokensPerRank": 2048, + "globalTokens": 16384, "dispatch": { - "p50": 107.744000852108, - "p90": 114.9120032787323, - "p95": 116.22399836778641, - "p99": 124.06399846076965 + "p50": 1002.9120445251465, + "p90": 1051.4240264892578, + "p95": 1057.1839809417725, + "p99": 1065.4079914093018 }, "combine": { - "p50": 91.96799993515015, - "p90": 96.3520035147667, - "p95": 97.6639986038208, - "p99": 103.61599922180176 + "p50": 834.1439962387085, + "p90": 841.2479758262634, + "p95": 844.2559838294983, + "p99": 850.4319787025452 }, "roundtrip": { - "p50": 164.60800170898438, - "p90": 177.40799486637115, - 
"p95": 179.26399409770966, - "p99": 182.3360025882721 + "p50": 1799.7119426727295, + "p90": 1841.69602394104, + "p95": 1851.4560461044312, + "p99": 1875.3600120544434 }, "isolatedSum": { - "p50": 199.71200078725815, - "p90": 211.264006793499, - "p95": 213.8879969716072, - "p99": 227.6799976825714 + "p50": 1837.056040763855, + "p90": 1892.6720023155212, + "p95": 1901.4399647712708, + "p99": 1915.839970111847 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 33423360, - "combineLogicalBytes": 33423360, - "fanoutMean": 5.3125, - "recvTokensMax": 367, - "stragglerRank": 7, + "dispatchLogicalBytes": 446817280, + "combineLogicalBytes": 893634560, + "fanoutMean": 5.32647705078125, + "recvTokensMax": 11032, + "stragglerRank": 2, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 4096, + "globalTokens": 32768, "dispatch": { - "p50": 120.86399644613266, - "p90": 128.92800569534302, - "p95": 130.65600395202637, - "p99": 136.7039978504181 + "p50": 1781.8880081176758, + "p90": 1797.4720001220703, + "p95": 1803.1680583953857, + "p99": 1816.5760040283203 }, "combine": { - "p50": 107.16799646615982, - "p90": 112.12799698114395, - "p95": 112.99200356006622, - "p99": 115.29599875211716 + "p50": 1567.3600435256958, + "p90": 1573.855996131897, + "p95": 1575.5200386047363, + "p99": 1583.6479663848877 }, "roundtrip": { - "p50": 197.76000082492828, - "p90": 205.4080069065094, - "p95": 210.4959934949875, - "p99": 479.45600748062134 + "p50": 3305.3760528564453, + "p90": 3318.0160522460938, + "p95": 3323.0080604553223, + "p99": 3348.479986190796 }, "isolatedSum": { - "p50": 228.03199291229248, - "p90": 241.05600267648697, - "p95": 243.6480075120926, - "p99": 251.99999660253525 + "p50": 3349.2480516433716, + "p90": 3371.3279962539673, + "p95": 3378.688097000122, + "p99": 3400.223970413208 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 66576384, - "combineLogicalBytes": 66576384, - "fanoutMean": 
5.291015625, - "recvTokensMax": 723, - "stragglerRank": 7, + "dispatchLogicalBytes": 893132800, + "combineLogicalBytes": 1786265600, + "fanoutMean": 5.323486328125, + "recvTokensMax": 21895, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 @@ -36836,28 +37134,28 @@ ] }, { - "id": "cx-d5af8f11", - "identity": "h100|deepep|6144|8|256|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", - "colorKey": "h100_ff7906f8", - "comparisonKey": "bb40f1d7fb8ef5bf", + "id": "cx-d318914f", + "identity": "gb300|deepep|v1|6144|8|256|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||b9896b0a7ca9901", + "colorKey": "gb300_b1bd5887", + "comparisonKey": "6304da2c595b352d", "schemaVersion": 3, - "generatedAt": "2026-06-26T23:52:15.657129+00:00", + "generatedAt": "2026-06-29T14:11:20.066461+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_00", - "sku": "h100", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", "backend": "deepep", - "phase": "decode", + "phase": "prefill", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "runtime-visible-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · bf16", + "label": "GB300 EP8 · deepep · fp8", "model": "MiniMax-M3", "shape": { "hidden": 6144, @@ -36868,15 +37166,16 @@ "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, - "dispatchDtype": "bf16", + "dispatchDtype": "fp8", + "kernelGeneration": "v1", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1515, + "achievedFraction": 0.1316, "configuredUnits": 20, - "deviceUnits": 132, + "deviceUnits": 152, "resourceClass": 
"backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -36884,318 +37183,244 @@ }, "placement": { "kind": "packed", - "nodes": 1, + "nodes": 2, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "ac583971f94b176", - "workloadId": "set:8:2e0df6a62cd0143e", - "workloadSource": "canonical-serialized", + "traceSignature": "b9896b0a7ca9901", + "workloadId": null, + "workloadSource": "seeded-runtime", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", + "backendVersion": "1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28271714089", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271714089", - "createdAt": "2026-06-26T23:52:15.657129+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28374342409", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409", + "createdAt": "2026-06-29T13:08:27Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 95.51999717950821, - "p90": 104.99200224876404, - "p95": 123.16799908876419, - "p99": 153.05599570274353 - }, - "combine": { - "p50": 74.0479975938797, - "p90": 82.36800134181976, - "p95": 90.65599739551544, - "p99": 115.13599753379822 - }, - "roundtrip": { - "p50": 144.73600685596466, - "p90": 151.7760008573532, - "p95": 153.9199948310852, - "p99": 191.74399971961975 - }, - "isolatedSum": { - "p50": 169.5679947733879, - "p90": 187.3600035905838, - "p95": 213.82399648427963, - "p99": 268.19199323654175 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 540672, - "combineLogicalBytes": 540672, - "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, + 
"tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 71.26399874687195, - "p90": 102.4319976568222, - "p95": 104.47999835014343, - "p99": 143.48800480365753 + "p50": 428.6719858646393, + "p90": 454.8160135746002, + "p95": 462.3039960861206, + "p99": 486.62400245666504 }, "combine": { - "p50": 67.77600198984146, - "p90": 81.15199953317642, - "p95": 81.727996468544, - "p99": 87.71199733018875 + "p50": 117.18399822711945, + "p90": 123.03999811410904, + "p95": 125.95200538635254, + "p99": 131.42399489879608 }, "roundtrip": { - "p50": 124.03199821710587, - "p90": 153.02400290966034, - "p95": 154.94400262832642, - "p99": 158.36800634860992 + "p50": 506.5600275993347, + "p90": 525.5680084228516, + "p95": 531.1999917030334, + "p99": 541.8239831924438 }, "isolatedSum": { - "p50": 139.0400007367134, - "p90": 183.58399718999863, - "p95": 186.20799481868744, - "p99": 231.20000213384628 + "p50": 545.8559840917587, + "p90": 577.8560116887093, + "p95": 588.2560014724731, + "p99": 618.0479973554611 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1056768, - "combineLogicalBytes": 1056768, - "fanoutMean": 5.375, - "recvTokensMax": 13, + "dispatchLogicalBytes": 33288192, + "combineLogicalBytes": 66576384, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 91.58399701118469, - "p90": 103.20000350475311, - "p95": 104.032002389431, - "p99": 107.58399963378906 - }, - "combine": { - "p50": 74.20799881219864, - "p90": 80.64000308513641, - "p95": 81.31200075149536, - "p99": 82.49600231647491 - }, - "roundtrip": { - "p50": 145.79200744628906, - "p90": 152.38399803638458, - "p95": 154.55999970436096, - "p99": 172.38399386405945 - }, - "isolatedSum": { - "p50": 165.79199582338333, - "p90": 183.84000658988953, - "p95": 185.34400314092636, - "p99": 190.08000195026398 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 
2125824, - "combineLogicalBytes": 2125824, - "fanoutMean": 5.40625, - "recvTokensMax": 29, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 256, + "globalTokens": 2048, "dispatch": { - "p50": 92.6079973578453, - "p90": 103.00800204277039, - "p95": 114.46399986743927, - "p99": 149.98400211334229 + "p50": 423.0720102787018, + "p90": 441.9519901275635, + "p95": 446.46400213241577, + "p99": 457.8559994697571 }, "combine": { - "p50": 76.1599987745285, - "p90": 82.49600231647491, - "p95": 86.68799698352814, - "p99": 95.77599912881851 + "p50": 156.2879979610443, + "p90": 162.84799575805664, + "p95": 165.69599509239197, + "p99": 172.83199727535248 }, "roundtrip": { - "p50": 146.84799313545227, - "p90": 161.40800714492798, - "p95": 192.09599494934082, - "p99": 203.74399423599243 + "p50": 552.5439977645874, + "p90": 569.2160129547119, + "p95": 574.2719769477844, + "p99": 587.6160264015198 }, "isolatedSum": { - "p50": 168.7679961323738, - "p90": 185.5040043592453, - "p95": 201.1519968509674, - "p99": 245.7600012421608 + "p50": 579.3600082397461, + "p90": 604.7999858856201, + "p95": 612.1599972248077, + "p99": 630.6879967451096 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4263936, - "combineLogicalBytes": 4263936, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 0, + "dispatchLogicalBytes": 66809856, + "combineLogicalBytes": 133619712, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 16, - "globalTokens": 128, + "tokensPerRank": 512, + "globalTokens": 4096, "dispatch": { - "p50": 91.2960022687912, - "p90": 99.16800260543823, - "p95": 101.1200025677681, - "p99": 107.68000036478043 + "p50": 524.1600275039673, + "p90": 540.4800176620483, + "p95": 544.8960065841675, + "p99": 551.6800284385681 }, "combine": { - "p50": 77.37600058317184, - "p90": 
81.53600245714188, - "p95": 82.24000036716461, - "p99": 87.13600039482117 + "p50": 257.56800174713135, + "p90": 265.28000831604004, + "p95": 267.87200570106506, + "p99": 273.69600534439087 }, "roundtrip": { - "p50": 150.30400454998016, - "p90": 157.05600380897522, - "p95": 158.9760035276413, - "p99": 162.49600052833557 + "p50": 758.6560249328613, + "p90": 776.3839960098267, + "p95": 781.6960215568542, + "p99": 798.6239790916443 }, "isolatedSum": { - "p50": 168.67200285196304, - "p90": 180.7040050625801, - "p95": 183.3600029349327, - "p99": 194.8160007596016 + "p50": 781.7280292510986, + "p90": 805.7600259780884, + "p95": 812.7680122852325, + "p99": 825.376033782959 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 8503296, - "combineLogicalBytes": 8503296, - "fanoutMean": 5.40625, - "recvTokensMax": 92, - "stragglerRank": 4, + "dispatchLogicalBytes": 133828608, + "combineLogicalBytes": 267657216, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 32, - "globalTokens": 256, + "tokensPerRank": 1024, + "globalTokens": 8192, "dispatch": { - "p50": 96.03200107812881, - "p90": 103.90400141477585, - "p95": 107.68000036478043, - "p99": 194.815993309021 + "p50": 736.3520264625549, + "p90": 751.9999742507935, + "p95": 755.3279995918274, + "p99": 762.2399926185608 }, "combine": { - "p50": 80.51200211048126, - "p90": 87.00799942016602, - "p95": 90.55999666452408, - "p99": 383.7119936943054 + "p50": 467.23198890686035, + "p90": 473.05598855018616, + "p95": 476.063996553421, + "p99": 483.5520088672638 }, "roundtrip": { - "p50": 134.97599959373474, - "p90": 158.27199816703796, - "p95": 171.36000096797943, - "p99": 204.0960043668747 + "p50": 1161.8560552597046, + "p90": 1176.1280298233032, + "p95": 1179.4240474700928, + "p99": 1195.520043373108 }, "isolatedSum": { - "p50": 176.54400318861008, - "p90": 190.91200083494186, - "p95": 198.2399970293045, - "p99": 578.5279870033264 
+ "p50": 1203.5840153694153, + "p90": 1225.0559628009796, + "p95": 1231.3919961452484, + "p99": 1245.7920014858246 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 16908288, - "combineLogicalBytes": 16908288, - "fanoutMean": 5.375, - "recvTokensMax": 182, - "stragglerRank": 4, + "dispatchLogicalBytes": 267190272, + "combineLogicalBytes": 534380544, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 64, - "globalTokens": 512, + "tokensPerRank": 2048, + "globalTokens": 16384, "dispatch": { - "p50": 107.61599987745285, - "p90": 114.49600011110306, - "p95": 116.35199934244156, - "p99": 122.84799665212631 + "p50": 1161.6319417953491, + "p90": 1181.663990020752, + "p95": 1189.4079446792603, + "p99": 1212.5439643859863 }, "combine": { - "p50": 92.06400066614151, - "p90": 98.2080027461052, - "p95": 98.68799895048141, - "p99": 102.46399790048599 + "p50": 848.2239842414856, + "p90": 855.135977268219, + "p95": 856.768012046814, + "p99": 861.7280125617981 }, "roundtrip": { - "p50": 167.84000396728516, - "p90": 173.567995429039, - "p95": 175.90400576591492, - "p99": 179.4240027666092 + "p50": 1978.816032409668, + "p90": 2005.0559043884277, + "p95": 2013.0879878997803, + "p99": 2033.6639881134033 }, "isolatedSum": { - "p50": 199.68000054359436, - "p90": 212.70400285720825, - "p95": 215.03999829292297, - "p99": 225.3119945526123 + "p50": 2009.8559260368347, + "p90": 2036.799967288971, + "p95": 2046.1759567260742, + "p99": 2074.2719769477844 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 33423360, - "combineLogicalBytes": 33423360, - "fanoutMean": 5.3125, - "recvTokensMax": 367, - "stragglerRank": 4, + "dispatchLogicalBytes": 533059584, + "combineLogicalBytes": 1066119168, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 128, - "globalTokens": 1024, + 
"tokensPerRank": 4096, + "globalTokens": 32768, "dispatch": { - "p50": 123.26399981975555, - "p90": 130.68799674510956, - "p95": 132.83200562000275, - "p99": 148.0959951877594 + "p50": 2040.6720638275146, + "p90": 2056.096076965332, + "p95": 2061.8560314178467, + "p99": 2084.480047225952 }, "combine": { - "p50": 106.6880002617836, - "p90": 114.23999816179276, - "p95": 115.23199826478958, - "p99": 137.85600662231445 + "p50": 1591.647982597351, + "p90": 1599.552035331726, + "p95": 1603.327989578247, + "p99": 1609.055995941162 }, "roundtrip": { - "p50": 197.60000705718994, - "p90": 204.8639953136444, - "p95": 207.07200467586517, - "p99": 225.8879989385605 + "p50": 3593.087911605835, + "p90": 3605.5679321289062, + "p95": 3608.8318824768066, + "p99": 3622.080087661743 }, "isolatedSum": { - "p50": 229.95200008153915, - "p90": 244.9279949069023, - "p95": 248.06400388479233, - "p99": 285.95200181007385 + "p50": 3632.3200464248657, + "p90": 3655.648112297058, + "p95": 3665.1840209960938, + "p99": 3693.5360431671143 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 66576384, - "combineLogicalBytes": 66576384, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 2, + "dispatchLogicalBytes": 1065861120, + "combineLogicalBytes": 2131722240, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 3, "correct": true, "samplesPooled": 600, "trials": 3 @@ -37203,28 +37428,28 @@ ] }, { - "id": "cx-7171c240", - "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|fp8-saturation|none|none|0|tuned||8c8497a77d9085d", - "colorKey": "h100_42947950", - "comparisonKey": "1fe2184d83233e7e", + "id": "cx-6fe76bb4", + "identity": "gb300|deepep|v1|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||b9896b0a7ca9901", + "colorKey": "gb300_c4ac4643", + "comparisonKey": "9bdacb5bc6bbc14d", "schemaVersion": 3, - "generatedAt": "2026-06-27T00:05:32.898956+00:00", + "generatedAt": 
"2026-06-29T13:50:37.611513+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_03", - "sku": "h100", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", "backend": "deepep", - "phase": "decode", + "phase": "prefill", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · bf16", + "label": "GB300 EP8 · deepep · fp8", "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, @@ -37235,15 +37460,16 @@ "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "fp8-saturation", + "dispatchDtype": "fp8", + "kernelGeneration": "v1", + "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1515, + "achievedFraction": 0.1316, "configuredUnits": 20, - "deviceUnits": 132, + "deviceUnits": 152, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -37251,199 +37477,273 @@ }, "placement": { "kind": "packed", - "nodes": 1, + "nodes": 2, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "8c8497a77d9085d", - "workloadId": "set:4:d8d49658059863f2", - "workloadSource": "canonical-serialized", + "traceSignature": "b9896b0a7ca9901", + "workloadId": null, + "workloadSource": "seeded-runtime", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", + "backendVersion": "1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28272125238", - "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272125238", - "createdAt": "2026-06-27T00:05:32.898956+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28374342409", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409", + "createdAt": "2026-06-29T13:08:27Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { - "tokensPerRank": 1, - "globalTokens": 8, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 94.7519987821579, - "p90": 118.49600076675415, - "p95": 129.60000336170197, - "p99": 144.31999623775482 + "p50": 111.39199882745743, + "p90": 120.89599668979645, + "p95": 123.6800029873848, + "p99": 132.54399597644806 }, "combine": { - "p50": 76.64000242948532, - "p90": 87.2960016131401, - "p95": 90.52799642086029, - "p99": 103.10400277376175 + "p50": 119.6800023317337, + "p90": 126.14400684833527, + "p95": 129.66400384902954, + "p99": 136.51199638843536 }, "roundtrip": { - "p50": 147.2640037536621, - "p90": 170.30400037765503, - "p95": 184.89600718021393, - "p99": 195.6160068511963 + "p50": 280.64000606536865, + "p90": 298.335999250412, + "p95": 303.5520017147064, + "p99": 315.3280019760132 }, "isolatedSum": { - "p50": 171.39200121164322, - "p90": 205.79200237989426, - "p95": 220.12799978256226, - "p99": 247.42399901151657 + "p50": 231.07200115919113, + "p90": 247.0400035381317, + "p95": 253.34400683641434, + "p99": 269.0559923648834 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 630784, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 7, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 256, + "globalTokens": 2048, "dispatch": { - "p50": 98.68799895048141, - "p90": 122.17599898576736, - "p95": 
138.7840062379837, - "p99": 191.9039934873581 + "p50": 141.88799262046814, + "p90": 160.76800227165222, + "p95": 182.23999440670013, + "p99": 200.3519982099533 }, "combine": { - "p50": 81.31200075149536, - "p90": 89.72799777984619, - "p95": 97.08800166845322, - "p99": 106.62399977445602 + "p50": 161.28000617027283, + "p90": 182.91200697422028, + "p95": 202.14399695396423, + "p99": 226.33600234985352 }, "roundtrip": { - "p50": 152.70400047302246, - "p90": 174.9120056629181, - "p95": 184.03199315071106, - "p99": 195.51999866962433 + "p50": 356.51201009750366, + "p90": 385.5679929256439, + "p95": 394.8799967765808, + "p99": 411.6480052471161 }, "isolatedSum": { - "p50": 179.99999970197678, - "p90": 211.90399676561356, - "p95": 235.87200790643692, - "p99": 298.5279932618141 + "p50": 303.16799879074097, + "p90": 343.6800092458725, + "p95": 384.38399136066437, + "p99": 426.6880005598068 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 6, + "dispatchLogicalBytes": 77944832, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 32, - "globalTokens": 256, + "tokensPerRank": 512, + "globalTokens": 4096, "dispatch": { - "p50": 101.79200023412704, - "p90": 127.96799838542938, - "p95": 147.42399752140045, - "p99": 195.16800343990326 + "p50": 197.37599790096283, + "p90": 217.53600239753723, + "p95": 232.2240024805069, + "p99": 252.06398963928223 }, "combine": { - "p50": 89.66399729251862, - "p90": 103.4879982471466, - "p95": 113.02399635314941, - "p99": 128.1599998474121 + "p50": 281.69599175453186, + "p90": 295.6799864768982, + "p95": 312.0959997177124, + "p99": 327.58399844169617 }, "roundtrip": { - "p50": 162.88000345230103, - "p90": 193.53599846363068, - "p95": 214.08000588417053, - "p99": 247.71200120449066 + "p50": 
584.2559933662415, + "p90": 597.5040197372437, + "p95": 617.2800064086914, + "p99": 638.4320259094238 }, "isolatedSum": { - "p50": 191.45599752664566, - "p90": 231.455996632576, - "p95": 260.44799387454987, - "p99": 323.32800328731537 + "p50": 479.0719896554947, + "p90": 513.2159888744354, + "p95": 544.3200021982193, + "p99": 579.6479880809784 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19726336, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 182, - "stragglerRank": 7, + "dispatchLogicalBytes": 156133376, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 1024, + "globalTokens": 8192, "dispatch": { - "p50": 130.49599528312683, - "p90": 143.8719928264618, - "p95": 152.70400047302246, - "p99": 158.9760035276413 + "p50": 310.5599880218506, + "p90": 328.8959860801697, + "p95": 342.9119884967804, + "p99": 363.45601081848145 }, "combine": { - "p50": 114.81600254774094, - "p90": 127.23200023174286, - "p95": 131.071999669075, - "p99": 139.5840048789978 + "p50": 482.91200399398804, + "p90": 501.2800097465515, + "p95": 510.047972202301, + "p99": 528.6719799041748 }, "roundtrip": { - "p50": 212.70400285720825, - "p90": 226.33600234985352, - "p95": 233.69599878787994, - "p99": 247.8400021791458 + "p50": 1010.1120471954346, + "p90": 1035.0079536437988, + "p95": 1043.67995262146, + "p99": 1060.3519678115845 }, "isolatedSum": { - "p50": 245.31199783086777, - "p90": 271.10399305820465, - "p95": 283.7760001420975, - "p99": 298.5600084066391 + "p50": 793.4719920158386, + "p90": 830.1759958267212, + "p95": 852.9599606990814, + "p99": 892.1279907226562 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 7, - "correct": true, - 
"samplesPooled": 600, - "trials": 3 - } + "dispatchLogicalBytes": 311721984, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 545.5999970436096, + "p90": 557.2479963302612, + "p95": 560.8320236206055, + "p99": 570.4320073127747 + }, + "combine": { + "p50": 868.0959939956665, + "p90": 876.8960237503052, + "p95": 879.5199990272522, + "p99": 900.704026222229 + }, + "roundtrip": { + "p50": 1849.5999574661255, + "p90": 1864.7359609603882, + "p95": 1871.9359636306763, + "p99": 1903.2959938049316 + }, + "isolatedSum": { + "p50": 1413.6959910392761, + "p90": 1434.1440200805664, + "p95": 1440.3520226478577, + "p99": 1471.1360335350037 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 621902848, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1018.1759595870972, + "p90": 1037.343978881836, + "p95": 1054.0800094604492, + "p99": 1073.472023010254 + }, + "combine": { + "p50": 1616.3519620895386, + "p90": 1627.0400285720825, + "p95": 1634.9120140075684, + "p99": 1655.8079719543457 + }, + "roundtrip": { + "p50": 3515.0399208068848, + "p90": 3526.4639854431152, + "p95": 3531.424045562744, + "p99": 3540.8639907836914 + }, + "isolatedSum": { + "p50": 2634.5279216766357, + "p90": 2664.3840074539185, + "p95": 2688.9920234680176, + "p99": 2729.2799949645996 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243504640, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } ] }, { - "id": "cx-7a284f4e", - "identity": 
"h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", - "colorKey": "h100_42947950", - "comparisonKey": "2b24bee4ac6d8f67", + "id": "cx-8e404634", + "identity": "gb300|deepep|v1|7168|8|256|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||b9896b0a7ca9901", + "colorKey": "gb300_b1bd5887", + "comparisonKey": "67d9b2df504c0ef6", "schemaVersion": 3, - "generatedAt": "2026-06-27T10:09:52.345460+00:00", + "generatedAt": "2026-06-29T13:54:47.856231+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_17", - "sku": "h100", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", "backend": "deepep", - "phase": "decode", + "phase": "prefill", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", + "measurementContract": "runtime-visible-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · bf16", + "label": "GB300 EP8 · deepep · fp8", "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, @@ -37454,15 +37754,16 @@ "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, - "dispatchDtype": "bf16", + "dispatchDtype": "fp8", + "kernelGeneration": "v1", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1515, + "achievedFraction": 0.1316, "configuredUnits": 20, - "deviceUnits": 132, + "deviceUnits": 152, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -37470,318 +37771,244 @@ }, "placement": { "kind": "packed", - "nodes": 1, + "nodes": 2, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "ac583971f94b176", 
- "workloadId": "set:8:d8d49658059863f2", - "workloadSource": "canonical-serialized", + "traceSignature": "b9896b0a7ca9901", + "workloadId": null, + "workloadSource": "seeded-runtime", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, - "backendVersion": "2.0.0+af9a040", + "backendVersion": "1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28286083501", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28286083501", - "createdAt": "2026-06-27T10:09:52.345460+00:00", - "sha": "76a3032d20288ee17220eb6099346f74d56ce005" + "id": "28374342409", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409", + "createdAt": "2026-06-29T13:08:27Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 74.36800003051758, - "p90": 106.81600123643875, - "p95": 108.06400328874588, - "p99": 112.89600282907486 - }, - "combine": { - "p50": 74.87999647855759, - "p90": 83.80799740552902, - "p95": 84.22400057315826, - "p99": 88.99199962615967 - }, - "roundtrip": { - "p50": 134.24000144004822, - "p90": 164.0319973230362, - "p95": 166.81599617004395, - "p99": 169.91999745368958 - }, - "isolatedSum": { - "p50": 149.24799650907516, - "p90": 190.62399864196777, - "p95": 192.28800386190414, - "p99": 201.88800245523453 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 630784, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 72.60800153017044, - "p90": 107.84000158309937, - "p95": 108.76800119876862, - "p99": 112.44799941778183 - }, - "combine": { - "p50": 74.91199672222137, - "p90": 84.03199911117554, - "p95": 84.48000252246857, - "p99": 89.24800157546997 - }, - 
"roundtrip": { - "p50": 134.8160058259964, - "p90": 165.69599509239197, - "p95": 167.42399334907532, - "p99": 170.04799842834473 - }, - "isolatedSum": { - "p50": 147.51999825239182, - "p90": 191.8720006942749, - "p95": 193.24800372123718, - "p99": 201.6960009932518 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1232896, - "combineLogicalBytes": 1232896, - "fanoutMean": 5.375, - "recvTokensMax": 13, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 72.57600128650665, - "p90": 103.32799702882767, - "p95": 106.1440035700798, - "p99": 112.22399771213531 + "p50": 364.5760118961334, + "p90": 393.44000816345215, + "p95": 400.7039964199066, + "p99": 503.9359927177429 }, "combine": { - "p50": 75.55200159549713, - "p90": 87.90399879217148, - "p95": 88.92799913883209, - "p99": 91.26400202512741 + "p50": 118.04799735546112, + "p90": 125.72799623012543, + "p95": 128.28800082206726, + "p99": 142.33599603176117 }, "roundtrip": { - "p50": 134.36800241470337, - "p90": 164.8319959640503, - "p95": 166.75199568271637, - "p99": 172.44799435138702 + "p50": 454.1119933128357, + "p90": 480.5760085582733, + "p95": 500.63997507095337, + "p99": 580.8640122413635 }, "isolatedSum": { - "p50": 148.12800288200378, - "p90": 191.23199582099915, - "p95": 195.0720027089119, - "p99": 203.48799973726273 + "p50": 482.62400925159454, + "p90": 519.1680043935776, + "p95": 528.9919972419739, + "p99": 646.2719887495041 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2480128, - "combineLogicalBytes": 2480128, - "fanoutMean": 5.40625, - "recvTokensMax": 29, - "stragglerRank": 4, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 
256, + "globalTokens": 2048, "dispatch": { - "p50": 75.93599706888199, - "p90": 104.54399883747101, - "p95": 107.51999914646149, - "p99": 112.64000087976456 + "p50": 385.0879967212677, + "p90": 405.4720103740692, + "p95": 416.703999042511, + "p99": 539.5839810371399 }, "combine": { - "p50": 75.48800110816956, - "p90": 84.28800106048584, - "p95": 88.86399865150452, - "p99": 91.32800251245499 + "p50": 160.38399934768677, + "p90": 167.64800250530243, + "p95": 171.07200622558594, + "p99": 191.42399728298187 }, "roundtrip": { - "p50": 134.5279961824417, - "p90": 165.47200083732605, - "p95": 167.35999286174774, - "p99": 170.71999609470367 + "p50": 534.3999862670898, + "p90": 554.4959902763367, + "p95": 571.3919997215271, + "p99": 673.7279891967773 }, "isolatedSum": { - "p50": 151.42399817705154, - "p90": 188.83199989795685, - "p95": 196.383997797966, - "p99": 203.96800339221954 + "p50": 545.4719960689545, + "p90": 573.1200128793716, + "p95": 587.7760052680969, + "p99": 731.0079783201218 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 4, + "dispatchLogicalBytes": 77944832, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 16, - "globalTokens": 128, + "tokensPerRank": 512, + "globalTokens": 4096, "dispatch": { - "p50": 84.51200276613235, - "p90": 102.68799960613251, - "p95": 104.41599786281586, - "p99": 111.13599687814713 + "p50": 507.29602575302124, + "p90": 522.271990776062, + "p95": 525.2479910850525, + "p99": 538.1119847297668 }, "combine": { - "p50": 75.96799731254578, - "p90": 90.62399715185165, - "p95": 91.58399701118469, - "p99": 92.6079973578453 + "p50": 275.7120132446289, + "p90": 282.20799565315247, + "p95": 284.960001707077, + "p99": 289.72798585891724 }, "roundtrip": { - "p50": 133.44000279903412, - "p90": 
164.67200219631195, - "p95": 167.29600727558136, - "p99": 349.88799691200256 + "p50": 761.0880136489868, + "p90": 775.1039862632751, + "p95": 780.9600234031677, + "p99": 856.0960292816162 }, "isolatedSum": { - "p50": 160.48000007867813, - "p90": 193.31199675798416, - "p95": 195.99999487400055, - "p99": 203.74399423599243 + "p50": 783.0080389976501, + "p90": 804.4799864292145, + "p95": 810.2079927921295, + "p99": 827.8399705886841 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 9920512, - "combineLogicalBytes": 9920512, - "fanoutMean": 5.40625, - "recvTokensMax": 92, - "stragglerRank": 7, + "dispatchLogicalBytes": 156133376, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 32, - "globalTokens": 256, + "tokensPerRank": 1024, + "globalTokens": 8192, "dispatch": { - "p50": 91.87199920415878, - "p90": 107.58399963378906, - "p95": 108.60799998044968, - "p99": 113.15199732780457 + "p50": 739.5840287208557, + "p90": 757.3760151863098, + "p95": 777.4080038070679, + "p99": 874.4000196456909 }, "combine": { - "p50": 83.23200047016144, - "p90": 92.0960009098053, - "p95": 92.70399808883667, - "p99": 97.59999811649323 + "p50": 481.56800866127014, + "p90": 488.0320131778717, + "p95": 492.76798963546753, + "p99": 521.0239887237549 }, "roundtrip": { - "p50": 143.71199905872345, - "p90": 170.1440066099167, - "p95": 173.66400361061096, - "p99": 177.2480010986328 + "p50": 1188.704013824463, + "p90": 1206.5919637680054, + "p95": 1228.5759449005127, + "p99": 1329.4399976730347 }, "isolatedSum": { - "p50": 175.10399967432022, - "p90": 199.68000054359436, - "p95": 201.31199806928635, - "p99": 210.7519954442978 + "p50": 1221.1520373821259, + "p90": 1245.4080283641815, + "p95": 1270.1759934425354, + "p99": 1395.4240083694458 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19726336, - "combineLogicalBytes": 19726336, - "fanoutMean": 
5.375, - "recvTokensMax": 182, - "stragglerRank": 3, + "dispatchLogicalBytes": 311721984, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 64, - "globalTokens": 512, + "tokensPerRank": 2048, + "globalTokens": 16384, "dispatch": { - "p50": 98.24000298976898, - "p90": 121.69600278139114, - "p95": 124.95999783277512, - "p99": 132.06399977207184 + "p50": 1198.3360052108765, + "p90": 1216.6399955749512, + "p95": 1229.5360565185547, + "p99": 1348.1279611587524 }, "combine": { - "p50": 92.00000017881393, - "p90": 105.34399747848511, - "p95": 106.52799904346466, - "p99": 107.93600231409073 + "p50": 871.8400001525879, + "p90": 879.5520067214966, + "p95": 882.5920224189758, + "p99": 895.039975643158 }, "roundtrip": { - "p50": 167.67999529838562, - "p90": 185.2799952030182, - "p95": 188.4479969739914, - "p99": 196.0960030555725 + "p50": 2025.696039199829, + "p90": 2043.5519218444824, + "p95": 2052.639961242676, + "p99": 2149.120092391968 }, "isolatedSum": { - "p50": 190.24000316858292, - "p90": 227.04000025987625, - "p95": 231.48799687623978, - "p99": 240.00000208616257 + "p50": 2070.1760053634644, + "p90": 2096.1920022964478, + "p95": 2112.1280789375305, + "p99": 2243.1679368019104 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 38993920, - "combineLogicalBytes": 38993920, - "fanoutMean": 5.3125, - "recvTokensMax": 367, - "stragglerRank": 4, + "dispatchLogicalBytes": 621902848, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 4096, + "globalTokens": 32768, "dispatch": { - "p50": 118.65600198507309, - "p90": 134.3040019273758, - "p95": 136.86400651931763, - "p99": 142.17600226402283 + "p50": 2230.2401065826416, + "p90": 2242.2080039978027, + 
"p95": 2245.5999851226807, + "p99": 2260.4479789733887 }, "combine": { - "p50": 108.70400071144104, - "p90": 121.76000326871872, - "p95": 122.8799968957901, - "p99": 124.35200065374374 + "p50": 1617.3759698867798, + "p90": 1623.968005180359, + "p95": 1626.6560554504395, + "p99": 1631.5200328826904 }, "roundtrip": { - "p50": 202.65600085258484, - "p90": 218.6560034751892, - "p95": 221.3120013475418, - "p99": 225.0880002975464 + "p50": 3825.472116470337, + "p90": 3839.168071746826, + "p95": 3843.4879779815674, + "p99": 3857.9840660095215 }, "isolatedSum": { - "p50": 227.36000269651413, - "p90": 256.0640051960945, - "p95": 259.7440034151077, - "p99": 266.52800291776657 + "p50": 3847.6160764694214, + "p90": 3866.1760091781616, + "p95": 3872.25604057312, + "p99": 3891.968011856079 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 4, + "dispatchLogicalBytes": 1243504640, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 @@ -37789,47 +38016,48 @@ ] }, { - "id": "cx-9a231e73", - "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||b029c1a6fded400", - "colorKey": "h100_42947950", - "comparisonKey": "fb346b1019e55bb0", + "id": "cx-16cb50ff", + "identity": "gb300|deepep|v1|7168|8|384|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||fc08bf2f8d42ed8", + "colorKey": "gb300_b1bd5887", + "comparisonKey": "ff56b33f9f8f54e2", "schemaVersion": 3, - "generatedAt": "2026-06-27T00:13:23.336108+00:00", + "generatedAt": "2026-06-29T13:56:56.953377+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_12", - "sku": "h100", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", "backend": 
"deepep", - "phase": "decode", + "phase": "prefill", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", + "measurementContract": "runtime-visible-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · bf16", - "model": "DeepSeek-V3/V4", + "label": "GB300 EP8 · deepep · fp8", + "model": "Kimi-K2", "shape": { "hidden": 7168, "topk": 8, - "experts": 256, + "experts": 384, "routing": "uniform", "routingLabel": "uniform", "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, - "dispatchDtype": "bf16", + "dispatchDtype": "fp8", + "kernelGeneration": "v1", "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1515, + "achievedFraction": 0.1316, "configuredUnits": 20, - "deviceUnits": 132, + "deviceUnits": 152, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -37837,388 +38065,243 @@ }, "placement": { "kind": "packed", - "nodes": 1, + "nodes": 2, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "b029c1a6fded400", - "workloadId": "set:3:07d544ac2af401ec", - "workloadSource": "canonical-serialized", + "traceSignature": "fc08bf2f8d42ed8", + "workloadId": null, + "workloadSource": "seeded-runtime", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", + "backendVersion": "1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28272369133", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272369133", - "createdAt": "2026-06-27T00:13:23.336108+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" 
+ "id": "28374342409", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409", + "createdAt": "2026-06-29T13:08:27Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 98.88000041246414, - "p90": 104.8320010304451, - "p95": 107.96800255775452, - "p99": 118.97599697113037 - }, - "combine": { - "p50": 79.93599772453308, - "p90": 87.5839963555336, - "p95": 87.99999952316284, - "p99": 92.28800237178802 - }, - "roundtrip": { - "p50": 154.11199629306793, - "p90": 159.2639982700348, - "p95": 161.43999993801117, - "p99": 167.29600727558136 - }, - "isolatedSum": { - "p50": 178.81599813699722, - "p90": 192.4159973859787, - "p95": 195.96800208091736, - "p99": 211.2639993429184 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 103.04000228643417, - "p90": 108.22399705648422, - "p95": 110.43199896812439, - "p99": 116.64000153541565 - }, - "combine": { - "p50": 87.93599903583527, - "p90": 94.94400024414062, - "p95": 96.03200107812881, - "p99": 98.49599748849869 - }, - "roundtrip": { - "p50": 162.4639928340912, - "p90": 170.3999936580658, - "p95": 172.31999337673187, - "p99": 178.9119988679886 - }, - "isolatedSum": { - "p50": 190.97600132226944, - "p90": 203.16799730062485, - "p95": 206.4640000462532, - "p99": 215.13599902391434 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 19726336, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 182, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, { "tokensPerRank": 128, "globalTokens": 1024, "dispatch": { - "p50": 129.40800189971924, - "p90": 137.7599984407425, - "p95": 139.45600390434265, - "p99": 
143.48800480365753 + "p50": 340.60800075531006, + "p90": 360.7040047645569, + "p95": 367.0080006122589, + "p99": 378.2399892807007 }, "combine": { - "p50": 114.88000303506851, - "p90": 119.87199634313583, - "p95": 120.4800009727478, - "p99": 123.48800152540207 + "p50": 118.46400052309036, + "p90": 124.64000284671783, + "p95": 126.91199779510498, + "p99": 131.6159963607788 }, "roundtrip": { - "p50": 213.0880057811737, - "p90": 217.3759937286377, - "p95": 219.10400688648224, - "p99": 223.23200106620789 + "p50": 433.3760142326355, + "p90": 448.5760033130646, + "p95": 453.3120095729828, + "p99": 469.2479968070984 }, "isolatedSum": { - "p50": 244.28800493478775, - "p90": 257.6319947838783, - "p95": 259.93600487709045, - "p99": 266.9760063290596 + "p50": 459.0720012784004, + "p90": 485.3440076112747, + "p95": 493.9199984073639, + "p99": 509.8559856414795 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 5, + "dispatchLogicalBytes": 38757376, + "combineLogicalBytes": 77514752, + "fanoutMean": 5.2802734375, + "recvTokensMax": 707, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 - } - ] - }, - { - "id": "cx-535aa40c", - "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||c774c8e4abb34da", - "colorKey": "h100_42947950", - "comparisonKey": "f31dd87deba90285", - "schemaVersion": 3, - "generatedAt": "2026-06-27T00:53:48.998127+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_03", - "sku": "h100", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep 
· bf16", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "c774c8e4abb34da", - "workloadId": "set:5:d8d49658059863f2", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28273506790", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28273506790", - "createdAt": "2026-06-27T00:53:48.998127+00:00", - "sha": "2c15d9415503e9ccb84cd49cf446a122796efc1e" - }, - "rows": [ + }, { - "tokensPerRank": 1, - "globalTokens": 8, + "tokensPerRank": 256, + "globalTokens": 2048, "dispatch": { - "p50": 94.4959968328476, - "p90": 100.5759984254837, - "p95": 102.81600058078766, - "p99": 107.42399841547012 + "p50": 372.76801466941833, + "p90": 385.0559890270233, + "p95": 388.2879912853241, + "p99": 482.91200399398804 }, "combine": { - "p50": 76.92799717187881, - "p90": 80.89599758386612, - "p95": 81.37600123882294, - "p99": 85.91999858617783 + "p50": 162.81600296497345, + "p90": 168.44800114631653, + "p95": 171.80800437927246, + "p99": 178.52799594402313 }, "roundtrip": { - "p50": 150.65599977970123, - "p90": 155.35999834537506, - "p95": 157.02399611473083, - "p99": 
163.5199934244156 + "p50": 522.8480100631714, + "p90": 534.5919728279114, + "p95": 538.0480289459229, + "p99": 551.8720149993896 }, "isolatedSum": { - "p50": 171.4239940047264, - "p90": 181.47199600934982, - "p95": 184.1920018196106, - "p99": 193.34399700164795 + "p50": 535.5840176343918, + "p90": 553.5039901733398, + "p95": 560.0959956645966, + "p99": 661.4399999380112 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 630784, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 1, + "dispatchLogicalBytes": 77285376, + "combineLogicalBytes": 154570752, + "fanoutMean": 5.2646484375, + "recvTokensMax": 1391, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 2, - "globalTokens": 16, + "tokensPerRank": 512, + "globalTokens": 4096, "dispatch": { - "p50": 90.97599983215332, - "p90": 98.52799773216248, - "p95": 101.02400183677673, - "p99": 107.68000036478043 + "p50": 485.4080080986023, + "p90": 496.6079890727997, + "p95": 499.35999512672424, + "p99": 537.0240211486816 }, "combine": { - "p50": 77.11999863386154, - "p90": 81.216000020504, - "p95": 82.71999657154083, - "p99": 87.55200356245041 + "p50": 281.2480032444, + "p90": 288.60801458358765, + "p95": 290.5600070953369, + "p99": 295.80798745155334 }, "roundtrip": { - "p50": 149.47199821472168, - "p90": 154.91199493408203, - "p95": 157.151997089386, - "p99": 163.80800306797028 + "p50": 745.0559735298157, + "p90": 754.8480033874512, + "p95": 758.8160037994385, + "p99": 774.0160226821899 }, "isolatedSum": { - "p50": 168.09599846601486, - "p90": 179.74399775266647, - "p95": 183.74399840831757, - "p99": 195.23200392723083 + "p50": 766.6560113430023, + "p90": 785.2160036563873, + "p95": 789.9200022220612, + "p99": 832.832008600235 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 1232896, - "combineLogicalBytes": 1232896, - "fanoutMean": 5.375, - "recvTokensMax": 13, + "dispatchLogicalBytes": 154886144, + "combineLogicalBytes": 
309772288, + "fanoutMean": 5.275390625, + "recvTokensMax": 2754, "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 4, - "globalTokens": 32, + "tokensPerRank": 1024, + "globalTokens": 8192, "dispatch": { - "p50": 93.12000125646591, - "p90": 99.64799880981445, - "p95": 102.27199643850327, - "p99": 109.43999886512756 + "p50": 719.4880247116089, + "p90": 731.6160202026367, + "p95": 734.7519993782043, + "p99": 751.1680126190186 }, "combine": { - "p50": 79.3600007891655, - "p90": 83.0719992518425, - "p95": 84.22400057315826, - "p99": 88.54400366544724 + "p50": 487.90401220321655, + "p90": 494.3679869174957, + "p95": 496.0959851741791, + "p99": 502.4639964103699 }, "roundtrip": { - "p50": 151.96800231933594, - "p90": 158.9439958333969, - "p95": 160.25599837303162, - "p99": 163.07200491428375 + "p50": 1180.2239418029785, + "p90": 1194.4960355758667, + "p95": 1202.1119594573975, + "p99": 1253.1520128250122 }, "isolatedSum": { - "p50": 172.4800020456314, - "p90": 182.71999806165695, - "p95": 186.49599701166153, - "p99": 197.9840025305748 + "p50": 1207.3920369148254, + "p90": 1225.9840071201324, + "p95": 1230.8479845523834, + "p99": 1253.6320090293884 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 2480128, - "combineLogicalBytes": 2480128, - "fanoutMean": 5.40625, - "recvTokensMax": 29, + "dispatchLogicalBytes": 309750784, + "combineLogicalBytes": 619501568, + "fanoutMean": 5.2750244140625, + "recvTokensMax": 5469, "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 2048, + "globalTokens": 16384, "dispatch": { - "p50": 94.27200257778168, - "p90": 100.80000013113022, - "p95": 102.62399911880493, - "p99": 107.80800133943558 + "p50": 1175.0400066375732, + "p90": 1188.4160041809082, + "p95": 1192.1919584274292, + "p99": 1211.583971977234 }, "combine": { - "p50": 78.68800312280655, - "p90": 83.13599973917007, - "p95": 84.25600081682205, - 
"p99": 86.65599673986435 + "p50": 856.8000197410583, + "p90": 863.5839819908142, + "p95": 866.3039803504944, + "p99": 876.416027545929 }, "roundtrip": { - "p50": 151.39199793338776, - "p90": 157.79200196266174, - "p95": 160.25599837303162, - "p99": 164.95999693870544 + "p50": 1998.2080459594727, + "p90": 2011.5840435028076, + "p95": 2016.767978668213, + "p99": 2027.26411819458 }, "isolatedSum": { - "p50": 172.96000570058823, - "p90": 183.9359998703003, - "p95": 186.87999993562698, - "p99": 194.46399807929993 + "p50": 2031.8400263786316, + "p90": 2051.9999861717224, + "p95": 2058.4959387779236, + "p99": 2087.999999523163 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 4, + "dispatchLogicalBytes": 619687936, + "combineLogicalBytes": 1239375872, + "fanoutMean": 5.276611328125, + "recvTokensMax": 10883, + "stragglerRank": 7, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 16, - "globalTokens": 128, + "tokensPerRank": 4096, + "globalTokens": 32768, "dispatch": { - "p50": 96.16000205278397, - "p90": 100.92800110578537, - "p95": 103.71199995279312, - "p99": 108.06400328874588 + "p50": 2221.760034561157, + "p90": 2233.1199645996094, + "p95": 2237.823963165283, + "p99": 2246.335983276367 }, "combine": { - "p50": 81.85599744319916, - "p90": 87.26400136947632, - "p95": 88.8959988951683, - "p99": 90.04800021648407 + "p50": 1596.6399908065796, + "p90": 1603.6479473114014, + "p95": 1605.2160263061523, + "p99": 1610.4960441589355 }, "roundtrip": { - "p50": 153.6639928817749, - "p90": 160.35200655460358, - "p95": 161.95200383663177, - "p99": 165.3439998626709 + "p50": 3788.2559299468994, + "p90": 3799.2959022521973, + "p95": 3803.936004638672, + "p99": 3814.0480518341064 }, "isolatedSum": { - "p50": 178.01599949598312, - "p90": 188.1920024752617, - "p95": 192.60799884796143, - "p99": 198.11200350522995 + "p50": 3818.400025367737, + 
"p90": 3836.7679119110107, + "p95": 3843.0399894714355, + "p99": 3856.8320274353027 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 9920512, - "combineLogicalBytes": 9920512, - "fanoutMean": 5.40625, - "recvTokensMax": 92, + "dispatchLogicalBytes": 1239834624, + "combineLogicalBytes": 2479669248, + "fanoutMean": 5.278564453125, + "recvTokensMax": 21730, "stragglerRank": 1, "correct": true, "samplesPooled": 600, @@ -38227,28 +38310,28 @@ ] }, { - "id": "cx-5a3d925c", - "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|small-amplitude|none|none|0|tuned||8c8497a77d9085d", - "colorKey": "h100_42947950", - "comparisonKey": "da8c4fcc63f5bf6e", + "id": "cx-b8ab0990", + "identity": "gb300|deepep|v1|7168|8|256|fp8|normal|cached-layout-comm-only-v1|uniform|8|prefill|normal|none|none|0|tuned||b9896b0a7ca9901", + "colorKey": "gb300_b1b733fb", + "comparisonKey": "5ba58d24d34449fd", "schemaVersion": 3, - "generatedAt": "2026-06-27T00:05:07.028525+00:00", + "generatedAt": "2026-06-29T13:52:40.434876+00:00", "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_18", - "sku": "h100", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", "backend": "deepep", - "phase": "decode", + "phase": "prefill", "mode": "normal", "resourceMode": "tuned", "suite": "backend-default", "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", + "measurementContract": "cached-layout-comm-only-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", "worldSize": 8, "epSize": 8, - "label": "H100 EP8 · deepep · bf16", + "label": "GB300 EP8 · deepep · fp8 [cl]", "model": "DeepSeek-V3/V4", "shape": { "hidden": 7168, @@ -38259,15 +38342,16 @@ "routingStep": 0, "unevenTokens": "none", "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "small-amplitude", + 
"dispatchDtype": "fp8", + "kernelGeneration": "v1", + "activationProfile": "normal", "combineQuantMode": "none" }, "resourceProfile": { "requestedFraction": null, - "achievedFraction": 0.1515, + "achievedFraction": 0.1316, "configuredUnits": 20, - "deviceUnits": 132, + "deviceUnits": 152, "resourceClass": "backend-tuned", "conformanceClass": "backend-default", "fixedKernel": false, @@ -38275,91990 +38359,2496 @@ }, "placement": { "kind": "packed", - "nodes": 1, + "nodes": 2, "gpusPerNode": 8, "scaleUpDomain": 8 }, "routingConsistent": true, - "traceSignature": "8c8497a77d9085d", - "workloadId": "set:4:d8d49658059863f2", - "workloadSource": "canonical-serialized", + "traceSignature": "b9896b0a7ca9901", + "workloadId": null, + "workloadSource": "seeded-runtime", "eplbImbalanceBefore": null, "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", + "backendVersion": "1.1.0+814e508", "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", "repository": "SemiAnalysisAI/InferenceX", "run": { - "id": "28272117855", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272117855", - "createdAt": "2026-06-27T00:05:07.028525+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + "id": "28374342409", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409", + "createdAt": "2026-06-29T13:08:27Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" }, "rows": [ { - "tokensPerRank": 1, - "globalTokens": 8, + "tokensPerRank": 128, + "globalTokens": 1024, "dispatch": { - "p50": 97.79199957847595, - "p90": 105.02400249242783, - "p95": 107.29599744081497, - "p99": 115.90400338172913 + "p50": 99.0080013871193, + "p90": 138.08000087738037, + "p95": 148.3519971370697, + "p99": 159.19999778270721 }, "combine": { - "p50": 79.77599650621414, - "p90": 82.11199939250946, - "p95": 86.91199868917465, - "p99": 88.79999816417694 + "p50": 121.76000326871872, + "p90": 147.67999947071075, + "p95": 155.5519998073578, 
+ "p99": 182.0800006389618 }, "roundtrip": { - "p50": 152.44799852371216, - "p90": 158.59200060367584, - "p95": 160.44799983501434, - "p99": 165.40800034999847 + "p50": 265.6640112400055, + "p90": 291.77600145339966, + "p95": 302.5279939174652, + "p99": 338.49599957466125 }, "isolatedSum": { - "p50": 177.5679960846901, - "p90": 187.1360018849373, - "p95": 194.20799612998962, - "p99": 204.70400154590607 + "p50": 220.768004655838, + "p90": 285.7600003480911, + "p95": 303.9039969444275, + "p99": 341.279998421669 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 630784, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 4, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 256, + "globalTokens": 2048, "dispatch": { - "p50": 98.01600128412247, - "p90": 103.87200117111206, - "p95": 106.01600259542465, - "p99": 113.11999708414078 + "p50": 130.048006772995, + "p90": 154.33600544929504, + "p95": 175.7120043039322, + "p99": 193.7279999256134 }, "combine": { - "p50": 81.02399855852127, - "p90": 87.71199733018875, - "p95": 87.96799927949905, - "p99": 89.50400352478027 + "p50": 162.9440039396286, + "p90": 187.74400651454926, + "p95": 203.8400024175644, + "p99": 224.73600506782532 }, "roundtrip": { - "p50": 155.16799688339233, - "p90": 160.38399934768677, - "p95": 162.23999857902527, - "p99": 166.87999665737152 + "p50": 344.1919982433319, + "p90": 379.5199990272522, + "p95": 392.8000032901764, + "p99": 424.54400658607483 }, "isolatedSum": { - "p50": 179.03999984264374, - "p90": 191.5839985013008, - "p95": 193.9840018749237, - "p99": 202.62400060892105 + "p50": 292.9920107126236, + "p90": 342.0800119638443, + "p95": 379.5520067214966, + "p99": 418.4640049934387 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 
4974592, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 4, + "dispatchLogicalBytes": 77944832, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 32, - "globalTokens": 256, + "tokensPerRank": 512, + "globalTokens": 4096, "dispatch": { - "p50": 101.98400169610977, - "p90": 106.6880002617836, - "p95": 109.95200276374817, - "p99": 120.35199999809265 + "p50": 183.45600366592407, + "p90": 190.08000195026398, + "p95": 192.57600605487823, + "p99": 198.71999323368073 }, "combine": { - "p50": 88.22400122880936, - "p90": 95.0080007314682, - "p95": 95.93600034713745, - "p99": 96.83199971914291 + "p50": 282.24000334739685, + "p90": 289.4720137119293, + "p95": 291.1680042743683, + "p99": 295.74400186538696 }, "roundtrip": { - "p50": 162.75200247764587, - "p90": 169.63200271129608, - "p95": 171.58399522304535, - "p99": 176.28799378871918 + "p50": 569.3759918212891, + "p90": 578.0799984931946, + "p95": 581.6640257835388, + "p99": 587.3600244522095 }, "isolatedSum": { - "p50": 190.20800292491913, - "p90": 201.6960009932518, - "p95": 205.88800311088562, - "p99": 217.18399971723557 + "p50": 465.6960070133209, + "p90": 479.5520156621933, + "p95": 483.7440103292465, + "p99": 494.4639950990677 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19726336, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 182, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 129.66400384902954, - "p90": 137.08800077438354, - "p95": 139.0399932861328, - "p99": 142.752006649971 - }, - "combine": { - "p50": 115.00799655914307, - "p90": 120.7680031657219, - "p95": 121.31199985742569, - "p99": 127.83999741077423 - }, - "roundtrip": { - "p50": 212.89600431919098, - "p90": 
218.72000396251678, - "p95": 219.9680060148239, - "p99": 224.06400740146637 - }, - "isolatedSum": { - "p50": 244.6720004081726, - "p90": 257.85600394010544, - "p95": 260.3519931435585, - "p99": 270.59200406074524 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-49497b06", - "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|wide-dynamic-range|none|none|0|tuned||8c8497a77d9085d", - "colorKey": "h100_42947950", - "comparisonKey": "5ec10556693a8c2b", - "schemaVersion": 3, - "generatedAt": "2026-06-27T00:05:08.113815+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_05", - "sku": "h100", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · bf16", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "wide-dynamic-range", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "8c8497a77d9085d", - "workloadId": 
"set:4:d8d49658059863f2", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28272121618", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272121618", - "createdAt": "2026-06-27T00:05:08.113815+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 98.24000298976898, - "p90": 105.69600015878677, - "p95": 108.12799632549286, - "p99": 113.37599903345108 - }, - "combine": { - "p50": 79.68000322580338, - "p90": 82.07999914884567, - "p95": 82.97599852085114, - "p99": 87.61599659919739 - }, - "roundtrip": { - "p50": 146.464005112648, - "p90": 152.8320014476776, - "p95": 154.59200739860535, - "p99": 158.84800255298615 - }, - "isolatedSum": { - "p50": 177.92000621557236, - "p90": 187.77599930763245, - "p95": 191.103994846344, - "p99": 200.99199563264847 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 630784, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 5, + "dispatchLogicalBytes": 156133376, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 8, - "globalTokens": 64, + "tokensPerRank": 1024, + "globalTokens": 8192, "dispatch": { - "p50": 100.0640019774437, - "p90": 107.32799768447876, - "p95": 110.27199774980545, - "p99": 160.92799603939056 + "p50": 297.0240116119385, + "p90": 310.62400341033936, + "p95": 319.93600726127625, + "p99": 348.4160006046295 }, "combine": { - "p50": 81.34400099515915, - "p90": 87.16800063848495, - "p95": 87.87199854850769, - "p99": 90.27200192213058 + "p50": 483.71198773384094, + "p90": 
502.1439790725708, + "p95": 511.1680030822754, + "p99": 532.4159860610962 }, "roundtrip": { - "p50": 152.92799472808838, - "p90": 160.51200032234192, - "p95": 162.30399906635284, - "p99": 166.24000668525696 + "p50": 999.2319941520691, + "p90": 1013.8880014419556, + "p95": 1025.8560180664062, + "p99": 1047.5200414657593 }, "isolatedSum": { - "p50": 181.40800297260284, - "p90": 194.49599832296371, - "p95": 198.14399629831314, - "p99": 251.19999796152115 + "p50": 780.7359993457794, + "p90": 812.7679824829102, + "p95": 831.1040103435516, + "p99": 880.8319866657257 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 5, + "dispatchLogicalBytes": 311721984, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 32, - "globalTokens": 256, + "tokensPerRank": 2048, + "globalTokens": 16384, "dispatch": { - "p50": 103.39199751615524, - "p90": 108.2879975438118, - "p95": 110.6560006737709, - "p99": 119.03999745845795 + "p50": 531.9679975509644, + "p90": 557.9839944839478, + "p95": 576.2240290641785, + "p99": 596.9280004501343 }, "combine": { - "p50": 89.75999802350998, - "p90": 95.20000219345093, - "p95": 95.93600034713745, - "p99": 98.68799895048141 + "p50": 868.3519959449768, + "p90": 883.8719725608826, + "p95": 894.208014011383, + "p99": 924.0959882736206 }, "roundtrip": { - "p50": 161.6320013999939, - "p90": 169.08800601959229, - "p95": 170.68800330162048, - "p99": 175.64800381660461 + "p50": 1833.9840173721313, + "p90": 1859.8719835281372, + "p95": 1879.6800374984741, + "p99": 1900.704026222229 }, "isolatedSum": { - "p50": 193.15199553966522, - "p90": 203.48799973726273, - "p95": 206.59200102090836, - "p99": 217.72799640893936 + "p50": 1400.3199934959412, + "p90": 1441.8559670448303, + "p95": 1470.4320430755615, + "p99": 
1521.0239887237549 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 19726336, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 182, - "stragglerRank": 5, + "dispatchLogicalBytes": 621902848, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 1, "correct": true, "samplesPooled": 600, "trials": 3 }, { - "tokensPerRank": 128, - "globalTokens": 1024, + "tokensPerRank": 4096, + "globalTokens": 32768, "dispatch": { - "p50": 130.46400249004364, - "p90": 136.9280070066452, - "p95": 139.23199474811554, - "p99": 143.5839980840683 + "p50": 992.031991481781, + "p90": 1004.863977432251, + "p95": 1008.7360143661499, + "p99": 1018.0480480194092 }, "combine": { - "p50": 114.78400230407715, - "p90": 120.83200365304947, - "p95": 122.11199849843979, - "p99": 122.8799968957901 + "p50": 1615.7439947128296, + "p90": 1622.4639415740967, + "p95": 1625.1519918441772, + "p99": 1631.935954093933 }, "roundtrip": { - "p50": 211.71200275421143, - "p90": 219.35999393463135, - "p95": 221.91999852657318, - "p99": 235.00800132751465 + "p50": 3490.72003364563, + "p90": 3504.6401023864746, + "p95": 3512.00008392334, + "p99": 3539.936065673828 }, "isolatedSum": { - "p50": 245.2480047941208, - "p90": 257.7600106596947, - "p95": 261.3439932465553, - "p99": 266.4639949798584 + "p50": 2607.7759861946106, + "p90": 2627.3279190063477, + "p95": 2633.888006210327, + "p99": 2649.9840021133423 }, "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 5, + "dispatchLogicalBytes": 1243504640, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 0, "correct": true, "samplesPooled": 600, "trials": 3 } ] + } + ], + "failures": [ + { + "id": "cxf-152e9bea", + "identity": 
"gb200|deepep|v1|7168|8|256|fp8|ll|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||74444524b5db510", + "generatedAt": "2026-06-29T13:56:37.073274+00:00", + "publicationStatus": "diagnostic", + "status": "valid", + "sku": "gb200", + "backend": "deepep", + "phase": "decode", + "config": "fp8/ll/layout-and-dispatch", + "reason": "anomaly:roundtrip_gt_isolated_sum", + "returnCode": null, + "run": { + "id": "28374335449", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449", + "createdAt": "2026-06-29T13:08:20Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" + } }, { - "id": "cx-3b04d344", - "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|zeros|none|none|0|tuned||8c8497a77d9085d", - "colorKey": "h100_42947950", - "comparisonKey": "8bd0272e65400ebd", - "schemaVersion": 3, - "generatedAt": "2026-06-27T00:05:11.747577+00:00", + "id": "cxf-ef4bad88", + "identity": "gb200|deepep|v1|7168|8|256|fp8|ll|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||74444524b5db510", + "generatedAt": "2026-06-29T13:57:26.133416+00:00", + "publicationStatus": "diagnostic", "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_00", - "sku": "h100", + "sku": "gb200", "backend": "deepep", "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · bf16", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "zeros", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - 
"achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "8c8497a77d9085d", - "workloadId": "set:4:d8d49658059863f2", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", + "config": "fp8/ll/runtime-visible", + "reason": "anomaly:roundtrip_gt_isolated_sum", + "returnCode": null, "run": { - "id": "28272113941", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272113941", - "createdAt": "2026-06-27T00:05:11.747577+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 97.15200215578079, - "p90": 103.67999970912933, - "p95": 105.85600137710571, - "p99": 108.99200290441513 - }, - "combine": { - "p50": 79.64800298213959, - "p90": 82.33600109815598, - "p95": 86.84799820184708, - "p99": 87.96799927949905 - }, - "roundtrip": { - "p50": 151.8400013446808, - "p90": 158.01599621772766, - "p95": 160.76800227165222, - "p99": 165.3120070695877 - }, - "isolatedSum": { - "p50": 176.80000513792038, - "p90": 186.0160008072853, - "p95": 192.7039995789528, - "p99": 196.96000218391418 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 630784, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 97.28000313043594, - "p90": 103.71199995279312, - "p95": 106.4319983124733, - "p99": 
121.63200229406357 - }, - "combine": { - "p50": 79.93599772453308, - "p90": 87.39200234413147, - "p95": 87.93599903583527, - "p99": 90.04800021648407 - }, - "roundtrip": { - "p50": 153.72799336910248, - "p90": 159.55199301242828, - "p95": 160.7999950647354, - "p99": 165.6000018119812 - }, - "isolatedSum": { - "p50": 177.21600085496902, - "p90": 191.1040022969246, - "p95": 194.36799734830856, - "p99": 211.68000251054764 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 104.00000214576721, - "p90": 108.44799876213074, - "p95": 111.68000102043152, - "p99": 126.75200402736664 - }, - "combine": { - "p50": 87.99999952316284, - "p90": 93.44000369310379, - "p95": 95.87199985980988, - "p99": 97.59999811649323 - }, - "roundtrip": { - "p50": 161.8880033493042, - "p90": 168.64000260829926, - "p95": 170.0800061225891, - "p99": 175.99999904632568 - }, - "isolatedSum": { - "p50": 192.00000166893005, - "p90": 201.88800245523453, - "p95": 207.5520008802414, - "p99": 224.35200214385986 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 19726336, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 182, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 130.0159990787506, - "p90": 137.7280056476593, - "p95": 138.7840062379837, - "p99": 142.2719955444336 - }, - "combine": { - "p50": 115.167997777462, - "p90": 120.54400146007538, - "p95": 120.95999717712402, - "p99": 123.87199699878693 - }, - "roundtrip": { - "p50": 212.47999370098114, - "p90": 216.63999557495117, - "p95": 218.1439995765686, - "p99": 221.47199511528015 - }, - "isolatedSum": { - "p50": 245.18399685621262, - "p90": 
258.2720071077347, - "p95": 259.7440034151077, - "p99": 266.1439925432205 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] + "id": "28374335449", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449", + "createdAt": "2026-06-29T13:08:20Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" + } }, { - "id": "cx-d0428a76", - "identity": "h100|deepep|7168|8|256|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", - "colorKey": "h100_ff7906f8", - "comparisonKey": "e3488cf5058170e6", - "schemaVersion": 3, - "generatedAt": "2026-06-26T23:47:28.813270+00:00", + "id": "cxf-70ad6a68", + "identity": "gb300|deepep|v1|7168|8|256|fp8|ll|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||74444524b5db510", + "generatedAt": "2026-06-29T13:53:46.301476+00:00", + "publicationStatus": "diagnostic", "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_07", - "sku": "h100", + "sku": "gb300", "backend": "deepep", "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "runtime-visible-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · bf16", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" + "config": "fp8/ll/layout-and-dispatch", + "reason": "anomaly:roundtrip_gt_isolated_sum", + "returnCode": null, + "run": { + "id": "28374342409", + "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409", + "createdAt": "2026-06-29T13:08:27Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" + } + }, + { + "id": "cxf-6ecc9670", + "identity": "gb300|deepep|v1|7168|8|256|fp8|ll|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||74444524b5db510", + "generatedAt": "2026-06-29T13:55:19.539361+00:00", + "publicationStatus": "diagnostic", + "status": "valid", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "config": "fp8/ll/runtime-visible", + "reason": "anomaly:roundtrip_gt_isolated_sum", + "returnCode": null, + "run": { + "id": "28374342409", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409", + "createdAt": "2026-06-29T13:08:27Z", + "sha": "38890f652c38d280794346b6b5b951b9b380a24a" + } + } + ], + "summaryCards": [ + { + "title": "Best backend · decode EP8", + "value": "no data", + "sub": "no official headline cell at this phase/EP" + }, + { + "title": "Best backend · prefill EP8", + "value": "no data", + "sub": "no official headline cell at this phase/EP" + }, + { + "title": "LL -> normal crossover", + "value": "T~128 tok/rank", + "sub": "GB200 EP8 bf16 · normal RT p50 wins above this" + }, + { + "title": "Resource-normalized winner", + "value": "no data", + "sub": "no official headline cell at this phase/EP" + }, + { + "title": "Backend-default winner", + "value": "no data", + "sub": "no official headline cell at this phase/EP" + }, + { + "title": "Most unstable config", + "value": "GB200 · deepep prefill", + "sub": "2.89x p99 under zipf vs uniform", + "warning": true + }, + { + "title": "Invalid / diagnostic cases", + "value": "4", + "sub": "see Evidence failed table", + "warning": true, + "href": "#tab-evidence" + } + ], + "decision": { + "budgetsUs": [100, 250, 500], + "maxTokensUnderBudget": [], + "recommendations": [ + { + "id": "cxr-0f8e2a04", + "sku": "gb200", + "phase": "decode", + "atTokensPerRank": 64, + "lowestP99DispatchUs": 
100.3, + "config": "fp8/normal/cached-layout-comm-only-v1/uniform/tuned", + "epSize": 8 }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false + { + "id": "cxr-3109b82a", + "sku": "gb200", + "phase": "prefill", + "atTokensPerRank": 256, + "lowestP99DispatchUs": 137.2, + "config": "bf16/normal/layout-and-dispatch-v1/balanced+eplb/tuned", + "epSize": 8 }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 + { + "id": "cxr-1445ce8d", + "sku": "gb300", + "phase": "decode", + "atTokensPerRank": 64, + "lowestP99DispatchUs": 125.1, + "config": "bf16/normal/layout-and-dispatch-v1/zipf-moderate/tuned", + "epSize": 8 }, - "routingConsistent": true, - "traceSignature": "ac583971f94b176", - "workloadId": "set:8:d8d49658059863f2", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28271559607", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271559607", - "createdAt": "2026-06-26T23:47:28.813270+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 96.79999947547913, - "p90": 103.39199751615524, - "p95": 104.80000078678131, - "p99": 109.43999886512756 - }, - "combine": { - "p50": 79.13599908351898, - "p90": 81.40800148248672, - "p95": 86.68799698352814, - "p99": 87.90399879217148 - }, - "roundtrip": { - "p50": 152.12799608707428, - "p90": 159.96800363063812, - "p95": 162.36799955368042, - "p99": 177.69600450992584 - }, - "isolatedSum": { - "p50": 175.9359985589981, - "p90": 
184.79999899864197, - "p95": 191.48799777030945, - "p99": 197.34399765729904 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 630784, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 71.23199850320816, - "p90": 101.98400169610977, - "p95": 103.84000092744827, - "p99": 108.35199803113937 - }, - "combine": { - "p50": 72.54400104284286, - "p90": 81.40800148248672, - "p95": 82.62400329113007, - "p99": 87.77599781751633 - }, - "roundtrip": { - "p50": 129.08799946308136, - "p90": 158.2079976797104, - "p95": 159.58400070667267, - "p99": 165.02399742603302 - }, - "isolatedSum": { - "p50": 143.77599954605103, - "p90": 183.3920031785965, - "p95": 186.46400421857834, - "p99": 196.1279958486557 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1232896, - "combineLogicalBytes": 1232896, - "fanoutMean": 5.375, - "recvTokensMax": 13, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 70.52800059318542, - "p90": 99.84000027179718, - "p95": 105.72800040245056, - "p99": 115.07199704647064 - }, - "combine": { - "p50": 72.9919970035553, - "p90": 80.99199831485748, - "p95": 86.94399893283844, - "p99": 103.55199873447418 - }, - "roundtrip": { - "p50": 129.43999469280243, - "p90": 156.19200468063354, - "p95": 159.07199680805206, - "p99": 162.56000101566315 - }, - "isolatedSum": { - "p50": 143.51999759674072, - "p90": 180.83199858665466, - "p95": 192.671999335289, - "p99": 218.62399578094482 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2480128, - "combineLogicalBytes": 2480128, - "fanoutMean": 5.40625, - "recvTokensMax": 29, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": 
{ - "p50": 96.54399752616882, - "p90": 101.3759970664978, - "p95": 103.61599922180176, - "p99": 111.26399785280228 - }, - "combine": { - "p50": 79.52000200748444, - "p90": 87.13600039482117, - "p95": 87.64799684286118, - "p99": 88.73599767684937 - }, - "roundtrip": { - "p50": 152.16000378131866, - "p90": 159.39199924468994, - "p95": 161.15200519561768, - "p99": 170.52799463272095 - }, - "isolatedSum": { - "p50": 176.06399953365326, - "p90": 188.51199746131897, - "p95": 191.26399606466293, - "p99": 199.99999552965164 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 96.3520035147667, - "p90": 101.75999999046326, - "p95": 104.89600151777267, - "p99": 110.11199653148651 - }, - "combine": { - "p50": 84.48000252246857, - "p90": 88.03199976682663, - "p95": 89.21600133180618, - "p99": 95.23200243711472 - }, - "roundtrip": { - "p50": 153.05599570274353, - "p90": 160.288006067276, - "p95": 162.432000041008, - "p99": 171.2000072002411 - }, - "isolatedSum": { - "p50": 180.83200603723526, - "p90": 189.7919997572899, - "p95": 194.11200284957886, - "p99": 205.34399896860123 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 9920512, - "combineLogicalBytes": 9920512, - "fanoutMean": 5.40625, - "recvTokensMax": 92, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 89.9519994854927, - "p90": 104.35199737548828, - "p95": 106.65600001811981, - "p99": 117.85600334405899 - }, - "combine": { - "p50": 81.216000020504, - "p90": 92.19200164079666, - "p95": 95.39200365543365, - "p99": 96.0640013217926 - }, - "roundtrip": { - "p50": 141.05600118637085, - "p90": 168.2880073785782, - "p95": 169.5680022239685, - "p99": 
174.40000176429749 - }, - "isolatedSum": { - "p50": 171.1679995059967, - "p90": 196.54399901628494, - "p95": 202.04800367355347, - "p99": 213.9200046658516 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 19726336, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 182, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 64, - "globalTokens": 512, - "dispatch": { - "p50": 111.51999980211258, - "p90": 119.00799721479416, - "p95": 121.44000083208084, - "p99": 126.56000256538391 - }, - "combine": { - "p50": 95.0080007314682, - "p90": 103.04000228643417, - "p95": 103.35999727249146, - "p99": 104.92800176143646 - }, - "roundtrip": { - "p50": 164.63999450206757, - "p90": 182.3039948940277, - "p95": 185.12000143527985, - "p99": 188.7039989233017 - }, - "isolatedSum": { - "p50": 206.52800053358078, - "p90": 222.04799950122833, - "p95": 224.7999981045723, - "p99": 231.48800432682037 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 38993920, - "combineLogicalBytes": 38993920, - "fanoutMean": 5.3125, - "recvTokensMax": 367, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 116.99199676513672, - "p90": 133.2480013370514, - "p95": 135.51999628543854, - "p99": 140.6400054693222 - }, - "combine": { - "p50": 106.88000172376633, - "p90": 119.55200135707855, - "p95": 120.2239990234375, - "p99": 127.55200266838074 - }, - "roundtrip": { - "p50": 199.3280053138733, - "p90": 215.45599400997162, - "p95": 217.56799519062042, - "p99": 258.91199707984924 - }, - "isolatedSum": { - "p50": 223.87199848890305, - "p90": 252.80000269412994, - "p95": 255.74399530887604, - "p99": 268.19200813770294 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 6, - "correct": 
true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-79a82113", - "identity": "h100|deepep|7168|8|384|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||d6c49ae98878760", - "colorKey": "h100_42947950", - "comparisonKey": "d4720c9e1313f28d", - "schemaVersion": 3, - "generatedAt": "2026-06-27T11:13:34.351891+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_05", - "sku": "h100", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · bf16", - "model": "Kimi-K2", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 384, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "d6c49ae98878760", - "workloadId": "set:8:9a27d0df4b17fa09", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28287499275", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28287499275", - "createdAt": 
"2026-06-27T11:13:34.351891+00:00", - "sha": "df7fddee0a275156d4c72fa006cd2b73bce72613" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 96.09600156545639, - "p90": 104.25599664449692, - "p95": 107.744000852108, - "p99": 122.81599640846252 - }, - "combine": { - "p50": 78.75200361013412, - "p90": 81.24800026416779, - "p95": 81.85599744319916, - "p99": 87.3280018568039 - }, - "roundtrip": { - "p50": 149.3760049343109, - "p90": 157.72800147533417, - "p95": 160.863995552063, - "p99": 184.7359985113144 - }, - "isolatedSum": { - "p50": 174.84800517559052, - "p90": 185.5039969086647, - "p95": 189.59999829530716, - "p99": 210.14399826526642 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 602112, - "combineLogicalBytes": 602112, - "fanoutMean": 5.25, - "recvTokensMax": 8, - "stragglerRank": 2, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 73.82400333881378, - "p90": 103.20000350475311, - "p95": 108.19199681282043, - "p99": 178.3359944820404 - }, - "combine": { - "p50": 72.83200323581696, - "p90": 80.9599980711937, - "p95": 82.14399963617325, - "p99": 90.4960036277771 - }, - "roundtrip": { - "p50": 129.02399897575378, - "p90": 156.76799416542053, - "p95": 159.39199924468994, - "p99": 176.64000391960144 - }, - "isolatedSum": { - "p50": 146.65600657463074, - "p90": 184.1600015759468, - "p95": 190.33599644899368, - "p99": 268.8319981098175 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1218560, - "combineLogicalBytes": 1218560, - "fanoutMean": 5.3125, - "recvTokensMax": 14, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 72.95999675989151, - "p90": 101.95200145244598, - "p95": 110.1439967751503, - "p99": 251.10399723052979 - }, - "combine": { - "p50": 72.67200201749802, - "p90": 81.50400221347809, - "p95": 
82.43200182914734, - "p99": 87.42400258779526 - }, - "roundtrip": { - "p50": 129.02399897575378, - "p90": 155.32800555229187, - "p95": 159.61599349975586, - "p99": 171.87200486660004 - }, - "isolatedSum": { - "p50": 145.63199877738953, - "p90": 183.45600366592407, - "p95": 192.57599860429764, - "p99": 338.52799981832504 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2408448, - "combineLogicalBytes": 2408448, - "fanoutMean": 5.25, - "recvTokensMax": 26, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 95.13600170612335, - "p90": 103.2319962978363, - "p95": 106.46399855613708, - "p99": 127.93600559234619 - }, - "combine": { - "p50": 78.65600287914276, - "p90": 81.727996468544, - "p95": 86.496002972126, - "p99": 88.16000074148178 - }, - "roundtrip": { - "p50": 150.751993060112, - "p90": 161.50400042533875, - "p95": 208.41600000858307, - "p99": 230.20799458026886 - }, - "isolatedSum": { - "p50": 173.7920045852661, - "p90": 184.9599927663803, - "p95": 192.9600015282631, - "p99": 216.09600633382797 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4831232, - "combineLogicalBytes": 4831232, - "fanoutMean": 5.265625, - "recvTokensMax": 48, - "stragglerRank": 2, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 96.19200229644775, - "p90": 101.50399804115295, - "p95": 103.96800190210342, - "p99": 112.57600039243698 - }, - "combine": { - "p50": 81.91999793052673, - "p90": 88.19200098514557, - "p95": 89.1840010881424, - "p99": 90.40000289678574 - }, - "roundtrip": { - "p50": 151.0400027036667, - "p90": 159.4880074262619, - "p95": 161.76000237464905, - "p99": 199.77599382400513 - }, - "isolatedSum": { - "p50": 178.1120002269745, - "p90": 189.69599902629852, - "p95": 193.15200299024582, - "p99": 202.97600328922272 - }, - "roundtripMeasured": true, - 
"dispatchLogicalBytes": 9848832, - "combineLogicalBytes": 9848832, - "fanoutMean": 5.3671875, - "recvTokensMax": 91, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 90.01599997282028, - "p90": 104.22399640083313, - "p95": 106.39999806880951, - "p99": 117.47200042009354 - }, - "combine": { - "p50": 81.24800026416779, - "p90": 90.36800265312195, - "p95": 94.59199756383896, - "p99": 96.00000083446503 - }, - "roundtrip": { - "p50": 142.81600713729858, - "p90": 168.60799491405487, - "p95": 176.06399953365326, - "p99": 256.8640112876892 - }, - "isolatedSum": { - "p50": 171.26400023698807, - "p90": 194.59199905395508, - "p95": 200.99199563264847, - "p99": 213.47200125455856 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 19496960, - "combineLogicalBytes": 19496960, - "fanoutMean": 5.3125, - "recvTokensMax": 178, - "stragglerRank": 3, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 64, - "globalTokens": 512, - "dispatch": { - "p50": 109.40799862146378, - "p90": 139.26400244235992, - "p95": 141.7279988527298, - "p99": 146.84799313545227 - }, - "combine": { - "p50": 95.83999961614609, - "p90": 112.57600039243698, - "p95": 115.35999923944473, - "p99": 119.77600306272507 - }, - "roundtrip": { - "p50": 173.40800166130066, - "p90": 206.68800175189972, - "p95": 210.4959934949875, - "p99": 213.3760005235672 - }, - "isolatedSum": { - "p50": 205.24799823760986, - "p90": 251.8400028347969, - "p95": 257.08799809217453, - "p99": 266.62399619817734 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 38836224, - "combineLogicalBytes": 38836224, - "fanoutMean": 5.291015625, - "recvTokensMax": 372, - "stragglerRank": 2, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 115.07199704647064, - "p90": 131.96800649166107, - "p95": 
135.19999384880066, - "p99": 139.13600146770477 - }, - "combine": { - "p50": 106.4319983124733, - "p90": 119.29599940776825, - "p95": 120.06399780511856, - "p99": 122.11199849843979 - }, - "roundtrip": { - "p50": 202.5279998779297, - "p90": 216.0319983959198, - "p95": 217.66400337219238, - "p99": 221.50400280952454 - }, - "isolatedSum": { - "p50": 221.50399535894394, - "p90": 251.26400589942932, - "p95": 255.26399165391922, - "p99": 261.24799996614456 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 77514752, - "combineLogicalBytes": 77514752, - "fanoutMean": 5.2802734375, - "recvTokensMax": 707, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-e96d722b", - "identity": "h100|deepep|7168|8|384|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||d6c49ae98878760", - "colorKey": "h100_ff7906f8", - "comparisonKey": "c69daa1ab05193b6", - "schemaVersion": 3, - "generatedAt": "2026-06-26T23:51:56.132475+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_17", - "sku": "h100", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "runtime-visible-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · bf16", - "model": "Kimi-K2", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 384, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - 
"paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "d6c49ae98878760", - "workloadId": "set:8:9a27d0df4b17fa09", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28271667766", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271667766", - "createdAt": "2026-06-26T23:51:56.132475+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 96.09600156545639, - "p90": 102.81600058078766, - "p95": 104.54399883747101, - "p99": 110.59200018644333 - }, - "combine": { - "p50": 79.03999835252762, - "p90": 81.50400221347809, - "p95": 82.11199939250946, - "p99": 87.90399879217148 - }, - "roundtrip": { - "p50": 145.56799829006195, - "p90": 153.31199765205383, - "p95": 155.71199357509613, - "p99": 159.39199924468994 - }, - "isolatedSum": { - "p50": 175.135999917984, - "p90": 184.32000279426575, - "p95": 186.65599822998047, - "p99": 198.4959989786148 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 602112, - "combineLogicalBytes": 602112, - "fanoutMean": 5.25, - "recvTokensMax": 8, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 71.03999704122543, - "p90": 101.6319990158081, - "p95": 102.65599936246872, - "p99": 106.62399977445602 - }, - "combine": { - "p50": 72.28799909353256, - "p90": 80.54400235414505, - "p95": 81.40800148248672, - "p99": 87.00799942016602 - }, - "roundtrip": { - "p50": 129.18399274349213, - "p90": 152.70400047302246, - "p95": 156.92800283432007, - "p99": 160.76800227165222 - 
}, - "isolatedSum": { - "p50": 143.327996134758, - "p90": 182.17600136995316, - "p95": 184.06400084495544, - "p99": 193.63199919462204 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1218560, - "combineLogicalBytes": 1218560, - "fanoutMean": 5.3125, - "recvTokensMax": 14, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 72.57600128650665, - "p90": 101.02400183677673, - "p95": 103.61599922180176, - "p99": 110.81600189208984 - }, - "combine": { - "p50": 72.25599884986877, - "p90": 79.96799796819687, - "p95": 86.71999722719193, - "p99": 87.64799684286118 - }, - "roundtrip": { - "p50": 129.92000579833984, - "p90": 161.3759994506836, - "p95": 162.30399906635284, - "p99": 166.4319932460785 - }, - "isolatedSum": { - "p50": 144.83200013637543, - "p90": 180.9919998049736, - "p95": 190.33599644899368, - "p99": 198.46399873495102 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2408448, - "combineLogicalBytes": 2408448, - "fanoutMean": 5.25, - "recvTokensMax": 26, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 96.3200032711029, - "p90": 101.1200025677681, - "p95": 102.52799838781357, - "p99": 109.11999642848969 - }, - "combine": { - "p50": 79.23199981451035, - "p90": 82.11199939250946, - "p95": 87.00799942016602, - "p99": 87.71199733018875 - }, - "roundtrip": { - "p50": 151.5199989080429, - "p90": 159.2320054769516, - "p95": 160.60799360275269, - "p99": 165.21599888801575 - }, - "isolatedSum": { - "p50": 175.55200308561325, - "p90": 183.23200196027756, - "p95": 189.53599780797958, - "p99": 196.83199375867844 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4831232, - "combineLogicalBytes": 4831232, - "fanoutMean": 5.265625, - "recvTokensMax": 48, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, 
- { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 96.22400254011154, - "p90": 102.36799716949463, - "p95": 105.05600273609161, - "p99": 110.30399799346924 - }, - "combine": { - "p50": 81.88799768686295, - "p90": 88.28800171613693, - "p95": 89.31200206279755, - "p99": 94.43199634552002 - }, - "roundtrip": { - "p50": 152.48000621795654, - "p90": 160.09600460529327, - "p95": 164.19200599193573, - "p99": 172.83199727535248 - }, - "isolatedSum": { - "p50": 178.1120002269745, - "p90": 190.65599888563156, - "p95": 194.36800479888916, - "p99": 204.73599433898926 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 9848832, - "combineLogicalBytes": 9848832, - "fanoutMean": 5.3671875, - "recvTokensMax": 91, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 90.36800265312195, - "p90": 102.59199887514114, - "p95": 104.3199971318245, - "p99": 108.03200304508209 - }, - "combine": { - "p50": 80.92799782752991, - "p90": 90.01599997282028, - "p95": 95.13600170612335, - "p99": 96.41599655151367 - }, - "roundtrip": { - "p50": 142.46399700641632, - "p90": 169.95200514793396, - "p95": 174.55999553203583, - "p99": 181.7920058965683 - }, - "isolatedSum": { - "p50": 171.29600048065186, - "p90": 192.60799884796143, - "p95": 199.45599883794785, - "p99": 204.44799959659576 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 19496960, - "combineLogicalBytes": 19496960, - "fanoutMean": 5.3125, - "recvTokensMax": 178, - "stragglerRank": 3, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 64, - "globalTokens": 512, - "dispatch": { - "p50": 95.71199864149094, - "p90": 116.54400080442429, - "p95": 118.59200149774551, - "p99": 125.63200294971466 - }, - "combine": { - "p50": 89.72799777984619, - "p90": 103.74400019645691, - "p95": 104.22399640083313, - "p99": 106.04800283908844 - }, - "roundtrip": { - "p50": 
165.66400229930878, - "p90": 185.34399569034576, - "p95": 186.97600066661835, - "p99": 190.08000195026398 - }, - "isolatedSum": { - "p50": 185.43999642133713, - "p90": 220.2880010008812, - "p95": 222.81599789857864, - "p99": 231.6800057888031 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 38836224, - "combineLogicalBytes": 38836224, - "fanoutMean": 5.291015625, - "recvTokensMax": 372, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 113.11999708414078, - "p90": 133.82400572299957, - "p95": 137.05599308013916, - "p99": 140.28799533843994 - }, - "combine": { - "p50": 106.46399855613708, - "p90": 120.12799829244614, - "p95": 120.51200121641159, - "p99": 120.99199742078781 - }, - "roundtrip": { - "p50": 196.8960016965866, - "p90": 216.99200570583344, - "p95": 218.9120054244995, - "p99": 220.99199891090393 - }, - "isolatedSum": { - "p50": 219.58399564027786, - "p90": 253.9520040154457, - "p95": 257.56799429655075, - "p99": 261.27999275922775 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 77514752, - "combineLogicalBytes": 77514752, - "fanoutMean": 5.2802734375, - "recvTokensMax": 707, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-62470199", - "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|alternating-groups@s1|8|decode|normal|none|none|1|tuned||f8662de0b3559f9", - "colorKey": "h100_b681a3a4", - "comparisonKey": "03a9af950bebf5a9", - "schemaVersion": 3, - "generatedAt": "2026-06-27T00:12:00.195927+00:00", - "status": "valid", - "publicationStatus": "comparable-experimental", - "runner": "h100-dgxc-slurm_17", - "sku": "h100", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": 
"h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · bf16 · alternating-groups@s1", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "alternating-groups", - "routingLabel": "alternating-groups@s1", - "routingStep": 1, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "f8662de0b3559f9", - "workloadId": null, - "workloadSource": "seeded-runtime", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28272331593", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272331593", - "createdAt": "2026-06-27T00:12:00.195927+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 95.20000219345093, - "p90": 101.24800354242325, - "p95": 103.42399775981903, - "p99": 115.84000289440155 - }, - "combine": { - "p50": 79.29600030183792, - "p90": 80.92799782752991, - "p95": 81.79199695587158, - "p99": 88.03199976682663 - }, - "roundtrip": { - "p50": 148.03199470043182, - "p90": 153.24799716472626, - "p95": 156.41599893569946, - "p99": 176.06399953365326 - }, - "isolatedSum": { - "p50": 174.49600249528885, - "p90": 182.17600136995316, - "p95": 185.2159947156906, - "p99": 
203.87200266122818 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 3297280, - "combineLogicalBytes": 3297280, - "fanoutMean": 3.59375, - "recvTokensMax": 61, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 99.5199978351593, - "p90": 107.39199817180634, - "p95": 118.72000247240067, - "p99": 229.95199263095856 - }, - "combine": { - "p50": 87.52000331878662, - "p90": 89.34400230646133, - "p95": 92.3520028591156, - "p99": 96.44799679517746 - }, - "roundtrip": { - "p50": 155.5519998073578, - "p90": 160.70400178432465, - "p95": 164.76799547672272, - "p99": 175.07199943065643 - }, - "isolatedSum": { - "p50": 187.04000115394592, - "p90": 196.73600047826767, - "p95": 211.07200533151627, - "p99": 326.399989426136 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 13275136, - "combineLogicalBytes": 13275136, - "fanoutMean": 3.6171875, - "recvTokensMax": 236, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 133.82400572299957, - "p90": 141.08799397945404, - "p95": 142.62400567531586, - "p99": 146.40000462532043 - }, - "combine": { - "p50": 120.28799951076508, - "p90": 122.56000190973282, - "p95": 127.10399925708771, - "p99": 136.00000739097595 - }, - "roundtrip": { - "p50": 221.88800573349, - "p90": 225.79200565814972, - "p95": 227.26400196552277, - "p99": 233.024001121521 - }, - "isolatedSum": { - "p50": 254.11200523376465, - "p90": 263.64799588918686, - "p95": 269.72800493240356, - "p99": 282.4000120162964 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 53172224, - "combineLogicalBytes": 53172224, - "fanoutMean": 3.6220703125, - "recvTokensMax": 934, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-62dda1f3", - "identity": 
"h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|alternating-groups@s2|8|decode|normal|none|none|2|tuned||3cd13eac5b27759", - "colorKey": "h100_b981a85d", - "comparisonKey": "03a9af950bebf5a9", - "schemaVersion": 3, - "generatedAt": "2026-06-27T00:12:08.462042+00:00", - "status": "valid", - "publicationStatus": "comparable-experimental", - "runner": "h100-dgxc-slurm_04", - "sku": "h100", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · bf16 · alternating-groups@s2", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "alternating-groups", - "routingLabel": "alternating-groups@s2", - "routingStep": 2, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "3cd13eac5b27759", - "workloadId": null, - "workloadSource": "seeded-runtime", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28272335347", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272335347", - "createdAt": "2026-06-27T00:12:08.462042+00:00", - "sha": 
"45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 91.96799993515015, - "p90": 101.85600072145462, - "p95": 102.88000106811523, - "p99": 111.00800335407257 - }, - "combine": { - "p50": 76.60800218582153, - "p90": 81.60000294446945, - "p95": 82.17599987983704, - "p99": 85.21600067615509 - }, - "roundtrip": { - "p50": 146.7839926481247, - "p90": 152.6080071926117, - "p95": 154.27200496196747, - "p99": 160.99199652671814 - }, - "isolatedSum": { - "p50": 168.57600212097168, - "p90": 183.45600366592407, - "p95": 185.05600094795227, - "p99": 196.22400403022766 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 3297280, - "combineLogicalBytes": 3297280, - "fanoutMean": 3.59375, - "recvTokensMax": 61, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 98.88000041246414, - "p90": 104.06400263309479, - "p95": 106.30399733781815, - "p99": 139.42399621009827 - }, - "combine": { - "p50": 84.60800349712372, - "p90": 86.30400151014328, - "p95": 86.81599795818329, - "p99": 92.51199662685394 - }, - "roundtrip": { - "p50": 154.65599298477173, - "p90": 160.64000129699707, - "p95": 162.59199380874634, - "p99": 168.09600591659546 - }, - "isolatedSum": { - "p50": 183.48800390958786, - "p90": 190.36800414323807, - "p95": 193.11999529600143, - "p99": 231.9359928369522 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 13275136, - "combineLogicalBytes": 13275136, - "fanoutMean": 3.6171875, - "recvTokensMax": 236, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 141.50400459766388, - "p90": 146.68799936771393, - "p95": 147.67999947071075, - "p99": 152.41600573062897 - }, - "combine": { - "p50": 118.17599833011627, - "p90": 122.56000190973282, - "p95": 123.58400225639343, - 
"p99": 125.82400441169739 - }, - "roundtrip": { - "p50": 227.13600099086761, - "p90": 231.23200237751007, - "p95": 232.92799293994904, - "p99": 237.05600202083588 - }, - "isolatedSum": { - "p50": 259.68000292778015, - "p90": 269.24800127744675, - "p95": 271.2640017271042, - "p99": 278.24001014232635 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 53172224, - "combineLogicalBytes": 53172224, - "fanoutMean": 3.6220703125, - "recvTokensMax": 934, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-f337d9a1", - "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|alternating-groups@s3|8|decode|normal|none|none|3|tuned||f8662de0b3559f9", - "colorKey": "h100_b881a6ca", - "comparisonKey": "03a9af950bebf5a9", - "schemaVersion": 3, - "generatedAt": "2026-06-27T00:12:29.724404+00:00", - "status": "valid", - "publicationStatus": "comparable-experimental", - "runner": "h100-dgxc-slurm_15", - "sku": "h100", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · bf16 · alternating-groups@s3", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "alternating-groups", - "routingLabel": "alternating-groups@s3", - "routingStep": 3, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - 
"nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "f8662de0b3559f9", - "workloadId": null, - "workloadSource": "seeded-runtime", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28272338723", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272338723", - "createdAt": "2026-06-27T00:12:29.724404+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 94.84799951314926, - "p90": 121.37600034475327, - "p95": 148.8959938287735, - "p99": 189.56799805164337 - }, - "combine": { - "p50": 79.58400249481201, - "p90": 96.6079980134964, - "p95": 113.0559965968132, - "p99": 123.77600371837616 - }, - "roundtrip": { - "p50": 148.44800531864166, - "p90": 183.20000171661377, - "p95": 218.78400444984436, - "p99": 249.79199469089508 - }, - "isolatedSum": { - "p50": 174.43200200796127, - "p90": 217.98399835824966, - "p95": 261.9519904255867, - "p99": 313.34400177001953 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 3297280, - "combineLogicalBytes": 3297280, - "fanoutMean": 3.59375, - "recvTokensMax": 61, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 100.41599720716476, - "p90": 127.55200266838074, - "p95": 156.5759927034378, - "p99": 182.81599879264832 - }, - "combine": { - "p50": 87.8399983048439, - "p90": 103.93600165843964, - "p95": 120.38400024175644, - "p99": 128.89599800109863 - }, - "roundtrip": { - "p50": 156.99200332164764, - "p90": 193.7599927186966, - "p95": 223.7119972705841, - "p99": 247.23200500011444 - }, - "isolatedSum": { - "p50": 188.25599551200867, - "p90": 231.48800432682037, - 
"p95": 276.95999294519424, - "p99": 311.71199679374695 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 13275136, - "combineLogicalBytes": 13275136, - "fanoutMean": 3.6171875, - "recvTokensMax": 236, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 134.2719942331314, - "p90": 147.39200472831726, - "p95": 173.567995429039, - "p99": 188.1919950246811 - }, - "combine": { - "p50": 120.44800072908401, - "p90": 138.62399756908417, - "p95": 152.38399803638458, - "p99": 160.96000373363495 - }, - "roundtrip": { - "p50": 222.6880043745041, - "p90": 247.80799448490143, - "p95": 264.6079957485199, - "p99": 279.35999631881714 - }, - "isolatedSum": { - "p50": 254.71999496221542, - "p90": 286.0160022974014, - "p95": 325.9519934654236, - "p99": 349.15199875831604 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 53172224, - "combineLogicalBytes": 53172224, - "fanoutMean": 3.6220703125, - "recvTokensMax": 934, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-cf5bc26b", - "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|decode|normal|none|none|0|tuned||2279937619f3971", - "colorKey": "h100_16047c28", - "comparisonKey": "64192d9d479bdd44", - "schemaVersion": 3, - "generatedAt": "2026-06-26T23:54:33.118563+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_12", - "sku": "h100", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · bf16 · balanced", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - 
"routing": "balanced", - "routingLabel": "balanced", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "2279937619f3971", - "workloadId": "set:4:7af12818400d6348", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28271788376", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271788376", - "createdAt": "2026-06-26T23:54:33.118563+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 94.68799829483032, - "p90": 101.1200025677681, - "p95": 104.41599786281586, - "p99": 111.10399663448334 - }, - "combine": { - "p50": 80.99199831485748, - "p90": 86.84799820184708, - "p95": 87.8399983048439, - "p99": 89.9519994854927 - }, - "roundtrip": { - "p50": 150.30400454998016, - "p90": 156.95999562740326, - "p95": 159.67999398708344, - "p99": 164.15999829769135 - }, - "isolatedSum": { - "p50": 175.6799966096878, - "p90": 187.96800076961517, - "p95": 192.25599616765976, - "p99": 201.05599611997604 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 917504, - "combineLogicalBytes": 917504, - "fanoutMean": 8, - "recvTokensMax": 8, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - 
{ - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 95.0080007314682, - "p90": 100.00000149011612, - "p95": 102.68799960613251, - "p99": 108.57599973678589 - }, - "combine": { - "p50": 81.727996468544, - "p90": 88.51200342178345, - "p95": 89.37600255012512, - "p99": 90.59199690818787 - }, - "roundtrip": { - "p50": 150.65599977970123, - "p90": 159.58400070667267, - "p95": 161.50400042533875, - "p99": 167.42399334907532 - }, - "isolatedSum": { - "p50": 176.7359972000122, - "p90": 188.51200491189957, - "p95": 192.06400215625763, - "p99": 199.16799664497375 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 7340032, - "combineLogicalBytes": 7340032, - "fanoutMean": 8, - "recvTokensMax": 64, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 104.63999956846237, - "p90": 112.28799819946289, - "p95": 114.14399743080139, - "p99": 119.84000355005264 - }, - "combine": { - "p50": 92.25600212812424, - "p90": 97.69599884748459, - "p95": 98.39999675750732, - "p99": 104.47999835014343 - }, - "roundtrip": { - "p50": 164.000004529953, - "p90": 171.64799571037292, - "p95": 175.4560023546219, - "p99": 228.4799963235855 - }, - "isolatedSum": { - "p50": 196.8960016965866, - "p90": 209.98399704694748, - "p95": 212.54399418830872, - "p99": 224.32000190019608 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 29360128, - "combineLogicalBytes": 29360128, - "fanoutMean": 8, - "recvTokensMax": 256, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 143.93599331378937, - "p90": 148.00000190734863, - "p95": 149.79200065135956, - "p99": 155.68000078201294 - }, - "combine": { - "p50": 132.06399977207184, - "p90": 138.75199854373932, - "p95": 139.29599523544312, - "p99": 145.6959992647171 - }, - "roundtrip": { - "p50": 241.2479966878891, - "p90": 
247.6480007171631, - "p95": 249.15200471878052, - "p99": 252.76800990104675 - }, - "isolatedSum": { - "p50": 275.9999930858612, - "p90": 286.75200045108795, - "p95": 289.0879958868027, - "p99": 301.37600004673004 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 117440512, - "combineLogicalBytes": 117440512, - "fanoutMean": 8, - "recvTokensMax": 1024, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-4d49fd79", - "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|decode|normal|none|none|0|tuned||ffa946582edb500", - "colorKey": "h100_16047c28", - "comparisonKey": "64192d9d479bdd44", - "schemaVersion": 3, - "generatedAt": "2026-06-26T23:59:13.030328+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_03", - "sku": "h100", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · bf16 · balanced", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "balanced", - "routingLabel": "balanced", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "ffa946582edb500", - "workloadId": 
"set:8:7af12818400d6348", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28271931349", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271931349", - "createdAt": "2026-06-26T23:59:13.030328+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 95.93600034713745, - "p90": 101.56799852848053, - "p95": 103.13600301742554, - "p99": 107.744000852108 - }, - "combine": { - "p50": 80.89599758386612, - "p90": 87.07199990749359, - "p95": 87.8399983048439, - "p99": 89.40800279378891 - }, - "roundtrip": { - "p50": 151.42400562763214, - "p90": 160.12799739837646, - "p95": 172.86400496959686, - "p99": 232.12799429893494 - }, - "isolatedSum": { - "p50": 176.83199793100357, - "p90": 188.63999843597412, - "p95": 190.97600132226944, - "p99": 197.1520036458969 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 917504, - "combineLogicalBytes": 917504, - "fanoutMean": 8, - "recvTokensMax": 8, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 96.47999703884125, - "p90": 103.42399775981903, - "p95": 107.71200060844421, - "p99": 161.40800714492798 - }, - "combine": { - "p50": 81.11999928951263, - "p90": 87.61599659919739, - "p95": 89.1840010881424, - "p99": 185.5359971523285 - }, - "roundtrip": { - "p50": 153.43999862670898, - "p90": 159.4880074262619, - "p95": 163.71199488639832, - "p99": 313.1200075149536 - }, - "isolatedSum": { - "p50": 177.59999632835388, - "p90": 191.03999435901642, - "p95": 196.8960016965866, - "p99": 346.94400429725647 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1835008, - 
"combineLogicalBytes": 1835008, - "fanoutMean": 8, - "recvTokensMax": 16, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 95.32800316810608, - "p90": 100.3199964761734, - "p95": 102.1760031580925, - "p99": 106.84800148010254 - }, - "combine": { - "p50": 80.32000064849854, - "p90": 84.22400057315826, - "p95": 88.41600269079208, - "p99": 90.14400094747543 - }, - "roundtrip": { - "p50": 150.94399452209473, - "p90": 158.4639996290207, - "p95": 159.90400314331055, - "p99": 163.32800686359406 - }, - "isolatedSum": { - "p50": 175.64800381660461, - "p90": 184.54399704933167, - "p95": 190.59200584888458, - "p99": 196.99200242757797 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 3670016, - "combineLogicalBytes": 3670016, - "fanoutMean": 8, - "recvTokensMax": 32, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 94.87999975681305, - "p90": 98.43199700117111, - "p95": 100.3199964761734, - "p99": 105.3759977221489 - }, - "combine": { - "p50": 80.54400235414505, - "p90": 87.20000088214874, - "p95": 88.73599767684937, - "p99": 89.82399851083755 - }, - "roundtrip": { - "p50": 152.0960032939911, - "p90": 158.65600109100342, - "p95": 160.16000509262085, - "p99": 166.97600483894348 - }, - "isolatedSum": { - "p50": 175.4240021109581, - "p90": 185.63199788331985, - "p95": 189.05599415302277, - "p99": 195.19999623298645 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 7340032, - "combineLogicalBytes": 7340032, - "fanoutMean": 8, - "recvTokensMax": 64, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 95.93600034713745, - "p90": 103.20000350475311, - "p95": 106.20799660682678, - "p99": 168.57600212097168 - }, - "combine": { - "p50": 
84.3840017914772, - "p90": 89.40800279378891, - "p95": 89.75999802350998, - "p99": 94.84799951314926 - }, - "roundtrip": { - "p50": 154.84799444675446, - "p90": 161.02400422096252, - "p95": 163.7440025806427, - "p99": 497.50399589538574 - }, - "isolatedSum": { - "p50": 180.32000213861465, - "p90": 192.60800629854202, - "p95": 195.96799463033676, - "p99": 263.42400163412094 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 14680064, - "combineLogicalBytes": 14680064, - "fanoutMean": 8, - "recvTokensMax": 128, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 103.16800326108932, - "p90": 109.37599837779999, - "p95": 110.75200140476227, - "p99": 113.43999952077866 - }, - "combine": { - "p50": 88.79999816417694, - "p90": 95.74399888515472, - "p95": 97.120001912117, - "p99": 97.95200079679489 - }, - "roundtrip": { - "p50": 161.6639941930771, - "p90": 167.1999990940094, - "p95": 168.73599588871002, - "p99": 172.89599776268005 - }, - "isolatedSum": { - "p50": 191.96800142526627, - "p90": 205.1199972629547, - "p95": 207.87200331687927, - "p99": 211.39200031757355 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 29360128, - "combineLogicalBytes": 29360128, - "fanoutMean": 8, - "recvTokensMax": 256, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 64, - "globalTokens": 512, - "dispatch": { - "p50": 119.6800023317337, - "p90": 128.00000607967377, - "p95": 129.05600666999817, - "p99": 133.91999900341034 - }, - "combine": { - "p50": 103.16800326108932, - "p90": 106.55999928712845, - "p95": 107.90400207042694, - "p99": 113.63200098276138 - }, - "roundtrip": { - "p50": 186.71999871730804, - "p90": 194.65599954128265, - "p95": 196.31999731063843, - "p99": 199.48799908161163 - }, - "isolatedSum": { - "p50": 222.84800559282303, - "p90": 234.56000536680222, - "p95": 236.9600087404251, - "p99": 
247.55199998617172 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 58720256, - "combineLogicalBytes": 58720256, - "fanoutMean": 8, - "recvTokensMax": 512, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 137.66400516033173, - "p90": 146.7200070619583, - "p95": 147.8080004453659, - "p99": 151.10400319099426 - }, - "combine": { - "p50": 131.1360001564026, - "p90": 137.82399892807007, - "p95": 138.46400380134583, - "p99": 145.28000354766846 - }, - "roundtrip": { - "p50": 241.40800535678864, - "p90": 248.60799312591553, - "p95": 250.59199333190918, - "p99": 258.5600018501282 - }, - "isolatedSum": { - "p50": 268.8000053167343, - "p90": 284.5440059900284, - "p95": 286.27200424671173, - "p99": 296.3840067386627 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 117440512, - "combineLogicalBytes": 117440512, - "fanoutMean": 8, - "recvTokensMax": 1024, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-38b8b0c2", - "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|8|decode|normal|none|none|0|tuned||d02a66236b524b8", - "colorKey": "h100_0c515f8b", - "comparisonKey": "47e8e48c891afabb", - "schemaVersion": 3, - "generatedAt": "2026-06-26T23:54:43.774495+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_09", - "sku": "h100", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · bf16 · balanced-rank-local", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": 
"balanced-rank-local", - "routingLabel": "balanced-rank-local", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "d02a66236b524b8", - "workloadId": "set:4:2eebbed158fe1320", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28271795429", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271795429", - "createdAt": "2026-06-26T23:54:43.774495+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 96.03200107812881, - "p90": 102.49599814414978, - "p95": 105.66399991512299, - "p99": 117.88800358772278 - }, - "combine": { - "p50": 71.45600020885468, - "p90": 73.98399710655212, - "p95": 77.18399912118912, - "p99": 81.56800270080566 - }, - "roundtrip": { - "p50": 142.04800128936768, - "p90": 149.98400211334229, - "p95": 151.45599842071533, - "p99": 159.07199680805206 - }, - "isolatedSum": { - "p50": 167.4880012869835, - "p90": 176.4799952507019, - "p95": 182.8479990363121, - "p99": 199.45600628852844 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 114688, - "combineLogicalBytes": 114688, - "fanoutMean": 1, - "recvTokensMax": 4, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - 
"trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 98.9760011434555, - "p90": 106.62399977445602, - "p95": 110.07999628782272, - "p99": 123.00799787044525 - }, - "combine": { - "p50": 71.32799923419952, - "p90": 73.69600236415863, - "p95": 78.52800190448761, - "p99": 80.22399991750717 - }, - "roundtrip": { - "p50": 143.26399564743042, - "p90": 150.14399588108063, - "p95": 153.1520038843155, - "p99": 162.88000345230103 - }, - "isolatedSum": { - "p50": 170.30400037765503, - "p90": 180.32000213861465, - "p95": 188.60799819231033, - "p99": 203.23199778795242 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 917504, - "combineLogicalBytes": 917504, - "fanoutMean": 1, - "recvTokensMax": 8, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 100.09600222110748, - "p90": 107.61599987745285, - "p95": 112.31999844312668, - "p99": 163.16799819469452 - }, - "combine": { - "p50": 79.71200346946716, - "p90": 87.16800063848495, - "p95": 87.74399757385254, - "p99": 95.8079993724823 - }, - "roundtrip": { - "p50": 154.01600301265717, - "p90": 161.47199273109436, - "p95": 164.5440012216568, - "p99": 176.83200538158417 - }, - "isolatedSum": { - "p50": 179.80800569057465, - "p90": 194.7840005159378, - "p95": 200.06399601697922, - "p99": 258.9759975671768 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 3670016, - "combineLogicalBytes": 3670016, - "fanoutMean": 1, - "recvTokensMax": 32, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 104.38399761915207, - "p90": 108.44799876213074, - "p95": 111.455999314785, - "p99": 119.74400281906128 - }, - "combine": { - "p50": 83.26400071382523, - "p90": 88.03199976682663, - "p95": 88.22400122880936, - "p99": 92.83199906349182 - }, - "roundtrip": { - "p50": 
154.9759954214096, - "p90": 161.18399798870087, - "p95": 165.0879979133606, - "p99": 170.01600563526154 - }, - "isolatedSum": { - "p50": 187.6479983329773, - "p90": 196.47999852895737, - "p95": 199.68000054359436, - "p99": 212.5760018825531 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 14680064, - "combineLogicalBytes": 14680064, - "fanoutMean": 1, - "recvTokensMax": 128, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-94696c7b", - "identity": "h100|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|balanced+eplb|8|decode|normal|none|none|0|tuned||f0e66a15078595b", - "colorKey": "h100_c0c0ad86", - "comparisonKey": "00faf19eae8c1230", - "schemaVersion": 3, - "generatedAt": "2026-06-27T00:00:00.906485+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_19", - "sku": "h100", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · bf16 · balanced+eplb", - "model": "DeepSeek-V3 (EPLB physical)", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 288, - "routing": "balanced", - "routingLabel": "balanced+eplb", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": true, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": 
"f0e66a15078595b", - "workloadId": "set:8:7af12818400d6348", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": 1, - "eplbImbalanceAfter": 1, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28271935069", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271935069", - "createdAt": "2026-06-27T00:00:00.906485+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 97.63199836015701, - "p90": 106.36799782514572, - "p95": 109.63200032711029, - "p99": 118.65600198507309 - }, - "combine": { - "p50": 71.45600020885468, - "p90": 78.94399762153625, - "p95": 79.42400127649307, - "p99": 82.24000036716461 - }, - "roundtrip": { - "p50": 145.4080045223236, - "p90": 154.23999726772308, - "p95": 155.64799308776855, - "p99": 157.98400342464447 - }, - "isolatedSum": { - "p50": 169.0879985690117, - "p90": 185.31199544668198, - "p95": 189.05600160360336, - "p99": 200.8960023522377 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 229376, - "combineLogicalBytes": 229376, - "fanoutMean": 2, - "recvTokensMax": 3, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 77.08799839019775, - "p90": 104.06400263309479, - "p95": 105.66399991512299, - "p99": 111.1999973654747 - }, - "combine": { - "p50": 65.05600363016129, - "p90": 74.5600014925003, - "p95": 79.00799810886383, - "p99": 82.33600109815598 - }, - "roundtrip": { - "p50": 122.8799968957901, - "p90": 151.64799988269806, - "p95": 153.24799716472626, - "p99": 161.50400042533875 - }, - "isolatedSum": { - "p50": 142.14400202035904, - "p90": 178.6240041255951, - "p95": 184.67199802398682, - "p99": 193.53599846363068 - }, - "roundtripMeasured": true, - 
"dispatchLogicalBytes": 458752, - "combineLogicalBytes": 458752, - "fanoutMean": 2, - "recvTokensMax": 6, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 97.56799787282944, - "p90": 107.80800133943558, - "p95": 114.04799669981003, - "p99": 120.44800072908401 - }, - "combine": { - "p50": 65.69600105285645, - "p90": 78.87999713420868, - "p95": 79.32800054550171, - "p99": 87.13600039482117 - }, - "roundtrip": { - "p50": 123.99999797344208, - "p90": 158.720001578331, - "p95": 165.3439998626709, - "p99": 176.28799378871918 - }, - "isolatedSum": { - "p50": 163.26399892568588, - "p90": 186.68799847364426, - "p95": 193.37599724531174, - "p99": 207.58400112390518 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 917504, - "combineLogicalBytes": 917504, - "fanoutMean": 2, - "recvTokensMax": 12, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 98.11200201511383, - "p90": 105.76000064611435, - "p95": 108.64000022411346, - "p99": 122.30399996042252 - }, - "combine": { - "p50": 72.22399860620499, - "p90": 79.1039988398552, - "p95": 80.38400113582611, - "p99": 87.0399996638298 - }, - "roundtrip": { - "p50": 145.28000354766846, - "p90": 152.54400670528412, - "p95": 155.39200603961945, - "p99": 160.38399934768677 - }, - "isolatedSum": { - "p50": 170.33600062131882, - "p90": 184.86399948596954, - "p95": 189.02400135993958, - "p99": 209.34399962425232 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1835008, - "combineLogicalBytes": 1835008, - "fanoutMean": 2, - "recvTokensMax": 24, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 98.52799773216248, - "p90": 109.69600081443787, - "p95": 117.34399944543839, - "p99": 131.45600259304047 - }, - 
"combine": { - "p50": 78.59200239181519, - "p90": 81.53600245714188, - "p95": 86.91199868917465, - "p99": 88.32000195980072 - }, - "roundtrip": { - "p50": 146.97599411010742, - "p90": 156.47999942302704, - "p95": 161.56800091266632, - "p99": 173.18400740623474 - }, - "isolatedSum": { - "p50": 177.12000012397766, - "p90": 191.23200327157974, - "p95": 204.25599813461304, - "p99": 219.7760045528412 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 3670016, - "combineLogicalBytes": 3670016, - "fanoutMean": 2, - "recvTokensMax": 48, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 84.54400300979614, - "p90": 107.07200318574905, - "p95": 113.40799927711487, - "p99": 126.08000636100769 - }, - "combine": { - "p50": 71.10399752855301, - "p90": 80.57600259780884, - "p95": 87.13600039482117, - "p99": 95.51999717950821 - }, - "roundtrip": { - "p50": 127.93600559234619, - "p90": 151.7760008573532, - "p95": 154.40000593662262, - "p99": 161.56800091266632 - }, - "isolatedSum": { - "p50": 155.64800053834915, - "p90": 187.6480057835579, - "p95": 200.54399967193604, - "p99": 221.6000035405159 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 7340032, - "combineLogicalBytes": 7340032, - "fanoutMean": 2, - "recvTokensMax": 96, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 64, - "globalTokens": 512, - "dispatch": { - "p50": 100.35199671983719, - "p90": 113.37599903345108, - "p95": 126.49600207805634, - "p99": 162.1759980916977 - }, - "combine": { - "p50": 79.58400249481201, - "p90": 87.16800063848495, - "p95": 87.71199733018875, - "p99": 95.45599669218063 - }, - "roundtrip": { - "p50": 154.62400019168854, - "p90": 165.18400609493256, - "p95": 170.27199268341064, - "p99": 184.7359985113144 - }, - "isolatedSum": { - "p50": 179.9359992146492, - "p90": 200.54399967193604, - "p95": 214.2079994082451, - 
"p99": 257.6319947838783 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 14680064, - "combineLogicalBytes": 14680064, - "fanoutMean": 2, - "recvTokensMax": 192, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 98.1760025024414, - "p90": 120.80000340938568, - "p95": 125.56800246238708, - "p99": 134.49600338935852 - }, - "combine": { - "p50": 87.77599781751633, - "p90": 96.0640013217926, - "p95": 97.69599884748459, - "p99": 107.35999792814255 - }, - "roundtrip": { - "p50": 160.70400178432465, - "p90": 178.3680021762848, - "p95": 184.1920018196106, - "p99": 190.62399864196777 - }, - "isolatedSum": { - "p50": 185.95200031995773, - "p90": 216.86400473117828, - "p95": 223.26400130987167, - "p99": 241.85600131750107 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 29360128, - "combineLogicalBytes": 29360128, - "fanoutMean": 2, - "recvTokensMax": 384, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-b4d89049", - "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-moving|8|decode|normal|none|none|0|tuned||90042e0db6a8297", - "colorKey": "h100_1c83c0b0", - "comparisonKey": "b84a29c0643a5455", - "schemaVersion": 3, - "generatedAt": "2026-06-27T00:11:39.736162+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_09", - "sku": "h100", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · bf16 · hotspot-moving", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "hotspot-moving", - 
"routingLabel": "hotspot-moving", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "90042e0db6a8297", - "workloadId": "set:3:8fd05d9ebee41064", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28272315381", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272315381", - "createdAt": "2026-06-27T00:11:39.736162+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 98.1760025024414, - "p90": 105.12000322341919, - "p95": 107.4879989027977, - "p99": 114.43199962377548 - }, - "combine": { - "p50": 81.216000020504, - "p90": 87.8399983048439, - "p95": 88.19200098514557, - "p99": 89.08800035715103 - }, - "roundtrip": { - "p50": 154.4959992170334, - "p90": 160.99199652671814, - "p95": 162.59199380874634, - "p99": 167.35999286174774 - }, - "isolatedSum": { - "p50": 179.3920025229454, - "p90": 192.9600015282631, - "p95": 195.67999988794327, - "p99": 203.5199999809265 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4859904, - "combineLogicalBytes": 4859904, - "fanoutMean": 5.296875, - "recvTokensMax": 64, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - 
"tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 104.3199971318245, - "p90": 109.98400300741196, - "p95": 111.77600175142288, - "p99": 118.81600320339203 - }, - "combine": { - "p50": 89.1840010881424, - "p90": 95.58399766683578, - "p95": 96.09600156545639, - "p99": 97.18400239944458 - }, - "roundtrip": { - "p50": 164.2560064792633, - "p90": 169.69600319862366, - "p95": 171.64799571037292, - "p99": 176.64000391960144 - }, - "isolatedSum": { - "p50": 193.5039982199669, - "p90": 205.56800067424774, - "p95": 207.87200331687927, - "p99": 216.0000056028366 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 19525632, - "combineLogicalBytes": 19525632, - "fanoutMean": 5.3203125, - "recvTokensMax": 256, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 137.28000223636627, - "p90": 146.11199498176575, - "p95": 149.6639996767044, - "p99": 152.19199657440186 - }, - "combine": { - "p50": 128.48000228405, - "p90": 130.14400005340576, - "p95": 130.65600395202637, - "p99": 136.57599687576294 - }, - "roundtrip": { - "p50": 231.10400140285492, - "p90": 236.4799976348877, - "p95": 238.11200261116028, - "p99": 242.88000166416168 - }, - "isolatedSum": { - "p50": 265.76000452041626, - "p90": 276.2559950351715, - "p95": 280.3200036287308, - "p99": 288.7679934501648 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 78102528, - "combineLogicalBytes": 78102528, - "fanoutMean": 5.3203125, - "recvTokensMax": 1024, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-595b6f36", - "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-moving@s2|8|decode|normal|none|none|2|tuned||675e15b52e37958", - "colorKey": "h100_55b1ee31", - "comparisonKey": "b84a29c0643a5455", - "schemaVersion": 3, - "generatedAt": "2026-06-27T00:11:41.163804+00:00", - "status": "valid", - 
"publicationStatus": "comparable-experimental", - "runner": "h100-dgxc-slurm_02", - "sku": "h100", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · bf16 · hotspot-moving@s2", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "hotspot-moving", - "routingLabel": "hotspot-moving@s2", - "routingStep": 2, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "675e15b52e37958", - "workloadId": null, - "workloadSource": "seeded-runtime", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28272321917", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272321917", - "createdAt": "2026-06-27T00:11:41.163804+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 98.24000298976898, - "p90": 103.96800190210342, - "p95": 106.30399733781815, - "p99": 111.07199639081955 - }, - "combine": { - "p50": 79.52000200748444, - "p90": 86.87999844551086, - 
"p95": 87.52000331878662, - "p99": 88.0960002541542 - }, - "roundtrip": { - "p50": 153.28000485897064, - "p90": 161.3759994506836, - "p95": 163.4880006313324, - "p99": 455.80801367759705 - }, - "isolatedSum": { - "p50": 177.76000499725342, - "p90": 190.8480003476143, - "p95": 193.82400065660477, - "p99": 199.16799664497375 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4859904, - "combineLogicalBytes": 4859904, - "fanoutMean": 5.296875, - "recvTokensMax": 64, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 103.84000092744827, - "p90": 109.0560033917427, - "p95": 110.1439967751503, - "p99": 113.88800293207169 - }, - "combine": { - "p50": 87.87199854850769, - "p90": 95.32800316810608, - "p95": 95.90400010347366, - "p99": 96.25600278377533 - }, - "roundtrip": { - "p50": 161.98399662971497, - "p90": 168.99199783802032, - "p95": 170.56000232696533, - "p99": 175.80799758434296 - }, - "isolatedSum": { - "p50": 191.71199947595596, - "p90": 204.38400655984879, - "p95": 206.04799687862396, - "p99": 210.14400571584702 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 19525632, - "combineLogicalBytes": 19525632, - "fanoutMean": 5.3203125, - "recvTokensMax": 256, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 136.80000603199005, - "p90": 145.4399973154068, - "p95": 146.68799936771393, - "p99": 149.4079977273941 - }, - "combine": { - "p50": 123.99999797344208, - "p90": 129.05600666999817, - "p95": 130.36799430847168, - "p99": 136.00000739097595 - }, - "roundtrip": { - "p50": 228.7999987602234, - "p90": 236.12800240516663, - "p95": 237.98400163650513, - "p99": 241.5039986371994 - }, - "isolatedSum": { - "p50": 260.80000400543213, - "p90": 274.49600398540497, - "p95": 277.0559936761856, - "p99": 285.40800511837006 - }, - 
"roundtripMeasured": true, - "dispatchLogicalBytes": 78102528, - "combineLogicalBytes": 78102528, - "fanoutMean": 5.3203125, - "recvTokensMax": 1024, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-f5ba95c3", - "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-moving@s3|8|decode|normal|none|none|3|tuned||82b2963fc322419", - "colorKey": "h100_54b1ec9e", - "comparisonKey": "b84a29c0643a5455", - "schemaVersion": 3, - "generatedAt": "2026-06-27T00:12:09.752348+00:00", - "status": "valid", - "publicationStatus": "comparable-experimental", - "runner": "h100-dgxc-slurm_14", - "sku": "h100", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · bf16 · hotspot-moving@s3", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "hotspot-moving", - "routingLabel": "hotspot-moving@s3", - "routingStep": 3, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "82b2963fc322419", - "workloadId": null, - "workloadSource": "seeded-runtime", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": 
"sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28272325031", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272325031", - "createdAt": "2026-06-27T00:12:09.752348+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 97.98400104045868, - "p90": 104.44799810647964, - "p95": 107.84000158309937, - "p99": 116.06399714946747 - }, - "combine": { - "p50": 81.02399855852127, - "p90": 87.64799684286118, - "p95": 88.06400001049042, - "p99": 96.00000083446503 - }, - "roundtrip": { - "p50": 156.41599893569946, - "p90": 162.62400150299072, - "p95": 165.75999557971954, - "p99": 176.7359972000122 - }, - "isolatedSum": { - "p50": 179.00799959897995, - "p90": 192.09599494934082, - "p95": 195.90400159358978, - "p99": 212.0639979839325 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4859904, - "combineLogicalBytes": 4859904, - "fanoutMean": 5.296875, - "recvTokensMax": 64, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 103.20000350475311, - "p90": 107.39199817180634, - "p95": 111.51999980211258, - "p99": 119.00799721479416 - }, - "combine": { - "p50": 88.16000074148178, - "p90": 95.8079993724823, - "p95": 96.16000205278397, - "p99": 98.11200201511383 - }, - "roundtrip": { - "p50": 162.78399527072906, - "p90": 168.73599588871002, - "p95": 170.9440052509308, - "p99": 176.57600343227386 - }, - "isolatedSum": { - "p50": 191.3600042462349, - "p90": 203.19999754428864, - "p95": 207.68000185489655, - "p99": 217.119999229908 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 19525632, - "combineLogicalBytes": 19525632, - "fanoutMean": 5.3203125, - "recvTokensMax": 256, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - 
{ - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 137.28000223636627, - "p90": 149.1200029850006, - "p95": 151.0079950094223, - "p99": 153.18399667739868 - }, - "combine": { - "p50": 128.86400520801544, - "p90": 131.1360001564026, - "p95": 135.71199774742126, - "p99": 138.3039951324463 - }, - "roundtrip": { - "p50": 234.49599742889404, - "p90": 241.4720058441162, - "p95": 242.65600740909576, - "p99": 247.9040026664734 - }, - "isolatedSum": { - "p50": 266.1440074443817, - "p90": 280.2560031414032, - "p95": 286.71999275684357, - "p99": 291.48799180984497 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 78102528, - "combineLogicalBytes": 78102528, - "fanoutMean": 5.3203125, - "recvTokensMax": 1024, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-fb3ea9d7", - "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|decode|normal|none|none|0|tuned||2ad5ef98d328fa1", - "colorKey": "h100_b654f9b2", - "comparisonKey": "10b5062b8e23fcad", - "schemaVersion": 3, - "generatedAt": "2026-06-26T23:55:39.087780+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_00", - "sku": "h100", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · bf16 · hotspot-single", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "hotspot-single", - "routingLabel": "hotspot-single", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - 
"achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "2ad5ef98d328fa1", - "workloadId": "set:4:286be993cd819ed9", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28271817166", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271817166", - "createdAt": "2026-06-26T23:55:39.087780+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 97.82399982213974, - "p90": 105.95200210809708, - "p95": 109.95200276374817, - "p99": 121.50400131940842 - }, - "combine": { - "p50": 80.25600016117096, - "p90": 81.88799768686295, - "p95": 83.3280012011528, - "p99": 89.37600255012512 - }, - "roundtrip": { - "p50": 152.12799608707428, - "p90": 158.78400206565857, - "p95": 160.64000129699707, - "p99": 166.81599617004395 - }, - "isolatedSum": { - "p50": 178.0799999833107, - "p90": 187.83999979496002, - "p95": 193.28000396490097, - "p99": 210.88000386953354 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 602112, - "combineLogicalBytes": 602112, - "fanoutMean": 5.25, - "recvTokensMax": 8, - "stragglerRank": 3, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 97.08800166845322, - "p90": 103.39199751615524, - "p95": 107.51999914646149, - "p99": 115.93600362539291 - }, - "combine": { - "p50": 80.89599758386612, - "p90": 84.03199911117554, - "p95": 
87.42400258779526, - "p99": 89.47200328111649 - }, - "roundtrip": { - "p50": 153.60000729560852, - "p90": 161.15200519561768, - "p95": 163.83999586105347, - "p99": 171.55200242996216 - }, - "isolatedSum": { - "p50": 177.98399925231934, - "p90": 187.42399662733078, - "p95": 194.94400173425674, - "p99": 205.4080069065094 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4859904, - "combineLogicalBytes": 4859904, - "fanoutMean": 5.296875, - "recvTokensMax": 64, - "stragglerRank": 3, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 103.39199751615524, - "p90": 108.47999900579453, - "p95": 110.55999994277954, - "p99": 117.18399822711945 - }, - "combine": { - "p50": 89.34400230646133, - "p90": 95.551997423172, - "p95": 97.34400361776352, - "p99": 99.93600100278854 - }, - "roundtrip": { - "p50": 162.75200247764587, - "p90": 170.43200135231018, - "p95": 172.83199727535248, - "p99": 179.61600422859192 - }, - "isolatedSum": { - "p50": 192.73599982261658, - "p90": 204.03199642896652, - "p95": 207.90400356054306, - "p99": 217.119999229908 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 19525632, - "combineLogicalBytes": 19525632, - "fanoutMean": 5.3203125, - "recvTokensMax": 256, - "stragglerRank": 3, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 137.85600662231445, - "p90": 144.41600441932678, - "p95": 147.0080018043518, - "p99": 151.16800367832184 - }, - "combine": { - "p50": 128.83199751377106, - "p90": 131.23199343681335, - "p95": 131.99999928474426, - "p99": 137.95199990272522 - }, - "roundtrip": { - "p50": 233.75999927520752, - "p90": 239.3919974565506, - "p95": 240.92799425125122, - "p99": 245.1840043067932 - }, - "isolatedSum": { - "p50": 266.6880041360855, - "p90": 275.64799785614014, - "p95": 279.00800108909607, - "p99": 289.12000358104706 - }, - "roundtripMeasured": 
true, - "dispatchLogicalBytes": 78102528, - "combineLogicalBytes": 78102528, - "fanoutMean": 5.3203125, - "recvTokensMax": 1024, - "stragglerRank": 3, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-e0ce741a", - "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|decode|normal|none|none|0|tuned||b6caf944f6bb621", - "colorKey": "h100_b654f9b2", - "comparisonKey": "10b5062b8e23fcad", - "schemaVersion": 3, - "generatedAt": "2026-06-27T00:01:31.374180+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_19", - "sku": "h100", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · bf16 · hotspot-single", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "hotspot-single", - "routingLabel": "hotspot-single", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "b6caf944f6bb621", - "workloadId": "set:8:286be993cd819ed9", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": 
"sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28272004392", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272004392", - "createdAt": "2026-06-27T00:01:31.374180+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 96.67199850082397, - "p90": 104.60799932479858, - "p95": 106.11200332641602, - "p99": 113.56800049543381 - }, - "combine": { - "p50": 79.00799810886383, - "p90": 82.0159986615181, - "p95": 82.36800134181976, - "p99": 87.67999708652496 - }, - "roundtrip": { - "p50": 147.2640037536621, - "p90": 154.59200739860535, - "p95": 157.3439985513687, - "p99": 161.5999937057495 - }, - "isolatedSum": { - "p50": 175.6799966096878, - "p90": 186.62399798631668, - "p95": 188.48000466823578, - "p99": 201.24799758195877 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 602112, - "combineLogicalBytes": 602112, - "fanoutMean": 5.25, - "recvTokensMax": 8, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 99.67999905347824, - "p90": 105.0880029797554, - "p95": 107.16799646615982, - "p99": 112.99200356006622 - }, - "combine": { - "p50": 81.11999928951263, - "p90": 82.49600231647491, - "p95": 83.03999900817871, - "p99": 87.2960016131401 - }, - "roundtrip": { - "p50": 147.0080018043518, - "p90": 153.6639928817749, - "p95": 155.71199357509613, - "p99": 159.10400450229645 - }, - "isolatedSum": { - "p50": 180.79999834299088, - "p90": 187.58400529623032, - "p95": 190.20799547433853, - "p99": 200.28800517320633 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1247232, - "combineLogicalBytes": 1247232, - "fanoutMean": 5.4375, - "recvTokensMax": 16, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - 
"tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 97.18400239944458, - "p90": 103.93600165843964, - "p95": 106.30399733781815, - "p99": 122.04799801111221 - }, - "combine": { - "p50": 78.94399762153625, - "p90": 82.43200182914734, - "p95": 86.40000224113464, - "p99": 103.45599800348282 - }, - "roundtrip": { - "p50": 148.15999567508698, - "p90": 158.55999290943146, - "p95": 160.3199988603592, - "p99": 164.09599781036377 - }, - "isolatedSum": { - "p50": 176.12800002098083, - "p90": 186.36800348758698, - "p95": 192.7039995789528, - "p99": 225.50399601459503 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2451456, - "combineLogicalBytes": 2451456, - "fanoutMean": 5.34375, - "recvTokensMax": 32, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 98.91200065612793, - "p90": 104.35199737548828, - "p95": 106.65600001811981, - "p99": 112.47999966144562 - }, - "combine": { - "p50": 81.24800026416779, - "p90": 83.3280012011528, - "p95": 87.0399996638298, - "p99": 87.93599903583527 - }, - "roundtrip": { - "p50": 153.4080058336258, - "p90": 159.61599349975586, - "p95": 161.47199273109436, - "p99": 165.21599888801575 - }, - "isolatedSum": { - "p50": 180.16000092029572, - "p90": 187.67999857664108, - "p95": 193.69599968194962, - "p99": 200.41599869728088 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4859904, - "combineLogicalBytes": 4859904, - "fanoutMean": 5.296875, - "recvTokensMax": 64, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 96.92800045013428, - "p90": 104.3199971318245, - "p95": 110.55999994277954, - "p99": 161.9199961423874 - }, - "combine": { - "p50": 81.4720019698143, - "p90": 87.2960016131401, - "p95": 87.8399983048439, - "p99": 90.27200192213058 - }, - "roundtrip": { - "p50": 153.43999862670898, - "p90": 
160.19199788570404, - "p95": 162.78399527072906, - "p99": 169.98399794101715 - }, - "isolatedSum": { - "p50": 178.40000241994858, - "p90": 191.6159987449646, - "p95": 198.39999824762344, - "p99": 252.19199806451797 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 9748480, - "combineLogicalBytes": 9748480, - "fanoutMean": 5.3125, - "recvTokensMax": 128, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 103.64799946546555, - "p90": 108.31999778747559, - "p95": 110.62400043010712, - "p99": 114.84800279140472 - }, - "combine": { - "p50": 87.5839963555336, - "p90": 91.839998960495, - "p95": 95.39200365543365, - "p99": 96.38399630784988 - }, - "roundtrip": { - "p50": 155.96799552440643, - "p90": 165.50399363040924, - "p95": 168.41599345207214, - "p99": 175.64800381660461 - }, - "isolatedSum": { - "p50": 191.23199582099915, - "p90": 200.15999674797058, - "p95": 206.01600408554077, - "p99": 211.2319990992546 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 19525632, - "combineLogicalBytes": 19525632, - "fanoutMean": 5.3203125, - "recvTokensMax": 256, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 64, - "globalTokens": 512, - "dispatch": { - "p50": 114.88000303506851, - "p90": 126.11199915409088, - "p95": 127.6479959487915, - "p99": 133.56800377368927 - }, - "combine": { - "p50": 98.43199700117111, - "p90": 103.96800190210342, - "p95": 105.8880016207695, - "p99": 119.71200257539749 - }, - "roundtrip": { - "p50": 180.38399517536163, - "p90": 191.39200448989868, - "p95": 194.39999759197235, - "p99": 201.9840031862259 - }, - "isolatedSum": { - "p50": 213.31200003623962, - "p90": 230.0800010561943, - "p95": 233.535997569561, - "p99": 253.28000634908676 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 38621184, - "combineLogicalBytes": 38621184, - "fanoutMean": 5.26171875, - 
"recvTokensMax": 512, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 141.79199934005737, - "p90": 147.2959965467453, - "p95": 149.82399344444275, - "p99": 153.3759981393814 - }, - "combine": { - "p50": 122.36800044775009, - "p90": 128.4160017967224, - "p95": 129.02399897575378, - "p99": 136.1600011587143 - }, - "roundtrip": { - "p50": 231.77599906921387, - "p90": 241.85599386692047, - "p95": 244.9280023574829, - "p99": 248.76800179481506 - }, - "isolatedSum": { - "p50": 264.15999978780746, - "p90": 275.7119983434677, - "p95": 278.84799242019653, - "p99": 289.5359992980957 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 78102528, - "combineLogicalBytes": 78102528, - "fanoutMean": 5.3203125, - "recvTokensMax": 1024, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-73951147", - "identity": "h100|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|hotspot-single+eplb|8|decode|normal|none|none|0|tuned||e41f5099a9733ac", - "colorKey": "h100_456a963c", - "comparisonKey": "12dbc31e8daf0a44", - "schemaVersion": 3, - "generatedAt": "2026-06-27T00:01:37.187210+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_01", - "sku": "h100", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · bf16 · hotspot-single+eplb", - "model": "DeepSeek-V3 (EPLB physical)", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 288, - "routing": "hotspot-single", - "routingLabel": "hotspot-single+eplb", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": true, - "dispatchDtype": 
"bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "e41f5099a9733ac", - "workloadId": "set:8:286be993cd819ed9", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": 1.830078125, - "eplbImbalanceAfter": 1.0007595486111112, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28272008867", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272008867", - "createdAt": "2026-06-27T00:01:37.187210+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 68.41599941253662, - "p90": 76.1599987745285, - "p95": 77.69600301980972, - "p99": 84.83199775218964 - }, - "combine": { - "p50": 71.07199728488922, - "p90": 73.11999797821045, - "p95": 73.7600028514862, - "p99": 79.74400371313095 - }, - "roundtrip": { - "p50": 126.46399438381195, - "p90": 130.62399625778198, - "p95": 131.55199587345123, - "p99": 136.4479959011078 - }, - "isolatedSum": { - "p50": 139.48799669742584, - "p90": 149.27999675273895, - "p95": 151.45600587129593, - "p99": 164.5760014653206 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 630784, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 68.70400160551071, - "p90": 76.9599974155426, - 
"p95": 81.727996468544, - "p99": 107.10400342941284 - }, - "combine": { - "p50": 71.48800045251846, - "p90": 73.15199822187424, - "p95": 73.56800138950348, - "p99": 79.55200225114822 - }, - "roundtrip": { - "p50": 127.77599692344666, - "p90": 131.23199343681335, - "p95": 132.60799646377563, - "p99": 138.7840062379837 - }, - "isolatedSum": { - "p50": 140.19200205802917, - "p90": 150.11199563741684, - "p95": 155.29599785804749, - "p99": 186.65600568056107 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1175552, - "combineLogicalBytes": 1175552, - "fanoutMean": 5.125, - "recvTokensMax": 12, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 70.8480030298233, - "p90": 77.79199630022049, - "p95": 80.09599894285202, - "p99": 87.0399996638298 - }, - "combine": { - "p50": 72.4480003118515, - "p90": 73.56800138950348, - "p95": 74.27199929952621, - "p99": 79.80799674987793 - }, - "roundtrip": { - "p50": 126.94400548934937, - "p90": 131.77600502967834, - "p95": 133.4719955921173, - "p99": 137.2479945421219 - }, - "isolatedSum": { - "p50": 143.2960033416748, - "p90": 151.35999768972397, - "p95": 154.36799824237823, - "p99": 166.84799641370773 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2451456, - "combineLogicalBytes": 2451456, - "fanoutMean": 5.34375, - "recvTokensMax": 23, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 70.11199742555618, - "p90": 76.9599974155426, - "p95": 79.3600007891655, - "p99": 86.14400029182434 - }, - "combine": { - "p50": 72.64000177383423, - "p90": 73.82400333881378, - "p95": 74.94399696588516, - "p99": 81.08799904584885 - }, - "roundtrip": { - "p50": 125.47199428081512, - "p90": 131.6480040550232, - "p95": 133.66399705410004, - "p99": 139.29599523544312 - }, - "isolatedSum": { - "p50": 142.7519991993904, - 
"p90": 150.78400075435638, - "p95": 154.30399775505066, - "p99": 167.2319993376732 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4730880, - "combineLogicalBytes": 4730880, - "fanoutMean": 5.15625, - "recvTokensMax": 44, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 81.40800148248672, - "p90": 83.99999886751175, - "p95": 86.33600175380707, - "p99": 91.36000275611877 - }, - "combine": { - "p50": 73.37599992752075, - "p90": 78.75200361013412, - "p95": 79.6160027384758, - "p99": 81.34400099515915 - }, - "roundtrip": { - "p50": 125.95200538635254, - "p90": 133.15199315547943, - "p95": 134.5919966697693, - "p99": 140.32000303268433 - }, - "isolatedSum": { - "p50": 154.78400141000748, - "p90": 162.75200247764587, - "p95": 165.95200449228287, - "p99": 172.70400375127792 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 9691136, - "combineLogicalBytes": 9691136, - "fanoutMean": 5.28125, - "recvTokensMax": 88, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 90.08000046014786, - "p90": 92.54399687051773, - "p95": 94.4959968328476, - "p99": 98.52799773216248 - }, - "combine": { - "p50": 80.09599894285202, - "p90": 81.56800270080566, - "p95": 82.07999914884567, - "p99": 87.2960016131401 - }, - "roundtrip": { - "p50": 141.08799397945404, - "p90": 144.96000111103058, - "p95": 146.30399644374847, - "p99": 150.33599734306335 - }, - "isolatedSum": { - "p50": 170.17599940299988, - "p90": 174.1119995713234, - "p95": 176.57599598169327, - "p99": 185.82399934530258 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 19568640, - "combineLogicalBytes": 19568640, - "fanoutMean": 5.33203125, - "recvTokensMax": 179, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 64, - 
"globalTokens": 512, - "dispatch": { - "p50": 95.23200243711472, - "p90": 113.24799805879593, - "p95": 114.59200084209442, - "p99": 119.10399794578552 - }, - "combine": { - "p50": 89.85599875450134, - "p90": 98.2080027461052, - "p95": 114.3679991364479, - "p99": 130.49599528312683 - }, - "roundtrip": { - "p50": 159.39199924468994, - "p90": 165.53600132465363, - "p95": 167.87199676036835, - "p99": 179.51999604701996 - }, - "isolatedSum": { - "p50": 185.08800119161606, - "p90": 211.45600080490112, - "p95": 228.95999997854233, - "p99": 249.59999322891235 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 38750208, - "combineLogicalBytes": 38750208, - "fanoutMean": 5.279296875, - "recvTokensMax": 348, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 111.29599809646606, - "p90": 117.21599847078323, - "p95": 118.43200027942657, - "p99": 122.72000312805176 - }, - "combine": { - "p50": 106.39999806880951, - "p90": 112.28799819946289, - "p95": 113.11999708414078, - "p99": 114.33599889278412 - }, - "roundtrip": { - "p50": 197.63199985027313, - "p90": 202.11200416088104, - "p95": 203.39199900627136, - "p99": 206.9759964942932 - }, - "isolatedSum": { - "p50": 217.69599616527557, - "p90": 229.50399667024612, - "p95": 231.55199736356735, - "p99": 237.05600202083588 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 77342720, - "combineLogicalBytes": 77342720, - "fanoutMean": 5.2685546875, - "recvTokensMax": 687, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-fc133662", - "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform·empty-rank|8|decode|normal|none|empty-rank|0|tuned||5621f0d4899ad7a", - "colorKey": "h100_d54acd03", - "comparisonKey": "fb346b1019e55bb0", - "schemaVersion": 3, - "generatedAt": "2026-06-27T00:13:31.132134+00:00", - "status": "valid", - 
"publicationStatus": "comparable-experimental", - "runner": "h100-dgxc-slurm_01", - "sku": "h100", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · bf16 · uniform·empty-rank", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform·empty-rank", - "routingStep": 0, - "unevenTokens": "empty-rank", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "5621f0d4899ad7a", - "workloadId": null, - "workloadSource": "seeded-runtime", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28272375977", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272375977", - "createdAt": "2026-06-27T00:13:31.132134+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 8, - "globalTokens": 63, - "dispatch": { - "p50": 98.01600128412247, - "p90": 108.03200304508209, - "p95": 124.22399967908859, - "p99": 164.000004529953 - }, - "combine": { - "p50": 80.73599636554718, - "p90": 89.63199704885483, - 
"p95": 104.63999956846237, - "p99": 112.5440001487732 - }, - "roundtrip": { - "p50": 154.1759967803955, - "p90": 160.35200655460358, - "p95": 162.08000481128693, - "p99": 175.3920018672943 - }, - "isolatedSum": { - "p50": 178.75199764966965, - "p90": 197.66400009393692, - "p95": 228.86399924755096, - "p99": 276.5440046787262 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4888576, - "combineLogicalBytes": 4888576, - "fanoutMean": 5.412698268890381, - "recvTokensMax": 46, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 252, - "dispatch": { - "p50": 104.76800054311752, - "p90": 134.0479999780655, - "p95": 136.1279934644699, - "p99": 144.41600441932678 - }, - "combine": { - "p50": 89.02399986982346, - "p90": 104.12800312042236, - "p95": 104.41599786281586, - "p99": 107.90400207042694 - }, - "roundtrip": { - "p50": 166.59200191497803, - "p90": 189.95200097560883, - "p95": 191.96799397468567, - "p99": 199.5840072631836 - }, - "isolatedSum": { - "p50": 193.79200041294098, - "p90": 238.17600309848785, - "p95": 240.54399132728577, - "p99": 252.32000648975372 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 19396608, - "combineLogicalBytes": 19396608, - "fanoutMean": 5.36904764175415, - "recvTokensMax": 180, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1022, - "dispatch": { - "p50": 130.52800297737122, - "p90": 139.90400731563568, - "p95": 151.61600708961487, - "p99": 458.5599899291992 - }, - "combine": { - "p50": 120.7680031657219, - "p90": 127.93600559234619, - "p95": 128.54400277137756, - "p99": 129.50399518013 - }, - "roundtrip": { - "p50": 216.35200083255768, - "p90": 221.98399901390076, - "p95": 224.7679978609085, - "p99": 229.5359969139099 - }, - "isolatedSum": { - "p50": 251.2960061430931, - "p90": 267.8400129079819, - "p95": 280.16000986099243, - "p99": 588.0639851093292 - }, - 
"roundtripMeasured": true, - "dispatchLogicalBytes": 77529088, - "combineLogicalBytes": 77529088, - "fanoutMean": 5.2915849685668945, - "recvTokensMax": 722, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-e7e5caec", - "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform·linear|8|decode|normal|none|linear|0|tuned||b029c1a6fded400", - "colorKey": "h100_f70758a0", - "comparisonKey": "fb346b1019e55bb0", - "schemaVersion": 3, - "generatedAt": "2026-06-27T00:13:24.801629+00:00", - "status": "valid", - "publicationStatus": "comparable-experimental", - "runner": "h100-dgxc-slurm_00", - "sku": "h100", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · bf16 · uniform·linear", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform·linear", - "routingStep": 0, - "unevenTokens": "linear", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "b029c1a6fded400", - "workloadId": null, - "workloadSource": "seeded-runtime", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": 
"sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28272372388", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272372388", - "createdAt": "2026-06-27T00:13:24.801629+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 98.24000298976898, - "p90": 103.64799946546555, - "p95": 106.4319983124733, - "p99": 112.5119999051094 - }, - "combine": { - "p50": 80.73599636554718, - "p90": 87.55200356245041, - "p95": 88.03199976682663, - "p99": 90.08000046014786 - }, - "roundtrip": { - "p50": 154.33600544929504, - "p90": 159.45599973201752, - "p95": 161.6639941930771, - "p99": 166.75199568271637 - }, - "isolatedSum": { - "p50": 178.97599935531616, - "p90": 191.20000302791595, - "p95": 194.46399807929993, - "p99": 202.59200036525726 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 100.09600222110748, - "p90": 105.27999699115753, - "p95": 106.91200196743011, - "p99": 113.37599903345108 - }, - "combine": { - "p50": 89.53599631786346, - "p90": 96.16000205278397, - "p95": 96.73599898815155, - "p99": 98.43199700117111 - }, - "roundtrip": { - "p50": 163.39200735092163, - "p90": 168.99199783802032, - "p95": 170.43200135231018, - "p99": 174.81599748134613 - }, - "isolatedSum": { - "p50": 189.63199853897095, - "p90": 201.4399990439415, - "p95": 203.64800095558167, - "p99": 211.8079960346222 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 19726336, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 182, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { 
- "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 130.75199723243713, - "p90": 136.99199259281158, - "p95": 138.7840062379837, - "p99": 143.42400431632996 - }, - "combine": { - "p50": 128.1599998474121, - "p90": 130.40000200271606, - "p95": 135.8720064163208, - "p99": 278.6880135536194 - }, - "roundtrip": { - "p50": 225.75999796390533, - "p90": 231.74400627613068, - "p95": 232.80000686645508, - "p99": 235.6480062007904 - }, - "isolatedSum": { - "p50": 258.91199707984924, - "p90": 267.39199459552765, - "p95": 274.6560126543045, - "p99": 422.11201786994934 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-5fad8218", - "identity": "h100|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|uniform+eplb|8|decode|normal|none|none|0|tuned||73351bbcd4d02de", - "colorKey": "h100_fb5b86de", - "comparisonKey": "bba2bec66db838b4", - "schemaVersion": 3, - "generatedAt": "2026-06-26T23:59:15.450287+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_17", - "sku": "h100", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · bf16 · uniform+eplb", - "model": "DeepSeek-V3 (EPLB physical)", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 288, - "routing": "uniform", - "routingLabel": "uniform+eplb", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": true, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - 
"achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "73351bbcd4d02de", - "workloadId": "set:8:d8d49658059863f2", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": 1.078125, - "eplbImbalanceAfter": 1.00048828125, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28271923814", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271923814", - "createdAt": "2026-06-26T23:59:15.450287+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 96.99200093746185, - "p90": 104.89600151777267, - "p95": 107.04000294208527, - "p99": 111.68000102043152 - }, - "combine": { - "p50": 75.29599964618683, - "p90": 81.28000050783157, - "p95": 81.69600367546082, - "p99": 83.20000022649765 - }, - "roundtrip": { - "p50": 146.27200365066528, - "p90": 154.11199629306793, - "p95": 156.031996011734, - "p99": 158.6879938840866 - }, - "isolatedSum": { - "p50": 172.28800058364868, - "p90": 186.17600202560425, - "p95": 188.73600661754608, - "p99": 194.88000124692917 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 516096, - "combineLogicalBytes": 516096, - "fanoutMean": 4.5, - "recvTokensMax": 6, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 72.09599763154984, - "p90": 103.87200117111206, - "p95": 106.4319983124733, - "p99": 113.76000195741653 - }, - "combine": { - "p50": 72.67200201749802, - "p90": 81.18399977684021, - 
"p95": 81.82399719953537, - "p99": 84.28800106048584 - }, - "roundtrip": { - "p50": 127.48800218105316, - "p90": 153.76000106334686, - "p95": 156.3200056552887, - "p99": 158.720001578331 - }, - "isolatedSum": { - "p50": 144.76799964904785, - "p90": 185.05600094795227, - "p95": 188.25599551200867, - "p99": 198.04800301790237 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1089536, - "combineLogicalBytes": 1089536, - "fanoutMean": 4.75, - "recvTokensMax": 11, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 71.26399874687195, - "p90": 100.89600086212158, - "p95": 104.19200360774994, - "p99": 112.96000331640244 - }, - "combine": { - "p50": 72.7040022611618, - "p90": 80.4160013794899, - "p95": 80.6720033288002, - "p99": 87.80799806118011 - }, - "roundtrip": { - "p50": 130.0159990787506, - "p90": 154.78399395942688, - "p95": 158.81599485874176, - "p99": 165.53600132465363 - }, - "isolatedSum": { - "p50": 143.96800100803375, - "p90": 181.31200224161148, - "p95": 184.86400693655014, - "p99": 200.76800137758255 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2207744, - "combineLogicalBytes": 2207744, - "fanoutMean": 4.8125, - "recvTokensMax": 23, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 96.79999947547913, - "p90": 103.16800326108932, - "p95": 105.79200088977814, - "p99": 110.46399921178818 - }, - "combine": { - "p50": 80.73599636554718, - "p90": 81.98399841785431, - "p95": 82.36800134181976, - "p99": 89.75999802350998 - }, - "roundtrip": { - "p50": 150.2400040626526, - "p90": 156.47999942302704, - "p95": 158.91200304031372, - "p99": 168.2240068912506 - }, - "isolatedSum": { - "p50": 177.5359958410263, - "p90": 185.15200167894363, - "p95": 188.1600022315979, - "p99": 200.22399723529816 - }, - "roundtripMeasured": true, - 
"dispatchLogicalBytes": 4558848, - "combineLogicalBytes": 4558848, - "fanoutMean": 4.96875, - "recvTokensMax": 46, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 96.83199971914291, - "p90": 102.24000364542007, - "p95": 104.44799810647964, - "p99": 107.77600109577179 - }, - "combine": { - "p50": 81.05599880218506, - "p90": 87.80799806118011, - "p95": 88.70399743318558, - "p99": 89.75999802350998 - }, - "roundtrip": { - "p50": 152.73599326610565, - "p90": 160.73599457740784, - "p95": 162.75200247764587, - "p99": 167.55199432373047 - }, - "isolatedSum": { - "p50": 177.88799852132797, - "p90": 190.0480017066002, - "p95": 193.15199553966522, - "p99": 197.53599911928177 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 9347072, - "combineLogicalBytes": 9347072, - "fanoutMean": 5.09375, - "recvTokensMax": 86, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 91.32800251245499, - "p90": 101.9200012087822, - "p95": 104.19200360774994, - "p99": 108.57599973678589 - }, - "combine": { - "p50": 81.216000020504, - "p90": 90.01599997282028, - "p95": 90.40000289678574, - "p99": 97.88800030946732 - }, - "roundtrip": { - "p50": 142.2400027513504, - "p90": 161.8880033493042, - "p95": 163.96799683570862, - "p99": 168.67199540138245 - }, - "isolatedSum": { - "p50": 172.54400253295898, - "p90": 191.93600118160248, - "p95": 194.59200650453568, - "p99": 206.4640000462532 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 18995200, - "combineLogicalBytes": 18995200, - "fanoutMean": 5.17578125, - "recvTokensMax": 178, - "stragglerRank": 1, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 64, - "globalTokens": 512, - "dispatch": { - "p50": 108.25599730014801, - "p90": 114.9120032787323, - "p95": 117.08799749612808, - 
"p99": 121.72800302505493 - }, - "combine": { - "p50": 96.0640013217926, - "p90": 97.85600006580353, - "p95": 102.11200267076492, - "p99": 108.96000266075134 - }, - "roundtrip": { - "p50": 166.46400094032288, - "p90": 181.63199722766876, - "p95": 186.0159933567047, - "p99": 189.91999328136444 - }, - "isolatedSum": { - "p50": 204.3199986219406, - "p90": 212.76800334453583, - "p95": 219.200000166893, - "p99": 230.68800568580627 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 38291456, - "combineLogicalBytes": 38291456, - "fanoutMean": 5.216796875, - "recvTokensMax": 348, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 112.12799698114395, - "p90": 131.26400113105774, - "p95": 135.6479972600937, - "p99": 141.05600118637085 - }, - "combine": { - "p50": 106.36799782514572, - "p90": 117.37599968910217, - "p95": 120.80000340938568, - "p99": 121.8239963054657 - }, - "roundtrip": { - "p50": 195.68000733852386, - "p90": 214.59199488162994, - "p95": 216.60800278186798, - "p99": 221.91999852657318 - }, - "isolatedSum": { - "p50": 218.49599480628967, - "p90": 248.6400008201599, - "p95": 256.44800066947937, - "p99": 262.87999749183655 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 77113344, - "combineLogicalBytes": 77113344, - "fanoutMean": 5.2529296875, - "recvTokensMax": 685, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-7f743bfe", - "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|decode|normal|none|none|0|tuned||14ded8461f2636c", - "colorKey": "h100_aa268d13", - "comparisonKey": "791af0af2f802328", - "schemaVersion": 3, - "generatedAt": "2026-06-26T23:59:41.322977+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_18", - "sku": "h100", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - 
"resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · bf16 · zipf", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "zipf", - "routingLabel": "zipf", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "14ded8461f2636c", - "workloadId": "set:8:f5576e2b712d38c3", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28271945409", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271945409", - "createdAt": "2026-06-26T23:59:41.322977+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 93.37600320577621, - "p90": 101.59999877214432, - "p95": 103.16800326108932, - "p99": 108.15999656915665 - }, - "combine": { - "p50": 73.69600236415863, - "p90": 78.17599922418594, - "p95": 79.99999821186066, - "p99": 82.59200304746628 - }, - "roundtrip": { - "p50": 142.59199798107147, - "p90": 150.62400698661804, - "p95": 152.54400670528412, - "p99": 
159.5200002193451 - }, - "isolatedSum": { - "p50": 167.07200556993484, - "p90": 179.77599799633026, - "p95": 183.16800147294998, - "p99": 190.75199961662292 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 444416, - "combineLogicalBytes": 444416, - "fanoutMean": 3.875, - "recvTokensMax": 8, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 96.0640013217926, - "p90": 100.89600086212158, - "p95": 101.82400047779083, - "p99": 107.07200318574905 - }, - "combine": { - "p50": 74.43200051784515, - "p90": 80.48000186681747, - "p95": 81.216000020504, - "p99": 82.11199939250946 - }, - "roundtrip": { - "p50": 143.39199662208557, - "p90": 147.87200093269348, - "p95": 153.31199765205383, - "p99": 168.60799491405487 - }, - "isolatedSum": { - "p50": 170.49600183963776, - "p90": 181.37600272893906, - "p95": 183.04000049829483, - "p99": 189.18400257825851 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 845824, - "combineLogicalBytes": 845824, - "fanoutMean": 3.6875, - "recvTokensMax": 16, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 95.87199985980988, - "p90": 100.73599964380264, - "p95": 102.81600058078766, - "p99": 109.95200276374817 - }, - "combine": { - "p50": 74.30399954319, - "p90": 80.89599758386612, - "p95": 81.4720019698143, - "p99": 84.19200032949448 - }, - "roundtrip": { - "p50": 142.752006649971, - "p90": 153.02400290966034, - "p95": 154.9759954214096, - "p99": 160.0639969110489 - }, - "isolatedSum": { - "p50": 170.17599940299988, - "p90": 181.63199722766876, - "p95": 184.28800255060196, - "p99": 194.14400309324265 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1691648, - "combineLogicalBytes": 1691648, - "fanoutMean": 3.6875, - "recvTokensMax": 32, - "stragglerRank": 3, - "correct": true, - "samplesPooled": 600, - 
"trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 97.88800030946732, - "p90": 101.82400047779083, - "p95": 103.96800190210342, - "p99": 111.42399907112122 - }, - "combine": { - "p50": 75.6160020828247, - "p90": 81.4720019698143, - "p95": 82.04799890518188, - "p99": 84.03199911117554 - }, - "roundtrip": { - "p50": 146.7519998550415, - "p90": 153.47200632095337, - "p95": 154.9759954214096, - "p99": 167.9680049419403 - }, - "isolatedSum": { - "p50": 173.50400239229202, - "p90": 183.29600244760513, - "p95": 186.0160008072853, - "p99": 195.45599818229675 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 3354624, - "combineLogicalBytes": 3354624, - "fanoutMean": 3.65625, - "recvTokensMax": 64, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 97.08800166845322, - "p90": 100.67199915647507, - "p95": 104.25599664449692, - "p99": 110.6560006737709 - }, - "combine": { - "p50": 78.94399762153625, - "p90": 82.04799890518188, - "p95": 82.78399705886841, - "p99": 89.40800279378891 - }, - "roundtrip": { - "p50": 150.7200002670288, - "p90": 159.10400450229645, - "p95": 161.69600188732147, - "p99": 167.07199811935425 - }, - "isolatedSum": { - "p50": 176.03199928998947, - "p90": 182.71999806165695, - "p95": 187.03999370336533, - "p99": 200.06400346755981 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 6537216, - "combineLogicalBytes": 6537216, - "fanoutMean": 3.5625, - "recvTokensMax": 127, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 96.47999703884125, - "p90": 101.31199657917023, - "p95": 104.5759990811348, - "p99": 110.62400043010712 - }, - "combine": { - "p50": 86.46400272846222, - "p90": 90.11200070381165, - "p95": 90.62399715185165, - "p99": 93.18400174379349 - }, - "roundtrip": { - "p50": 
158.75199437141418, - "p90": 163.55200111865997, - "p95": 164.89599645137787, - "p99": 169.21600699424744 - }, - "isolatedSum": { - "p50": 182.94399976730347, - "p90": 191.42399728298187, - "p95": 195.19999623298645, - "p99": 203.8080021739006 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 12859392, - "combineLogicalBytes": 12859392, - "fanoutMean": 3.50390625, - "recvTokensMax": 255, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 64, - "globalTokens": 512, - "dispatch": { - "p50": 106.9440022110939, - "p90": 138.36799561977386, - "p95": 143.0400013923645, - "p99": 250.2720057964325 - }, - "combine": { - "p50": 95.0080007314682, - "p90": 98.39999675750732, - "p95": 98.91200065612793, - "p99": 105.59999942779541 - }, - "roundtrip": { - "p50": 176.67199671268463, - "p90": 184.03199315071106, - "p95": 187.3600035905838, - "p99": 190.5599981546402 - }, - "isolatedSum": { - "p50": 201.9520029425621, - "p90": 236.7679923772812, - "p95": 241.95200204849243, - "p99": 355.8720052242279 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 25145344, - "combineLogicalBytes": 25145344, - "fanoutMean": 3.42578125, - "recvTokensMax": 510, - "stragglerRank": 2, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 128.4160017967224, - "p90": 145.9520012140274, - "p95": 148.83199334144592, - "p99": 151.99999511241913 - }, - "combine": { - "p50": 119.74400281906128, - "p90": 122.56000190973282, - "p95": 123.80799651145935, - "p99": 129.7920048236847 - }, - "roundtrip": { - "p50": 228.2560020685196, - "p90": 233.88800024986267, - "p95": 236.12800240516663, - "p99": 240.28800427913666 - }, - "isolatedSum": { - "p50": 248.1600046157837, - "p90": 268.5120031237602, - "p95": 272.6399898529053, - "p99": 281.7919999361038 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 49946624, - "combineLogicalBytes": 49946624, - 
"fanoutMean": 3.40234375, - "recvTokensMax": 1022, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-456ed1f6", - "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|decode|normal|none|none|0|tuned||1fa7fe74d0e30a3", - "colorKey": "h100_aa268d13", - "comparisonKey": "791af0af2f802328", - "schemaVersion": 3, - "generatedAt": "2026-06-26T23:55:00.953910+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_16", - "sku": "h100", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · bf16 · zipf", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "zipf", - "routingLabel": "zipf", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "1fa7fe74d0e30a3", - "workloadId": "set:4:f5576e2b712d38c3", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28271802749", - "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271802749", - "createdAt": "2026-06-26T23:55:00.953910+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 97.34400361776352, - "p90": 106.33599758148193, - "p95": 108.99200290441513, - "p99": 118.14399808645248 - }, - "combine": { - "p50": 78.72000336647034, - "p90": 81.11999928951263, - "p95": 82.14399963617325, - "p99": 87.42400258779526 - }, - "roundtrip": { - "p50": 148.76799285411835, - "p90": 160.5439931154251, - "p95": 164.73600268363953, - "p99": 172.44799435138702 - }, - "isolatedSum": { - "p50": 176.06400698423386, - "p90": 187.45599687099457, - "p95": 191.13600254058838, - "p99": 205.56800067424774 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 444416, - "combineLogicalBytes": 444416, - "fanoutMean": 3.875, - "recvTokensMax": 8, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 97.50399738550186, - "p90": 104.38399761915207, - "p95": 108.99200290441513, - "p99": 137.2479945421219 - }, - "combine": { - "p50": 79.39200103282928, - "p90": 86.68799698352814, - "p95": 87.52000331878662, - "p99": 103.90400141477585 - }, - "roundtrip": { - "p50": 152.99199521541595, - "p90": 162.9759967327118, - "p95": 165.69599509239197, - "p99": 171.55200242996216 - }, - "isolatedSum": { - "p50": 176.89599841833115, - "p90": 191.0719946026802, - "p95": 196.51200622320175, - "p99": 241.15199595689774 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 3354624, - "combineLogicalBytes": 3354624, - "fanoutMean": 3.65625, - "recvTokensMax": 64, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 102.33599692583084, - "p90": 111.68000102043152, - "p95": 115.68000167608261, - "p99": 
123.74400347471237 - }, - "combine": { - "p50": 87.45600283145905, - "p90": 94.81599926948547, - "p95": 95.32800316810608, - "p99": 96.3200032711029 - }, - "roundtrip": { - "p50": 160.7999950647354, - "p90": 168.67199540138245, - "p95": 171.29600048065186, - "p99": 178.52799594402313 - }, - "isolatedSum": { - "p50": 189.7919997572899, - "p90": 206.496000289917, - "p95": 211.0080048441887, - "p99": 220.06400674581528 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 12859392, - "combineLogicalBytes": 12859392, - "fanoutMean": 3.50390625, - "recvTokensMax": 255, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 139.39200341701508, - "p90": 145.34400403499603, - "p95": 147.5200057029724, - "p99": 163.71199488639832 - }, - "combine": { - "p50": 120.15999853610992, - "p90": 128.1599998474121, - "p95": 128.86400520801544, - "p99": 129.88799810409546 - }, - "roundtrip": { - "p50": 227.87199914455414, - "p90": 232.7360063791275, - "p95": 235.32800376415253, - "p99": 255.13601303100586 - }, - "isolatedSum": { - "p50": 259.552001953125, - "p90": 273.50400388240814, - "p95": 276.38401091098785, - "p99": 293.5999929904938 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 49946624, - "combineLogicalBytes": 49946624, - "fanoutMean": 3.40234375, - "recvTokensMax": 1022, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-db353ddd", - "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|decode|normal|none|none|0|tuned||22da8b58646609c", - "colorKey": "h100_002beb29", - "comparisonKey": "d83561aeea03cdbc", - "schemaVersion": 3, - "generatedAt": "2026-06-27T00:01:11.693533+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_12", - "sku": "h100", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": 
"tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · bf16 · zipf-heavy", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "zipf-heavy", - "routingLabel": "zipf-heavy", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "22da8b58646609c", - "workloadId": "set:8:6b84350720aa8233", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28271987393", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271987393", - "createdAt": "2026-06-27T00:01:11.693533+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 94.14400160312653, - "p90": 104.41599786281586, - "p95": 109.8560020327568, - "p99": 133.69600474834442 - }, - "combine": { - "p50": 71.32799923419952, - "p90": 75.03999769687653, - "p95": 80.86399734020233, - "p99": 237.34399676322937 - }, - "roundtrip": { - "p50": 141.2159949541092, - "p90": 150.39999783039093, - "p95": 151.8079936504364, - "p99": 
244.73600089550018 - }, - "isolatedSum": { - "p50": 165.47200083732605, - "p90": 179.45599555969238, - "p95": 190.71999937295914, - "p99": 371.0400015115738 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 172032, - "combineLogicalBytes": 172032, - "fanoutMean": 1.5, - "recvTokensMax": 8, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 94.43199634552002, - "p90": 101.50399804115295, - "p95": 103.04000228643417, - "p99": 105.85600137710571 - }, - "combine": { - "p50": 72.03199714422226, - "p90": 73.95199686288834, - "p95": 74.5600014925003, - "p99": 79.80799674987793 - }, - "roundtrip": { - "p50": 141.02399349212646, - "p90": 147.77599275112152, - "p95": 150.176003575325, - "p99": 175.6799966096878 - }, - "isolatedSum": { - "p50": 166.46399348974228, - "p90": 175.4559949040413, - "p95": 177.60000377893448, - "p99": 185.66399812698364 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 315392, - "combineLogicalBytes": 315392, - "fanoutMean": 1.375, - "recvTokensMax": 16, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 95.20000219345093, - "p90": 101.47199779748917, - "p95": 103.13600301742554, - "p99": 108.12799632549286 - }, - "combine": { - "p50": 70.8480030298233, - "p90": 78.65600287914276, - "p95": 79.0719985961914, - "p99": 81.53600245714188 - }, - "roundtrip": { - "p50": 143.93599331378937, - "p90": 152.41600573062897, - "p95": 155.61600029468536, - "p99": 564.3519759178162 - }, - "isolatedSum": { - "p50": 166.04800522327423, - "p90": 180.12800067663193, - "p95": 182.20800161361694, - "p99": 189.66399878263474 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 616448, - "combineLogicalBytes": 616448, - "fanoutMean": 1.34375, - "recvTokensMax": 32, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - 
"trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 96.25600278377533, - "p90": 103.58399897813797, - "p95": 107.58399963378906, - "p99": 168.09600591659546 - }, - "combine": { - "p50": 75.71200281381607, - "p90": 80.1599994301796, - "p95": 80.83199709653854, - "p99": 82.30400085449219 - }, - "roundtrip": { - "p50": 144.73600685596466, - "p90": 150.81599354743958, - "p95": 152.79999375343323, - "p99": 157.95199573040009 - }, - "isolatedSum": { - "p50": 171.9680055975914, - "p90": 183.74399840831757, - "p95": 188.4159967303276, - "p99": 250.40000677108765 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1376256, - "combineLogicalBytes": 1376256, - "fanoutMean": 1.5, - "recvTokensMax": 64, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 96.3200032711029, - "p90": 102.39999741315842, - "p95": 104.51199859380722, - "p99": 110.27199774980545 - }, - "combine": { - "p50": 78.65600287914276, - "p90": 81.37600123882294, - "p95": 81.82399719953537, - "p99": 87.0399996638298 - }, - "roundtrip": { - "p50": 146.33600413799286, - "p90": 152.38399803638458, - "p95": 153.76000106334686, - "p99": 157.82399475574493 - }, - "isolatedSum": { - "p50": 174.97600615024567, - "p90": 183.77599865198135, - "p95": 186.3359957933426, - "p99": 197.31199741363525 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2781184, - "combineLogicalBytes": 2781184, - "fanoutMean": 1.515625, - "recvTokensMax": 128, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 96.67199850082397, - "p90": 101.95200145244598, - "p95": 103.87200117111206, - "p99": 109.56799983978271 - }, - "combine": { - "p50": 83.20000022649765, - "p90": 88.639996945858, - "p95": 89.28000181913376, - "p99": 90.27200192213058 - }, - "roundtrip": { - "p50": 
154.27200496196747, - "p90": 159.90400314331055, - "p95": 161.8880033493042, - "p99": 171.64799571037292 - }, - "isolatedSum": { - "p50": 179.87199872732162, - "p90": 190.59199839830399, - "p95": 193.15200299024582, - "p99": 199.8400017619133 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 5533696, - "combineLogicalBytes": 5533696, - "fanoutMean": 1.5078125, - "recvTokensMax": 256, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 64, - "globalTokens": 512, - "dispatch": { - "p50": 104.80000078678131, - "p90": 112.5440001487732, - "p95": 115.35999923944473, - "p99": 119.64800208806992 - }, - "combine": { - "p50": 95.32800316810608, - "p90": 97.6639986038208, - "p95": 98.14400225877762, - "p99": 103.45599800348282 - }, - "roundtrip": { - "p50": 173.21600019931793, - "p90": 177.47199535369873, - "p95": 178.97599935531616, - "p99": 184.09599363803864 - }, - "isolatedSum": { - "p50": 200.1280039548874, - "p90": 210.207998752594, - "p95": 213.50400149822235, - "p99": 223.10400009155273 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 11210752, - "combineLogicalBytes": 11210752, - "fanoutMean": 1.52734375, - "recvTokensMax": 512, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 120.64000219106674, - "p90": 141.9840008020401, - "p95": 143.23200285434723, - "p99": 148.54399859905243 - }, - "combine": { - "p50": 119.48800086975098, - "p90": 122.04799801111221, - "p95": 122.56000190973282, - "p99": 123.58400225639343 - }, - "roundtrip": { - "p50": 219.84000504016876, - "p90": 226.17599368095398, - "p95": 227.29599475860596, - "p99": 232.16000199317932 - }, - "isolatedSum": { - "p50": 240.12800306081772, - "p90": 264.0319988131523, - "p95": 265.79200476408005, - "p99": 272.12800085544586 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 22650880, - "combineLogicalBytes": 
22650880, - "fanoutMean": 1.54296875, - "recvTokensMax": 1024, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-acf36978", - "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|decode|normal|none|none|0|tuned||47fddabb3277bec", - "colorKey": "h100_002beb29", - "comparisonKey": "d83561aeea03cdbc", - "schemaVersion": 3, - "generatedAt": "2026-06-26T23:55:11.297271+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_18", - "sku": "h100", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · bf16 · zipf-heavy", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "zipf-heavy", - "routingLabel": "zipf-heavy", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "47fddabb3277bec", - "workloadId": "set:4:6b84350720aa8233", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - 
"id": "28271810135", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271810135", - "createdAt": "2026-06-26T23:55:11.297271+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 95.83999961614609, - "p90": 101.27999633550644, - "p95": 104.86400127410889, - "p99": 111.51999980211258 - }, - "combine": { - "p50": 71.74400240182877, - "p90": 73.95199686288834, - "p95": 79.03999835252762, - "p99": 81.08799904584885 - }, - "roundtrip": { - "p50": 142.5279974937439, - "p90": 149.79200065135956, - "p95": 151.71200037002563, - "p99": 156.73600137233734 - }, - "isolatedSum": { - "p50": 167.58400201797485, - "p90": 175.23199319839478, - "p95": 183.9039996266365, - "p99": 192.60799884796143 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 172032, - "combineLogicalBytes": 172032, - "fanoutMean": 1.5, - "recvTokensMax": 8, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 98.1760025024414, - "p90": 104.96000200510025, - "p95": 106.91200196743011, - "p99": 112.44799941778183 - }, - "combine": { - "p50": 73.34399968385696, - "p90": 79.99999821186066, - "p95": 80.48000186681747, - "p99": 85.08799970149994 - }, - "roundtrip": { - "p50": 146.14400267601013, - "p90": 152.6080071926117, - "p95": 154.7520011663437, - "p99": 160.73599457740784 - }, - "isolatedSum": { - "p50": 171.52000218629837, - "p90": 184.9600002169609, - "p95": 187.3920038342476, - "p99": 197.53599911928177 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1376256, - "combineLogicalBytes": 1376256, - "fanoutMean": 1.5, - "recvTokensMax": 64, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 98.91200065612793, - "p90": 105.92000186443329, - "p95": 108.47999900579453, - 
"p99": 115.93600362539291 - }, - "combine": { - "p50": 82.87999778985977, - "p90": 88.54400366544724, - "p95": 88.92799913883209, - "p99": 90.27200192213058 - }, - "roundtrip": { - "p50": 156.19200468063354, - "p90": 162.84799575805664, - "p95": 165.56799411773682, - "p99": 169.72799599170685 - }, - "isolatedSum": { - "p50": 181.7919984459877, - "p90": 194.46400552988052, - "p95": 197.40799814462662, - "p99": 206.2080055475235 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 5533696, - "combineLogicalBytes": 5533696, - "fanoutMean": 1.5078125, - "recvTokensMax": 256, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 121.88799679279327, - "p90": 129.88799810409546, - "p95": 131.16799294948578, - "p99": 136.1279934644699 - }, - "combine": { - "p50": 114.68800157308578, - "p90": 121.18399888277054, - "p95": 122.079998254776, - "p99": 129.2160004377365 - }, - "roundtrip": { - "p50": 219.90400552749634, - "p90": 224.73600506782532, - "p95": 226.623997092247, - "p99": 230.30400276184082 - }, - "isolatedSum": { - "p50": 236.57599836587906, - "p90": 251.071996986866, - "p95": 253.24799120426178, - "p99": 265.3439939022064 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 22650880, - "combineLogicalBytes": 22650880, - "fanoutMean": 1.54296875, - "recvTokensMax": 1024, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-18fdfbeb", - "identity": "h100|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-heavy+eplb|8|decode|normal|none|none|0|tuned||5a3054422534366", - "colorKey": "h100_c44978e5", - "comparisonKey": "26b5ab23f62d3389", - "schemaVersion": 3, - "generatedAt": "2026-06-27T00:01:10.918377+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_11", - "sku": "h100", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - 
"resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · bf16 · zipf-heavy+eplb", - "model": "DeepSeek-V3 (EPLB physical)", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 288, - "routing": "zipf-heavy", - "routingLabel": "zipf-heavy+eplb", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": true, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "5a3054422534366", - "workloadId": "set:8:6b84350720aa8233", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": 7.40625, - "eplbImbalanceAfter": 1.0004417782738093, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28271992225", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271992225", - "createdAt": "2026-06-27T00:01:10.918377+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 94.01600062847137, - "p90": 101.59999877214432, - "p95": 102.68799960613251, - "p99": 107.96800255775452 - }, - "combine": { - "p50": 71.87200337648392, - "p90": 78.87999713420868, - "p95": 79.48800176382065, - "p99": 80.99199831485748 - }, - "roundtrip": { - "p50": 138.72000575065613, - "p90": 
147.2640037536621, - "p95": 148.76799285411835, - "p99": 153.08800339698792 - }, - "isolatedSum": { - "p50": 165.8880040049553, - "p90": 180.479995906353, - "p95": 182.17600136995316, - "p99": 188.960000872612 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 559104, - "combineLogicalBytes": 559104, - "fanoutMean": 4.875, - "recvTokensMax": 6, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 69.92000341415405, - "p90": 99.64799880981445, - "p95": 101.43999755382538, - "p99": 106.84800148010254 - }, - "combine": { - "p50": 71.6480016708374, - "p90": 79.71200346946716, - "p95": 80.64000308513641, - "p99": 81.91999793052673 - }, - "roundtrip": { - "p50": 129.34400141239166, - "p90": 143.71199905872345, - "p95": 146.08000218868256, - "p99": 150.39999783039093 - }, - "isolatedSum": { - "p50": 141.56800508499146, - "p90": 179.36000227928162, - "p95": 182.0800006389618, - "p99": 188.76799941062927 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1175552, - "combineLogicalBytes": 1175552, - "fanoutMean": 5.125, - "recvTokensMax": 12, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 71.74400240182877, - "p90": 99.80800002813339, - "p95": 101.79200023412704, - "p99": 107.96800255775452 - }, - "combine": { - "p50": 72.67200201749802, - "p90": 81.56800270080566, - "p95": 86.43200248479843, - "p99": 88.73599767684937 - }, - "roundtrip": { - "p50": 129.50399518013, - "p90": 156.47999942302704, - "p95": 159.13599729537964, - "p99": 162.6880019903183 - }, - "isolatedSum": { - "p50": 144.41600441932678, - "p90": 181.37600272893906, - "p95": 188.22400271892548, - "p99": 196.70400023460388 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2465792, - "combineLogicalBytes": 2465792, - "fanoutMean": 5.375, - "recvTokensMax": 25, - 
"stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 72.12799787521362, - "p90": 96.16000205278397, - "p95": 98.30400347709656, - "p99": 103.64799946546555 - }, - "combine": { - "p50": 72.9919970035553, - "p90": 81.08799904584885, - "p95": 81.60000294446945, - "p99": 87.13600039482117 - }, - "roundtrip": { - "p50": 127.9039978981018, - "p90": 152.16000378131866, - "p95": 155.90399503707886, - "p99": 157.24800527095795 - }, - "isolatedSum": { - "p50": 145.11999487876892, - "p90": 177.2480010986328, - "p95": 179.904006421566, - "p99": 190.7839998602867 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4988928, - "combineLogicalBytes": 4988928, - "fanoutMean": 5.4375, - "recvTokensMax": 47, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 82.0159986615181, - "p90": 98.55999797582626, - "p95": 101.50399804115295, - "p99": 106.33599758148193 - }, - "combine": { - "p50": 73.56800138950348, - "p90": 87.87199854850769, - "p95": 88.8959988951683, - "p99": 89.88799899816513 - }, - "roundtrip": { - "p50": 127.71199643611908, - "p90": 159.32799875736237, - "p95": 160.99199652671814, - "p99": 163.90399634838104 - }, - "isolatedSum": { - "p50": 155.58400005102158, - "p90": 186.43199652433395, - "p95": 190.39999693632126, - "p99": 196.22399657964706 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 9791488, - "combineLogicalBytes": 9791488, - "fanoutMean": 5.3359375, - "recvTokensMax": 94, - "stragglerRank": 3, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 89.82399851083755, - "p90": 101.27999633550644, - "p95": 102.65599936246872, - "p99": 107.29599744081497 - }, - "combine": { - "p50": 80.73599636554718, - "p90": 89.4400030374527, - "p95": 89.85599875450134, - 
"p99": 95.42399644851685 - }, - "roundtrip": { - "p50": 141.59999787807465, - "p90": 158.9439958333969, - "p95": 161.18399798870087, - "p99": 167.32800006866455 - }, - "isolatedSum": { - "p50": 170.55999487638474, - "p90": 190.71999937295914, - "p95": 192.51199811697006, - "p99": 202.71999388933182 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 19410944, - "combineLogicalBytes": 19410944, - "fanoutMean": 5.2890625, - "recvTokensMax": 178, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 64, - "globalTokens": 512, - "dispatch": { - "p50": 96.09600156545639, - "p90": 118.1119978427887, - "p95": 120.57600170373917, - "p99": 127.83999741077423 - }, - "combine": { - "p50": 89.82399851083755, - "p90": 103.20000350475311, - "p95": 103.80800068378448, - "p99": 104.70400005578995 - }, - "roundtrip": { - "p50": 160.288006067276, - "p90": 180.95999956130981, - "p95": 185.18400192260742, - "p99": 188.60800564289093 - }, - "isolatedSum": { - "p50": 185.92000007629395, - "p90": 221.3120013475418, - "p95": 224.38400238752365, - "p99": 232.54399746656418 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 38678528, - "combineLogicalBytes": 38678528, - "fanoutMean": 5.26953125, - "recvTokensMax": 360, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 114.52800035476685, - "p90": 135.0719928741455, - "p95": 136.6720050573349, - "p99": 140.00000059604645 - }, - "combine": { - "p50": 106.01600259542465, - "p90": 119.71200257539749, - "p95": 120.35199999809265, - "p99": 122.14399874210358 - }, - "roundtrip": { - "p50": 195.96800208091736, - "p90": 214.33599293231964, - "p95": 216.86400473117828, - "p99": 220.44800221920013 - }, - "isolatedSum": { - "p50": 220.5440029501915, - "p90": 254.783995449543, - "p95": 257.02400505542755, - "p99": 262.14399933815 - }, - "roundtripMeasured": true, - 
"dispatchLogicalBytes": 77285376, - "combineLogicalBytes": 77285376, - "fanoutMean": 5.2646484375, - "recvTokensMax": 704, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-efff3174", - "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-mild|8|decode|normal|none|none|0|tuned||f3df51be7d5c32b", - "colorKey": "h100_9aa30544", - "comparisonKey": "c4aa2e0da9446ced", - "schemaVersion": 3, - "generatedAt": "2026-06-27T00:00:21.116102+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_13", - "sku": "h100", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · bf16 · zipf-mild", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "zipf-mild", - "routingLabel": "zipf-mild", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "f3df51be7d5c32b", - "workloadId": "set:8:289b7f9c14292e96", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - 
"repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28271958693", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271958693", - "createdAt": "2026-06-27T00:00:21.116102+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 97.28000313043594, - "p90": 104.70400005578995, - "p95": 106.11200332641602, - "p99": 112.73600161075592 - }, - "combine": { - "p50": 79.71200346946716, - "p90": 82.65600353479385, - "p95": 99.13600236177444, - "p99": 275.4560112953186 - }, - "roundtrip": { - "p50": 147.61599898338318, - "p90": 155.32800555229187, - "p95": 156.73600137233734, - "p99": 162.91199624538422 - }, - "isolatedSum": { - "p50": 176.9920065999031, - "p90": 187.3600035905838, - "p95": 205.24800568819046, - "p99": 388.1920129060745 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 587776, - "combineLogicalBytes": 587776, - "fanoutMean": 5.125, - "recvTokensMax": 8, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 72.80000299215317, - "p90": 102.7199998497963, - "p95": 104.89600151777267, - "p99": 109.66400057077408 - }, - "combine": { - "p50": 73.15199822187424, - "p90": 81.44000172615051, - "p95": 81.88799768686295, - "p99": 82.91199803352356 - }, - "roundtrip": { - "p50": 129.4720023870468, - "p90": 153.3759981393814, - "p95": 156.15999698638916, - "p99": 164.92800414562225 - }, - "isolatedSum": { - "p50": 145.9520012140274, - "p90": 184.1600015759468, - "p95": 186.78399920463562, - "p99": 192.57599860429764 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1103872, - "combineLogicalBytes": 1103872, - "fanoutMean": 4.8125, - "recvTokensMax": 16, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 76.25599950551987, - 
"p90": 102.62399911880493, - "p95": 105.24799674749374, - "p99": 109.47199910879135 - }, - "combine": { - "p50": 73.31199944019318, - "p90": 81.4720019698143, - "p95": 86.20800077915192, - "p99": 89.34400230646133 - }, - "roundtrip": { - "p50": 129.56799566745758, - "p90": 157.9200029373169, - "p95": 160.35200655460358, - "p99": 166.04800522327423 - }, - "isolatedSum": { - "p50": 149.56799894571304, - "p90": 184.09600108861923, - "p95": 191.45599752664566, - "p99": 198.81600141525269 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2250752, - "combineLogicalBytes": 2250752, - "fanoutMean": 4.90625, - "recvTokensMax": 31, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 76.83199644088745, - "p90": 101.79200023412704, - "p95": 105.02400249242783, - "p99": 109.31199789047241 - }, - "combine": { - "p50": 73.5040009021759, - "p90": 82.04799890518188, - "p95": 86.40000224113464, - "p99": 88.54400366544724 - }, - "roundtrip": { - "p50": 130.23999333381653, - "p90": 159.39199924468994, - "p95": 161.82400286197662, - "p99": 165.98400473594666 - }, - "isolatedSum": { - "p50": 150.33599734306335, - "p90": 183.83999913930893, - "p95": 191.42400473356247, - "p99": 197.85600155591965 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4472832, - "combineLogicalBytes": 4472832, - "fanoutMean": 4.875, - "recvTokensMax": 62, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 96.00000083446503, - "p90": 104.73600029945374, - "p95": 108.51199924945831, - "p99": 115.74400216341019 - }, - "combine": { - "p50": 80.03199845552444, - "p90": 87.23200112581253, - "p95": 88.51200342178345, - "p99": 90.01599997282028 - }, - "roundtrip": { - "p50": 135.1040005683899, - "p90": 161.40800714492798, - "p95": 164.5440012216568, - "p99": 169.50400173664093 - }, - 
"isolatedSum": { - "p50": 176.03199928998947, - "p90": 191.96800142526627, - "p95": 197.02400267124176, - "p99": 205.76000213623047 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 8888320, - "combineLogicalBytes": 8888320, - "fanoutMean": 4.84375, - "recvTokensMax": 124, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 90.65599739551544, - "p90": 102.75200009346008, - "p95": 105.69600015878677, - "p99": 109.37599837779999 - }, - "combine": { - "p50": 81.60000294446945, - "p90": 90.59199690818787, - "p95": 95.32800316810608, - "p99": 97.47199714183807 - }, - "roundtrip": { - "p50": 145.1839953660965, - "p90": 165.56799411773682, - "p95": 168.5439944267273, - "p99": 174.68799650669098 - }, - "isolatedSum": { - "p50": 172.2560003399849, - "p90": 193.34399700164795, - "p95": 201.02400332689285, - "p99": 206.84799551963806 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 17733632, - "combineLogicalBytes": 17733632, - "fanoutMean": 4.83203125, - "recvTokensMax": 248, - "stragglerRank": 2, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 64, - "globalTokens": 512, - "dispatch": { - "p50": 101.43999755382538, - "p90": 116.89600348472595, - "p95": 119.77600306272507, - "p99": 138.7840062379837 - }, - "combine": { - "p50": 90.59199690818787, - "p90": 103.35999727249146, - "p95": 104.3199971318245, - "p99": 105.92000186443329 - }, - "roundtrip": { - "p50": 168.7680035829544, - "p90": 185.88800728321075, - "p95": 188.6720061302185, - "p99": 193.37600469589233 - }, - "isolatedSum": { - "p50": 192.03199446201324, - "p90": 220.2560007572174, - "p95": 224.09600019454956, - "p99": 244.704008102417 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 35424256, - "combineLogicalBytes": 35424256, - "fanoutMean": 4.826171875, - "recvTokensMax": 492, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, 
- "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 122.5920021533966, - "p90": 134.91199910640717, - "p95": 136.9280070066452, - "p99": 143.64799857139587 - }, - "combine": { - "p50": 115.07199704647064, - "p90": 128.63999605178833, - "p95": 130.40000200271606, - "p99": 139.71200585365295 - }, - "roundtrip": { - "p50": 215.5199944972992, - "p90": 233.66400599479675, - "p95": 235.35999655723572, - "p99": 240.12799561023712 - }, - "isolatedSum": { - "p50": 237.66399919986725, - "p90": 263.5519951581955, - "p95": 267.32800900936127, - "p99": 283.3600044250488 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 70160384, - "combineLogicalBytes": 70160384, - "fanoutMean": 4.779296875, - "recvTokensMax": 987, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-6d1780ec", - "identity": "h100|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-mild+eplb|8|decode|normal|none|none|0|tuned||16babcaf4204243", - "colorKey": "h100_e8b903ea", - "comparisonKey": "0d93a7b7a0fcf6d0", - "schemaVersion": 3, - "generatedAt": "2026-06-27T00:00:17.527263+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_01", - "sku": "h100", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · bf16 · zipf-mild+eplb", - "model": "DeepSeek-V3 (EPLB physical)", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 288, - "routing": "zipf-mild", - "routingLabel": "zipf-mild+eplb", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": true, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - 
"requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "16babcaf4204243", - "workloadId": "set:8:289b7f9c14292e96", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": 2.61328125, - "eplbImbalanceAfter": 1.0009114583333334, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28271962037", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271962037", - "createdAt": "2026-06-27T00:00:17.527263+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 98.55999797582626, - "p90": 106.33599758148193, - "p95": 108.51199924945831, - "p99": 113.21599781513214 - }, - "combine": { - "p50": 79.39200103282928, - "p90": 81.85599744319916, - "p95": 82.56000280380249, - "p99": 87.10400015115738 - }, - "roundtrip": { - "p50": 145.50399780273438, - "p90": 154.7199934720993, - "p95": 156.8640023469925, - "p99": 160.7999950647354 - }, - "isolatedSum": { - "p50": 177.95199900865555, - "p90": 188.1919950246811, - "p95": 191.0720020532608, - "p99": 200.31999796628952 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 602112, - "combineLogicalBytes": 602112, - "fanoutMean": 5.25, - "recvTokensMax": 7, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 73.60000163316727, - "p90": 108.31999778747559, - "p95": 109.66400057077408, - "p99": 115.13599753379822 - }, - "combine": { - "p50": 
72.51200079917908, - "p90": 81.60000294446945, - "p95": 82.36800134181976, - "p99": 87.20000088214874 - }, - "roundtrip": { - "p50": 129.05600666999817, - "p90": 156.47999942302704, - "p95": 160.0639969110489, - "p99": 162.1759980916977 - }, - "isolatedSum": { - "p50": 146.11200243234634, - "p90": 189.92000073194504, - "p95": 192.03200191259384, - "p99": 202.33599841594696 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1189888, - "combineLogicalBytes": 1189888, - "fanoutMean": 5.1875, - "recvTokensMax": 12, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 97.120001912117, - "p90": 103.87200117111206, - "p95": 105.66399991512299, - "p99": 110.68800091743469 - }, - "combine": { - "p50": 79.55200225114822, - "p90": 82.20800012350082, - "p95": 86.30400151014328, - "p99": 88.3840024471283 - }, - "roundtrip": { - "p50": 151.32799744606018, - "p90": 159.61599349975586, - "p95": 161.15200519561768, - "p99": 167.71200299263 - }, - "isolatedSum": { - "p50": 176.67200416326523, - "p90": 186.08000129461288, - "p95": 191.96800142526627, - "p99": 199.072003364563 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2408448, - "combineLogicalBytes": 2408448, - "fanoutMean": 5.25, - "recvTokensMax": 23, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 96.83199971914291, - "p90": 103.07200253009796, - "p95": 104.47999835014343, - "p99": 111.48799955844879 - }, - "combine": { - "p50": 79.48800176382065, - "p90": 82.49600231647491, - "p95": 87.0399996638298, - "p99": 88.76799792051315 - }, - "roundtrip": { - "p50": 152.38399803638458, - "p90": 159.96800363063812, - "p95": 162.20800578594208, - "p99": 166.59200191497803 - }, - "isolatedSum": { - "p50": 176.32000148296356, - "p90": 185.56800484657288, - "p95": 191.51999801397324, - "p99": 
200.25599747896194 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4859904, - "combineLogicalBytes": 4859904, - "fanoutMean": 5.296875, - "recvTokensMax": 47, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 96.92800045013428, - "p90": 102.01600193977356, - "p95": 104.76800054311752, - "p99": 113.02399635314941 - }, - "combine": { - "p50": 80.86399734020233, - "p90": 88.3840024471283, - "p95": 89.63199704885483, - "p99": 94.65599805116653 - }, - "roundtrip": { - "p50": 153.21600437164307, - "p90": 159.39199924468994, - "p95": 160.8320027589798, - "p99": 165.3759926557541 - }, - "isolatedSum": { - "p50": 177.7919977903366, - "p90": 190.40000438690186, - "p95": 194.39999759197235, - "p99": 207.67999440431595 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 9605120, - "combineLogicalBytes": 9605120, - "fanoutMean": 5.234375, - "recvTokensMax": 93, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 91.5519967675209, - "p90": 105.27999699115753, - "p95": 106.52799904346466, - "p99": 110.55999994277954 - }, - "combine": { - "p50": 81.216000020504, - "p90": 90.17600119113922, - "p95": 94.33600306510925, - "p99": 96.79999947547913 - }, - "roundtrip": { - "p50": 144.1279947757721, - "p90": 167.52000153064728, - "p95": 168.99199783802032, - "p99": 173.567995429039 - }, - "isolatedSum": { - "p50": 172.7679967880249, - "p90": 195.45599818229675, - "p95": 200.8640021085739, - "p99": 207.35999941825867 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 19367936, - "combineLogicalBytes": 19367936, - "fanoutMean": 5.27734375, - "recvTokensMax": 182, - "stragglerRank": 2, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 64, - "globalTokens": 512, - "dispatch": { - "p50": 104.80000078678131, - "p90": 
116.35199934244156, - "p95": 118.81600320339203, - "p99": 122.97599762678146 - }, - "combine": { - "p50": 96.38399630784988, - "p90": 104.00000214576721, - "p95": 104.5759990811348, - "p99": 106.4319983124733 - }, - "roundtrip": { - "p50": 177.76000499725342, - "p90": 185.44000387191772, - "p95": 187.16800212860107, - "p99": 190.3039962053299 - }, - "isolatedSum": { - "p50": 201.1839970946312, - "p90": 220.35200148820877, - "p95": 223.39200228452682, - "p99": 229.40799593925476 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 38535168, - "combineLogicalBytes": 38535168, - "fanoutMean": 5.25, - "recvTokensMax": 358, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 113.56800049543381, - "p90": 131.58400356769562, - "p95": 133.66399705410004, - "p99": 139.96799290180206 - }, - "combine": { - "p50": 106.55999928712845, - "p90": 119.55200135707855, - "p95": 120.09599804878235, - "p99": 121.05599790811539 - }, - "roundtrip": { - "p50": 198.46400618553162, - "p90": 217.6000028848648, - "p95": 218.75199675559998, - "p99": 224.2880016565323 - }, - "isolatedSum": { - "p50": 220.12799978256226, - "p90": 251.13600492477417, - "p95": 253.75999510288239, - "p99": 261.02399080991745 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 76869632, - "combineLogicalBytes": 76869632, - "fanoutMean": 5.236328125, - "recvTokensMax": 688, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-9d829c00", - "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-moderate|8|decode|normal|none|none|0|tuned||14ded8461f2636c", - "colorKey": "h100_552a4b73", - "comparisonKey": "95c165fc74bc43c0", - "schemaVersion": 3, - "generatedAt": "2026-06-27T00:00:35.674306+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_17", - "sku": "h100", - "backend": 
"deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · bf16 · zipf-moderate", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "zipf-moderate", - "routingLabel": "zipf-moderate", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "14ded8461f2636c", - "workloadId": "set:8:120a8dc1dba92ca9", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28271971983", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271971983", - "createdAt": "2026-06-27T00:00:35.674306+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 95.8079993724823, - "p90": 103.10400277376175, - "p95": 104.16000336408615, - "p99": 110.01600325107574 - }, - "combine": { - "p50": 74.33599978685379, - "p90": 81.56800270080566, - "p95": 81.98399841785431, - "p99": 83.29600095748901 - }, - "roundtrip": { - "p50": 
142.2719955444336, - "p90": 148.67199957370758, - "p95": 150.4639983177185, - "p99": 154.11199629306793 - }, - "isolatedSum": { - "p50": 170.1439991593361, - "p90": 184.6720054745674, - "p95": 186.14400178194046, - "p99": 193.31200420856476 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 444416, - "combineLogicalBytes": 444416, - "fanoutMean": 3.875, - "recvTokensMax": 8, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 70.88000327348709, - "p90": 101.98400169610977, - "p95": 102.94400155544281, - "p99": 106.01600259542465 - }, - "combine": { - "p50": 72.4480003118515, - "p90": 81.40800148248672, - "p95": 81.95199817419052, - "p99": 85.7279971241951 - }, - "roundtrip": { - "p50": 128.7039965391159, - "p90": 147.71200716495514, - "p95": 149.59999918937683, - "p99": 152.79999375343323 - }, - "isolatedSum": { - "p50": 143.3280035853386, - "p90": 183.3920031785965, - "p95": 184.89599972963333, - "p99": 191.74399971961975 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 845824, - "combineLogicalBytes": 845824, - "fanoutMean": 3.6875, - "recvTokensMax": 16, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 73.18399846553802, - "p90": 93.82399916648865, - "p95": 96.41599655151367, - "p99": 104.99200224876404 - }, - "combine": { - "p50": 70.8480030298233, - "p90": 77.82399654388428, - "p95": 78.59200239181519, - "p99": 83.45600217580795 - }, - "roundtrip": { - "p50": 125.44000148773193, - "p90": 151.74399316310883, - "p95": 154.1759967803955, - "p99": 160.09600460529327 - }, - "isolatedSum": { - "p50": 144.03200149536133, - "p90": 171.64799571037292, - "p95": 175.00799894332886, - "p99": 188.448004424572 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1691648, - "combineLogicalBytes": 1691648, - "fanoutMean": 3.6875, - 
"recvTokensMax": 32, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 75.83999633789062, - "p90": 100.22400319576263, - "p95": 102.39999741315842, - "p99": 107.4879989027977 - }, - "combine": { - "p50": 73.18399846553802, - "p90": 81.44000172615051, - "p95": 82.24000036716461, - "p99": 87.23200112581253 - }, - "roundtrip": { - "p50": 126.27199292182922, - "p90": 154.88000214099884, - "p95": 157.47199952602386, - "p99": 159.4880074262619 - }, - "isolatedSum": { - "p50": 149.02399480342865, - "p90": 181.66400492191315, - "p95": 184.63999778032303, - "p99": 194.72000002861023 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 3354624, - "combineLogicalBytes": 3354624, - "fanoutMean": 3.65625, - "recvTokensMax": 64, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 79.3600007891655, - "p90": 100.0640019774437, - "p95": 123.80799651145935, - "p99": 229.76000607013702 - }, - "combine": { - "p50": 73.88799637556076, - "p90": 82.2720006108284, - "p95": 83.36000144481659, - "p99": 89.28000181913376 - }, - "roundtrip": { - "p50": 130.17599284648895, - "p90": 154.62400019168854, - "p95": 157.3760062456131, - "p99": 162.7199947834015 - }, - "isolatedSum": { - "p50": 153.24799716472626, - "p90": 182.3360025882721, - "p95": 207.16799795627594, - "p99": 319.0400078892708 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 6537216, - "combineLogicalBytes": 6537216, - "fanoutMean": 3.5625, - "recvTokensMax": 127, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 88.44800293445587, - "p90": 103.71199995279312, - "p95": 105.76000064611435, - "p99": 110.1439967751503 - }, - "combine": { - "p50": 81.60000294446945, - "p90": 89.6959975361824, - 
"p95": 90.27200192213058, - "p99": 91.80799871683121 - }, - "roundtrip": { - "p50": 141.34399592876434, - "p90": 161.98399662971497, - "p95": 163.455992937088, - "p99": 169.24799978733063 - }, - "isolatedSum": { - "p50": 170.04800587892532, - "p90": 193.40799748897552, - "p95": 196.03200256824493, - "p99": 201.9519954919815 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 12859392, - "combineLogicalBytes": 12859392, - "fanoutMean": 3.50390625, - "recvTokensMax": 255, - "stragglerRank": 1, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 64, - "globalTokens": 512, - "dispatch": { - "p50": 102.36799716949463, - "p90": 119.6800023317337, - "p95": 121.31199985742569, - "p99": 123.77600371837616 - }, - "combine": { - "p50": 89.9839997291565, - "p90": 96.03200107812881, - "p95": 99.48799759149551, - "p99": 102.04800218343735 - }, - "roundtrip": { - "p50": 165.69599509239197, - "p90": 182.43199586868286, - "p95": 184.1599941253662, - "p99": 187.51999735832214 - }, - "isolatedSum": { - "p50": 192.35199689865112, - "p90": 215.71200340986252, - "p95": 220.7999974489212, - "p99": 225.8240059018135 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 25145344, - "combineLogicalBytes": 25145344, - "fanoutMean": 3.42578125, - "recvTokensMax": 510, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 125.91999769210815, - "p90": 144.70399916172028, - "p95": 145.9520012140274, - "p99": 148.00000190734863 - }, - "combine": { - "p50": 114.56000059843063, - "p90": 119.99999731779099, - "p95": 122.30399996042252, - "p99": 126.91199779510498 - }, - "roundtrip": { - "p50": 218.9760059118271, - "p90": 233.63199830055237, - "p95": 235.1360023021698, - "p99": 238.304004073143 - }, - "isolatedSum": { - "p50": 240.4799982905388, - "p90": 264.70399647951126, - "p95": 268.2560011744499, - "p99": 274.9119997024536 - }, - 
"roundtripMeasured": true, - "dispatchLogicalBytes": 49946624, - "combineLogicalBytes": 49946624, - "fanoutMean": 3.40234375, - "recvTokensMax": 1022, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-c61b6088", - "identity": "h100|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-moderate+eplb|8|decode|normal|none|none|0|tuned||a8f501af7004836", - "colorKey": "h100_106a51ab", - "comparisonKey": "6643ae5a97d68820", - "schemaVersion": 3, - "generatedAt": "2026-06-27T00:00:43.354862+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_07", - "sku": "h100", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · bf16 · zipf-moderate+eplb", - "model": "DeepSeek-V3 (EPLB physical)", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 288, - "routing": "zipf-moderate", - "routingLabel": "zipf-moderate+eplb", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": true, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "a8f501af7004836", - "workloadId": "set:8:120a8dc1dba92ca9", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": 4.927734375, - "eplbImbalanceAfter": 1.0006103515625, - "backendVersion": "1.2.1", - 
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28271975554", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271975554", - "createdAt": "2026-06-27T00:00:43.354862+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 69.72800195217133, - "p90": 76.7040029168129, - "p95": 82.24000036716461, - "p99": 100.09600222110748 - }, - "combine": { - "p50": 70.78400254249573, - "p90": 73.11999797821045, - "p95": 73.53600114583969, - "p99": 78.3040001988411 - }, - "roundtrip": { - "p50": 124.35200065374374, - "p90": 129.88799810409546, - "p95": 131.20000064373016, - "p99": 137.40800321102142 - }, - "isolatedSum": { - "p50": 140.51200449466705, - "p90": 149.82400089502335, - "p95": 155.7760015130043, - "p99": 178.40000241994858 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 616448, - "combineLogicalBytes": 616448, - "fanoutMean": 5.375, - "recvTokensMax": 7, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 69.92000341415405, - "p90": 77.79199630022049, - "p95": 80.19199967384338, - "p99": 96.19200229644775 - }, - "combine": { - "p50": 71.16799801588058, - "p90": 73.27999919652939, - "p95": 73.85600358247757, - "p99": 78.94399762153625 - }, - "roundtrip": { - "p50": 126.94400548934937, - "p90": 130.91200590133667, - "p95": 132.1280002593994, - "p99": 138.33600282669067 - }, - "isolatedSum": { - "p50": 141.08800143003464, - "p90": 151.07199549674988, - "p95": 154.04800325632095, - "p99": 175.135999917984 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1204224, - "combineLogicalBytes": 1204224, - "fanoutMean": 5.25, - "recvTokensMax": 14, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - 
"tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 74.07999783754349, - "p90": 101.34399682283401, - "p95": 103.13600301742554, - "p99": 111.39199882745743 - }, - "combine": { - "p50": 72.9919970035553, - "p90": 82.0159986615181, - "p95": 87.00799942016602, - "p99": 89.31200206279755 - }, - "roundtrip": { - "p50": 131.32800161838531, - "p90": 158.59200060367584, - "p95": 163.13600540161133, - "p99": 169.69600319862366 - }, - "isolatedSum": { - "p50": 147.07199484109879, - "p90": 183.3599954843521, - "p95": 190.14400243759155, - "p99": 200.70400089025497 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2394112, - "combineLogicalBytes": 2394112, - "fanoutMean": 5.21875, - "recvTokensMax": 24, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 75.6480023264885, - "p90": 100.76799988746643, - "p95": 102.01600193977356, - "p99": 105.95200210809708 - }, - "combine": { - "p50": 72.9919970035553, - "p90": 79.68000322580338, - "p95": 80.6720033288002, - "p99": 85.88799834251404 - }, - "roundtrip": { - "p50": 129.63199615478516, - "p90": 154.91199493408203, - "p95": 156.47999942302704, - "p99": 159.96800363063812 - }, - "isolatedSum": { - "p50": 148.6399993300438, - "p90": 180.4480031132698, - "p95": 182.68800526857376, - "p99": 191.84000045061111 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4630528, - "combineLogicalBytes": 4630528, - "fanoutMean": 5.046875, - "recvTokensMax": 45, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 82.49600231647491, - "p90": 100.73599964380264, - "p95": 103.04000228643417, - "p99": 106.81600123643875 - }, - "combine": { - "p50": 74.36800003051758, - "p90": 87.0399996638298, - "p95": 87.90399879217148, - "p99": 89.63199704885483 - }, - "roundtrip": { - "p50": 132.38400220870972, - "p90": 
161.02400422096252, - "p95": 162.81600296497345, - "p99": 166.72000288963318 - }, - "isolatedSum": { - "p50": 156.8640023469925, - "p90": 187.77599930763245, - "p95": 190.94400107860565, - "p99": 196.44799828529358 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 9447424, - "combineLogicalBytes": 9447424, - "fanoutMean": 5.1484375, - "recvTokensMax": 91, - "stragglerRank": 3, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 90.30400216579437, - "p90": 103.32799702882767, - "p95": 104.35199737548828, - "p99": 109.6000000834465 - }, - "combine": { - "p50": 81.31200075149536, - "p90": 89.75999802350998, - "p95": 90.43200314044952, - "p99": 91.61599725484848 - }, - "roundtrip": { - "p50": 142.20799505710602, - "p90": 158.65600109100342, - "p95": 161.50400042533875, - "p99": 167.39200055599213 - }, - "isolatedSum": { - "p50": 171.61600291728973, - "p90": 193.08799505233765, - "p95": 194.7840005159378, - "p99": 201.21599733829498 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 19023872, - "combineLogicalBytes": 19023872, - "fanoutMean": 5.18359375, - "recvTokensMax": 178, - "stragglerRank": 2, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 64, - "globalTokens": 512, - "dispatch": { - "p50": 96.89600020647049, - "p90": 116.60800129175186, - "p95": 118.43200027942657, - "p99": 124.32000041007996 - }, - "combine": { - "p50": 90.30400216579437, - "p90": 103.32799702882767, - "p95": 103.74400019645691, - "p99": 104.25599664449692 - }, - "roundtrip": { - "p50": 162.08000481128693, - "p90": 178.8800060749054, - "p95": 181.85600638389587, - "p99": 186.49600446224213 - }, - "isolatedSum": { - "p50": 187.20000237226486, - "p90": 219.93599832057953, - "p95": 222.17600047588348, - "p99": 228.57599705457687 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 38148096, - "combineLogicalBytes": 38148096, - "fanoutMean": 
5.197265625, - "recvTokensMax": 350, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 118.07999759912491, - "p90": 135.3279948234558, - "p95": 138.2399946451187, - "p99": 140.57600498199463 - }, - "combine": { - "p50": 106.84800148010254, - "p90": 119.45600062608719, - "p95": 119.74400281906128, - "p99": 120.54400146007538 - }, - "roundtrip": { - "p50": 198.84799420833588, - "p90": 216.2880003452301, - "p95": 219.67999637126923, - "p99": 221.47199511528015 - }, - "isolatedSum": { - "p50": 224.92799907922745, - "p90": 254.783995449543, - "p95": 257.98399746418, - "p99": 261.12000644207 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 76955648, - "combineLogicalBytes": 76955648, - "fanoutMean": 5.2421875, - "recvTokensMax": 687, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-a38d13e8", - "identity": "h100|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|decode|normal|none|none|0|tuned||a8f501af7004836", - "colorKey": "h100_769b9c4b", - "comparisonKey": "115d84ad1ee38d09", - "schemaVersion": 3, - "generatedAt": "2026-06-27T00:00:11.807854+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_02", - "sku": "h100", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · bf16 · zipf+eplb", - "model": "DeepSeek-V3 (EPLB physical)", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 288, - "routing": "zipf", - "routingLabel": "zipf+eplb", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": true, - "dispatchDtype": "bf16", - 
"activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "a8f501af7004836", - "workloadId": "set:8:f5576e2b712d38c3", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": 4.927734375, - "eplbImbalanceAfter": 1.0006103515625, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28271948775", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271948775", - "createdAt": "2026-06-27T00:00:11.807854+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 70.39999961853027, - "p90": 100.832000374794, - "p95": 105.56799918413162, - "p99": 192.73599982261658 - }, - "combine": { - "p50": 73.18399846553802, - "p90": 88.44800293445587, - "p95": 188.38399648666382, - "p99": 344.2560136318207 - }, - "roundtrip": { - "p50": 123.77600371837616, - "p90": 133.08799266815186, - "p95": 149.4400054216385, - "p99": 156.12800419330597 - }, - "isolatedSum": { - "p50": 143.5839980840683, - "p90": 189.28000330924988, - "p95": 293.95199567079544, - "p99": 536.9920134544373 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 616448, - "combineLogicalBytes": 616448, - "fanoutMean": 5.375, - "recvTokensMax": 7, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 68.38399916887283, - "p90": 75.71200281381607, - "p95": 
77.11999863386154, - "p99": 95.61599791049957 - }, - "combine": { - "p50": 71.29599899053574, - "p90": 73.44000041484833, - "p95": 74.36800003051758, - "p99": 82.2720006108284 - }, - "roundtrip": { - "p50": 126.68800354003906, - "p90": 130.87999820709229, - "p95": 133.56800377368927, - "p99": 142.59199798107147 - }, - "isolatedSum": { - "p50": 139.67999815940857, - "p90": 149.1520032286644, - "p95": 151.48799866437912, - "p99": 177.88799852132797 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1204224, - "combineLogicalBytes": 1204224, - "fanoutMean": 5.25, - "recvTokensMax": 14, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 72.54400104284286, - "p90": 99.2640033364296, - "p95": 102.08000242710114, - "p99": 107.39199817180634 - }, - "combine": { - "p50": 72.9919970035553, - "p90": 79.71200346946716, - "p95": 84.22400057315826, - "p99": 87.39200234413147 - }, - "roundtrip": { - "p50": 130.23999333381653, - "p90": 156.41599893569946, - "p95": 160.22400557994843, - "p99": 165.53600132465363 - }, - "isolatedSum": { - "p50": 145.53599804639816, - "p90": 178.97600680589676, - "p95": 186.3040030002594, - "p99": 194.7840005159378 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2394112, - "combineLogicalBytes": 2394112, - "fanoutMean": 5.21875, - "recvTokensMax": 24, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 96.19200229644775, - "p90": 109.56799983978271, - "p95": 112.73600161075592, - "p99": 155.87200224399567 - }, - "combine": { - "p50": 75.45600086450577, - "p90": 88.06400001049042, - "p95": 89.4400030374527, - "p99": 97.37599641084671 - }, - "roundtrip": { - "p50": 130.94399869441986, - "p90": 154.4319987297058, - "p95": 156.44800662994385, - "p99": 176.67199671268463 - }, - "isolatedSum": { - "p50": 171.64800316095352, - 
"p90": 197.63199985027313, - "p95": 202.17600464820862, - "p99": 253.24799865484238 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4630528, - "combineLogicalBytes": 4630528, - "fanoutMean": 5.046875, - "recvTokensMax": 45, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 82.97599852085114, - "p90": 100.16000270843506, - "p95": 103.55199873447418, - "p99": 106.72000050544739 - }, - "combine": { - "p50": 74.14399832487106, - "p90": 87.3280018568039, - "p95": 88.95999938249588, - "p99": 89.82399851083755 - }, - "roundtrip": { - "p50": 131.6480040550232, - "p90": 158.9760035276413, - "p95": 161.31199896335602, - "p99": 166.78400337696075 - }, - "isolatedSum": { - "p50": 157.1199968457222, - "p90": 187.48800456523895, - "p95": 192.51199811697006, - "p99": 196.54399901628494 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 9447424, - "combineLogicalBytes": 9447424, - "fanoutMean": 5.1484375, - "recvTokensMax": 91, - "stragglerRank": 3, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 90.30400216579437, - "p90": 105.6319996714592, - "p95": 106.6880002617836, - "p99": 111.04000359773636 - }, - "combine": { - "p50": 80.99199831485748, - "p90": 89.15200084447861, - "p95": 89.88799899816513, - "p99": 90.91199934482574 - }, - "roundtrip": { - "p50": 142.17600226402283, - "p90": 157.6640009880066, - "p95": 160.44799983501434, - "p99": 164.8319959640503 - }, - "isolatedSum": { - "p50": 171.29600048065186, - "p90": 194.7840005159378, - "p95": 196.57599925994873, - "p99": 201.9520029425621 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 19023872, - "combineLogicalBytes": 19023872, - "fanoutMean": 5.18359375, - "recvTokensMax": 178, - "stragglerRank": 2, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 64, - 
"globalTokens": 512, - "dispatch": { - "p50": 95.74399888515472, - "p90": 116.2559986114502, - "p95": 121.98399752378464, - "p99": 398.6560106277466 - }, - "combine": { - "p50": 90.20800143480301, - "p90": 101.1200025677681, - "p95": 104.25599664449692, - "p99": 111.55200004577637 - }, - "roundtrip": { - "p50": 160.76800227165222, - "p90": 181.536003947258, - "p95": 185.37600338459015, - "p99": 188.35200369358063 - }, - "isolatedSum": { - "p50": 185.95200031995773, - "p90": 217.3760011792183, - "p95": 226.23999416828156, - "p99": 510.20801067352295 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 38148096, - "combineLogicalBytes": 38148096, - "fanoutMean": 5.197265625, - "recvTokensMax": 350, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 114.3679991364479, - "p90": 133.44000279903412, - "p95": 137.31199502944946, - "p99": 142.7839994430542 - }, - "combine": { - "p50": 108.15999656915665, - "p90": 120.2239990234375, - "p95": 121.24799937009811, - "p99": 123.99999797344208 - }, - "roundtrip": { - "p50": 199.35999810695648, - "p90": 217.31199324131012, - "p95": 220.15999257564545, - "p99": 380.8319866657257 - }, - "isolatedSum": { - "p50": 222.52799570560455, - "p90": 253.66400182247162, - "p95": 258.5599943995476, - "p99": 266.7839974164963 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 76955648, - "combineLogicalBytes": 76955648, - "fanoutMean": 5.2421875, - "recvTokensMax": 687, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-4ad32f1a", - "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|fp8-saturation|none|none|0|normalized|0.18|8c8497a77d9085d", - "colorKey": "h100_7b3247bf", - "comparisonKey": "2a087c80bac58077", - "schemaVersion": 3, - "generatedAt": "2026-06-26T15:27:59.966964+00:00", - "status": "valid", - "publicationStatus": 
"official", - "runner": "h100-dgxc-slurm_12", - "sku": "h100", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "normalized", - "suite": "resource-constrained", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · bf16 (norm)", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "fp8-saturation", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": 0.18, - "achievedFraction": 0.1818, - "configuredUnits": 24, - "deviceUnits": 132, - "resourceClass": "unknown", - "conformanceClass": "resource-conforming", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "8c8497a77d9085d", - "workloadId": "set:4:d8d49658059863f2", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28247603308", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28247603308", - "createdAt": "2026-06-26T15:27:59.966964+00:00", - "sha": "fd23d02b65dba6f1ed963342b188022fc27263d1" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 96.73599898815155, - "p90": 102.49599814414978, - "p95": 104.12800312042236, - "p99": 112.19199746847153 - }, - "combine": { - "p50": 79.42400127649307, - "p90": 81.4720019698143, - "p95": 82.14399963617325, - 
"p99": 87.93599903583527 - }, - "roundtrip": { - "p50": 146.84799313545227, - "p90": 156.15999698638916, - "p95": 159.13599729537964, - "p99": 164.000004529953 - }, - "isolatedSum": { - "p50": 176.16000026464462, - "p90": 183.96800011396408, - "p95": 186.2720027565956, - "p99": 200.1279965043068 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 630784, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 98.33600372076035, - "p90": 103.93600165843964, - "p95": 106.52799904346466, - "p99": 111.58400028944016 - }, - "combine": { - "p50": 80.03199845552444, - "p90": 86.84799820184708, - "p95": 87.61599659919739, - "p99": 88.06400001049042 - }, - "roundtrip": { - "p50": 151.64799988269806, - "p90": 159.16800498962402, - "p95": 160.35200655460358, - "p99": 165.50399363040924 - }, - "isolatedSum": { - "p50": 178.3680021762848, - "p90": 190.7839998602867, - "p95": 194.14399564266205, - "p99": 199.64800029993057 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 99.90400075912476, - "p90": 105.76000064611435, - "p95": 108.15999656915665, - "p99": 116.60800129175186 - }, - "combine": { - "p50": 87.90399879217148, - "p90": 90.55999666452408, - "p95": 95.23200243711472, - "p99": 96.57599776983261 - }, - "roundtrip": { - "p50": 157.82399475574493, - "p90": 163.7759953737259, - "p95": 166.78400337696075, - "p99": 169.95200514793396 - }, - "isolatedSum": { - "p50": 187.80799955129623, - "p90": 196.31999731063843, - "p95": 203.39199900627136, - "p99": 213.18399906158447 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 19726336, 
- "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 182, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 128.60800325870514, - "p90": 133.53599607944489, - "p95": 135.51999628543854, - "p99": 138.49599659442902 - }, - "combine": { - "p50": 112.57600039243698, - "p90": 120.4800009727478, - "p95": 120.7680031657219, - "p99": 122.40000069141388 - }, - "roundtrip": { - "p50": 208.3519995212555, - "p90": 215.71199595928192, - "p95": 217.56799519062042, - "p99": 220.5439954996109 - }, - "isolatedSum": { - "p50": 241.18400365114212, - "p90": 254.0159970521927, - "p95": 256.28799945116043, - "p99": 260.8959972858429 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-b5d97134", - "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|normalized|0.1|8c8497a77d9085d", - "colorKey": "h100_7b3247bf", - "comparisonKey": "b51e047646ec8fac", - "schemaVersion": 3, - "generatedAt": "2026-06-26T17:27:16.815311+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_07", - "sku": "h100", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "normalized", - "suite": "resource-constrained", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · bf16 (norm)", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": 
false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": 0.1, - "achievedFraction": 0.0985, - "configuredUnits": 13, - "deviceUnits": 132, - "resourceClass": "resource-constrained", - "conformanceClass": "resource-conforming", - "fixedKernel": false, - "paretoEligible": true - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "8c8497a77d9085d", - "workloadId": "set:4:d8d49658059863f2", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28254271442", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254271442", - "createdAt": "2026-06-26T17:27:16.815311+00:00", - "sha": "60dec7d70f554e252fec87709e2be52752947db1" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 97.24800288677216, - "p90": 103.39199751615524, - "p95": 105.8880016207695, - "p99": 111.13599687814713 - }, - "combine": { - "p50": 78.84799689054489, - "p90": 81.727996468544, - "p95": 85.11999994516373, - "p99": 89.02399986982346 - }, - "roundtrip": { - "p50": 151.36000514030457, - "p90": 157.53600001335144, - "p95": 159.67999398708344, - "p99": 164.63999450206757 - }, - "isolatedSum": { - "p50": 176.09599977731705, - "p90": 185.11999398469925, - "p95": 191.00800156593323, - "p99": 200.15999674797058 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 630784, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 99.29600358009338, - "p90": 
104.70400005578995, - "p95": 106.72000050544739, - "p99": 113.53600025177002 - }, - "combine": { - "p50": 79.58400249481201, - "p90": 86.97599917650223, - "p95": 87.39200234413147, - "p99": 91.5519967675209 - }, - "roundtrip": { - "p50": 153.85599434375763, - "p90": 161.28000617027283, - "p95": 162.432000041008, - "p99": 166.07999801635742 - }, - "isolatedSum": { - "p50": 178.8800060749054, - "p90": 191.67999923229218, - "p95": 194.11200284957886, - "p99": 205.08799701929092 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 103.29599678516388, - "p90": 107.64800012111664, - "p95": 109.98400300741196, - "p99": 121.40800058841705 - }, - "combine": { - "p50": 87.74399757385254, - "p90": 95.20000219345093, - "p95": 95.48799693584442, - "p99": 97.18400239944458 - }, - "roundtrip": { - "p50": 161.6639941930771, - "p90": 169.50400173664093, - "p95": 170.9440052509308, - "p99": 175.52000284194946 - }, - "isolatedSum": { - "p50": 191.03999435901642, - "p90": 202.84800231456757, - "p95": 205.47199994325638, - "p99": 218.59200298786163 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 19726336, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 182, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 129.66400384902954, - "p90": 137.79200613498688, - "p95": 139.55199718475342, - "p99": 143.93599331378937 - }, - "combine": { - "p50": 113.72800171375275, - "p90": 120.15999853610992, - "p95": 120.83200365304947, - "p99": 123.55200201272964 - }, - "roundtrip": { - "p50": 211.776003241539, - "p90": 217.21599996089935, - "p95": 218.9439982175827, - "p99": 222.75200486183167 - }, - 
"isolatedSum": { - "p50": 243.3920055627823, - "p90": 257.9520046710968, - "p95": 260.3840008378029, - "p99": 267.487995326519 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-2f9f6948", - "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|ac583971f94b176", - "colorKey": "h100_7b3247bf", - "comparisonKey": "b51e047646ec8fac", - "schemaVersion": 3, - "generatedAt": "2026-06-26T17:29:02.253264+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_00", - "sku": "h100", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "normalized", - "suite": "resource-constrained", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · bf16 (norm)", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": 0.18, - "achievedFraction": 0.1818, - "configuredUnits": 24, - "deviceUnits": 132, - "resourceClass": "resource-constrained", - "conformanceClass": "resource-conforming", - "fixedKernel": false, - "paretoEligible": true - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "ac583971f94b176", - "workloadId": "set:8:d8d49658059863f2", - "workloadSource": "canonical-serialized", - 
"eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28254315809", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254315809", - "createdAt": "2026-06-26T17:29:02.253264+00:00", - "sha": "60dec7d70f554e252fec87709e2be52752947db1" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 95.74399888515472, - "p90": 102.78400033712387, - "p95": 104.99200224876404, - "p99": 109.37599837779999 - }, - "combine": { - "p50": 79.32800054550171, - "p90": 82.07999914884567, - "p95": 82.87999778985977, - "p99": 88.03199976682663 - }, - "roundtrip": { - "p50": 147.74399995803833, - "p90": 154.6880006790161, - "p95": 157.44000673294067, - "p99": 171.9360053539276 - }, - "isolatedSum": { - "p50": 175.07199943065643, - "p90": 184.86399948596954, - "p95": 187.8720000386238, - "p99": 197.40799814462662 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 630784, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 71.23199850320816, - "p90": 101.27999633550644, - "p95": 102.52799838781357, - "p99": 107.87200182676315 - }, - "combine": { - "p50": 72.22399860620499, - "p90": 80.92799782752991, - "p95": 81.44000172615051, - "p99": 84.76799726486206 - }, - "roundtrip": { - "p50": 127.45599448680878, - "p90": 153.02400290966034, - "p95": 155.64799308776855, - "p99": 159.4880074262619 - }, - "isolatedSum": { - "p50": 143.45599710941315, - "p90": 182.20799416303635, - "p95": 183.96800011396408, - "p99": 192.6399990916252 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1232896, - "combineLogicalBytes": 1232896, - "fanoutMean": 5.375, - "recvTokensMax": 
13, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 95.23200243711472, - "p90": 102.36799716949463, - "p95": 107.84000158309937, - "p99": 439.64800238609314 - }, - "combine": { - "p50": 72.95999675989151, - "p90": 81.66400343179703, - "p95": 86.81599795818329, - "p99": 88.92799913883209 - }, - "roundtrip": { - "p50": 128.7360042333603, - "p90": 159.19999778270721, - "p95": 161.31199896335602, - "p99": 167.1680063009262 - }, - "isolatedSum": { - "p50": 168.19199919700623, - "p90": 184.03200060129166, - "p95": 194.65599954128265, - "p99": 528.5760015249252 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2480128, - "combineLogicalBytes": 2480128, - "fanoutMean": 5.40625, - "recvTokensMax": 29, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 95.42399644851685, - "p90": 102.52799838781357, - "p95": 104.89600151777267, - "p99": 113.53600025177002 - }, - "combine": { - "p50": 79.58400249481201, - "p90": 82.91199803352356, - "p95": 87.07199990749359, - "p99": 87.96799927949905 - }, - "roundtrip": { - "p50": 151.48800611495972, - "p90": 159.90400314331055, - "p95": 162.20800578594208, - "p99": 169.47199404239655 - }, - "isolatedSum": { - "p50": 175.00799894332886, - "p90": 185.43999642133713, - "p95": 191.96800142526627, - "p99": 201.50399953126907 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 95.71199864149094, - "p90": 100.8640006184578, - "p95": 102.68799960613251, - "p99": 106.49599879980087 - }, - "combine": { - "p50": 80.64000308513641, - "p90": 87.90399879217148, - "p95": 
89.24800157546997, - "p99": 95.23200243711472 - }, - "roundtrip": { - "p50": 152.319997549057, - "p90": 160.19199788570404, - "p95": 162.23999857902527, - "p99": 168.92799735069275 - }, - "isolatedSum": { - "p50": 176.35200172662735, - "p90": 188.76799941062927, - "p95": 191.93600118160248, - "p99": 201.7280012369156 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 9920512, - "combineLogicalBytes": 9920512, - "fanoutMean": 5.40625, - "recvTokensMax": 92, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 80.86399734020233, - "p90": 103.26399654150009, - "p95": 105.47199845314026, - "p99": 113.18399757146835 - }, - "combine": { - "p50": 80.35200089216232, - "p90": 89.31200206279755, - "p95": 90.04800021648407, - "p99": 95.74399888515472 - }, - "roundtrip": { - "p50": 136.48000359535217, - "p90": 164.60800170898438, - "p95": 167.10400581359863, - "p99": 175.10400712490082 - }, - "isolatedSum": { - "p50": 161.21599823236465, - "p90": 192.57599860429764, - "p95": 195.51999866962433, - "p99": 208.92799645662308 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 19726336, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 182, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 64, - "globalTokens": 512, - "dispatch": { - "p50": 103.4879982471466, - "p90": 112.8000020980835, - "p95": 114.3679991364479, - "p99": 125.72799623012543 - }, - "combine": { - "p50": 96.83199971914291, - "p90": 104.12800312042236, - "p95": 104.99200224876404, - "p99": 106.33599758148193 - }, - "roundtrip": { - "p50": 170.71999609470367, - "p90": 181.21600151062012, - "p95": 182.91200697422028, - "p99": 186.81600689888 - }, - "isolatedSum": { - "p50": 200.31999796628952, - "p90": 216.92800521850586, - "p95": 219.36000138521194, - "p99": 232.06399381160736 - }, - "roundtripMeasured": true, - 
"dispatchLogicalBytes": 38993920, - "combineLogicalBytes": 38993920, - "fanoutMean": 5.3125, - "recvTokensMax": 367, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 111.29599809646606, - "p90": 130.87999820709229, - "p95": 133.5040032863617, - "p99": 139.93600010871887 - }, - "combine": { - "p50": 106.27199709415436, - "p90": 119.58400160074234, - "p95": 119.99999731779099, - "p99": 122.3360002040863 - }, - "roundtrip": { - "p50": 197.56799936294556, - "p90": 215.80800414085388, - "p95": 217.92000532150269, - "p99": 219.80799734592438 - }, - "isolatedSum": { - "p50": 217.56799519062042, - "p90": 250.46399980783463, - "p95": 253.50400060415268, - "p99": 262.2720003128052 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-3752524d", - "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|normalized|0.6|8c8497a77d9085d", - "colorKey": "h100_7b3247bf", - "comparisonKey": "b51e047646ec8fac", - "schemaVersion": 3, - "generatedAt": "2026-06-26T17:30:39.045176+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_13", - "sku": "h100", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "normalized", - "suite": "resource-constrained", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · bf16 (norm)", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - 
"unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": 0.6, - "achievedFraction": 0.5985, - "configuredUnits": 79, - "deviceUnits": 132, - "resourceClass": "resource-constrained", - "conformanceClass": "resource-conforming", - "fixedKernel": false, - "paretoEligible": true - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "8c8497a77d9085d", - "workloadId": "set:4:d8d49658059863f2", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28254286950", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254286950", - "createdAt": "2026-06-26T17:30:39.045176+00:00", - "sha": "60dec7d70f554e252fec87709e2be52752947db1" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 96.28800302743912, - "p90": 103.55199873447418, - "p95": 105.66399991512299, - "p99": 108.51199924945831 - }, - "combine": { - "p50": 79.1039988398552, - "p90": 81.37600123882294, - "p95": 84.89599823951721, - "p99": 89.91999924182892 - }, - "roundtrip": { - "p50": 146.27200365066528, - "p90": 156.38400614261627, - "p95": 161.82400286197662, - "p99": 219.2319929599762 - }, - "isolatedSum": { - "p50": 175.3920018672943, - "p90": 184.92799997329712, - "p95": 190.5599981546402, - "p99": 198.43199849128723 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 630784, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - 
"p50": 96.70399874448776, - "p90": 102.30399668216705, - "p95": 104.51199859380722, - "p99": 112.22399771213531 - }, - "combine": { - "p50": 79.58400249481201, - "p90": 87.3280018568039, - "p95": 87.80799806118011, - "p99": 89.9519994854927 - }, - "roundtrip": { - "p50": 153.3759981393814, - "p90": 161.21600568294525, - "p95": 162.56000101566315, - "p99": 166.72000288963318 - }, - "isolatedSum": { - "p50": 176.28800123929977, - "p90": 189.63199853897095, - "p95": 192.31999665498734, - "p99": 202.17599719762802 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 102.88000106811523, - "p90": 106.81600123643875, - "p95": 109.0560033917427, - "p99": 114.3679991364479 - }, - "combine": { - "p50": 87.99999952316284, - "p90": 95.48799693584442, - "p95": 96.22400254011154, - "p99": 119.1679984331131 - }, - "roundtrip": { - "p50": 161.95200383663177, - "p90": 170.0800061225891, - "p95": 172.5119948387146, - "p99": 460.7999920845032 - }, - "isolatedSum": { - "p50": 190.88000059127808, - "p90": 202.30399817228317, - "p95": 205.28000593185425, - "p99": 233.535997569561 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 19726336, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 182, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 129.08799946308136, - "p90": 135.80800592899323, - "p95": 137.56799697875977, - "p99": 142.14399456977844 - }, - "combine": { - "p50": 113.27999830245972, - "p90": 120.44800072908401, - "p95": 120.67200243473053, - "p99": 123.74400347471237 - }, - "roundtrip": { - "p50": 211.5200012922287, - "p90": 218.176007270813, - "p95": 219.64800357818604, - 
"p99": 223.68000447750092 - }, - "isolatedSum": { - "p50": 242.36799776554108, - "p90": 256.25600665807724, - "p95": 258.2399994134903, - "p99": 265.8879980444908 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-7db267e7", - "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|decode|normal|none|none|0|normalized|0.18|ffa946582edb500", - "colorKey": "h100_716e65b9", - "comparisonKey": "259b0e9f1092ac0e", - "schemaVersion": 3, - "generatedAt": "2026-06-26T17:32:00.320566+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_15", - "sku": "h100", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "normalized", - "suite": "resource-constrained", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · bf16 (norm) · balanced", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "balanced", - "routingLabel": "balanced", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": 0.18, - "achievedFraction": 0.1818, - "configuredUnits": 24, - "deviceUnits": 132, - "resourceClass": "resource-constrained", - "conformanceClass": "resource-conforming", - "fixedKernel": false, - "paretoEligible": true - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "ffa946582edb500", - "workloadId": "set:8:7af12818400d6348", 
- "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28254367516", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254367516", - "createdAt": "2026-06-26T17:32:00.320566+00:00", - "sha": "60dec7d70f554e252fec87709e2be52752947db1" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 95.93600034713745, - "p90": 103.00800204277039, - "p95": 104.38399761915207, - "p99": 107.64800012111664 - }, - "combine": { - "p50": 81.08799904584885, - "p90": 87.93599903583527, - "p95": 88.60799670219421, - "p99": 90.36800265312195 - }, - "roundtrip": { - "p50": 151.2639969587326, - "p90": 158.9760035276413, - "p95": 160.73599457740784, - "p99": 164.06400501728058 - }, - "isolatedSum": { - "p50": 177.0239993929863, - "p90": 190.94400107860565, - "p95": 192.99199432134628, - "p99": 198.0160027742386 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 917504, - "combineLogicalBytes": 917504, - "fanoutMean": 8, - "recvTokensMax": 8, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 74.23999905586243, - "p90": 96.79999947547913, - "p95": 100.00000149011612, - "p99": 103.7760004401207 - }, - "combine": { - "p50": 73.98399710655212, - "p90": 87.64799684286118, - "p95": 88.54400366544724, - "p99": 89.66399729251862 - }, - "roundtrip": { - "p50": 127.32799351215363, - "p90": 158.1439971923828, - "p95": 159.32799875736237, - "p99": 162.52799332141876 - }, - "isolatedSum": { - "p50": 148.22399616241455, - "p90": 184.4479963183403, - "p95": 188.54400515556335, - "p99": 193.4399977326393 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1835008, - "combineLogicalBytes": 1835008, - 
"fanoutMean": 8, - "recvTokensMax": 16, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 74.87999647855759, - "p90": 99.5199978351593, - "p95": 103.20000350475311, - "p99": 106.62399977445602 - }, - "combine": { - "p50": 73.95199686288834, - "p90": 87.74399757385254, - "p95": 88.06400001049042, - "p99": 88.76799792051315 - }, - "roundtrip": { - "p50": 127.80800461769104, - "p90": 156.3519984483719, - "p95": 158.81599485874176, - "p99": 162.33600676059723 - }, - "isolatedSum": { - "p50": 148.83199334144592, - "p90": 187.26399540901184, - "p95": 191.26400351524353, - "p99": 195.39199769496918 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 3670016, - "combineLogicalBytes": 3670016, - "fanoutMean": 8, - "recvTokensMax": 32, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 94.36800330877304, - "p90": 100.09600222110748, - "p95": 101.95200145244598, - "p99": 107.4879989027977 - }, - "combine": { - "p50": 80.92799782752991, - "p90": 88.03199976682663, - "p95": 88.86399865150452, - "p99": 89.79199826717377 - }, - "roundtrip": { - "p50": 149.85600113868713, - "p90": 156.95999562740326, - "p95": 158.1760048866272, - "p99": 161.98399662971497 - }, - "isolatedSum": { - "p50": 175.29600113630295, - "p90": 188.1280019879341, - "p95": 190.8160001039505, - "p99": 197.27999716997147 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 7340032, - "combineLogicalBytes": 7340032, - "fanoutMean": 8, - "recvTokensMax": 64, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 94.36800330877304, - "p90": 104.80000078678131, - "p95": 106.78400099277496, - "p99": 115.00799655914307 - }, - "combine": { - "p50": 86.59200370311737, - "p90": 
88.76799792051315, - "p95": 89.56799656152725, - "p99": 96.83199971914291 - }, - "roundtrip": { - "p50": 150.11200308799744, - "p90": 161.50400042533875, - "p95": 166.24000668525696, - "p99": 490.62401056289673 - }, - "isolatedSum": { - "p50": 180.9600070118904, - "p90": 193.56799870729446, - "p95": 196.35199755430222, - "p99": 211.83999627828598 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 14680064, - "combineLogicalBytes": 14680064, - "fanoutMean": 8, - "recvTokensMax": 128, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 87.0399996638298, - "p90": 106.04800283908844, - "p95": 110.1439967751503, - "p99": 123.83999675512314 - }, - "combine": { - "p50": 82.5280025601387, - "p90": 96.3200032711029, - "p95": 96.73599898815155, - "p99": 97.56799787282944 - }, - "roundtrip": { - "p50": 143.5839980840683, - "p90": 166.55999422073364, - "p95": 168.7680035829544, - "p99": 175.55199563503265 - }, - "isolatedSum": { - "p50": 169.5680022239685, - "p90": 202.36800611019135, - "p95": 206.87999576330185, - "p99": 221.40799462795258 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 29360128, - "combineLogicalBytes": 29360128, - "fanoutMean": 8, - "recvTokensMax": 256, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 64, - "globalTokens": 512, - "dispatch": { - "p50": 116.92799627780914, - "p90": 126.3359934091568, - "p95": 128.63999605178833, - "p99": 132.6719969511032 - }, - "combine": { - "p50": 104.19200360774994, - "p90": 112.06399649381638, - "p95": 112.99200356006622, - "p99": 113.76000195741653 - }, - "roundtrip": { - "p50": 190.49599766731262, - "p90": 199.74400103092194, - "p95": 202.36800611019135, - "p99": 204.76800203323364 - }, - "isolatedSum": { - "p50": 221.11999988555908, - "p90": 238.39998990297318, - "p95": 241.63199961185455, - "p99": 246.43199890851974 - }, - 
"roundtripMeasured": true, - "dispatchLogicalBytes": 58720256, - "combineLogicalBytes": 58720256, - "fanoutMean": 8, - "recvTokensMax": 512, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 129.85600531101227, - "p90": 152.96000242233276, - "p95": 154.78399395942688, - "p99": 158.87999534606934 - }, - "combine": { - "p50": 121.2799996137619, - "p90": 129.43999469280243, - "p95": 130.3360015153885, - "p99": 145.34400403499603 - }, - "roundtrip": { - "p50": 226.8799990415573, - "p90": 240.31999707221985, - "p95": 242.01600253582, - "p99": 245.02399563789368 - }, - "isolatedSum": { - "p50": 251.13600492477417, - "p90": 282.3999971151352, - "p95": 285.11999547481537, - "p99": 304.22399938106537 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 117440512, - "combineLogicalBytes": 117440512, - "fanoutMean": 8, - "recvTokensMax": 1024, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-c5b168ae", - "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|decode|normal|none|none|0|normalized|0.18|14ded8461f2636c", - "colorKey": "h100_f7ec28aa", - "comparisonKey": "9896b8e4d81bc6a5", - "schemaVersion": 3, - "generatedAt": "2026-06-26T17:32:03.917674+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_11", - "sku": "h100", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "normalized", - "suite": "resource-constrained", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · bf16 (norm) · zipf", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "zipf", - "routingLabel": "zipf", - "routingStep": 
0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": 0.18, - "achievedFraction": 0.1818, - "configuredUnits": 24, - "deviceUnits": 132, - "resourceClass": "resource-constrained", - "conformanceClass": "resource-conforming", - "fixedKernel": false, - "paretoEligible": true - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "14ded8461f2636c", - "workloadId": "set:8:f5576e2b712d38c3", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28254376151", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254376151", - "createdAt": "2026-06-26T17:32:03.917674+00:00", - "sha": "60dec7d70f554e252fec87709e2be52752947db1" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 96.89600020647049, - "p90": 104.032002389431, - "p95": 106.04800283908844, - "p99": 111.04000359773636 - }, - "combine": { - "p50": 74.36800003051758, - "p90": 80.03199845552444, - "p95": 81.31200075149536, - "p99": 82.68799632787704 - }, - "roundtrip": { - "p50": 145.82400023937225, - "p90": 153.76000106334686, - "p95": 160.0639969110489, - "p99": 226.30399465560913 - }, - "isolatedSum": { - "p50": 171.26400023698807, - "p90": 184.06400084495544, - "p95": 187.3600035905838, - "p99": 193.7279999256134 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 444416, - "combineLogicalBytes": 444416, - "fanoutMean": 3.875, - "recvTokensMax": 8, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - 
"dispatch": { - "p50": 70.72000205516815, - "p90": 103.93600165843964, - "p95": 105.18400371074677, - "p99": 113.63200098276138 - }, - "combine": { - "p50": 71.35999947786331, - "p90": 80.32000064849854, - "p95": 81.18399977684021, - "p99": 88.16000074148178 - }, - "roundtrip": { - "p50": 126.68800354003906, - "p90": 152.5759994983673, - "p95": 155.32800555229187, - "p99": 159.29600596427917 - }, - "isolatedSum": { - "p50": 142.08000153303146, - "p90": 184.25600230693817, - "p95": 186.36800348758698, - "p99": 201.79200172424316 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 845824, - "combineLogicalBytes": 845824, - "fanoutMean": 3.6875, - "recvTokensMax": 16, - "stragglerRank": 3, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 70.14399766921997, - "p90": 100.28800368309021, - "p95": 102.55999863147736, - "p99": 131.71200454235077 - }, - "combine": { - "p50": 71.61600142717361, - "p90": 79.55200225114822, - "p95": 79.74400371313095, - "p99": 84.22400057315826 - }, - "roundtrip": { - "p50": 127.77599692344666, - "p90": 153.50399911403656, - "p95": 155.2640050649643, - "p99": 160.73599457740784 - }, - "isolatedSum": { - "p50": 141.75999909639359, - "p90": 179.84000593423843, - "p95": 182.3040023446083, - "p99": 215.93600511550903 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1691648, - "combineLogicalBytes": 1691648, - "fanoutMean": 3.6875, - "recvTokensMax": 32, - "stragglerRank": 3, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 94.97600048780441, - "p90": 100.832000374794, - "p95": 102.30399668216705, - "p99": 114.3999993801117 - }, - "combine": { - "p50": 71.52000069618225, - "p90": 81.18399977684021, - "p95": 81.7599967122078, - "p99": 86.94399893283844 - }, - "roundtrip": { - "p50": 125.31200051307678, - "p90": 153.05599570274353, - "p95": 156.0640037059784, - 
"p99": 159.42400693893433 - }, - "isolatedSum": { - "p50": 166.49600118398666, - "p90": 182.01600015163422, - "p95": 184.06399339437485, - "p99": 201.34399831295013 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 3354624, - "combineLogicalBytes": 3354624, - "fanoutMean": 3.65625, - "recvTokensMax": 64, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 95.551997423172, - "p90": 100.89600086212158, - "p95": 103.26399654150009, - "p99": 112.31999844312668 - }, - "combine": { - "p50": 79.48800176382065, - "p90": 86.87999844551086, - "p95": 87.71199733018875, - "p99": 88.22400122880936 - }, - "roundtrip": { - "p50": 149.79200065135956, - "p90": 158.24000537395477, - "p95": 160.0320041179657, - "p99": 165.69599509239197 - }, - "isolatedSum": { - "p50": 175.03999918699265, - "p90": 187.77599930763245, - "p95": 190.97599387168884, - "p99": 200.54399967193604 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 6537216, - "combineLogicalBytes": 6537216, - "fanoutMean": 3.5625, - "recvTokensMax": 127, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 83.16799998283386, - "p90": 99.96800124645233, - "p95": 104.96000200510025, - "p99": 109.11999642848969 - }, - "combine": { - "p50": 79.8719972372055, - "p90": 87.93599903583527, - "p95": 89.28000181913376, - "p99": 95.39200365543365 - }, - "roundtrip": { - "p50": 135.26399433612823, - "p90": 159.19999778270721, - "p95": 161.72799468040466, - "p99": 166.6560024023056 - }, - "isolatedSum": { - "p50": 163.03999722003937, - "p90": 187.9040002822876, - "p95": 194.240003824234, - "p99": 204.51200008392334 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 12859392, - "combineLogicalBytes": 12859392, - "fanoutMean": 3.50390625, - "recvTokensMax": 255, - "stragglerRank": 0, - "correct": true, - 
"samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 64, - "globalTokens": 512, - "dispatch": { - "p50": 100.832000374794, - "p90": 114.68800157308578, - "p95": 116.67200177907944, - "p99": 134.91199910640717 - }, - "combine": { - "p50": 90.27200192213058, - "p90": 103.32799702882767, - "p95": 104.16000336408615, - "p99": 152.12799608707428 - }, - "roundtrip": { - "p50": 164.70399498939514, - "p90": 182.8480064868927, - "p95": 186.49600446224213, - "p99": 189.40800428390503 - }, - "isolatedSum": { - "p50": 191.1040022969246, - "p90": 218.01599860191345, - "p95": 220.8320051431656, - "p99": 287.03999519348145 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 25145344, - "combineLogicalBytes": 25145344, - "fanoutMean": 3.42578125, - "recvTokensMax": 510, - "stragglerRank": 3, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 121.31199985742569, - "p90": 139.67999815940857, - "p95": 144.57599818706512, - "p99": 150.87999403476715 - }, - "combine": { - "p50": 112.99200356006622, - "p90": 120.64000219106674, - "p95": 120.80000340938568, - "p99": 128.51199507713318 - }, - "roundtrip": { - "p50": 212.67199516296387, - "p90": 228.4799963235855, - "p95": 230.0799936056137, - "p99": 235.74399948120117 - }, - "isolatedSum": { - "p50": 234.3040034174919, - "p90": 260.3200003504753, - "p95": 265.3760015964508, - "p99": 279.39198911190033 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 49946624, - "combineLogicalBytes": 49946624, - "fanoutMean": 3.40234375, - "recvTokensMax": 1022, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-cf899bce", - "identity": "h100|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|decode|normal|none|none|0|normalized|0.18|a8f501af7004836", - "colorKey": "h100_93503624", - "comparisonKey": "74d307ed048ea3b5", - "schemaVersion": 3, - "generatedAt": 
"2026-06-26T17:46:24.194442+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_02", - "sku": "h100", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "normalized", - "suite": "resource-constrained", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · bf16 (norm) · zipf+eplb", - "model": "DeepSeek-V3 (EPLB physical)", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 288, - "routing": "zipf", - "routingLabel": "zipf+eplb", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": true, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": 0.18, - "achievedFraction": 0.1818, - "configuredUnits": 24, - "deviceUnits": 132, - "resourceClass": "resource-constrained", - "conformanceClass": "resource-conforming", - "fixedKernel": false, - "paretoEligible": true - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "a8f501af7004836", - "workloadId": "set:8:f5576e2b712d38c3", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": 4.927734375, - "eplbImbalanceAfter": 1.0006103515625, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28255296001", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28255296001", - "createdAt": "2026-06-26T17:46:24.194442+00:00", - "sha": "36d3eb6c3c7386d3220c873d305410219c5c0f17" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 69.72800195217133, - "p90": 75.83999633789062, - "p95": 77.85599678754807, - "p99": 
83.39200168848038 - }, - "combine": { - "p50": 71.26399874687195, - "p90": 73.40800017118454, - "p95": 74.0479975938797, - "p99": 78.87999713420868 - }, - "roundtrip": { - "p50": 121.85599654912949, - "p90": 128.12800705432892, - "p95": 130.3039938211441, - "p99": 134.71999764442444 - }, - "isolatedSum": { - "p50": 140.99200069904327, - "p90": 149.24799650907516, - "p95": 151.90399438142776, - "p99": 162.27199882268906 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 616448, - "combineLogicalBytes": 616448, - "fanoutMean": 5.375, - "recvTokensMax": 7, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 70.3359991312027, - "p90": 76.25599950551987, - "p95": 78.59200239181519, - "p99": 84.6719965338707 - }, - "combine": { - "p50": 71.16799801588058, - "p90": 73.53600114583969, - "p95": 74.27199929952621, - "p99": 79.80799674987793 - }, - "roundtrip": { - "p50": 127.20000743865967, - "p90": 131.00799918174744, - "p95": 133.27999413013458, - "p99": 138.08000087738037 - }, - "isolatedSum": { - "p50": 141.50399714708328, - "p90": 149.79200065135956, - "p95": 152.8640016913414, - "p99": 164.47999328374863 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1204224, - "combineLogicalBytes": 1204224, - "fanoutMean": 5.25, - "recvTokensMax": 14, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 73.18399846553802, - "p90": 102.14400291442871, - "p95": 105.50399869680405, - "p99": 108.44799876213074 - }, - "combine": { - "p50": 73.40800017118454, - "p90": 81.82399719953537, - "p95": 87.10400015115738, - "p99": 88.95999938249588 - }, - "roundtrip": { - "p50": 131.8719983100891, - "p90": 160.3199988603592, - "p95": 162.88000345230103, - "p99": 167.1680063009262 - }, - "isolatedSum": { - "p50": 146.59199863672256, - "p90": 183.96800011396408, - "p95": 
192.60799884796143, - "p99": 197.40799814462662 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2394112, - "combineLogicalBytes": 2394112, - "fanoutMean": 5.21875, - "recvTokensMax": 24, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 70.30399888753891, - "p90": 78.20799946784973, - "p95": 81.02399855852127, - "p99": 89.4400030374527 - }, - "combine": { - "p50": 72.7040022611618, - "p90": 73.91999661922455, - "p95": 74.27199929952621, - "p99": 79.58400249481201 - }, - "roundtrip": { - "p50": 128.67200374603271, - "p90": 132.83200562000275, - "p95": 135.0719928741455, - "p99": 140.22399485111237 - }, - "isolatedSum": { - "p50": 143.0080011487007, - "p90": 152.12799608707428, - "p95": 155.29599785804749, - "p99": 169.0240055322647 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4630528, - "combineLogicalBytes": 4630528, - "fanoutMean": 5.046875, - "recvTokensMax": 45, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 82.97599852085114, - "p90": 102.14400291442871, - "p95": 104.70400005578995, - "p99": 109.56799983978271 - }, - "combine": { - "p50": 74.30399954319, - "p90": 87.87199854850769, - "p95": 89.12000060081482, - "p99": 89.9519994854927 - }, - "roundtrip": { - "p50": 132.4480026960373, - "p90": 161.47199273109436, - "p95": 163.26400637626648, - "p99": 166.9120043516159 - }, - "isolatedSum": { - "p50": 157.27999806404114, - "p90": 190.0160014629364, - "p95": 193.82400065660477, - "p99": 199.51999932527542 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 9447424, - "combineLogicalBytes": 9447424, - "fanoutMean": 5.1484375, - "recvTokensMax": 91, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 
81.40800148248672, - "p90": 103.80800068378448, - "p95": 105.3759977221489, - "p99": 108.0000028014183 - }, - "combine": { - "p50": 79.77599650621414, - "p90": 90.08000046014786, - "p95": 90.71999788284302, - "p99": 247.67999351024628 - }, - "roundtrip": { - "p50": 138.17599415779114, - "p90": 156.3519984483719, - "p95": 159.7760021686554, - "p99": 163.83999586105347 - }, - "isolatedSum": { - "p50": 161.18399798870087, - "p90": 193.88800114393234, - "p95": 196.0959956049919, - "p99": 355.6799963116646 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 19023872, - "combineLogicalBytes": 19023872, - "fanoutMean": 5.18359375, - "recvTokensMax": 178, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 64, - "globalTokens": 512, - "dispatch": { - "p50": 99.74399954080582, - "p90": 115.35999923944473, - "p95": 117.37599968910217, - "p99": 125.2799928188324 - }, - "combine": { - "p50": 90.55999666452408, - "p90": 103.61599922180176, - "p95": 104.19200360774994, - "p99": 104.8320010304451 - }, - "roundtrip": { - "p50": 163.87200355529785, - "p90": 178.0479997396469, - "p95": 180.2240014076233, - "p99": 185.47199666500092 - }, - "isolatedSum": { - "p50": 190.3039962053299, - "p90": 218.9759984612465, - "p95": 221.5680032968521, - "p99": 230.1119938492775 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 38148096, - "combineLogicalBytes": 38148096, - "fanoutMean": 5.197265625, - "recvTokensMax": 350, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 113.66400122642517, - "p90": 132.22399353981018, - "p95": 133.88800621032715, - "p99": 139.64800536632538 - }, - "combine": { - "p50": 106.59199953079224, - "p90": 114.75200206041336, - "p95": 119.99999731779099, - "p99": 121.91999703645706 - }, - "roundtrip": { - "p50": 198.91199469566345, - "p90": 213.69600296020508, - "p95": 216.0319983959198, - 
"p99": 220.60799598693848 - }, - "isolatedSum": { - "p50": 220.2560007572174, - "p90": 246.97599560022354, - "p95": 253.88800352811813, - "p99": 261.56800240278244 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 76955648, - "combineLogicalBytes": 76955648, - "fanoutMean": 5.2421875, - "recvTokensMax": 687, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-4eb12954", - "identity": "h100|deepep|7168|8|256|bf16|normal|cached-layout-comm-only-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|ac583971f94b176", - "colorKey": "h100_5df912ff", - "comparisonKey": "5074d4febd922e2d", - "schemaVersion": 3, - "generatedAt": "2026-06-26T17:28:11.272284+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_10", - "sku": "h100", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "normalized", - "suite": "resource-constrained", - "comparisonClass": "standardized", - "measurementContract": "cached-layout-comm-only-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · bf16 (norm) [cl]", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": 0.18, - "achievedFraction": 0.1818, - "configuredUnits": 24, - "deviceUnits": 132, - "resourceClass": "resource-constrained", - "conformanceClass": "resource-conforming", - "fixedKernel": false, - "paretoEligible": true - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "ac583971f94b176", - "workloadId": "set:8:d8d49658059863f2", - 
"workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28254332840", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254332840", - "createdAt": "2026-06-26T17:28:11.272284+00:00", - "sha": "60dec7d70f554e252fec87709e2be52752947db1" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 78.91199737787247, - "p90": 85.21600067615509, - "p95": 87.20000088214874, - "p99": 93.34400296211243 - }, - "combine": { - "p50": 79.68000322580338, - "p90": 81.60000294446945, - "p95": 86.91199868917465, - "p99": 88.54400366544724 - }, - "roundtrip": { - "p50": 133.69600474834442, - "p90": 141.184002161026, - "p95": 143.2960033416748, - "p99": 151.48800611495972 - }, - "isolatedSum": { - "p50": 158.59200060367584, - "p90": 166.81600362062454, - "p95": 174.1119995713234, - "p99": 181.88800662755966 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 630784, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 62.39999830722809, - "p90": 84.35200154781342, - "p95": 87.00799942016602, - "p99": 96.57599776983261 - }, - "combine": { - "p50": 71.99999690055847, - "p90": 81.02399855852127, - "p95": 81.44000172615051, - "p99": 87.80799806118011 - }, - "roundtrip": { - "p50": 116.7680025100708, - "p90": 140.00000059604645, - "p95": 141.6960060596466, - "p99": 143.96800100803375 - }, - "isolatedSum": { - "p50": 134.39999520778656, - "p90": 165.3760001063347, - "p95": 168.44800114631653, - "p99": 184.38399583101273 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1232896, - "combineLogicalBytes": 1232896, - 
"fanoutMean": 5.375, - "recvTokensMax": 13, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 62.01599910855293, - "p90": 82.56000280380249, - "p95": 84.76799726486206, - "p99": 91.90399944782257 - }, - "combine": { - "p50": 72.89600372314453, - "p90": 86.94399893283844, - "p95": 87.61599659919739, - "p99": 88.22400122880936 - }, - "roundtrip": { - "p50": 116.57600104808807, - "p90": 143.13599467277527, - "p95": 144.96000111103058, - "p99": 189.40800428390503 - }, - "isolatedSum": { - "p50": 134.91200283169746, - "p90": 169.50400173664093, - "p95": 172.38399386405945, - "p99": 180.12800067663193 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2480128, - "combineLogicalBytes": 2480128, - "fanoutMean": 5.40625, - "recvTokensMax": 29, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 66.78400188684464, - "p90": 82.46400207281113, - "p95": 85.1840004324913, - "p99": 90.65599739551544 - }, - "combine": { - "p50": 73.02399724721909, - "p90": 86.87999844551086, - "p95": 87.55200356245041, - "p99": 88.57599645853043 - }, - "roundtrip": { - "p50": 116.67200177907944, - "p90": 142.4960047006607, - "p95": 143.64799857139587, - "p99": 149.1200029850006 - }, - "isolatedSum": { - "p50": 139.80799913406372, - "p90": 169.344000518322, - "p95": 172.7360039949417, - "p99": 179.23199385404587 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 78.97599786520004, - "p90": 84.83199775218964, - "p95": 86.94399893283844, - "p99": 90.87999910116196 - }, - "combine": { - "p50": 80.4160013794899, - "p90": 
87.99999952316284, - "p95": 88.25600147247314, - "p99": 89.75999802350998 - }, - "roundtrip": { - "p50": 116.73600226640701, - "p90": 140.00000059604645, - "p95": 143.23200285434723, - "p99": 146.94400131702423 - }, - "isolatedSum": { - "p50": 159.39199924468994, - "p90": 172.83199727535248, - "p95": 175.20000040531158, - "p99": 180.63999712467194 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 9920512, - "combineLogicalBytes": 9920512, - "fanoutMean": 5.40625, - "recvTokensMax": 92, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 75.58400183916092, - "p90": 84.6719965338707, - "p95": 86.20800077915192, - "p99": 90.97599983215332 - }, - "combine": { - "p50": 80.19199967384338, - "p90": 88.51200342178345, - "p95": 95.10400146245956, - "p99": 111.77600175142288 - }, - "roundtrip": { - "p50": 143.16800236701965, - "p90": 153.28000485897064, - "p95": 154.7520011663437, - "p99": 170.6240028142929 - }, - "isolatedSum": { - "p50": 155.7760015130043, - "p90": 173.18399995565414, - "p95": 181.31200224161148, - "p99": 202.7520015835762 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 19726336, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 182, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 64, - "globalTokens": 512, - "dispatch": { - "p50": 85.15200018882751, - "p90": 96.67199850082397, - "p95": 98.30400347709656, - "p99": 158.65600109100342 - }, - "combine": { - "p50": 91.20000153779984, - "p90": 105.02400249242783, - "p95": 106.04800283908844, - "p99": 127.87200510501862 - }, - "roundtrip": { - "p50": 151.8079936504364, - "p90": 167.67999529838562, - "p95": 172.06400632858276, - "p99": 198.2399970293045 - }, - "isolatedSum": { - "p50": 176.35200172662735, - "p90": 201.6960009932518, - "p95": 204.352006316185, - "p99": 286.52800619602203 - }, - 
"roundtripMeasured": true, - "dispatchLogicalBytes": 38993920, - "combineLogicalBytes": 38993920, - "fanoutMean": 5.3125, - "recvTokensMax": 367, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 104.73600029945374, - "p90": 119.64800208806992, - "p95": 128.03199887275696, - "p99": 401.43999457359314 - }, - "combine": { - "p50": 106.49599879980087, - "p90": 120.83200365304947, - "p95": 121.47200107574463, - "p99": 128.00000607967377 - }, - "roundtrip": { - "p50": 187.45599687099457, - "p90": 201.34399831295013, - "p95": 202.55999267101288, - "p99": 206.68800175189972 - }, - "isolatedSum": { - "p50": 211.2319990992546, - "p90": 240.48000574111938, - "p95": 249.5039999485016, - "p99": 529.4400006532669 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-76b84ec2", - "identity": "h100|deepep|7168|8|256|bf16|normal|cached-layout-comm-only-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", - "colorKey": "h100_17694d2c", - "comparisonKey": "d31efe4aa43e0223", - "schemaVersion": 3, - "generatedAt": "2026-06-26T23:47:16.080205+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_11", - "sku": "h100", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "cached-layout-comm-only-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · bf16 [cl]", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - 
"routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "ac583971f94b176", - "workloadId": "set:8:d8d49658059863f2", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28271551406", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271551406", - "createdAt": "2026-06-26T23:47:16.080205+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 77.7600035071373, - "p90": 84.25600081682205, - "p95": 86.496002972126, - "p99": 92.57599711418152 - }, - "combine": { - "p50": 75.9039968252182, - "p90": 81.95199817419052, - "p95": 82.40000158548355, - "p99": 87.2960016131401 - }, - "roundtrip": { - "p50": 131.45600259304047, - "p90": 136.25599443912506, - "p95": 138.59200477600098, - "p99": 142.68800616264343 - }, - "isolatedSum": { - "p50": 153.6640003323555, - "p90": 166.20799899101257, - "p95": 168.89600455760956, - "p99": 179.87199872732162 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 630784, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - 
"dispatch": { - "p50": 66.23999774456024, - "p90": 80.99199831485748, - "p95": 83.13599973917007, - "p99": 87.52000331878662 - }, - "combine": { - "p50": 72.06399738788605, - "p90": 81.85599744319916, - "p95": 82.11199939250946, - "p99": 85.91999858617783 - }, - "roundtrip": { - "p50": 115.55200070142746, - "p90": 136.06399297714233, - "p95": 137.9839926958084, - "p99": 142.4960047006607 - }, - "isolatedSum": { - "p50": 138.3039951324463, - "p90": 162.84799575805664, - "p95": 165.24799913167953, - "p99": 173.44000190496445 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1232896, - "combineLogicalBytes": 1232896, - "fanoutMean": 5.375, - "recvTokensMax": 13, - "stragglerRank": 3, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 77.60000228881836, - "p90": 81.69600367546082, - "p95": 83.93599838018417, - "p99": 89.02399986982346 - }, - "combine": { - "p50": 79.52000200748444, - "p90": 82.20800012350082, - "p95": 83.16799998283386, - "p99": 87.2960016131401 - }, - "roundtrip": { - "p50": 133.82400572299957, - "p90": 140.86399972438812, - "p95": 143.10400187969208, - "p99": 149.72800016403198 - }, - "isolatedSum": { - "p50": 157.1200042963028, - "p90": 163.90400379896164, - "p95": 167.10399836301804, - "p99": 176.32000148296356 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2480128, - "combineLogicalBytes": 2480128, - "fanoutMean": 5.40625, - "recvTokensMax": 29, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 77.66400277614594, - "p90": 83.13599973917007, - "p95": 87.8399983048439, - "p99": 131.67999684810638 - }, - "combine": { - "p50": 81.216000020504, - "p90": 82.71999657154083, - "p95": 84.03199911117554, - "p99": 90.20800143480301 - }, - "roundtrip": { - "p50": 134.68800485134125, - "p90": 139.55199718475342, - "p95": 142.752006649971, - "p99": 
145.56799829006195 - }, - "isolatedSum": { - "p50": 158.88000279664993, - "p90": 165.8559963107109, - "p95": 171.87199741601944, - "p99": 221.8879982829094 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 77.79199630022049, - "p90": 81.66400343179703, - "p95": 84.73599702119827, - "p99": 87.23200112581253 - }, - "combine": { - "p50": 81.69600367546082, - "p90": 84.79999750852585, - "p95": 88.95999938249588, - "p99": 90.27200192213058 - }, - "roundtrip": { - "p50": 135.29600203037262, - "p90": 143.5839980840683, - "p95": 144.96000111103058, - "p99": 150.30400454998016 - }, - "isolatedSum": { - "p50": 159.4879999756813, - "p90": 166.46400094032288, - "p95": 173.69599640369415, - "p99": 177.50400304794312 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 9920512, - "combineLogicalBytes": 9920512, - "fanoutMean": 5.40625, - "recvTokensMax": 92, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 83.42400193214417, - "p90": 88.3840024471283, - "p95": 89.28000181913376, - "p99": 95.20000219345093 - }, - "combine": { - "p50": 81.44000172615051, - "p90": 89.9839997291565, - "p95": 90.27200192213058, - "p99": 92.47999638319016 - }, - "roundtrip": { - "p50": 129.18399274349213, - "p90": 144.51199769973755, - "p95": 147.0080018043518, - "p99": 152.73599326610565 - }, - "isolatedSum": { - "p50": 164.86400365829468, - "p90": 178.3680021762848, - "p95": 179.55200374126434, - "p99": 187.67999857664108 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 19726336, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 182, - "stragglerRank": 6, - "correct": true, - 
"samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 64, - "globalTokens": 512, - "dispatch": { - "p50": 93.56799721717834, - "p90": 101.40799731016159, - "p95": 102.36799716949463, - "p99": 109.47199910879135 - }, - "combine": { - "p50": 94.81599926948547, - "p90": 99.61599856615067, - "p95": 102.33599692583084, - "p99": 105.82400113344193 - }, - "roundtrip": { - "p50": 158.78400206565857, - "p90": 165.72800278663635, - "p95": 167.04000532627106, - "p99": 170.01600563526154 - }, - "isolatedSum": { - "p50": 188.38399648666382, - "p90": 201.02399587631226, - "p95": 204.70399409532547, - "p99": 215.29600024223328 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 38993920, - "combineLogicalBytes": 38993920, - "fanoutMean": 5.3125, - "recvTokensMax": 367, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 112.41599917411804, - "p90": 120.4800009727478, - "p95": 123.48800152540207, - "p99": 303.6800026893616 - }, - "combine": { - "p50": 111.90400272607803, - "p90": 117.34399944543839, - "p95": 120.03199756145477, - "p99": 125.08800625801086 - }, - "roundtrip": { - "p50": 192.80000030994415, - "p90": 199.74400103092194, - "p95": 201.9519954919815, - "p99": 206.9759964942932 - }, - "isolatedSum": { - "p50": 224.32000190019608, - "p90": 237.8240004181862, - "p95": 243.51999908685684, - "p99": 428.76800894737244 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-6f4d88a5", - "identity": "h100|deepep|7168|8|256|bf16|ll|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", - "colorKey": "h100_8abde1a9", - "comparisonKey": "a63125ec759ccc03", - "schemaVersion": 3, - "generatedAt": 
"2026-06-26T23:48:24.132792+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_00", - "sku": "h100", - "backend": "deepep", - "phase": "decode", - "mode": "ll", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · bf16 LL", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": null, - "configuredUnits": null, - "deviceUnits": 132, - "resourceClass": "fixed-kernel", - "conformanceClass": "not-applicable", - "fixedKernel": true, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "ac583971f94b176", - "workloadId": "set:8:d8d49658059863f2", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28271587010", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271587010", - "createdAt": "2026-06-26T23:48:24.132792+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 47.29599878191948, - "p90": 48.70399832725525, - "p95": 49.02400076389313, - "p99": 54.75199967622757 - }, - "combine": { - "p50": 36.57599911093712, - "p90": 
37.408001720905304, - "p95": 38.59199956059456, - "p99": 44.60800066590309 - }, - "roundtrip": { - "p50": 58.97599831223488, - "p90": 66.6240006685257, - "p95": 67.1359971165657, - "p99": 67.6800012588501 - }, - "isolatedSum": { - "p50": 83.8719978928566, - "p90": 86.11200004816055, - "p95": 87.61600032448769, - "p99": 99.36000034213066 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 630784, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 14, - "stragglerRank": 2, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 40.32000154256821, - "p90": 48.51200059056282, - "p95": 48.73599857091904, - "p99": 53.82400006055832 - }, - "combine": { - "p50": 35.77600046992302, - "p90": 37.02399879693985, - "p95": 38.94399851560593, - "p99": 44.47999969124794 - }, - "roundtrip": { - "p50": 56.57599866390228, - "p90": 65.05600363016129, - "p95": 66.27199798822403, - "p99": 67.07199662923813 - }, - "isolatedSum": { - "p50": 76.09600201249123, - "p90": 85.53599938750267, - "p95": 87.67999708652496, - "p99": 98.30399975180626 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1232896, - "combineLogicalBytes": 1232896, - "fanoutMean": 5.375, - "recvTokensMax": 21, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 42.27200150489807, - "p90": 48.70399832725525, - "p95": 49.056001007556915, - "p99": 55.39200082421303 - }, - "combine": { - "p50": 36.70400008559227, - "p90": 37.50399872660637, - "p95": 43.07200014591217, - "p99": 45.05600035190582 - }, - "roundtrip": { - "p50": 59.167999774217606, - "p90": 66.880002617836, - "p95": 67.45599955320358, - "p99": 68.57600063085556 - }, - "isolatedSum": { - "p50": 78.97600159049034, - "p90": 86.20799705386162, - "p95": 92.12800115346909, - "p99": 100.44800117611885 - }, - "roundtripMeasured": true, - 
"dispatchLogicalBytes": 2480128, - "combineLogicalBytes": 2480128, - "fanoutMean": 5.40625, - "recvTokensMax": 39, - "stragglerRank": 2, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 47.359999269247055, - "p90": 48.70399832725525, - "p95": 48.895999789237976, - "p99": 55.26399984955788 - }, - "combine": { - "p50": 36.57599911093712, - "p90": 43.2640016078949, - "p95": 43.776001781225204, - "p99": 45.024000108242035 - }, - "roundtrip": { - "p50": 64.67200070619583, - "p90": 67.10399687290192, - "p95": 67.29599833488464, - "p99": 69.47200000286102 - }, - "isolatedSum": { - "p50": 83.93599838018417, - "p90": 91.96799993515015, - "p95": 92.67200157046318, - "p99": 100.28799995779991 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 74, - "stragglerRank": 2, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 48.448000103235245, - "p90": 55.64799904823303, - "p95": 56.2559999525547, - "p99": 56.89600110054016 - }, - "combine": { - "p50": 43.776001781225204, - "p90": 44.73600164055824, - "p95": 44.89599913358688, - "p99": 48.22399839758873 - }, - "roundtrip": { - "p50": 66.880002617836, - "p90": 73.82400333881378, - "p95": 74.68800246715546, - "p99": 75.29599964618683 - }, - "isolatedSum": { - "p50": 92.22400188446045, - "p90": 100.38400068879128, - "p95": 101.15199908614159, - "p99": 105.11999949812889 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 9920512, - "combineLogicalBytes": 9920512, - "fanoutMean": 5.40625, - "recvTokensMax": 145, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 49.60000142455101, - "p90": 56.8000003695488, - "p95": 57.08799883723259, - "p99": 
59.167999774217606 - }, - "combine": { - "p50": 51.00800096988678, - "p90": 52.86400020122528, - "p95": 53.0879981815815, - "p99": 53.98400127887726 - }, - "roundtrip": { - "p50": 75.39200037717819, - "p90": 83.26400071382523, - "p95": 83.74399691820145, - "p99": 84.63999629020691 - }, - "isolatedSum": { - "p50": 100.60800239443779, - "p90": 109.66400057077408, - "p95": 110.17599701881409, - "p99": 113.15200105309486 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 19726336, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 287, - "stragglerRank": 2, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 64, - "globalTokens": 512, - "dispatch": { - "p50": 64.92800265550613, - "p90": 67.45599955320358, - "p95": 72.41600006818771, - "p99": 74.0479975938797 - }, - "combine": { - "p50": 61.055999249219894, - "p90": 63.1679967045784, - "p95": 68.54400038719177, - "p99": 77.18399912118912 - }, - "roundtrip": { - "p50": 105.76000064611435, - "p90": 108.67200046777725, - "p95": 109.18399691581726, - "p99": 113.69600147008896 - }, - "isolatedSum": { - "p50": 125.98400190472603, - "p90": 130.62399625778198, - "p95": 140.9600004553795, - "p99": 151.23199671506882 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 38993920, - "combineLogicalBytes": 38993920, - "fanoutMean": 5.3125, - "recvTokensMax": 564, - "stragglerRank": 2, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 86.04799956083298, - "p90": 91.71199798583984, - "p95": 92.83199906349182, - "p99": 94.62399780750275 - }, - "combine": { - "p50": 94.36800330877304, - "p90": 96.79999947547913, - "p95": 97.82399982213974, - "p99": 218.78400444984436 - }, - "roundtrip": { - "p50": 152.8960019350052, - "p90": 158.91200304031372, - "p95": 159.67999398708344, - "p99": 163.2000058889389 - }, - "isolatedSum": { - "p50": 180.41600286960602, - "p90": 
188.51199746131897, - "p95": 190.65599888563156, - "p99": 313.4080022573471 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 1104, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-fecf5035", - "identity": "h100|deepep|7168|8|256|bf16|ll|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", - "colorKey": "h100_45e1ef29", - "comparisonKey": "b17b52153b29fbde", - "schemaVersion": 3, - "generatedAt": "2026-06-26T23:48:28.951078+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_11", - "sku": "h100", - "backend": "deepep", - "phase": "decode", - "mode": "ll", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "runtime-visible-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · bf16 LL", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": null, - "configuredUnits": null, - "deviceUnits": 132, - "resourceClass": "fixed-kernel", - "conformanceClass": "not-applicable", - "fixedKernel": true, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "ac583971f94b176", - "workloadId": "set:8:d8d49658059863f2", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": 
"sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28271590306", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271590306", - "createdAt": "2026-06-26T23:48:28.951078+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 42.17600077390671, - "p90": 48.928000032901764, - "p95": 49.8879998922348, - "p99": 51.77599936723709 - }, - "combine": { - "p50": 36.99199855327606, - "p90": 38.176000118255615, - "p95": 38.40000182390213, - "p99": 44.03200000524521 - }, - "roundtrip": { - "p50": 59.42400172352791, - "p90": 61.216000467538834, - "p95": 61.63199990987778, - "p99": 69.31199878454208 - }, - "isolatedSum": { - "p50": 79.16799932718277, - "p90": 87.10400015115738, - "p95": 88.28800171613693, - "p99": 95.8079993724823 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 630784, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 14, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 42.30400174856186, - "p90": 49.375999718904495, - "p95": 49.95200037956238, - "p99": 51.80799961090088 - }, - "combine": { - "p50": 38.11199963092804, - "p90": 39.0079990029335, - "p95": 39.84000161290169, - "p99": 45.9199994802475 - }, - "roundtrip": { - "p50": 60.47999858856201, - "p90": 61.69600039720535, - "p95": 63.90400230884552, - "p99": 69.21599805355072 - }, - "isolatedSum": { - "p50": 80.4160013794899, - "p90": 88.383998721838, - "p95": 89.79200199246407, - "p99": 97.72799909114838 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1232896, - "combineLogicalBytes": 1232896, - "fanoutMean": 5.375, - "recvTokensMax": 21, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - 
"globalTokens": 32, - "dispatch": { - "p50": 42.367998510599136, - "p90": 49.855999648571014, - "p95": 50.20799860358238, - "p99": 57.95200169086456 - }, - "combine": { - "p50": 37.47199848294258, - "p90": 38.7520007789135, - "p95": 39.03999924659729, - "p99": 46.30399867892265 - }, - "roundtrip": { - "p50": 59.26400050520897, - "p90": 61.983998864889145, - "p95": 63.19999694824219, - "p99": 69.50400024652481 - }, - "isolatedSum": { - "p50": 79.83999699354172, - "p90": 88.60800042748451, - "p95": 89.24799785017967, - "p99": 104.25600036978722 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2480128, - "combineLogicalBytes": 2480128, - "fanoutMean": 5.40625, - "recvTokensMax": 39, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 42.81599819660187, - "p90": 49.247998744249344, - "p95": 49.855999648571014, - "p99": 51.42400041222572 - }, - "combine": { - "p50": 37.9519984126091, - "p90": 38.784001022577286, - "p95": 40.352001786231995, - "p99": 46.39999940991402 - }, - "roundtrip": { - "p50": 60.63999980688095, - "p90": 68.35199892520905, - "p95": 68.80000233650208, - "p99": 69.88800317049026 - }, - "isolatedSum": { - "p50": 80.76799660921097, - "p90": 88.03199976682663, - "p95": 90.20800143480301, - "p99": 97.82399982213974 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 74, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 43.74400153756142, - "p90": 50.23999884724617, - "p95": 50.84799975156784, - "p99": 57.18399956822395 - }, - "combine": { - "p50": 38.2080003619194, - "p90": 45.791998505592346, - "p95": 46.08000069856644, - "p99": 49.056001007556915 - }, - "roundtrip": { - "p50": 66.91200286149979, - "p90": 69.15199756622314, - "p95": 
69.98399645090103, - "p99": 76.7040029168129 - }, - "isolatedSum": { - "p50": 81.95200189948082, - "p90": 96.03199735283852, - "p95": 96.92800045013428, - "p99": 106.24000057578087 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 9920512, - "combineLogicalBytes": 9920512, - "fanoutMean": 5.40625, - "recvTokensMax": 145, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 50.464000552892685, - "p90": 52.352000027894974, - "p95": 57.023998349905014, - "p99": 59.90400165319443 - }, - "combine": { - "p50": 46.68800160288811, - "p90": 48.128001391887665, - "p95": 49.056001007556915, - "p99": 54.84800040721893 - }, - "roundtrip": { - "p50": 76.76800340414047, - "p90": 84.44800227880478, - "p95": 85.21600067615509, - "p99": 86.30400151014328 - }, - "isolatedSum": { - "p50": 97.15200215578079, - "p90": 100.48000141978264, - "p95": 106.07999935746193, - "p99": 114.75200206041336 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 19726336, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 287, - "stragglerRank": 2, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 64, - "globalTokens": 512, - "dispatch": { - "p50": 61.69600039720535, - "p90": 66.6240006685257, - "p95": 67.55200028419495, - "p99": 73.7600028514862 - }, - "combine": { - "p50": 62.17600032687187, - "p90": 63.551999628543854, - "p95": 64.06400352716446, - "p99": 70.49600034952164 - }, - "roundtrip": { - "p50": 102.11200267076492, - "p90": 109.8560020327568, - "p95": 110.27199774980545, - "p99": 111.39199882745743 - }, - "isolatedSum": { - "p50": 123.87200072407722, - "p90": 130.17600029706955, - "p95": 131.6160038113594, - "p99": 144.25600320100784 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 38993920, - "combineLogicalBytes": 38993920, - "fanoutMean": 5.3125, - "recvTokensMax": 564, - "stragglerRank": 4, - 
"correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 85.56800335645676, - "p90": 89.50400352478027, - "p95": 90.14400094747543, - "p99": 95.45599669218063 - }, - "combine": { - "p50": 91.45600348711014, - "p90": 99.16800260543823, - "p95": 99.80800002813339, - "p99": 101.05600208044052 - }, - "roundtrip": { - "p50": 158.52800011634827, - "p90": 164.60800170898438, - "p95": 166.52800142765045, - "p99": 168.38400065898895 - }, - "isolatedSum": { - "p50": 177.0240068435669, - "p90": 188.6720061302185, - "p95": 189.95200097560883, - "p99": 196.51199877262115 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 1104, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-f1655975", - "identity": "h100|deepep|7168|8|256|bf16|ll|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|ac583971f94b176", - "colorKey": "h100_81ce2214", - "comparisonKey": "16f06985ac4d7bde", - "schemaVersion": 3, - "generatedAt": "2026-06-26T17:31:24.570568+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_02", - "sku": "h100", - "backend": "deepep", - "phase": "decode", - "mode": "ll", - "resourceMode": "normalized", - "suite": "resource-constrained", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · bf16 LL (norm)", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - 
"resourceProfile": { - "requestedFraction": 0.18, - "achievedFraction": null, - "configuredUnits": null, - "deviceUnits": 132, - "resourceClass": "fixed-kernel", - "conformanceClass": "not-applicable", - "fixedKernel": true, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "ac583971f94b176", - "workloadId": "set:8:d8d49658059863f2", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28254350430", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254350430", - "createdAt": "2026-06-26T17:31:24.570568+00:00", - "sha": "60dec7d70f554e252fec87709e2be52752947db1" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 42.49599948525429, - "p90": 50.27199909090996, - "p95": 50.87999999523163, - "p99": 57.920001447200775 - }, - "combine": { - "p50": 37.98399865627289, - "p90": 39.135999977588654, - "p95": 45.3759990632534, - "p99": 46.911999583244324 - }, - "roundtrip": { - "p50": 60.83200126886368, - "p90": 62.272001057863235, - "p95": 67.90400296449661, - "p99": 69.88800317049026 - }, - "isolatedSum": { - "p50": 80.47999814152718, - "p90": 89.40799906849861, - "p95": 96.25599905848503, - "p99": 104.8320010304451 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 630784, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 14, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 49.02400076389313, - "p90": 50.40000006556511, - "p95": 50.87999999523163, - "p99": 57.11999908089638 - }, - "combine": { - "p50": 38.2080003619194, - 
"p90": 38.84800150990486, - "p95": 39.64800015091896, - "p99": 45.85599899291992 - }, - "roundtrip": { - "p50": 61.216000467538834, - "p90": 67.84000247716904, - "p95": 68.9919963479042, - "p99": 69.88800317049026 - }, - "isolatedSum": { - "p50": 87.23200112581253, - "p90": 89.24800157546997, - "p95": 90.52800014615059, - "p99": 102.9759980738163 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1232896, - "combineLogicalBytes": 1232896, - "fanoutMean": 5.375, - "recvTokensMax": 21, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 42.75200143456459, - "p90": 50.04800111055374, - "p95": 50.52800104022026, - "p99": 57.88800120353699 - }, - "combine": { - "p50": 37.9519984126091, - "p90": 38.84800150990486, - "p95": 40.44799879193306, - "p99": 46.52800038456917 - }, - "roundtrip": { - "p50": 60.736000537872314, - "p90": 62.431998550891876, - "p95": 67.9360032081604, - "p99": 70.0799971818924 - }, - "isolatedSum": { - "p50": 80.70399984717369, - "p90": 88.8960026204586, - "p95": 90.97599983215332, - "p99": 104.41600158810616 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2480128, - "combineLogicalBytes": 2480128, - "fanoutMean": 5.40625, - "recvTokensMax": 39, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 49.12000149488449, - "p90": 50.36799982190132, - "p95": 50.783999264240265, - "p99": 56.44800141453743 - }, - "combine": { - "p50": 38.2080003619194, - "p90": 39.8080013692379, - "p95": 44.89599913358688, - "p99": 46.23999819159508 - }, - "roundtrip": { - "p50": 61.08799949288368, - "p90": 68.54400038719177, - "p95": 69.023996591568, - "p99": 70.01599669456482 - }, - "isolatedSum": { - "p50": 87.3280018568039, - "p90": 90.17600119113922, - "p95": 95.67999839782715, - "p99": 102.68799960613251 - }, - "roundtripMeasured": true, - 
"dispatchLogicalBytes": 4974592, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 74, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 49.536000937223434, - "p90": 50.783999264240265, - "p95": 52.73599922657013, - "p99": 58.079998940229416 - }, - "combine": { - "p50": 45.24800181388855, - "p90": 46.431999653577805, - "p95": 46.68800160288811, - "p99": 48.48000034689903 - }, - "roundtrip": { - "p50": 68.67200136184692, - "p90": 70.30399888753891, - "p95": 75.42400062084198, - "p99": 77.504001557827 - }, - "isolatedSum": { - "p50": 94.78400275111198, - "p90": 97.21599891781807, - "p95": 99.42400082945824, - "p99": 106.55999928712845 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 9920512, - "combineLogicalBytes": 9920512, - "fanoutMean": 5.40625, - "recvTokensMax": 145, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 50.52800104022026, - "p90": 57.5999990105629, - "p95": 58.079998940229416, - "p99": 58.97599831223488 - }, - "combine": { - "p50": 46.592000871896744, - "p90": 53.568001836538315, - "p95": 54.207999259233475, - "p99": 55.10399863123894 - }, - "roundtrip": { - "p50": 77.56800204515457, - "p90": 85.34400165081024, - "p95": 85.79199761152267, - "p99": 86.496002972126 - }, - "isolatedSum": { - "p50": 97.120001912117, - "p90": 111.16800084710121, - "p95": 112.28799819946289, - "p99": 114.07999694347382 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 19726336, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 287, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 64, - "globalTokens": 512, - "dispatch": { - "p50": 66.01600348949432, - "p90": 107.35999792814255, - "p95": 108.06400328874588, - "p99": 
109.40799862146378 - }, - "combine": { - "p50": 62.52799928188324, - "p90": 63.93600255250931, - "p95": 65.85600227117538, - "p99": 79.29600030183792 - }, - "roundtrip": { - "p50": 102.39999741315842, - "p90": 110.1439967751503, - "p95": 110.68800091743469, - "p99": 112.89600282907486 - }, - "isolatedSum": { - "p50": 128.54400277137756, - "p90": 171.29600048065186, - "p95": 173.92000555992126, - "p99": 188.7039989233017 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 38993920, - "combineLogicalBytes": 38993920, - "fanoutMean": 5.3125, - "recvTokensMax": 564, - "stragglerRank": 3, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 87.2960016131401, - "p90": 90.91199934482574, - "p95": 94.08000111579895, - "p99": 95.51999717950821 - }, - "combine": { - "p50": 88.86399865150452, - "p90": 95.64799815416336, - "p95": 96.3520035147667, - "p99": 97.43999689817429 - }, - "roundtrip": { - "p50": 153.21600437164307, - "p90": 159.90400314331055, - "p95": 160.67199409008026, - "p99": 161.95200383663177 - }, - "isolatedSum": { - "p50": 176.16000026464462, - "p90": 186.5599974989891, - "p95": 190.43200463056564, - "p99": 192.9599940776825 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 1104, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-075b23a8", - "identity": "h100|deepep|4096|8|128|fp8|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||75530960a30b452", - "colorKey": "h100_a96c99f3", - "comparisonKey": "b300aeac7d2a6068", - "schemaVersion": 3, - "generatedAt": "2026-06-27T11:15:32.751842+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_03", - "sku": "h100", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - 
"suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · fp8", - "model": "Qwen3.5", - "shape": { - "hidden": 4096, - "topk": 8, - "experts": 128, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "fp8", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "75530960a30b452", - "workloadId": "set:8:d1b92539bddfb570", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28287505969", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28287505969", - "createdAt": "2026-06-27T11:15:32.751842+00:00", - "sha": "df7fddee0a275156d4c72fa006cd2b73bce72613" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 93.34400296211243, - "p90": 97.15200215578079, - "p95": 99.2640033364296, - "p99": 102.20800340175629 - }, - "combine": { - "p50": 60.15999987721443, - "p90": 61.63199990987778, - "p95": 63.07200342416763, - "p99": 68.25599819421768 - }, - "roundtrip": { - "p50": 174.97600615024567, - "p90": 179.55200374126434, - "p95": 182.40000307559967, - "p99": 185.5359971523285 - }, - "isolatedSum": { - 
"p50": 153.50400283932686, - "p90": 158.78400206565857, - "p95": 162.33600676059723, - "p99": 170.46400159597397 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 172032, - "combineLogicalBytes": 344064, - "fanoutMean": 5.25, - "recvTokensMax": 6, - "stragglerRank": 3, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 70.3359991312027, - "p90": 95.0080007314682, - "p95": 98.11200201511383, - "p99": 103.4879982471466 - }, - "combine": { - "p50": 53.18399891257286, - "p90": 61.11999973654747, - "p95": 61.69600039720535, - "p99": 64.19199705123901 - }, - "roundtrip": { - "p50": 145.4080045223236, - "p90": 176.70400440692902, - "p95": 179.26399409770966, - "p99": 185.44000387191772 - }, - "isolatedSum": { - "p50": 123.51999804377556, - "p90": 156.12800046801567, - "p95": 159.80800241231918, - "p99": 167.67999529838562 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 352256, - "combineLogicalBytes": 704512, - "fanoutMean": 5.375, - "recvTokensMax": 12, - "stragglerRank": 3, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 74.68800246715546, - "p90": 98.04800152778625, - "p95": 100.0640019774437, - "p99": 110.97600311040878 - }, - "combine": { - "p50": 52.191998809576035, - "p90": 62.431998550891876, - "p95": 63.1679967045784, - "p99": 67.52000004053116 - }, - "roundtrip": { - "p50": 145.9520012140274, - "p90": 179.77599799633026, - "p95": 183.07200074195862, - "p99": 188.06399405002594 - }, - "isolatedSum": { - "p50": 126.88000127673149, - "p90": 160.48000007867813, - "p95": 163.2319986820221, - "p99": 178.49600315093994 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 692224, - "combineLogicalBytes": 1384448, - "fanoutMean": 5.28125, - "recvTokensMax": 26, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - 
"globalTokens": 64, - "dispatch": { - "p50": 70.23999840021133, - "p90": 96.12800180912018, - "p95": 98.43199700117111, - "p99": 103.42399775981903 - }, - "combine": { - "p50": 53.75999957323074, - "p90": 62.752000987529755, - "p95": 63.87200206518173, - "p99": 71.87200337648392 - }, - "roundtrip": { - "p50": 146.2399959564209, - "p90": 179.83999848365784, - "p95": 182.81599879264832, - "p99": 186.71999871730804 - }, - "isolatedSum": { - "p50": 123.99999797344208, - "p90": 158.88000279664993, - "p95": 162.30399906635284, - "p99": 175.29600113630295 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1372160, - "combineLogicalBytes": 2744320, - "fanoutMean": 5.234375, - "recvTokensMax": 49, - "stragglerRank": 3, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 93.85599941015244, - "p90": 98.14400225877762, - "p95": 100.832000374794, - "p99": 104.60799932479858 - }, - "combine": { - "p50": 62.144000083208084, - "p90": 63.80800157785416, - "p95": 65.08799642324448, - "p99": 69.24799829721451 - }, - "roundtrip": { - "p50": 147.2959965467453, - "p90": 180.7679980993271, - "p95": 184.86399948596954, - "p99": 189.82400000095367 - }, - "isolatedSum": { - "p50": 155.99999949336052, - "p90": 161.95200383663177, - "p95": 165.91999679803848, - "p99": 173.8559976220131 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2732032, - "combineLogicalBytes": 5464064, - "fanoutMean": 5.2109375, - "recvTokensMax": 94, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 71.10399752855301, - "p90": 95.10400146245956, - "p95": 97.75999933481216, - "p99": 105.92000186443329 - }, - "combine": { - "p50": 57.95200169086456, - "p90": 66.84800237417221, - "p95": 67.4239993095398, - "p99": 71.74400240182877 - }, - "roundtrip": { - "p50": 150.9760022163391, - "p90": 184.25600230693817, - 
"p95": 188.7039989233017, - "p99": 192.80000030994415 - }, - "isolatedSum": { - "p50": 129.05599921941757, - "p90": 161.95200383663177, - "p95": 165.18399864435196, - "p99": 177.66400426626205 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 5562368, - "combineLogicalBytes": 11124736, - "fanoutMean": 5.3046875, - "recvTokensMax": 186, - "stragglerRank": 3, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 64, - "globalTokens": 512, - "dispatch": { - "p50": 74.43200051784515, - "p90": 96.54399752616882, - "p95": 100.5759984254837, - "p99": 110.75200140476227 - }, - "combine": { - "p50": 66.17599725723267, - "p90": 75.39200037717819, - "p95": 76.22399926185608, - "p99": 80.79999685287476 - }, - "roundtrip": { - "p50": 158.75199437141418, - "p90": 192.51200556755066, - "p95": 196.19199633598328, - "p99": 201.6959935426712 - }, - "isolatedSum": { - "p50": 140.60799777507782, - "p90": 171.93599790334702, - "p95": 176.79999768733978, - "p99": 191.55199825763702 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 11096064, - "combineLogicalBytes": 22192128, - "fanoutMean": 5.291015625, - "recvTokensMax": 358, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 80.44800162315369, - "p90": 96.67199850082397, - "p95": 99.23200309276581, - "p99": 107.04000294208527 - }, - "combine": { - "p50": 78.3040001988411, - "p90": 88.79999816417694, - "p95": 89.63199704885483, - "p99": 92.83199906349182 - }, - "roundtrip": { - "p50": 173.21600019931793, - "p90": 207.519993185997, - "p95": 211.13599836826324, - "p99": 220.64000368118286 - }, - "isolatedSum": { - "p50": 158.75200182199478, - "p90": 185.47199666500092, - "p95": 188.86400014162064, - "p99": 199.8720020055771 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 22282240, - "combineLogicalBytes": 44564480, - "fanoutMean": 5.3125, - "recvTokensMax": 699, - 
"stragglerRank": 1, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-1bb82fc0", - "identity": "h100|deepep|4096|8|128|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||75530960a30b452", - "colorKey": "h100_97196257", - "comparisonKey": "efcc4c7d487df84c", - "schemaVersion": 3, - "generatedAt": "2026-06-26T23:51:08.338542+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_02", - "sku": "h100", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "runtime-visible-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · fp8", - "model": "Qwen3.5", - "shape": { - "hidden": 4096, - "topk": 8, - "experts": 128, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "fp8", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "75530960a30b452", - "workloadId": "set:8:d1b92539bddfb570", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28271676478", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271676478", - 
"createdAt": "2026-06-26T23:51:08.338542+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 184.7359985113144, - "p90": 193.08799505233765, - "p95": 196.86399400234222, - "p99": 204.25599813461304 - }, - "combine": { - "p50": 49.79199916124344, - "p90": 51.96800082921982, - "p95": 53.79199981689453, - "p99": 56.86400085687637 - }, - "roundtrip": { - "p50": 218.9760059118271, - "p90": 226.52800381183624, - "p95": 230.0799936056137, - "p99": 235.6480062007904 - }, - "isolatedSum": { - "p50": 234.52799767255783, - "p90": 245.05599588155746, - "p95": 250.65599381923676, - "p99": 261.1199989914894 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 172032, - "combineLogicalBytes": 344064, - "fanoutMean": 5.25, - "recvTokensMax": 6, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 183.87199938297272, - "p90": 192.19200313091278, - "p95": 195.16800343990326, - "p99": 201.56799256801605 - }, - "combine": { - "p50": 50.87999999523163, - "p90": 54.17599901556969, - "p95": 55.67999929189682, - "p99": 59.328000992536545 - }, - "roundtrip": { - "p50": 220.12799978256226, - "p90": 227.87199914455414, - "p95": 230.43200373649597, - "p99": 237.31200397014618 - }, - "isolatedSum": { - "p50": 234.75199937820435, - "p90": 246.36800214648247, - "p95": 250.84800273180008, - "p99": 260.8959935605526 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 352256, - "combineLogicalBytes": 704512, - "fanoutMean": 5.375, - "recvTokensMax": 12, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 187.77599930763245, - "p90": 268.0320143699646, - "p95": 271.36000990867615, - "p99": 282.49600529670715 - }, - "combine": { - "p50": 52.44800075888634, - "p90": 63.90400230884552, - 
"p95": 64.86400216817856, - "p99": 69.76000219583511 - }, - "roundtrip": { - "p50": 225.3440022468567, - "p90": 308.9280128479004, - "p95": 312.48000264167786, - "p99": 320.5440044403076 - }, - "isolatedSum": { - "p50": 240.22400006651878, - "p90": 331.9360166788101, - "p95": 336.2240120768547, - "p99": 352.25600749254227 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 692224, - "combineLogicalBytes": 1384448, - "fanoutMean": 5.28125, - "recvTokensMax": 26, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 184.03199315071106, - "p90": 193.31200420856476, - "p95": 197.79199361801147, - "p99": 205.9839963912964 - }, - "combine": { - "p50": 51.7439991235733, - "p90": 55.296000093221664, - "p95": 57.312000542879105, - "p99": 63.19999694824219 - }, - "roundtrip": { - "p50": 220.8320051431656, - "p90": 228.7680059671402, - "p95": 231.455996632576, - "p99": 239.55200612545013 - }, - "isolatedSum": { - "p50": 235.77599227428436, - "p90": 248.60800430178642, - "p95": 255.10399416089058, - "p99": 269.1839933395386 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1372160, - "combineLogicalBytes": 2744320, - "fanoutMean": 5.234375, - "recvTokensMax": 49, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 187.96800076961517, - "p90": 273.24798703193665, - "p95": 286.6879999637604, - "p99": 400.06399154663086 - }, - "combine": { - "p50": 53.75999957323074, - "p90": 65.15199691057205, - "p95": 67.45599955320358, - "p99": 75.23199915885925 - }, - "roundtrip": { - "p50": 225.600004196167, - "p90": 310.8479976654053, - "p95": 322.6880133152008, - "p99": 449.7919976711273 - }, - "isolatedSum": { - "p50": 241.72800034284592, - "p90": 338.3999839425087, - "p95": 354.14399951696396, - "p99": 475.2959907054901 - }, - "roundtripMeasured": true, - 
"dispatchLogicalBytes": 2732032, - "combineLogicalBytes": 5464064, - "fanoutMean": 5.2109375, - "recvTokensMax": 94, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 189.11999464035034, - "p90": 271.36000990867615, - "p95": 286.9440019130707, - "p99": 324.0959942340851 - }, - "combine": { - "p50": 56.44800141453743, - "p90": 68.57600063085556, - "p95": 69.11999732255936, - "p99": 73.56800138950348 - }, - "roundtrip": { - "p50": 226.27200186252594, - "p90": 234.14400219917297, - "p95": 238.68800699710846, - "p99": 254.27201390266418 - }, - "isolatedSum": { - "p50": 245.56799605488777, - "p90": 339.9360105395317, - "p95": 356.06399923563004, - "p99": 397.66399562358856 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 5562368, - "combineLogicalBytes": 11124736, - "fanoutMean": 5.3046875, - "recvTokensMax": 186, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 64, - "globalTokens": 512, - "dispatch": { - "p50": 189.34400379657745, - "p90": 270.08000016212463, - "p95": 275.2639949321747, - "p99": 289.98398780822754 - }, - "combine": { - "p50": 64.60800021886826, - "p90": 76.89599692821503, - "p95": 78.23999971151352, - "p99": 82.2720006108284 - }, - "roundtrip": { - "p50": 238.3359968662262, - "p90": 318.015992641449, - "p95": 321.4719891548157, - "p99": 329.72800731658936 - }, - "isolatedSum": { - "p50": 253.9520040154457, - "p90": 346.97599709033966, - "p95": 353.5039946436882, - "p99": 372.25598841905594 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 11096064, - "combineLogicalBytes": 22192128, - "fanoutMean": 5.291015625, - "recvTokensMax": 358, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 192.19200313091278, - "p90": 272.15999364852905, - "p95": 
275.7120132446289, - "p99": 291.29600524902344 - }, - "combine": { - "p50": 78.17599922418594, - "p90": 87.93599903583527, - "p95": 89.15200084447861, - "p99": 95.20000219345093 - }, - "roundtrip": { - "p50": 255.3279995918274, - "p90": 335.6480002403259, - "p95": 343.9359962940216, - "p99": 380.0320029258728 - }, - "isolatedSum": { - "p50": 270.3680023550987, - "p90": 360.0959926843643, - "p95": 364.8640140891075, - "p99": 386.49600744247437 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 22282240, - "combineLogicalBytes": 44564480, - "fanoutMean": 5.3125, - "recvTokensMax": 699, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-c961a187", - "identity": "h100|deepep|5120|8|160|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||be1b44a963bd4ef", - "colorKey": "h100_97196257", - "comparisonKey": "994b6e44326c8d14", - "schemaVersion": 3, - "generatedAt": "2026-06-26T23:51:36.382828+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_11", - "sku": "h100", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "runtime-visible-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · fp8", - "model": "shape 5120/8/160", - "shape": { - "hidden": 5120, - "topk": 8, - "experts": 160, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "fp8", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - 
"paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "be1b44a963bd4ef", - "workloadId": "set:8:34e5874082f8ea8f", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28271691858", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271691858", - "createdAt": "2026-06-26T23:51:36.382828+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 196.03200256824493, - "p90": 203.48800718784332, - "p95": 207.32800662517548, - "p99": 214.9759978055954 - }, - "combine": { - "p50": 53.727999329566956, - "p90": 55.48800155520439, - "p95": 57.760000228881836, - "p99": 60.80000102519989 - }, - "roundtrip": { - "p50": 231.26399517059326, - "p90": 238.91200125217438, - "p95": 242.36799776554108, - "p99": 250.0160038471222 - }, - "isolatedSum": { - "p50": 249.7600018978119, - "p90": 258.9760087430477, - "p95": 265.0880068540573, - "p99": 275.7759988307953 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 215040, - "combineLogicalBytes": 430080, - "fanoutMean": 5.25, - "recvTokensMax": 8, - "stragglerRank": 2, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 195.80799341201782, - "p90": 202.78400182724, - "p95": 205.1199972629547, - "p99": 212.12799847126007 - }, - "combine": { - "p50": 55.93600124120712, - "p90": 57.53599852323532, - "p95": 59.93599817156792, - "p99": 62.880001962184906 - }, - "roundtrip": { - "p50": 233.60000550746918, - "p90": 240.9600019454956, - "p95": 243.13600361347198, - "p99": 255.10400533676147 - 
}, - "isolatedSum": { - "p50": 251.74399465322495, - "p90": 260.3200003504753, - "p95": 265.05599543452263, - "p99": 275.008000433445 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 440320, - "combineLogicalBytes": 880640, - "fanoutMean": 5.375, - "recvTokensMax": 13, - "stragglerRank": 2, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 200.15999674797058, - "p90": 287.48801350593567, - "p95": 290.2719974517822, - "p99": 298.17599058151245 - }, - "combine": { - "p50": 57.11999908089638, - "p90": 68.67200136184692, - "p95": 69.56800073385239, - "p99": 75.3600001335144 - }, - "roundtrip": { - "p50": 238.01599442958832, - "p90": 328.5120129585266, - "p95": 332.73598551750183, - "p99": 340.1600122451782 - }, - "isolatedSum": { - "p50": 257.27999582886696, - "p90": 356.1600148677826, - "p95": 359.8399981856346, - "p99": 373.53599071502686 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 870400, - "combineLogicalBytes": 1740800, - "fanoutMean": 5.3125, - "recvTokensMax": 25, - "stragglerRank": 2, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 199.072003364563, - "p90": 282.1120023727417, - "p95": 285.8240008354187, - "p99": 292.7359938621521 - }, - "combine": { - "p50": 57.5999990105629, - "p90": 66.14399701356888, - "p95": 66.72000139951706, - "p99": 71.48800045251846 - }, - "roundtrip": { - "p50": 236.32000386714935, - "p90": 315.3280019760132, - "p95": 318.91199946403503, - "p99": 326.2079954147339 - }, - "isolatedSum": { - "p50": 256.6720023751259, - "p90": 348.2559993863106, - "p95": 352.54400223493576, - "p99": 364.22399431467056 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1735680, - "combineLogicalBytes": 3471360, - "fanoutMean": 5.296875, - "recvTokensMax": 50, - "stragglerRank": 2, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - 
"tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 199.71199333667755, - "p90": 288.86398673057556, - "p95": 291.23198986053467, - "p99": 296.4160144329071 - }, - "combine": { - "p50": 58.62399935722351, - "p90": 70.14399766921997, - "p95": 71.03999704122543, - "p99": 74.11199808120728 - }, - "roundtrip": { - "p50": 239.19999599456787, - "p90": 329.75998520851135, - "p95": 332.5439989566803, - "p99": 338.3359909057617 - }, - "isolatedSum": { - "p50": 258.33599269390106, - "p90": 359.00798439979553, - "p95": 362.2719869017601, - "p99": 370.5280125141144 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 3456000, - "combineLogicalBytes": 6912000, - "fanoutMean": 5.2734375, - "recvTokensMax": 93, - "stragglerRank": 2, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 200.3519982099533, - "p90": 288.2559895515442, - "p95": 290.49599170684814, - "p99": 295.1360046863556 - }, - "combine": { - "p50": 63.040003180503845, - "p90": 73.44000041484833, - "p95": 73.95199686288834, - "p99": 79.45600152015686 - }, - "roundtrip": { - "p50": 244.25600469112396, - "p90": 330.7200074195862, - "p95": 333.24798941612244, - "p99": 339.35999870300293 - }, - "isolatedSum": { - "p50": 263.39200139045715, - "p90": 361.6959899663925, - "p95": 364.4479885697365, - "p99": 374.59200620651245 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 6988800, - "combineLogicalBytes": 13977600, - "fanoutMean": 5.33203125, - "recvTokensMax": 179, - "stragglerRank": 2, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 64, - "globalTokens": 512, - "dispatch": { - "p50": 199.5519995689392, - "p90": 287.55199909210205, - "p95": 291.6480004787445, - "p99": 305.5360019207001 - }, - "combine": { - "p50": 73.34399968385696, - "p90": 85.02399921417236, - "p95": 86.5280032157898, - "p99": 89.72799777984619 - }, - "roundtrip": { - "p50": 254.72000241279602, - 
"p90": 339.83999490737915, - "p95": 342.97600388526917, - "p99": 349.5680093765259 - }, - "isolatedSum": { - "p50": 272.8959992527962, - "p90": 372.5759983062744, - "p95": 378.1760036945343, - "p99": 395.26399970054626 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 13987840, - "combineLogicalBytes": 27975680, - "fanoutMean": 5.3359375, - "recvTokensMax": 355, - "stragglerRank": 2, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 206.33600652217865, - "p90": 288.32000494003296, - "p95": 292.4480140209198, - "p99": 296.671986579895 - }, - "combine": { - "p50": 86.87999844551086, - "p90": 100.19200295209885, - "p95": 104.63999956846237, - "p99": 326.24000310897827 - }, - "roundtrip": { - "p50": 274.944007396698, - "p90": 355.0719916820526, - "p95": 358.8480055332184, - "p99": 364.8959994316101 - }, - "isolatedSum": { - "p50": 293.2160049676895, - "p90": 388.5120078921318, - "p95": 397.0880135893822, - "p99": 622.9119896888733 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 27837440, - "combineLogicalBytes": 55674880, - "fanoutMean": 5.3095703125, - "recvTokensMax": 699, - "stragglerRank": 2, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-0c56b994", - "identity": "h100|deepep|6144|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", - "colorKey": "h100_a96c99f3", - "comparisonKey": "b1bf09d425749f09", - "schemaVersion": 3, - "generatedAt": "2026-06-27T11:13:21.071476+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_18", - "sku": "h100", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - 
"epSize": 8, - "label": "H100 EP8 · deepep · fp8", - "model": "MiniMax-M3", - "shape": { - "hidden": 6144, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "fp8", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "ac583971f94b176", - "workloadId": "set:8:2e0df6a62cd0143e", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28287494014", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28287494014", - "createdAt": "2026-06-27T11:13:21.071476+00:00", - "sha": "df7fddee0a275156d4c72fa006cd2b73bce72613" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 98.68799895048141, - "p90": 103.26399654150009, - "p95": 105.27999699115753, - "p99": 110.11199653148651 - }, - "combine": { - "p50": 69.24799829721451, - "p90": 71.16799801588058, - "p95": 72.51200079917908, - "p99": 74.97599720954895 - }, - "roundtrip": { - "p50": 197.40800559520721, - "p90": 202.4639993906021, - "p95": 204.96000349521637, - "p99": 210.87999641895294 - }, - "isolatedSum": { - "p50": 167.93599724769592, - "p90": 174.43199455738068, - "p95": 177.7919977903366, - "p99": 185.08799374103546 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 270336, - 
"combineLogicalBytes": 540672, - "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 3, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 71.23199850320816, - "p90": 100.73599964380264, - "p95": 103.26399654150009, - "p99": 108.83200168609619 - }, - "combine": { - "p50": 58.27200040221214, - "p90": 69.95200365781784, - "p95": 71.68000191450119, - "p99": 75.45600086450577 - }, - "roundtrip": { - "p50": 151.96800231933594, - "p90": 197.24799692630768, - "p95": 199.71199333667755, - "p99": 207.93600380420685 - }, - "isolatedSum": { - "p50": 129.5039989054203, - "p90": 170.68800330162048, - "p95": 174.94399845600128, - "p99": 184.28800255060196 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 528384, - "combineLogicalBytes": 1056768, - "fanoutMean": 5.375, - "recvTokensMax": 13, - "stragglerRank": 1, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 71.84000313282013, - "p90": 94.87999975681305, - "p95": 98.49599748849869, - "p99": 103.93600165843964 - }, - "combine": { - "p50": 60.447998344898224, - "p90": 67.1359971165657, - "p95": 68.64000111818314, - "p99": 72.95999675989151 - }, - "roundtrip": { - "p50": 154.40000593662262, - "p90": 196.31999731063843, - "p95": 197.79199361801147, - "p99": 202.2400051355362 - }, - "isolatedSum": { - "p50": 132.28800147771835, - "p90": 162.01599687337875, - "p95": 167.13599860668182, - "p99": 176.89599841833115 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1062912, - "combineLogicalBytes": 2125824, - "fanoutMean": 5.40625, - "recvTokensMax": 29, - "stragglerRank": 1, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 93.88799965381622, - "p90": 101.02400183677673, - "p95": 103.42399775981903, - "p99": 116.12799763679504 - }, - "combine": { - "p50": 
66.3359984755516, - "p90": 71.48800045251846, - "p95": 73.02399724721909, - "p99": 77.31200009584427 - }, - "roundtrip": { - "p50": 193.6960071325302, - "p90": 200.00000298023224, - "p95": 202.5279998779297, - "p99": 206.56000077724457 - }, - "isolatedSum": { - "p50": 160.22399812936783, - "p90": 172.5120022892952, - "p95": 176.44799500703812, - "p99": 193.4399977326393 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2131968, - "combineLogicalBytes": 4263936, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 3, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 92.96000003814697, - "p90": 100.99200159311295, - "p95": 102.78400033712387, - "p99": 106.78400099277496 - }, - "combine": { - "p50": 67.52000004053116, - "p90": 72.9919970035553, - "p95": 74.30399954319, - "p99": 78.59200239181519 - }, - "roundtrip": { - "p50": 196.76800072193146, - "p90": 203.0400037765503, - "p95": 205.1199972629547, - "p99": 208.8640034198761 - }, - "isolatedSum": { - "p50": 160.48000007867813, - "p90": 173.98399859666824, - "p95": 177.08799988031387, - "p99": 185.37600338459015 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4251648, - "combineLogicalBytes": 8503296, - "fanoutMean": 5.40625, - "recvTokensMax": 92, - "stragglerRank": 2, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 72.92799651622772, - "p90": 95.48799693584442, - "p95": 99.20000284910202, - "p99": 104.8320010304451 - }, - "combine": { - "p50": 66.78400188684464, - "p90": 73.37599992752075, - "p95": 74.75200295448303, - "p99": 78.17599922418594 - }, - "roundtrip": { - "p50": 160.51200032234192, - "p90": 202.07999646663666, - "p95": 204.79999482631683, - "p99": 209.60000157356262 - }, - "isolatedSum": { - "p50": 139.71199840307236, - "p90": 168.86399686336517, - "p95": 173.95200580358505, - "p99": 
183.00800025463104 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 8454144, - "combineLogicalBytes": 16908288, - "fanoutMean": 5.375, - "recvTokensMax": 182, - "stragglerRank": 1, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 64, - "globalTokens": 512, - "dispatch": { - "p50": 96.09600156545639, - "p90": 101.72799974679947, - "p95": 107.4879989027977, - "p99": 478.08000445365906 - }, - "combine": { - "p50": 82.07999914884567, - "p90": 87.10400015115738, - "p95": 87.8399983048439, - "p99": 89.82399851083755 - }, - "roundtrip": { - "p50": 175.58400332927704, - "p90": 211.96800470352173, - "p95": 215.03999829292297, - "p99": 219.9999988079071 - }, - "isolatedSum": { - "p50": 178.17600071430206, - "p90": 188.83199989795685, - "p95": 195.3279972076416, - "p99": 567.9040029644966 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 16711680, - "combineLogicalBytes": 33423360, - "fanoutMean": 5.3125, - "recvTokensMax": 367, - "stragglerRank": 1, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 85.82399785518646, - "p90": 103.10400277376175, - "p95": 106.11200332641602, - "p99": 116.60800129175186 - }, - "combine": { - "p50": 91.45600348711014, - "p90": 99.35999661684036, - "p95": 102.62399911880493, - "p99": 148.3200043439865 - }, - "roundtrip": { - "p50": 200.6720006465912, - "p90": 229.18400168418884, - "p95": 231.64799809455872, - "p99": 236.86400055885315 - }, - "isolatedSum": { - "p50": 177.2800013422966, - "p90": 202.4639993906021, - "p95": 208.73600244522095, - "p99": 264.9280056357384 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 33288192, - "combineLogicalBytes": 66576384, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 1, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-55a4c230", - "identity": 
"h100|deepep|6144|8|256|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", - "colorKey": "h100_97196257", - "comparisonKey": "8ab5124e24ec36ab", - "schemaVersion": 3, - "generatedAt": "2026-06-26T23:52:02.860609+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_19", - "sku": "h100", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "runtime-visible-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · fp8", - "model": "MiniMax-M3", - "shape": { - "hidden": 6144, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "fp8", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "ac583971f94b176", - "workloadId": "set:8:2e0df6a62cd0143e", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28271706435", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271706435", - "createdAt": "2026-06-26T23:52:02.860609+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - 
"tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 196.8960016965866, - "p90": 227.77600586414337, - "p95": 297.40801453590393, - "p99": 503.32802534103394 - }, - "combine": { - "p50": 57.920001447200775, - "p90": 62.144000083208084, - "p95": 67.10399687290192, - "p99": 282.0799946784973 - }, - "roundtrip": { - "p50": 237.40799725055695, - "p90": 243.77599358558655, - "p95": 245.31200528144836, - "p99": 250.0160038471222 - }, - "isolatedSum": { - "p50": 254.81600314378738, - "p90": 289.92000594735146, - "p95": 364.51201140880585, - "p99": 785.4080200195312 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 270336, - "combineLogicalBytes": 540672, - "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 1, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 197.1839964389801, - "p90": 204.92799580097198, - "p95": 207.45599269866943, - "p99": 214.6880030632019 - }, - "combine": { - "p50": 58.49599838256836, - "p90": 60.92799827456474, - "p95": 63.26399743556976, - "p99": 70.65600156784058 - }, - "roundtrip": { - "p50": 237.56800591945648, - "p90": 243.96799504756927, - "p95": 247.29600548744202, - "p99": 255.61600923538208 - }, - "isolatedSum": { - "p50": 255.67999482154846, - "p90": 265.8559940755367, - "p95": 270.7199901342392, - "p99": 285.3440046310425 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 528384, - "combineLogicalBytes": 1056768, - "fanoutMean": 5.375, - "recvTokensMax": 13, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 200.47999918460846, - "p90": 282.71999955177307, - "p95": 291.20001196861267, - "p99": 401.2480080127716 - }, - "combine": { - "p50": 59.90400165319443, - "p90": 66.84800237417221, - "p95": 69.5360004901886, - "p99": 75.68000257015228 - }, - "roundtrip": { - "p50": 243.20000410079956, - "p90": 
321.9839930534363, - "p95": 326.7199993133545, - "p99": 334.75199341773987 - }, - "isolatedSum": { - "p50": 260.3840008378029, - "p90": 349.5680019259453, - "p95": 360.73601245880127, - "p99": 476.9280105829239 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1062912, - "combineLogicalBytes": 2125824, - "fanoutMean": 5.40625, - "recvTokensMax": 29, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 200.6399929523468, - "p90": 261.9200050830841, - "p95": 265.6959891319275, - "p99": 275.1680016517639 - }, - "combine": { - "p50": 60.99199876189232, - "p90": 69.2799985408783, - "p95": 69.88800317049026, - "p99": 75.32799988985062 - }, - "roundtrip": { - "p50": 239.9040013551712, - "p90": 296.9599962234497, - "p95": 299.8400032520294, - "p99": 307.5200021266937 - }, - "isolatedSum": { - "p50": 261.6319917142391, - "p90": 331.2000036239624, - "p95": 335.58399230241776, - "p99": 350.49600154161453 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2131968, - "combineLogicalBytes": 4263936, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 1, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 201.75999402999878, - "p90": 280.3199887275696, - "p95": 284.89598631858826, - "p99": 351.48799419403076 - }, - "combine": { - "p50": 61.76000088453293, - "p90": 69.72800195217133, - "p95": 72.92799651622772, - "p99": 133.82400572299957 - }, - "roundtrip": { - "p50": 245.82399427890778, - "p90": 325.53601264953613, - "p95": 328.8959860801697, - "p99": 600.3199815750122 - }, - "isolatedSum": { - "p50": 263.5199949145317, - "p90": 350.0479906797409, - "p95": 357.823982834816, - "p99": 485.31199991703033 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4251648, - "combineLogicalBytes": 8503296, - "fanoutMean": 5.40625, - "recvTokensMax": 92, - 
"stragglerRank": 1, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 200.73600113391876, - "p90": 285.0559949874878, - "p95": 287.9680097103119, - "p99": 303.42400074005127 - }, - "combine": { - "p50": 66.78400188684464, - "p90": 78.20799946784973, - "p95": 79.93599772453308, - "p99": 83.8719978928566 - }, - "roundtrip": { - "p50": 249.9839961528778, - "p90": 319.487988948822, - "p95": 328.8959860801697, - "p99": 336.35199069976807 - }, - "isolatedSum": { - "p50": 267.5200030207634, - "p90": 363.2639944553375, - "p95": 367.90400743484497, - "p99": 387.29599863290787 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 8454144, - "combineLogicalBytes": 16908288, - "fanoutMean": 5.375, - "recvTokensMax": 182, - "stragglerRank": 1, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 64, - "globalTokens": 512, - "dispatch": { - "p50": 200.73600113391876, - "p90": 281.2480032444, - "p95": 289.11998867988586, - "p99": 304.9919903278351 - }, - "combine": { - "p50": 77.11999863386154, - "p90": 84.1279998421669, - "p95": 86.40000224113464, - "p99": 95.77599912881851 - }, - "roundtrip": { - "p50": 259.5840096473694, - "p90": 337.8559947013855, - "p95": 341.3439989089966, - "p99": 350.5280017852783 - }, - "isolatedSum": { - "p50": 277.8559997677803, - "p90": 365.3760030865669, - "p95": 375.5199909210205, - "p99": 400.7679894566536 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 16711680, - "combineLogicalBytes": 33423360, - "fanoutMean": 5.3125, - "recvTokensMax": 367, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 212.5760018825531, - "p90": 282.1759879589081, - "p95": 286.5920066833496, - "p99": 307.96799063682556 - }, - "combine": { - "p50": 92.06400066614151, - "p90": 98.11200201511383, - "p95": 99.48799759149551, - "p99": 
103.74400019645691 - }, - "roundtrip": { - "p50": 289.44000601768494, - "p90": 355.3279936313629, - "p95": 359.71200466156006, - "p99": 366.91200733184814 - }, - "isolatedSum": { - "p50": 304.6400025486946, - "p90": 380.2879899740219, - "p95": 386.0800042748451, - "p99": 411.71199083328247 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 33288192, - "combineLogicalBytes": 66576384, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-416fcf7d", - "identity": "h100|deepep|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", - "colorKey": "h100_a96c99f3", - "comparisonKey": "59d5014bb7031dbe", - "schemaVersion": 3, - "generatedAt": "2026-06-27T10:13:04.882575+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_19", - "sku": "h100", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · fp8", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "fp8", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - 
"traceSignature": "ac583971f94b176", - "workloadId": "set:8:d8d49658059863f2", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "2.0.0+af9a040", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28286086353", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28286086353", - "createdAt": "2026-06-27T10:13:04.882575+00:00", - "sha": "76a3032d20288ee17220eb6099346f74d56ce005" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 100.03200173377991, - "p90": 104.44799810647964, - "p95": 106.30399733781815, - "p99": 110.59200018644333 - }, - "combine": { - "p50": 74.65600222349167, - "p90": 76.38400048017502, - "p95": 77.69600301980972, - "p99": 81.7599967122078 - }, - "roundtrip": { - "p50": 195.64799964427948, - "p90": 208.3200067281723, - "p95": 210.65600216388702, - "p99": 216.15999937057495 - }, - "isolatedSum": { - "p50": 174.68800395727158, - "p90": 180.83199858665466, - "p95": 184.00000035762787, - "p99": 192.35199689865112 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 315392, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 71.74400240182877, - "p90": 101.08800232410431, - "p95": 102.62399911880493, - "p99": 109.15199667215347 - }, - "combine": { - "p50": 64.19199705123901, - "p90": 74.43200051784515, - "p95": 75.00799745321274, - "p99": 78.62400263547897 - }, - "roundtrip": { - "p50": 158.59200060367584, - "p90": 206.81600272655487, - "p95": 209.9519968032837, - "p99": 367.71199107170105 - }, - "isolatedSum": { - "p50": 135.93599945306778, - "p90": 175.52000284194946, - "p95": 177.63199657201767, - "p99": 187.77599930763245 - }, - 
"roundtripMeasured": true, - "dispatchLogicalBytes": 616448, - "combineLogicalBytes": 1232896, - "fanoutMean": 5.375, - "recvTokensMax": 13, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 71.77600264549255, - "p90": 102.78400033712387, - "p95": 104.76800054311752, - "p99": 109.63200032711029 - }, - "combine": { - "p50": 65.8240020275116, - "p90": 77.85599678754807, - "p95": 78.5600021481514, - "p99": 81.82399719953537 - }, - "roundtrip": { - "p50": 159.71200168132782, - "p90": 209.98400449752808, - "p95": 212.09600567817688, - "p99": 216.92800521850586 - }, - "isolatedSum": { - "p50": 137.60000467300415, - "p90": 180.63999712467194, - "p95": 183.32800269126892, - "p99": 191.45599752664566 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1240064, - "combineLogicalBytes": 2480128, - "fanoutMean": 5.40625, - "recvTokensMax": 29, - "stragglerRank": 3, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 97.79199957847595, - "p90": 103.61599922180176, - "p95": 106.175996363163, - "p99": 111.90400272607803 - }, - "combine": { - "p50": 75.71200281381607, - "p90": 77.98399776220322, - "p95": 79.77599650621414, - "p99": 83.64800363779068 - }, - "roundtrip": { - "p50": 195.71200013160706, - "p90": 209.6640020608902, - "p95": 211.96800470352173, - "p99": 217.8879976272583 - }, - "isolatedSum": { - "p50": 173.50400239229202, - "p90": 181.59999698400497, - "p95": 185.95199286937714, - "p99": 195.5520063638687 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2487296, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 97.9200005531311, - "p90": 102.91200131177902, - "p95": 
105.34399747848511, - "p99": 110.04800349473953 - }, - "combine": { - "p50": 77.31200009584427, - "p90": 80.79999685287476, - "p95": 81.98399841785431, - "p99": 87.00799942016602 - }, - "roundtrip": { - "p50": 197.02400267124176, - "p90": 212.3199999332428, - "p95": 214.36800062656403, - "p99": 219.200000166893 - }, - "isolatedSum": { - "p50": 175.23200064897537, - "p90": 183.71199816465378, - "p95": 187.32799589633942, - "p99": 197.05600291490555 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4960256, - "combineLogicalBytes": 9920512, - "fanoutMean": 5.40625, - "recvTokensMax": 92, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 73.98399710655212, - "p90": 102.55999863147736, - "p95": 105.02400249242783, - "p99": 107.87200182676315 - }, - "combine": { - "p50": 73.21599870920181, - "p90": 85.56800335645676, - "p95": 86.46400272846222, - "p99": 90.33600240945816 - }, - "roundtrip": { - "p50": 168.03200542926788, - "p90": 216.73600375652313, - "p95": 218.36799383163452, - "p99": 223.1999933719635 - }, - "isolatedSum": { - "p50": 147.19999581575394, - "p90": 188.1280019879341, - "p95": 191.48800522089005, - "p99": 198.2080042362213 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 9863168, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 182, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 64, - "globalTokens": 512, - "dispatch": { - "p50": 97.98400104045868, - "p90": 142.752006649971, - "p95": 145.82400023937225, - "p99": 154.27200496196747 - }, - "combine": { - "p50": 92.19200164079666, - "p90": 112.96000331640244, - "p95": 113.82400244474411, - "p99": 118.07999759912491 - }, - "roundtrip": { - "p50": 179.77599799633026, - "p90": 277.3439884185791, - "p95": 285.535991191864, - "p99": 456.64000511169434 - }, - "isolatedSum": { - "p50": 
190.17600268125534, - "p90": 255.71200996637344, - "p95": 259.64800268411636, - "p99": 272.3520025610924 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 19496960, - "combineLogicalBytes": 38993920, - "fanoutMean": 5.3125, - "recvTokensMax": 367, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 90.43200314044952, - "p90": 111.42399907112122, - "p95": 113.24799805879593, - "p99": 117.40799993276596 - }, - "combine": { - "p50": 100.5759984254837, - "p90": 112.47999966144562, - "p95": 114.01599645614624, - "p99": 117.53600090742111 - }, - "roundtrip": { - "p50": 219.7120040655136, - "p90": 246.87999486923218, - "p95": 249.2160052061081, - "p99": 254.07999753952026 - }, - "isolatedSum": { - "p50": 191.00800156593323, - "p90": 223.90399873256683, - "p95": 227.26399451494217, - "p99": 234.94400084018707 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 38836224, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-d4dbb29d", - "identity": "h100|deepep|7168|8|256|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", - "colorKey": "h100_97196257", - "comparisonKey": "9687217877b9ce9c", - "schemaVersion": 3, - "generatedAt": "2026-06-26T23:48:10.138934+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_03", - "sku": "h100", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "runtime-visible-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · fp8", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - 
"topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "fp8", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "ac583971f94b176", - "workloadId": "set:8:d8d49658059863f2", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28271579958", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271579958", - "createdAt": "2026-06-26T23:48:10.138934+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 193.05600225925446, - "p90": 204.3839991092682, - "p95": 210.52800118923187, - "p99": 277.9200077056885 - }, - "combine": { - "p50": 60.95999851822853, - "p90": 63.29599767923355, - "p95": 65.31199812889099, - "p99": 68.76800209283829 - }, - "roundtrip": { - "p50": 237.63200640678406, - "p90": 244.25600469112396, - "p95": 246.14399671554565, - "p99": 269.4079875946045 - }, - "isolatedSum": { - "p50": 254.016000777483, - "p90": 267.67999678850174, - "p95": 275.83999931812286, - "p99": 346.68800979852676 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 315392, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 0, - "correct": true, - 
"samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 192.9280012845993, - "p90": 200.6720006465912, - "p95": 204.79999482631683, - "p99": 264.5759880542755 - }, - "combine": { - "p50": 62.272001057863235, - "p90": 64.7680014371872, - "p95": 67.391999065876, - "p99": 73.08799773454666 - }, - "roundtrip": { - "p50": 235.6480062007904, - "p90": 243.0720031261444, - "p95": 245.60000002384186, - "p99": 259.71201062202454 - }, - "isolatedSum": { - "p50": 255.20000234246254, - "p90": 265.4400020837784, - "p95": 272.19199389219284, - "p99": 337.6639857888222 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 616448, - "combineLogicalBytes": 1232896, - "fanoutMean": 5.375, - "recvTokensMax": 13, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 197.24799692630768, - "p90": 286.080002784729, - "p95": 290.71998596191406, - "p99": 302.2400140762329 - }, - "combine": { - "p50": 63.32799792289734, - "p90": 71.32799923419952, - "p95": 75.45600086450577, - "p99": 82.62400329113007 - }, - "roundtrip": { - "p50": 242.94400215148926, - "p90": 349.40800070762634, - "p95": 354.4960021972656, - "p99": 367.13600158691406 - }, - "isolatedSum": { - "p50": 260.575994849205, - "p90": 357.4080020189285, - "p95": 366.17598682641983, - "p99": 384.864017367363 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1240064, - "combineLogicalBytes": 2480128, - "fanoutMean": 5.40625, - "recvTokensMax": 29, - "stragglerRank": 2, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 196.383997797966, - "p90": 251.583993434906, - "p95": 254.8159956932068, - "p99": 268.15998554229736 - }, - "combine": { - "p50": 63.87200206518173, - "p90": 72.73600250482559, - "p95": 73.5040009021759, - "p99": 77.95199751853943 - }, - "roundtrip": { - "p50": 
242.11199581623077, - "p90": 299.3920147418976, - "p95": 304.1599988937378, - "p99": 410.8160138130188 - }, - "isolatedSum": { - "p50": 260.25599986314774, - "p90": 324.3199959397316, - "p95": 328.3199965953827, - "p99": 346.1119830608368 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2487296, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 197.63199985027313, - "p90": 288.35201263427734, - "p95": 294.048011302948, - "p99": 322.04800844192505 - }, - "combine": { - "p50": 66.46399945020676, - "p90": 79.9039974808693, - "p95": 106.33599758148193, - "p99": 204.25599813461304 - }, - "roundtrip": { - "p50": 246.62399291992188, - "p90": 330.24001121520996, - "p95": 333.5359990596771, - "p99": 341.18399024009705 - }, - "isolatedSum": { - "p50": 264.0959993004799, - "p90": 368.25601011514664, - "p95": 400.38400888442993, - "p99": 526.3040065765381 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4960256, - "combineLogicalBytes": 9920512, - "fanoutMean": 5.40625, - "recvTokensMax": 92, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 198.40000569820404, - "p90": 284.35200452804565, - "p95": 288.06400299072266, - "p99": 295.9040105342865 - }, - "combine": { - "p50": 70.97599655389786, - "p90": 79.96799796819687, - "p95": 80.70400357246399, - "p99": 83.52000266313553 - }, - "roundtrip": { - "p50": 250.36799907684326, - "p90": 306.5919876098633, - "p95": 310.2079927921295, - "p99": 368.8639998435974 - }, - "isolatedSum": { - "p50": 269.3760022521019, - "p90": 364.3200024962425, - "p95": 368.76800656318665, - "p99": 379.424013197422 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 9863168, - "combineLogicalBytes": 19726336, - "fanoutMean": 
5.375, - "recvTokensMax": 182, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 64, - "globalTokens": 512, - "dispatch": { - "p50": 198.65599274635315, - "p90": 284.8320007324219, - "p95": 289.69600796699524, - "p99": 304.4480085372925 - }, - "combine": { - "p50": 80.48000186681747, - "p90": 88.83199840784073, - "p95": 90.52799642086029, - "p99": 101.31199657917023 - }, - "roundtrip": { - "p50": 260.96001267433167, - "p90": 351.80801153182983, - "p95": 355.55198788642883, - "p99": 367.0400083065033 - }, - "isolatedSum": { - "p50": 279.1359946131706, - "p90": 373.6639991402626, - "p95": 380.22400438785553, - "p99": 405.7600051164627 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 19496960, - "combineLogicalBytes": 38993920, - "fanoutMean": 5.3125, - "recvTokensMax": 367, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 216.8319970369339, - "p90": 312.8640055656433, - "p95": 320.73599100112915, - "p99": 336.41600608825684 - }, - "combine": { - "p50": 98.94400089979172, - "p90": 112.83200234174728, - "p95": 113.79200220108032, - "p99": 119.13599818944931 - }, - "roundtrip": { - "p50": 303.2959997653961, - "p90": 388.0000114440918, - "p95": 392.2879993915558, - "p99": 401.2480080127716 - }, - "isolatedSum": { - "p50": 315.7759979367256, - "p90": 425.6960079073906, - "p95": 434.5279932022095, - "p99": 455.55200427770615 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 38836224, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-52396484", - "identity": "h100|deepep|7168|8|384|fp8|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||d6c49ae98878760", - "colorKey": "h100_a96c99f3", - "comparisonKey": "7d245d1c48b9f399", 
- "schemaVersion": 3, - "generatedAt": "2026-06-27T11:15:21.281924+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_19", - "sku": "h100", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · fp8", - "model": "Kimi-K2", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 384, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "fp8", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "d6c49ae98878760", - "workloadId": "set:8:9a27d0df4b17fa09", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28287500362", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28287500362", - "createdAt": "2026-06-27T11:15:21.281924+00:00", - "sha": "df7fddee0a275156d4c72fa006cd2b73bce72613" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 98.78399968147278, - "p90": 104.54399883747101, - "p95": 108.22399705648422, - "p99": 114.88000303506851 - }, - "combine": { 
- "p50": 71.45600020885468, - "p90": 73.34399968385696, - "p95": 74.49600100517273, - "p99": 145.88800072669983 - }, - "roundtrip": { - "p50": 201.12000405788422, - "p90": 207.2640061378479, - "p95": 210.11200547218323, - "p99": 237.59999871253967 - }, - "isolatedSum": { - "p50": 170.23999989032745, - "p90": 177.88799852132797, - "p95": 182.71999806165695, - "p99": 260.76800376176834 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 301056, - "combineLogicalBytes": 602112, - "fanoutMean": 5.25, - "recvTokensMax": 8, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 99.48799759149551, - "p90": 103.64799946546555, - "p95": 105.66399991512299, - "p99": 111.55200004577637 - }, - "combine": { - "p50": 72.95999675989151, - "p90": 74.5600014925003, - "p95": 75.99999755620956, - "p99": 78.97599786520004 - }, - "roundtrip": { - "p50": 203.19999754428864, - "p90": 207.13600516319275, - "p95": 210.1760059595108, - "p99": 213.82400393486023 - }, - "isolatedSum": { - "p50": 172.44799435138702, - "p90": 178.20800095796585, - "p95": 181.66399747133255, - "p99": 190.5279979109764 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 609280, - "combineLogicalBytes": 1218560, - "fanoutMean": 5.3125, - "recvTokensMax": 14, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 99.29600358009338, - "p90": 104.09600287675858, - "p95": 106.175996363163, - "p99": 110.49599945545197 - }, - "combine": { - "p50": 72.06399738788605, - "p90": 74.17599856853485, - "p95": 75.52000135183334, - "p99": 79.74400371313095 - }, - "roundtrip": { - "p50": 202.72000133991241, - "p90": 207.90399610996246, - "p95": 211.0079973936081, - "p99": 221.24800086021423 - }, - "isolatedSum": { - "p50": 171.36000096797943, - "p90": 178.27200144529343, - "p95": 181.69599771499634, - "p99": 
190.24000316858292 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1204224, - "combineLogicalBytes": 2408448, - "fanoutMean": 5.25, - "recvTokensMax": 26, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 100.16000270843506, - "p90": 104.35199737548828, - "p95": 106.27199709415436, - "p99": 111.93600296974182 - }, - "combine": { - "p50": 73.11999797821045, - "p90": 75.16799867153168, - "p95": 76.80000364780426, - "p99": 83.20000022649765 - }, - "roundtrip": { - "p50": 203.42400670051575, - "p90": 208.12800526618958, - "p95": 210.78400313854218, - "p99": 215.29600024223328 - }, - "isolatedSum": { - "p50": 173.2800006866455, - "p90": 179.51999604701996, - "p95": 183.07200074195862, - "p99": 195.13600319623947 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2415616, - "combineLogicalBytes": 4831232, - "fanoutMean": 5.265625, - "recvTokensMax": 48, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 98.88000041246414, - "p90": 103.58399897813797, - "p95": 106.27199709415436, - "p99": 112.22399771213531 - }, - "combine": { - "p50": 75.93599706888199, - "p90": 78.3040001988411, - "p95": 80.60800284147263, - "p99": 82.91199803352356 - }, - "roundtrip": { - "p50": 205.72799444198608, - "p90": 210.01599729061127, - "p95": 212.6079946756363, - "p99": 216.89599752426147 - }, - "isolatedSum": { - "p50": 174.81599748134613, - "p90": 181.88799917697906, - "p95": 186.87999993562698, - "p99": 195.13599574565887 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4924416, - "combineLogicalBytes": 9848832, - "fanoutMean": 5.3671875, - "recvTokensMax": 91, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 99.93600100278854, - "p90": 
142.71999895572662, - "p95": 161.5999937057495, - "p99": 181.11999332904816 - }, - "combine": { - "p50": 82.07999914884567, - "p90": 102.01600193977356, - "p95": 109.40799862146378, - "p99": 114.52800035476685 - }, - "roundtrip": { - "p50": 211.64800226688385, - "p90": 216.35200083255768, - "p95": 218.23999285697937, - "p99": 223.32799434661865 - }, - "isolatedSum": { - "p50": 182.01600015163422, - "p90": 244.73600089550018, - "p95": 271.0079923272133, - "p99": 295.647993683815 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 9748480, - "combineLogicalBytes": 19496960, - "fanoutMean": 5.3125, - "recvTokensMax": 178, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 64, - "globalTokens": 512, - "dispatch": { - "p50": 101.40799731016159, - "p90": 206.81600272655487, - "p95": 216.86400473117828, - "p99": 370.88000774383545 - }, - "combine": { - "p50": 91.16800129413605, - "p90": 95.29600292444229, - "p95": 99.5199978351593, - "p99": 122.40000069141388 - }, - "roundtrip": { - "p50": 221.37600183486938, - "p90": 226.43199563026428, - "p95": 228.7680059671402, - "p99": 233.34400355815887 - }, - "isolatedSum": { - "p50": 192.57599860429764, - "p90": 302.11200565099716, - "p95": 316.3840025663376, - "p99": 493.28000843524933 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 19418112, - "combineLogicalBytes": 38836224, - "fanoutMean": 5.291015625, - "recvTokensMax": 372, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 106.23999685049057, - "p90": 109.8880022764206, - "p95": 112.5440001487732, - "p99": 117.5680011510849 - }, - "combine": { - "p50": 107.77600109577179, - "p90": 110.20799726247787, - "p95": 111.48799955844879, - "p99": 114.56000059843063 - }, - "roundtrip": { - "p50": 240.35200476646423, - "p90": 247.1040040254593, - "p95": 249.82400238513947, - "p99": 295.80798745155334 - 
}, - "isolatedSum": { - "p50": 214.01599794626236, - "p90": 220.09599953889847, - "p95": 224.03199970722198, - "p99": 232.12800174951553 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 38757376, - "combineLogicalBytes": 77514752, - "fanoutMean": 5.2802734375, - "recvTokensMax": 707, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-8e5c4d34", - "identity": "h100|deepep|7168|8|384|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||d6c49ae98878760", - "colorKey": "h100_97196257", - "comparisonKey": "969c3964291e1270", - "schemaVersion": 3, - "generatedAt": "2026-06-26T23:50:43.012530+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_19", - "sku": "h100", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "runtime-visible-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · fp8", - "model": "Kimi-K2", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 384, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "fp8", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "d6c49ae98878760", - "workloadId": "set:8:9a27d0df4b17fa09", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - 
"eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28271660154", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271660154", - "createdAt": "2026-06-26T23:50:43.012530+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 198.7520009279251, - "p90": 206.2399983406067, - "p95": 209.56799387931824, - "p99": 221.69600427150726 - }, - "combine": { - "p50": 60.83200126886368, - "p90": 64.31999802589417, - "p95": 65.98400324583054, - "p99": 69.05599683523178 - }, - "roundtrip": { - "p50": 242.71999299526215, - "p90": 250.07998943328857, - "p95": 254.5279860496521, - "p99": 290.0159955024719 - }, - "isolatedSum": { - "p50": 259.5840021967888, - "p90": 270.55999636650085, - "p95": 275.5519971251488, - "p99": 290.75200110673904 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 301056, - "combineLogicalBytes": 602112, - "fanoutMean": 5.25, - "recvTokensMax": 8, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 205.53599298000336, - "p90": 313.6320114135742, - "p95": 323.8399922847748, - "p99": 375.5840063095093 - }, - "combine": { - "p50": 62.81600147485733, - "p90": 76.1599987745285, - "p95": 79.19999957084656, - "p99": 83.0719992518425 - }, - "roundtrip": { - "p50": 242.49599874019623, - "p90": 250.43201446533203, - "p95": 253.08799743652344, - "p99": 294.1119968891144 - }, - "isolatedSum": { - "p50": 268.3519944548607, - "p90": 389.7920101881027, - "p95": 403.03999185562134, - "p99": 458.6560055613518 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 609280, - "combineLogicalBytes": 1218560, - "fanoutMean": 5.3125, - "recvTokensMax": 14, - "stragglerRank": 0, - "correct": 
true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 203.5519927740097, - "p90": 291.55200719833374, - "p95": 296.09599709510803, - "p99": 303.6159873008728 - }, - "combine": { - "p50": 63.26399743556976, - "p90": 73.98399710655212, - "p95": 75.83999633789062, - "p99": 80.09599894285202 - }, - "roundtrip": { - "p50": 247.42400646209717, - "p90": 336.67200803756714, - "p95": 339.4559919834137, - "p99": 346.20800614356995 - }, - "isolatedSum": { - "p50": 266.81599020957947, - "p90": 365.53600430488586, - "p95": 371.93599343299866, - "p99": 383.7119862437248 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1204224, - "combineLogicalBytes": 2408448, - "fanoutMean": 5.25, - "recvTokensMax": 26, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 199.45600628852844, - "p90": 207.8080028295517, - "p95": 213.02400529384613, - "p99": 235.29599606990814 - }, - "combine": { - "p50": 62.72000074386597, - "p90": 67.16799736022949, - "p95": 68.64000111818314, - "p99": 73.60000163316727 - }, - "roundtrip": { - "p50": 245.85600197315216, - "p90": 253.1839907169342, - "p95": 256.9279968738556, - "p99": 269.3119943141937 - }, - "isolatedSum": { - "p50": 262.1760070323944, - "p90": 274.9760001897812, - "p95": 281.66400641202927, - "p99": 308.8959977030754 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2415616, - "combineLogicalBytes": 4831232, - "fanoutMean": 5.265625, - "recvTokensMax": 48, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 204.22400534152985, - "p90": 292.60799288749695, - "p95": 296.3840067386627, - "p99": 434.30399894714355 - }, - "combine": { - "p50": 66.14399701356888, - "p90": 75.55200159549713, - "p95": 76.1599987745285, - "p99": 79.8719972372055 - }, - 
"roundtrip": { - "p50": 250.59199333190918, - "p90": 335.32801270484924, - "p95": 340.2239978313446, - "p99": 366.5919899940491 - }, - "isolatedSum": { - "p50": 270.3680023550987, - "p90": 368.1599944829941, - "p95": 372.5440055131912, - "p99": 514.1759961843491 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4924416, - "combineLogicalBytes": 9848832, - "fanoutMean": 5.3671875, - "recvTokensMax": 91, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 200.99200308322906, - "p90": 286.3039970397949, - "p95": 293.3120131492615, - "p99": 305.11999130249023 - }, - "combine": { - "p50": 70.88000327348709, - "p90": 75.83999633789062, - "p95": 78.11199873685837, - "p99": 86.84799820184708 - }, - "roundtrip": { - "p50": 253.31199169158936, - "p90": 259.71201062202454, - "p95": 262.4959945678711, - "p99": 270.9439992904663 - }, - "isolatedSum": { - "p50": 271.87200635671616, - "p90": 362.14399337768555, - "p95": 371.42401188611984, - "p99": 391.9679895043373 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 9748480, - "combineLogicalBytes": 19496960, - "fanoutMean": 5.3125, - "recvTokensMax": 178, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 64, - "globalTokens": 512, - "dispatch": { - "p50": 204.22400534152985, - "p90": 293.8239872455597, - "p95": 299.74400997161865, - "p99": 323.4559893608093 - }, - "combine": { - "p50": 81.82399719953537, - "p90": 93.40800344944, - "p95": 96.63999825716019, - "p99": 99.64799880981445 - }, - "roundtrip": { - "p50": 268.73600482940674, - "p90": 351.6159951686859, - "p95": 354.4960021972656, - "p99": 361.6639971733093 - }, - "isolatedSum": { - "p50": 286.0480025410652, - "p90": 387.2319906949997, - "p95": 396.38400822877884, - "p99": 423.1039881706238 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 19418112, - "combineLogicalBytes": 
38836224, - "fanoutMean": 5.291015625, - "recvTokensMax": 372, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 224.2240011692047, - "p90": 294.5919930934906, - "p95": 298.4960079193115, - "p99": 310.8159899711609 - }, - "combine": { - "p50": 99.90400075912476, - "p90": 110.33599823713303, - "p95": 111.35999858379364, - "p99": 114.68800157308578 - }, - "roundtrip": { - "p50": 310.88000535964966, - "p90": 375.2320110797882, - "p95": 378.04800271987915, - "p99": 386.46399974823 - }, - "isolatedSum": { - "p50": 324.12800192832947, - "p90": 404.9279913306236, - "p95": 409.85600650310516, - "p99": 425.5039915442467 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 38757376, - "combineLogicalBytes": 77514752, - "fanoutMean": 5.2802734375, - "recvTokensMax": 707, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-4e4a7f2d", - "identity": "h100|deepep|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|ac583971f94b176", - "colorKey": "h100_91aa6e56", - "comparisonKey": "511cf861d6b2e142", - "schemaVersion": 3, - "generatedAt": "2026-06-26T17:28:00.849157+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_18", - "sku": "h100", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "normalized", - "suite": "resource-constrained", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · fp8 (norm)", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": 
"fp8", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": 0.18, - "achievedFraction": 0.1818, - "configuredUnits": 24, - "deviceUnits": 132, - "resourceClass": "resource-constrained", - "conformanceClass": "resource-conforming", - "fixedKernel": false, - "paretoEligible": true - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "ac583971f94b176", - "workloadId": "set:8:d8d49658059863f2", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28254323956", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254323956", - "createdAt": "2026-06-26T17:28:00.849157+00:00", - "sha": "60dec7d70f554e252fec87709e2be52752947db1" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 97.98400104045868, - "p90": 102.88000106811523, - "p95": 104.38399761915207, - "p99": 110.20799726247787 - }, - "combine": { - "p50": 72.28799909353256, - "p90": 74.14399832487106, - "p95": 75.29599964618683, - "p99": 78.65600287914276 - }, - "roundtrip": { - "p50": 190.65600633621216, - "p90": 195.90400159358978, - "p95": 198.30399751663208, - "p99": 202.72000133991241 - }, - "isolatedSum": { - "p50": 170.27200013399124, - "p90": 177.0239993929863, - "p95": 179.6799972653389, - "p99": 188.86400014162064 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 315392, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 72.15999811887741, - "p90": 99.90400075912476, - "p95": 
102.52799838781357, - "p99": 105.0880029797554 - }, - "combine": { - "p50": 63.35999816656113, - "p90": 73.18399846553802, - "p95": 73.98399710655212, - "p99": 78.46400141716003 - }, - "roundtrip": { - "p50": 153.82400155067444, - "p90": 194.43200528621674, - "p95": 196.28800451755524, - "p99": 201.05600357055664 - }, - "isolatedSum": { - "p50": 135.51999628543854, - "p90": 173.08799922466278, - "p95": 176.5119954943657, - "p99": 183.55200439691544 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 616448, - "combineLogicalBytes": 1232896, - "fanoutMean": 5.375, - "recvTokensMax": 13, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 72.31999933719635, - "p90": 103.4879982471466, - "p95": 107.26399719715118, - "p99": 115.48800021409988 - }, - "combine": { - "p50": 64.03200328350067, - "p90": 76.28799974918365, - "p95": 77.82399654388428, - "p99": 81.98399841785431 - }, - "roundtrip": { - "p50": 156.09599649906158, - "p90": 202.36800611019135, - "p95": 205.63200116157532, - "p99": 212.51200139522552 - }, - "isolatedSum": { - "p50": 136.35200262069702, - "p90": 179.77599799633026, - "p95": 185.08799374103546, - "p99": 197.4719986319542 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1240064, - "combineLogicalBytes": 2480128, - "fanoutMean": 5.40625, - "recvTokensMax": 29, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 97.50399738550186, - "p90": 102.30399668216705, - "p95": 105.85600137710571, - "p99": 113.40799927711487 - }, - "combine": { - "p50": 63.80800157785416, - "p90": 74.94399696588516, - "p95": 76.28799974918365, - "p99": 80.89599758386612 - }, - "roundtrip": { - "p50": 154.6880006790161, - "p90": 194.7840005159378, - "p95": 199.0399956703186, - "p99": 203.87199521064758 - }, - "isolatedSum": { - "p50": 161.31199896335602, 
- "p90": 177.24799364805222, - "p95": 182.14400112628937, - "p99": 194.303996860981 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2487296, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 97.08800166845322, - "p90": 104.3199971318245, - "p95": 107.39199817180634, - "p99": 113.43999952077866 - }, - "combine": { - "p50": 75.74400305747986, - "p90": 78.49600166082382, - "p95": 80.06399869918823, - "p99": 83.36000144481659 - }, - "roundtrip": { - "p50": 195.2960044145584, - "p90": 205.85599541664124, - "p95": 209.85600352287292, - "p99": 223.83999824523926 - }, - "isolatedSum": { - "p50": 172.83200472593307, - "p90": 182.81599879264832, - "p95": 187.45599687099457, - "p99": 196.80000096559525 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4960256, - "combineLogicalBytes": 9920512, - "fanoutMean": 5.40625, - "recvTokensMax": 92, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 73.11999797821045, - "p90": 104.16000336408615, - "p95": 106.84800148010254, - "p99": 112.09599673748016 - }, - "combine": { - "p50": 69.2799985408783, - "p90": 81.88799768686295, - "p95": 82.87999778985977, - "p99": 88.28800171613693 - }, - "roundtrip": { - "p50": 161.21600568294525, - "p90": 206.65599405765533, - "p95": 210.84800362586975, - "p99": 216.22399985790253 - }, - "isolatedSum": { - "p50": 142.39999651908875, - "p90": 186.0480010509491, - "p95": 189.7279992699623, - "p99": 200.3839984536171 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 9863168, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 182, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 64, - 
"globalTokens": 512, - "dispatch": { - "p50": 78.65600287914276, - "p90": 106.9440022110939, - "p95": 110.55999994277954, - "p99": 125.44000148773193 - }, - "combine": { - "p50": 83.64800363779068, - "p90": 96.38399630784988, - "p95": 97.69599884748459, - "p99": 100.00000149011612 - }, - "roundtrip": { - "p50": 175.7120043039322, - "p90": 222.6880043745041, - "p95": 225.24799406528473, - "p99": 231.74400627613068 - }, - "isolatedSum": { - "p50": 162.30400651693344, - "p90": 203.3279985189438, - "p95": 208.25599879026413, - "p99": 225.44000297784805 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 19496960, - "combineLogicalBytes": 38993920, - "fanoutMean": 5.3125, - "recvTokensMax": 367, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 90.33600240945816, - "p90": 110.84800213575363, - "p95": 113.82400244474411, - "p99": 117.11999773979187 - }, - "combine": { - "p50": 98.78399968147278, - "p90": 111.00800335407257, - "p95": 112.0000034570694, - "p99": 117.21599847078323 - }, - "roundtrip": { - "p50": 216.12800657749176, - "p90": 240.60800671577454, - "p95": 244.25600469112396, - "p99": 250.2720057964325 - }, - "isolatedSum": { - "p50": 189.12000209093094, - "p90": 221.8560054898262, - "p95": 225.8240059018135, - "p99": 234.3359962105751 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 38836224, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-750e874d", - "identity": "h100|deepep|7168|8|256|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|ac583971f94b176", - "colorKey": "h100_7f10961a", - "comparisonKey": "f145cb161a39591f", - "schemaVersion": 3, - "generatedAt": "2026-06-26T15:23:35.919985+00:00", - "status": "valid", - "publicationStatus": "official", - 
"runner": "h100-dgxc-slurm_05", - "sku": "h100", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "normalized", - "suite": "resource-constrained", - "comparisonClass": "standardized", - "measurementContract": "runtime-visible-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · fp8 (norm)", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "fp8", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": 0.18, - "achievedFraction": 0.1818, - "configuredUnits": 24, - "deviceUnits": 132, - "resourceClass": "unknown", - "conformanceClass": "resource-conforming", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "ac583971f94b176", - "workloadId": "set:8:d8d49658059863f2", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28247584217", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28247584217", - "createdAt": "2026-06-26T15:23:35.919985+00:00", - "sha": "fd23d02b65dba6f1ed963342b188022fc27263d1" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 251.93598866462708, - "p90": 260.3839933872223, - "p95": 263.10399174690247, - "p99": 268.5759961605072 - }, - "combine": { - "p50": 68.41599941253662, - "p90": 69.88800317049026, - "p95": 70.8480030298233, - "p99": 76.03199779987335 - }, 
- "roundtrip": { - "p50": 296.51200771331787, - "p90": 304.1279911994934, - "p95": 306.40000104904175, - "p99": 349.15199875831604 - }, - "isolatedSum": { - "p50": 320.3519880771637, - "p90": 330.27199655771255, - "p95": 333.95199477672577, - "p99": 344.60799396038055 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 315392, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 200.51200687885284, - "p90": 256.8320035934448, - "p95": 259.99999046325684, - "p99": 268.0000066757202 - }, - "combine": { - "p50": 63.00800293684006, - "p90": 71.00799679756165, - "p95": 71.84000313282013, - "p99": 74.68800246715546 - }, - "roundtrip": { - "p50": 243.1039959192276, - "p90": 300.1919984817505, - "p95": 303.5840094089508, - "p99": 308.9919984340668 - }, - "isolatedSum": { - "p50": 263.5200098156929, - "p90": 327.84000039100647, - "p95": 331.83999359607697, - "p99": 342.68800914287567 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 616448, - "combineLogicalBytes": 1232896, - "fanoutMean": 5.375, - "recvTokensMax": 13, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 199.13600385189056, - "p90": 287.9680097103119, - "p95": 291.1359965801239, - "p99": 298.2720136642456 - }, - "combine": { - "p50": 63.519999384880066, - "p90": 75.1039981842041, - "p95": 76.73600316047668, - "p99": 81.40800148248672 - }, - "roundtrip": { - "p50": 246.17600440979004, - "p90": 330.84800839424133, - "p95": 333.9200019836426, - "p99": 343.6479866504669 - }, - "isolatedSum": { - "p50": 262.65600323677063, - "p90": 363.072007894516, - "p95": 367.8719997406006, - "p99": 379.68001514673233 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1240064, - "combineLogicalBytes": 2480128, - 
"fanoutMean": 5.40625, - "recvTokensMax": 29, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 199.16799664497375, - "p90": 258.14399123191833, - "p95": 261.4080011844635, - "p99": 267.16798543930054 - }, - "combine": { - "p50": 63.4239986538887, - "p90": 72.57600128650665, - "p95": 73.18399846553802, - "p99": 76.28799974918365 - }, - "roundtrip": { - "p50": 244.83199417591095, - "p90": 302.3039996623993, - "p95": 305.759996175766, - "p99": 310.94399094581604 - }, - "isolatedSum": { - "p50": 262.59199529886246, - "p90": 330.719992518425, - "p95": 334.5919996500015, - "p99": 343.4559851884842 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2487296, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 200.28799772262573, - "p90": 286.5599989891052, - "p95": 290.0800108909607, - "p99": 296.57599329948425 - }, - "combine": { - "p50": 65.5359998345375, - "p90": 76.86399668455124, - "p95": 77.66400277614594, - "p99": 80.76799660921097 - }, - "roundtrip": { - "p50": 248.57600033283234, - "p90": 330.4640054702759, - "p95": 333.6319923400879, - "p99": 344.7360098361969 - }, - "isolatedSum": { - "p50": 265.82399755716324, - "p90": 363.42399567365646, - "p95": 367.7440136671066, - "p99": 377.3439899086952 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4960256, - "combineLogicalBytes": 9920512, - "fanoutMean": 5.40625, - "recvTokensMax": 92, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 198.88000190258026, - "p90": 284.4800055027008, - "p95": 288.12798857688904, - "p99": 293.0240035057068 - }, - "combine": { - "p50": 69.18399780988693, - "p90": 
80.54400235414505, - "p95": 81.4720019698143, - "p99": 84.63999629020691 - }, - "roundtrip": { - "p50": 253.12000513076782, - "p90": 334.01599526405334, - "p95": 336.89600229263306, - "p99": 340.31999111175537 - }, - "isolatedSum": { - "p50": 268.0639997124672, - "p90": 365.02400785684586, - "p95": 369.59999054670334, - "p99": 377.6639997959137 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 9863168, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 182, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 64, - "globalTokens": 512, - "dispatch": { - "p50": 202.07999646663666, - "p90": 355.00800609588623, - "p95": 361.7280125617981, - "p99": 423.007994890213 - }, - "combine": { - "p50": 82.65600353479385, - "p90": 94.11200135946274, - "p95": 95.8079993724823, - "p99": 99.45599734783173 - }, - "roundtrip": { - "p50": 266.88000559806824, - "p90": 352.03200578689575, - "p95": 355.3600013256073, - "p99": 361.4720106124878 - }, - "isolatedSum": { - "p50": 284.7360000014305, - "p90": 449.12000745534897, - "p95": 457.5360119342804, - "p99": 522.4639922380447 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 19496960, - "combineLogicalBytes": 38993920, - "fanoutMean": 5.3125, - "recvTokensMax": 367, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 221.79199755191803, - "p90": 289.72798585891724, - "p95": 293.08798909187317, - "p99": 300.9600043296814 - }, - "combine": { - "p50": 98.27200323343277, - "p90": 108.8000014424324, - "p95": 110.1439967751503, - "p99": 113.88800293207169 - }, - "roundtrip": { - "p50": 303.74398827552795, - "p90": 364.8639917373657, - "p95": 367.45598912239075, - "p99": 371.5519905090332 - }, - "isolatedSum": { - "p50": 320.0640007853508, - "p90": 398.52798730134964, - "p95": 403.23198586702347, - "p99": 414.8480072617531 - }, - 
"roundtripMeasured": true, - "dispatchLogicalBytes": 38836224, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-b83230a1", - "identity": "h100|deepep|7168|8|256|fp8|normal|cached-layout-comm-only-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|ac583971f94b176", - "colorKey": "h100_eddc3af6", - "comparisonKey": "f291497d6f9ce0d1", - "schemaVersion": 3, - "generatedAt": "2026-06-26T17:31:42.999710+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_12", - "sku": "h100", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "normalized", - "suite": "resource-constrained", - "comparisonClass": "standardized", - "measurementContract": "cached-layout-comm-only-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · fp8 (norm) [cl]", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "fp8", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": 0.18, - "achievedFraction": 0.1818, - "configuredUnits": 24, - "deviceUnits": 132, - "resourceClass": "resource-constrained", - "conformanceClass": "resource-conforming", - "fixedKernel": false, - "paretoEligible": true - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "ac583971f94b176", - "workloadId": "set:8:d8d49658059863f2", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": 
"sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28254341346", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254341346", - "createdAt": "2026-06-26T17:31:42.999710+00:00", - "sha": "60dec7d70f554e252fec87709e2be52752947db1" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 81.34400099515915, - "p90": 84.927998483181, - "p95": 86.496002972126, - "p99": 90.14400094747543 - }, - "combine": { - "p50": 71.3919997215271, - "p90": 73.91999661922455, - "p95": 74.87999647855759, - "p99": 77.98399776220322 - }, - "roundtrip": { - "p50": 173.15199971199036, - "p90": 178.6240041255951, - "p95": 180.92800676822662, - "p99": 186.5600049495697 - }, - "isolatedSum": { - "p50": 152.73600071668625, - "p90": 158.84799510240555, - "p95": 161.3759994506836, - "p99": 168.12799870967865 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 315392, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 1, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 58.49599838256836, - "p90": 82.78399705886841, - "p95": 84.3840017914772, - "p99": 90.01599997282028 - }, - "combine": { - "p50": 63.07200342416763, - "p90": 74.0479975938797, - "p95": 74.8480036854744, - "p99": 77.44000107049942 - }, - "roundtrip": { - "p50": 141.12000167369843, - "p90": 176.54399573802948, - "p95": 178.81600558757782, - "p99": 181.92000687122345 - }, - "isolatedSum": { - "p50": 121.56800180673599, - "p90": 156.8319946527481, - "p95": 159.2320054769516, - "p99": 167.4560010433197 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 616448, - "combineLogicalBytes": 1232896, - "fanoutMean": 5.375, - "recvTokensMax": 13, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - 
"globalTokens": 32, - "dispatch": { - "p50": 59.13599953055382, - "p90": 82.68799632787704, - "p95": 85.37600189447403, - "p99": 91.61599725484848 - }, - "combine": { - "p50": 63.64800035953522, - "p90": 74.14399832487106, - "p95": 75.19999891519547, - "p99": 79.32800054550171 - }, - "roundtrip": { - "p50": 140.83200693130493, - "p90": 178.49600315093994, - "p95": 180.92800676822662, - "p99": 187.45599687099457 - }, - "isolatedSum": { - "p50": 122.78399989008904, - "p90": 156.8319946527481, - "p95": 160.5760008096695, - "p99": 170.9439978003502 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1240064, - "combineLogicalBytes": 2480128, - "fanoutMean": 5.40625, - "recvTokensMax": 29, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 61.792001128196716, - "p90": 83.20000022649765, - "p95": 86.07999980449677, - "p99": 96.00000083446503 - }, - "combine": { - "p50": 65.43999910354614, - "p90": 75.93599706888199, - "p95": 78.14399898052216, - "p99": 83.74399691820145 - }, - "roundtrip": { - "p50": 144.44799721240997, - "p90": 181.15200102329254, - "p95": 184.25600230693817, - "p99": 199.8080015182495 - }, - "isolatedSum": { - "p50": 127.23200023174286, - "p90": 159.13599729537964, - "p95": 164.22399878501892, - "p99": 179.74399775266647 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2487296, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 2, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 80.38400113582611, - "p90": 83.52000266313553, - "p95": 85.08799970149994, - "p99": 92.38400310277939 - }, - "combine": { - "p50": 75.80800354480743, - "p90": 77.85599678754807, - "p95": 79.03999835252762, - "p99": 80.83199709653854 - }, - "roundtrip": { - "p50": 150.59199929237366, - "p90": 182.49599635601044, - 
"p95": 184.60799753665924, - "p99": 194.815993309021 - }, - "isolatedSum": { - "p50": 156.19200468063354, - "p90": 161.3759994506836, - "p95": 164.12799805402756, - "p99": 173.21600019931793 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4960256, - "combineLogicalBytes": 9920512, - "fanoutMean": 5.40625, - "recvTokensMax": 92, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 61.792001128196716, - "p90": 81.727996468544, - "p95": 84.28800106048584, - "p99": 89.88799899816513 - }, - "combine": { - "p50": 69.34399902820587, - "p90": 79.96799796819687, - "p95": 81.24800026416779, - "p99": 83.99999886751175 - }, - "roundtrip": { - "p50": 146.11199498176575, - "p90": 184.32000279426575, - "p95": 186.52799725532532, - "p99": 192.44800508022308 - }, - "isolatedSum": { - "p50": 131.1360001564026, - "p90": 161.69599443674088, - "p95": 165.53600132465363, - "p99": 173.88799786567688 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 9863168, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 182, - "stragglerRank": 1, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 64, - "globalTokens": 512, - "dispatch": { - "p50": 72.80000299215317, - "p90": 86.43200248479843, - "p95": 92.54399687051773, - "p99": 99.7759997844696 - }, - "combine": { - "p50": 85.08799970149994, - "p90": 95.0080007314682, - "p95": 96.41599655151367, - "p99": 101.21600329875946 - }, - "roundtrip": { - "p50": 182.8799992799759, - "p90": 202.94399559497833, - "p95": 208.3200067281723, - "p99": 218.176007270813 - }, - "isolatedSum": { - "p50": 157.8880026936531, - "p90": 181.44000321626663, - "p95": 188.9599934220314, - "p99": 200.99200308322906 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 19496960, - "combineLogicalBytes": 38993920, - "fanoutMean": 5.3125, - "recvTokensMax": 367, - "stragglerRank": 0, - 
"correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 77.79199630022049, - "p90": 92.12800115346909, - "p95": 93.72799843549728, - "p99": 98.24000298976898 - }, - "combine": { - "p50": 99.55199807882309, - "p90": 109.72800105810165, - "p95": 110.91200262308121, - "p99": 114.46399986743927 - }, - "roundtrip": { - "p50": 205.1520049571991, - "p90": 219.200000166893, - "p95": 220.89600563049316, - "p99": 223.4880030155182 - }, - "isolatedSum": { - "p50": 177.34399437904358, - "p90": 201.85600221157074, - "p95": 204.6400010585785, - "p99": 212.70400285720825 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 38836224, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-d8e58489", - "identity": "h100|deepep|7168|8|256|fp8|normal|cached-layout-comm-only-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", - "colorKey": "h100_ec72792b", - "comparisonKey": "2bfd4913feb2a935", - "schemaVersion": 3, - "generatedAt": "2026-06-26T23:47:54.320638+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_02", - "sku": "h100", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "cached-layout-comm-only-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · fp8 [cl]", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "fp8", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - 
"requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "ac583971f94b176", - "workloadId": "set:8:d8d49658059863f2", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28271573150", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271573150", - "createdAt": "2026-06-26T23:47:54.320638+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 78.3040001988411, - "p90": 82.07999914884567, - "p95": 84.44800227880478, - "p99": 88.03199976682663 - }, - "combine": { - "p50": 71.1359977722168, - "p90": 72.86400347948074, - "p95": 73.82400333881378, - "p99": 77.88799703121185 - }, - "roundtrip": { - "p50": 136.63999736309052, - "p90": 174.75199699401855, - "p95": 177.15199291706085, - "p99": 181.08800053596497 - }, - "isolatedSum": { - "p50": 149.4399979710579, - "p90": 154.94400262832642, - "p95": 158.27200561761856, - "p99": 165.91999679803848 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 315392, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 56.832000613212585, - "p90": 79.74400371313095, - "p95": 81.11999928951263, - "p99": 85.69599688053131 - }, - "combine": { - "p50": 62.3680017888546, - "p90": 
71.58400118350983, - "p95": 72.25599884986877, - "p99": 75.9039968252182 - }, - "roundtrip": { - "p50": 138.0160003900528, - "p90": 172.95999825000763, - "p95": 174.30399358272552, - "p99": 179.61600422859192 - }, - "isolatedSum": { - "p50": 119.20000240206718, - "p90": 151.32800489664078, - "p95": 153.3759981393814, - "p99": 161.5999937057495 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 616448, - "combineLogicalBytes": 1232896, - "fanoutMean": 5.375, - "recvTokensMax": 13, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 56.92800134420395, - "p90": 82.0159986615181, - "p95": 85.02399921417236, - "p99": 87.77599781751633 - }, - "combine": { - "p50": 63.07200342416763, - "p90": 74.94399696588516, - "p95": 76.28799974918365, - "p99": 79.99999821186066 - }, - "roundtrip": { - "p50": 138.7840062379837, - "p90": 179.51999604701996, - "p95": 182.01600015163422, - "p99": 187.42400407791138 - }, - "isolatedSum": { - "p50": 120.00000476837158, - "p90": 156.95999562740326, - "p95": 161.31199896335602, - "p99": 167.77599602937698 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1240064, - "combineLogicalBytes": 2480128, - "fanoutMean": 5.40625, - "recvTokensMax": 29, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 56.832000613212585, - "p90": 80.99199831485748, - "p95": 82.94399827718735, - "p99": 87.99999952316284 - }, - "combine": { - "p50": 63.71200084686279, - "p90": 74.43200051784515, - "p95": 75.19999891519547, - "p99": 79.52000200748444 - }, - "roundtrip": { - "p50": 139.93600010871887, - "p90": 178.5919964313507, - "p95": 181.98400735855103, - "p99": 185.47199666500092 - }, - "isolatedSum": { - "p50": 120.54400146007538, - "p90": 155.42399883270264, - "p95": 158.1439971923828, - "p99": 167.52000153064728 - }, - 
"roundtripMeasured": true, - "dispatchLogicalBytes": 2487296, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 57.472001761198044, - "p90": 81.79199695587158, - "p95": 84.28800106048584, - "p99": 87.87199854850769 - }, - "combine": { - "p50": 65.5359998345375, - "p90": 77.37600058317184, - "p95": 79.3600007891655, - "p99": 82.46400207281113 - }, - "roundtrip": { - "p50": 141.184002161026, - "p90": 181.7920058965683, - "p95": 184.9599927663803, - "p99": 191.93600118160248 - }, - "isolatedSum": { - "p50": 123.00800159573555, - "p90": 159.16799753904343, - "p95": 163.64800184965134, - "p99": 170.33600062131882 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4960256, - "combineLogicalBytes": 9920512, - "fanoutMean": 5.40625, - "recvTokensMax": 92, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 60.32000109553337, - "p90": 82.0159986615181, - "p95": 84.63999629020691, - "p99": 91.0400003194809 - }, - "combine": { - "p50": 70.97599655389786, - "p90": 82.14399963617325, - "p95": 83.20000022649765, - "p99": 88.60799670219421 - }, - "roundtrip": { - "p50": 147.0080018043518, - "p90": 185.7919991016388, - "p95": 188.06399405002594, - "p99": 192.25600361824036 - }, - "isolatedSum": { - "p50": 131.29599764943123, - "p90": 164.15999829769135, - "p95": 167.83999651670456, - "p99": 179.6479970216751 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 9863168, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 182, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 64, - "globalTokens": 512, - "dispatch": { - "p50": 69.60000097751617, - "p90": 85.69599688053131, - "p95": 
87.99999952316284, - "p99": 100.8640006184578 - }, - "combine": { - "p50": 80.6720033288002, - "p90": 92.70399808883667, - "p95": 93.66399794816971, - "p99": 97.4079966545105 - }, - "roundtrip": { - "p50": 160.70400178432465, - "p90": 200.83199441432953, - "p95": 203.19999754428864, - "p99": 211.5200012922287 - }, - "isolatedSum": { - "p50": 150.27200430631638, - "p90": 178.39999496936798, - "p95": 181.66399747133255, - "p99": 198.2719972729683 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 19496960, - "combineLogicalBytes": 38993920, - "fanoutMean": 5.3125, - "recvTokensMax": 367, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 77.05599814653397, - "p90": 91.96799993515015, - "p95": 94.43199634552002, - "p99": 99.32799637317657 - }, - "combine": { - "p50": 97.53599762916565, - "p90": 109.37599837779999, - "p95": 110.68800091743469, - "p99": 115.7120019197464 - }, - "roundtrip": { - "p50": 203.80799472332, - "p90": 219.9999988079071, - "p95": 222.59199619293213, - "p99": 236.4799976348877 - }, - "isolatedSum": { - "p50": 174.59199577569962, - "p90": 201.34399831295013, - "p95": 205.1199972629547, - "p99": 215.03999829292297 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 38836224, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-f1a3625a", - "identity": "h100|deepep|7168|8|256|fp8|ll|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", - "colorKey": "h100_7720baf2", - "comparisonKey": "800e526f613bc59d", - "schemaVersion": 3, - "generatedAt": "2026-06-26T23:49:09.827299+00:00", - "status": "valid", - "publicationStatus": "diagnostic", - "runner": "h100-dgxc-slurm_04", - "sku": "h100", - "backend": "deepep", - "phase": "decode", - "mode": "ll", - 
"resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · fp8 LL", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "fp8", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": null, - "configuredUnits": null, - "deviceUnits": 132, - "resourceClass": "fixed-kernel", - "conformanceClass": "not-applicable", - "fixedKernel": true, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "ac583971f94b176", - "workloadId": "set:8:d8d49658059863f2", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28271594334", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271594334", - "createdAt": "2026-06-26T23:49:09.827299+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 30.81599995493889, - "p90": 33.824000507593155, - "p95": 36.67199984192848, - "p99": 41.760001331567764 - }, - "combine": { - "p50": 33.535998314619064, - "p90": 36.06399893760681, - "p95": 38.656000047922134, - "p99": 94.62399780750275 - }, - "roundtrip": { - "p50": 2063.647985458374, - "p90": 2066.3039684295654, - "p95": 2067.5199031829834, - "p99": 
2072.1280574798584 - }, - "isolatedSum": { - "p50": 64.35199826955795, - "p90": 69.88799944519997, - "p95": 75.32799988985062, - "p99": 136.3839991390705 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 315392, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 14, - "stragglerRank": 3, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 30.688000842928886, - "p90": 33.440001308918, - "p95": 35.32800078392029, - "p99": 41.85599833726883 - }, - "combine": { - "p50": 35.10399907827377, - "p90": 39.135999977588654, - "p95": 60.99199876189232, - "p99": 184.2239946126938 - }, - "roundtrip": { - "p50": 2065.023899078369, - "p90": 2067.647933959961, - "p95": 2069.279909133911, - "p99": 2082.5600624084473 - }, - "isolatedSum": { - "p50": 65.79199992120266, - "p90": 72.57600128650665, - "p95": 96.3199995458126, - "p99": 226.07999294996262 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 616448, - "combineLogicalBytes": 1232896, - "fanoutMean": 5.375, - "recvTokensMax": 21, - "stragglerRank": 1, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 30.527999624609947, - "p90": 32.70399942994118, - "p95": 34.33600068092346, - "p99": 38.72000053524971 - }, - "combine": { - "p50": 34.71999987959862, - "p90": 36.896001547575, - "p95": 37.82400116324425, - "p99": 40.672000497579575 - }, - "roundtrip": { - "p50": 2065.7920837402344, - "p90": 2069.4079399108887, - "p95": 2074.079990386963, - "p99": 2120.703935623169 - }, - "isolatedSum": { - "p50": 65.24799950420856, - "p90": 69.60000097751617, - "p95": 72.16000184416771, - "p99": 79.39200103282928 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1240064, - "combineLogicalBytes": 2480128, - "fanoutMean": 5.40625, - "recvTokensMax": 39, - "stragglerRank": 3, - "correct": true, - "samplesPooled": 600, - "trials": 3 - 
}, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 31.007999554276466, - "p90": 33.24799984693527, - "p95": 35.45600175857544, - "p99": 42.11200028657913 - }, - "combine": { - "p50": 35.74400022625923, - "p90": 38.62399980425835, - "p95": 39.903998374938965, - "p99": 44.12800073623657 - }, - "roundtrip": { - "p50": 2066.240072250366, - "p90": 2069.6959495544434, - "p95": 2070.784091949463, - "p99": 2073.9200115203857 - }, - "isolatedSum": { - "p50": 66.7519997805357, - "p90": 71.87199965119362, - "p95": 75.3600001335144, - "p99": 86.2400010228157 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2487296, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 74, - "stragglerRank": 3, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 32.32000023126602, - "p90": 39.103999733924866, - "p95": 52.799999713897705, - "p99": 55.36000058054924 - }, - "combine": { - "p50": 38.656000047922134, - "p90": 41.79200157523155, - "p95": 42.97599941492081, - "p99": 47.520000487565994 - }, - "roundtrip": { - "p50": 2071.9680786132812, - "p90": 2074.592113494873, - "p95": 2075.615882873535, - "p99": 2079.7760486602783 - }, - "isolatedSum": { - "p50": 70.97600027918816, - "p90": 80.89600130915642, - "p95": 95.77599912881851, - "p99": 102.88000106811523 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4960256, - "combineLogicalBytes": 9920512, - "fanoutMean": 5.40625, - "recvTokensMax": 145, - "stragglerRank": 3, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 36.3520011305809, - "p90": 38.11199963092804, - "p95": 40.22400081157684, - "p99": 45.951999723911285 - }, - "combine": { - "p50": 47.968000173568726, - "p90": 50.87999999523163, - "p95": 51.83999985456467, - "p99": 58.04799869656563 - }, - "roundtrip": { - "p50": 2082.7200412750244, - 
"p90": 2085.2479934692383, - "p95": 2086.2081050872803, - "p99": 2089.1199111938477 - }, - "isolatedSum": { - "p50": 84.32000130414963, - "p90": 88.99199962615967, - "p95": 92.06400066614151, - "p99": 103.99999842047691 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 9863168, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 287, - "stragglerRank": 3, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 64, - "globalTokens": 512, - "dispatch": { - "p50": 41.600000113248825, - "p90": 51.00800096988678, - "p95": 52.12799832224846, - "p99": 55.1999993622303 - }, - "combine": { - "p50": 60.67200005054474, - "p90": 68.67200136184692, - "p95": 71.68000191450119, - "p99": 97.08800166845322 - }, - "roundtrip": { - "p50": 2101.8240451812744, - "p90": 2108.736038208008, - "p95": 2111.936092376709, - "p99": 2120.1279163360596 - }, - "isolatedSum": { - "p50": 102.27200016379356, - "p90": 119.6800023317337, - "p95": 123.80800023674965, - "p99": 152.28800103068352 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 19496960, - "combineLogicalBytes": 38993920, - "fanoutMean": 5.3125, - "recvTokensMax": 564, - "stragglerRank": 1, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 54.016001522541046, - "p90": 56.223999708890915, - "p95": 57.312000542879105, - "p99": 60.575999319553375 - }, - "combine": { - "p50": 88.54400366544724, - "p90": 91.93599969148636, - "p95": 92.70399808883667, - "p99": 114.81600254774094 - }, - "roundtrip": { - "p50": 2143.0718898773193, - "p90": 2146.7199325561523, - "p95": 2147.455930709839, - "p99": 2153.791904449463 - }, - "isolatedSum": { - "p50": 142.56000518798828, - "p90": 148.15999940037727, - "p95": 150.01599863171577, - "p99": 175.3920018672943 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 38836224, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - 
"recvTokensMax": 1104, - "stragglerRank": 1, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-73d1725a", - "identity": "h100|deepep|7168|8|256|fp8|ll|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", - "colorKey": "h100_3a77ee8e", - "comparisonKey": "93509525aa3f27c6", - "schemaVersion": 3, - "generatedAt": "2026-06-26T23:49:16.484836+00:00", - "status": "valid", - "publicationStatus": "diagnostic", - "runner": "h100-dgxc-slurm_13", - "sku": "h100", - "backend": "deepep", - "phase": "decode", - "mode": "ll", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "runtime-visible-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · fp8 LL", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "fp8", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": null, - "configuredUnits": null, - "deviceUnits": 132, - "resourceClass": "fixed-kernel", - "conformanceClass": "not-applicable", - "fixedKernel": true, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "ac583971f94b176", - "workloadId": "set:8:d8d49658059863f2", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28271598000", - "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271598000", - "createdAt": "2026-06-26T23:49:16.484836+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 31.10400028526783, - "p90": 33.376000821590424, - "p95": 34.88000109791756, - "p99": 39.264000952243805 - }, - "combine": { - "p50": 32.575998455286026, - "p90": 35.32800078392029, - "p95": 36.928001791238785, - "p99": 40.41599854826927 - }, - "roundtrip": { - "p50": 2062.4639987945557, - "p90": 2065.1841163635254, - "p95": 2067.9678916931152, - "p99": 2091.871976852417 - }, - "isolatedSum": { - "p50": 63.679998740553856, - "p90": 68.70400160551071, - "p95": 71.80800288915634, - "p99": 79.67999950051308 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 315392, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 14, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 30.719999223947525, - "p90": 32.99200162291527, - "p95": 35.551998764276505, - "p99": 40.64000025391579 - }, - "combine": { - "p50": 32.735999673604965, - "p90": 35.00799834728241, - "p95": 36.3520011305809, - "p99": 43.807998299598694 - }, - "roundtrip": { - "p50": 2063.136100769043, - "p90": 2065.376043319702, - "p95": 2067.296028137207, - "p99": 2071.039915084839 - }, - "isolatedSum": { - "p50": 63.45599889755249, - "p90": 67.99999997019768, - "p95": 71.9039998948574, - "p99": 84.44799855351448 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 616448, - "combineLogicalBytes": 1232896, - "fanoutMean": 5.375, - "recvTokensMax": 21, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 32.35200047492981, - "p90": 46.65600135922432, - "p95": 47.42399975657463, - "p99": 53.279999643564224 - }, - 
"combine": { - "p50": 33.824000507593155, - "p90": 36.768000572919846, - "p95": 39.07199949026108, - "p99": 50.783999264240265 - }, - "roundtrip": { - "p50": 2064.095973968506, - "p90": 2066.9119358062744, - "p95": 2069.567918777466, - "p99": 2080.512046813965 - }, - "isolatedSum": { - "p50": 66.17600098252296, - "p90": 83.42400193214417, - "p95": 86.49599924683571, - "p99": 104.06399890780449 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1240064, - "combineLogicalBytes": 2480128, - "fanoutMean": 5.40625, - "recvTokensMax": 39, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 31.90400078892708, - "p90": 34.04799848794937, - "p95": 35.74400022625923, - "p99": 39.77600112557411 - }, - "combine": { - "p50": 34.17599946260452, - "p90": 36.22400015592575, - "p95": 37.53599897027016, - "p99": 42.208001017570496 - }, - "roundtrip": { - "p50": 2065.279960632324, - "p90": 2068.416118621826, - "p95": 2070.6560611724854, - "p99": 2080.8000564575195 - }, - "isolatedSum": { - "p50": 66.0800002515316, - "p90": 70.27199864387512, - "p95": 73.27999919652939, - "p99": 81.98400214314461 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2487296, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 74, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 32.86400064826012, - "p90": 34.432001411914825, - "p95": 36.25600039958954, - "p99": 40.73600098490715 - }, - "combine": { - "p50": 37.88800165057182, - "p90": 44.67200115323067, - "p95": 46.30399867892265, - "p99": 69.24799829721451 - }, - "roundtrip": { - "p50": 2071.1679458618164, - "p90": 2079.5199871063232, - "p95": 2080.4800987243652, - "p99": 2085.439920425415 - }, - "isolatedSum": { - "p50": 70.75200229883194, - "p90": 79.10400256514549, - "p95": 82.55999907851219, - 
"p99": 109.98399928212166 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4960256, - "combineLogicalBytes": 9920512, - "fanoutMean": 5.40625, - "recvTokensMax": 145, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 35.00799834728241, - "p90": 36.928001791238785, - "p95": 39.07199949026108, - "p99": 41.98399931192398 - }, - "combine": { - "p50": 43.68000105023384, - "p90": 45.72800174355507, - "p95": 46.879999339580536, - "p99": 52.480001002550125 - }, - "roundtrip": { - "p50": 2079.263925552368, - "p90": 2081.279993057251, - "p95": 2082.5281143188477, - "p99": 2086.1759185791016 - }, - "isolatedSum": { - "p50": 78.68799939751625, - "p90": 82.65600353479385, - "p95": 85.95199882984161, - "p99": 94.4640003144741 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 9863168, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 287, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 64, - "globalTokens": 512, - "dispatch": { - "p50": 42.11200028657913, - "p90": 65.15199691057205, - "p95": 74.36800003051758, - "p99": 88.99199962615967 - }, - "combine": { - "p50": 58.9120015501976, - "p90": 63.87200206518173, - "p95": 64.80000168085098, - "p99": 71.45600020885468 - }, - "roundtrip": { - "p50": 2100.9280681610107, - "p90": 2110.1760864257812, - "p95": 2111.2639904022217, - "p99": 2114.367961883545 - }, - "isolatedSum": { - "p50": 101.02400183677673, - "p90": 129.02399897575378, - "p95": 139.16800171136856, - "p99": 160.44799983501434 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 19496960, - "combineLogicalBytes": 38993920, - "fanoutMean": 5.3125, - "recvTokensMax": 564, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 53.18399891257286, - 
"p90": 54.78399991989136, - "p95": 56.60799890756607, - "p99": 61.535999178886414 - }, - "combine": { - "p50": 85.75999736785889, - "p90": 88.03199976682663, - "p95": 89.12000060081482, - "p99": 95.29600292444229 - }, - "roundtrip": { - "p50": 2140.671968460083, - "p90": 2143.5201168060303, - "p95": 2145.632028579712, - "p99": 2288.991928100586 - }, - "isolatedSum": { - "p50": 138.94399628043175, - "p90": 142.815999686718, - "p95": 145.7279995083809, - "p99": 156.8320021033287 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 38836224, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 1104, - "stragglerRank": 2, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-1d30dd2c", - "identity": "h100|deepep|7168|8|256|fp8|ll|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|ac583971f94b176", - "colorKey": "h100_ac25b0a1", - "comparisonKey": "405d06288635d74f", - "schemaVersion": 3, - "generatedAt": "2026-06-26T17:32:59.549027+00:00", - "status": "valid", - "publicationStatus": "diagnostic", - "runner": "h100-dgxc-slurm_00", - "sku": "h100", - "backend": "deepep", - "phase": "decode", - "mode": "ll", - "resourceMode": "normalized", - "suite": "resource-constrained", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · fp8 LL (norm)", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "fp8", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": 0.18, - "achievedFraction": null, - "configuredUnits": null, - "deviceUnits": 132, - "resourceClass": "fixed-kernel", - 
"conformanceClass": "not-applicable", - "fixedKernel": true, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "ac583971f94b176", - "workloadId": "set:8:d8d49658059863f2", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28254359089", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254359089", - "createdAt": "2026-06-26T17:32:59.549027+00:00", - "sha": "60dec7d70f554e252fec87709e2be52752947db1" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 30.432000756263733, - "p90": 32.32000023126602, - "p95": 34.143999218940735, - "p99": 38.015998899936676 - }, - "combine": { - "p50": 32.287999987602234, - "p90": 34.78400036692619, - "p95": 35.87200120091438, - "p99": 40.383998304605484 - }, - "roundtrip": { - "p50": 2063.9359951019287, - "p90": 2065.632104873657, - "p95": 2066.9760704040527, - "p99": 2069.6001052856445 - }, - "isolatedSum": { - "p50": 62.72000074386597, - "p90": 67.10400059819221, - "p95": 70.01600041985512, - "p99": 78.39999720454216 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 315392, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 14, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 30.368000268936157, - "p90": 32.09599852561951, - "p95": 34.01599824428558, - "p99": 37.248000502586365 - }, - "combine": { - "p50": 32.22399950027466, - "p90": 34.46400165557861, - "p95": 35.711999982595444, - "p99": 45.88799923658371 - }, - "roundtrip": { - "p50": 2064.768075942993, - "p90": 
2067.13604927063, - "p95": 2069.024085998535, - "p99": 2083.7440490722656 - }, - "isolatedSum": { - "p50": 62.591999769210815, - "p90": 66.56000018119812, - "p95": 69.72799822688103, - "p99": 83.13599973917007 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 616448, - "combineLogicalBytes": 1232896, - "fanoutMean": 5.375, - "recvTokensMax": 21, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 30.527999624609947, - "p90": 32.54399821162224, - "p95": 35.26400029659271, - "p99": 40.063999593257904 - }, - "combine": { - "p50": 34.2399999499321, - "p90": 37.53599897027016, - "p95": 38.24000060558319, - "p99": 40.031999349594116 - }, - "roundtrip": { - "p50": 2065.376043319702, - "p90": 2067.3279762268066, - "p95": 2068.3200359344482, - "p99": 2070.5599784851074 - }, - "isolatedSum": { - "p50": 64.76799957454205, - "p90": 70.0799971818924, - "p95": 73.5040009021759, - "p99": 80.09599894285202 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1240064, - "combineLogicalBytes": 2480128, - "fanoutMean": 5.40625, - "recvTokensMax": 39, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 31.231999397277832, - "p90": 33.055998384952545, - "p95": 35.61599925160408, - "p99": 38.94399851560593 - }, - "combine": { - "p50": 33.76000002026558, - "p90": 35.999998450279236, - "p95": 37.76000067591667, - "p99": 53.888000547885895 - }, - "roundtrip": { - "p50": 2066.528081893921, - "p90": 2068.511962890625, - "p95": 2069.6959495544434, - "p99": 2078.07993888855 - }, - "isolatedSum": { - "p50": 64.99199941754341, - "p90": 69.05599683523178, - "p95": 73.37599992752075, - "p99": 92.83199906349182 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2487296, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 74, - 
"stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 32.51200169324875, - "p90": 34.20799970626831, - "p95": 36.86400130391121, - "p99": 40.09599983692169 - }, - "combine": { - "p50": 37.21600025892258, - "p90": 39.45599868893623, - "p95": 40.41599854826927, - "p99": 42.399998754262924 - }, - "roundtrip": { - "p50": 2071.392059326172, - "p90": 2074.687957763672, - "p95": 2078.7200927734375, - "p99": 2156.5120220184326 - }, - "isolatedSum": { - "p50": 69.72800195217133, - "p90": 73.66399839520454, - "p95": 77.27999985218048, - "p99": 82.49599859118462 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4960256, - "combineLogicalBytes": 9920512, - "fanoutMean": 5.40625, - "recvTokensMax": 145, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 35.10399907827377, - "p90": 36.38400137424469, - "p95": 38.43199834227562, - "p99": 42.208001017570496 - }, - "combine": { - "p50": 42.7200011909008, - "p90": 44.89599913358688, - "p95": 45.66400125622749, - "p99": 48.70399832725525 - }, - "roundtrip": { - "p50": 2080.22403717041, - "p90": 2081.9520950317383, - "p95": 2083.359956741333, - "p99": 2118.4639930725098 - }, - "isolatedSum": { - "p50": 77.82400026917458, - "p90": 81.28000050783157, - "p95": 84.09599959850311, - "p99": 90.91199934482574 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 9863168, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 287, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 64, - "globalTokens": 512, - "dispatch": { - "p50": 42.047999799251556, - "p90": 47.90399968624115, - "p95": 48.8319993019104, - "p99": 53.119998425245285 - }, - "combine": { - "p50": 57.40800127387047, - "p90": 62.68800050020218, - "p95": 64.51199948787689, - "p99": 
67.03999638557434 - }, - "roundtrip": { - "p50": 2100.5120277404785, - "p90": 2108.383893966675, - "p95": 2109.503984451294, - "p99": 2111.9039058685303 - }, - "isolatedSum": { - "p50": 99.45600107312202, - "p90": 110.59200018644333, - "p95": 113.34399878978729, - "p99": 120.15999481081963 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 19496960, - "combineLogicalBytes": 38993920, - "fanoutMean": 5.3125, - "recvTokensMax": 564, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 53.599998354911804, - "p90": 55.39200082421303, - "p95": 56.41600117087364, - "p99": 61.08799949288368 - }, - "combine": { - "p50": 83.5840031504631, - "p90": 86.11200004816055, - "p95": 87.2960016131401, - "p99": 91.51999652385712 - }, - "roundtrip": { - "p50": 2139.967918395996, - "p90": 2142.303943634033, - "p95": 2142.911911010742, - "p99": 2144.831895828247 - }, - "isolatedSum": { - "p50": 137.1840015053749, - "p90": 141.50400087237358, - "p95": 143.71200278401375, - "p99": 152.6079960167408 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 38836224, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 1104, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-d35502c2", - "identity": "h100|deepep|7168|8|256|fp8-directcast|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", - "colorKey": "h100_805b6904", - "comparisonKey": "a3be04b3aa017ede", - "schemaVersion": 3, - "generatedAt": "2026-06-27T15:55:34.014711+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_06", - "sku": "h100", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - 
"topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · fp8-directcast", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "fp8-directcast", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "ac583971f94b176", - "workloadId": "set:8:d8d49658059863f2", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28294158591", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28294158591", - "createdAt": "2026-06-27T15:55:34.014711+00:00", - "sha": "42eddb48c3eed35214c5ad50da1aa6527363ff70" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 97.50399738550186, - "p90": 102.27199643850327, - "p95": 104.70400005578995, - "p99": 110.75200140476227 - }, - "combine": { - "p50": 73.60000163316727, - "p90": 75.42400062084198, - "p95": 76.92799717187881, - "p99": 80.48000186681747 - }, - "roundtrip": { - "p50": 193.79200041294098, - "p90": 199.26400482654572, - "p95": 201.47199928760529, - "p99": 205.79199492931366 - }, - "isolatedSum": { - "p50": 171.10399901866913, - "p90": 177.69599705934525, - "p95": 181.63199722766876, - 
"p99": 191.23200327157974 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 630784, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 71.10399752855301, - "p90": 98.88000041246414, - "p95": 101.59999877214432, - "p99": 105.50399869680405 - }, - "combine": { - "p50": 62.55999952554703, - "p90": 71.07199728488922, - "p95": 71.74400240182877, - "p99": 74.81600344181061 - }, - "roundtrip": { - "p50": 154.01600301265717, - "p90": 193.12000274658203, - "p95": 195.3279972076416, - "p99": 198.91199469566345 - }, - "isolatedSum": { - "p50": 133.66399705410004, - "p90": 169.95199769735336, - "p95": 173.34400117397308, - "p99": 180.32000213861465 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1232896, - "combineLogicalBytes": 1232896, - "fanoutMean": 5.375, - "recvTokensMax": 13, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 71.16799801588058, - "p90": 103.2319962978363, - "p95": 105.18400371074677, - "p99": 110.52799969911575 - }, - "combine": { - "p50": 63.968002796173096, - "p90": 75.99999755620956, - "p95": 77.98399776220322, - "p99": 81.66400343179703 - }, - "roundtrip": { - "p50": 154.62400019168854, - "p90": 201.02399587631226, - "p95": 203.99999618530273, - "p99": 212.0320051908493 - }, - "isolatedSum": { - "p50": 135.13600081205368, - "p90": 179.23199385404587, - "p95": 183.16800147294998, - "p99": 192.19200313091278 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2480128, - "combineLogicalBytes": 2480128, - "fanoutMean": 5.40625, - "recvTokensMax": 29, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 71.45600020885468, - "p90": 
100.73599964380264, - "p95": 102.78400033712387, - "p99": 107.55199939012527 - }, - "combine": { - "p50": 63.840001821517944, - "p90": 74.97599720954895, - "p95": 76.19199901819229, - "p99": 83.29600095748901 - }, - "roundtrip": { - "p50": 155.42399883270264, - "p90": 199.68000054359436, - "p95": 201.9840031862259, - "p99": 291.6480004787445 - }, - "isolatedSum": { - "p50": 135.29600203037262, - "p90": 175.7119968533516, - "p95": 178.97599935531616, - "p99": 190.8480003476143 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 71.42399996519089, - "p90": 100.89600086212158, - "p95": 103.00800204277039, - "p99": 108.22399705648422 - }, - "combine": { - "p50": 65.5680000782013, - "p90": 77.08799839019775, - "p95": 78.5600021481514, - "p99": 82.91199803352356 - }, - "roundtrip": { - "p50": 157.18400478363037, - "p90": 202.04800367355347, - "p95": 204.76800203323364, - "p99": 209.4080001115799 - }, - "isolatedSum": { - "p50": 136.99200004339218, - "p90": 177.98399925231934, - "p95": 181.56800419092178, - "p99": 191.13599509000778 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 9920512, - "combineLogicalBytes": 9920512, - "fanoutMean": 5.40625, - "recvTokensMax": 92, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 73.18399846553802, - "p90": 101.08800232410431, - "p95": 102.88000106811523, - "p99": 106.81600123643875 - }, - "combine": { - "p50": 71.35999947786331, - "p90": 82.84799754619598, - "p95": 83.67999643087387, - "p99": 86.94399893283844 - }, - "roundtrip": { - "p50": 162.04799711704254, - "p90": 207.23199844360352, - "p95": 209.34399962425232, - "p99": 212.41599321365356 - }, - 
"isolatedSum": { - "p50": 144.54399794340134, - "p90": 183.9359998703003, - "p95": 186.5599974989891, - "p99": 193.7600001692772 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 19726336, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 182, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 64, - "globalTokens": 512, - "dispatch": { - "p50": 81.34400099515915, - "p90": 105.43999820947647, - "p95": 109.6000000834465, - "p99": 460.54399013519287 - }, - "combine": { - "p50": 80.64000308513641, - "p90": 92.99200028181076, - "p95": 94.24000233411789, - "p99": 98.55999797582626 - }, - "roundtrip": { - "p50": 174.01599884033203, - "p90": 220.5439954996109, - "p95": 222.91199862957, - "p99": 228.2239943742752 - }, - "isolatedSum": { - "p50": 161.98400408029556, - "p90": 198.43199849128723, - "p95": 203.8400024175644, - "p99": 559.1039881110191 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 38993920, - "combineLogicalBytes": 38993920, - "fanoutMean": 5.3125, - "recvTokensMax": 367, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 88.8959988951683, - "p90": 109.63200032711029, - "p95": 111.32799834012985, - "p99": 116.70400202274323 - }, - "combine": { - "p50": 98.88000041246414, - "p90": 111.10399663448334, - "p95": 112.64000087976456, - "p99": 115.07199704647064 - }, - "roundtrip": { - "p50": 215.61600267887115, - "p90": 238.43200504779816, - "p95": 240.76800048351288, - "p99": 245.15199661254883 - }, - "isolatedSum": { - "p50": 187.77599930763245, - "p90": 220.73599696159363, - "p95": 223.9679992198944, - "p99": 231.77599906921387 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - 
"trials": 3 - } - ] - }, - { - "id": "cx-779ba710", - "identity": "h100|deepep|7168|8|256|fp8-pertoken|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", - "colorKey": "h100_b68ae8a2", - "comparisonKey": "6d2a2c2b7775de32", - "schemaVersion": 3, - "generatedAt": "2026-06-27T15:55:42.044043+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_05", - "sku": "h100", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · fp8-pertoken", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "fp8-pertoken", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "ac583971f94b176", - "workloadId": "set:8:d8d49658059863f2", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28294162181", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28294162181", - "createdAt": 
"2026-06-27T15:55:42.044043+00:00", - "sha": "42eddb48c3eed35214c5ad50da1aa6527363ff70" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 94.59199756383896, - "p90": 99.35999661684036, - "p95": 101.56799852848053, - "p99": 106.1440035700798 - }, - "combine": { - "p50": 68.4799998998642, - "p90": 71.23199850320816, - "p95": 72.22399860620499, - "p99": 76.06399804353714 - }, - "roundtrip": { - "p50": 184.79999899864197, - "p90": 190.72000682353973, - "p95": 192.9280012845993, - "p99": 197.9839950799942 - }, - "isolatedSum": { - "p50": 163.07199746370316, - "p90": 170.59199512004852, - "p95": 173.79199713468552, - "p99": 182.20800161361694 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 630784, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 70.78400254249573, - "p90": 95.07200121879578, - "p95": 97.18400239944458, - "p99": 103.13600301742554 - }, - "combine": { - "p50": 62.463998794555664, - "p90": 70.97599655389786, - "p95": 71.52000069618225, - "p99": 75.96799731254578 - }, - "roundtrip": { - "p50": 151.8400013446808, - "p90": 189.28000330924988, - "p95": 190.75199961662292, - "p99": 195.26399672031403 - }, - "isolatedSum": { - "p50": 133.2480013370514, - "p90": 166.04799777269363, - "p95": 168.70400309562683, - "p99": 179.1040003299713 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1232896, - "combineLogicalBytes": 1232896, - "fanoutMean": 5.375, - "recvTokensMax": 13, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 94.11200135946274, - "p90": 101.34399682283401, - "p95": 105.12000322341919, - "p99": 111.42399907112122 - }, - "combine": { - "p50": 71.48800045251846, - "p90": 75.68000257015228, - "p95": 
77.08799839019775, - "p99": 80.32000064849854 - }, - "roundtrip": { - "p50": 152.92799472808838, - "p90": 198.0160027742386, - "p95": 201.1840045452118, - "p99": 207.64799416065216 - }, - "isolatedSum": { - "p50": 165.6000018119812, - "p90": 177.0239993929863, - "p95": 182.20800161361694, - "p99": 191.74399971961975 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2480128, - "combineLogicalBytes": 2480128, - "fanoutMean": 5.40625, - "recvTokensMax": 29, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 94.65599805116653, - "p90": 100.92800110578537, - "p95": 103.10400277376175, - "p99": 107.16799646615982 - }, - "combine": { - "p50": 73.18399846553802, - "p90": 74.81600344181061, - "p95": 76.19199901819229, - "p99": 79.29600030183792 - }, - "roundtrip": { - "p50": 187.83999979496002, - "p90": 195.45599818229675, - "p95": 197.28000462055206, - "p99": 202.84800231456757 - }, - "isolatedSum": { - "p50": 167.83999651670456, - "p90": 175.74400454759598, - "p95": 179.29600179195404, - "p99": 186.46399676799774 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 94.40000355243683, - "p90": 100.70399940013885, - "p95": 102.55999863147736, - "p99": 108.51199924945831 - }, - "combine": { - "p50": 74.75200295448303, - "p90": 77.44000107049942, - "p95": 79.83999699354172, - "p99": 83.42400193214417 - }, - "roundtrip": { - "p50": 192.1280026435852, - "p90": 201.05600357055664, - "p95": 204.28800582885742, - "p99": 209.4080001115799 - }, - "isolatedSum": { - "p50": 169.15200650691986, - "p90": 178.14400047063828, - "p95": 182.39999562501907, - "p99": 191.93600118160248 - }, - "roundtripMeasured": true, - 
"dispatchLogicalBytes": 9920512, - "combineLogicalBytes": 9920512, - "fanoutMean": 5.40625, - "recvTokensMax": 92, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 73.21599870920181, - "p90": 100.51199793815613, - "p95": 102.75200009346008, - "p99": 106.36799782514572 - }, - "combine": { - "p50": 71.16799801588058, - "p90": 82.0159986615181, - "p95": 83.00799876451492, - "p99": 86.11200004816055 - }, - "roundtrip": { - "p50": 160.76800227165222, - "p90": 204.3199986219406, - "p95": 207.10399746894836, - "p99": 212.0639979839325 - }, - "isolatedSum": { - "p50": 144.3839967250824, - "p90": 182.52799659967422, - "p95": 185.759998857975, - "p99": 192.47999787330627 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 19726336, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 182, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 64, - "globalTokens": 512, - "dispatch": { - "p50": 95.551997423172, - "p90": 104.25599664449692, - "p95": 106.88000172376633, - "p99": 122.65600264072418 - }, - "combine": { - "p50": 89.88799899816513, - "p90": 92.54399687051773, - "p95": 94.04800087213516, - "p99": 97.24800288677216 - }, - "roundtrip": { - "p50": 207.8080028295517, - "p90": 219.16800737380981, - "p95": 221.66399657726288, - "p99": 228.44800353050232 - }, - "isolatedSum": { - "p50": 185.43999642133713, - "p90": 196.79999351501465, - "p95": 200.9280025959015, - "p99": 219.90400552749634 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 38993920, - "combineLogicalBytes": 38993920, - "fanoutMean": 5.3125, - "recvTokensMax": 367, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 88.86399865150452, - "p90": 107.77600109577179, - "p95": 110.23999750614166, - "p99": 
115.61600118875504 - }, - "combine": { - "p50": 98.78399968147278, - "p90": 110.49599945545197, - "p95": 111.77600175142288, - "p99": 115.13599753379822 - }, - "roundtrip": { - "p50": 216.8000042438507, - "p90": 236.38400435447693, - "p95": 240.57599902153015, - "p99": 246.14399671554565 - }, - "isolatedSum": { - "p50": 187.6479983329773, - "p90": 218.27200055122375, - "p95": 222.01599925756454, - "p99": 230.75199872255325 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-108bdec2", - "identity": "h100|deepep|4096|8|128|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||dc27c5e0894e569", - "colorKey": "h100_42947950", - "comparisonKey": "5aeeda2cd42e92cb", - "schemaVersion": 3, - "generatedAt": "2026-06-27T11:13:50.229059+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_10", - "sku": "h100", - "backend": "deepep", - "phase": "prefill", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · bf16", - "model": "Qwen3.5", - "shape": { - "hidden": 4096, - "topk": 8, - "experts": 128, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - 
}, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "dc27c5e0894e569", - "workloadId": "set:6:76d8142d69406335", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28287504962", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28287504962", - "createdAt": "2026-06-27T11:13:50.229059+00:00", - "sha": "df7fddee0a275156d4c72fa006cd2b73bce72613" - }, - "rows": [ - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 94.17600184679031, - "p90": 98.43199700117111, - "p95": 100.25600343942642, - "p99": 104.47999835014343 - }, - "combine": { - "p50": 87.20000088214874, - "p90": 89.66399729251862, - "p95": 90.40000289678574, - "p99": 95.42399644851685 - }, - "roundtrip": { - "p50": 156.51200711727142, - "p90": 162.20800578594208, - "p95": 163.455992937088, - "p99": 169.53599452972412 - }, - "isolatedSum": { - "p50": 181.37600272893906, - "p90": 188.09599429368973, - "p95": 190.65600633621216, - "p99": 199.90399479866028 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 44564480, - "combineLogicalBytes": 44564480, - "fanoutMean": 5.3125, - "recvTokensMax": 699, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 256, - "globalTokens": 2048, - "dispatch": { - "p50": 114.56000059843063, - "p90": 127.26399302482605, - "p95": 130.78400492668152, - "p99": 137.11999356746674 - }, - "combine": { - "p50": 112.15999722480774, - "p90": 115.35999923944473, - "p95": 118.75200271606445, - "p99": 122.5920021533966 - }, - "roundtrip": { - "p50": 197.02400267124176, - "p90": 202.33599841594696, - "p95": 204.57600057125092, - "p99": 207.68000185489655 - }, 
- "isolatedSum": { - "p50": 226.71999782323837, - "p90": 242.62399226427078, - "p95": 249.53600764274597, - "p99": 259.71199572086334 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 89726976, - "combineLogicalBytes": 89726976, - "fanoutMean": 5.34814453125, - "recvTokensMax": 1385, - "stragglerRank": 2, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 512, - "globalTokens": 4096, - "dispatch": { - "p50": 153.6639928817749, - "p90": 169.855996966362, - "p95": 171.7119961977005, - "p99": 176.32000148296356 - }, - "combine": { - "p50": 167.71200299263, - "p90": 180.38399517536163, - "p95": 182.43199586868286, - "p99": 184.1599941253662 - }, - "roundtrip": { - "p50": 289.6000146865845, - "p90": 307.45598673820496, - "p95": 310.07999181747437, - "p99": 317.1519935131073 - }, - "isolatedSum": { - "p50": 321.3759958744049, - "p90": 350.23999214172363, - "p95": 354.14399206638336, - "p99": 360.4799956083298 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 179503104, - "combineLogicalBytes": 179503104, - "fanoutMean": 5.349609375, - "recvTokensMax": 2772, - "stragglerRank": 1, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 1024, - "globalTokens": 8192, - "dispatch": { - "p50": 216.8319970369339, - "p90": 221.02400660514832, - "p95": 222.46399521827698, - "p99": 227.2000014781952 - }, - "combine": { - "p50": 277.0240008831024, - "p90": 282.78398513793945, - "p95": 284.2879891395569, - "p99": 288.4480059146881 - }, - "roundtrip": { - "p50": 469.4080054759979, - "p90": 475.23200511932373, - "p95": 476.83200240135193, - "p99": 480.3520143032074 - }, - "isolatedSum": { - "p50": 493.8559979200363, - "p90": 503.80799174308777, - "p95": 506.75198435783386, - "p99": 515.6480073928833 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 359022592, - "combineLogicalBytes": 359022592, - "fanoutMean": 5.349853515625, - "recvTokensMax": 5558, - "stragglerRank": 7, - "correct": true, - 
"samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2048, - "globalTokens": 16384, - "dispatch": { - "p50": 361.2799942493439, - "p90": 374.208003282547, - "p95": 379.2960047721863, - "p99": 538.752019405365 - }, - "combine": { - "p50": 470.5600142478943, - "p90": 482.87999629974365, - "p95": 485.0879907608032, - "p99": 490.81599712371826 - }, - "roundtrip": { - "p50": 804.4800162315369, - "p90": 820.2239871025085, - "p95": 825.3120183944702, - "p99": 835.3919982910156 - }, - "isolatedSum": { - "p50": 831.8400084972382, - "p90": 857.0879995822906, - "p95": 864.3839955329895, - "p99": 1029.5680165290833 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 716111872, - "combineLogicalBytes": 716111872, - "fanoutMean": 5.33544921875, - "recvTokensMax": 10982, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4096, - "globalTokens": 32768, - "dispatch": { - "p50": 640.5439972877502, - "p90": 648.4159827232361, - "p95": 651.7760157585144, - "p99": 662.015974521637 - }, - "combine": { - "p50": 846.176028251648, - "p90": 854.9759984016418, - "p95": 857.5360178947449, - "p99": 862.8479838371277 - }, - "roundtrip": { - "p50": 1459.9039554595947, - "p90": 1470.5599546432495, - "p95": 1474.4000434875488, - "p99": 1484.1920137405396 - }, - "isolatedSum": { - "p50": 1486.7200255393982, - "p90": 1503.391981124878, - "p95": 1509.3120336532593, - "p99": 1524.8639583587646 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1432395776, - "combineLogicalBytes": 1432395776, - "fanoutMean": 5.336090087890625, - "recvTokensMax": 21939, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-8265fe0e", - "identity": "h100|deepep|4096|8|128|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||dc27c5e0894e569", - "colorKey": "h100_ff7906f8", - "comparisonKey": "d0edce95a580d060", - "schemaVersion": 3, - "generatedAt": 
"2026-06-26T23:52:06.777183+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_13", - "sku": "h100", - "backend": "deepep", - "phase": "prefill", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "runtime-visible-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · bf16", - "model": "Qwen3.5", - "shape": { - "hidden": 4096, - "topk": 8, - "experts": 128, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "dc27c5e0894e569", - "workloadId": "set:6:76d8142d69406335", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28271688175", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271688175", - "createdAt": "2026-06-26T23:52:06.777183+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 92.32000261545181, - "p90": 96.41599655151367, - "p95": 98.39999675750732, - "p99": 104.22399640083313 - }, - "combine": { - "p50": 86.97599917650223, - "p90": 
88.41600269079208, - "p95": 89.50400352478027, - "p99": 93.31200271844864 - }, - "roundtrip": { - "p50": 156.73600137233734, - "p90": 160.70400178432465, - "p95": 161.6639941930771, - "p99": 166.04800522327423 - }, - "isolatedSum": { - "p50": 179.29600179195404, - "p90": 184.83199924230576, - "p95": 187.9040002822876, - "p99": 197.53599911928177 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 44564480, - "combineLogicalBytes": 44564480, - "fanoutMean": 5.3125, - "recvTokensMax": 699, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 256, - "globalTokens": 2048, - "dispatch": { - "p50": 111.10399663448334, - "p90": 133.98399949073792, - "p95": 135.96799969673157, - "p99": 139.96799290180206 - }, - "combine": { - "p50": 112.99200356006622, - "p90": 121.47200107574463, - "p95": 122.01599776744843, - "p99": 128.35200130939484 - }, - "roundtrip": { - "p50": 202.72000133991241, - "p90": 217.6000028848648, - "p95": 219.39200162887573, - "p99": 223.7440049648285 - }, - "isolatedSum": { - "p50": 224.09600019454956, - "p90": 255.45600056648254, - "p95": 257.98399746418, - "p99": 268.3199942111969 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 89726976, - "combineLogicalBytes": 89726976, - "fanoutMean": 5.34814453125, - "recvTokensMax": 1385, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 512, - "globalTokens": 4096, - "dispatch": { - "p50": 153.08800339698792, - "p90": 166.9439971446991, - "p95": 168.67199540138245, - "p99": 175.55199563503265 - }, - "combine": { - "p50": 168.92799735069275, - "p90": 181.15200102329254, - "p95": 183.07200074195862, - "p99": 186.0480010509491 - }, - "roundtrip": { - "p50": 291.29600524902344, - "p90": 307.45598673820496, - "p95": 309.6959888935089, - "p99": 313.9199912548065 - }, - "isolatedSum": { - "p50": 322.01600074768066, - "p90": 348.09599816799164, - "p95": 351.74399614334106, - "p99": 
361.59999668598175 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 179503104, - "combineLogicalBytes": 179503104, - "fanoutMean": 5.349609375, - "recvTokensMax": 2772, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 1024, - "globalTokens": 8192, - "dispatch": { - "p50": 219.26400065422058, - "p90": 230.71999847888947, - "p95": 234.9119931459427, - "p99": 238.62400650978088 - }, - "combine": { - "p50": 274.04800057411194, - "p90": 280.5440127849579, - "p95": 281.69599175453186, - "p99": 284.1919958591461 - }, - "roundtrip": { - "p50": 467.4240052700043, - "p90": 473.2159972190857, - "p95": 475.8079946041107, - "p99": 479.2639911174774 - }, - "isolatedSum": { - "p50": 493.3120012283325, - "p90": 511.26401126384735, - "p95": 516.6079849004745, - "p99": 522.816002368927 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 359022592, - "combineLogicalBytes": 359022592, - "fanoutMean": 5.349853515625, - "recvTokensMax": 5558, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2048, - "globalTokens": 16384, - "dispatch": { - "p50": 360.79999804496765, - "p90": 374.36801195144653, - "p95": 376.5760064125061, - "p99": 380.2880048751831 - }, - "combine": { - "p50": 465.88799357414246, - "p90": 475.77598690986633, - "p95": 478.4319996833801, - "p99": 481.53600096702576 - }, - "roundtrip": { - "p50": 799.1999983787537, - "p90": 816.6720271110535, - "p95": 819.8080062866211, - "p99": 824.7680068016052 - }, - "isolatedSum": { - "p50": 826.6879916191101, - "p90": 850.1439988613129, - "p95": 855.0080060958862, - "p99": 861.8240058422089 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 716111872, - "combineLogicalBytes": 716111872, - "fanoutMean": 5.33544921875, - "recvTokensMax": 10982, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4096, - "globalTokens": 32768, - "dispatch": { 
- "p50": 638.975977897644, - "p90": 648.1279730796814, - "p95": 652.7040004730225, - "p99": 661.1520051956177 - }, - "combine": { - "p50": 848.4799861907959, - "p90": 856.8000197410583, - "p95": 859.5520257949829, - "p99": 898.5919952392578 - }, - "roundtrip": { - "p50": 1462.623953819275, - "p90": 1474.079966545105, - "p95": 1478.4959554672241, - "p99": 1489.3120527267456 - }, - "isolatedSum": { - "p50": 1487.45596408844, - "p90": 1504.9279928207397, - "p95": 1512.2560262680054, - "p99": 1559.7440004348755 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1432395776, - "combineLogicalBytes": 1432395776, - "fanoutMean": 5.336090087890625, - "recvTokensMax": 21939, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-2dcc1e5c", - "identity": "h100|deepep|5120|8|160|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||0c022a63bbcbf42", - "colorKey": "h100_ff7906f8", - "comparisonKey": "69b861c40f88be42", - "schemaVersion": 3, - "generatedAt": "2026-06-26T23:51:59.492832+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_06", - "sku": "h100", - "backend": "deepep", - "phase": "prefill", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "runtime-visible-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · bf16", - "model": "shape 5120/8/160", - "shape": { - "hidden": 5120, - "topk": 8, - "experts": 160, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": 
"backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "0c022a63bbcbf42", - "workloadId": "set:6:28c0c09b13ff0acf", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28271702702", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271702702", - "createdAt": "2026-06-26T23:51:59.492832+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 99.45599734783173, - "p90": 105.05600273609161, - "p95": 106.04800283908844, - "p99": 110.23999750614166 - }, - "combine": { - "p50": 95.58399766683578, - "p90": 97.47199714183807, - "p95": 98.39999675750732, - "p99": 102.9760017991066 - }, - "roundtrip": { - "p50": 170.33599317073822, - "p90": 175.10400712490082, - "p95": 177.85599827766418, - "p99": 179.58399653434753 - }, - "isolatedSum": { - "p50": 195.0399950146675, - "p90": 202.5279998779297, - "p95": 204.44799959659576, - "p99": 213.21599930524826 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 55674880, - "combineLogicalBytes": 55674880, - "fanoutMean": 5.3095703125, - "recvTokensMax": 699, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 256, - "globalTokens": 2048, - "dispatch": { - "p50": 119.29599940776825, - "p90": 124.22399967908859, - "p95": 126.30400061607361, - "p99": 130.5599957704544 - }, - "combine": { - "p50": 122.079998254776, - "p90": 127.80800461769104, - "p95": 128.67200374603271, - "p99": 132.9919993877411 - }, - "roundtrip": { - 
"p50": 219.32800114154816, - "p90": 223.1680005788803, - "p95": 224.5440036058426, - "p99": 228.7359982728958 - }, - "isolatedSum": { - "p50": 241.37599766254425, - "p90": 252.03200429677963, - "p95": 254.97600436210632, - "p99": 263.5519951581955 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 111104000, - "combineLogicalBytes": 111104000, - "fanoutMean": 5.2978515625, - "recvTokensMax": 1387, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 512, - "globalTokens": 4096, - "dispatch": { - "p50": 165.53600132465363, - "p90": 178.1120002269745, - "p95": 180.12799322605133, - "p99": 184.25600230693817 - }, - "combine": { - "p50": 190.46400487422943, - "p90": 198.71999323368073, - "p95": 200.9280025959015, - "p99": 213.79199624061584 - }, - "roundtrip": { - "p50": 325.76000690460205, - "p90": 331.07200264930725, - "p95": 332.73598551750183, - "p99": 336.1920118331909 - }, - "isolatedSum": { - "p50": 356.00000619888306, - "p90": 376.8319934606552, - "p95": 381.0559958219528, - "p99": 398.047998547554 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 223098880, - "combineLogicalBytes": 223098880, - "fanoutMean": 5.319091796875, - "recvTokensMax": 2762, - "stragglerRank": 1, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 1024, - "globalTokens": 8192, - "dispatch": { - "p50": 244.57600712776184, - "p90": 249.439999461174, - "p95": 253.56799364089966, - "p99": 409.56801176071167 - }, - "combine": { - "p50": 299.1040050983429, - "p90": 303.9360046386719, - "p95": 305.759996175766, - "p99": 311.0719919204712 - }, - "roundtrip": { - "p50": 515.7759785652161, - "p90": 522.2399830818176, - "p95": 524.1600275039673, - "p99": 528.8959741592407 - }, - "isolatedSum": { - "p50": 543.6800122261047, - "p90": 553.3760040998459, - "p95": 559.3279898166656, - "p99": 720.6400036811829 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 446730240, - 
"combineLogicalBytes": 446730240, - "fanoutMean": 5.325439453125, - "recvTokensMax": 5518, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2048, - "globalTokens": 16384, - "dispatch": { - "p50": 413.1520092487335, - "p90": 423.0720102787018, - "p95": 426.2399971485138, - "p99": 432.5760006904602 - }, - "combine": { - "p50": 515.7439708709717, - "p90": 523.7119793891907, - "p95": 526.4319777488708, - "p99": 530.3360223770142 - }, - "roundtrip": { - "p50": 898.2080221176147, - "p90": 911.0400080680847, - "p95": 915.2960181236267, - "p99": 921.6639995574951 - }, - "isolatedSum": { - "p50": 928.8959801197052, - "p90": 946.7839896678925, - "p95": 952.6719748973846, - "p99": 962.9120230674744 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 893634560, - "combineLogicalBytes": 893634560, - "fanoutMean": 5.32647705078125, - "recvTokensMax": 11032, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4096, - "globalTokens": 32768, - "dispatch": { - "p50": 745.0559735298157, - "p90": 758.2719922065735, - "p95": 762.112021446228, - "p99": 772.4159955978394 - }, - "combine": { - "p50": 933.247983455658, - "p90": 941.9839978218079, - "p95": 945.1839923858643, - "p99": 951.3279795646667 - }, - "roundtrip": { - "p50": 1646.2719440460205, - "p90": 1661.9199514389038, - "p95": 1667.3599481582642, - "p99": 1685.7600212097168 - }, - "isolatedSum": { - "p50": 1678.3039569854736, - "p90": 1700.2559900283813, - "p95": 1707.2960138320923, - "p99": 1723.743975162506 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1786265600, - "combineLogicalBytes": 1786265600, - "fanoutMean": 5.323486328125, - "recvTokensMax": 21895, - "stragglerRank": 3, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-8f627a86", - "identity": 
"h100|deepep|6144|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", - "colorKey": "h100_42947950", - "comparisonKey": "68eaec6b4043581a", - "schemaVersion": 3, - "generatedAt": "2026-06-27T11:13:20.359016+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_06", - "sku": "h100", - "backend": "deepep", - "phase": "prefill", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · bf16", - "model": "MiniMax-M3", - "shape": { - "hidden": 6144, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "64d989e2e2a6b31", - "workloadId": "set:6:9f5e1e005a35e937", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28287492752", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28287492752", - "createdAt": "2026-06-27T11:13:20.359016+00:00", - "sha": "df7fddee0a275156d4c72fa006cd2b73bce72613" - }, - "rows": [ 
- { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 111.35999858379364, - "p90": 115.58400094509125, - "p95": 116.35199934244156, - "p99": 121.56800180673599 - }, - "combine": { - "p50": 97.72799909114838, - "p90": 103.45599800348282, - "p95": 104.3199971318245, - "p99": 108.25599730014801 - }, - "roundtrip": { - "p50": 183.9359998703003, - "p90": 187.96800076961517, - "p95": 189.31199610233307, - "p99": 192.76799261569977 - }, - "isolatedSum": { - "p50": 209.08799767494202, - "p90": 219.03999894857407, - "p95": 220.67199647426605, - "p99": 229.823999106884 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 66576384, - "combineLogicalBytes": 66576384, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 256, - "globalTokens": 2048, - "dispatch": { - "p50": 133.15199315547943, - "p90": 150.27199685573578, - "p95": 157.56799280643463, - "p99": 168.2240068912506 - }, - "combine": { - "p50": 137.05599308013916, - "p90": 144.03200149536133, - "p95": 145.50399780273438, - "p99": 152.79999375343323 - }, - "roundtrip": { - "p50": 239.74399268627167, - "p90": 252.70399451255798, - "p95": 254.17599081993103, - "p99": 258.2400143146515 - }, - "isolatedSum": { - "p50": 270.2079862356186, - "p90": 294.3039983510971, - "p95": 303.071990609169, - "p99": 321.02400064468384 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 133619712, - "combineLogicalBytes": 133619712, - "fanoutMean": 5.3095703125, - "recvTokensMax": 1422, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 512, - "globalTokens": 4096, - "dispatch": { - "p50": 182.49599635601044, - "p90": 195.23200392723083, - "p95": 198.14400374889374, - "p99": 200.95999538898468 - }, - "combine": { - "p50": 208.44799280166626, - "p90": 217.98400580883026, - "p95": 219.10400688648224, - "p99": 253.76001000404358 - }, - 
"roundtrip": { - "p50": 361.6960048675537, - "p90": 376.0319948196411, - "p95": 379.71198558807373, - "p99": 384.6080005168915 - }, - "isolatedSum": { - "p50": 390.9439891576767, - "p90": 413.2160097360611, - "p95": 417.248010635376, - "p99": 454.72000539302826 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 267657216, - "combineLogicalBytes": 267657216, - "fanoutMean": 5.31787109375, - "recvTokensMax": 2779, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 1024, - "globalTokens": 8192, - "dispatch": { - "p50": 274.1119861602783, - "p90": 283.3920121192932, - "p95": 286.1120104789734, - "p99": 290.8160090446472 - }, - "combine": { - "p50": 330.1120102405548, - "p90": 336.0320031642914, - "p95": 336.89600229263306, - "p99": 341.8560028076172 - }, - "roundtrip": { - "p50": 577.344000339508, - "p90": 583.9359760284424, - "p95": 586.0480070114136, - "p99": 589.3440246582031 - }, - "isolatedSum": { - "p50": 604.2239964008331, - "p90": 619.4240152835846, - "p95": 623.0080127716064, - "p99": 632.6720118522644 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 534380544, - "combineLogicalBytes": 534380544, - "fanoutMean": 5.30859375, - "recvTokensMax": 5505, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2048, - "globalTokens": 16384, - "dispatch": { - "p50": 464.03199434280396, - "p90": 478.59200835227966, - "p95": 481.6960096359253, - "p99": 491.5519952774048 - }, - "combine": { - "p50": 581.4080238342285, - "p90": 591.5840268135071, - "p95": 594.6879982948303, - "p99": 603.5839915275574 - }, - "roundtrip": { - "p50": 1013.3440494537354, - "p90": 1023.2000350952148, - "p95": 1027.008056640625, - "p99": 1076.6079425811768 - }, - "isolatedSum": { - "p50": 1045.4400181770325, - "p90": 1070.1760351657867, - "p95": 1076.3840079307556, - "p99": 1095.1359868049622 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1066119168, - 
"combineLogicalBytes": 1066119168, - "fanoutMean": 5.29547119140625, - "recvTokensMax": 10952, - "stragglerRank": 3, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4096, - "globalTokens": 32768, - "dispatch": { - "p50": 886.1759901046753, - "p90": 910.2720022201538, - "p95": 915.5840277671814, - "p99": 928.8960099220276 - }, - "combine": { - "p50": 1059.2319965362549, - "p90": 1067.520022392273, - "p95": 1070.0160264968872, - "p99": 1076.8640041351318 - }, - "roundtrip": { - "p50": 1908.6079597473145, - "p90": 1929.2479753494263, - "p95": 1936.3199472427368, - "p99": 1965.440034866333 - }, - "isolatedSum": { - "p50": 1945.4079866409302, - "p90": 1977.7920246124268, - "p95": 1985.6000542640686, - "p99": 2005.7600140571594 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2131722240, - "combineLogicalBytes": 2131722240, - "fanoutMean": 5.294189453125, - "recvTokensMax": 21781, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-29bbdbee", - "identity": "h100|deepep|6144|8|256|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", - "colorKey": "h100_ff7906f8", - "comparisonKey": "4401899311d5e08c", - "schemaVersion": 3, - "generatedAt": "2026-06-26T23:52:30.177352+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_05", - "sku": "h100", - "backend": "deepep", - "phase": "prefill", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "runtime-visible-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · bf16", - "model": "MiniMax-M3", - "shape": { - "hidden": 6144, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - 
"dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "64d989e2e2a6b31", - "workloadId": "set:6:9f5e1e005a35e937", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28271717621", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271717621", - "createdAt": "2026-06-26T23:52:30.177352+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 111.42399907112122, - "p90": 114.94400352239609, - "p95": 116.03199690580368, - "p99": 119.61600184440613 - }, - "combine": { - "p50": 98.33600372076035, - "p90": 103.71199995279312, - "p95": 104.67199981212616, - "p99": 106.4319983124733 - }, - "roundtrip": { - "p50": 184.9599927663803, - "p90": 188.63999843597412, - "p95": 189.66400623321533, - "p99": 194.11200284957886 - }, - "isolatedSum": { - "p50": 209.76000279188156, - "p90": 218.6560034751892, - "p95": 220.70399671792984, - "p99": 226.04800015687943 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 66576384, - "combineLogicalBytes": 66576384, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 256, - "globalTokens": 2048, - "dispatch": { - "p50": 132.86399841308594, - 
"p90": 137.7599984407425, - "p95": 139.3280029296875, - "p99": 142.4960047006607 - }, - "combine": { - "p50": 137.69599795341492, - "p90": 140.4159963130951, - "p95": 141.37600362300873, - "p99": 145.53600549697876 - }, - "roundtrip": { - "p50": 237.2480034828186, - "p90": 242.08000302314758, - "p95": 243.1039959192276, - "p99": 246.24000489711761 - }, - "isolatedSum": { - "p50": 270.55999636650085, - "p90": 278.1759947538376, - "p95": 280.7040065526962, - "p99": 288.03201019763947 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 133619712, - "combineLogicalBytes": 133619712, - "fanoutMean": 5.3095703125, - "recvTokensMax": 1422, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 512, - "globalTokens": 4096, - "dispatch": { - "p50": 184.00000035762787, - "p90": 197.31199741363525, - "p95": 200.15999674797058, - "p99": 204.12799715995789 - }, - "combine": { - "p50": 209.6959948539734, - "p90": 216.86400473117828, - "p95": 217.92000532150269, - "p99": 221.95200622081757 - }, - "roundtrip": { - "p50": 365.02400040626526, - "p90": 377.21601128578186, - "p95": 380.5760145187378, - "p99": 388.12801241874695 - }, - "isolatedSum": { - "p50": 393.69599521160126, - "p90": 414.17600214481354, - "p95": 418.08000206947327, - "p99": 426.08000338077545 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 267657216, - "combineLogicalBytes": 267657216, - "fanoutMean": 5.31787109375, - "recvTokensMax": 2779, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 1024, - "globalTokens": 8192, - "dispatch": { - "p50": 273.21600914001465, - "p90": 277.44001150131226, - "p95": 279.87200021743774, - "p99": 289.3120050430298 - }, - "combine": { - "p50": 332.41599798202515, - "p90": 337.119996547699, - "p95": 338.20798993110657, - "p99": 341.66398644447327 - }, - "roundtrip": { - "p50": 577.6320099830627, - "p90": 582.751989364624, - "p95": 584.7679972648621, - 
"p99": 588.7680053710938 - }, - "isolatedSum": { - "p50": 605.6320071220398, - "p90": 614.5600080490112, - "p95": 618.0799901485443, - "p99": 630.975991487503 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 534380544, - "combineLogicalBytes": 534380544, - "fanoutMean": 5.30859375, - "recvTokensMax": 5505, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2048, - "globalTokens": 16384, - "dispatch": { - "p50": 464.32000398635864, - "p90": 473.60000014305115, - "p95": 477.3760139942169, - "p99": 648.8320231437683 - }, - "combine": { - "p50": 584.384024143219, - "p90": 590.9119844436646, - "p95": 593.0560231208801, - "p99": 596.8000292778015 - }, - "roundtrip": { - "p50": 1019.2320346832275, - "p90": 1029.6640396118164, - "p95": 1033.7599515914917, - "p99": 1037.984013557434 - }, - "isolatedSum": { - "p50": 1048.7040281295776, - "p90": 1064.5119845867157, - "p95": 1070.432037115097, - "p99": 1245.6320524215698 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1066119168, - "combineLogicalBytes": 1066119168, - "fanoutMean": 5.29547119140625, - "recvTokensMax": 10952, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4096, - "globalTokens": 32768, - "dispatch": { - "p50": 879.423975944519, - "p90": 904.6720266342163, - "p95": 913.2480025291443, - "p99": 928.991973400116 - }, - "combine": { - "p50": 1065.6960010528564, - "p90": 1075.3920078277588, - "p95": 1078.3040523529053, - "p99": 1084.2560529708862 - }, - "roundtrip": { - "p50": 1901.9520282745361, - "p90": 1920.7359552383423, - "p95": 1926.5919923782349, - "p99": 1940.1600360870361 - }, - "isolatedSum": { - "p50": 1945.1199769973755, - "p90": 1980.064034461975, - "p95": 1991.5520548820496, - "p99": 2013.2480263710022 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2131722240, - "combineLogicalBytes": 2131722240, - "fanoutMean": 5.294189453125, - "recvTokensMax": 21781, 
- "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-d524fd7e", - "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||157ca81687ddb63", - "colorKey": "h100_42947950", - "comparisonKey": "4c920ba7523ac63b", - "schemaVersion": 3, - "generatedAt": "2026-06-26T23:54:28.917588+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_08", - "sku": "h100", - "backend": "deepep", - "phase": "prefill", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · bf16", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "157ca81687ddb63", - "workloadId": "set:3:a426d66e479dc893", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28271785174", - "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271785174", - "createdAt": "2026-06-26T23:54:28.917588+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 130.52800297737122, - "p90": 135.55200397968292, - "p95": 138.43199610710144, - "p99": 176.79999768733978 - }, - "combine": { - "p50": 113.8560026884079, - "p90": 120.86399644613266, - "p95": 122.11199849843979, - "p99": 145.50399780273438 - }, - "roundtrip": { - "p50": 209.05600488185883, - "p90": 217.56799519062042, - "p95": 219.200000166893, - "p99": 275.04000067710876 - }, - "isolatedSum": { - "p50": 244.3840056657791, - "p90": 256.4160004258156, - "p95": 260.54399460554123, - "p99": 322.30399549007416 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 512, - "globalTokens": 4096, - "dispatch": { - "p50": 210.27199923992157, - "p90": 217.056006193161, - "p95": 220.22399306297302, - "p99": 256.99201226234436 - }, - "combine": { - "p50": 234.9119931459427, - "p90": 241.40800535678864, - "p95": 244.9920028448105, - "p99": 262.9759907722473 - }, - "roundtrip": { - "p50": 412.54401206970215, - "p90": 420.9280014038086, - "p95": 423.0720102787018, - "p99": 427.35999822616577 - }, - "isolatedSum": { - "p50": 445.18399238586426, - "p90": 458.46401154994965, - "p95": 465.2159959077835, - "p99": 519.9680030345917 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 312266752, - "combineLogicalBytes": 312266752, - "fanoutMean": 5.31787109375, - "recvTokensMax": 2779, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2048, - "globalTokens": 16384, - "dispatch": { - "p50": 526.5920162200928, - "p90": 541.4720177650452, - "p95": 
545.9200143814087, - "p99": 552.3520112037659 - }, - "combine": { - "p50": 637.5679969787598, - "p90": 649.6959924697876, - "p95": 652.6079773902893, - "p99": 661.0879898071289 - }, - "roundtrip": { - "p50": 1134.6240043640137, - "p90": 1146.880030632019, - "p95": 1151.2320041656494, - "p99": 1158.5919857025146 - }, - "isolatedSum": { - "p50": 1164.1600131988525, - "p90": 1191.1680102348328, - "p95": 1198.527991771698, - "p99": 1213.4400010108948 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1243805696, - "combineLogicalBytes": 1243805696, - "fanoutMean": 5.29547119140625, - "recvTokensMax": 10952, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-efe3a643", - "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", - "colorKey": "h100_42947950", - "comparisonKey": "cca7a3f5d9dbba36", - "schemaVersion": 3, - "generatedAt": "2026-06-27T10:12:09.407437+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_12", - "sku": "h100", - "backend": "deepep", - "phase": "prefill", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · bf16", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - 
"fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "64d989e2e2a6b31", - "workloadId": "set:6:a426d66e479dc893", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "2.0.0+af9a040", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28286083501", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28286083501", - "createdAt": "2026-06-27T10:12:09.407437+00:00", - "sha": "76a3032d20288ee17220eb6099346f74d56ce005" - }, - "rows": [ - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 112.5119999051094, - "p90": 118.01599711179733, - "p95": 119.39200013875961, - "p99": 123.4240010380745 - }, - "combine": { - "p50": 107.77600109577179, - "p90": 113.40799927711487, - "p95": 114.1119971871376, - "p99": 116.2559986114502 - }, - "roundtrip": { - "p50": 200.57600736618042, - "p90": 204.73599433898926, - "p95": 206.36799931526184, - "p99": 209.85600352287292 - }, - "isolatedSum": { - "p50": 220.2880010008812, - "p90": 231.4239963889122, - "p95": 233.50399732589722, - "p99": 239.6799996495247 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 256, - "globalTokens": 2048, - "dispatch": { - "p50": 143.77599954605103, - "p90": 148.12800288200378, - "p95": 149.6960073709488, - "p99": 152.51199901103973 - }, - "combine": { - "p50": 151.10400319099426, - "p90": 155.74400126934052, - "p95": 156.76799416542053, - "p99": 158.11200439929962 - }, - "roundtrip": { - "p50": 265.53601026535034, - "p90": 
269.79199051856995, - "p95": 270.9760069847107, - "p99": 274.01599287986755 - }, - "isolatedSum": { - "p50": 294.8800027370453, - "p90": 303.8720041513443, - "p95": 306.4640015363693, - "p99": 310.62400341033936 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 155889664, - "combineLogicalBytes": 155889664, - "fanoutMean": 5.3095703125, - "recvTokensMax": 1422, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 512, - "globalTokens": 4096, - "dispatch": { - "p50": 199.96799528598785, - "p90": 204.12799715995789, - "p95": 205.31199872493744, - "p99": 209.72800254821777 - }, - "combine": { - "p50": 229.0560007095337, - "p90": 232.2559952735901, - "p95": 235.80799996852875, - "p99": 239.19999599456787 - }, - "roundtrip": { - "p50": 401.5359878540039, - "p90": 406.0159921646118, - "p95": 407.6800048351288, - "p99": 412.1280014514923 - }, - "isolatedSum": { - "p50": 429.02399599552155, - "p90": 436.383992433548, - "p95": 441.1199986934662, - "p99": 448.92799854278564 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 312266752, - "combineLogicalBytes": 312266752, - "fanoutMean": 5.31787109375, - "recvTokensMax": 2779, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 1024, - "globalTokens": 8192, - "dispatch": { - "p50": 304.9600124359131, - "p90": 310.016006231308, - "p95": 311.3279938697815, - "p99": 313.2160007953644 - }, - "combine": { - "p50": 367.39200353622437, - "p90": 373.3440041542053, - "p95": 375.90399384498596, - "p99": 378.81600856781006 - }, - "roundtrip": { - "p50": 645.4079747200012, - "p90": 652.5760293006897, - "p95": 654.7200083732605, - "p99": 659.0719819068909 - }, - "isolatedSum": { - "p50": 672.3520159721375, - "p90": 683.3600103855133, - "p95": 687.2319877147675, - "p99": 692.0320093631744 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 623443968, - "combineLogicalBytes": 623443968, - "fanoutMean": 
5.30859375, - "recvTokensMax": 5505, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2048, - "globalTokens": 16384, - "dispatch": { - "p50": 532.0320129394531, - "p90": 541.8559908866882, - "p95": 545.4720258712769, - "p99": 554.0480017662048 - }, - "combine": { - "p50": 637.9200220108032, - "p90": 645.7599997520447, - "p95": 647.9679942131042, - "p99": 653.6639928817749 - }, - "roundtrip": { - "p50": 1139.6479606628418, - "p90": 1149.888038635254, - "p95": 1154.3359756469727, - "p99": 1160.032033920288 - }, - "isolatedSum": { - "p50": 1169.9520349502563, - "p90": 1187.615990638733, - "p95": 1193.440020084381, - "p99": 1207.7119946479797 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1243805696, - "combineLogicalBytes": 1243805696, - "fanoutMean": 5.29547119140625, - "recvTokensMax": 10952, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4096, - "globalTokens": 32768, - "dispatch": { - "p50": 999.4239807128906, - "p90": 1017.2480344772339, - "p95": 1023.8080024719238, - "p99": 1035.040020942688 - }, - "combine": { - "p50": 1168.544054031372, - "p90": 1176.8640279769897, - "p95": 1180.5119514465332, - "p99": 1186.1759424209595 - }, - "roundtrip": { - "p50": 2132.4799060821533, - "p90": 2148.47993850708, - "p95": 2154.9439430236816, - "p99": 2171.5519428253174 - }, - "isolatedSum": { - "p50": 2167.9680347442627, - "p90": 2194.1120624542236, - "p95": 2204.319953918457, - "p99": 2221.2159633636475 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2487009280, - "combineLogicalBytes": 2487009280, - "fanoutMean": 5.294189453125, - "recvTokensMax": 21781, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-8a96205b", - "identity": "h100|deepep|7168|8|256|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", - "colorKey": "h100_ff7906f8", - 
"comparisonKey": "6a625438eb544ee8", - "schemaVersion": 3, - "generatedAt": "2026-06-26T23:48:12.079136+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_06", - "sku": "h100", - "backend": "deepep", - "phase": "prefill", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "runtime-visible-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · bf16", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "64d989e2e2a6b31", - "workloadId": "set:6:a426d66e479dc893", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28271563151", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271563151", - "createdAt": "2026-06-26T23:48:12.079136+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 111.61600053310394, - "p90": 117.3119992017746, - "p95": 118.81600320339203, 
- "p99": 123.74400347471237 - }, - "combine": { - "p50": 105.85600137710571, - "p90": 107.07200318574905, - "p95": 111.16799712181091, - "p99": 113.8560026884079 - }, - "roundtrip": { - "p50": 193.02399456501007, - "p90": 199.52000677585602, - "p95": 200.9280025959015, - "p99": 204.96000349521637 - }, - "isolatedSum": { - "p50": 217.47200191020966, - "p90": 224.38400238752365, - "p95": 229.98400032520294, - "p99": 237.60000616312027 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 256, - "globalTokens": 2048, - "dispatch": { - "p50": 143.23200285434723, - "p90": 147.5200057029724, - "p95": 148.6400067806244, - "p99": 152.28800475597382 - }, - "combine": { - "p50": 148.76799285411835, - "p90": 154.4640064239502, - "p95": 155.29599785804749, - "p99": 156.76799416542053 - }, - "roundtrip": { - "p50": 262.33598589897156, - "p90": 266.431987285614, - "p95": 268.12800765037537, - "p99": 271.1679935455322 - }, - "isolatedSum": { - "p50": 291.9999957084656, - "p90": 301.9840121269226, - "p95": 303.9360046386719, - "p99": 309.05599892139435 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 155889664, - "combineLogicalBytes": 155889664, - "fanoutMean": 5.3095703125, - "recvTokensMax": 1422, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 512, - "globalTokens": 4096, - "dispatch": { - "p50": 196.25599682331085, - "p90": 201.1840045452118, - "p95": 202.72000133991241, - "p99": 214.84799683094025 - }, - "combine": { - "p50": 230.49600422382355, - "p90": 236.12800240516663, - "p95": 237.2799962759018, - "p99": 241.15200340747833 - }, - "roundtrip": { - "p50": 403.0719995498657, - "p90": 408.3839952945709, - "p95": 410.14400124549866, - "p99": 412.76800632476807 - }, - "isolatedSum": { - "p50": 
426.7520010471344, - "p90": 437.3120069503784, - "p95": 439.9999976158142, - "p99": 456.0000002384186 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 312266752, - "combineLogicalBytes": 312266752, - "fanoutMean": 5.31787109375, - "recvTokensMax": 2779, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 1024, - "globalTokens": 8192, - "dispatch": { - "p50": 301.6960024833679, - "p90": 306.43200874328613, - "p95": 307.9040050506592, - "p99": 312.1280074119568 - }, - "combine": { - "p50": 364.1279935836792, - "p90": 369.4399893283844, - "p95": 372.0319867134094, - "p99": 374.9760091304779 - }, - "roundtrip": { - "p50": 640.064001083374, - "p90": 646.8160152435303, - "p95": 648.5120058059692, - "p99": 653.6960005760193 - }, - "isolatedSum": { - "p50": 665.8239960670471, - "p90": 675.8719980716705, - "p95": 679.9359917640686, - "p99": 687.1040165424347 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 623443968, - "combineLogicalBytes": 623443968, - "fanoutMean": 5.30859375, - "recvTokensMax": 5505, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2048, - "globalTokens": 16384, - "dispatch": { - "p50": 532.1599841117859, - "p90": 540.7040119171143, - "p95": 544.1280007362366, - "p99": 549.2799878120422 - }, - "combine": { - "p50": 637.503981590271, - "p90": 645.5039978027344, - "p95": 647.7760076522827, - "p99": 653.9520025253296 - }, - "roundtrip": { - "p50": 1141.9199705123901, - "p90": 1154.4320583343506, - "p95": 1160.1920127868652, - "p99": 1180.9600591659546 - }, - "isolatedSum": { - "p50": 1169.6639657020569, - "p90": 1186.2080097198486, - "p95": 1191.9040083885193, - "p99": 1203.2319903373718 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1243805696, - "combineLogicalBytes": 1243805696, - "fanoutMean": 5.29547119140625, - "recvTokensMax": 10952, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - 
"trials": 3 - }, - { - "tokensPerRank": 4096, - "globalTokens": 32768, - "dispatch": { - "p50": 993.9200282096863, - "p90": 1017.2799825668335, - "p95": 1023.4240293502808, - "p99": 1036.8319749832153 - }, - "combine": { - "p50": 1165.0559902191162, - "p90": 1175.3599643707275, - "p95": 1177.9520511627197, - "p99": 1283.2640409469604 - }, - "roundtrip": { - "p50": 2117.6319122314453, - "p90": 2134.848117828369, - "p95": 2139.6799087524414, - "p99": 2151.5839099884033 - }, - "isolatedSum": { - "p50": 2158.9760184288025, - "p90": 2192.639946937561, - "p95": 2201.3760805130005, - "p99": 2320.096015930176 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2487009280, - "combineLogicalBytes": 2487009280, - "fanoutMean": 5.294189453125, - "recvTokensMax": 21781, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-27ce5700", - "identity": "h100|deepep|7168|8|384|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||cd50548525dafdf", - "colorKey": "h100_42947950", - "comparisonKey": "4106e8f613d56fa1", - "schemaVersion": 3, - "generatedAt": "2026-06-27T11:16:10.577708+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_04", - "sku": "h100", - "backend": "deepep", - "phase": "prefill", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · bf16", - "model": "Kimi-K2", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 384, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - 
"achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "cd50548525dafdf", - "workloadId": "set:6:b23bc0c4b6402c69", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28287499275", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28287499275", - "createdAt": "2026-06-27T11:16:10.577708+00:00", - "sha": "df7fddee0a275156d4c72fa006cd2b73bce72613" - }, - "rows": [ - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 111.07199639081955, - "p90": 116.38399958610535, - "p95": 117.8240031003952, - "p99": 120.70400267839432 - }, - "combine": { - "p50": 106.36799782514572, - "p90": 107.64800012111664, - "p95": 111.77600175142288, - "p99": 114.656001329422 - }, - "roundtrip": { - "p50": 195.10400295257568, - "p90": 200.6399929523468, - "p95": 202.2079974412918, - "p99": 206.68800175189972 - }, - "isolatedSum": { - "p50": 217.43999421596527, - "p90": 224.03199970722198, - "p95": 229.60000485181808, - "p99": 235.36000400781631 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 77514752, - "combineLogicalBytes": 77514752, - "fanoutMean": 5.2802734375, - "recvTokensMax": 707, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 256, - "globalTokens": 2048, - "dispatch": { - "p50": 145.34400403499603, - "p90": 151.10400319099426, - "p95": 152.25599706172943, - "p99": 155.29599785804749 - }, - "combine": { - "p50": 149.63200688362122, - "p90": 
155.16799688339233, - "p95": 155.71199357509613, - "p99": 156.76799416542053 - }, - "roundtrip": { - "p50": 268.12800765037537, - "p90": 272.99201488494873, - "p95": 274.6880054473877, - "p99": 278.78400683403015 - }, - "isolatedSum": { - "p50": 294.97601091861725, - "p90": 306.2720000743866, - "p95": 307.96799063682556, - "p99": 312.063992023468 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 154570752, - "combineLogicalBytes": 154570752, - "fanoutMean": 5.2646484375, - "recvTokensMax": 1391, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 512, - "globalTokens": 4096, - "dispatch": { - "p50": 199.42399859428406, - "p90": 204.41600680351257, - "p95": 207.87200331687927, - "p99": 219.93599832057953 - }, - "combine": { - "p50": 228.99200022220612, - "p90": 234.6239984035492, - "p95": 236.06400191783905, - "p99": 237.69600689411163 - }, - "roundtrip": { - "p50": 400.89601278305054, - "p90": 405.11998534202576, - "p95": 406.49598836898804, - "p99": 409.88799929618835 - }, - "isolatedSum": { - "p50": 428.4159988164902, - "p90": 439.04000520706177, - "p95": 443.9360052347183, - "p99": 457.63200521469116 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 309772288, - "combineLogicalBytes": 309772288, - "fanoutMean": 5.275390625, - "recvTokensMax": 2754, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 1024, - "globalTokens": 8192, - "dispatch": { - "p50": 304.86398935317993, - "p90": 336.38399839401245, - "p95": 347.51999378204346, - "p99": 513.0239725112915 - }, - "combine": { - "p50": 366.33598804473877, - "p90": 372.0960021018982, - "p95": 374.91199374198914, - "p99": 474.7520089149475 - }, - "roundtrip": { - "p50": 644.0640091896057, - "p90": 650.9439945220947, - "p95": 653.823971748352, - "p99": 658.5919857025146 - }, - "isolatedSum": { - "p50": 671.1999773979187, - "p90": 708.4800004959106, - "p95": 722.4319875240326, - "p99": 
987.775981426239 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 619501568, - "combineLogicalBytes": 619501568, - "fanoutMean": 5.2750244140625, - "recvTokensMax": 5469, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2048, - "globalTokens": 16384, - "dispatch": { - "p50": 524.5440006256104, - "p90": 540.8959984779358, - "p95": 543.7120199203491, - "p99": 551.6800284385681 - }, - "combine": { - "p50": 630.8159828186035, - "p90": 639.8720145225525, - "p95": 642.4000263214111, - "p99": 648.1599807739258 - }, - "roundtrip": { - "p50": 1125.1519918441772, - "p90": 1138.8800144195557, - "p95": 1142.2719955444336, - "p99": 1151.6799926757812 - }, - "isolatedSum": { - "p50": 1155.3599834442139, - "p90": 1180.7680130004883, - "p95": 1186.1120462417603, - "p99": 1199.840009212494 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1239375872, - "combineLogicalBytes": 1239375872, - "fanoutMean": 5.276611328125, - "recvTokensMax": 10883, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4096, - "globalTokens": 32768, - "dispatch": { - "p50": 1012.2560262680054, - "p90": 1040.9280061721802, - "p95": 1049.504041671753, - "p99": 1060.0320100784302 - }, - "combine": { - "p50": 1154.3359756469727, - "p90": 1163.2000207901, - "p95": 1166.8479442596436, - "p99": 1173.7279891967773 - }, - "roundtrip": { - "p50": 2117.1839237213135, - "p90": 2141.8559551239014, - "p95": 2147.104024887085, - "p99": 2157.1199893951416 - }, - "isolatedSum": { - "p50": 2166.592001914978, - "p90": 2204.1280269622803, - "p95": 2216.3519859313965, - "p99": 2233.7599992752075 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2479669248, - "combineLogicalBytes": 2479669248, - "fanoutMean": 5.278564453125, - "recvTokensMax": 21730, - "stragglerRank": 1, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-32c90de8", - "identity": 
"h100|deepep|7168|8|384|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||cd50548525dafdf", - "colorKey": "h100_ff7906f8", - "comparisonKey": "db866d0065c2a509", - "schemaVersion": 3, - "generatedAt": "2026-06-26T23:51:05.825406+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_05", - "sku": "h100", - "backend": "deepep", - "phase": "prefill", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "runtime-visible-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · bf16", - "model": "Kimi-K2", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 384, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "cd50548525dafdf", - "workloadId": "set:6:b23bc0c4b6402c69", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28271671786", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271671786", - "createdAt": "2026-06-26T23:51:05.825406+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - 
"tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 112.96000331640244, - "p90": 118.78400295972824, - "p95": 120.28799951076508, - "p99": 130.40000200271606 - }, - "combine": { - "p50": 106.1440035700798, - "p90": 109.15199667215347, - "p95": 110.30399799346924, - "p99": 114.49600011110306 - }, - "roundtrip": { - "p50": 196.99199497699738, - "p90": 201.34399831295013, - "p95": 202.94399559497833, - "p99": 206.04799687862396 - }, - "isolatedSum": { - "p50": 219.10400688648224, - "p90": 227.9359996318817, - "p95": 230.5919975042343, - "p99": 244.89600211381912 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 77514752, - "combineLogicalBytes": 77514752, - "fanoutMean": 5.2802734375, - "recvTokensMax": 707, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 256, - "globalTokens": 2048, - "dispatch": { - "p50": 149.21599626541138, - "p90": 155.03999590873718, - "p95": 157.05600380897522, - "p99": 159.4880074262619 - }, - "combine": { - "p50": 153.50399911403656, - "p90": 158.62399339675903, - "p95": 160.25599837303162, - "p99": 165.15199840068817 - }, - "roundtrip": { - "p50": 270.3999876976013, - "p90": 284.0000092983246, - "p95": 285.69599986076355, - "p99": 288.9600098133087 - }, - "isolatedSum": { - "p50": 302.71999537944794, - "p90": 313.6639893054962, - "p95": 317.31200218200684, - "p99": 324.6400058269501 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 154570752, - "combineLogicalBytes": 154570752, - "fanoutMean": 5.2646484375, - "recvTokensMax": 1391, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 512, - "globalTokens": 4096, - "dispatch": { - "p50": 201.92000269889832, - "p90": 212.5760018825531, - "p95": 214.59199488162994, - "p99": 217.8560048341751 - }, - "combine": { - "p50": 229.5999974012375, - "p90": 237.92000114917755, - "p95": 241.2479966878891, - "p99": 245.2159970998764 - }, - "roundtrip": { - 
"p50": 404.2240083217621, - "p90": 417.5359904766083, - "p95": 419.3919897079468, - "p99": 424.1600036621094 - }, - "isolatedSum": { - "p50": 431.5200001001358, - "p90": 450.49600303173065, - "p95": 455.83999156951904, - "p99": 463.0720019340515 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 309772288, - "combineLogicalBytes": 309772288, - "fanoutMean": 5.275390625, - "recvTokensMax": 2754, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 1024, - "globalTokens": 8192, - "dispatch": { - "p50": 304.83201146125793, - "p90": 315.39198756217957, - "p95": 317.6319897174835, - "p99": 320.51199674606323 - }, - "combine": { - "p50": 367.48799681663513, - "p90": 376.96000933647156, - "p95": 381.9200098514557, - "p99": 392.192006111145 - }, - "roundtrip": { - "p50": 644.7039842605591, - "p90": 655.456006526947, - "p95": 677.951991558075, - "p99": 919.8399782180786 - }, - "isolatedSum": { - "p50": 672.3200082778931, - "p90": 692.3519968986511, - "p95": 699.5519995689392, - "p99": 712.7040028572083 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 619501568, - "combineLogicalBytes": 619501568, - "fanoutMean": 5.2750244140625, - "recvTokensMax": 5469, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2048, - "globalTokens": 16384, - "dispatch": { - "p50": 521.2799906730652, - "p90": 536.4800095558167, - "p95": 540.224015712738, - "p99": 549.3119955062866 - }, - "combine": { - "p50": 632.4160099029541, - "p90": 640.7679915428162, - "p95": 643.3600187301636, - "p99": 651.4559984207153 - }, - "roundtrip": { - "p50": 1126.431941986084, - "p90": 1137.8240585327148, - "p95": 1141.5679454803467, - "p99": 1157.6000452041626 - }, - "isolatedSum": { - "p50": 1153.6960005760193, - "p90": 1177.2480010986328, - "p95": 1183.5840344429016, - "p99": 1200.767993927002 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1239375872, - 
"combineLogicalBytes": 1239375872, - "fanoutMean": 5.276611328125, - "recvTokensMax": 10883, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4096, - "globalTokens": 32768, - "dispatch": { - "p50": 1011.2960338592529, - "p90": 1036.895990371704, - "p95": 1044.3840026855469, - "p99": 1057.088017463684 - }, - "combine": { - "p50": 1154.8160314559937, - "p90": 1163.9360189437866, - "p95": 1166.5279865264893, - "p99": 1172.160029411316 - }, - "roundtrip": { - "p50": 2122.7200031280518, - "p90": 2144.9921131134033, - "p95": 2150.559902191162, - "p99": 2167.6158905029297 - }, - "isolatedSum": { - "p50": 2166.1120653152466, - "p90": 2200.8320093154907, - "p95": 2210.911989212036, - "p99": 2229.248046875 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2479669248, - "combineLogicalBytes": 2479669248, - "fanoutMean": 5.278564453125, - "recvTokensMax": 21730, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-3c52549e", - "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|prefill|normal|none|none|0|tuned||0a3064a2af0dd39", - "colorKey": "h100_16047c28", - "comparisonKey": "987d0ef30063bb5c", - "schemaVersion": 3, - "generatedAt": "2026-06-26T23:59:36.290170+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_11", - "sku": "h100", - "backend": "deepep", - "phase": "prefill", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · bf16 · balanced", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "balanced", - "routingLabel": "balanced", - "routingStep": 0, - "unevenTokens": "none", - 
"eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "0a3064a2af0dd39", - "workloadId": "set:6:2dad1a73ff872905", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28271938768", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271938768", - "createdAt": "2026-06-26T23:59:36.290170+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 127.23200023174286, - "p90": 131.52000308036804, - "p95": 133.08799266815186, - "p99": 136.3839954137802 - }, - "combine": { - "p50": 126.11199915409088, - "p90": 130.62399625778198, - "p95": 131.48799538612366, - "p99": 133.98399949073792 - }, - "roundtrip": { - "p50": 233.43999683856964, - "p90": 236.76800727844238, - "p95": 237.40799725055695, - "p99": 240.4160052537918 - }, - "isolatedSum": { - "p50": 253.34399938583374, - "p90": 262.14399933815, - "p95": 264.5759880542755, - "p99": 270.3679949045181 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 117440512, - "combineLogicalBytes": 117440512, - "fanoutMean": 8, - "recvTokensMax": 1024, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 256, - "globalTokens": 2048, - "dispatch": { - "p50": 
180.7039976119995, - "p90": 191.3280040025711, - "p95": 193.08799505233765, - "p99": 197.28000462055206 - }, - "combine": { - "p50": 183.26400220394135, - "p90": 190.97599387168884, - "p95": 192.3840045928955, - "p99": 197.66399264335632 - }, - "roundtrip": { - "p50": 332.15999603271484, - "p90": 344.35200691223145, - "p95": 346.3680148124695, - "p99": 348.83201122283936 - }, - "isolatedSum": { - "p50": 363.96799981594086, - "p90": 382.30399787425995, - "p95": 385.47199964523315, - "p99": 394.9439972639084 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 234881024, - "combineLogicalBytes": 234881024, - "fanoutMean": 8, - "recvTokensMax": 2048, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 512, - "globalTokens": 4096, - "dispatch": { - "p50": 272.41599559783936, - "p90": 284.0000092983246, - "p95": 286.46400570869446, - "p99": 290.1439964771271 - }, - "combine": { - "p50": 276.2239873409271, - "p90": 285.0880026817322, - "p95": 286.8799865245819, - "p99": 294.624000787735 - }, - "roundtrip": { - "p50": 519.648015499115, - "p90": 533.2159996032715, - "p95": 535.1999998092651, - "p99": 538.0480289459229 - }, - "isolatedSum": { - "p50": 548.6399829387665, - "p90": 569.0880119800568, - "p95": 573.3439922332764, - "p99": 584.7679972648621 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 469762048, - "combineLogicalBytes": 469762048, - "fanoutMean": 8, - "recvTokensMax": 4096, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 1024, - "globalTokens": 8192, - "dispatch": { - "p50": 450.3679871559143, - "p90": 462.14398741722107, - "p95": 464.2559885978699, - "p99": 469.34399008750916 - }, - "combine": { - "p50": 469.11999583244324, - "p90": 477.53599286079407, - "p95": 479.0720045566559, - "p99": 484.0959906578064 - }, - "roundtrip": { - "p50": 892.3839926719666, - "p90": 904.3520092964172, - "p95": 909.0560078620911, - "p99": 
1079.967975616455 - }, - "isolatedSum": { - "p50": 919.4879829883575, - "p90": 939.6799802780151, - "p95": 943.3279931545258, - "p99": 953.4399807453156 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 939524096, - "combineLogicalBytes": 939524096, - "fanoutMean": 8, - "recvTokensMax": 8192, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2048, - "globalTokens": 16384, - "dispatch": { - "p50": 810.7200264930725, - "p90": 828.607976436615, - "p95": 831.3599824905396, - "p99": 837.2480273246765 - }, - "combine": { - "p50": 854.8160195350647, - "p90": 863.6159896850586, - "p95": 865.9840226173401, - "p99": 870.3359961509705 - }, - "roundtrip": { - "p50": 1635.583996772766, - "p90": 1645.0239419937134, - "p95": 1648.095965385437, - "p99": 1656.7679643630981 - }, - "isolatedSum": { - "p50": 1665.5360460281372, - "p90": 1692.2239661216736, - "p95": 1697.3440051078796, - "p99": 1707.584023475647 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1879048192, - "combineLogicalBytes": 1879048192, - "fanoutMean": 8, - "recvTokensMax": 16384, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4096, - "globalTokens": 32768, - "dispatch": { - "p50": 1546.623945236206, - "p90": 1554.0159940719604, - "p95": 1556.3839673995972, - "p99": 1562.559962272644 - }, - "combine": { - "p50": 1599.552035331726, - "p90": 1609.2480421066284, - "p95": 1612.4800443649292, - "p99": 1621.6000318527222 - }, - "roundtrip": { - "p50": 3122.015953063965, - "p90": 3132.4799060821533, - "p95": 3136.352062225342, - "p99": 3144.4480419158936 - }, - "isolatedSum": { - "p50": 3146.175980567932, - "p90": 3163.264036178589, - "p95": 3168.8640117645264, - "p99": 3184.159994125366 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 3758096384, - "combineLogicalBytes": 3758096384, - "fanoutMean": 8, - "recvTokensMax": 32768, - "stragglerRank": 6, - "correct": true, - 
"samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-05271e8a", - "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|prefill|normal|none|none|0|tuned||9e6ac678a09f7f8", - "colorKey": "h100_16047c28", - "comparisonKey": "987d0ef30063bb5c", - "schemaVersion": 3, - "generatedAt": "2026-06-26T23:55:32.762651+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_15", - "sku": "h100", - "backend": "deepep", - "phase": "prefill", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · bf16 · balanced", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "balanced", - "routingLabel": "balanced", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "9e6ac678a09f7f8", - "workloadId": "set:3:2dad1a73ff872905", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28271791847", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271791847", - 
"createdAt": "2026-06-26T23:55:32.762651+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 141.9840008020401, - "p90": 148.15999567508698, - "p95": 150.43200552463531, - "p99": 159.71200168132782 - }, - "combine": { - "p50": 131.77600502967834, - "p90": 138.7840062379837, - "p95": 139.80799913406372, - "p99": 147.07200229167938 - }, - "roundtrip": { - "p50": 243.1039959192276, - "p90": 250.71999430656433, - "p95": 252.03201174736023, - "p99": 257.9840123653412 - }, - "isolatedSum": { - "p50": 273.76000583171844, - "p90": 286.9440019130707, - "p95": 290.24000465869904, - "p99": 306.7840039730072 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 117440512, - "combineLogicalBytes": 117440512, - "fanoutMean": 8, - "recvTokensMax": 1024, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 512, - "globalTokens": 4096, - "dispatch": { - "p50": 282.20799565315247, - "p90": 291.04000329971313, - "p95": 293.3439910411835, - "p99": 299.3920147418976 - }, - "combine": { - "p50": 282.71999955177307, - "p90": 287.4560058116913, - "p95": 288.9600098133087, - "p99": 297.5040078163147 - }, - "roundtrip": { - "p50": 530.239999294281, - "p90": 536.9600057601929, - "p95": 540.0320291519165, - "p99": 549.3119955062866 - }, - "isolatedSum": { - "p50": 564.9279952049255, - "p90": 578.4960091114044, - "p95": 582.3040008544922, - "p99": 596.8960225582123 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 469762048, - "combineLogicalBytes": 469762048, - "fanoutMean": 8, - "recvTokensMax": 4096, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2048, - "globalTokens": 16384, - "dispatch": { - "p50": 815.7439827919006, - "p90": 825.2800107002258, - "p95": 828.5760283470154, - "p99": 835.0080251693726 - }, - "combine": { - "p50": 857.9840064048767, - "p90": 
866.27197265625, - "p95": 869.6320056915283, - "p99": 877.8560161590576 - }, - "roundtrip": { - "p50": 1642.5280570983887, - "p90": 1654.5920372009277, - "p95": 1658.944010734558, - "p99": 1692.7039623260498 - }, - "isolatedSum": { - "p50": 1673.7279891967773, - "p90": 1691.5519833564758, - "p95": 1698.2080340385437, - "p99": 1712.8640413284302 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1879048192, - "combineLogicalBytes": 1879048192, - "fanoutMean": 8, - "recvTokensMax": 16384, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-06b4b084", - "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|8|prefill|normal|none|none|0|tuned||7aa44c7b86748b9", - "colorKey": "h100_0c515f8b", - "comparisonKey": "e2c5b47e428e10b6", - "schemaVersion": 3, - "generatedAt": "2026-06-26T23:54:50.950252+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_04", - "sku": "h100", - "backend": "deepep", - "phase": "prefill", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · bf16 · balanced-rank-local", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "balanced-rank-local", - "routingLabel": "balanced-rank-local", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { 
- "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "7aa44c7b86748b9", - "workloadId": "set:3:388ff74baef05c72", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28271798809", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271798809", - "createdAt": "2026-06-26T23:54:50.950252+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 102.52799838781357, - "p90": 106.52799904346466, - "p95": 108.31999778747559, - "p99": 112.44799941778183 - }, - "combine": { - "p50": 81.31200075149536, - "p90": 88.128000497818, - "p95": 88.48000317811966, - "p99": 90.4960036277771 - }, - "roundtrip": { - "p50": 155.32800555229187, - "p90": 160.92799603939056, - "p95": 161.79199516773224, - "p99": 165.40800034999847 - }, - "isolatedSum": { - "p50": 183.83999913930893, - "p90": 194.65599954128265, - "p95": 196.80000096559525, - "p99": 202.94400304555893 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 14680064, - "combineLogicalBytes": 14680064, - "fanoutMean": 1, - "recvTokensMax": 128, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 512, - "globalTokens": 4096, - "dispatch": { - "p50": 124.95999783277512, - "p90": 130.36799430847168, - "p95": 131.9040060043335, - "p99": 142.17600226402283 - }, - "combine": { - "p50": 128.7039965391159, - "p90": 130.43199479579926, - "p95": 136.80000603199005, - "p99": 147.67999947071075 - }, - "roundtrip": { - "p50": 216.25599265098572, - "p90": 220.57600319385529, - "p95": 223.4880030155182, - "p99": 267.8399980068207 - }, - "isolatedSum": { - "p50": 
253.66399437189102, - "p90": 260.79998910427094, - "p95": 268.70401203632355, - "p99": 289.8560017347336 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 58720256, - "combineLogicalBytes": 58720256, - "fanoutMean": 1, - "recvTokensMax": 512, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2048, - "globalTokens": 16384, - "dispatch": { - "p50": 205.6639939546585, - "p90": 211.13599836826324, - "p95": 216.48000180721283, - "p99": 269.1200077533722 - }, - "combine": { - "p50": 295.80798745155334, - "p90": 300.54399371147156, - "p95": 305.2160143852234, - "p99": 337.3439908027649 - }, - "roundtrip": { - "p50": 464.4800126552582, - "p90": 471.45599126815796, - "p95": 474.047988653183, - "p99": 503.35997343063354 - }, - "isolatedSum": { - "p50": 501.47198140621185, - "p90": 511.6799920797348, - "p95": 521.6960161924362, - "p99": 606.4639985561371 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 234881024, - "combineLogicalBytes": 234881024, - "fanoutMean": 1, - "recvTokensMax": 2048, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-4058f6f5", - "identity": "h100|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|balanced+eplb|8|prefill|normal|none|none|0|tuned||df54a9510825f71", - "colorKey": "h100_c0c0ad86", - "comparisonKey": "252e0af9287be53d", - "schemaVersion": 3, - "generatedAt": "2026-06-26T23:59:35.979250+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_07", - "sku": "h100", - "backend": "deepep", - "phase": "prefill", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · bf16 · balanced+eplb", - "model": "DeepSeek-V3 (EPLB physical)", - 
"shape": { - "hidden": 7168, - "topk": 8, - "experts": 288, - "routing": "balanced", - "routingLabel": "balanced+eplb", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": true, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "df54a9510825f71", - "workloadId": "set:6:2dad1a73ff872905", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": 1, - "eplbImbalanceAfter": 1, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28271942138", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271942138", - "createdAt": "2026-06-26T23:59:35.979250+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 94.14400160312653, - "p90": 98.01600128412247, - "p95": 99.74399954080582, - "p99": 103.29599678516388 - }, - "combine": { - "p50": 83.03999900817871, - "p90": 88.22400122880936, - "p95": 89.15200084447861, - "p99": 90.81599861383438 - }, - "roundtrip": { - "p50": 157.79200196266174, - "p90": 161.9199961423874, - "p95": 163.5199934244156, - "p99": 167.67999529838562 - }, - "isolatedSum": { - "p50": 177.18400061130524, - "p90": 186.24000251293182, - "p95": 188.89600038528442, - "p99": 194.11199539899826 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 29360128, - "combineLogicalBytes": 29360128, - "fanoutMean": 2, - "recvTokensMax": 384, - 
"stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 256, - "globalTokens": 2048, - "dispatch": { - "p50": 122.40000069141388, - "p90": 129.95199859142303, - "p95": 143.10400187969208, - "p99": 173.95199835300446 - }, - "combine": { - "p50": 104.41599786281586, - "p90": 106.65600001811981, - "p95": 120.51200121641159, - "p99": 144.28800344467163 - }, - "roundtrip": { - "p50": 198.43199849128723, - "p90": 202.36800611019135, - "p95": 205.1839977502823, - "p99": 235.32800376415253 - }, - "isolatedSum": { - "p50": 226.81599855422974, - "p90": 236.60799860954285, - "p95": 263.61600309610367, - "p99": 318.2400017976761 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 58720256, - "combineLogicalBytes": 58720256, - "fanoutMean": 2, - "recvTokensMax": 768, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 512, - "globalTokens": 4096, - "dispatch": { - "p50": 150.43200552463531, - "p90": 154.7199934720993, - "p95": 158.4320068359375, - "p99": 386.1120045185089 - }, - "combine": { - "p50": 141.15199446678162, - "p90": 145.91999351978302, - "p95": 146.55999839305878, - "p99": 147.5200057029724 - }, - "roundtrip": { - "p50": 266.1440074443817, - "p90": 274.9119997024536, - "p95": 278.3679962158203, - "p99": 286.9440019130707 - }, - "isolatedSum": { - "p50": 291.58399999141693, - "p90": 300.6399869918823, - "p95": 304.9920052289963, - "p99": 533.6320102214813 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 117440512, - "combineLogicalBytes": 117440512, - "fanoutMean": 2, - "recvTokensMax": 1536, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 1024, - "globalTokens": 8192, - "dispatch": { - "p50": 207.61600136756897, - "p90": 213.44000101089478, - "p95": 217.98400580883026, - "p99": 245.5040067434311 - }, - "combine": { - "p50": 219.93599832057953, - "p90": 225.0880002975464, - "p95": 
227.2000014781952, - "p99": 244.86400187015533 - }, - "roundtrip": { - "p50": 405.023992061615, - "p90": 410.0480079650879, - "p95": 412.31998801231384, - "p99": 437.6640021800995 - }, - "isolatedSum": { - "p50": 427.5519996881485, - "p90": 438.52800130844116, - "p95": 445.18400728702545, - "p99": 490.3680086135864 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 234881024, - "combineLogicalBytes": 234881024, - "fanoutMean": 2, - "recvTokensMax": 3072, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2048, - "globalTokens": 16384, - "dispatch": { - "p50": 329.47200536727905, - "p90": 336.60799264907837, - "p95": 339.04001116752625, - "p99": 460.4159891605377 - }, - "combine": { - "p50": 368.3199882507324, - "p90": 375.2639889717102, - "p95": 377.6960074901581, - "p99": 383.07198882102966 - }, - "roundtrip": { - "p50": 670.0159907341003, - "p90": 675.8400201797485, - "p95": 678.3360242843628, - "p99": 682.3359727859497 - }, - "isolatedSum": { - "p50": 697.7919936180115, - "p90": 711.8719816207886, - "p95": 716.7360186576843, - "p99": 843.4879779815674 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 469762048, - "combineLogicalBytes": 469762048, - "fanoutMean": 2, - "recvTokensMax": 6144, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4096, - "globalTokens": 32768, - "dispatch": { - "p50": 580.8960199356079, - "p90": 590.1119709014893, - "p95": 592.6079750061035, - "p99": 597.5040197372437 - }, - "combine": { - "p50": 647.9039788246155, - "p90": 655.0719738006592, - "p95": 657.2480201721191, - "p99": 660.863995552063 - }, - "roundtrip": { - "p50": 1207.4559926986694, - "p90": 1217.087984085083, - "p95": 1224.0639925003052, - "p99": 1241.312026977539 - }, - "isolatedSum": { - "p50": 1228.7999987602234, - "p90": 1245.1839447021484, - "p95": 1249.8559951782227, - "p99": 1258.3680152893066 - }, - "roundtripMeasured": true, - 
"dispatchLogicalBytes": 939524096, - "combineLogicalBytes": 939524096, - "fanoutMean": 2, - "recvTokensMax": 12288, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-b89c63a5", - "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|prefill|normal|none|none|0|tuned||38fd0bcf7109c32", - "colorKey": "h100_b654f9b2", - "comparisonKey": "37db9a5137981152", - "schemaVersion": 3, - "generatedAt": "2026-06-26T23:55:36.358305+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_13", - "sku": "h100", - "backend": "deepep", - "phase": "prefill", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · bf16 · hotspot-single", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "hotspot-single", - "routingLabel": "hotspot-single", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "38fd0bcf7109c32", - "workloadId": "set:3:b952d4a43d688b50", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": 
"sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28271820121", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271820121", - "createdAt": "2026-06-26T23:55:36.358305+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 123.61600250005722, - "p90": 127.48800218105316, - "p95": 131.1040073633194, - "p99": 136.19199395179749 - }, - "combine": { - "p50": 116.95999652147293, - "p90": 122.46400117874146, - "p95": 124.95999783277512, - "p99": 131.26400113105774 - }, - "roundtrip": { - "p50": 217.72800385951996, - "p90": 224.89599883556366, - "p95": 229.24800217151642, - "p99": 245.37600576877594 - }, - "isolatedSum": { - "p50": 240.57599902153015, - "p90": 249.95200335979462, - "p95": 256.0640051960945, - "p99": 267.4559950828552 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 78102528, - "combineLogicalBytes": 78102528, - "fanoutMean": 5.3203125, - "recvTokensMax": 1024, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 512, - "globalTokens": 4096, - "dispatch": { - "p50": 238.91200125217438, - "p90": 257.24801421165466, - "p95": 259.2960000038147, - "p99": 261.9520127773285 - }, - "combine": { - "p50": 271.93599939346313, - "p90": 282.1759879589081, - "p95": 284.8320007324219, - "p99": 288.5119915008545 - }, - "roundtrip": { - "p50": 486.04801297187805, - "p90": 500.8959770202637, - "p95": 503.55201959609985, - "p99": 509.2160105705261 - }, - "isolatedSum": { - "p50": 510.8480006456375, - "p90": 539.4240021705627, - "p95": 544.1280007362366, - "p99": 550.464004278183 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 311091200, - "combineLogicalBytes": 311091200, - "fanoutMean": 5.2978515625, - "recvTokensMax": 4096, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - 
"trials": 3 - }, - { - "tokensPerRank": 2048, - "globalTokens": 16384, - "dispatch": { - "p50": 718.3039784431458, - "p90": 732.3840260505676, - "p95": 736.3520264625549, - "p99": 740.4159903526306 - }, - "combine": { - "p50": 829.9520015716553, - "p90": 838.047981262207, - "p95": 840.2559757232666, - "p99": 846.6879725456238 - }, - "roundtrip": { - "p50": 1516.2559747695923, - "p90": 1525.3759622573853, - "p95": 1528.223991394043, - "p99": 1535.2319478988647 - }, - "isolatedSum": { - "p50": 1548.255980014801, - "p90": 1570.4320073127747, - "p95": 1576.6080021858215, - "p99": 1587.1039628982544 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1241511936, - "combineLogicalBytes": 1241511936, - "fanoutMean": 5.28570556640625, - "recvTokensMax": 16384, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-fa73d33e", - "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|prefill|normal|none|none|0|tuned||bfbb64a166e9f1c", - "colorKey": "h100_b654f9b2", - "comparisonKey": "37db9a5137981152", - "schemaVersion": 3, - "generatedAt": "2026-06-27T00:01:55.460957+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_13", - "sku": "h100", - "backend": "deepep", - "phase": "prefill", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · bf16 · hotspot-single", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "hotspot-single", - "routingLabel": "hotspot-single", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - 
"requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "bfbb64a166e9f1c", - "workloadId": "set:6:b952d4a43d688b50", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28272012738", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272012738", - "createdAt": "2026-06-27T00:01:55.460957+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 129.08799946308136, - "p90": 131.42399489879608, - "p95": 132.03200697898865, - "p99": 135.903999209404 - }, - "combine": { - "p50": 119.87199634313583, - "p90": 121.98399752378464, - "p95": 122.36800044775009, - "p99": 125.72799623012543 - }, - "roundtrip": { - "p50": 219.200000166893, - "p90": 223.80800545215607, - "p95": 224.7679978609085, - "p99": 228.0000001192093 - }, - "isolatedSum": { - "p50": 248.9599958062172, - "p90": 253.40799242258072, - "p95": 254.40000742673874, - "p99": 261.6319954395294 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 78102528, - "combineLogicalBytes": 78102528, - "fanoutMean": 5.3203125, - "recvTokensMax": 1024, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 256, - "globalTokens": 2048, - "dispatch": { - "p50": 163.96799683570862, - "p90": 168.19199919700623, - "p95": 170.43200135231018, - "p99": 173.12000691890717 - }, - "combine": { - "p50": 
171.55200242996216, - "p90": 176.83200538158417, - "p95": 178.3680021762848, - "p99": 180.60800433158875 - }, - "roundtrip": { - "p50": 306.7840039730072, - "p90": 310.94399094581604, - "p95": 312.3199939727783, - "p99": 314.7839903831482 - }, - "isolatedSum": { - "p50": 335.5199992656708, - "p90": 345.0240045785904, - "p95": 348.80000352859497, - "p99": 353.7280112504959 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 156090368, - "combineLogicalBytes": 156090368, - "fanoutMean": 5.31640625, - "recvTokensMax": 2048, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 512, - "globalTokens": 4096, - "dispatch": { - "p50": 237.34399676322937, - "p90": 242.11199581623077, - "p95": 244.1920042037964, - "p99": 248.28800559043884 - }, - "combine": { - "p50": 268.22400093078613, - "p90": 273.53599667549133, - "p95": 274.84801411628723, - "p99": 277.69601345062256 - }, - "roundtrip": { - "p50": 482.7519953250885, - "p90": 488.44799399375916, - "p95": 490.4319941997528, - "p99": 495.07200717926025 - }, - "isolatedSum": { - "p50": 505.5679976940155, - "p90": 515.6479924917221, - "p95": 519.0400183200836, - "p99": 525.9840190410614 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 311091200, - "combineLogicalBytes": 311091200, - "fanoutMean": 5.2978515625, - "recvTokensMax": 4096, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 1024, - "globalTokens": 8192, - "dispatch": { - "p50": 391.61598682403564, - "p90": 397.5679874420166, - "p95": 399.9040126800537, - "p99": 407.1039855480194 - }, - "combine": { - "p50": 455.6480050086975, - "p90": 461.5359902381897, - "p95": 463.0720019340515, - "p99": 466.5600061416626 - }, - "roundtrip": { - "p50": 823.2960104942322, - "p90": 829.5040130615234, - "p95": 831.5839767456055, - "p99": 835.4560136795044 - }, - "isolatedSum": { - "p50": 847.2639918327332, - "p90": 859.1039776802063, - "p95": 
862.9760146141052, - "p99": 873.663991689682 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 620648448, - "combineLogicalBytes": 620648448, - "fanoutMean": 5.2847900390625, - "recvTokensMax": 8192, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2048, - "globalTokens": 16384, - "dispatch": { - "p50": 720.6720113754272, - "p90": 733.6320281028748, - "p95": 737.5680208206177, - "p99": 744.9280023574829 - }, - "combine": { - "p50": 825.7279992103577, - "p90": 834.559977054596, - "p95": 837.3759984970093, - "p99": 841.2479758262634 - }, - "roundtrip": { - "p50": 1514.240026473999, - "p90": 1523.7120389938354, - "p95": 1526.6239643096924, - "p99": 1534.3999862670898 - }, - "isolatedSum": { - "p50": 1546.400010585785, - "p90": 1568.1920051574707, - "p95": 1574.944019317627, - "p99": 1586.1759781837463 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1241511936, - "combineLogicalBytes": 1241511936, - "fanoutMean": 5.28570556640625, - "recvTokensMax": 16384, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4096, - "globalTokens": 32768, - "dispatch": { - "p50": 1379.1359663009644, - "p90": 1390.1439905166626, - "p95": 1393.280029296875, - "p99": 1400.480031967163 - }, - "combine": { - "p50": 1540.5759811401367, - "p90": 1547.4879741668701, - "p95": 1549.7599840164185, - "p99": 1553.1519651412964 - }, - "roundtrip": { - "p50": 2893.3119773864746, - "p90": 2902.30393409729, - "p95": 2905.695915222168, - "p99": 2912.480115890503 - }, - "isolatedSum": { - "p50": 2919.711947441101, - "p90": 2937.6319646835327, - "p95": 2943.0400133132935, - "p99": 2953.6319971084595 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2484242432, - "combineLogicalBytes": 2484242432, - "fanoutMean": 5.288299560546875, - "recvTokensMax": 32768, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": 
"cx-e91dfe75", - "identity": "h100|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|hotspot-single+eplb|8|prefill|normal|none|none|0|tuned||29ae5ace13636f8", - "colorKey": "h100_456a963c", - "comparisonKey": "54b53207b090a644", - "schemaVersion": 3, - "generatedAt": "2026-06-27T00:01:57.841646+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_17", - "sku": "h100", - "backend": "deepep", - "phase": "prefill", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · bf16 · hotspot-single+eplb", - "model": "DeepSeek-V3 (EPLB physical)", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 288, - "routing": "hotspot-single", - "routingLabel": "hotspot-single+eplb", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": true, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "29ae5ace13636f8", - "workloadId": "set:6:b952d4a43d688b50", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": 1.8466796875, - "eplbImbalanceAfter": 1.0002700343276514, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28272016505", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272016505", 
- "createdAt": "2026-06-27T00:01:57.841646+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 109.69600081443787, - "p90": 113.98400366306305, - "p95": 115.77600240707397, - "p99": 122.43200093507767 - }, - "combine": { - "p50": 105.50399869680405, - "p90": 111.10399663448334, - "p95": 112.31999844312668, - "p99": 114.27199840545654 - }, - "roundtrip": { - "p50": 196.6720074415207, - "p90": 203.2960057258606, - "p95": 204.0960043668747, - "p99": 207.64799416065216 - }, - "isolatedSum": { - "p50": 215.1999995112419, - "p90": 225.0880002975464, - "p95": 228.09600085020065, - "p99": 236.7039993405342 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 77701120, - "combineLogicalBytes": 77701120, - "fanoutMean": 5.29296875, - "recvTokensMax": 697, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 256, - "globalTokens": 2048, - "dispatch": { - "p50": 144.44799721240997, - "p90": 149.63200688362122, - "p95": 151.2320041656494, - "p99": 155.83999454975128 - }, - "combine": { - "p50": 152.0639955997467, - "p90": 153.60000729560852, - "p95": 154.4640064239502, - "p99": 158.52800011634827 - }, - "roundtrip": { - "p50": 265.0560140609741, - "p90": 268.92799139022827, - "p95": 270.687997341156, - "p99": 273.21600914001465 - }, - "isolatedSum": { - "p50": 296.5119928121567, - "p90": 303.23201417922974, - "p95": 305.6960105895996, - "p99": 314.36799466609955 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 155187200, - "combineLogicalBytes": 155187200, - "fanoutMean": 5.28564453125, - "recvTokensMax": 1372, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 512, - "globalTokens": 4096, - "dispatch": { - "p50": 199.64799284934998, - "p90": 203.45599949359894, - "p95": 204.79999482631683, - "p99": 208.76799523830414 - }, - "combine": { - "p50": 
228.5120040178299, - "p90": 234.23999547958374, - "p95": 235.167995095253, - "p99": 236.95999383926392 - }, - "roundtrip": { - "p50": 403.80799770355225, - "p90": 408.35198760032654, - "p95": 410.0799858570099, - "p99": 413.88800740242004 - }, - "isolatedSum": { - "p50": 428.15999686717987, - "p90": 437.6959949731827, - "p95": 439.9679899215698, - "p99": 445.72798907756805 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 311162880, - "combineLogicalBytes": 311162880, - "fanoutMean": 5.299072265625, - "recvTokensMax": 2761, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 1024, - "globalTokens": 8192, - "dispatch": { - "p50": 305.4080009460449, - "p90": 310.016006231308, - "p95": 311.7760121822357, - "p99": 316.76799058914185 - }, - "combine": { - "p50": 367.19998717308044, - "p90": 374.0159869194031, - "p95": 375.5199909210205, - "p99": 379.2960047721863 - }, - "roundtrip": { - "p50": 649.1199731826782, - "p90": 655.6479930877686, - "p95": 658.4640145301819, - "p99": 661.9840264320374 - }, - "isolatedSum": { - "p50": 672.6079881191254, - "p90": 684.0319931507111, - "p95": 687.2960031032562, - "p99": 696.0639953613281 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 619974656, - "combineLogicalBytes": 619974656, - "fanoutMean": 5.279052734375, - "recvTokensMax": 5481, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2048, - "globalTokens": 16384, - "dispatch": { - "p50": 528.8640260696411, - "p90": 539.3919944763184, - "p95": 543.8079833984375, - "p99": 805.9520125389099 - }, - "combine": { - "p50": 633.184015750885, - "p90": 640.9919857978821, - "p95": 643.9039707183838, - "p99": 648.5440135002136 - }, - "roundtrip": { - "p50": 1132.032036781311, - "p90": 1143.8720226287842, - "p95": 1147.3920345306396, - "p99": 1154.8160314559937 - }, - "isolatedSum": { - "p50": 1162.0480418205261, - "p90": 1180.3839802742004, - "p95": 
1187.7119541168213, - "p99": 1454.4960260391235 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1240020992, - "combineLogicalBytes": 1240020992, - "fanoutMean": 5.27935791015625, - "recvTokensMax": 10883, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4096, - "globalTokens": 32768, - "dispatch": { - "p50": 985.8880043029785, - "p90": 1005.5680274963379, - "p95": 1010.9119415283203, - "p99": 1020.5440521240234 - }, - "combine": { - "p50": 1144.1919803619385, - "p90": 1153.92005443573, - "p95": 1157.439947128296, - "p99": 1163.6799573898315 - }, - "roundtrip": { - "p50": 2094.464063644409, - "p90": 2109.8880767822266, - "p95": 2115.295886993408, - "p99": 2124.5760917663574 - }, - "isolatedSum": { - "p50": 2130.079984664917, - "p90": 2159.488081932068, - "p95": 2168.351888656616, - "p99": 2184.224009513855 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2480414720, - "combineLogicalBytes": 2480414720, - "fanoutMean": 5.2801513671875, - "recvTokensMax": 21702, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-f8095d72", - "identity": "h100|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|uniform+eplb|8|prefill|normal|none|none|0|tuned||2225dbbdab9bf2d", - "colorKey": "h100_fb5b86de", - "comparisonKey": "cd6da73322e03923", - "schemaVersion": 3, - "generatedAt": "2026-06-26T23:59:17.404659+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_04", - "sku": "h100", - "backend": "deepep", - "phase": "prefill", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · bf16 · uniform+eplb", - "model": "DeepSeek-V3 (EPLB physical)", - "shape": { - "hidden": 
7168, - "topk": 8, - "experts": 288, - "routing": "uniform", - "routingLabel": "uniform+eplb", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": true, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "2225dbbdab9bf2d", - "workloadId": "set:6:a426d66e479dc893", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": 1.006072998046875, - "eplbImbalanceAfter": 1.0000152587890625, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28271927356", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271927356", - "createdAt": "2026-06-26T23:59:17.404659+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 113.02399635314941, - "p90": 118.40000003576279, - "p95": 120.12799829244614, - "p99": 126.0479986667633 - }, - "combine": { - "p50": 105.66399991512299, - "p90": 108.89600217342377, - "p95": 112.06399649381638, - "p99": 115.9679964184761 - }, - "roundtrip": { - "p50": 195.8719938993454, - "p90": 201.24800503253937, - "p95": 202.62399315834045, - "p99": 207.39200711250305 - }, - "isolatedSum": { - "p50": 218.6879962682724, - "p90": 227.29600220918655, - "p95": 232.1919947862625, - "p99": 242.0159950852394 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 77041664, - "combineLogicalBytes": 77041664, - "fanoutMean": 5.248046875, - 
"recvTokensMax": 686, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 256, - "globalTokens": 2048, - "dispatch": { - "p50": 146.97599411010742, - "p90": 150.91200172901154, - "p95": 151.7760008573532, - "p99": 155.39200603961945 - }, - "combine": { - "p50": 148.3519971370697, - "p90": 153.82400155067444, - "p95": 154.4959992170334, - "p99": 156.67200088500977 - }, - "roundtrip": { - "p50": 265.9200131893158, - "p90": 270.9760069847107, - "p95": 273.1199860572815, - "p99": 278.4000039100647 - }, - "isolatedSum": { - "p50": 295.3279912471771, - "p90": 304.736003279686, - "p95": 306.2720000743866, - "p99": 312.0640069246292 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 154542080, - "combineLogicalBytes": 154542080, - "fanoutMean": 5.263671875, - "recvTokensMax": 1365, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 512, - "globalTokens": 4096, - "dispatch": { - "p50": 200.32000541687012, - "p90": 203.3279985189438, - "p95": 204.57600057125092, - "p99": 208.28799903392792 - }, - "combine": { - "p50": 229.8559993505478, - "p90": 235.4239970445633, - "p95": 236.4480048418045, - "p99": 237.98400163650513 - }, - "roundtrip": { - "p50": 402.46400237083435, - "p90": 407.9360067844391, - "p95": 410.0480079650879, - "p99": 413.1839871406555 - }, - "isolatedSum": { - "p50": 430.1760047674179, - "p90": 438.7519955635071, - "p95": 441.0240054130554, - "p99": 446.27200067043304 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 310589440, - "combineLogicalBytes": 310589440, - "fanoutMean": 5.289306640625, - "recvTokensMax": 2746, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 1024, - "globalTokens": 8192, - "dispatch": { - "p50": 303.51999402046204, - "p90": 308.6720108985901, - "p95": 310.2720081806183, - "p99": 315.8400058746338 - }, - "combine": { - "p50": 366.3040101528168, - 
"p90": 374.33600425720215, - "p95": 375.99998712539673, - "p99": 380.0320029258728 - }, - "roundtrip": { - "p50": 643.9679861068726, - "p90": 650.9119868278503, - "p95": 653.4720063209534, - "p99": 656.9280028343201 - }, - "isolatedSum": { - "p50": 669.8240041732788, - "p90": 683.0080151557922, - "p95": 686.271995306015, - "p99": 695.8720088005066 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 619171840, - "combineLogicalBytes": 619171840, - "fanoutMean": 5.272216796875, - "recvTokensMax": 5467, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2048, - "globalTokens": 16384, - "dispatch": { - "p50": 526.6559720039368, - "p90": 533.8879823684692, - "p95": 536.0000133514404, - "p99": 542.4000024795532 - }, - "combine": { - "p50": 628.607988357544, - "p90": 636.5759968757629, - "p95": 639.3600106239319, - "p99": 643.455982208252 - }, - "roundtrip": { - "p50": 1128.5760402679443, - "p90": 1137.984037399292, - "p95": 1141.5679454803467, - "p99": 1146.1759805679321 - }, - "isolatedSum": { - "p50": 1155.2639603614807, - "p90": 1170.4639792442322, - "p95": 1175.3600239753723, - "p99": 1185.8559846878052 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1238945792, - "combineLogicalBytes": 1238945792, - "fanoutMean": 5.2747802734375, - "recvTokensMax": 10913, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4096, - "globalTokens": 32768, - "dispatch": { - "p50": 1018.4320211410522, - "p90": 1046.496033668518, - "p95": 1056.1920404434204, - "p99": 1073.5039710998535 - }, - "combine": { - "p50": 1148.5120058059692, - "p90": 1156.3199758529663, - "p95": 1158.784031867981, - "p99": 1164.031982421875 - }, - "roundtrip": { - "p50": 2113.408088684082, - "p90": 2138.5281085968018, - "p95": 2143.807888031006, - "p99": 2155.679941177368 - }, - "isolatedSum": { - "p50": 2166.9440269470215, - "p90": 2202.8160095214844, - "p95": 2214.9760723114014, - 
"p99": 2237.5359535217285 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2481747968, - "combineLogicalBytes": 2481747968, - "fanoutMean": 5.282989501953125, - "recvTokensMax": 21789, - "stragglerRank": 2, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-ff5c49bb", - "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|prefill|normal|none|none|0|tuned||4caecd33bedf786", - "colorKey": "h100_aa268d13", - "comparisonKey": "927a6d7282665742", - "schemaVersion": 3, - "generatedAt": "2026-06-26T23:55:17.079494+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_02", - "sku": "h100", - "backend": "deepep", - "phase": "prefill", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · bf16 · zipf", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "zipf", - "routingLabel": "zipf", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "4caecd33bedf786", - "workloadId": "set:3:830e36e88869e222", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": 
"sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28271806404", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271806404", - "createdAt": "2026-06-26T23:55:17.079494+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 125.37600100040436, - "p90": 131.9040060043335, - "p95": 132.7359974384308, - "p99": 137.08800077438354 - }, - "combine": { - "p50": 113.0559965968132, - "p90": 114.04799669981003, - "p95": 114.56000059843063, - "p99": 120.67200243473053 - }, - "roundtrip": { - "p50": 216.2880003452301, - "p90": 219.67999637126923, - "p95": 221.15199267864227, - "p99": 226.17599368095398 - }, - "isolatedSum": { - "p50": 238.43199759721756, - "p90": 245.95200270414352, - "p95": 247.29599803686142, - "p99": 257.7600032091141 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 49946624, - "combineLogicalBytes": 49946624, - "fanoutMean": 3.40234375, - "recvTokensMax": 1022, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 512, - "globalTokens": 4096, - "dispatch": { - "p50": 239.42400515079498, - "p90": 255.5519938468933, - "p95": 258.14399123191833, - "p99": 261.9200050830841 - }, - "combine": { - "p50": 267.07199215888977, - "p90": 276.63999795913696, - "p95": 277.536004781723, - "p99": 279.90400791168213 - }, - "roundtrip": { - "p50": 476.22400522232056, - "p90": 492.3520088195801, - "p95": 495.03999948501587, - "p99": 499.55201148986816 - }, - "isolatedSum": { - "p50": 506.49599730968475, - "p90": 532.1919918060303, - "p95": 535.6799960136414, - "p99": 541.8240129947662 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 201678848, - "combineLogicalBytes": 201678848, - "fanoutMean": 3.4345703125, - "recvTokensMax": 4094, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, 
- "trials": 3 - }, - { - "tokensPerRank": 2048, - "globalTokens": 16384, - "dispatch": { - "p50": 677.183985710144, - "p90": 691.3599967956543, - "p95": 694.8800086975098, - "p99": 701.2479901313782 - }, - "combine": { - "p50": 816.2879943847656, - "p90": 828.607976436615, - "p95": 832.5759768486023, - "p99": 837.8239870071411 - }, - "roundtrip": { - "p50": 1460.4159593582153, - "p90": 1474.176049232483, - "p95": 1478.4640073776245, - "p99": 1485.8880043029785 - }, - "isolatedSum": { - "p50": 1493.4719800949097, - "p90": 1519.9679732322693, - "p95": 1527.455985546112, - "p99": 1539.0719771385193 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 808822784, - "combineLogicalBytes": 808822784, - "fanoutMean": 3.44354248046875, - "recvTokensMax": 16380, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-f5264491", - "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|prefill|normal|none|none|0|tuned||b5217e990b95f86", - "colorKey": "h100_aa268d13", - "comparisonKey": "927a6d7282665742", - "schemaVersion": 3, - "generatedAt": "2026-06-27T00:00:04.176924+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_08", - "sku": "h100", - "backend": "deepep", - "phase": "prefill", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · bf16 · zipf", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "zipf", - "routingLabel": "zipf", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - 
"achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "b5217e990b95f86", - "workloadId": "set:6:830e36e88869e222", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28271951888", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271951888", - "createdAt": "2026-06-27T00:00:04.176924+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 124.70400333404541, - "p90": 128.86400520801544, - "p95": 131.071999669075, - "p99": 132.9600065946579 - }, - "combine": { - "p50": 112.5119999051094, - "p90": 114.01599645614624, - "p95": 114.3679991364479, - "p99": 116.5120005607605 - }, - "roundtrip": { - "p50": 216.22399985790253, - "p90": 219.90400552749634, - "p95": 221.02400660514832, - "p99": 223.90399873256683 - }, - "isolatedSum": { - "p50": 237.21600323915482, - "p90": 242.88000166416168, - "p95": 245.43999880552292, - "p99": 249.4720071554184 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 49946624, - "combineLogicalBytes": 49946624, - "fanoutMean": 3.40234375, - "recvTokensMax": 1022, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 256, - "globalTokens": 2048, - "dispatch": { - "p50": 164.57599401474, - "p90": 167.93599724769592, - "p95": 169.5680022239685, - "p99": 229.15199398994446 - }, - "combine": { - "p50": 162.6559942960739, - "p90": 
168.64000260829926, - "p95": 169.98399794101715, - "p99": 171.29600048065186 - }, - "roundtrip": { - "p50": 299.80799555778503, - "p90": 305.11999130249023, - "p95": 306.71998858451843, - "p99": 308.9919984340668 - }, - "isolatedSum": { - "p50": 327.2319883108139, - "p90": 336.5759998559952, - "p95": 339.55200016498566, - "p99": 400.4479944705963 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 100509696, - "combineLogicalBytes": 100509696, - "fanoutMean": 3.42333984375, - "recvTokensMax": 2046, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 512, - "globalTokens": 4096, - "dispatch": { - "p50": 237.92000114917755, - "p90": 242.3039972782135, - "p95": 244.4159984588623, - "p99": 250.14400482177734 - }, - "combine": { - "p50": 260.9280049800873, - "p90": 265.6640112400055, - "p95": 267.67998933792114, - "p99": 272.7360129356384 - }, - "roundtrip": { - "p50": 471.77600860595703, - "p90": 476.8959879875183, - "p95": 479.2639911174774, - "p99": 495.2000081539154 - }, - "isolatedSum": { - "p50": 498.84800612926483, - "p90": 507.968008518219, - "p95": 512.0959877967834, - "p99": 522.8800177574158 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 201678848, - "combineLogicalBytes": 201678848, - "fanoutMean": 3.4345703125, - "recvTokensMax": 4094, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 1024, - "globalTokens": 8192, - "dispatch": { - "p50": 377.85598635673523, - "p90": 384.38400626182556, - "p95": 385.8239948749542, - "p99": 390.6880021095276 - }, - "combine": { - "p50": 442.1760141849518, - "p90": 447.80799746513367, - "p95": 449.3120014667511, - "p99": 452.86399126052856 - }, - "roundtrip": { - "p50": 795.6799864768982, - "p90": 803.167998790741, - "p95": 806.3039779663086, - "p99": 813.0559921264648 - }, - "isolatedSum": { - "p50": 820.032000541687, - "p90": 832.1920037269592, - "p95": 835.1359963417053, - "p99": 
843.5519933700562 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 405035008, - "combineLogicalBytes": 405035008, - "fanoutMean": 3.4488525390625, - "recvTokensMax": 8189, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2048, - "globalTokens": 16384, - "dispatch": { - "p50": 675.3919720649719, - "p90": 690.4320120811462, - "p95": 694.1120028495789, - "p99": 700.8320093154907 - }, - "combine": { - "p50": 806.1439990997314, - "p90": 816.5119886398315, - "p95": 818.5279965400696, - "p99": 824.5440125465393 - }, - "roundtrip": { - "p50": 1447.1999406814575, - "p90": 1458.143949508667, - "p95": 1462.5600576400757, - "p99": 1468.991994857788 - }, - "isolatedSum": { - "p50": 1481.5359711647034, - "p90": 1506.9440007209778, - "p95": 1512.6399993896484, - "p99": 1525.37602186203 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 808822784, - "combineLogicalBytes": 808822784, - "fanoutMean": 3.44354248046875, - "recvTokensMax": 16380, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4096, - "globalTokens": 32768, - "dispatch": { - "p50": 1273.7280130386353, - "p90": 1286.1759662628174, - "p95": 1290.2400493621826, - "p99": 1300.3519773483276 - }, - "combine": { - "p50": 1515.6480073928833, - "p90": 1529.1199684143066, - "p95": 1554.6239614486694, - "p99": 1575.2639770507812 - }, - "roundtrip": { - "p50": 2763.0081176757812, - "p90": 2772.9599475860596, - "p95": 2776.3519287109375, - "p99": 2782.464027404785 - }, - "isolatedSum": { - "p50": 2789.3760204315186, - "p90": 2815.295934677124, - "p95": 2844.864010810852, - "p99": 2875.615954399109 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1619795968, - "combineLogicalBytes": 1619795968, - "fanoutMean": 3.4481201171875, - "recvTokensMax": 32761, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-f680673f", - "identity": 
"h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|prefill|normal|none|none|0|tuned||3dd868cb33839a3", - "colorKey": "h100_002beb29", - "comparisonKey": "3715210183d38757", - "schemaVersion": 3, - "generatedAt": "2026-06-26T23:55:20.108988+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_06", - "sku": "h100", - "backend": "deepep", - "phase": "prefill", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · bf16 · zipf-heavy", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "zipf-heavy", - "routingLabel": "zipf-heavy", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "3dd868cb33839a3", - "workloadId": "set:3:1ca614e23cc66be1", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28271813470", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271813470", - "createdAt": "2026-06-26T23:55:20.108988+00:00", - "sha": 
"45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 117.63200163841248, - "p90": 122.43200093507767, - "p95": 123.55200201272964, - "p99": 126.5919953584671 - }, - "combine": { - "p50": 106.62399977445602, - "p90": 112.31999844312668, - "p95": 113.27999830245972, - "p99": 115.9679964184761 - }, - "roundtrip": { - "p50": 207.58399367332458, - "p90": 211.84000372886658, - "p95": 213.18399906158447, - "p99": 216.35200083255768 - }, - "isolatedSum": { - "p50": 224.2560014128685, - "p90": 234.75199937820435, - "p95": 236.83200031518936, - "p99": 242.5599917769432 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 22650880, - "combineLogicalBytes": 22650880, - "fanoutMean": 1.54296875, - "recvTokensMax": 1024, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 512, - "globalTokens": 4096, - "dispatch": { - "p50": 236.35199666023254, - "p90": 249.82400238513947, - "p95": 253.88801097869873, - "p99": 257.02399015426636 - }, - "combine": { - "p50": 251.583993434906, - "p90": 259.7759962081909, - "p95": 260.47998666763306, - "p99": 262.2080147266388 - }, - "roundtrip": { - "p50": 459.29598808288574, - "p90": 472.1919894218445, - "p95": 474.88000988960266, - "p99": 478.5279929637909 - }, - "isolatedSum": { - "p50": 487.93599009513855, - "p90": 509.5999985933304, - "p95": 514.3679976463318, - "p99": 519.2320048809052 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 91521024, - "combineLogicalBytes": 91521024, - "fanoutMean": 1.55859375, - "recvTokensMax": 4096, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2048, - "globalTokens": 16384, - "dispatch": { - "p50": 659.3279838562012, - "p90": 669.0239906311035, - "p95": 672.0960140228271, - "p99": 678.4319877624512 - }, - "combine": { - "p50": 783.456027507782, - "p90": 794.6239709854126, - "p95": 
799.0720272064209, - "p99": 807.6800107955933 - }, - "roundtrip": { - "p50": 1412.6399755477905, - "p90": 1421.8239784240723, - "p95": 1426.0480403900146, - "p99": 1434.0159893035889 - }, - "isolatedSum": { - "p50": 1442.7840113639832, - "p90": 1463.647961616516, - "p95": 1471.168041229248, - "p99": 1486.1119985580444 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 368062464, - "combineLogicalBytes": 368062464, - "fanoutMean": 1.5670166015625, - "recvTokensMax": 16384, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-329395ff", - "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|prefill|normal|none|none|0|tuned||bbcd1d9d8d1e4fe", - "colorKey": "h100_002beb29", - "comparisonKey": "3715210183d38757", - "schemaVersion": 3, - "generatedAt": "2026-06-27T00:01:29.454209+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_15", - "sku": "h100", - "backend": "deepep", - "phase": "prefill", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · bf16 · zipf-heavy", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "zipf-heavy", - "routingLabel": "zipf-heavy", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 
8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "bbcd1d9d8d1e4fe", - "workloadId": "set:6:1ca614e23cc66be1", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28271996602", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271996602", - "createdAt": "2026-06-27T00:01:29.454209+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 115.87200313806534, - "p90": 122.14399874210358, - "p95": 123.10399860143661, - "p99": 127.16799974441528 - }, - "combine": { - "p50": 106.72000050544739, - "p90": 111.7120012640953, - "p95": 112.57600039243698, - "p99": 114.46399986743927 - }, - "roundtrip": { - "p50": 207.07200467586517, - "p90": 210.91200411319733, - "p95": 212.54399418830872, - "p99": 243.52000653743744 - }, - "isolatedSum": { - "p50": 222.59200364351273, - "p90": 233.85600000619888, - "p95": 235.6799989938736, - "p99": 241.63199961185455 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 22650880, - "combineLogicalBytes": 22650880, - "fanoutMean": 1.54296875, - "recvTokensMax": 1024, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 256, - "globalTokens": 2048, - "dispatch": { - "p50": 159.29600596427917, - "p90": 166.62399470806122, - "p95": 167.4560010433197, - "p99": 169.21600699424744 - }, - "combine": { - "p50": 154.65599298477173, - "p90": 163.10399770736694, - "p95": 163.7759953737259, - "p99": 165.0560051202774 - }, - "roundtrip": { - "p50": 289.44000601768494, - "p90": 301.66399478912354, - "p95": 303.5840094089508, - "p99": 308.03200602531433 - }, - "isolatedSum": { - "p50": 313.9519989490509, - "p90": 
329.72799241542816, - "p95": 331.2319964170456, - "p99": 334.27201211452484 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 45688832, - "combineLogicalBytes": 45688832, - "fanoutMean": 1.55615234375, - "recvTokensMax": 2048, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 512, - "globalTokens": 4096, - "dispatch": { - "p50": 231.1680018901825, - "p90": 236.4799976348877, - "p95": 237.40799725055695, - "p99": 240.7039999961853 - }, - "combine": { - "p50": 252.73600220680237, - "p90": 260.8639895915985, - "p95": 261.8879973888397, - "p99": 263.64800333976746 - }, - "roundtrip": { - "p50": 461.34400367736816, - "p90": 475.39201378822327, - "p95": 476.639986038208, - "p99": 479.45600748062134 - }, - "isolatedSum": { - "p50": 483.90400409698486, - "p90": 497.3439872264862, - "p95": 499.29599463939667, - "p99": 504.35200333595276 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 91521024, - "combineLogicalBytes": 91521024, - "fanoutMean": 1.55859375, - "recvTokensMax": 4096, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 1024, - "globalTokens": 8192, - "dispatch": { - "p50": 374.2400109767914, - "p90": 379.040002822876, - "p95": 381.98399543762207, - "p99": 387.4559998512268 - }, - "combine": { - "p50": 431.2640130519867, - "p90": 439.8399889469147, - "p95": 443.07199120521545, - "p99": 446.78398966789246 - }, - "roundtrip": { - "p50": 779.2320251464844, - "p90": 791.3600206375122, - "p95": 794.0160036087036, - "p99": 801.0240197181702 - }, - "isolatedSum": { - "p50": 805.5040240287781, - "p90": 818.8799917697906, - "p95": 825.0559866428375, - "p99": 834.2399895191193 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 183916544, - "combineLogicalBytes": 183916544, - "fanoutMean": 1.5660400390625, - "recvTokensMax": 8192, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - 
"tokensPerRank": 2048, - "globalTokens": 16384, - "dispatch": { - "p50": 661.1520051956177, - "p90": 676.1919856071472, - "p95": 679.6479821205139, - "p99": 685.9520077705383 - }, - "combine": { - "p50": 789.9519801139832, - "p90": 800.0959753990173, - "p95": 803.1359910964966, - "p99": 808.7360262870789 - }, - "roundtrip": { - "p50": 1422.271966934204, - "p90": 1435.1680278778076, - "p95": 1439.1039609909058, - "p99": 1454.367995262146 - }, - "isolatedSum": { - "p50": 1451.1039853096008, - "p90": 1476.2879610061646, - "p95": 1482.7839732170105, - "p99": 1494.6880340576172 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 368062464, - "combineLogicalBytes": 368062464, - "fanoutMean": 1.5670166015625, - "recvTokensMax": 16384, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4096, - "globalTokens": 32768, - "dispatch": { - "p50": 1241.8559789657593, - "p90": 1251.871943473816, - "p95": 1256.4799785614014, - "p99": 1264.0639543533325 - }, - "combine": { - "p50": 1471.4560508728027, - "p90": 1480.1599979400635, - "p95": 1482.6240539550781, - "p99": 1489.8879528045654 - }, - "roundtrip": { - "p50": 2687.9680156707764, - "p90": 2698.848009109497, - "p95": 2703.104019165039, - "p99": 2708.928108215332 - }, - "isolatedSum": { - "p50": 2713.312029838562, - "p90": 2732.0319414138794, - "p95": 2739.1040325164795, - "p99": 2753.951907157898 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 734720000, - "combineLogicalBytes": 734720000, - "fanoutMean": 1.56402587890625, - "recvTokensMax": 32768, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-c90a67e2", - "identity": "h100|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-heavy+eplb|8|prefill|normal|none|none|0|tuned||46855e7fa6754eb", - "colorKey": "h100_c44978e5", - "comparisonKey": "6c5c69e3474ec552", - "schemaVersion": 3, - "generatedAt": "2026-06-27T00:01:29.771027+00:00", - 
"status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_05", - "sku": "h100", - "backend": "deepep", - "phase": "prefill", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · bf16 · zipf-heavy+eplb", - "model": "DeepSeek-V3 (EPLB physical)", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 288, - "routing": "zipf-heavy", - "routingLabel": "zipf-heavy+eplb", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": true, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "46855e7fa6754eb", - "workloadId": "set:6:1ca614e23cc66be1", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": 7.38995361328125, - "eplbImbalanceAfter": 1.0000210716610862, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28272000459", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272000459", - "createdAt": "2026-06-27T00:01:29.771027+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 110.75200140476227, - "p90": 114.97599631547928, - "p95": 116.95999652147293, - "p99": 122.01599776744843 - }, - 
"combine": { - "p50": 105.92000186443329, - "p90": 109.56799983978271, - "p95": 111.23199760913849, - "p99": 114.14399743080139 - }, - "roundtrip": { - "p50": 193.1840032339096, - "p90": 198.7520009279251, - "p95": 200.19200444221497, - "p99": 204.44799959659576 - }, - "isolatedSum": { - "p50": 216.67200326919556, - "p90": 224.543996155262, - "p95": 228.19199413061142, - "p99": 236.15999519824982 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 79206400, - "combineLogicalBytes": 79206400, - "fanoutMean": 5.3955078125, - "recvTokensMax": 713, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 256, - "globalTokens": 2048, - "dispatch": { - "p50": 146.81600034236908, - "p90": 151.48800611495972, - "p95": 152.44799852371216, - "p99": 156.80000185966492 - }, - "combine": { - "p50": 150.62400698661804, - "p90": 154.7520011663437, - "p95": 155.39200603961945, - "p99": 161.31199896335602 - }, - "roundtrip": { - "p50": 266.59199595451355, - "p90": 270.4640030860901, - "p95": 271.64798974990845, - "p99": 274.84801411628723 - }, - "isolatedSum": { - "p50": 297.4400073289871, - "p90": 306.2400072813034, - "p95": 307.8400045633316, - "p99": 318.11200082302094 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 159330304, - "combineLogicalBytes": 159330304, - "fanoutMean": 5.4267578125, - "recvTokensMax": 1436, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 512, - "globalTokens": 4096, - "dispatch": { - "p50": 201.05600357055664, - "p90": 204.70400154590607, - "p95": 205.63200116157532, - "p99": 209.1200053691864 - }, - "combine": { - "p50": 227.64800488948822, - "p90": 231.99999332427979, - "p95": 234.17599499225616, - "p99": 235.83999276161194 - }, - "roundtrip": { - "p50": 403.55199575424194, - "p90": 408.160001039505, - "p95": 409.15200114250183, - "p99": 411.77600622177124 - }, - "isolatedSum": { - "p50": 428.70400846004486, - "p90": 
436.70399487018585, - "p95": 439.8079961538315, - "p99": 444.95999813079834 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 319535104, - "combineLogicalBytes": 319535104, - "fanoutMean": 5.441650390625, - "recvTokensMax": 2897, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 1024, - "globalTokens": 8192, - "dispatch": { - "p50": 309.1840147972107, - "p90": 313.2160007953644, - "p95": 314.62401151657104, - "p99": 317.79199838638306 - }, - "combine": { - "p50": 368.5440123081207, - "p90": 374.9440014362335, - "p95": 376.22401118278503, - "p99": 380.7680010795593 - }, - "roundtrip": { - "p50": 652.2560119628906, - "p90": 658.9760184288025, - "p95": 661.3759994506836, - "p99": 665.2479767799377 - }, - "isolatedSum": { - "p50": 677.7280271053314, - "p90": 688.1600022315979, - "p95": 690.8480226993561, - "p99": 698.5599994659424 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 638410752, - "combineLogicalBytes": 638410752, - "fanoutMean": 5.43603515625, - "recvTokensMax": 5815, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2048, - "globalTokens": 16384, - "dispatch": { - "p50": 532.6079726219177, - "p90": 546.5599894523621, - "p95": 550.495982170105, - "p99": 557.7600002288818 - }, - "combine": { - "p50": 642.5279974937439, - "p90": 649.9519944190979, - "p95": 652.2560119628906, - "p99": 658.8159799575806 - }, - "roundtrip": { - "p50": 1146.399974822998, - "p90": 1156.9600105285645, - "p95": 1160.9920263290405, - "p99": 1168.511986732483 - }, - "isolatedSum": { - "p50": 1175.1359701156616, - "p90": 1196.51198387146, - "p95": 1202.7519941329956, - "p99": 1216.5759801864624 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1275144192, - "combineLogicalBytes": 1275144192, - "fanoutMean": 5.42889404296875, - "recvTokensMax": 11606, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - 
"tokensPerRank": 4096, - "globalTokens": 32768, - "dispatch": { - "p50": 1024.351954460144, - "p90": 1048.5440492630005, - "p95": 1056.9599866867065, - "p99": 1069.3119764328003 - }, - "combine": { - "p50": 1185.9840154647827, - "p90": 1194.1759586334229, - "p95": 1196.5759992599487, - "p99": 1201.5680074691772 - }, - "roundtrip": { - "p50": 2167.520046234131, - "p90": 2183.3600997924805, - "p95": 2188.8959407806396, - "p99": 2197.727918624878 - }, - "isolatedSum": { - "p50": 2210.3359699249268, - "p90": 2242.7200078964233, - "p95": 2253.5359859466553, - "p99": 2270.8799839019775 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2546374656, - "combineLogicalBytes": 2546374656, - "fanoutMean": 5.420562744140625, - "recvTokensMax": 23170, - "stragglerRank": 3, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-fe520015", - "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-mild|8|prefill|normal|none|none|0|tuned||cf93f8f6b52e428", - "colorKey": "h100_9aa30544", - "comparisonKey": "212a6f0661f5d2d6", - "schemaVersion": 3, - "generatedAt": "2026-06-27T00:00:29.937355+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_09", - "sku": "h100", - "backend": "deepep", - "phase": "prefill", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · bf16 · zipf-mild", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "zipf-mild", - "routingLabel": "zipf-mild", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - 
"achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "cf93f8f6b52e428", - "workloadId": "set:6:a224603e5a1640b8", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28271965088", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271965088", - "createdAt": "2026-06-27T00:00:29.937355+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 123.71200323104858, - "p90": 127.6479959487915, - "p95": 131.20000064373016, - "p99": 133.7279975414276 - }, - "combine": { - "p50": 113.76000195741653, - "p90": 115.13599753379822, - "p95": 119.48800086975098, - "p99": 121.56800180673599 - }, - "roundtrip": { - "p50": 214.65599536895752, - "p90": 219.29599344730377, - "p95": 220.12799978256226, - "p99": 223.61600399017334 - }, - "isolatedSum": { - "p50": 237.47200518846512, - "p90": 242.78399348258972, - "p95": 250.68800151348114, - "p99": 255.2959993481636 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 70160384, - "combineLogicalBytes": 70160384, - "fanoutMean": 4.779296875, - "recvTokensMax": 987, - "stragglerRank": 2, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 256, - "globalTokens": 2048, - "dispatch": { - "p50": 160.19199788570404, - "p90": 166.4000004529953, - "p95": 167.61599481105804, - "p99": 170.43200135231018 - }, - "combine": { - "p50": 169.37600076198578, - "p90": 
172.5119948387146, - "p95": 173.40800166130066, - "p99": 177.50400304794312 - }, - "roundtrip": { - "p50": 299.5840013027191, - "p90": 303.42400074005127, - "p95": 305.1519989967346, - "p99": 310.8479976654053 - }, - "isolatedSum": { - "p50": 329.5679986476898, - "p90": 338.9119952917099, - "p95": 341.0239964723587, - "p99": 347.9360044002533 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 140879872, - "combineLogicalBytes": 140879872, - "fanoutMean": 4.79833984375, - "recvTokensMax": 1972, - "stragglerRank": 2, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 512, - "globalTokens": 4096, - "dispatch": { - "p50": 233.18399488925934, - "p90": 239.26399648189545, - "p95": 240.28800427913666, - "p99": 242.94400215148926 - }, - "combine": { - "p50": 263.5839879512787, - "p90": 268.70399713516235, - "p95": 270.27198672294617, - "p99": 274.1760015487671 - }, - "roundtrip": { - "p50": 471.71199321746826, - "p90": 476.639986038208, - "p95": 478.5600006580353, - "p99": 481.3440144062042 - }, - "isolatedSum": { - "p50": 496.767982840538, - "p90": 507.9679936170578, - "p95": 510.5599910020828, - "p99": 517.1200037002563 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 282333184, - "combineLogicalBytes": 282333184, - "fanoutMean": 4.80810546875, - "recvTokensMax": 3936, - "stragglerRank": 1, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 1024, - "globalTokens": 8192, - "dispatch": { - "p50": 377.27999687194824, - "p90": 383.35999846458435, - "p95": 385.18399000167847, - "p99": 387.84000277519226 - }, - "combine": { - "p50": 446.30399346351624, - "p90": 453.44001054763794, - "p95": 455.52000403404236, - "p99": 460.89598536491394 - }, - "roundtrip": { - "p50": 797.0240116119385, - "p90": 804.4800162315369, - "p95": 807.1039915084839, - "p99": 811.6480112075806 - }, - "isolatedSum": { - "p50": 823.5839903354645, - "p90": 836.8000090122223, - "p95": 840.7039940357208, - "p99": 
848.7359881401062 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 566716416, - "combineLogicalBytes": 566716416, - "fanoutMean": 4.8255615234375, - "recvTokensMax": 7855, - "stragglerRank": 2, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2048, - "globalTokens": 16384, - "dispatch": { - "p50": 676.4479875564575, - "p90": 686.8799924850464, - "p95": 690.5279755592346, - "p99": 791.9679880142212 - }, - "combine": { - "p50": 796.3520288467407, - "p90": 808.4160089492798, - "p95": 811.3920092582703, - "p99": 820.5440044403076 - }, - "roundtrip": { - "p50": 1445.5360174179077, - "p90": 1457.311987876892, - "p95": 1460.6399536132812, - "p99": 1468.2879447937012 - }, - "isolatedSum": { - "p50": 1472.8000164031982, - "p90": 1495.2960014343262, - "p95": 1501.9199848175049, - "p99": 1612.5119924545288 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1132285952, - "combineLogicalBytes": 1132285952, - "fanoutMean": 4.8206787109375, - "recvTokensMax": 15694, - "stragglerRank": 1, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4096, - "globalTokens": 32768, - "dispatch": { - "p50": 1284.8639488220215, - "p90": 1296.3199615478516, - "p95": 1299.7759580612183, - "p99": 1306.5279722213745 - }, - "combine": { - "p50": 1503.5840272903442, - "p90": 1517.2799825668335, - "p95": 1524.2880582809448, - "p99": 1540.0960445404053 - }, - "roundtrip": { - "p50": 2760.960102081299, - "p90": 2775.10404586792, - "p95": 2783.936023712158, - "p99": 2810.0481033325195 - }, - "isolatedSum": { - "p50": 2788.4479761123657, - "p90": 2813.599944114685, - "p95": 2824.064016342163, - "p99": 2846.62401676178 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2267840512, - "combineLogicalBytes": 2267840512, - "fanoutMean": 4.82763671875, - "recvTokensMax": 31357, - "stragglerRank": 2, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-2b98c773", - "identity": 
"h100|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-mild+eplb|8|prefill|normal|none|none|0|tuned||27ddc85ded0add9", - "colorKey": "h100_e8b903ea", - "comparisonKey": "5961b4bc09451ca4", - "schemaVersion": 3, - "generatedAt": "2026-06-27T00:00:35.470349+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_16", - "sku": "h100", - "backend": "deepep", - "phase": "prefill", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · bf16 · zipf-mild+eplb", - "model": "DeepSeek-V3 (EPLB physical)", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 288, - "routing": "zipf-mild", - "routingLabel": "zipf-mild+eplb", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": true, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "27ddc85ded0add9", - "workloadId": "set:6:a224603e5a1640b8", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": 2.545684814453125, - "eplbImbalanceAfter": 1.0001495361328125, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28271968791", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271968791", - "createdAt": 
"2026-06-27T00:00:35.470349+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 112.41599917411804, - "p90": 117.18399822711945, - "p95": 118.9119964838028, - "p99": 122.91199713945389 - }, - "combine": { - "p50": 106.33599758148193, - "p90": 112.12799698114395, - "p95": 113.0559965968132, - "p99": 114.43199962377548 - }, - "roundtrip": { - "p50": 198.81600141525269, - "p90": 204.03200387954712, - "p95": 205.4080069065094, - "p99": 207.58399367332458 - }, - "isolatedSum": { - "p50": 218.75199675559998, - "p90": 229.3119952082634, - "p95": 231.967993080616, - "p99": 237.34399676322937 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 78159872, - "combineLogicalBytes": 78159872, - "fanoutMean": 5.32421875, - "recvTokensMax": 702, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 256, - "globalTokens": 2048, - "dispatch": { - "p50": 148.44800531864166, - "p90": 151.99999511241913, - "p95": 153.3759981393814, - "p99": 156.3519984483719 - }, - "combine": { - "p50": 149.47199821472168, - "p90": 155.39200603961945, - "p95": 159.39199924468994, - "p99": 164.06400501728058 - }, - "roundtrip": { - "p50": 267.4880027770996, - "p90": 272.2879946231842, - "p95": 274.04800057411194, - "p99": 279.4879972934723 - }, - "isolatedSum": { - "p50": 297.92000353336334, - "p90": 307.3920011520386, - "p95": 312.76799738407135, - "p99": 320.41600346565247 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 156563456, - "combineLogicalBytes": 156563456, - "fanoutMean": 5.33251953125, - "recvTokensMax": 1393, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 512, - "globalTokens": 4096, - "dispatch": { - "p50": 201.12000405788422, - "p90": 204.48000729084015, - "p95": 206.04799687862396, - "p99": 212.22400665283203 - }, - "combine": { - "p50": 229.0239930152893, 
- "p90": 233.95200073719025, - "p95": 236.4480048418045, - "p99": 238.52799832820892 - }, - "roundtrip": { - "p50": 404.06399965286255, - "p90": 408.86399149894714, - "p95": 411.0719859600067, - "p99": 431.5840005874634 - }, - "isolatedSum": { - "p50": 430.1439970731735, - "p90": 438.4320080280304, - "p95": 442.49600172042847, - "p99": 450.75200498104095 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 312410112, - "combineLogicalBytes": 312410112, - "fanoutMean": 5.3203125, - "recvTokensMax": 2773, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 1024, - "globalTokens": 8192, - "dispatch": { - "p50": 304.0960133075714, - "p90": 309.28000807762146, - "p95": 311.64801120758057, - "p99": 479.5520007610321 - }, - "combine": { - "p50": 366.11199378967285, - "p90": 372.8959858417511, - "p95": 374.55999851226807, - "p99": 383.4559917449951 - }, - "roundtrip": { - "p50": 644.0640091896057, - "p90": 650.1439809799194, - "p95": 652.1919965744019, - "p99": 656.5120220184326 - }, - "isolatedSum": { - "p50": 670.2080070972443, - "p90": 682.1759939193726, - "p95": 686.2080097198486, - "p99": 863.0079925060272 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 622712832, - "combineLogicalBytes": 622712832, - "fanoutMean": 5.3023681640625, - "recvTokensMax": 5498, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2048, - "globalTokens": 16384, - "dispatch": { - "p50": 521.5039849281311, - "p90": 530.1120281219482, - "p95": 533.3759784698486, - "p99": 540.5120253562927 - }, - "combine": { - "p50": 632.1920156478882, - "p90": 639.3280029296875, - "p95": 640.9599781036377, - "p99": 647.2960114479065 - }, - "roundtrip": { - "p50": 1123.9999532699585, - "p90": 1132.8959465026855, - "p95": 1135.807991027832, - "p99": 1143.5840129852295 - }, - "isolatedSum": { - "p50": 1153.6960005760193, - "p90": 1169.4400310516357, - "p95": 1174.3359565734863, - "p99": 
1187.8080368041992 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1245038592, - "combineLogicalBytes": 1245038592, - "fanoutMean": 5.30072021484375, - "recvTokensMax": 10955, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4096, - "globalTokens": 32768, - "dispatch": { - "p50": 990.2399778366089, - "p90": 1009.4720125198364, - "p95": 1016.1279439926147, - "p99": 1026.8160104751587 - }, - "combine": { - "p50": 1164.736032485962, - "p90": 1174.015998840332, - "p95": 1177.2799491882324, - "p99": 1183.9359998703003 - }, - "roundtrip": { - "p50": 2116.895914077759, - "p90": 2137.7599239349365, - "p95": 2143.712043762207, - "p99": 2157.8240394592285 - }, - "isolatedSum": { - "p50": 2154.976010322571, - "p90": 2183.4880113601685, - "p95": 2193.407893180847, - "p99": 2210.752010345459 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2489460736, - "combineLogicalBytes": 2489460736, - "fanoutMean": 5.299407958984375, - "recvTokensMax": 21864, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-0a66c8a3", - "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-moderate|8|prefill|normal|none|none|0|tuned||b5217e990b95f86", - "colorKey": "h100_552a4b73", - "comparisonKey": "44cbfb11e1668dc5", - "schemaVersion": 3, - "generatedAt": "2026-06-27T00:01:00.044863+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_03", - "sku": "h100", - "backend": "deepep", - "phase": "prefill", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · bf16 · zipf-moderate", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - 
"routing": "zipf-moderate", - "routingLabel": "zipf-moderate", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "b5217e990b95f86", - "workloadId": "set:6:6709a02c31933a9f", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28271978834", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271978834", - "createdAt": "2026-06-27T00:01:00.044863+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 124.7360035777092, - "p90": 130.68799674510956, - "p95": 132.03200697898865, - "p99": 136.4479959011078 - }, - "combine": { - "p50": 112.5119999051094, - "p90": 114.17599767446518, - "p95": 115.07199704647064, - "p99": 120.67200243473053 - }, - "roundtrip": { - "p50": 215.16799926757812, - "p90": 219.35999393463135, - "p95": 221.11999988555908, - "p99": 229.18400168418884 - }, - "isolatedSum": { - "p50": 237.2480034828186, - "p90": 244.86399441957474, - "p95": 247.1040040254593, - "p99": 257.1199983358383 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 49946624, - "combineLogicalBytes": 49946624, - "fanoutMean": 3.40234375, - "recvTokensMax": 1022, - "stragglerRank": 4, - "correct": true, - 
"samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 256, - "globalTokens": 2048, - "dispatch": { - "p50": 163.7440025806427, - "p90": 167.26399958133698, - "p95": 168.44800114631653, - "p99": 174.6560037136078 - }, - "combine": { - "p50": 164.51199352741241, - "p90": 169.50400173664093, - "p95": 170.1440066099167, - "p99": 174.14399981498718 - }, - "roundtrip": { - "p50": 297.91998863220215, - "p90": 302.72001028060913, - "p95": 304.32000756263733, - "p99": 306.5600097179413 - }, - "isolatedSum": { - "p50": 328.2559961080551, - "p90": 336.7680013179779, - "p95": 338.5920077562332, - "p99": 348.80000352859497 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 100509696, - "combineLogicalBytes": 100509696, - "fanoutMean": 3.42333984375, - "recvTokensMax": 2046, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 512, - "globalTokens": 4096, - "dispatch": { - "p50": 237.44000494480133, - "p90": 241.82400107383728, - "p95": 243.0720031261444, - "p99": 247.74399399757385 - }, - "combine": { - "p50": 264.51200246810913, - "p90": 268.41598749160767, - "p95": 271.5519964694977, - "p99": 281.6320061683655 - }, - "roundtrip": { - "p50": 475.5840003490448, - "p90": 482.59198665618896, - "p95": 490.30399322509766, - "p99": 504.96000051498413 - }, - "isolatedSum": { - "p50": 501.95200741291046, - "p90": 510.23998856544495, - "p95": 514.6239995956421, - "p99": 529.3760001659393 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 201678848, - "combineLogicalBytes": 201678848, - "fanoutMean": 3.4345703125, - "recvTokensMax": 4094, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 1024, - "globalTokens": 8192, - "dispatch": { - "p50": 379.040002822876, - "p90": 385.72800159454346, - "p95": 388.2240056991577, - "p99": 414.3359959125519 - }, - "combine": { - "p50": 447.00801372528076, - "p90": 452.4799883365631, - "p95": 453.5039961338043, - 
"p99": 456.89600706100464 - }, - "roundtrip": { - "p50": 800.2240061759949, - "p90": 805.791974067688, - "p95": 807.744026184082, - "p99": 811.680018901825 - }, - "isolatedSum": { - "p50": 826.0480165481567, - "p90": 838.2079899311066, - "p95": 841.728001832962, - "p99": 871.2320029735565 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 405035008, - "combineLogicalBytes": 405035008, - "fanoutMean": 3.4488525390625, - "recvTokensMax": 8189, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2048, - "globalTokens": 16384, - "dispatch": { - "p50": 675.3919720649719, - "p90": 695.6800222396851, - "p95": 707.8400254249573, - "p99": 910.8160138130188 - }, - "combine": { - "p50": 819.2319869995117, - "p90": 829.6639919281006, - "p95": 833.2160115242004, - "p99": 841.3439989089966 - }, - "roundtrip": { - "p50": 1459.9679708480835, - "p90": 1476.9599437713623, - "p95": 1481.8559885025024, - "p99": 1501.2799501419067 - }, - "isolatedSum": { - "p50": 1494.6239590644836, - "p90": 1525.3440141677856, - "p95": 1541.0560369491577, - "p99": 1752.1600127220154 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 808822784, - "combineLogicalBytes": 808822784, - "fanoutMean": 3.44354248046875, - "recvTokensMax": 16380, - "stragglerRank": 2, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4096, - "globalTokens": 32768, - "dispatch": { - "p50": 1275.10404586792, - "p90": 1287.5200510025024, - "p95": 1291.8399572372437, - "p99": 1346.0479974746704 - }, - "combine": { - "p50": 1538.7200117111206, - "p90": 1550.3679513931274, - "p95": 1555.232048034668, - "p99": 1607.9360246658325 - }, - "roundtrip": { - "p50": 2787.168025970459, - "p90": 2798.784017562866, - "p95": 2802.9439449310303, - "p99": 2818.4640407562256 - }, - "isolatedSum": { - "p50": 2813.8240575790405, - "p90": 2837.88800239563, - "p95": 2847.0720052719116, - "p99": 2953.984022140503 - }, - "roundtripMeasured": true, 
- "dispatchLogicalBytes": 1619795968, - "combineLogicalBytes": 1619795968, - "fanoutMean": 3.4481201171875, - "recvTokensMax": 32761, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-7114a01f", - "identity": "h100|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-moderate+eplb|8|prefill|normal|none|none|0|tuned||2b57a75d27f5b39", - "colorKey": "h100_106a51ab", - "comparisonKey": "80b7db884aaf5a8c", - "schemaVersion": 3, - "generatedAt": "2026-06-27T00:01:17.822701+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_10", - "sku": "h100", - "backend": "deepep", - "phase": "prefill", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · bf16 · zipf-moderate+eplb", - "model": "DeepSeek-V3 (EPLB physical)", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 288, - "routing": "zipf-moderate", - "routingLabel": "zipf-moderate+eplb", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": true, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "2b57a75d27f5b39", - "workloadId": "set:6:6709a02c31933a9f", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": 4.895263671875, - "eplbImbalanceAfter": 1.0000902811686199, - "backendVersion": "1.2.1", - "imageDigest": 
"sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28271982260", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271982260", - "createdAt": "2026-06-27T00:01:17.822701+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 114.30399864912033, - "p90": 120.31999975442886, - "p95": 121.56800180673599, - "p99": 125.02400577068329 - }, - "combine": { - "p50": 106.27199709415436, - "p90": 111.48799955844879, - "p95": 111.77600175142288, - "p99": 114.1119971871376 - }, - "roundtrip": { - "p50": 198.0160027742386, - "p90": 201.82399451732635, - "p95": 203.36000621318817, - "p99": 207.35999941825867 - }, - "isolatedSum": { - "p50": 220.5759957432747, - "p90": 231.80799931287766, - "p95": 233.34400355815887, - "p99": 239.1360029578209 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 77385728, - "combineLogicalBytes": 77385728, - "fanoutMean": 5.271484375, - "recvTokensMax": 691, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 256, - "globalTokens": 2048, - "dispatch": { - "p50": 144.9279934167862, - "p90": 149.85600113868713, - "p95": 151.45599842071533, - "p99": 155.87200224399567 - }, - "combine": { - "p50": 151.19999647140503, - "p90": 154.84799444675446, - "p95": 156.63999319076538, - "p99": 160.73599457740784 - }, - "roundtrip": { - "p50": 266.11199975013733, - "p90": 271.5519964694977, - "p95": 273.6000120639801, - "p99": 277.1199941635132 - }, - "isolatedSum": { - "p50": 296.1279898881912, - "p90": 304.7039955854416, - "p95": 308.0959916114807, - "p99": 316.6079968214035 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 155172864, - "combineLogicalBytes": 155172864, - "fanoutMean": 5.28515625, - "recvTokensMax": 1378, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, 
- "trials": 3 - }, - { - "tokensPerRank": 512, - "globalTokens": 4096, - "dispatch": { - "p50": 200.32000541687012, - "p90": 204.12799715995789, - "p95": 205.4399996995926, - "p99": 208.38400721549988 - }, - "combine": { - "p50": 227.58400440216064, - "p90": 233.75999927520752, - "p95": 234.55999791622162, - "p99": 238.3359968662262 - }, - "roundtrip": { - "p50": 402.0479917526245, - "p90": 407.1039855480194, - "p95": 408.735990524292, - "p99": 412.06398606300354 - }, - "isolatedSum": { - "p50": 427.90400981903076, - "p90": 437.8879964351654, - "p95": 439.9999976158142, - "p99": 446.7200040817261 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 310546432, - "combineLogicalBytes": 310546432, - "fanoutMean": 5.28857421875, - "recvTokensMax": 2745, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 1024, - "globalTokens": 8192, - "dispatch": { - "p50": 303.16799879074097, - "p90": 307.3920011520386, - "p95": 308.76800417900085, - "p99": 313.27998638153076 - }, - "combine": { - "p50": 362.2399866580963, - "p90": 368.76800656318665, - "p95": 370.3039884567261, - "p99": 372.70399928092957 - }, - "roundtrip": { - "p50": 641.1839723587036, - "p90": 647.9359865188599, - "p95": 650.7520079612732, - "p99": 656.6399931907654 - }, - "isolatedSum": { - "p50": 665.4079854488373, - "p90": 676.1600077152252, - "p95": 679.0719926357269, - "p99": 685.9839856624603 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 620619776, - "combineLogicalBytes": 620619776, - "fanoutMean": 5.2845458984375, - "recvTokensMax": 5526, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2048, - "globalTokens": 16384, - "dispatch": { - "p50": 520.9919810295105, - "p90": 531.4239859580994, - "p95": 534.4640016555786, - "p99": 541.1840081214905 - }, - "combine": { - "p50": 639.3600106239319, - "p90": 650.592029094696, - "p95": 654.5600295066833, - "p99": 660.4800224304199 - }, 
- "roundtrip": { - "p50": 1128.864049911499, - "p90": 1138.2720470428467, - "p95": 1141.2479877471924, - "p99": 1146.3040113449097 - }, - "isolatedSum": { - "p50": 1160.3519916534424, - "p90": 1182.0160150527954, - "p95": 1189.024031162262, - "p99": 1201.6640305519104 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1239175168, - "combineLogicalBytes": 1239175168, - "fanoutMean": 5.2757568359375, - "recvTokensMax": 11165, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4096, - "globalTokens": 32768, - "dispatch": { - "p50": 1005.5999755859375, - "p90": 1031.7120552062988, - "p95": 1038.3360385894775, - "p99": 1051.103949546814 - }, - "combine": { - "p50": 1158.9759588241577, - "p90": 1167.8719520568848, - "p95": 1169.9199676513672, - "p99": 1174.6560335159302 - }, - "roundtrip": { - "p50": 2121.5360164642334, - "p90": 2138.2720470428467, - "p95": 2142.6239013671875, - "p99": 2150.0160694122314 - }, - "isolatedSum": { - "p50": 2164.575934410095, - "p90": 2199.5840072631836, - "p95": 2208.2560062408447, - "p99": 2225.759983062744 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2481604608, - "combineLogicalBytes": 2481604608, - "fanoutMean": 5.282684326171875, - "recvTokensMax": 22165, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-71b6107f", - "identity": "h100|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|prefill|normal|none|none|0|tuned||2b57a75d27f5b39", - "colorKey": "h100_769b9c4b", - "comparisonKey": "24fc2cc385891299", - "schemaVersion": 3, - "generatedAt": "2026-06-27T00:00:08.090138+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_05", - "sku": "h100", - "backend": "deepep", - "phase": "prefill", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": 
"layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · bf16 · zipf+eplb", - "model": "DeepSeek-V3 (EPLB physical)", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 288, - "routing": "zipf", - "routingLabel": "zipf+eplb", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": true, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "2b57a75d27f5b39", - "workloadId": "set:6:830e36e88869e222", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": 4.895263671875, - "eplbImbalanceAfter": 1.0000902811686199, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28271955196", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271955196", - "createdAt": "2026-06-27T00:00:08.090138+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 111.07199639081955, - "p90": 115.93600362539291, - "p95": 118.14399808645248, - "p99": 121.08799815177917 - }, - "combine": { - "p50": 106.08000308275223, - "p90": 111.26399785280228, - "p95": 112.38399893045425, - "p99": 114.14399743080139 - }, - "roundtrip": { - "p50": 195.68000733852386, - "p90": 201.1840045452118, - "p95": 202.39999890327454, - "p99": 204.96000349521637 - }, - "isolatedSum": { - "p50": 
217.15199947357178, - "p90": 227.2000014781952, - "p95": 230.52799701690674, - "p99": 235.23199558258057 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 77385728, - "combineLogicalBytes": 77385728, - "fanoutMean": 5.271484375, - "recvTokensMax": 691, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 256, - "globalTokens": 2048, - "dispatch": { - "p50": 144.48000490665436, - "p90": 148.0640023946762, - "p95": 149.6960073709488, - "p99": 153.60000729560852 - }, - "combine": { - "p50": 148.92800152301788, - "p90": 154.33600544929504, - "p95": 155.008003115654, - "p99": 157.8879952430725 - }, - "roundtrip": { - "p50": 262.81601190567017, - "p90": 266.975998878479, - "p95": 268.3199942111969, - "p99": 272.44800329208374 - }, - "isolatedSum": { - "p50": 293.40800642967224, - "p90": 302.40000784397125, - "p95": 304.7040104866028, - "p99": 311.48800253868103 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 155172864, - "combineLogicalBytes": 155172864, - "fanoutMean": 5.28515625, - "recvTokensMax": 1378, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 512, - "globalTokens": 4096, - "dispatch": { - "p50": 199.68000054359436, - "p90": 203.42400670051575, - "p95": 205.47200739383698, - "p99": 222.52799570560455 - }, - "combine": { - "p50": 227.80799865722656, - "p90": 232.9919934272766, - "p95": 234.3679964542389, - "p99": 237.34399676322937 - }, - "roundtrip": { - "p50": 399.83999729156494, - "p90": 405.023992061615, - "p95": 406.3040018081665, - "p99": 414.43198919296265 - }, - "isolatedSum": { - "p50": 427.4879992008209, - "p90": 436.41600012779236, - "p95": 439.84000384807587, - "p99": 459.8719924688339 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 310546432, - "combineLogicalBytes": 310546432, - "fanoutMean": 5.28857421875, - "recvTokensMax": 2745, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - 
"trials": 3 - }, - { - "tokensPerRank": 1024, - "globalTokens": 8192, - "dispatch": { - "p50": 303.5840094089508, - "p90": 309.471994638443, - "p95": 310.4960024356842, - "p99": 313.82399797439575 - }, - "combine": { - "p50": 362.8480136394501, - "p90": 367.74399876594543, - "p95": 369.6320056915283, - "p99": 523.7119793891907 - }, - "roundtrip": { - "p50": 640.8320069313049, - "p90": 648.576021194458, - "p95": 651.2960195541382, - "p99": 733.4399819374084 - }, - "isolatedSum": { - "p50": 666.4320230484009, - "p90": 677.2159934043884, - "p95": 680.1280081272125, - "p99": 837.5359773635864 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 620619776, - "combineLogicalBytes": 620619776, - "fanoutMean": 5.2845458984375, - "recvTokensMax": 5526, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2048, - "globalTokens": 16384, - "dispatch": { - "p50": 524.3200063705444, - "p90": 533.5680246353149, - "p95": 536.191999912262, - "p99": 542.2080159187317 - }, - "combine": { - "p50": 643.9039707183838, - "p90": 653.1839966773987, - "p95": 655.8719873428345, - "p99": 661.1520051956177 - }, - "roundtrip": { - "p50": 1135.2959871292114, - "p90": 1144.8320150375366, - "p95": 1148.4800577163696, - "p99": 1153.92005443573 - }, - "isolatedSum": { - "p50": 1168.2239770889282, - "p90": 1186.7520213127136, - "p95": 1192.0639872550964, - "p99": 1203.3600211143494 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1239175168, - "combineLogicalBytes": 1239175168, - "fanoutMean": 5.2757568359375, - "recvTokensMax": 11165, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4096, - "globalTokens": 32768, - "dispatch": { - "p50": 1020.4800367355347, - "p90": 1048.8959550857544, - "p95": 1056.2560558319092, - "p99": 1071.4880228042603 - }, - "combine": { - "p50": 1164.6720170974731, - "p90": 1173.375964164734, - "p95": 1177.024006843567, - "p99": 1183.135986328125 
- }, - "roundtrip": { - "p50": 2140.575885772705, - "p90": 2157.248020172119, - "p95": 2164.031982421875, - "p99": 2171.4560985565186 - }, - "isolatedSum": { - "p50": 2185.152053833008, - "p90": 2222.2719192504883, - "p95": 2233.280062675476, - "p99": 2254.6240091323853 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2481604608, - "combineLogicalBytes": 2481604608, - "fanoutMean": 5.282684326171875, - "recvTokensMax": 22165, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-19a8d159", - "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|normalized|0.18|64d989e2e2a6b31", - "colorKey": "h100_7b3247bf", - "comparisonKey": "0ac8f8817cb63abb", - "schemaVersion": 3, - "generatedAt": "2026-06-26T17:30:47.651979+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_17", - "sku": "h100", - "backend": "deepep", - "phase": "prefill", - "mode": "normal", - "resourceMode": "normalized", - "suite": "resource-constrained", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · bf16 (norm)", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": 0.18, - "achievedFraction": 0.1818, - "configuredUnits": 24, - "deviceUnits": 132, - "resourceClass": "resource-constrained", - "conformanceClass": "resource-conforming", - "fixedKernel": false, - "paretoEligible": true - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - 
"routingConsistent": true, - "traceSignature": "64d989e2e2a6b31", - "workloadId": "set:6:a426d66e479dc893", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28254315809", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254315809", - "createdAt": "2026-06-26T17:30:47.651979+00:00", - "sha": "60dec7d70f554e252fec87709e2be52752947db1" - }, - "rows": [ - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 110.46399921178818, - "p90": 116.35199934244156, - "p95": 117.8240031003952, - "p99": 166.01599752902985 - }, - "combine": { - "p50": 106.1440035700798, - "p90": 111.51999980211258, - "p95": 112.06399649381638, - "p99": 114.07999694347382 - }, - "roundtrip": { - "p50": 197.40800559520721, - "p90": 200.9280025959015, - "p95": 203.0400037765503, - "p99": 206.01600408554077 - }, - "isolatedSum": { - "p50": 216.60800278186798, - "p90": 227.87199914455414, - "p95": 229.88799959421158, - "p99": 280.09599447250366 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 256, - "globalTokens": 2048, - "dispatch": { - "p50": 147.39200472831726, - "p90": 150.68799257278442, - "p95": 151.7760008573532, - "p99": 154.33600544929504 - }, - "combine": { - "p50": 145.1839953660965, - "p90": 149.88799393177032, - "p95": 151.67999267578125, - "p99": 154.7199934720993 - }, - "roundtrip": { - "p50": 262.4000012874603, - "p90": 267.2640085220337, - "p95": 269.27998661994934, - "p99": 357.34400153160095 - }, - "isolatedSum": { - "p50": 292.57600009441376, - "p90": 300.57598650455475, - "p95": 
303.45599353313446, - "p99": 309.05599892139435 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 155889664, - "combineLogicalBytes": 155889664, - "fanoutMean": 5.3095703125, - "recvTokensMax": 1422, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 512, - "globalTokens": 4096, - "dispatch": { - "p50": 204.92799580097198, - "p90": 219.39200162887573, - "p95": 221.76000475883484, - "p99": 226.4000028371811 - }, - "combine": { - "p50": 217.15199947357178, - "p90": 221.3120013475418, - "p95": 224.57599639892578, - "p99": 227.743998169899 - }, - "roundtrip": { - "p50": 392.60798692703247, - "p90": 397.47199416160583, - "p95": 400.09599924087524, - "p99": 421.37598991394043 - }, - "isolatedSum": { - "p50": 422.07999527454376, - "p90": 440.70400297641754, - "p95": 446.3360011577606, - "p99": 454.1440010070801 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 312266752, - "combineLogicalBytes": 312266752, - "fanoutMean": 5.31787109375, - "recvTokensMax": 2779, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 1024, - "globalTokens": 8192, - "dispatch": { - "p50": 319.93600726127625, - "p90": 324.8960077762604, - "p95": 327.1679878234863, - "p99": 330.55999875068665 - }, - "combine": { - "p50": 330.01598715782166, - "p90": 335.1680040359497, - "p95": 336.64000034332275, - "p99": 340.2239978313446 - }, - "roundtrip": { - "p50": 624.064028263092, - "p90": 629.2480230331421, - "p95": 631.6159963607788, - "p99": 638.2399797439575 - }, - "isolatedSum": { - "p50": 649.9519944190979, - "p90": 660.0640118122101, - "p95": 663.8079881668091, - "p99": 670.7839965820312 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 623443968, - "combineLogicalBytes": 623443968, - "fanoutMean": 5.30859375, - "recvTokensMax": 5505, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2048, - 
"globalTokens": 16384, - "dispatch": { - "p50": 570.9440112113953, - "p90": 584.5119953155518, - "p95": 589.1519784927368, - "p99": 593.9199924468994 - }, - "combine": { - "p50": 564.9920105934143, - "p90": 574.3039846420288, - "p95": 576.7999887466431, - "p99": 583.5199952125549 - }, - "roundtrip": { - "p50": 1105.5680513381958, - "p90": 1120.1599836349487, - "p95": 1124.7680187225342, - "p99": 1134.719967842102 - }, - "isolatedSum": { - "p50": 1135.9360218048096, - "p90": 1158.8159799575806, - "p95": 1165.9519672393799, - "p99": 1177.4399876594543 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1243805696, - "combineLogicalBytes": 1243805696, - "fanoutMean": 5.29547119140625, - "recvTokensMax": 10952, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4096, - "globalTokens": 32768, - "dispatch": { - "p50": 1075.8719444274902, - "p90": 1088.703989982605, - "p95": 1093.5360193252563, - "p99": 1102.463960647583 - }, - "combine": { - "p50": 1031.872034072876, - "p90": 1041.3119792938232, - "p95": 1044.4799661636353, - "p99": 1055.359959602356 - }, - "roundtrip": { - "p50": 2082.304000854492, - "p90": 2096.640110015869, - "p95": 2100.895881652832, - "p99": 2108.031988143921 - }, - "isolatedSum": { - "p50": 2107.743978500366, - "p90": 2130.015969276428, - "p95": 2138.0159854888916, - "p99": 2157.823920249939 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2487009280, - "combineLogicalBytes": 2487009280, - "fanoutMean": 5.294189453125, - "recvTokensMax": 21781, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-107dd39c", - "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|prefill|normal|none|none|0|normalized|0.18|0a3064a2af0dd39", - "colorKey": "h100_716e65b9", - "comparisonKey": "ea5a5b6f1b74dc9d", - "schemaVersion": 3, - "generatedAt": "2026-06-26T17:31:48.643579+00:00", - "status": "valid", - 
"publicationStatus": "official", - "runner": "h100-dgxc-slurm_04", - "sku": "h100", - "backend": "deepep", - "phase": "prefill", - "mode": "normal", - "resourceMode": "normalized", - "suite": "resource-constrained", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · bf16 (norm) · balanced", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "balanced", - "routingLabel": "balanced", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": 0.18, - "achievedFraction": 0.1818, - "configuredUnits": 24, - "deviceUnits": 132, - "resourceClass": "resource-constrained", - "conformanceClass": "resource-conforming", - "fixedKernel": false, - "paretoEligible": true - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "0a3064a2af0dd39", - "workloadId": "set:6:2dad1a73ff872905", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28254367516", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254367516", - "createdAt": "2026-06-26T17:31:48.643579+00:00", - "sha": "60dec7d70f554e252fec87709e2be52752947db1" - }, - "rows": [ - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 126.65599584579468, - "p90": 131.74399733543396, - "p95": 132.83200562000275, - "p99": 139.80799913406372 - }, - "combine": { - "p50": 120.4800009727478, - "p90": 
122.40000069141388, - "p95": 124.28800016641617, - "p99": 129.12000715732574 - }, - "roundtrip": { - "p50": 221.40799462795258, - "p90": 226.49599611759186, - "p95": 227.77600586414337, - "p99": 232.16000199317932 - }, - "isolatedSum": { - "p50": 247.13599681854248, - "p90": 254.14399802684784, - "p95": 257.1200057864189, - "p99": 268.92800629138947 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 117440512, - "combineLogicalBytes": 117440512, - "fanoutMean": 8, - "recvTokensMax": 1024, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 256, - "globalTokens": 2048, - "dispatch": { - "p50": 174.04800653457642, - "p90": 177.5359958410263, - "p95": 179.29600179195404, - "p99": 190.0160014629364 - }, - "combine": { - "p50": 172.67200350761414, - "p90": 174.52800273895264, - "p95": 175.4239946603775, - "p99": 180.28800189495087 - }, - "roundtrip": { - "p50": 317.05600023269653, - "p90": 321.3759958744049, - "p95": 322.4320113658905, - "p99": 326.04798674583435 - }, - "isolatedSum": { - "p50": 346.72001004219055, - "p90": 352.06399857997894, - "p95": 354.71999645233154, - "p99": 370.30400335788727 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 234881024, - "combineLogicalBytes": 234881024, - "fanoutMean": 8, - "recvTokensMax": 2048, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 512, - "globalTokens": 4096, - "dispatch": { - "p50": 260.70401072502136, - "p90": 264.41600918769836, - "p95": 265.76000452041626, - "p99": 269.6639895439148 - }, - "combine": { - "p50": 255.13601303100586, - "p90": 258.2080066204071, - "p95": 259.5840096473694, - "p99": 263.5520100593567 - }, - "roundtrip": { - "p50": 489.3760085105896, - "p90": 493.696004152298, - "p95": 495.0079917907715, - "p99": 498.9120066165924 - }, - "isolatedSum": { - "p50": 515.8400237560272, - "p90": 522.6240158081055, - "p95": 525.3440141677856, - "p99": 533.2159996032715 - }, - 
"roundtripMeasured": true, - "dispatchLogicalBytes": 469762048, - "combineLogicalBytes": 469762048, - "fanoutMean": 8, - "recvTokensMax": 4096, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 1024, - "globalTokens": 8192, - "dispatch": { - "p50": 437.6640021800995, - "p90": 443.7119960784912, - "p95": 445.248007774353, - "p99": 449.50398802757263 - }, - "combine": { - "p50": 422.14399576187134, - "p90": 426.07998847961426, - "p95": 427.90400981903076, - "p99": 431.0399889945984 - }, - "roundtrip": { - "p50": 834.0799808502197, - "p90": 840.3199911117554, - "p95": 842.8159952163696, - "p99": 852.512001991272 - }, - "isolatedSum": { - "p50": 859.8079979419708, - "p90": 869.7919845581055, - "p95": 873.1520175933838, - "p99": 880.543977022171 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 939524096, - "combineLogicalBytes": 939524096, - "fanoutMean": 8, - "recvTokensMax": 8192, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2048, - "globalTokens": 16384, - "dispatch": { - "p50": 802.623987197876, - "p90": 819.7439908981323, - "p95": 822.3680257797241, - "p99": 830.3359746932983 - }, - "combine": { - "p50": 751.9360184669495, - "p90": 759.6160173416138, - "p95": 762.0480060577393, - "p99": 765.5680179595947 - }, - "roundtrip": { - "p50": 1521.9520330429077, - "p90": 1534.208059310913, - "p95": 1541.4400100708008, - "p99": 1552.5120496749878 - }, - "isolatedSum": { - "p50": 1554.5600056648254, - "p90": 1579.360008239746, - "p95": 1584.4160318374634, - "p99": 1595.903992652893 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1879048192, - "combineLogicalBytes": 1879048192, - "fanoutMean": 8, - "recvTokensMax": 16384, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4096, - "globalTokens": 32768, - "dispatch": { - "p50": 1529.0240049362183, - "p90": 1539.5519733428955, - 
"p95": 1543.4880256652832, - "p99": 1549.504041671753 - }, - "combine": { - "p50": 1399.6479511260986, - "p90": 1406.7840576171875, - "p95": 1409.440040588379, - "p99": 1416.767954826355 - }, - "roundtrip": { - "p50": 2903.520107269287, - "p90": 2916.3520336151123, - "p95": 2920.2558994293213, - "p99": 2930.016040802002 - }, - "isolatedSum": { - "p50": 2928.671956062317, - "p90": 2946.336030960083, - "p95": 2952.928066253662, - "p99": 2966.271996498108 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 3758096384, - "combineLogicalBytes": 3758096384, - "fanoutMean": 8, - "recvTokensMax": 32768, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-a1762095", - "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|prefill|normal|none|none|0|normalized|0.18|b5217e990b95f86", - "colorKey": "h100_f7ec28aa", - "comparisonKey": "18d3cab3936a264e", - "schemaVersion": 3, - "generatedAt": "2026-06-26T17:29:07.856119+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_14", - "sku": "h100", - "backend": "deepep", - "phase": "prefill", - "mode": "normal", - "resourceMode": "normalized", - "suite": "resource-constrained", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · bf16 (norm) · zipf", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "zipf", - "routingLabel": "zipf", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": 0.18, - "achievedFraction": 0.1818, - "configuredUnits": 24, - "deviceUnits": 132, - "resourceClass": "resource-constrained", - "conformanceClass": 
"resource-conforming", - "fixedKernel": false, - "paretoEligible": true - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "b5217e990b95f86", - "workloadId": "set:6:830e36e88869e222", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28254376151", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254376151", - "createdAt": "2026-06-26T17:29:07.856119+00:00", - "sha": "60dec7d70f554e252fec87709e2be52752947db1" - }, - "rows": [ - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 119.03999745845795, - "p90": 125.44000148773193, - "p95": 126.01600587368011, - "p99": 130.68799674510956 - }, - "combine": { - "p50": 111.32799834012985, - "p90": 113.92000317573547, - "p95": 114.33599889278412, - "p99": 119.77600306272507 - }, - "roundtrip": { - "p50": 207.42399990558624, - "p90": 212.351992726326, - "p95": 214.56000208854675, - "p99": 233.3119958639145 - }, - "isolatedSum": { - "p50": 230.3679957985878, - "p90": 239.3600046634674, - "p95": 240.35200476646423, - "p99": 250.46399980783463 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 49946624, - "combineLogicalBytes": 49946624, - "fanoutMean": 3.40234375, - "recvTokensMax": 1022, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 256, - "globalTokens": 2048, - "dispatch": { - "p50": 159.42400693893433, - "p90": 165.8879965543747, - "p95": 166.6879951953888, - "p99": 169.69600319862366 - }, - "combine": { - "p50": 156.19200468063354, - "p90": 162.49600052833557, - "p95": 163.26400637626648, - "p99": 168.83200407028198 - }, - "roundtrip": { - "p50": 290.336012840271, - "p90": 
296.4160144329071, - "p95": 298.43199253082275, - "p99": 313.4399950504303 - }, - "isolatedSum": { - "p50": 315.61601161956787, - "p90": 328.38399708271027, - "p95": 329.9520015716553, - "p99": 338.52800726890564 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 100509696, - "combineLogicalBytes": 100509696, - "fanoutMean": 3.42333984375, - "recvTokensMax": 2046, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 512, - "globalTokens": 4096, - "dispatch": { - "p50": 234.78400707244873, - "p90": 240.22400379180908, - "p95": 242.20800399780273, - "p99": 246.2719976902008 - }, - "combine": { - "p50": 244.47999894618988, - "p90": 252.16001272201538, - "p95": 254.8159956932068, - "p99": 262.4959945678711 - }, - "roundtrip": { - "p50": 450.81600546836853, - "p90": 456.83199167251587, - "p95": 458.624005317688, - "p99": 499.1680085659027 - }, - "isolatedSum": { - "p50": 479.2640060186386, - "p90": 492.38401651382446, - "p95": 497.0239996910095, - "p99": 508.7679922580719 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 201678848, - "combineLogicalBytes": 201678848, - "fanoutMean": 3.4345703125, - "recvTokensMax": 4094, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 1024, - "globalTokens": 8192, - "dispatch": { - "p50": 379.8399865627289, - "p90": 387.58400082588196, - "p95": 389.60000872612, - "p99": 392.9600119590759 - }, - "combine": { - "p50": 402.72000432014465, - "p90": 408.35198760032654, - "p95": 410.5280041694641, - "p99": 414.2400026321411 - }, - "roundtrip": { - "p50": 753.600001335144, - "p90": 759.8080039024353, - "p95": 761.5039944648743, - "p99": 764.959990978241 - }, - "isolatedSum": { - "p50": 782.5599908828735, - "p90": 795.9359884262085, - "p95": 800.1280128955841, - "p99": 807.200014591217 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 405035008, - "combineLogicalBytes": 405035008, - "fanoutMean": 
3.4488525390625, - "recvTokensMax": 8189, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2048, - "globalTokens": 16384, - "dispatch": { - "p50": 663.7120246887207, - "p90": 672.1919775009155, - "p95": 675.9359836578369, - "p99": 683.0080151557922 - }, - "combine": { - "p50": 711.5839719772339, - "p90": 725.5359888076782, - "p95": 729.8880219459534, - "p99": 740.0320172309875 - }, - "roundtrip": { - "p50": 1344.383955001831, - "p90": 1357.5999736785889, - "p95": 1361.0880374908447, - "p99": 1368.6399459838867 - }, - "isolatedSum": { - "p50": 1375.2959966659546, - "p90": 1397.7279663085938, - "p95": 1405.8240056037903, - "p99": 1423.0400323867798 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 808822784, - "combineLogicalBytes": 808822784, - "fanoutMean": 3.44354248046875, - "recvTokensMax": 16380, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4096, - "globalTokens": 32768, - "dispatch": { - "p50": 1251.1359453201294, - "p90": 1264.8320198059082, - "p95": 1269.6640491485596, - "p99": 1279.0080308914185 - }, - "combine": { - "p50": 1326.9120454788208, - "p90": 1337.3440504074097, - "p95": 1343.008041381836, - "p99": 1352.5439500808716 - }, - "roundtrip": { - "p50": 2547.0080375671387, - "p90": 2561.2800121307373, - "p95": 2564.863920211792, - "p99": 2581.696033477783 - }, - "isolatedSum": { - "p50": 2578.04799079895, - "p90": 2602.176070213318, - "p95": 2612.6720905303955, - "p99": 2631.55198097229 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1619795968, - "combineLogicalBytes": 1619795968, - "fanoutMean": 3.4481201171875, - "recvTokensMax": 32761, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-6339c695", - "identity": "h100|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|prefill|normal|none|none|0|normalized|0.18|2b57a75d27f5b39", - "colorKey": 
"h100_93503624", - "comparisonKey": "99696dfafd6d026a", - "schemaVersion": 3, - "generatedAt": "2026-06-26T17:46:27.794881+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_03", - "sku": "h100", - "backend": "deepep", - "phase": "prefill", - "mode": "normal", - "resourceMode": "normalized", - "suite": "resource-constrained", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · bf16 (norm) · zipf+eplb", - "model": "DeepSeek-V3 (EPLB physical)", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 288, - "routing": "zipf", - "routingLabel": "zipf+eplb", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": true, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": 0.18, - "achievedFraction": 0.1818, - "configuredUnits": 24, - "deviceUnits": 132, - "resourceClass": "resource-constrained", - "conformanceClass": "resource-conforming", - "fixedKernel": false, - "paretoEligible": true - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "2b57a75d27f5b39", - "workloadId": "set:6:830e36e88869e222", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": 4.895263671875, - "eplbImbalanceAfter": 1.0000902811686199, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28255296001", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28255296001", - "createdAt": "2026-06-26T17:46:27.794881+00:00", - "sha": "36d3eb6c3c7386d3220c873d305410219c5c0f17" - }, - "rows": [ - { - "tokensPerRank": 128, - "globalTokens": 1024, - 
"dispatch": { - "p50": 110.72000116109848, - "p90": 114.78400230407715, - "p95": 116.57600104808807, - "p99": 121.0239976644516 - }, - "combine": { - "p50": 105.8880016207695, - "p90": 111.35999858379364, - "p95": 112.0000034570694, - "p99": 114.56000059843063 - }, - "roundtrip": { - "p50": 195.99999487400055, - "p90": 200.00000298023224, - "p95": 201.24800503253937, - "p99": 205.59999346733093 - }, - "isolatedSum": { - "p50": 216.60800278186798, - "p90": 226.1440008878708, - "p95": 228.57600450515747, - "p99": 235.58399826288223 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 77385728, - "combineLogicalBytes": 77385728, - "fanoutMean": 5.271484375, - "recvTokensMax": 691, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 256, - "globalTokens": 2048, - "dispatch": { - "p50": 144.31999623775482, - "p90": 148.0640023946762, - "p95": 149.24800395965576, - "p99": 152.0960032939911 - }, - "combine": { - "p50": 146.62399888038635, - "p90": 151.10400319099426, - "p95": 152.51199901103973, - "p99": 155.32800555229187 - }, - "roundtrip": { - "p50": 260.8959972858429, - "p90": 265.3760015964508, - "p95": 266.400009393692, - "p99": 270.7520127296448 - }, - "isolatedSum": { - "p50": 290.9439951181412, - "p90": 299.16800558567047, - "p95": 301.7600029706955, - "p99": 307.42400884628296 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 155172864, - "combineLogicalBytes": 155172864, - "fanoutMean": 5.28515625, - "recvTokensMax": 1378, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 512, - "globalTokens": 4096, - "dispatch": { - "p50": 205.56800067424774, - "p90": 210.36800742149353, - "p95": 212.09600567817688, - "p99": 214.6880030632019 - }, - "combine": { - "p50": 214.78399634361267, - "p90": 219.13599967956543, - "p95": 220.70400416851044, - "p99": 225.2800017595291 - }, - "roundtrip": { - "p50": 394.8799967765808, - "p90": 
400.2879858016968, - "p95": 401.88801288604736, - "p99": 407.9680144786835 - }, - "isolatedSum": { - "p50": 420.3519970178604, - "p90": 429.50400710105896, - "p95": 432.8000098466873, - "p99": 439.968004822731 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 310546432, - "combineLogicalBytes": 310546432, - "fanoutMean": 5.28857421875, - "recvTokensMax": 2745, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 1024, - "globalTokens": 8192, - "dispatch": { - "p50": 326.9760012626648, - "p90": 332.35201239585876, - "p95": 334.46401357650757, - "p99": 337.98399567604065 - }, - "combine": { - "p50": 338.75200152397156, - "p90": 346.0479974746704, - "p95": 347.4240005016327, - "p99": 379.5199990272522 - }, - "roundtrip": { - "p50": 642.8160071372986, - "p90": 650.6879925727844, - "p95": 652.895987033844, - "p99": 658.7520241737366 - }, - "isolatedSum": { - "p50": 665.7280027866364, - "p90": 678.4000098705292, - "p95": 681.8880140781403, - "p99": 717.5039947032928 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 620619776, - "combineLogicalBytes": 620619776, - "fanoutMean": 5.2845458984375, - "recvTokensMax": 5526, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2048, - "globalTokens": 16384, - "dispatch": { - "p50": 584.0640068054199, - "p90": 592.6719903945923, - "p95": 595.5520272254944, - "p99": 601.2160181999207 - }, - "combine": { - "p50": 568.8639879226685, - "p90": 576.9280195236206, - "p95": 579.3920159339905, - "p99": 584.5119953155518 - }, - "roundtrip": { - "p50": 1122.3679780960083, - "p90": 1133.8560581207275, - "p95": 1138.6239528656006, - "p99": 1146.783947944641 - }, - "isolatedSum": { - "p50": 1152.9279947280884, - "p90": 1169.600009918213, - "p95": 1174.9440431594849, - "p99": 1185.7280135154724 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1239175168, - "combineLogicalBytes": 1239175168, - "fanoutMean": 
5.2757568359375, - "recvTokensMax": 11165, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4096, - "globalTokens": 32768, - "dispatch": { - "p50": 1107.200026512146, - "p90": 1119.0400123596191, - "p95": 1124.384045600891, - "p99": 1133.344054222107 - }, - "combine": { - "p50": 1020.6719636917114, - "p90": 1029.1839838027954, - "p95": 1032.1919918060303, - "p99": 1037.8559827804565 - }, - "roundtrip": { - "p50": 2098.4959602355957, - "p90": 2110.1760864257812, - "p95": 2113.856077194214, - "p99": 2120.60809135437 - }, - "isolatedSum": { - "p50": 2127.8719902038574, - "p90": 2148.2239961624146, - "p95": 2156.5760374069214, - "p99": 2171.2000370025635 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2481604608, - "combineLogicalBytes": 2481604608, - "fanoutMean": 5.282684326171875, - "recvTokensMax": 22165, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-96b1ca55", - "identity": "h100|deepep|7168|8|256|bf16|normal|cached-layout-comm-only-v1|uniform|8|prefill|normal|none|none|0|normalized|0.18|64d989e2e2a6b31", - "colorKey": "h100_5df912ff", - "comparisonKey": "9fdbd6763ea7346a", - "schemaVersion": 3, - "generatedAt": "2026-06-26T17:28:17.076570+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_08", - "sku": "h100", - "backend": "deepep", - "phase": "prefill", - "mode": "normal", - "resourceMode": "normalized", - "suite": "resource-constrained", - "comparisonClass": "standardized", - "measurementContract": "cached-layout-comm-only-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · bf16 (norm) [cl]", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - 
"dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": 0.18, - "achievedFraction": 0.1818, - "configuredUnits": 24, - "deviceUnits": 132, - "resourceClass": "resource-constrained", - "conformanceClass": "resource-conforming", - "fixedKernel": false, - "paretoEligible": true - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "64d989e2e2a6b31", - "workloadId": "set:6:a426d66e479dc893", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28254332840", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254332840", - "createdAt": "2026-06-26T17:28:17.076570+00:00", - "sha": "60dec7d70f554e252fec87709e2be52752947db1" - }, - "rows": [ - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 101.31199657917023, - "p90": 105.69600015878677, - "p95": 107.55199939012527, - "p99": 110.84800213575363 - }, - "combine": { - "p50": 105.82400113344193, - "p90": 107.42399841547012, - "p95": 108.60799998044968, - "p99": 112.64000087976456 - }, - "roundtrip": { - "p50": 183.1360012292862, - "p90": 188.03200125694275, - "p95": 188.960000872612, - "p99": 195.13599574565887 - }, - "isolatedSum": { - "p50": 207.13599771261215, - "p90": 213.1199985742569, - "p95": 216.15999937057495, - "p99": 223.4880030155182 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 256, - "globalTokens": 2048, - "dispatch": { - "p50": 
132.89600610733032, - "p90": 137.08800077438354, - "p95": 138.2399946451187, - "p99": 140.70400595664978 - }, - "combine": { - "p50": 144.96000111103058, - "p90": 147.5840061903, - "p95": 148.28799664974213, - "p99": 152.63999998569489 - }, - "roundtrip": { - "p50": 249.56800043582916, - "p90": 253.53598594665527, - "p95": 254.59200143814087, - "p99": 256.73601031303406 - }, - "isolatedSum": { - "p50": 277.8560072183609, - "p90": 284.67200696468353, - "p95": 286.52799129486084, - "p99": 293.34400594234467 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 155889664, - "combineLogicalBytes": 155889664, - "fanoutMean": 5.3095703125, - "recvTokensMax": 1422, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 512, - "globalTokens": 4096, - "dispatch": { - "p50": 193.4400051832199, - "p90": 202.68799364566803, - "p95": 203.87199521064758, - "p99": 209.9519968032837 - }, - "combine": { - "p50": 216.8319970369339, - "p90": 220.92799842357635, - "p95": 223.55200350284576, - "p99": 226.04799270629883 - }, - "roundtrip": { - "p50": 382.4959993362427, - "p90": 387.7759873867035, - "p95": 388.7679874897003, - "p99": 392.767995595932 - }, - "isolatedSum": { - "p50": 410.2720022201538, - "p90": 423.6159920692444, - "p95": 427.42399871349335, - "p99": 435.9999895095825 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 312266752, - "combineLogicalBytes": 312266752, - "fanoutMean": 5.31787109375, - "recvTokensMax": 2779, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 1024, - "globalTokens": 8192, - "dispatch": { - "p50": 315.0720000267029, - "p90": 320.1279938220978, - "p95": 322.04800844192505, - "p99": 324.5759904384613 - }, - "combine": { - "p50": 329.27998900413513, - "p90": 333.3759903907776, - "p95": 335.61599254608154, - "p99": 338.9120101928711 - }, - "roundtrip": { - "p50": 619.0720200538635, - "p90": 625.2480149269104, - "p95": 
627.839982509613, - "p99": 630.7839751243591 - }, - "isolatedSum": { - "p50": 644.351989030838, - "p90": 653.5039842128754, - "p95": 657.6640009880066, - "p99": 663.4880006313324 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 623443968, - "combineLogicalBytes": 623443968, - "fanoutMean": 5.30859375, - "recvTokensMax": 5505, - "stragglerRank": 3, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2048, - "globalTokens": 16384, - "dispatch": { - "p50": 560.8959794044495, - "p90": 569.8879957199097, - "p95": 572.1920132637024, - "p99": 577.2799849510193 - }, - "combine": { - "p50": 563.3599758148193, - "p90": 573.248028755188, - "p95": 576.3840079307556, - "p99": 580.672025680542 - }, - "roundtrip": { - "p50": 1093.727946281433, - "p90": 1102.6240587234497, - "p95": 1105.5999994277954, - "p99": 1112.0959520339966 - }, - "isolatedSum": { - "p50": 1124.2559552192688, - "p90": 1143.1360244750977, - "p95": 1148.576021194458, - "p99": 1157.9520106315613 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1243805696, - "combineLogicalBytes": 1243805696, - "fanoutMean": 5.29547119140625, - "recvTokensMax": 10952, - "stragglerRank": 2, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4096, - "globalTokens": 32768, - "dispatch": { - "p50": 1059.0720176696777, - "p90": 1071.7439651489258, - "p95": 1074.8480558395386, - "p99": 1091.2959575653076 - }, - "combine": { - "p50": 1026.8800258636475, - "p90": 1036.2880229949951, - "p95": 1038.7840270996094, - "p99": 1047.4879741668701 - }, - "roundtrip": { - "p50": 2055.1679134368896, - "p90": 2067.13604927063, - "p95": 2069.823980331421, - "p99": 2075.5200386047363 - }, - "isolatedSum": { - "p50": 2085.952043533325, - "p90": 2108.031988143921, - "p95": 2113.632082939148, - "p99": 2138.7839317321777 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2487009280, - "combineLogicalBytes": 2487009280, - "fanoutMean": 5.294189453125, - 
"recvTokensMax": 21781, - "stragglerRank": 3, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-1ed69eb7", - "identity": "h100|deepep|7168|8|256|bf16|normal|cached-layout-comm-only-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", - "colorKey": "h100_17694d2c", - "comparisonKey": "379c3371e525c0fb", - "schemaVersion": 3, - "generatedAt": "2026-06-26T23:48:34.870060+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_15", - "sku": "h100", - "backend": "deepep", - "phase": "prefill", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "cached-layout-comm-only-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · bf16 [cl]", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "64d989e2e2a6b31", - "workloadId": "set:6:a426d66e479dc893", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28271555838", - "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271555838", - "createdAt": "2026-06-26T23:48:34.870060+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 105.27999699115753, - "p90": 110.36799848079681, - "p95": 112.12799698114395, - "p99": 115.23199826478958 - }, - "combine": { - "p50": 106.175996363163, - "p90": 108.0000028014183, - "p95": 111.1999973654747, - "p99": 113.72800171375275 - }, - "roundtrip": { - "p50": 183.3599954843521, - "p90": 188.48000466823578, - "p95": 190.17599523067474, - "p99": 193.56800615787506 - }, - "isolatedSum": { - "p50": 211.45599335432053, - "p90": 218.36800128221512, - "p95": 223.32799434661865, - "p99": 228.95999997854233 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 256, - "globalTokens": 2048, - "dispatch": { - "p50": 134.5919966697693, - "p90": 140.06400108337402, - "p95": 142.46399700641632, - "p99": 146.88000082969666 - }, - "combine": { - "p50": 152.12799608707428, - "p90": 158.36800634860992, - "p95": 161.0880047082901, - "p99": 162.81600296497345 - }, - "roundtrip": { - "p50": 254.46400046348572, - "p90": 259.93600487709045, - "p95": 262.4639868736267, - "p99": 268.2560086250305 - }, - "isolatedSum": { - "p50": 286.71999275684357, - "p90": 298.43200743198395, - "p95": 303.5520017147064, - "p99": 309.6960037946701 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 155889664, - "combineLogicalBytes": 155889664, - "fanoutMean": 5.3095703125, - "recvTokensMax": 1422, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 512, - "globalTokens": 4096, - "dispatch": { - "p50": 188.960000872612, - "p90": 194.97600197792053, - "p95": 
198.11199605464935, - "p99": 202.5279998779297 - }, - "combine": { - "p50": 228.67199778556824, - "p90": 236.09599471092224, - "p95": 237.05600202083588, - "p99": 241.08800292015076 - }, - "roundtrip": { - "p50": 391.90399646759033, - "p90": 399.80798959732056, - "p95": 402.3999869823456, - "p99": 424.0959882736206 - }, - "isolatedSum": { - "p50": 417.63199865818024, - "p90": 431.0719966888428, - "p95": 435.16799807548523, - "p99": 443.61600279808044 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 312266752, - "combineLogicalBytes": 312266752, - "fanoutMean": 5.31787109375, - "recvTokensMax": 2779, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 1024, - "globalTokens": 8192, - "dispatch": { - "p50": 294.0160036087036, - "p90": 311.3279938697815, - "p95": 315.20000100135803, - "p99": 326.07999444007874 - }, - "combine": { - "p50": 366.1760091781616, - "p90": 382.9120099544525, - "p95": 391.32800698280334, - "p99": 407.039999961853 - }, - "roundtrip": { - "p50": 632.9600214958191, - "p90": 674.3680238723755, - "p95": 687.3279809951782, - "p99": 835.3919982910156 - }, - "isolatedSum": { - "p50": 660.1920127868652, - "p90": 694.240003824234, - "p95": 706.5280079841614, - "p99": 733.1199944019318 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 623443968, - "combineLogicalBytes": 623443968, - "fanoutMean": 5.30859375, - "recvTokensMax": 5505, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2048, - "globalTokens": 16384, - "dispatch": { - "p50": 509.15199518203735, - "p90": 521.8560099601746, - "p95": 526.1120200157166, - "p99": 533.0560207366943 - }, - "combine": { - "p50": 635.2319717407227, - "p90": 645.5680131912231, - "p95": 649.4719982147217, - "p99": 656.3839912414551 - }, - "roundtrip": { - "p50": 1114.9760484695435, - "p90": 1128.0319690704346, - "p95": 1131.9680213928223, - "p99": 1147.711992263794 - }, - "isolatedSum": { 
- "p50": 1144.38396692276, - "p90": 1167.4240231513977, - "p95": 1175.5840182304382, - "p99": 1189.4400119781494 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1243805696, - "combineLogicalBytes": 1243805696, - "fanoutMean": 5.29547119140625, - "recvTokensMax": 10952, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4096, - "globalTokens": 32768, - "dispatch": { - "p50": 970.848023891449, - "p90": 995.136022567749, - "p95": 1001.7919540405273, - "p99": 1016.1279439926147 - }, - "combine": { - "p50": 1156.3199758529663, - "p90": 1167.1040058135986, - "p95": 1172.287940979004, - "p99": 1184.928059577942 - }, - "roundtrip": { - "p50": 2089.279890060425, - "p90": 2105.664014816284, - "p95": 2110.431909561157, - "p99": 2118.0479526519775 - }, - "isolatedSum": { - "p50": 2127.1679997444153, - "p90": 2162.2400283813477, - "p95": 2174.0798950195312, - "p99": 2201.0560035705566 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2487009280, - "combineLogicalBytes": 2487009280, - "fanoutMean": 5.294189453125, - "recvTokensMax": 21781, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-39ba4bd5", - "identity": "h100|deepep|4096|8|128|fp8|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||dc27c5e0894e569", - "colorKey": "h100_a96c99f3", - "comparisonKey": "b9c15d0905ec0061", - "schemaVersion": 3, - "generatedAt": "2026-06-27T11:13:58.971427+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_17", - "sku": "h100", - "backend": "deepep", - "phase": "prefill", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · fp8", - "model": "Qwen3.5", - 
"shape": { - "hidden": 4096, - "topk": 8, - "experts": 128, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "fp8", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "dc27c5e0894e569", - "workloadId": "set:6:76d8142d69406335", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28287505969", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28287505969", - "createdAt": "2026-06-27T11:13:58.971427+00:00", - "sha": "df7fddee0a275156d4c72fa006cd2b73bce72613" - }, - "rows": [ - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 79.03999835252762, - "p90": 84.63999629020691, - "p95": 88.28800171613693, - "p99": 108.89600217342377 - }, - "combine": { - "p50": 77.02399790287018, - "p90": 80.1599994301796, - "p95": 82.0159986615181, - "p99": 85.85599809885025 - }, - "roundtrip": { - "p50": 171.64799571037292, - "p90": 178.01600694656372, - "p95": 187.74400651454926, - "p99": 233.50399732589722 - }, - "isolatedSum": { - "p50": 156.0639962553978, - "p90": 164.7999957203865, - "p95": 170.30400037765503, - "p99": 194.75200027227402 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 22282240, - "combineLogicalBytes": 44564480, - "fanoutMean": 5.3125, - "recvTokensMax": 699, - 
"stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 256, - "globalTokens": 2048, - "dispatch": { - "p50": 95.39200365543365, - "p90": 113.82400244474411, - "p95": 119.61600184440613, - "p99": 132.57600367069244 - }, - "combine": { - "p50": 103.74400019645691, - "p90": 114.43199962377548, - "p95": 121.24799937009811, - "p99": 155.2640050649643 - }, - "roundtrip": { - "p50": 235.9679937362671, - "p90": 250.62400102615356, - "p95": 265.1839852333069, - "p99": 275.2000093460083 - }, - "isolatedSum": { - "p50": 199.13600385189056, - "p90": 228.2560020685196, - "p95": 240.86400121450424, - "p99": 287.84000873565674 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 44863488, - "combineLogicalBytes": 89726976, - "fanoutMean": 5.34814453125, - "recvTokensMax": 1385, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 512, - "globalTokens": 4096, - "dispatch": { - "p50": 126.8479973077774, - "p90": 132.7040046453476, - "p95": 135.74400544166565, - "p99": 147.45600521564484 - }, - "combine": { - "p50": 159.96800363063812, - "p90": 164.95999693870544, - "p95": 166.49599373340607, - "p99": 175.9359985589981 - }, - "roundtrip": { - "p50": 375.8719861507416, - "p90": 385.4080140590668, - "p95": 393.18400621414185, - "p99": 407.9680144786835 - }, - "isolatedSum": { - "p50": 286.8160009384155, - "p90": 297.66400158405304, - "p95": 302.2399991750717, - "p99": 323.39200377464294 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 89751552, - "combineLogicalBytes": 179503104, - "fanoutMean": 5.349609375, - "recvTokensMax": 2772, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 1024, - "globalTokens": 8192, - "dispatch": { - "p50": 191.55199825763702, - "p90": 209.08799767494202, - "p95": 216.15999937057495, - "p99": 227.55199670791626 - }, - "combine": { - "p50": 267.2320008277893, - "p90": 
272.3200023174286, - "p95": 273.6000120639801, - "p99": 275.84001421928406 - }, - "roundtrip": { - "p50": 636.2879872322083, - "p90": 641.8560147285461, - "p95": 644.1280245780945, - "p99": 809.0239763259888 - }, - "isolatedSum": { - "p50": 458.78399908542633, - "p90": 481.4079999923706, - "p95": 489.76001143455505, - "p99": 503.3920109272003 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 179511296, - "combineLogicalBytes": 359022592, - "fanoutMean": 5.349853515625, - "recvTokensMax": 5558, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2048, - "globalTokens": 16384, - "dispatch": { - "p50": 307.776004076004, - "p90": 323.35999608039856, - "p95": 325.21599531173706, - "p99": 330.6879997253418 - }, - "combine": { - "p50": 460.9279930591583, - "p90": 468.4160053730011, - "p95": 470.2720046043396, - "p99": 600.9600162506104 - }, - "roundtrip": { - "p50": 1133.8239908218384, - "p90": 1142.5600051879883, - "p95": 1146.1759805679321, - "p99": 1151.3279676437378 - }, - "isolatedSum": { - "p50": 768.7039971351624, - "p90": 791.7760014533997, - "p95": 795.4879999160767, - "p99": 931.6480159759521 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 358055936, - "combineLogicalBytes": 716111872, - "fanoutMean": 5.33544921875, - "recvTokensMax": 10982, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4096, - "globalTokens": 32768, - "dispatch": { - "p50": 546.3039875030518, - "p90": 551.2959957122803, - "p95": 553.4719824790955, - "p99": 559.6799850463867 - }, - "combine": { - "p50": 841.6640162467957, - "p90": 850.816011428833, - "p95": 854.1120290756226, - "p99": 870.1440095901489 - }, - "roundtrip": { - "p50": 2148.0960845947266, - "p90": 2161.184072494507, - "p95": 2165.440082550049, - "p99": 2175.0400066375732 - }, - "isolatedSum": { - "p50": 1387.9680037498474, - "p90": 1402.1120071411133, - "p95": 1407.584011554718, - "p99": 
1429.8239946365356 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 716197888, - "combineLogicalBytes": 1432395776, - "fanoutMean": 5.336090087890625, - "recvTokensMax": 21939, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-8fb1cb65", - "identity": "h100|deepep|5120|8|160|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||0c022a63bbcbf42", - "colorKey": "h100_97196257", - "comparisonKey": "d361c128552b2ee8", - "schemaVersion": 3, - "generatedAt": "2026-06-26T23:51:51.842450+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_10", - "sku": "h100", - "backend": "deepep", - "phase": "prefill", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "runtime-visible-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · fp8", - "model": "shape 5120/8/160", - "shape": { - "hidden": 5120, - "topk": 8, - "experts": 160, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "fp8", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "0c022a63bbcbf42", - "workloadId": "set:6:28c0c09b13ff0acf", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": 
"sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28271695735", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271695735", - "createdAt": "2026-06-26T23:51:51.842450+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 196.28800451755524, - "p90": 202.33599841594696, - "p95": 203.96800339221954, - "p99": 210.07999777793884 - }, - "combine": { - "p50": 85.11999994516373, - "p90": 87.5839963555336, - "p95": 89.72799777984619, - "p99": 93.24800223112106 - }, - "roundtrip": { - "p50": 266.7520046234131, - "p90": 273.824006319046, - "p95": 277.5680124759674, - "p99": 291.83998703956604 - }, - "isolatedSum": { - "p50": 281.40800446271896, - "p90": 289.91999477148056, - "p95": 293.69600117206573, - "p99": 303.3280000090599 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 27837440, - "combineLogicalBytes": 55674880, - "fanoutMean": 5.3095703125, - "recvTokensMax": 699, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 256, - "globalTokens": 2048, - "dispatch": { - "p50": 248.3839988708496, - "p90": 296.7680096626282, - "p95": 299.80799555778503, - "p99": 321.28000259399414 - }, - "combine": { - "p50": 118.81600320339203, - "p90": 125.15200674533844, - "p95": 126.17599964141846, - "p99": 128.06400656700134 - }, - "roundtrip": { - "p50": 353.85599732398987, - "p90": 407.9360067844391, - "p95": 410.3040099143982, - "p99": 414.40001130104065 - }, - "isolatedSum": { - "p50": 367.20000207424164, - "p90": 421.9200164079666, - "p95": 425.9839951992035, - "p99": 449.3440091609955 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 55552000, - "combineLogicalBytes": 111104000, - "fanoutMean": 5.2978515625, - "recvTokensMax": 1387, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - 
"trials": 3 - }, - { - "tokensPerRank": 512, - "globalTokens": 4096, - "dispatch": { - "p50": 341.43999218940735, - "p90": 394.0480053424835, - "p95": 396.5440094470978, - "p99": 400.41598677635193 - }, - "combine": { - "p50": 185.12000143527985, - "p90": 191.3280040025711, - "p95": 193.05600225925446, - "p99": 195.74399292469025 - }, - "roundtrip": { - "p50": 510.0160241127014, - "p90": 567.3919916152954, - "p95": 570.8479881286621, - "p99": 574.176013469696 - }, - "isolatedSum": { - "p50": 526.5599936246872, - "p90": 585.3760093450546, - "p95": 589.6000117063522, - "p99": 596.1599797010422 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 111549440, - "combineLogicalBytes": 223098880, - "fanoutMean": 5.319091796875, - "recvTokensMax": 2762, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 1024, - "globalTokens": 8192, - "dispatch": { - "p50": 517.9839730262756, - "p90": 568.6720013618469, - "p95": 574.0159749984741, - "p99": 579.8079967498779 - }, - "combine": { - "p50": 291.26399755477905, - "p90": 295.80798745155334, - "p95": 297.08799719810486, - "p99": 299.96800422668457 - }, - "roundtrip": { - "p50": 794.2079901695251, - "p90": 801.3120293617249, - "p95": 804.095983505249, - "p99": 814.4959807395935 - }, - "isolatedSum": { - "p50": 809.2479705810547, - "p90": 864.4799888134003, - "p95": 871.103972196579, - "p99": 879.7760009765625 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 223365120, - "combineLogicalBytes": 446730240, - "fanoutMean": 5.325439453125, - "recvTokensMax": 5518, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2048, - "globalTokens": 16384, - "dispatch": { - "p50": 962.5599980354309, - "p90": 971.2640047073364, - "p95": 973.0560183525085, - "p99": 978.3999919891357 - }, - "combine": { - "p50": 513.1199955940247, - "p90": 523.5520005226135, - "p95": 526.0800123214722, - "p99": 531.9039821624756 - }, - 
"roundtrip": { - "p50": 1460.576057434082, - "p90": 1472.4160432815552, - "p95": 1476.6080379486084, - "p99": 1773.3759880065918 - }, - "isolatedSum": { - "p50": 1475.6799936294556, - "p90": 1494.81600522995, - "p95": 1499.1360306739807, - "p99": 1510.3039741516113 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 446817280, - "combineLogicalBytes": 893634560, - "fanoutMean": 5.32647705078125, - "recvTokensMax": 11032, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4096, - "globalTokens": 32768, - "dispatch": { - "p50": 1818.5919523239136, - "p90": 1826.9439935684204, - "p95": 1829.7280073165894, - "p99": 1833.8559865951538 - }, - "combine": { - "p50": 930.3359985351562, - "p90": 939.7119879722595, - "p95": 942.8160190582275, - "p99": 948.0640292167664 - }, - "roundtrip": { - "p50": 2736.9279861450195, - "p90": 2750.3039836883545, - "p95": 2755.199909210205, - "p99": 2763.64803314209 - }, - "isolatedSum": { - "p50": 2748.92795085907, - "p90": 2766.65598154068, - "p95": 2772.544026374817, - "p99": 2781.92001581192 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 893132800, - "combineLogicalBytes": 1786265600, - "fanoutMean": 5.323486328125, - "recvTokensMax": 21895, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-26196af1", - "identity": "h100|deepep|6144|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", - "colorKey": "h100_a96c99f3", - "comparisonKey": "fd1c952adc3abb43", - "schemaVersion": 3, - "generatedAt": "2026-06-27T11:13:32.456116+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_07", - "sku": "h100", - "backend": "deepep", - "phase": "prefill", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": 
"h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · fp8", - "model": "MiniMax-M3", - "shape": { - "hidden": 6144, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "fp8", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "64d989e2e2a6b31", - "workloadId": "set:6:9f5e1e005a35e937", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28287494014", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28287494014", - "createdAt": "2026-06-27T11:13:32.456116+00:00", - "sha": "df7fddee0a275156d4c72fa006cd2b73bce72613" - }, - "rows": [ - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 86.30400151014328, - "p90": 99.5199978351593, - "p95": 101.08800232410431, - "p99": 104.99200224876404 - }, - "combine": { - "p50": 91.80799871683121, - "p90": 100.28800368309021, - "p95": 101.31199657917023, - "p99": 105.95200210809708 - }, - "roundtrip": { - "p50": 200.22399723529816, - "p90": 218.87999773025513, - "p95": 220.41599452495575, - "p99": 234.52800512313843 - }, - "isolatedSum": { - "p50": 178.1120002269745, - "p90": 199.8080015182495, - "p95": 202.39999890327454, - "p99": 210.94400435686111 - }, - 
"roundtripMeasured": true, - "dispatchLogicalBytes": 33288192, - "combineLogicalBytes": 66576384, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 1, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 256, - "globalTokens": 2048, - "dispatch": { - "p50": 105.34399747848511, - "p90": 117.60000139474869, - "p95": 119.35999989509583, - "p99": 123.36000055074692 - }, - "combine": { - "p50": 131.9359987974167, - "p90": 142.87999272346497, - "p95": 148.70400726795197, - "p99": 308.4479868412018 - }, - "roundtrip": { - "p50": 299.9039888381958, - "p90": 312.5759959220886, - "p95": 314.65598940849304, - "p99": 318.7200129032135 - }, - "isolatedSum": { - "p50": 237.2799962759018, - "p90": 260.47999411821365, - "p95": 268.0640071630478, - "p99": 431.8079873919487 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 66809856, - "combineLogicalBytes": 133619712, - "fanoutMean": 5.3095703125, - "recvTokensMax": 1422, - "stragglerRank": 1, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 512, - "globalTokens": 4096, - "dispatch": { - "p50": 139.3599957227707, - "p90": 143.0400013923645, - "p95": 144.51199769973755, - "p99": 150.94399452209473 - }, - "combine": { - "p50": 200.32000541687012, - "p90": 203.67999374866486, - "p95": 204.73599433898926, - "p99": 209.82399582862854 - }, - "roundtrip": { - "p50": 476.83200240135193, - "p90": 482.40000009536743, - "p95": 484.47999358177185, - "p99": 581.2479853630066 - }, - "isolatedSum": { - "p50": 339.6800011396408, - "p90": 346.71999514102936, - "p95": 349.2479920387268, - "p99": 360.76799035072327 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 133828608, - "combineLogicalBytes": 267657216, - "fanoutMean": 5.31787109375, - "recvTokensMax": 2779, - "stragglerRank": 1, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 1024, - "globalTokens": 8192, - "dispatch": { - "p50": 210.1760059595108, 
- "p90": 214.6880030632019, - "p95": 216.19200706481934, - "p99": 219.55199539661407 - }, - "combine": { - "p50": 324.95999336242676, - "p90": 330.1759958267212, - "p95": 332.2240114212036, - "p99": 353.4719944000244 - }, - "roundtrip": { - "p50": 807.9040050506592, - "p90": 815.6800270080566, - "p95": 819.6160197257996, - "p99": 854.9759984016418 - }, - "isolatedSum": { - "p50": 535.1359993219376, - "p90": 544.8639988899231, - "p95": 548.416018486023, - "p99": 573.0239897966385 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 267190272, - "combineLogicalBytes": 534380544, - "fanoutMean": 5.30859375, - "recvTokensMax": 5505, - "stragglerRank": 1, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2048, - "globalTokens": 16384, - "dispatch": { - "p50": 352.60799527168274, - "p90": 362.5600039958954, - "p95": 365.85599184036255, - "p99": 394.9440121650696 - }, - "combine": { - "p50": 570.8479881286621, - "p90": 576.7679810523987, - "p95": 579.0719985961914, - "p99": 581.4719796180725 - }, - "roundtrip": { - "p50": 1472.5439548492432, - "p90": 1483.680009841919, - "p95": 1486.4319562911987, - "p99": 1497.4080324172974 - }, - "isolatedSum": { - "p50": 923.4559834003448, - "p90": 939.3279850482941, - "p95": 944.927990436554, - "p99": 976.4159917831421 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 533059584, - "combineLogicalBytes": 1066119168, - "fanoutMean": 5.29547119140625, - "recvTokensMax": 10952, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4096, - "globalTokens": 32768, - "dispatch": { - "p50": 622.8160262107849, - "p90": 630.2400231361389, - "p95": 632.8639984130859, - "p99": 637.503981590271 - }, - "combine": { - "p50": 1051.8399477005005, - "p90": 1060.4480504989624, - "p95": 1063.5839700698853, - "p99": 1077.728033065796 - }, - "roundtrip": { - "p50": 2821.791887283325, - "p90": 2846.463918685913, - "p95": 2856.384038925171, - "p99": 
2868.5760498046875 - }, - "isolatedSum": { - "p50": 1674.6559739112854, - "p90": 1690.6880736351013, - "p95": 1696.4479684829712, - "p99": 1715.232014656067 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1065861120, - "combineLogicalBytes": 2131722240, - "fanoutMean": 5.294189453125, - "recvTokensMax": 21781, - "stragglerRank": 1, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-db3c52ad", - "identity": "h100|deepep|6144|8|256|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", - "colorKey": "h100_97196257", - "comparisonKey": "d4fd66af6f4726f6", - "schemaVersion": 3, - "generatedAt": "2026-06-26T23:52:17.424978+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_09", - "sku": "h100", - "backend": "deepep", - "phase": "prefill", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "runtime-visible-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · fp8", - "model": "MiniMax-M3", - "shape": { - "hidden": 6144, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "fp8", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "64d989e2e2a6b31", - "workloadId": "set:6:9f5e1e005a35e937", - "workloadSource": "canonical-serialized", - 
"eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28271710412", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271710412", - "createdAt": "2026-06-26T23:52:17.424978+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 208.8640034198761, - "p90": 215.68000316619873, - "p95": 218.75199675559998, - "p99": 469.56801414489746 - }, - "combine": { - "p50": 90.33600240945816, - "p90": 93.21600198745728, - "p95": 95.551997423172, - "p99": 98.1760025024414 - }, - "roundtrip": { - "p50": 286.72000765800476, - "p90": 290.75199365615845, - "p95": 293.0240035057068, - "p99": 295.52000761032104 - }, - "isolatedSum": { - "p50": 299.20000582933426, - "p90": 308.896005153656, - "p95": 314.303994178772, - "p99": 567.7440166473389 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 33288192, - "combineLogicalBytes": 66576384, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 256, - "globalTokens": 2048, - "dispatch": { - "p50": 267.2320008277893, - "p90": 273.4079957008362, - "p95": 275.64799785614014, - "p99": 286.8480086326599 - }, - "combine": { - "p50": 127.23200023174286, - "p90": 130.40000200271606, - "p95": 131.52000308036804, - "p99": 134.0479999780655 - }, - "roundtrip": { - "p50": 387.3920142650604, - "p90": 392.2879993915558, - "p95": 394.9440121650696, - "p99": 403.328001499176 - }, - "isolatedSum": { - "p50": 394.46400105953217, - "p90": 403.80799770355225, - "p95": 407.1680009365082, - "p99": 420.8960086107254 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 66809856, - "combineLogicalBytes": 133619712, - "fanoutMean": 
5.3095703125, - "recvTokensMax": 1422, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 512, - "globalTokens": 4096, - "dispatch": { - "p50": 377.6960074901581, - "p90": 423.93600940704346, - "p95": 426.4320135116577, - "p99": 432.6080083847046 - }, - "combine": { - "p50": 203.42400670051575, - "p90": 213.95200490951538, - "p95": 215.03999829292297, - "p99": 218.6879962682724 - }, - "roundtrip": { - "p50": 564.4479990005493, - "p90": 604.8960089683533, - "p95": 608.1920266151428, - "p99": 615.1360273361206 - }, - "isolatedSum": { - "p50": 581.1200141906738, - "p90": 637.8880143165588, - "p95": 641.4720118045807, - "p99": 651.296004652977 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 133828608, - "combineLogicalBytes": 267657216, - "fanoutMean": 5.31787109375, - "recvTokensMax": 2779, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 1024, - "globalTokens": 8192, - "dispatch": { - "p50": 593.504011631012, - "p90": 597.9200005531311, - "p95": 599.295973777771, - "p99": 620.9279894828796 - }, - "combine": { - "p50": 322.59199023246765, - "p90": 326.78401470184326, - "p95": 328.7999927997589, - "p99": 331.36001229286194 - }, - "roundtrip": { - "p50": 899.1680145263672, - "p90": 904.416024684906, - "p95": 906.6240191459656, - "p99": 913.0560159683228 - }, - "isolatedSum": { - "p50": 916.0960018634796, - "p90": 924.7040152549744, - "p95": 928.0959665775299, - "p99": 952.2880017757416 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 267190272, - "combineLogicalBytes": 534380544, - "fanoutMean": 5.30859375, - "recvTokensMax": 5505, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2048, - "globalTokens": 16384, - "dispatch": { - "p50": 1106.943964958191, - "p90": 1117.9519891738892, - "p95": 1120.7040548324585, - "p99": 1126.2719631195068 - }, - "combine": { - "p50": 
574.4640231132507, - "p90": 583.0720067024231, - "p95": 584.991991519928, - "p99": 590.719997882843 - }, - "roundtrip": { - "p50": 1684.0640306472778, - "p90": 1699.5840072631836, - "p95": 1705.1520347595215, - "p99": 1751.9680261611938 - }, - "isolatedSum": { - "p50": 1681.4079880714417, - "p90": 1701.0239958763123, - "p95": 1705.6960463523865, - "p99": 1716.9919610023499 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 533059584, - "combineLogicalBytes": 1066119168, - "fanoutMean": 5.29547119140625, - "recvTokensMax": 10952, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4096, - "globalTokens": 32768, - "dispatch": { - "p50": 2105.151891708374, - "p90": 2114.687919616699, - "p95": 2120.1279163360596, - "p99": 2217.0560359954834 - }, - "combine": { - "p50": 1052.9279708862305, - "p90": 1061.0560178756714, - "p95": 1063.264012336731, - "p99": 1068.0320262908936 - }, - "roundtrip": { - "p50": 3201.6959190368652, - "p90": 3233.1199645996094, - "p95": 3240.8320903778076, - "p99": 3259.615898132324 - }, - "isolatedSum": { - "p50": 3158.0798625946045, - "p90": 3175.7439374923706, - "p95": 3183.3919286727905, - "p99": 3285.088062286377 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1065861120, - "combineLogicalBytes": 2131722240, - "fanoutMean": 5.294189453125, - "recvTokensMax": 21781, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-bf310e7a", - "identity": "h100|deepep|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", - "colorKey": "h100_a96c99f3", - "comparisonKey": "3fc4c710187195cb", - "schemaVersion": 3, - "generatedAt": "2026-06-27T10:13:01.422194+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_18", - "sku": "h100", - "backend": "deepep", - "phase": "prefill", - "mode": "normal", - "resourceMode": "tuned", - "suite": 
"backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · fp8", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "fp8", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "64d989e2e2a6b31", - "workloadId": "set:6:a426d66e479dc893", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "2.0.0+af9a040", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28286086353", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28286086353", - "createdAt": "2026-06-27T10:13:01.422194+00:00", - "sha": "76a3032d20288ee17220eb6099346f74d56ce005" - }, - "rows": [ - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 89.53599631786346, - "p90": 94.30400282144547, - "p95": 95.87199985980988, - "p99": 98.7199991941452 - }, - "combine": { - "p50": 100.51199793815613, - "p90": 103.35999727249146, - "p95": 104.96000200510025, - "p99": 108.73600095510483 - }, - "roundtrip": { - "p50": 218.07999908924103, - "p90": 221.69600427150726, - "p95": 222.78399765491486, - "p99": 227.64800488948822 - }, - 
"isolatedSum": { - "p50": 190.0479942560196, - "p90": 197.66400009393692, - "p95": 200.83200186491013, - "p99": 207.45600014925003 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 38836224, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 256, - "globalTokens": 2048, - "dispatch": { - "p50": 108.73600095510483, - "p90": 128.83199751377106, - "p95": 132.64000415802002, - "p99": 136.83199882507324 - }, - "combine": { - "p50": 144.76799964904785, - "p90": 153.08800339698792, - "p95": 157.6319932937622, - "p99": 160.7999950647354 - }, - "roundtrip": { - "p50": 332.96000957489014, - "p90": 344.60800886154175, - "p95": 350.271999835968, - "p99": 356.86400532722473 - }, - "isolatedSum": { - "p50": 253.50400060415268, - "p90": 281.920000910759, - "p95": 290.2719974517822, - "p99": 297.63199388980865 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 77944832, - "combineLogicalBytes": 155889664, - "fanoutMean": 5.3095703125, - "recvTokensMax": 1422, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 512, - "globalTokens": 4096, - "dispatch": { - "p50": 144.22400295734406, - "p90": 159.29600596427917, - "p95": 160.863995552063, - "p99": 166.6879951953888 - }, - "combine": { - "p50": 224.95999932289124, - "p90": 231.26399517059326, - "p95": 233.18399488925934, - "p99": 236.12800240516663 - }, - "roundtrip": { - "p50": 525.2799987792969, - "p90": 531.4559936523438, - "p95": 534.6879959106445, - "p99": 546.2719798088074 - }, - "isolatedSum": { - "p50": 369.1840022802353, - "p90": 390.56000113487244, - "p95": 394.0479904413223, - "p99": 402.8159976005554 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 156133376, - "combineLogicalBytes": 312266752, - "fanoutMean": 5.31787109375, - "recvTokensMax": 2779, - "stragglerRank": 7, - "correct": true, - 
"samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 1024, - "globalTokens": 8192, - "dispatch": { - "p50": 213.1199985742569, - "p90": 218.9760059118271, - "p95": 226.3679951429367, - "p99": 239.42400515079498 - }, - "combine": { - "p50": 360.8640134334564, - "p90": 365.79200625419617, - "p95": 367.3279881477356, - "p99": 371.7760145664215 - }, - "roundtrip": { - "p50": 894.208014011383, - "p90": 899.7120261192322, - "p95": 901.6320109367371, - "p99": 904.8320055007935 - }, - "isolatedSum": { - "p50": 573.9840120077133, - "p90": 584.7680121660233, - "p95": 593.6959832906723, - "p99": 611.2000197172165 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 311721984, - "combineLogicalBytes": 623443968, - "fanoutMean": 5.30859375, - "recvTokensMax": 5505, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2048, - "globalTokens": 16384, - "dispatch": { - "p50": 348.54400157928467, - "p90": 367.39200353622437, - "p95": 369.7600066661835, - "p99": 378.6559998989105 - }, - "combine": { - "p50": 634.4000101089478, - "p90": 643.0079936981201, - "p95": 645.7599997520447, - "p99": 650.0160098075867 - }, - "roundtrip": { - "p50": 1619.871973991394, - "p90": 1633.5680484771729, - "p95": 1636.3840103149414, - "p99": 1644.09601688385 - }, - "isolatedSum": { - "p50": 982.9440116882324, - "p90": 1010.3999972343445, - "p95": 1015.5200064182281, - "p99": 1028.6720097064972 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 621902848, - "combineLogicalBytes": 1243805696, - "fanoutMean": 5.29547119140625, - "recvTokensMax": 10952, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4096, - "globalTokens": 32768, - "dispatch": { - "p50": 631.3279867172241, - "p90": 641.6320204734802, - "p95": 644.9919939041138, - "p99": 654.4640064239502 - }, - "combine": { - "p50": 1156.607985496521, - "p90": 1167.0080423355103, - "p95": 1169.7280406951904, - "p99": 
1179.3279647827148 - }, - "roundtrip": { - "p50": 3077.791929244995, - "p90": 3088.8640880584717, - "p95": 3093.4720039367676, - "p99": 3101.408004760742 - }, - "isolatedSum": { - "p50": 1787.9359722137451, - "p90": 1808.6400628089905, - "p95": 1814.7200345993042, - "p99": 1833.791971206665 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1243504640, - "combineLogicalBytes": 2487009280, - "fanoutMean": 5.294189453125, - "recvTokensMax": 21781, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-9440251a", - "identity": "h100|deepep|7168|8|256|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", - "colorKey": "h100_97196257", - "comparisonKey": "2b50b361430bc4f6", - "schemaVersion": 3, - "generatedAt": "2026-06-26T23:48:40.278594+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_14", - "sku": "h100", - "backend": "deepep", - "phase": "prefill", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "runtime-visible-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · fp8", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "fp8", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - 
"traceSignature": "64d989e2e2a6b31", - "workloadId": "set:6:a426d66e479dc893", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28271583505", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271583505", - "createdAt": "2026-06-26T23:48:40.278594+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 211.2639993429184, - "p90": 217.28000044822693, - "p95": 219.32800114154816, - "p99": 226.78400576114655 - }, - "combine": { - "p50": 97.15200215578079, - "p90": 100.41599720716476, - "p95": 102.27199643850327, - "p99": 105.59999942779541 - }, - "roundtrip": { - "p50": 296.640008687973, - "p90": 303.26399207115173, - "p95": 305.82401156425476, - "p99": 313.9199912548065 - }, - "isolatedSum": { - "p50": 308.4160014986992, - "p90": 317.6959976553917, - "p95": 321.5999975800514, - "p99": 332.38400518894196 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 38836224, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 256, - "globalTokens": 2048, - "dispatch": { - "p50": 281.98400139808655, - "p90": 324.6400058269501, - "p95": 327.7760148048401, - "p99": 332.99198746681213 - }, - "combine": { - "p50": 141.53599739074707, - "p90": 147.87200093269348, - "p95": 149.9519944190979, - "p99": 152.70400047302246 - }, - "roundtrip": { - "p50": 409.7920060157776, - "p90": 415.45599699020386, - "p95": 417.7280068397522, - "p99": 423.39199781417847 - }, - "isolatedSum": { - "p50": 423.5199987888336, - "p90": 472.51200675964355, - "p95": 477.728009223938, - "p99": 485.6959879398346 
- }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 77944832, - "combineLogicalBytes": 155889664, - "fanoutMean": 5.3095703125, - "recvTokensMax": 1422, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 512, - "globalTokens": 4096, - "dispatch": { - "p50": 402.24000811576843, - "p90": 444.64001059532166, - "p95": 447.3919868469238, - "p99": 454.1440010070801 - }, - "combine": { - "p50": 224.16000068187714, - "p90": 233.0559939146042, - "p95": 235.23199558258057, - "p99": 239.29600417613983 - }, - "roundtrip": { - "p50": 613.9839887619019, - "p90": 657.7600240707397, - "p95": 661.9200110435486, - "p99": 734.7840070724487 - }, - "isolatedSum": { - "p50": 626.4000087976456, - "p90": 677.6960045099258, - "p95": 682.6239824295044, - "p99": 693.4400051832199 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 156133376, - "combineLogicalBytes": 312266752, - "fanoutMean": 5.31787109375, - "recvTokensMax": 2779, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 1024, - "globalTokens": 8192, - "dispatch": { - "p50": 664.7359728813171, - "p90": 669.5680022239685, - "p95": 671.1360216140747, - "p99": 674.7519969940186 - }, - "combine": { - "p50": 358.0799996852875, - "p90": 363.23198676109314, - "p95": 364.47998881340027, - "p99": 369.9199855327606 - }, - "roundtrip": { - "p50": 1005.2160024642944, - "p90": 1010.2720260620117, - "p95": 1012.287974357605, - "p99": 1019.9999809265137 - }, - "isolatedSum": { - "p50": 1022.8159725666046, - "p90": 1032.7999889850616, - "p95": 1035.616010427475, - "p99": 1044.6719825267792 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 311721984, - "combineLogicalBytes": 623443968, - "fanoutMean": 5.30859375, - "recvTokensMax": 5505, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2048, - "globalTokens": 16384, - "dispatch": { - "p50": 
1237.8560304641724, - "p90": 1245.919942855835, - "p95": 1249.0559816360474, - "p99": 1253.6319494247437 - }, - "combine": { - "p50": 632.0639848709106, - "p90": 639.0720009803772, - "p95": 641.5359973907471, - "p99": 646.9119787216187 - }, - "roundtrip": { - "p50": 1845.0239896774292, - "p90": 1854.3039560317993, - "p95": 1857.983946800232, - "p99": 1862.720012664795 - }, - "isolatedSum": { - "p50": 1869.920015335083, - "p90": 1884.9919438362122, - "p95": 1890.5919790267944, - "p99": 1900.5439281463623 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 621902848, - "combineLogicalBytes": 1243805696, - "fanoutMean": 5.29547119140625, - "recvTokensMax": 10952, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4096, - "globalTokens": 32768, - "dispatch": { - "p50": 2360.80002784729, - "p90": 2371.2639808654785, - "p95": 2375.3280639648438, - "p99": 2383.3279609680176 - }, - "combine": { - "p50": 1150.8159637451172, - "p90": 1160.032033920288, - "p95": 1162.9120111465454, - "p99": 1171.6159582138062 - }, - "roundtrip": { - "p50": 3508.7039470672607, - "p90": 3525.631904602051, - "p95": 3531.615972518921, - "p99": 3547.4560260772705 - }, - "isolatedSum": { - "p50": 3511.615991592407, - "p90": 3531.2960147857666, - "p95": 3538.240075111389, - "p99": 3554.9439191818237 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1243504640, - "combineLogicalBytes": 2487009280, - "fanoutMean": 5.294189453125, - "recvTokensMax": 21781, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-4bac404d", - "identity": "h100|deepep|7168|8|384|fp8|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||cd50548525dafdf", - "colorKey": "h100_a96c99f3", - "comparisonKey": "773edc302de99204", - "schemaVersion": 3, - "generatedAt": "2026-06-27T11:16:09.188835+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": 
"h100-dgxc-slurm_15", - "sku": "h100", - "backend": "deepep", - "phase": "prefill", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · fp8", - "model": "Kimi-K2", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 384, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "fp8", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "cd50548525dafdf", - "workloadId": "set:6:b23bc0c4b6402c69", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28287500362", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28287500362", - "createdAt": "2026-06-27T11:16:09.188835+00:00", - "sha": "df7fddee0a275156d4c72fa006cd2b73bce72613" - }, - "rows": [ - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 89.9839997291565, - "p90": 93.9520001411438, - "p95": 96.44799679517746, - "p99": 158.87999534606934 - }, - "combine": { - "p50": 98.04800152778625, - "p90": 100.5759984254837, - "p95": 102.11200267076492, - "p99": 104.67199981212616 - }, - "roundtrip": { - 
"p50": 217.75999665260315, - "p90": 221.66399657726288, - "p95": 223.00800681114197, - "p99": 227.7120053768158 - }, - "isolatedSum": { - "p50": 188.03200125694275, - "p90": 194.5279985666275, - "p95": 198.55999946594238, - "p99": 263.5519951581955 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 38757376, - "combineLogicalBytes": 77514752, - "fanoutMean": 5.2802734375, - "recvTokensMax": 707, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 256, - "globalTokens": 2048, - "dispatch": { - "p50": 107.4879989027977, - "p90": 111.42399907112122, - "p95": 114.59200084209442, - "p99": 170.30400037765503 - }, - "combine": { - "p50": 143.39199662208557, - "p90": 146.4959979057312, - "p95": 147.8399932384491, - "p99": 150.56000649929047 - }, - "roundtrip": { - "p50": 329.24801111221313, - "p90": 333.5680067539215, - "p95": 335.32801270484924, - "p99": 338.7199938297272 - }, - "isolatedSum": { - "p50": 250.87999552488327, - "p90": 257.9199969768524, - "p95": 262.4319940805435, - "p99": 320.8640068769455 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 77285376, - "combineLogicalBytes": 154570752, - "fanoutMean": 5.2646484375, - "recvTokensMax": 1391, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 512, - "globalTokens": 4096, - "dispatch": { - "p50": 146.97599411010742, - "p90": 158.9439958333969, - "p95": 163.39200735092163, - "p99": 460.86400747299194 - }, - "combine": { - "p50": 223.80800545215607, - "p90": 229.40799593925476, - "p95": 230.81600666046143, - "p99": 233.40800404548645 - }, - "roundtrip": { - "p50": 523.967981338501, - "p90": 529.1839838027954, - "p95": 530.6879878044128, - "p99": 534.6559882164001 - }, - "isolatedSum": { - "p50": 370.7839995622635, - "p90": 388.3519917726517, - "p95": 394.20801401138306, - "p99": 694.2720115184784 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 154886144, - 
"combineLogicalBytes": 309772288, - "fanoutMean": 5.275390625, - "recvTokensMax": 2754, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 1024, - "globalTokens": 8192, - "dispatch": { - "p50": 215.32799303531647, - "p90": 219.55199539661407, - "p95": 220.7999974489212, - "p99": 223.51999580860138 - }, - "combine": { - "p50": 361.11998558044434, - "p90": 366.5280044078827, - "p95": 367.8719997406006, - "p99": 371.7760145664215 - }, - "roundtrip": { - "p50": 895.6800103187561, - "p90": 901.2479782104492, - "p95": 903.1360149383545, - "p99": 906.8480134010315 - }, - "isolatedSum": { - "p50": 576.4479786157608, - "p90": 586.0799998044968, - "p95": 588.6719971895218, - "p99": 595.2960103750229 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 309750784, - "combineLogicalBytes": 619501568, - "fanoutMean": 5.2750244140625, - "recvTokensMax": 5469, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2048, - "globalTokens": 16384, - "dispatch": { - "p50": 354.8479974269867, - "p90": 371.8079924583435, - "p95": 374.2400109767914, - "p99": 380.2880048751831 - }, - "combine": { - "p50": 633.0239772796631, - "p90": 642.304003238678, - "p95": 644.927978515625, - "p99": 650.7840156555176 - }, - "roundtrip": { - "p50": 1625.4080533981323, - "p90": 1642.7520513534546, - "p95": 1652.6720523834229, - "p99": 1691.4559602737427 - }, - "isolatedSum": { - "p50": 987.8719747066498, - "p90": 1014.1119956970215, - "p95": 1019.1679894924164, - "p99": 1031.0720205307007 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 619687936, - "combineLogicalBytes": 1239375872, - "fanoutMean": 5.276611328125, - "recvTokensMax": 10883, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4096, - "globalTokens": 32768, - "dispatch": { - "p50": 641.3760185241699, - "p90": 647.5200057029724, - "p95": 649.5040059089661, - 
"p99": 655.4880142211914 - }, - "combine": { - "p50": 1148.4800577163696, - "p90": 1157.5679779052734, - "p95": 1160.2239608764648, - "p99": 1163.807988166809 - }, - "roundtrip": { - "p50": 3082.240104675293, - "p90": 3094.0160751342773, - "p95": 3098.112106323242, - "p99": 3109.055995941162 - }, - "isolatedSum": { - "p50": 1789.8560762405396, - "p90": 1805.0879836082458, - "p95": 1809.727966785431, - "p99": 1819.2960023880005 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1239834624, - "combineLogicalBytes": 2479669248, - "fanoutMean": 5.278564453125, - "recvTokensMax": 21730, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-0ee3ca7d", - "identity": "h100|deepep|7168|8|384|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||cd50548525dafdf", - "colorKey": "h100_97196257", - "comparisonKey": "7f26f72cd9fff78c", - "schemaVersion": 3, - "generatedAt": "2026-06-26T23:50:56.826066+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_15", - "sku": "h100", - "backend": "deepep", - "phase": "prefill", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "runtime-visible-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · fp8", - "model": "Kimi-K2", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 384, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "fp8", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - 
}, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "cd50548525dafdf", - "workloadId": "set:6:b23bc0c4b6402c69", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28271663775", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271663775", - "createdAt": "2026-06-26T23:50:56.826066+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 211.93599700927734, - "p90": 218.33600103855133, - "p95": 220.5120027065277, - "p99": 225.055992603302 - }, - "combine": { - "p50": 97.59999811649323, - "p90": 100.0640019774437, - "p95": 101.85600072145462, - "p99": 104.5759990811348 - }, - "roundtrip": { - "p50": 297.91998863220215, - "p90": 303.9360046386719, - "p95": 306.5600097179413, - "p99": 328.000009059906 - }, - "isolatedSum": { - "p50": 309.53599512577057, - "p90": 318.400003015995, - "p95": 322.36800342798233, - "p99": 329.6319916844368 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 38757376, - "combineLogicalBytes": 77514752, - "fanoutMean": 5.2802734375, - "recvTokensMax": 707, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 256, - "globalTokens": 2048, - "dispatch": { - "p50": 278.3359885215759, - "p90": 284.5759987831116, - "p95": 285.8560085296631, - "p99": 292.03200340270996 - }, - "combine": { - "p50": 141.88799262046814, - "p90": 145.1520025730133, - "p95": 146.88000082969666, - "p99": 151.39199793338776 - }, - "roundtrip": { - "p50": 404.4800102710724, - "p90": 410.7840061187744, - "p95": 413.9519929885864, - "p99": 420.51199078559875 - }, - 
"isolatedSum": { - "p50": 420.22398114204407, - "p90": 429.7280013561249, - "p95": 432.73600935935974, - "p99": 443.4240013360977 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 77285376, - "combineLogicalBytes": 154570752, - "fanoutMean": 5.2646484375, - "recvTokensMax": 1391, - "stragglerRank": 2, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 512, - "globalTokens": 4096, - "dispatch": { - "p50": 397.2800076007843, - "p90": 402.8480052947998, - "p95": 405.44000267982483, - "p99": 410.71999073028564 - }, - "combine": { - "p50": 221.02400660514832, - "p90": 225.0880002975464, - "p95": 226.01599991321564, - "p99": 229.50400412082672 - }, - "roundtrip": { - "p50": 601.4400124549866, - "p90": 608.1600189208984, - "p95": 610.4000210762024, - "p99": 616.8000102043152 - }, - "isolatedSum": { - "p50": 618.3040142059326, - "p90": 627.9360055923462, - "p95": 631.4560025930405, - "p99": 640.2239948511124 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 154886144, - "combineLogicalBytes": 309772288, - "fanoutMean": 5.275390625, - "recvTokensMax": 2754, - "stragglerRank": 1, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 1024, - "globalTokens": 8192, - "dispatch": { - "p50": 659.5199704170227, - "p90": 663.2959842681885, - "p95": 665.0239825248718, - "p99": 667.2319769859314 - }, - "combine": { - "p50": 360.22400856018066, - "p90": 364.9280071258545, - "p95": 366.3040101528168, - "p99": 369.85599994659424 - }, - "roundtrip": { - "p50": 1002.9439926147461, - "p90": 1008.3839893341064, - "p95": 1010.0159645080566, - "p99": 1013.856053352356 - }, - "isolatedSum": { - "p50": 1019.7439789772034, - "p90": 1028.223991394043, - "p95": 1031.3279926776886, - "p99": 1037.0879769325256 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 309750784, - "combineLogicalBytes": 619501568, - "fanoutMean": 5.2750244140625, - "recvTokensMax": 5469, - "stragglerRank": 1, - "correct": true, - 
"samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2048, - "globalTokens": 16384, - "dispatch": { - "p50": 1226.9760370254517, - "p90": 1235.1679801940918, - "p95": 1236.8320226669312, - "p99": 1242.143988609314 - }, - "combine": { - "p50": 624.5120167732239, - "p90": 631.8399906158447, - "p95": 634.1120004653931, - "p99": 675.8400201797485 - }, - "roundtrip": { - "p50": 1831.455945968628, - "p90": 1840.831995010376, - "p95": 1843.775987625122, - "p99": 1848.2880592346191 - }, - "isolatedSum": { - "p50": 1851.4880537986755, - "p90": 1867.0079708099365, - "p95": 1870.9440231323242, - "p99": 1917.9840087890625 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 619687936, - "combineLogicalBytes": 1239375872, - "fanoutMean": 5.276611328125, - "recvTokensMax": 10883, - "stragglerRank": 2, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4096, - "globalTokens": 32768, - "dispatch": { - "p50": 2344.1600799560547, - "p90": 2350.719928741455, - "p95": 2352.9601097106934, - "p99": 2358.0799102783203 - }, - "combine": { - "p50": 1141.4719820022583, - "p90": 1150.9439945220947, - "p95": 1153.7920236587524, - "p99": 1162.592053413391 - }, - "roundtrip": { - "p50": 3469.856023788452, - "p90": 3481.6958904266357, - "p95": 3484.3521118164062, - "p99": 3490.528106689453 - }, - "isolatedSum": { - "p50": 3485.632061958313, - "p90": 3501.66392326355, - "p95": 3506.752133369446, - "p99": 3520.6719636917114 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1239834624, - "combineLogicalBytes": 2479669248, - "fanoutMean": 5.278564453125, - "recvTokensMax": 21730, - "stragglerRank": 3, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-560e55e7", - "identity": "h100|deepep|4096|8|128|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|normalized|0.18|dc27c5e0894e569", - "colorKey": "h100_7f10961a", - "comparisonKey": "6a3a9660e48371b3", - "schemaVersion": 3, - 
"generatedAt": "2026-06-27T00:45:34.307375+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_08", - "sku": "h100", - "backend": "deepep", - "phase": "prefill", - "mode": "normal", - "resourceMode": "normalized", - "suite": "resource-constrained", - "comparisonClass": "standardized", - "measurementContract": "runtime-visible-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · fp8 (norm)", - "model": "Qwen3.5", - "shape": { - "hidden": 4096, - "topk": 8, - "experts": 128, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "fp8", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": 0.18, - "achievedFraction": 0.1818, - "configuredUnits": 24, - "deviceUnits": 132, - "resourceClass": "resource-constrained", - "conformanceClass": "resource-conforming", - "fixedKernel": false, - "paretoEligible": true - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "dc27c5e0894e569", - "workloadId": "set:6:76d8142d69406335", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28273218274", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28273218274", - "createdAt": "2026-06-27T00:45:34.307375+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 196.79999351501465, - "p90": 203.80799472332, - "p95": 205.79199492931366, - "p99": 214.11199867725372 - }, - 
"combine": { - "p50": 75.71200281381607, - "p90": 78.5600021481514, - "p95": 80.54400235414505, - "p99": 84.6719965338707 - }, - "roundtrip": { - "p50": 255.64798712730408, - "p90": 264.41600918769836, - "p95": 274.1119861602783, - "p99": 321.9519853591919 - }, - "isolatedSum": { - "p50": 272.5119963288307, - "p90": 282.3679968714714, - "p95": 286.3359972834587, - "p99": 298.7839952111244 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 22282240, - "combineLogicalBytes": 44564480, - "fanoutMean": 5.3125, - "recvTokensMax": 699, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 256, - "globalTokens": 2048, - "dispatch": { - "p50": 231.26399517059326, - "p90": 269.6639895439148, - "p95": 272.19200134277344, - "p99": 278.01600098609924 - }, - "combine": { - "p50": 100.99200159311295, - "p90": 109.82400178909302, - "p95": 110.81600189208984, - "p99": 113.3119985461235 - }, - "roundtrip": { - "p50": 315.8720135688782, - "p90": 327.39201188087463, - "p95": 355.679988861084, - "p99": 369.53601241111755 - }, - "isolatedSum": { - "p50": 332.2559967637062, - "p90": 379.4879913330078, - "p95": 383.0080032348633, - "p99": 391.32799953222275 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 44863488, - "combineLogicalBytes": 89726976, - "fanoutMean": 5.34814453125, - "recvTokensMax": 1385, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 512, - "globalTokens": 4096, - "dispatch": { - "p50": 309.59999561309814, - "p90": 352.1279990673065, - "p95": 355.00800609588623, - "p99": 361.1519932746887 - }, - "combine": { - "p50": 147.90399372577667, - "p90": 156.99200332164764, - "p95": 158.24000537395477, - "p99": 162.08000481128693 - }, - "roundtrip": { - "p50": 442.4000084400177, - "p90": 483.3280146121979, - "p95": 487.8399968147278, - "p99": 518.4000134468079 - }, - "isolatedSum": { - "p50": 457.5039893388748, - "p90": 509.12000238895416, - "p95": 
513.248011469841, - "p99": 523.2319980859756 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 89751552, - "combineLogicalBytes": 179503104, - "fanoutMean": 5.349609375, - "recvTokensMax": 2772, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 1024, - "globalTokens": 8192, - "dispatch": { - "p50": 453.98399233818054, - "p90": 459.3600034713745, - "p95": 461.88798546791077, - "p99": 466.623991727829 - }, - "combine": { - "p50": 235.29599606990814, - "p90": 239.00799453258514, - "p95": 240.51199853420258, - "p99": 242.46400594711304 - }, - "roundtrip": { - "p50": 673.3120083808899, - "p90": 678.8480281829834, - "p95": 680.6079745292664, - "p99": 684.544026851654 - }, - "isolatedSum": { - "p50": 689.2799884080887, - "p90": 698.3679980039597, - "p95": 702.3999840021133, - "p99": 709.087997674942 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 179511296, - "combineLogicalBytes": 359022592, - "fanoutMean": 5.349853515625, - "recvTokensMax": 5558, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2048, - "globalTokens": 16384, - "dispatch": { - "p50": 784.928023815155, - "p90": 799.8719811439514, - "p95": 803.2000064849854, - "p99": 809.0239763259888 - }, - "combine": { - "p50": 405.4720103740692, - "p90": 416.06399416923523, - "p95": 418.3039963245392, - "p99": 422.4959909915924 - }, - "roundtrip": { - "p50": 1170.1120138168335, - "p90": 1179.58402633667, - "p95": 1183.6479902267456, - "p99": 1192.7679777145386 - }, - "isolatedSum": { - "p50": 1190.4000341892242, - "p90": 1215.9359753131866, - "p95": 1221.5040028095245, - "p99": 1231.5199673175812 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 358055936, - "combineLogicalBytes": 716111872, - "fanoutMean": 5.33544921875, - "recvTokensMax": 10982, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4096, - "globalTokens": 
32768, - "dispatch": { - "p50": 1483.3279848098755, - "p90": 1490.496039390564, - "p95": 1493.6319589614868, - "p99": 1501.5679597854614 - }, - "combine": { - "p50": 732.2880029678345, - "p90": 738.8160228729248, - "p95": 740.8000230789185, - "p99": 745.9840178489685 - }, - "roundtrip": { - "p50": 2199.039936065674, - "p90": 2209.439992904663, - "p95": 2212.5439643859863, - "p99": 2217.087984085083 - }, - "isolatedSum": { - "p50": 2215.61598777771, - "p90": 2229.3120622634888, - "p95": 2234.4319820404053, - "p99": 2247.55197763443 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 716197888, - "combineLogicalBytes": 1432395776, - "fanoutMean": 5.336090087890625, - "recvTokensMax": 21939, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-de081cfe", - "identity": "h100|deepep|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|normalized|0.18|64d989e2e2a6b31", - "colorKey": "h100_91aa6e56", - "comparisonKey": "e439d265ee12c9f2", - "schemaVersion": 3, - "generatedAt": "2026-06-26T17:30:20.983875+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_03", - "sku": "h100", - "backend": "deepep", - "phase": "prefill", - "mode": "normal", - "resourceMode": "normalized", - "suite": "resource-constrained", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · fp8 (norm)", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "fp8", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": 0.18, - "achievedFraction": 0.1818, - "configuredUnits": 24, - 
"deviceUnits": 132, - "resourceClass": "resource-constrained", - "conformanceClass": "resource-conforming", - "fixedKernel": false, - "paretoEligible": true - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "64d989e2e2a6b31", - "workloadId": "set:6:a426d66e479dc893", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28254323956", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254323956", - "createdAt": "2026-06-26T17:30:20.983875+00:00", - "sha": "60dec7d70f554e252fec87709e2be52752947db1" - }, - "rows": [ - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 89.59999680519104, - "p90": 93.72799843549728, - "p95": 95.36000341176987, - "p99": 100.832000374794 - }, - "combine": { - "p50": 98.14400225877762, - "p90": 100.60799866914749, - "p95": 102.11200267076492, - "p99": 105.0880029797554 - }, - "roundtrip": { - "p50": 215.13600647449493, - "p90": 218.55999529361725, - "p95": 220.12799978256226, - "p99": 228.06400060653687 - }, - "isolatedSum": { - "p50": 187.74399906396866, - "p90": 194.33599710464478, - "p95": 197.4720060825348, - "p99": 205.9200033545494 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 38836224, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 256, - "globalTokens": 2048, - "dispatch": { - "p50": 106.175996363163, - "p90": 121.47200107574463, - "p95": 122.52800166606903, - "p99": 125.91999769210815 - }, - "combine": { - "p50": 139.48799669742584, - "p90": 146.17599546909332, - "p95": 147.61599898338318, - "p99": 
149.82399344444275 - }, - "roundtrip": { - "p50": 320.92800736427307, - "p90": 336.41600608825684, - "p95": 337.92001008987427, - "p99": 341.2800133228302 - }, - "isolatedSum": { - "p50": 245.66399306058884, - "p90": 267.64799654483795, - "p95": 270.1440006494522, - "p99": 275.7439911365509 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 77944832, - "combineLogicalBytes": 155889664, - "fanoutMean": 5.3095703125, - "recvTokensMax": 1422, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 512, - "globalTokens": 4096, - "dispatch": { - "p50": 138.46400380134583, - "p90": 182.8799992799759, - "p95": 190.97599387168884, - "p99": 197.28000462055206 - }, - "combine": { - "p50": 208.3200067281723, - "p90": 223.00800681114197, - "p95": 231.83999955654144, - "p99": 242.01600253582 - }, - "roundtrip": { - "p50": 509.69600677490234, - "p90": 521.5680003166199, - "p95": 523.4879851341248, - "p99": 528.9599895477295 - }, - "isolatedSum": { - "p50": 346.7840105295181, - "p90": 405.88800609111786, - "p95": 422.8159934282303, - "p99": 439.29600715637207 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 156133376, - "combineLogicalBytes": 312266752, - "fanoutMean": 5.31787109375, - "recvTokensMax": 2779, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 1024, - "globalTokens": 8192, - "dispatch": { - "p50": 197.4399983882904, - "p90": 210.87999641895294, - "p95": 213.31200003623962, - "p99": 216.2880003452301 - }, - "combine": { - "p50": 325.82399249076843, - "p90": 330.1120102405548, - "p95": 331.6799998283386, - "p99": 335.80800890922546 - }, - "roundtrip": { - "p50": 847.4879860877991, - "p90": 858.0160140991211, - "p95": 861.0879778862, - "p99": 869.2799806594849 - }, - "isolatedSum": { - "p50": 523.2639908790588, - "p90": 540.9920066595078, - "p95": 544.9919998645782, - "p99": 552.0960092544556 - }, - "roundtripMeasured": true, - 
"dispatchLogicalBytes": 311721984, - "combineLogicalBytes": 623443968, - "fanoutMean": 5.30859375, - "recvTokensMax": 5505, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2048, - "globalTokens": 16384, - "dispatch": { - "p50": 318.65599751472473, - "p90": 335.29600501060486, - "p95": 338.0799889564514, - "p99": 347.29599952697754 - }, - "combine": { - "p50": 559.7760081291199, - "p90": 566.815972328186, - "p95": 569.5040225982666, - "p99": 573.311984539032 - }, - "roundtrip": { - "p50": 1524.0000486373901, - "p90": 1544.0640449523926, - "p95": 1550.7839918136597, - "p99": 1576.7359733581543 - }, - "isolatedSum": { - "p50": 878.4320056438446, - "p90": 902.1119773387909, - "p95": 907.584011554718, - "p99": 920.6079840660095 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 621902848, - "combineLogicalBytes": 1243805696, - "fanoutMean": 5.29547119140625, - "recvTokensMax": 10952, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4096, - "globalTokens": 32768, - "dispatch": { - "p50": 574.7519731521606, - "p90": 593.1839942932129, - "p95": 598.1760025024414, - "p99": 604.7999858856201 - }, - "combine": { - "p50": 1025.056004524231, - "p90": 1033.5359573364258, - "p95": 1036.1920595169067, - "p99": 1042.847990989685 - }, - "roundtrip": { - "p50": 2880.863904953003, - "p90": 2894.5279121398926, - "p95": 2899.9040126800537, - "p99": 2908.3518981933594 - }, - "isolatedSum": { - "p50": 1599.8079776763916, - "p90": 1626.7199516296387, - "p95": 1634.3680620193481, - "p99": 1647.6479768753052 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1243504640, - "combineLogicalBytes": 2487009280, - "fanoutMean": 5.294189453125, - "recvTokensMax": 21781, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-e8c2a4d2", - "identity": 
"h100|deepep|7168|8|256|fp8|normal|cached-layout-comm-only-v1|uniform|8|prefill|normal|none|none|0|normalized|0.18|64d989e2e2a6b31", - "colorKey": "h100_eddc3af6", - "comparisonKey": "fd73340f2af530d5", - "schemaVersion": 3, - "generatedAt": "2026-06-26T17:30:48.926445+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_19", - "sku": "h100", - "backend": "deepep", - "phase": "prefill", - "mode": "normal", - "resourceMode": "normalized", - "suite": "resource-constrained", - "comparisonClass": "standardized", - "measurementContract": "cached-layout-comm-only-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · fp8 (norm) [cl]", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "fp8", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": 0.18, - "achievedFraction": 0.1818, - "configuredUnits": 24, - "deviceUnits": 132, - "resourceClass": "resource-constrained", - "conformanceClass": "resource-conforming", - "fixedKernel": false, - "paretoEligible": true - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "64d989e2e2a6b31", - "workloadId": "set:6:a426d66e479dc893", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28254341346", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254341346", - "createdAt": "2026-06-26T17:30:48.926445+00:00", - "sha": 
"60dec7d70f554e252fec87709e2be52752947db1" - }, - "rows": [ - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 77.60000228881836, - "p90": 81.4720019698143, - "p95": 83.52000266313553, - "p99": 102.7199998497963 - }, - "combine": { - "p50": 98.08000177145004, - "p90": 102.01600193977356, - "p95": 115.35999923944473, - "p99": 344.0319895744324 - }, - "roundtrip": { - "p50": 205.1520049571991, - "p90": 208.19200575351715, - "p95": 209.85600352287292, - "p99": 214.9440050125122 - }, - "isolatedSum": { - "p50": 175.6800040602684, - "p90": 183.48800390958786, - "p95": 198.88000190258026, - "p99": 446.75198942422867 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 38836224, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 256, - "globalTokens": 2048, - "dispatch": { - "p50": 95.74399888515472, - "p90": 110.07999628782272, - "p95": 111.13599687814713, - "p99": 114.81600254774094 - }, - "combine": { - "p50": 141.7279988527298, - "p90": 148.8959938287735, - "p95": 150.4960060119629, - "p99": 153.02400290966034 - }, - "roundtrip": { - "p50": 311.45599484443665, - "p90": 319.5840120315552, - "p95": 321.696013212204, - "p99": 324.67201352119446 - }, - "isolatedSum": { - "p50": 237.47199773788452, - "p90": 258.9759901165962, - "p95": 261.63200289011, - "p99": 267.8400054574013 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 77944832, - "combineLogicalBytes": 155889664, - "fanoutMean": 5.3095703125, - "recvTokensMax": 1422, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 512, - "globalTokens": 4096, - "dispatch": { - "p50": 129.56799566745758, - "p90": 144.57599818706512, - "p95": 146.14400267601013, - "p99": 148.8959938287735 - }, - "combine": { - "p50": 213.4079933166504, - "p90": 218.36799383163452, - "p95": 219.7760045528412, 
- "p99": 224.2240011692047 - }, - "roundtrip": { - "p50": 500.70399045944214, - "p90": 508.1599950790405, - "p95": 510.81597805023193, - "p99": 514.8159861564636 - }, - "isolatedSum": { - "p50": 342.97598898410797, - "p90": 362.94399201869965, - "p95": 365.9200072288513, - "p99": 373.1199949979782 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 156133376, - "combineLogicalBytes": 312266752, - "fanoutMean": 5.31787109375, - "recvTokensMax": 2779, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 1024, - "globalTokens": 8192, - "dispatch": { - "p50": 186.49600446224213, - "p90": 196.0960030555725, - "p95": 197.50399887561798, - "p99": 202.55999267101288 - }, - "combine": { - "p50": 327.7760148048401, - "p90": 333.18400382995605, - "p95": 334.3679904937744, - "p99": 337.72799372673035 - }, - "roundtrip": { - "p50": 835.2640271186829, - "p90": 841.69602394104, - "p95": 844.0639972686768, - "p99": 848.2879996299744 - }, - "isolatedSum": { - "p50": 514.2720192670822, - "p90": 529.2800068855286, - "p95": 531.8719893693924, - "p99": 540.2879863977432 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 311721984, - "combineLogicalBytes": 623443968, - "fanoutMean": 5.30859375, - "recvTokensMax": 5505, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2048, - "globalTokens": 16384, - "dispatch": { - "p50": 306.62399530410767, - "p90": 320.19200921058655, - "p95": 322.7519989013672, - "p99": 327.1679878234863 - }, - "combine": { - "p50": 559.6479773521423, - "p90": 567.296028137207, - "p95": 570.1119899749756, - "p99": 574.5919942855835 - }, - "roundtrip": { - "p50": 1509.6960067749023, - "p90": 1522.7199792861938, - "p95": 1525.6320238113403, - "p99": 1585.9839916229248 - }, - "isolatedSum": { - "p50": 866.27197265625, - "p90": 887.4880373477936, - "p95": 892.8639888763428, - "p99": 901.7599821090698 - }, - "roundtripMeasured": true, - 
"dispatchLogicalBytes": 621902848, - "combineLogicalBytes": 1243805696, - "fanoutMean": 5.29547119140625, - "recvTokensMax": 10952, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4096, - "globalTokens": 32768, - "dispatch": { - "p50": 539.8719906806946, - "p90": 550.7839918136597, - "p95": 555.7119846343994, - "p99": 564.7040009498596 - }, - "combine": { - "p50": 1024.9279737472534, - "p90": 1034.3040227890015, - "p95": 1037.11998462677, - "p99": 1047.0720529556274 - }, - "roundtrip": { - "p50": 2850.719928741455, - "p90": 2861.407995223999, - "p95": 2864.9280071258545, - "p99": 2870.176076889038 - }, - "isolatedSum": { - "p50": 1564.799964427948, - "p90": 1585.0880146026611, - "p95": 1592.8319692611694, - "p99": 1611.776053905487 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1243504640, - "combineLogicalBytes": 2487009280, - "fanoutMean": 5.294189453125, - "recvTokensMax": 21781, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-f6d2d196", - "identity": "h100|deepep|7168|8|256|fp8|normal|cached-layout-comm-only-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", - "colorKey": "h100_ec72792b", - "comparisonKey": "39b4bc74c45641cb", - "schemaVersion": 3, - "generatedAt": "2026-06-26T23:48:09.793091+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_09", - "sku": "h100", - "backend": "deepep", - "phase": "prefill", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "cached-layout-comm-only-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep · fp8 [cl]", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - 
"unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "fp8", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "64d989e2e2a6b31", - "workloadId": "set:6:a426d66e479dc893", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28271576503", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271576503", - "createdAt": "2026-06-26T23:48:09.793091+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 76.73600316047668, - "p90": 80.19199967384338, - "p95": 82.17599987983704, - "p99": 85.4720026254654 - }, - "combine": { - "p50": 98.68799895048141, - "p90": 100.8640006184578, - "p95": 102.84800082445145, - "p99": 113.27999830245972 - }, - "roundtrip": { - "p50": 204.25599813461304, - "p90": 206.84799551963806, - "p95": 208.0959975719452, - "p99": 211.32799983024597 - }, - "isolatedSum": { - "p50": 175.4240021109581, - "p90": 181.05600029230118, - "p95": 185.02400070428848, - "p99": 198.7520009279251 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 38836224, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 256, - "globalTokens": 2048, - 
"dispatch": { - "p50": 95.551997423172, - "p90": 98.65599870681763, - "p95": 100.44799745082855, - "p99": 104.63999956846237 - }, - "combine": { - "p50": 143.51999759674072, - "p90": 146.04799449443817, - "p95": 147.2640037536621, - "p99": 150.07999539375305 - }, - "roundtrip": { - "p50": 317.05600023269653, - "p90": 320.67200541496277, - "p95": 322.07998633384705, - "p99": 325.56799054145813 - }, - "isolatedSum": { - "p50": 239.07199501991272, - "p90": 244.7039932012558, - "p95": 247.71200120449066, - "p99": 254.71999496221542 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 77944832, - "combineLogicalBytes": 155889664, - "fanoutMean": 5.3095703125, - "recvTokensMax": 1422, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 512, - "globalTokens": 4096, - "dispatch": { - "p50": 132.4159950017929, - "p90": 136.09600067138672, - "p95": 137.40800321102142, - "p99": 140.19200205802917 - }, - "combine": { - "p50": 224.16000068187714, - "p90": 228.2239943742752, - "p95": 229.312002658844, - "p99": 232.03200101852417 - }, - "roundtrip": { - "p50": 517.5039768218994, - "p90": 522.5920081138611, - "p95": 523.8400101661682, - "p99": 534.1759920120239 - }, - "isolatedSum": { - "p50": 356.57599568367004, - "p90": 364.3199950456619, - "p95": 366.7200058698654, - "p99": 372.22400307655334 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 156133376, - "combineLogicalBytes": 312266752, - "fanoutMean": 5.31787109375, - "recvTokensMax": 2779, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 1024, - "globalTokens": 8192, - "dispatch": { - "p50": 203.5199999809265, - "p90": 207.39200711250305, - "p95": 208.95999670028687, - "p99": 213.1199985742569 - }, - "combine": { - "p50": 359.0719997882843, - "p90": 364.25599455833435, - "p95": 365.4080033302307, - "p99": 367.35999584198 - }, - "roundtrip": { - "p50": 883.679986000061, - "p90": 889.6960020065308, 
- "p95": 891.5839791297913, - "p99": 897.7599740028381 - }, - "isolatedSum": { - "p50": 562.5919997692108, - "p90": 571.6480016708374, - "p95": 574.3680000305176, - "p99": 580.4799944162369 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 311721984, - "combineLogicalBytes": 623443968, - "fanoutMean": 5.30859375, - "recvTokensMax": 5505, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2048, - "globalTokens": 16384, - "dispatch": { - "p50": 341.0240113735199, - "p90": 352.9280126094818, - "p95": 354.7840118408203, - "p99": 361.31200194358826 - }, - "combine": { - "p50": 631.2000155448914, - "p90": 639.136016368866, - "p95": 641.5359973907471, - "p99": 644.1599726676941 - }, - "roundtrip": { - "p50": 1616.5440082550049, - "p90": 1624.9920129776, - "p95": 1627.3599863052368, - "p99": 1631.9680213928223 - }, - "isolatedSum": { - "p50": 972.2240269184113, - "p90": 992.0640289783478, - "p95": 996.3200092315674, - "p99": 1005.4719746112823 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 621902848, - "combineLogicalBytes": 1243805696, - "fanoutMean": 5.29547119140625, - "recvTokensMax": 10952, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4096, - "globalTokens": 32768, - "dispatch": { - "p50": 611.6160154342651, - "p90": 621.0240125656128, - "p95": 624.0959763526917, - "p99": 790.3040051460266 - }, - "combine": { - "p50": 1165.503978729248, - "p90": 1175.487995147705, - "p95": 1177.664041519165, - "p99": 1188.9280080795288 - }, - "roundtrip": { - "p50": 3078.4640312194824, - "p90": 3095.8399772644043, - "p95": 3103.071928024292, - "p99": 3115.9679889678955 - }, - "isolatedSum": { - "p50": 1777.1199941635132, - "p90": 1796.5120077133179, - "p95": 1801.7600178718567, - "p99": 1979.2320132255554 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1243504640, - "combineLogicalBytes": 2487009280, - "fanoutMean": 5.294189453125, - 
"recvTokensMax": 21781, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-0f748c2f", - "identity": "h100|deepep-hybrid|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", - "colorKey": "h100_ec8c28a9", - "comparisonKey": "04d8dc12f0898400", - "schemaVersion": 3, - "generatedAt": "2026-06-28T02:32:47.489418+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_08", - "sku": "h100", - "backend": "deepep-hybrid", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep-hybrid · bf16", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": null, - "configuredUnits": null, - "deviceUnits": 132, - "resourceClass": "fixed-kernel", - "conformanceClass": "not-applicable", - "fixedKernel": true, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "ac583971f94b176", - "workloadId": "set:8:d8d49658059863f2", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "hybrid-e0a5b1d", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28308875809", - "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28308875809", - "createdAt": "2026-06-28T02:32:47.489418+00:00", - "sha": "02ef8d2d9b6fd7519504810daae202e88ee66360" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 167.1680063009262, - "p90": 219.7120040655136, - "p95": 222.01600670814514, - "p99": 227.84000635147095 - }, - "combine": { - "p50": 36.896001547575, - "p90": 52.2879995405674, - "p95": 52.799999713897705, - "p99": 57.34400078654289 - }, - "roundtrip": { - "p50": 195.3279972076416, - "p90": 256.76798820495605, - "p95": 260.51199436187744, - "p99": 266.2079930305481 - }, - "isolatedSum": { - "p50": 204.0640078485012, - "p90": 272.000003606081, - "p95": 274.81600642204285, - "p99": 285.18400713801384 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 630784, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 165.27999937534332, - "p90": 173.08799922466278, - "p95": 185.98400056362152, - "p99": 224.48000311851501 - }, - "combine": { - "p50": 35.93600168824196, - "p90": 39.264000952243805, - "p95": 42.047999799251556, - "p99": 47.93599992990494 - }, - "roundtrip": { - "p50": 193.34399700164795, - "p90": 199.64799284934998, - "p95": 202.72000133991241, - "p99": 207.58399367332458 - }, - "isolatedSum": { - "p50": 201.21600106358528, - "p90": 212.35200017690659, - "p95": 228.03200036287308, - "p99": 272.41600304841995 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1232896, - "combineLogicalBytes": 1232896, - "fanoutMean": 5.375, - "recvTokensMax": 13, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 166.97600483894348, - "p90": 218.55999529361725, - "p95": 221.72799706459045, - "p99": 226.17599368095398 - 
}, - "combine": { - "p50": 39.64800015091896, - "p90": 52.15999856591225, - "p95": 52.76799947023392, - "p99": 55.93600124120712 - }, - "roundtrip": { - "p50": 195.0400024652481, - "p90": 255.90398907661438, - "p95": 258.432000875473, - "p99": 266.1759853363037 - }, - "isolatedSum": { - "p50": 206.62400498986244, - "p90": 270.7199938595295, - "p95": 274.49599653482437, - "p99": 282.1119949221611 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2480128, - "combineLogicalBytes": 2480128, - "fanoutMean": 5.40625, - "recvTokensMax": 29, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 168.44800114631653, - "p90": 221.02400660514832, - "p95": 223.39199483394623, - "p99": 229.34399545192719 - }, - "combine": { - "p50": 39.744000881910324, - "p90": 52.352000027894974, - "p95": 53.18399891257286, - "p99": 58.079998940229416 - }, - "roundtrip": { - "p50": 195.77600061893463, - "p90": 259.68000292778015, - "p95": 262.14399933815, - "p99": 267.64801144599915 - }, - "isolatedSum": { - "p50": 208.19200202822685, - "p90": 273.3760066330433, - "p95": 276.5759937465191, - "p99": 287.4239943921566 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 180.28800189495087, - "p90": 222.1119999885559, - "p95": 225.37599503993988, - "p99": 237.0239943265915 - }, - "combine": { - "p50": 42.94399917125702, - "p90": 53.727999329566956, - "p95": 57.0559985935688, - "p99": 143.96800100803375 - }, - "roundtrip": { - "p50": 211.07199788093567, - "p90": 258.84801149368286, - "p95": 261.85598969459534, - "p99": 270.7520127296448 - }, - "isolatedSum": { - "p50": 223.23200106620789, - "p90": 275.83999931812286, - "p95": 
282.4319936335087, - "p99": 380.99199533462524 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 9920512, - "combineLogicalBytes": 9920512, - "fanoutMean": 5.40625, - "recvTokensMax": 92, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 205.4399996995926, - "p90": 242.5599992275238, - "p95": 244.89599466323853, - "p99": 249.31199848651886 - }, - "combine": { - "p50": 45.21600157022476, - "p90": 54.55999821424484, - "p95": 55.48800155520439, - "p99": 59.13599953055382 - }, - "roundtrip": { - "p50": 241.66400730609894, - "p90": 279.9679934978485, - "p95": 282.20799565315247, - "p99": 286.3680124282837 - }, - "isolatedSum": { - "p50": 250.65600126981735, - "p90": 297.11999744176865, - "p95": 300.3839962184429, - "p99": 308.4479980170727 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 19726336, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 182, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 64, - "globalTokens": 512, - "dispatch": { - "p50": 258.36798548698425, - "p90": 366.784006357193, - "p95": 370.36800384521484, - "p99": 381.98399543762207 - }, - "combine": { - "p50": 56.352000683546066, - "p90": 67.29599833488464, - "p95": 68.09599697589874, - "p99": 72.9919970035553 - }, - "roundtrip": { - "p50": 305.88799715042114, - "p90": 344.9920117855072, - "p95": 346.78399562835693, - "p99": 349.8559892177582 - }, - "isolatedSum": { - "p50": 314.7199861705303, - "p90": 434.08000469207764, - "p95": 438.4640008211136, - "p99": 454.97599244117737 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 38993920, - "combineLogicalBytes": 38993920, - "fanoutMean": 5.3125, - "recvTokensMax": 367, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 
262.0159983634949, - "p90": 300.86401104927063, - "p95": 303.99999022483826, - "p99": 463.1359875202179 - }, - "combine": { - "p50": 69.85600292682648, - "p90": 80.73599636554718, - "p95": 81.53600245714188, - "p99": 84.95999872684479 - }, - "roundtrip": { - "p50": 325.0240087509155, - "p90": 364.4160032272339, - "p95": 366.36799573898315, - "p99": 370.11200189590454 - }, - "isolatedSum": { - "p50": 331.87200129032135, - "p90": 381.6000074148178, - "p95": 385.53599268198013, - "p99": 548.0959862470627 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-402bdadc", - "identity": "h100|deepep-hybrid|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", - "colorKey": "h100_ec8c28a9", - "comparisonKey": "2d8d821b3680de8a", - "schemaVersion": 3, - "generatedAt": "2026-06-28T02:32:51.441168+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_17", - "sku": "h100", - "backend": "deepep-hybrid", - "phase": "prefill", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · deepep-hybrid · bf16", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": null, - "configuredUnits": null, - "deviceUnits": 132, - "resourceClass": 
"fixed-kernel", - "conformanceClass": "not-applicable", - "fixedKernel": true, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "64d989e2e2a6b31", - "workloadId": "set:6:a426d66e479dc893", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "hybrid-e0a5b1d", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28308875809", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28308875809", - "createdAt": "2026-06-28T02:32:51.441168+00:00", - "sha": "02ef8d2d9b6fd7519504810daae202e88ee66360" - }, - "rows": [ - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 257.79199600219727, - "p90": 262.65600323677063, - "p95": 264.95999097824097, - "p99": 272.2240090370178 - }, - "combine": { - "p50": 69.21599805355072, - "p90": 71.19999825954437, - "p95": 73.27999919652939, - "p99": 79.83999699354172 - }, - "roundtrip": { - "p50": 320.47998905181885, - "p90": 324.3519961833954, - "p95": 327.07199454307556, - "p99": 332.3200047016144 - }, - "isolatedSum": { - "p50": 327.007994055748, - "p90": 333.856001496315, - "p95": 338.23999017477036, - "p99": 352.06400603055954 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 256, - "globalTokens": 2048, - "dispatch": { - "p50": 269.6320116519928, - "p90": 274.6559977531433, - "p95": 276.70401334762573, - "p99": 282.6240062713623 - }, - "combine": { - "p50": 104.51199859380722, - "p90": 106.59199953079224, - "p95": 107.45599865913391, - "p99": 110.36799848079681 - }, - "roundtrip": { - 
"p50": 368.3199882507324, - "p90": 372.79999256134033, - "p95": 375.0399947166443, - "p99": 377.85598635673523 - }, - "isolatedSum": { - "p50": 374.1440102458, - "p90": 381.24799728393555, - "p95": 384.16001200675964, - "p99": 392.9920047521591 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 155889664, - "combineLogicalBytes": 155889664, - "fanoutMean": 5.3095703125, - "recvTokensMax": 1422, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 512, - "globalTokens": 4096, - "dispatch": { - "p50": 293.15200448036194, - "p90": 298.5279858112335, - "p95": 300.7360100746155, - "p99": 305.9839904308319 - }, - "combine": { - "p50": 172.31999337673187, - "p90": 174.78400468826294, - "p95": 175.9680062532425, - "p99": 179.1680008172989 - }, - "roundtrip": { - "p50": 464.4159972667694, - "p90": 468.8960015773773, - "p95": 470.8159863948822, - "p99": 480.76799511909485 - }, - "isolatedSum": { - "p50": 465.4719978570938, - "p90": 473.31199049949646, - "p95": 476.70401632785797, - "p99": 485.1519912481308 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 312266752, - "combineLogicalBytes": 312266752, - "fanoutMean": 5.31787109375, - "recvTokensMax": 2779, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 1024, - "globalTokens": 8192, - "dispatch": { - "p50": 448.0000138282776, - "p90": 458.8800072669983, - "p95": 461.1839950084686, - "p99": 468.4799909591675 - }, - "combine": { - "p50": 299.1679906845093, - "p90": 301.7280101776123, - "p95": 302.4959862232208, - "p99": 305.6960105895996 - }, - "roundtrip": { - "p50": 749.9840259552002, - "p90": 761.568009853363, - "p95": 765.2480006217957, - "p99": 789.8880243301392 - }, - "isolatedSum": { - "p50": 747.1680045127869, - "p90": 760.6080174446106, - "p95": 763.6799812316895, - "p99": 774.1760015487671 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 623443968, - "combineLogicalBytes": 
623443968, - "fanoutMean": 5.30859375, - "recvTokensMax": 5505, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2048, - "globalTokens": 16384, - "dispatch": { - "p50": 729.3440103530884, - "p90": 734.2399954795837, - "p95": 737.824022769928, - "p99": 864.9600148200989 - }, - "combine": { - "p50": 555.8080077171326, - "p90": 558.6240291595459, - "p95": 559.935986995697, - "p99": 565.7600164413452 - }, - "roundtrip": { - "p50": 1285.599946975708, - "p90": 1290.560007095337, - "p95": 1292.7680015563965, - "p99": 1297.9520559310913 - }, - "isolatedSum": { - "p50": 1285.152018070221, - "p90": 1292.8640246391296, - "p95": 1297.760009765625, - "p99": 1430.720031261444 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1243805696, - "combineLogicalBytes": 1243805696, - "fanoutMean": 5.29547119140625, - "recvTokensMax": 10952, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4096, - "globalTokens": 32768, - "dispatch": { - "p50": 1308.0320358276367, - "p90": 1330.7839632034302, - "p95": 1333.2480192184448, - "p99": 1338.43195438385 - }, - "combine": { - "p50": 1069.3119764328003, - "p90": 1073.2159614562988, - "p95": 1074.7519731521606, - "p99": 1078.3040523529053 - }, - "roundtrip": { - "p50": 2376.9280910491943, - "p90": 2398.9760875701904, - "p95": 2401.3121128082275, - "p99": 2405.503988265991 - }, - "isolatedSum": { - "p50": 2377.344012260437, - "p90": 2403.999924659729, - "p95": 2407.9999923706055, - "p99": 2416.7360067367554 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2487009280, - "combineLogicalBytes": 2487009280, - "fanoutMean": 5.294189453125, - "recvTokensMax": 21781, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-f1858975", - "identity": "h100|flashinfer|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", - 
"colorKey": "h100_6c33dc8f", - "comparisonKey": "5205049e72237a92", - "schemaVersion": 3, - "generatedAt": "2026-06-27T17:24:08.744102+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_07", - "sku": "h100", - "backend": "flashinfer", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · flashinfer · bf16", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": null, - "configuredUnits": null, - "deviceUnits": 132, - "resourceClass": "fixed-kernel", - "conformanceClass": "not-applicable", - "fixedKernel": true, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "ac583971f94b176", - "workloadId": "set:8:d8d49658059863f2", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": null, - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28296376857", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28296376857", - "createdAt": "2026-06-27T17:24:08.744102+00:00", - "sha": "2ebeba9134a8c84f7a80ac87742d57f7cdf1cf18" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 99.35999661684036, - "p90": 
103.29599678516388, - "p95": 105.53599894046783, - "p99": 110.81600189208984 - }, - "combine": { - "p50": 99.35999661684036, - "p90": 103.29599678516388, - "p95": 105.53599894046783, - "p99": 110.81600189208984 - }, - "roundtrip": { - "p50": 99.35999661684036, - "p90": 103.29599678516388, - "p95": 105.53599894046783, - "p99": 110.81600189208984 - }, - "isolatedSum": { - "p50": 198.71999323368073, - "p90": 206.59199357032776, - "p95": 211.07199788093567, - "p99": 221.6320037841797 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 630784, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 8, - "stragglerRank": 3, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 99.20000284910202, - "p90": 102.78400033712387, - "p95": 105.53599894046783, - "p99": 109.63200032711029 - }, - "combine": { - "p50": 99.20000284910202, - "p90": 102.78400033712387, - "p95": 105.53599894046783, - "p99": 109.63200032711029 - }, - "roundtrip": { - "p50": 99.20000284910202, - "p90": 102.78400033712387, - "p95": 105.53599894046783, - "p99": 109.63200032711029 - }, - "isolatedSum": { - "p50": 198.40000569820404, - "p90": 205.56800067424774, - "p95": 211.07199788093567, - "p99": 219.26400065422058 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1232896, - "combineLogicalBytes": 1232896, - "fanoutMean": 5.375, - "recvTokensMax": 8, - "stragglerRank": 3, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 96.96000069379807, - "p90": 101.08800232410431, - "p95": 103.42399775981903, - "p99": 108.86400192975998 - }, - "combine": { - "p50": 96.96000069379807, - "p90": 101.08800232410431, - "p95": 103.42399775981903, - "p99": 108.86400192975998 - }, - "roundtrip": { - "p50": 96.96000069379807, - "p90": 101.08800232410431, - "p95": 103.42399775981903, - "p99": 108.86400192975998 - }, - 
"isolatedSum": { - "p50": 193.92000138759613, - "p90": 202.17600464820862, - "p95": 206.84799551963806, - "p99": 217.72800385951996 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2480128, - "combineLogicalBytes": 2480128, - "fanoutMean": 5.40625, - "recvTokensMax": 8, - "stragglerRank": 3, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 98.68799895048141, - "p90": 102.30399668216705, - "p95": 104.25599664449692, - "p99": 109.21599715948105 - }, - "combine": { - "p50": 98.68799895048141, - "p90": 102.30399668216705, - "p95": 104.25599664449692, - "p99": 109.21599715948105 - }, - "roundtrip": { - "p50": 98.68799895048141, - "p90": 102.30399668216705, - "p95": 104.25599664449692, - "p99": 109.21599715948105 - }, - "isolatedSum": { - "p50": 197.37599790096283, - "p90": 204.6079933643341, - "p95": 208.51199328899384, - "p99": 218.4319943189621 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 8, - "stragglerRank": 3, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 96.54399752616882, - "p90": 101.72799974679947, - "p95": 102.94400155544281, - "p99": 107.42399841547012 - }, - "combine": { - "p50": 96.54399752616882, - "p90": 101.72799974679947, - "p95": 102.94400155544281, - "p99": 107.42399841547012 - }, - "roundtrip": { - "p50": 96.54399752616882, - "p90": 101.72799974679947, - "p95": 102.94400155544281, - "p99": 107.42399841547012 - }, - "isolatedSum": { - "p50": 193.08799505233765, - "p90": 203.45599949359894, - "p95": 205.88800311088562, - "p99": 214.84799683094025 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 9920512, - "combineLogicalBytes": 9920512, - "fanoutMean": 5.40625, - "recvTokensMax": 8, - "stragglerRank": 3, - "correct": true, - "samplesPooled": 600, - 
"trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 97.72799909114838, - "p90": 101.6639992594719, - "p95": 105.0880029797554, - "p99": 111.84000223875046 - }, - "combine": { - "p50": 97.72799909114838, - "p90": 101.6639992594719, - "p95": 105.0880029797554, - "p99": 111.84000223875046 - }, - "roundtrip": { - "p50": 97.72799909114838, - "p90": 101.6639992594719, - "p95": 105.0880029797554, - "p99": 111.84000223875046 - }, - "isolatedSum": { - "p50": 195.45599818229675, - "p90": 203.3279985189438, - "p95": 210.1760059595108, - "p99": 223.68000447750092 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 19726336, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 8, - "stragglerRank": 3, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 64, - "globalTokens": 512, - "dispatch": { - "p50": 98.4639972448349, - "p90": 104.44799810647964, - "p95": 122.75200337171555, - "p99": 401.5359878540039 - }, - "combine": { - "p50": 98.4639972448349, - "p90": 104.44799810647964, - "p95": 122.75200337171555, - "p99": 401.5359878540039 - }, - "roundtrip": { - "p50": 98.4639972448349, - "p90": 104.44799810647964, - "p95": 122.75200337171555, - "p99": 401.5359878540039 - }, - "isolatedSum": { - "p50": 196.9279944896698, - "p90": 208.8959962129593, - "p95": 245.5040067434311, - "p99": 803.0719757080078 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 38993920, - "combineLogicalBytes": 38993920, - "fanoutMean": 5.3125, - "recvTokensMax": 8, - "stragglerRank": 3, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 110.62400043010712, - "p90": 115.84000289440155, - "p95": 118.23999881744385, - "p99": 130.52800297737122 - }, - "combine": { - "p50": 110.62400043010712, - "p90": 115.84000289440155, - "p95": 118.23999881744385, - "p99": 130.52800297737122 - }, - "roundtrip": { - "p50": 
110.62400043010712, - "p90": 115.84000289440155, - "p95": 118.23999881744385, - "p99": 130.52800297737122 - }, - "isolatedSum": { - "p50": 221.24800086021423, - "p90": 231.6800057888031, - "p95": 236.4799976348877, - "p99": 261.05600595474243 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 8, - "stragglerRank": 3, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-236b5900", - "identity": "h100|flashinfer|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", - "colorKey": "h100_865f08c6", - "comparisonKey": "63f2ed34d1d8c7db", - "schemaVersion": 3, - "generatedAt": "2026-06-28T01:38:24.466545+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_15", - "sku": "h100", - "backend": "flashinfer", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · flashinfer · fp8", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "fp8", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": null, - "configuredUnits": null, - "deviceUnits": 132, - "resourceClass": "fixed-kernel", - "conformanceClass": "not-applicable", - "fixedKernel": true, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "ac583971f94b176", - 
"workloadId": "set:8:d8d49658059863f2", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": null, - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28307778986", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28307778986", - "createdAt": "2026-06-28T01:38:24.466545+00:00", - "sha": "510fc17001789ae4f32b99b60b9a0a0aa53ab6b5" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 109.11999642848969, - "p90": 113.8560026884079, - "p95": 116.19199812412262, - "p99": 121.34400010108948 - }, - "combine": { - "p50": 109.11999642848969, - "p90": 113.8560026884079, - "p95": 116.19199812412262, - "p99": 121.34400010108948 - }, - "roundtrip": { - "p50": 109.11999642848969, - "p90": 113.8560026884079, - "p95": 116.19199812412262, - "p99": 121.34400010108948 - }, - "isolatedSum": { - "p50": 218.23999285697937, - "p90": 227.7120053768158, - "p95": 232.38399624824524, - "p99": 242.68800020217896 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 315392, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 8, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 108.96000266075134, - "p90": 116.80000275373459, - "p95": 122.30399996042252, - "p99": 135.3279948234558 - }, - "combine": { - "p50": 108.96000266075134, - "p90": 116.80000275373459, - "p95": 122.30399996042252, - "p99": 135.3279948234558 - }, - "roundtrip": { - "p50": 108.96000266075134, - "p90": 116.80000275373459, - "p95": 122.30399996042252, - "p99": 135.3279948234558 - }, - "isolatedSum": { - "p50": 217.92000532150269, - "p90": 233.60000550746918, - "p95": 244.60799992084503, - "p99": 270.6559896469116 - }, - "roundtripMeasured": true, - 
"dispatchLogicalBytes": 616448, - "combineLogicalBytes": 1232896, - "fanoutMean": 5.375, - "recvTokensMax": 8, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 109.18399691581726, - "p90": 115.99999666213989, - "p95": 119.55200135707855, - "p99": 376.6399919986725 - }, - "combine": { - "p50": 109.18399691581726, - "p90": 115.99999666213989, - "p95": 119.55200135707855, - "p99": 376.6399919986725 - }, - "roundtrip": { - "p50": 109.18399691581726, - "p90": 115.99999666213989, - "p95": 119.55200135707855, - "p99": 376.6399919986725 - }, - "isolatedSum": { - "p50": 218.36799383163452, - "p90": 231.99999332427979, - "p95": 239.1040027141571, - "p99": 753.279983997345 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1240064, - "combineLogicalBytes": 2480128, - "fanoutMean": 5.40625, - "recvTokensMax": 8, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 108.70400071144104, - "p90": 114.30399864912033, - "p95": 116.12799763679504, - "p99": 120.64000219106674 - }, - "combine": { - "p50": 108.70400071144104, - "p90": 114.30399864912033, - "p95": 116.12799763679504, - "p99": 120.64000219106674 - }, - "roundtrip": { - "p50": 108.70400071144104, - "p90": 114.30399864912033, - "p95": 116.12799763679504, - "p99": 120.64000219106674 - }, - "isolatedSum": { - "p50": 217.40800142288208, - "p90": 228.60799729824066, - "p95": 232.2559952735901, - "p99": 241.28000438213348 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2487296, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 8, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 108.86400192975998, - "p90": 114.656001329422, - "p95": 119.03999745845795, - "p99": 
151.19999647140503 - }, - "combine": { - "p50": 108.86400192975998, - "p90": 114.656001329422, - "p95": 119.03999745845795, - "p99": 151.19999647140503 - }, - "roundtrip": { - "p50": 108.86400192975998, - "p90": 114.656001329422, - "p95": 119.03999745845795, - "p99": 151.19999647140503 - }, - "isolatedSum": { - "p50": 217.72800385951996, - "p90": 229.312002658844, - "p95": 238.0799949169159, - "p99": 302.39999294281006 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4960256, - "combineLogicalBytes": 9920512, - "fanoutMean": 5.40625, - "recvTokensMax": 8, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 109.21599715948105, - "p90": 121.18399888277054, - "p95": 127.3919939994812, - "p99": 205.56800067424774 - }, - "combine": { - "p50": 109.21599715948105, - "p90": 121.18399888277054, - "p95": 127.3919939994812, - "p99": 205.56800067424774 - }, - "roundtrip": { - "p50": 109.21599715948105, - "p90": 121.18399888277054, - "p95": 127.3919939994812, - "p99": 205.56800067424774 - }, - "isolatedSum": { - "p50": 218.4319943189621, - "p90": 242.36799776554108, - "p95": 254.7839879989624, - "p99": 411.1360013484955 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 9863168, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 8, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 64, - "globalTokens": 512, - "dispatch": { - "p50": 110.97600311040878, - "p90": 115.93600362539291, - "p95": 118.27199906110764, - "p99": 126.88000500202179 - }, - "combine": { - "p50": 110.97600311040878, - "p90": 115.93600362539291, - "p95": 118.27199906110764, - "p99": 126.88000500202179 - }, - "roundtrip": { - "p50": 110.97600311040878, - "p90": 115.93600362539291, - "p95": 118.27199906110764, - "p99": 126.88000500202179 - }, - "isolatedSum": { - "p50": 221.95200622081757, - "p90": 
231.87200725078583, - "p95": 236.54399812221527, - "p99": 253.76001000404358 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 19496960, - "combineLogicalBytes": 38993920, - "fanoutMean": 5.3125, - "recvTokensMax": 8, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 124.41600114107132, - "p90": 129.05600666999817, - "p95": 130.5599957704544, - "p99": 136.4479959011078 - }, - "combine": { - "p50": 124.41600114107132, - "p90": 129.05600666999817, - "p95": 130.5599957704544, - "p99": 136.4479959011078 - }, - "roundtrip": { - "p50": 124.41600114107132, - "p90": 129.05600666999817, - "p95": 130.5599957704544, - "p99": 136.4479959011078 - }, - "isolatedSum": { - "p50": 248.83200228214264, - "p90": 258.11201333999634, - "p95": 261.1199915409088, - "p99": 272.8959918022156 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 38836224, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 8, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-0d201725", - "identity": "h100|flashinfer|7168|8|256|mxfp8|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", - "colorKey": "h100_1686fbdd", - "comparisonKey": "27114da636b19722", - "schemaVersion": 3, - "generatedAt": "2026-06-28T01:37:57.511914+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_17", - "sku": "h100", - "backend": "flashinfer", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · flashinfer · mxfp8", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - 
"experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "mxfp8", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": null, - "configuredUnits": null, - "deviceUnits": 132, - "resourceClass": "fixed-kernel", - "conformanceClass": "not-applicable", - "fixedKernel": true, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "ac583971f94b176", - "workloadId": "set:8:d8d49658059863f2", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": null, - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28307780015", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28307780015", - "createdAt": "2026-06-28T01:37:57.511914+00:00", - "sha": "510fc17001789ae4f32b99b60b9a0a0aa53ab6b5" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 109.56799983978271, - "p90": 113.98400366306305, - "p95": 117.85600334405899, - "p99": 129.72800433635712 - }, - "combine": { - "p50": 109.56799983978271, - "p90": 113.98400366306305, - "p95": 117.85600334405899, - "p99": 129.72800433635712 - }, - "roundtrip": { - "p50": 109.56799983978271, - "p90": 113.98400366306305, - "p95": 117.85600334405899, - "p99": 129.72800433635712 - }, - "isolatedSum": { - "p50": 219.13599967956543, - "p90": 227.9680073261261, - "p95": 235.71200668811798, - "p99": 259.45600867271423 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 315392, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 8, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, 
- "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 109.11999642848969, - "p90": 113.27999830245972, - "p95": 115.35999923944473, - "p99": 119.84000355005264 - }, - "combine": { - "p50": 109.11999642848969, - "p90": 113.27999830245972, - "p95": 115.35999923944473, - "p99": 119.84000355005264 - }, - "roundtrip": { - "p50": 109.11999642848969, - "p90": 113.27999830245972, - "p95": 115.35999923944473, - "p99": 119.84000355005264 - }, - "isolatedSum": { - "p50": 218.23999285697937, - "p90": 226.55999660491943, - "p95": 230.71999847888947, - "p99": 239.68000710010529 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 616448, - "combineLogicalBytes": 1232896, - "fanoutMean": 5.375, - "recvTokensMax": 8, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 108.99200290441513, - "p90": 112.99200356006622, - "p95": 116.64000153541565, - "p99": 122.36800044775009 - }, - "combine": { - "p50": 108.99200290441513, - "p90": 112.99200356006622, - "p95": 116.64000153541565, - "p99": 122.36800044775009 - }, - "roundtrip": { - "p50": 108.99200290441513, - "p90": 112.99200356006622, - "p95": 116.64000153541565, - "p99": 122.36800044775009 - }, - "isolatedSum": { - "p50": 217.98400580883026, - "p90": 225.98400712013245, - "p95": 233.2800030708313, - "p99": 244.73600089550018 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1240064, - "combineLogicalBytes": 2480128, - "fanoutMean": 5.40625, - "recvTokensMax": 8, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 108.67200046777725, - "p90": 112.92800307273865, - "p95": 115.1999980211258, - "p99": 121.79200351238251 - }, - "combine": { - "p50": 108.67200046777725, - "p90": 112.92800307273865, - "p95": 115.1999980211258, - "p99": 121.79200351238251 - }, - "roundtrip": { - 
"p50": 108.67200046777725, - "p90": 112.92800307273865, - "p95": 115.1999980211258, - "p99": 121.79200351238251 - }, - "isolatedSum": { - "p50": 217.3440009355545, - "p90": 225.8560061454773, - "p95": 230.3999960422516, - "p99": 243.58400702476501 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2487296, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 8, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 108.96000266075134, - "p90": 113.47199976444244, - "p95": 116.80000275373459, - "p99": 125.40799379348755 - }, - "combine": { - "p50": 108.96000266075134, - "p90": 113.47199976444244, - "p95": 116.80000275373459, - "p99": 125.40799379348755 - }, - "roundtrip": { - "p50": 108.96000266075134, - "p90": 113.47199976444244, - "p95": 116.80000275373459, - "p99": 125.40799379348755 - }, - "isolatedSum": { - "p50": 217.92000532150269, - "p90": 226.9439995288849, - "p95": 233.60000550746918, - "p99": 250.8159875869751 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4960256, - "combineLogicalBytes": 9920512, - "fanoutMean": 5.40625, - "recvTokensMax": 8, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 108.86400192975998, - "p90": 113.21599781513214, - "p95": 116.60800129175186, - "p99": 119.00799721479416 - }, - "combine": { - "p50": 108.86400192975998, - "p90": 113.21599781513214, - "p95": 116.60800129175186, - "p99": 119.00799721479416 - }, - "roundtrip": { - "p50": 108.86400192975998, - "p90": 113.21599781513214, - "p95": 116.60800129175186, - "p99": 119.00799721479416 - }, - "isolatedSum": { - "p50": 217.72800385951996, - "p90": 226.43199563026428, - "p95": 233.21600258350372, - "p99": 238.01599442958832 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 9863168, - "combineLogicalBytes": 
19726336, - "fanoutMean": 5.375, - "recvTokensMax": 8, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 64, - "globalTokens": 512, - "dispatch": { - "p50": 110.33599823713303, - "p90": 115.42399972677231, - "p95": 118.78400295972824, - "p99": 129.88799810409546 - }, - "combine": { - "p50": 110.33599823713303, - "p90": 115.42399972677231, - "p95": 118.78400295972824, - "p99": 129.88799810409546 - }, - "roundtrip": { - "p50": 110.33599823713303, - "p90": 115.42399972677231, - "p95": 118.78400295972824, - "p99": 129.88799810409546 - }, - "isolatedSum": { - "p50": 220.67199647426605, - "p90": 230.84799945354462, - "p95": 237.56800591945648, - "p99": 259.7759962081909 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 19496960, - "combineLogicalBytes": 38993920, - "fanoutMean": 5.3125, - "recvTokensMax": 8, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 122.84799665212631, - "p90": 128.28800082206726, - "p95": 130.048006772995, - "p99": 133.08799266815186 - }, - "combine": { - "p50": 122.84799665212631, - "p90": 128.28800082206726, - "p95": 130.048006772995, - "p99": 133.08799266815186 - }, - "roundtrip": { - "p50": 122.84799665212631, - "p90": 128.28800082206726, - "p95": 130.048006772995, - "p99": 133.08799266815186 - }, - "isolatedSum": { - "p50": 245.69599330425262, - "p90": 256.5760016441345, - "p95": 260.09601354599, - "p99": 266.1759853363037 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 38836224, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 8, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-6fee4962", - "identity": "h100|flashinfer|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", - "colorKey": "h100_6c33dc8f", - 
"comparisonKey": "ab2d6ab146526e25", - "schemaVersion": 3, - "generatedAt": "2026-06-27T17:55:14.883072+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_16", - "sku": "h100", - "backend": "flashinfer", - "phase": "prefill", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · flashinfer · bf16", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": null, - "configuredUnits": null, - "deviceUnits": 132, - "resourceClass": "fixed-kernel", - "conformanceClass": "not-applicable", - "fixedKernel": true, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "64d989e2e2a6b31", - "workloadId": "set:6:a426d66e479dc893", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": null, - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28297139240", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28297139240", - "createdAt": "2026-06-27T17:55:14.883072+00:00", - "sha": "cfa1ec56258b94b4a173844810a163a832bcb07e" - }, - "rows": [ - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 88.92799913883209, - "p90": 92.38400310277939, - "p95": 
93.66399794816971, - "p99": 97.50399738550186 - }, - "combine": { - "p50": 88.92799913883209, - "p90": 92.38400310277939, - "p95": 93.66399794816971, - "p99": 97.50399738550186 - }, - "roundtrip": { - "p50": 88.92799913883209, - "p90": 92.38400310277939, - "p95": 93.66399794816971, - "p99": 97.50399738550186 - }, - "isolatedSum": { - "p50": 177.85599827766418, - "p90": 184.76800620555878, - "p95": 187.32799589633942, - "p99": 195.00799477100372 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 8, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 256, - "globalTokens": 2048, - "dispatch": { - "p50": 149.9519944190979, - "p90": 152.51199901103973, - "p95": 153.50399911403656, - "p99": 157.56799280643463 - }, - "combine": { - "p50": 149.9519944190979, - "p90": 152.51199901103973, - "p95": 153.50399911403656, - "p99": 157.56799280643463 - }, - "roundtrip": { - "p50": 149.9519944190979, - "p90": 152.51199901103973, - "p95": 153.50399911403656, - "p99": 157.56799280643463 - }, - "isolatedSum": { - "p50": 299.9039888381958, - "p90": 305.02399802207947, - "p95": 307.0079982280731, - "p99": 315.13598561286926 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 155889664, - "combineLogicalBytes": 155889664, - "fanoutMean": 5.3095703125, - "recvTokensMax": 8, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 512, - "globalTokens": 4096, - "dispatch": { - "p50": 264.70398902893066, - "p90": 268.38400959968567, - "p95": 269.9199914932251, - "p99": 279.07198667526245 - }, - "combine": { - "p50": 264.70398902893066, - "p90": 268.38400959968567, - "p95": 269.9199914932251, - "p99": 279.07198667526245 - }, - "roundtrip": { - "p50": 264.70398902893066, - "p90": 268.38400959968567, - "p95": 269.9199914932251, - "p99": 279.07198667526245 - }, - "isolatedSum": { - 
"p50": 529.4079780578613, - "p90": 536.7680191993713, - "p95": 539.8399829864502, - "p99": 558.1439733505249 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 312266752, - "combineLogicalBytes": 312266752, - "fanoutMean": 5.31787109375, - "recvTokensMax": 8, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 1024, - "globalTokens": 8192, - "dispatch": { - "p50": 498.6239969730377, - "p90": 502.4319887161255, - "p95": 504.2240023612976, - "p99": 506.9440007209778 - }, - "combine": { - "p50": 498.6239969730377, - "p90": 502.4319887161255, - "p95": 504.2240023612976, - "p99": 506.9440007209778 - }, - "roundtrip": { - "p50": 498.6239969730377, - "p90": 502.4319887161255, - "p95": 504.2240023612976, - "p99": 506.9440007209778 - }, - "isolatedSum": { - "p50": 997.2479939460754, - "p90": 1004.863977432251, - "p95": 1008.4480047225952, - "p99": 1013.8880014419556 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 623443968, - "combineLogicalBytes": 623443968, - "fanoutMean": 5.30859375, - "recvTokensMax": 8, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2048, - "globalTokens": 16384, - "dispatch": { - "p50": 953.6640048027039, - "p90": 962.0800018310547, - "p95": 964.2559885978699, - "p99": 967.9039716720581 - }, - "combine": { - "p50": 953.6640048027039, - "p90": 962.0800018310547, - "p95": 964.2559885978699, - "p99": 967.9039716720581 - }, - "roundtrip": { - "p50": 953.6640048027039, - "p90": 962.0800018310547, - "p95": 964.2559885978699, - "p99": 967.9039716720581 - }, - "isolatedSum": { - "p50": 1907.3280096054077, - "p90": 1924.1600036621094, - "p95": 1928.5119771957397, - "p99": 1935.8079433441162 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1243805696, - "combineLogicalBytes": 1243805696, - "fanoutMean": 5.29547119140625, - "recvTokensMax": 8, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - 
"trials": 3 - }, - { - "tokensPerRank": 4096, - "globalTokens": 32768, - "dispatch": { - "p50": 1854.8799753189087, - "p90": 1862.4320030212402, - "p95": 1864.2560243606567, - "p99": 1869.7919845581055 - }, - "combine": { - "p50": 1854.8799753189087, - "p90": 1862.4320030212402, - "p95": 1864.2560243606567, - "p99": 1869.7919845581055 - }, - "roundtrip": { - "p50": 1854.8799753189087, - "p90": 1862.4320030212402, - "p95": 1864.2560243606567, - "p99": 1869.7919845581055 - }, - "isolatedSum": { - "p50": 3709.7599506378174, - "p90": 3724.8640060424805, - "p95": 3728.5120487213135, - "p99": 3739.583969116211 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2487009280, - "combineLogicalBytes": 2487009280, - "fanoutMean": 5.294189453125, - "recvTokensMax": 8, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-6d37a6fd", - "identity": "h100|flashinfer|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", - "colorKey": "h100_865f08c6", - "comparisonKey": "7ac85b4ec0b69909", - "schemaVersion": 3, - "generatedAt": "2026-06-28T01:37:55.644705+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_00", - "sku": "h100", - "backend": "flashinfer", - "phase": "prefill", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · flashinfer · fp8", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "fp8", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - 
"achievedFraction": null, - "configuredUnits": null, - "deviceUnits": 132, - "resourceClass": "fixed-kernel", - "conformanceClass": "not-applicable", - "fixedKernel": true, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "64d989e2e2a6b31", - "workloadId": "set:6:a426d66e479dc893", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": null, - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28307778986", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28307778986", - "createdAt": "2026-06-28T01:37:55.644705+00:00", - "sha": "510fc17001789ae4f32b99b60b9a0a0aa53ab6b5" - }, - "rows": [ - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 101.15200281143188, - "p90": 120.99199742078781, - "p95": 122.17599898576736, - "p99": 127.87200510501862 - }, - "combine": { - "p50": 101.15200281143188, - "p90": 120.99199742078781, - "p95": 122.17599898576736, - "p99": 127.87200510501862 - }, - "roundtrip": { - "p50": 101.15200281143188, - "p90": 120.99199742078781, - "p95": 122.17599898576736, - "p99": 127.87200510501862 - }, - "isolatedSum": { - "p50": 202.30400562286377, - "p90": 241.98399484157562, - "p95": 244.35199797153473, - "p99": 255.74401021003723 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 38836224, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 8, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 256, - "globalTokens": 2048, - "dispatch": { - "p50": 139.5840048789978, - "p90": 168.60799491405487, - "p95": 170.97599804401398, - "p99": 179.1359931230545 - }, - "combine": { - "p50": 139.5840048789978, - "p90": 
168.60799491405487, - "p95": 170.97599804401398, - "p99": 179.1359931230545 - }, - "roundtrip": { - "p50": 139.5840048789978, - "p90": 168.60799491405487, - "p95": 170.97599804401398, - "p99": 179.1359931230545 - }, - "isolatedSum": { - "p50": 279.1680097579956, - "p90": 337.21598982810974, - "p95": 341.95199608802795, - "p99": 358.271986246109 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 77944832, - "combineLogicalBytes": 155889664, - "fanoutMean": 5.3095703125, - "recvTokensMax": 8, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 512, - "globalTokens": 4096, - "dispatch": { - "p50": 221.66399657726288, - "p90": 232.7360063791275, - "p95": 234.46400463581085, - "p99": 239.00799453258514 - }, - "combine": { - "p50": 221.66399657726288, - "p90": 232.7360063791275, - "p95": 234.46400463581085, - "p99": 239.00799453258514 - }, - "roundtrip": { - "p50": 221.66399657726288, - "p90": 232.7360063791275, - "p95": 234.46400463581085, - "p99": 239.00799453258514 - }, - "isolatedSum": { - "p50": 443.32799315452576, - "p90": 465.472012758255, - "p95": 468.9280092716217, - "p99": 478.0159890651703 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 156133376, - "combineLogicalBytes": 312266752, - "fanoutMean": 5.31787109375, - "recvTokensMax": 8, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 1024, - "globalTokens": 8192, - "dispatch": { - "p50": 406.43200278282166, - "p90": 415.19999504089355, - "p95": 416.9920086860657, - "p99": 419.74401473999023 - }, - "combine": { - "p50": 406.43200278282166, - "p90": 415.19999504089355, - "p95": 416.9920086860657, - "p99": 419.74401473999023 - }, - "roundtrip": { - "p50": 406.43200278282166, - "p90": 415.19999504089355, - "p95": 416.9920086860657, - "p99": 419.74401473999023 - }, - "isolatedSum": { - "p50": 812.8640055656433, - "p90": 830.3999900817871, - "p95": 833.9840173721313, - "p99": 
839.4880294799805 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 311721984, - "combineLogicalBytes": 623443968, - "fanoutMean": 5.30859375, - "recvTokensMax": 8, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2048, - "globalTokens": 16384, - "dispatch": { - "p50": 764.959990978241, - "p90": 773.6319899559021, - "p95": 775.5839824676514, - "p99": 795.3600287437439 - }, - "combine": { - "p50": 764.959990978241, - "p90": 773.6319899559021, - "p95": 775.5839824676514, - "p99": 795.3600287437439 - }, - "roundtrip": { - "p50": 764.959990978241, - "p90": 773.6319899559021, - "p95": 775.5839824676514, - "p99": 795.3600287437439 - }, - "isolatedSum": { - "p50": 1529.919981956482, - "p90": 1547.2639799118042, - "p95": 1551.1679649353027, - "p99": 1590.7200574874878 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 621902848, - "combineLogicalBytes": 1243805696, - "fanoutMean": 5.29547119140625, - "recvTokensMax": 8, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4096, - "globalTokens": 32768, - "dispatch": { - "p50": 1479.904055595398, - "p90": 1490.8479452133179, - "p95": 1496.7039823532104, - "p99": 1506.6879987716675 - }, - "combine": { - "p50": 1479.904055595398, - "p90": 1490.8479452133179, - "p95": 1496.7039823532104, - "p99": 1506.6879987716675 - }, - "roundtrip": { - "p50": 1479.904055595398, - "p90": 1490.8479452133179, - "p95": 1496.7039823532104, - "p99": 1506.6879987716675 - }, - "isolatedSum": { - "p50": 2959.808111190796, - "p90": 2981.6958904266357, - "p95": 2993.407964706421, - "p99": 3013.375997543335 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1243504640, - "combineLogicalBytes": 2487009280, - "fanoutMean": 5.294189453125, - "recvTokensMax": 8, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-00728192", - "identity": 
"h100|flashinfer|7168|8|256|mxfp8|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", - "colorKey": "h100_1686fbdd", - "comparisonKey": "f82129f37146e350", - "schemaVersion": 3, - "generatedAt": "2026-06-28T01:37:59.531491+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_18", - "sku": "h100", - "backend": "flashinfer", - "phase": "prefill", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · flashinfer · mxfp8", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "mxfp8", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": null, - "configuredUnits": null, - "deviceUnits": 132, - "resourceClass": "fixed-kernel", - "conformanceClass": "not-applicable", - "fixedKernel": true, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "64d989e2e2a6b31", - "workloadId": "set:6:a426d66e479dc893", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": null, - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28307780015", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28307780015", - "createdAt": "2026-06-28T01:37:59.531491+00:00", - "sha": "510fc17001789ae4f32b99b60b9a0a0aa53ab6b5" - 
}, - "rows": [ - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 100.19200295209885, - "p90": 127.6479959487915, - "p95": 128.7039965391159, - "p99": 135.68000495433807 - }, - "combine": { - "p50": 100.19200295209885, - "p90": 127.6479959487915, - "p95": 128.7039965391159, - "p99": 135.68000495433807 - }, - "roundtrip": { - "p50": 100.19200295209885, - "p90": 127.6479959487915, - "p95": 128.7039965391159, - "p99": 135.68000495433807 - }, - "isolatedSum": { - "p50": 200.3840059041977, - "p90": 255.295991897583, - "p95": 257.4079930782318, - "p99": 271.36000990867615 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 38836224, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 8, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 256, - "globalTokens": 2048, - "dispatch": { - "p50": 139.26400244235992, - "p90": 165.12000560760498, - "p95": 166.62399470806122, - "p99": 172.41600155830383 - }, - "combine": { - "p50": 139.26400244235992, - "p90": 165.12000560760498, - "p95": 166.62399470806122, - "p99": 172.41600155830383 - }, - "roundtrip": { - "p50": 139.26400244235992, - "p90": 165.12000560760498, - "p95": 166.62399470806122, - "p99": 172.41600155830383 - }, - "isolatedSum": { - "p50": 278.52800488471985, - "p90": 330.24001121520996, - "p95": 333.24798941612244, - "p99": 344.83200311660767 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 77944832, - "combineLogicalBytes": 155889664, - "fanoutMean": 5.3095703125, - "recvTokensMax": 8, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 512, - "globalTokens": 4096, - "dispatch": { - "p50": 223.90399873256683, - "p90": 230.3999960422516, - "p95": 232.12799429893494, - "p99": 235.29599606990814 - }, - "combine": { - "p50": 223.90399873256683, - "p90": 230.3999960422516, - "p95": 232.12799429893494, - "p99": 235.29599606990814 - }, - 
"roundtrip": { - "p50": 223.90399873256683, - "p90": 230.3999960422516, - "p95": 232.12799429893494, - "p99": 235.29599606990814 - }, - "isolatedSum": { - "p50": 447.80799746513367, - "p90": 460.7999920845032, - "p95": 464.2559885978699, - "p99": 470.5919921398163 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 156133376, - "combineLogicalBytes": 312266752, - "fanoutMean": 5.31787109375, - "recvTokensMax": 8, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 1024, - "globalTokens": 8192, - "dispatch": { - "p50": 407.26399421691895, - "p90": 413.5040044784546, - "p95": 415.3600037097931, - "p99": 419.0399944782257 - }, - "combine": { - "p50": 407.26399421691895, - "p90": 413.5040044784546, - "p95": 415.3600037097931, - "p99": 419.0399944782257 - }, - "roundtrip": { - "p50": 407.26399421691895, - "p90": 413.5040044784546, - "p95": 415.3600037097931, - "p99": 419.0399944782257 - }, - "isolatedSum": { - "p50": 814.5279884338379, - "p90": 827.0080089569092, - "p95": 830.7200074195862, - "p99": 838.0799889564514 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 311721984, - "combineLogicalBytes": 623443968, - "fanoutMean": 5.30859375, - "recvTokensMax": 8, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2048, - "globalTokens": 16384, - "dispatch": { - "p50": 767.7760124206543, - "p90": 772.8639841079712, - "p95": 775.1359939575195, - "p99": 777.8880000114441 - }, - "combine": { - "p50": 767.7760124206543, - "p90": 772.8639841079712, - "p95": 775.1359939575195, - "p99": 777.8880000114441 - }, - "roundtrip": { - "p50": 767.7760124206543, - "p90": 772.8639841079712, - "p95": 775.1359939575195, - "p99": 777.8880000114441 - }, - "isolatedSum": { - "p50": 1535.5520248413086, - "p90": 1545.7279682159424, - "p95": 1550.271987915039, - "p99": 1555.7760000228882 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 621902848, - 
"combineLogicalBytes": 1243805696, - "fanoutMean": 5.29547119140625, - "recvTokensMax": 8, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4096, - "globalTokens": 32768, - "dispatch": { - "p50": 1483.0399751663208, - "p90": 1491.7759895324707, - "p95": 1494.2400455474854, - "p99": 1497.5039958953857 - }, - "combine": { - "p50": 1483.0399751663208, - "p90": 1491.7759895324707, - "p95": 1494.2400455474854, - "p99": 1497.5039958953857 - }, - "roundtrip": { - "p50": 1483.0399751663208, - "p90": 1491.7759895324707, - "p95": 1494.2400455474854, - "p99": 1497.5039958953857 - }, - "isolatedSum": { - "p50": 2966.0799503326416, - "p90": 2983.5519790649414, - "p95": 2988.4800910949707, - "p99": 2995.0079917907715 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1243504640, - "combineLogicalBytes": 2487009280, - "fanoutMean": 5.294189453125, - "recvTokensMax": 8, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-5657eb6e", - "identity": "h100|uccl|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", - "colorKey": "h100_7104d5f0", - "comparisonKey": "d2fd76f5ec2f3d88", - "schemaVersion": 3, - "generatedAt": "2026-06-27T17:35:51.567423+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h100-dgxc-slurm_14", - "sku": "h100", - "backend": "uccl", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · uccl · bf16", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - 
"dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "ac583971f94b176", - "workloadId": "set:8:d8d49658059863f2", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": null, - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28296667411", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28296667411", - "createdAt": "2026-06-27T17:35:51.567423+00:00", - "sha": "cfa1ec56258b94b4a173844810a163a832bcb07e" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 127.68000364303589, - "p90": 134.3040019273758, - "p95": 136.60800457000732, - "p99": 143.93599331378937 - }, - "combine": { - "p50": 88.0960002541542, - "p90": 90.36800265312195, - "p95": 91.32800251245499, - "p99": 270.30399441719055 - }, - "roundtrip": { - "p50": 200.28799772262573, - "p90": 205.56800067424774, - "p95": 207.42399990558624, - "p99": 212.79999613761902 - }, - "isolatedSum": { - "p50": 215.7760038971901, - "p90": 224.67200458049774, - "p95": 227.9360070824623, - "p99": 414.2399877309799 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 630784, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 179.51999604701996, - "p90": 186.17600202560425, - 
"p95": 189.2160028219223, - "p99": 194.91200149059296 - }, - "combine": { - "p50": 99.20000284910202, - "p90": 105.82400113344193, - "p95": 107.19999670982361, - "p99": 191.64800643920898 - }, - "roundtrip": { - "p50": 254.84800338745117, - "p90": 262.7840042114258, - "p95": 265.4719948768616, - "p99": 418.8799858093262 - }, - "isolatedSum": { - "p50": 278.719998896122, - "p90": 292.0000031590462, - "p95": 296.4159995317459, - "p99": 386.56000792980194 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1232896, - "combineLogicalBytes": 1232896, - "fanoutMean": 5.375, - "recvTokensMax": 13, - "stragglerRank": 3, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 130.87999820709229, - "p90": 193.4400051832199, - "p95": 196.57599925994873, - "p99": 200.95999538898468 - }, - "combine": { - "p50": 89.72799777984619, - "p90": 107.26399719715118, - "p95": 108.06400328874588, - "p99": 112.31999844312668 - }, - "roundtrip": { - "p50": 204.67199385166168, - "p90": 272.352010011673, - "p95": 274.78399872779846, - "p99": 282.30398893356323 - }, - "isolatedSum": { - "p50": 220.60799598693848, - "p90": 300.7040023803711, - "p95": 304.6400025486946, - "p99": 313.27999383211136 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2480128, - "combineLogicalBytes": 2480128, - "fanoutMean": 5.40625, - "recvTokensMax": 29, - "stragglerRank": 3, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 133.05599987506866, - "p90": 187.23200261592865, - "p95": 195.93599438667297, - "p99": 479.0079891681671 - }, - "combine": { - "p50": 89.75999802350998, - "p90": 104.73600029945374, - "p95": 105.92000186443329, - "p99": 108.0000028014183 - }, - "roundtrip": { - "p50": 205.63200116157532, - "p90": 260.44800877571106, - "p95": 262.36799359321594, - "p99": 269.79199051856995 - }, - "isolatedSum": { - "p50": 
222.81599789857864, - "p90": 291.9680029153824, - "p95": 301.85599625110626, - "p99": 587.0079919695854 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 129.60000336170197, - "p90": 195.16800343990326, - "p95": 197.60000705718994, - "p99": 203.2960057258606 - }, - "combine": { - "p50": 90.52799642086029, - "p90": 107.29599744081497, - "p95": 108.15999656915665, - "p99": 114.30399864912033 - }, - "roundtrip": { - "p50": 206.59199357032776, - "p90": 274.6559977531433, - "p95": 275.9360074996948, - "p99": 280.7680070400238 - }, - "isolatedSum": { - "p50": 220.12799978256226, - "p90": 302.46400088071823, - "p95": 305.7600036263466, - "p99": 317.6000043749809 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 9920512, - "combineLogicalBytes": 9920512, - "fanoutMean": 5.40625, - "recvTokensMax": 92, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 181.7920058965683, - "p90": 195.68000733852386, - "p95": 198.81600141525269, - "p99": 324.47999715805054 - }, - "combine": { - "p50": 108.12799632549286, - "p90": 115.39199948310852, - "p95": 116.19199812412262, - "p99": 118.97599697113037 - }, - "roundtrip": { - "p50": 263.7439966201782, - "p90": 279.83999252319336, - "p95": 281.43998980522156, - "p99": 286.20800375938416 - }, - "isolatedSum": { - "p50": 289.92000222206116, - "p90": 311.0720068216324, - "p95": 315.0079995393753, - "p99": 443.4559941291809 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 19726336, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 182, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - 
"tokensPerRank": 64, - "globalTokens": 512, - "dispatch": { - "p50": 147.71200716495514, - "p90": 197.11999595165253, - "p95": 200.3840059041977, - "p99": 211.67999505996704 - }, - "combine": { - "p50": 105.95200210809708, - "p90": 124.32000041007996, - "p95": 125.2799928188324, - "p99": 129.98400628566742 - }, - "roundtrip": { - "p50": 221.0880070924759, - "p90": 289.40799832344055, - "p95": 292.28800535202026, - "p99": 295.77600955963135 - }, - "isolatedSum": { - "p50": 253.66400927305222, - "p90": 321.4399963617325, - "p95": 325.6639987230301, - "p99": 341.66400134563446 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 38993920, - "combineLogicalBytes": 38993920, - "fanoutMean": 5.3125, - "recvTokensMax": 367, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 186.24000251293182, - "p90": 199.74400103092194, - "p95": 202.55999267101288, - "p99": 208.03199708461761 - }, - "combine": { - "p50": 134.20799374580383, - "p90": 139.96799290180206, - "p95": 141.15199446678162, - "p99": 147.2640037536621 - }, - "roundtrip": { - "p50": 292.32001304626465, - "p90": 306.62399530410767, - "p95": 309.63200330734253, - "p99": 314.5279884338379 - }, - "isolatedSum": { - "p50": 320.44799625873566, - "p90": 339.711993932724, - "p95": 343.7119871377945, - "p99": 355.2960008382797 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 2, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-8af55e63", - "identity": "h100|uccl|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", - "colorKey": "h100_7104d5f0", - "comparisonKey": "4f16a23c02cdc2c5", - "schemaVersion": 3, - "generatedAt": "2026-06-27T17:35:56.194527+00:00", - "status": "valid", - 
"publicationStatus": "official", - "runner": "h100-dgxc-slurm_07", - "sku": "h100", - "backend": "uccl", - "phase": "prefill", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H100 EP8 · uccl · bf16", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "64d989e2e2a6b31", - "workloadId": "set:6:a426d66e479dc893", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": null, - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28296667411", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28296667411", - "createdAt": "2026-06-27T17:35:56.194527+00:00", - "sha": "cfa1ec56258b94b4a173844810a163a832bcb07e" - }, - "rows": [ - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 161.1199975013733, - "p90": 165.24800658226013, - "p95": 166.9120043516159, - "p99": 170.84799706935883 - }, - "combine": { - "p50": 120.99199742078781, - "p90": 123.10399860143661, - "p95": 128.03199887275696, - 
"p99": 143.99999380111694 - }, - "roundtrip": { - "p50": 242.01600253582, - "p90": 246.7840015888214, - "p95": 248.86399507522583, - "p99": 252.70399451255798 - }, - "isolatedSum": { - "p50": 282.1119949221611, - "p90": 288.35200518369675, - "p95": 294.94400322437286, - "p99": 314.84799087047577 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 256, - "globalTokens": 2048, - "dispatch": { - "p50": 186.5919977426529, - "p90": 191.42399728298187, - "p95": 193.15199553966522, - "p99": 197.31199741363525 - }, - "combine": { - "p50": 164.67200219631195, - "p90": 170.04799842834473, - "p95": 171.23199999332428, - "p99": 175.04000663757324 - }, - "roundtrip": { - "p50": 305.08801341056824, - "p90": 309.56798791885376, - "p95": 310.9759986400604, - "p99": 315.42399525642395 - }, - "isolatedSum": { - "p50": 351.26399993896484, - "p90": 361.4719957113266, - "p95": 364.3839955329895, - "p99": 372.3520040512085 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 155889664, - "combineLogicalBytes": 155889664, - "fanoutMean": 5.3095703125, - "recvTokensMax": 1422, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 512, - "globalTokens": 4096, - "dispatch": { - "p50": 239.55200612545013, - "p90": 243.68000030517578, - "p95": 245.31200528144836, - "p99": 250.62400102615356 - }, - "combine": { - "p50": 242.78399348258972, - "p90": 246.848002076149, - "p95": 248.60799312591553, - "p99": 251.8720030784607 - }, - "roundtrip": { - "p50": 442.4000084400177, - "p90": 448.35200905799866, - "p95": 450.20800828933716, - "p99": 453.92000675201416 - }, - "isolatedSum": { - "p50": 482.33599960803986, - "p90": 490.52800238132477, - "p95": 493.9199984073639, - "p99": 502.49600410461426 - }, - "roundtripMeasured": true, - 
"dispatchLogicalBytes": 312266752, - "combineLogicalBytes": 312266752, - "fanoutMean": 5.31787109375, - "recvTokensMax": 2779, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 1024, - "globalTokens": 8192, - "dispatch": { - "p50": 346.8799889087677, - "p90": 351.4240086078644, - "p95": 353.4719944000244, - "p99": 358.0799996852875 - }, - "combine": { - "p50": 376.6399919986725, - "p90": 383.4240138530731, - "p95": 385.79198718070984, - "p99": 474.2400050163269 - }, - "roundtrip": { - "p50": 684.0000152587891, - "p90": 691.3920044898987, - "p95": 693.8560009002686, - "p99": 700.4479765892029 - }, - "isolatedSum": { - "p50": 723.5199809074402, - "p90": 734.8480224609375, - "p95": 739.2639815807343, - "p99": 832.3200047016144 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 623443968, - "combineLogicalBytes": 623443968, - "fanoutMean": 5.30859375, - "recvTokensMax": 5505, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2048, - "globalTokens": 16384, - "dispatch": { - "p50": 571.1039900779724, - "p90": 600.8960008621216, - "p95": 606.1760187149048, - "p99": 621.1839914321899 - }, - "combine": { - "p50": 647.5840210914612, - "p90": 655.7440161705017, - "p95": 657.9520106315613, - "p99": 664.9919748306274 - }, - "roundtrip": { - "p50": 1174.720048904419, - "p90": 1189.0239715576172, - "p95": 1194.3999528884888, - "p99": 1201.1200189590454 - }, - "isolatedSum": { - "p50": 1218.6880111694336, - "p90": 1256.6400170326233, - "p95": 1264.128029346466, - "p99": 1286.1759662628174 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1243805696, - "combineLogicalBytes": 1243805696, - "fanoutMean": 5.29547119140625, - "recvTokensMax": 10952, - "stragglerRank": 3, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4096, - "globalTokens": 32768, - "dispatch": { - "p50": 1035.5839729309082, - "p90": 
1058.0799579620361, - "p95": 1064.9919509887695, - "p99": 1074.463963508606 - }, - "combine": { - "p50": 1176.1280298233032, - "p90": 1185.5679750442505, - "p95": 1188.6399984359741, - "p99": 1197.376012802124 - }, - "roundtrip": { - "p50": 2155.263900756836, - "p90": 2171.488046646118, - "p95": 2174.815893173218, - "p99": 2184.2238903045654 - }, - "isolatedSum": { - "p50": 2211.7120027542114, - "p90": 2243.6479330062866, - "p95": 2253.6319494247437, - "p99": 2271.83997631073 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2487009280, - "combineLogicalBytes": 2487009280, - "fanoutMean": 5.294189453125, - "recvTokensMax": 21781, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-7d1c49e4", - "identity": "h200|deepep|4096|8|128|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||75530960a30b452", - "colorKey": "h200_d982b749", - "comparisonKey": "d546c8db19c82066", - "schemaVersion": 3, - "generatedAt": "2026-06-27T11:14:25.842054+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_5", - "sku": "h200", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · bf16", - "model": "Qwen3.5", - "shape": { - "hidden": 4096, - "topk": 8, - "experts": 128, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": 
"backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "75530960a30b452", - "workloadId": "set:8:d1b92539bddfb570", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28287506806", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28287506806", - "createdAt": "2026-06-27T11:14:25.842054+00:00", - "sha": "df7fddee0a275156d4c72fa006cd2b73bce72613" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 68.64000111818314, - "p90": 105.82400113344193, - "p95": 117.15199798345566, - "p99": 156.89599514007568 - }, - "combine": { - "p50": 59.87200140953064, - "p90": 75.13599842786789, - "p95": 80.83199709653854, - "p99": 98.75199943780899 - }, - "roundtrip": { - "p50": 113.79200220108032, - "p90": 150.01599490642548, - "p95": 160.73599457740784, - "p99": 198.7520009279251 - }, - "isolatedSum": { - "p50": 128.51200252771378, - "p90": 180.95999956130981, - "p95": 197.9839950799942, - "p99": 255.64799457788467 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 344064, - "combineLogicalBytes": 344064, - "fanoutMean": 5.25, - "recvTokensMax": 6, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 69.37599927186966, - "p90": 103.67999970912933, - "p95": 114.3679991364479, - "p99": 147.96799421310425 - }, - "combine": { - "p50": 59.67999994754791, - "p90": 74.40000027418137, - "p95": 81.66400343179703, - "p99": 100.80000013113022 - }, - "roundtrip": { - "p50": 113.82400244474411, - "p90": 150.56000649929047, - "p95": 
163.16799819469452, - "p99": 199.74400103092194 - }, - "isolatedSum": { - "p50": 129.05599921941757, - "p90": 178.0799999833107, - "p95": 196.03200256824493, - "p99": 248.76799434423447 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 704512, - "combineLogicalBytes": 704512, - "fanoutMean": 5.375, - "recvTokensMax": 12, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 67.10399687290192, - "p90": 91.45600348711014, - "p95": 103.90400141477585, - "p99": 139.615997672081 - }, - "combine": { - "p50": 59.39200147986412, - "p90": 71.87200337648392, - "p95": 76.09599828720093, - "p99": 94.52799707651138 - }, - "roundtrip": { - "p50": 110.81600189208984, - "p90": 141.59999787807465, - "p95": 150.39999783039093, - "p99": 204.12799715995789 - }, - "isolatedSum": { - "p50": 126.49599835276604, - "p90": 163.32800686359406, - "p95": 179.99999970197678, - "p99": 234.14399474859238 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1384448, - "combineLogicalBytes": 1384448, - "fanoutMean": 5.28125, - "recvTokensMax": 26, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 72.12799787521362, - "p90": 104.22399640083313, - "p95": 114.46399986743927, - "p99": 165.72800278663635 - }, - "combine": { - "p50": 60.47999858856201, - "p90": 74.20799881219864, - "p95": 82.30400085449219, - "p99": 100.09600222110748 - }, - "roundtrip": { - "p50": 112.5119999051094, - "p90": 143.71199905872345, - "p95": 156.25600516796112, - "p99": 205.53599298000336 - }, - "isolatedSum": { - "p50": 132.60799646377563, - "p90": 178.43199521303177, - "p95": 196.76800072193146, - "p99": 265.82400500774384 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2744320, - "combineLogicalBytes": 2744320, - "fanoutMean": 5.234375, - "recvTokensMax": 49, - "stragglerRank": 5, - 
"correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 73.56800138950348, - "p90": 99.35999661684036, - "p95": 106.33599758148193, - "p99": 118.81600320339203 - }, - "combine": { - "p50": 60.736000537872314, - "p90": 74.94399696588516, - "p95": 80.79999685287476, - "p99": 96.63999825716019 - }, - "roundtrip": { - "p50": 116.67200177907944, - "p90": 153.9199948310852, - "p95": 182.3360025882721, - "p99": 242.97599494457245 - }, - "isolatedSum": { - "p50": 134.3040019273758, - "p90": 174.30399358272552, - "p95": 187.1359944343567, - "p99": 215.45600146055222 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 5464064, - "combineLogicalBytes": 5464064, - "fanoutMean": 5.2109375, - "recvTokensMax": 94, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 74.72000271081924, - "p90": 101.98400169610977, - "p95": 109.79200154542923, - "p99": 140.28799533843994 - }, - "combine": { - "p50": 62.68800050020218, - "p90": 80.38400113582611, - "p95": 86.91199868917465, - "p99": 119.71200257539749 - }, - "roundtrip": { - "p50": 116.83200299739838, - "p90": 152.19199657440186, - "p95": 162.56000101566315, - "p99": 194.75199282169342 - }, - "isolatedSum": { - "p50": 137.40800321102142, - "p90": 182.36800283193588, - "p95": 196.70400023460388, - "p99": 259.99999791383743 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 11124736, - "combineLogicalBytes": 11124736, - "fanoutMean": 5.3046875, - "recvTokensMax": 186, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 64, - "globalTokens": 512, - "dispatch": { - "p50": 83.48800241947174, - "p90": 127.96799838542938, - "p95": 135.3919953107834, - "p99": 240.1919960975647 - }, - "combine": { - "p50": 70.3359991312027, - "p90": 83.96799862384796, - "p95": 89.9519994854927, - "p99": 
99.61599856615067 - }, - "roundtrip": { - "p50": 128.4160017967224, - "p90": 151.74399316310883, - "p95": 159.42400693893433, - "p99": 176.12800002098083 - }, - "isolatedSum": { - "p50": 153.82400155067444, - "p90": 211.93599700927734, - "p95": 225.3439947962761, - "p99": 339.80799466371536 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 22192128, - "combineLogicalBytes": 22192128, - "fanoutMean": 5.291015625, - "recvTokensMax": 358, - "stragglerRank": 1, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 90.46400338411331, - "p90": 113.47199976444244, - "p95": 120.19199877977371, - "p99": 153.6960005760193 - }, - "combine": { - "p50": 84.06399935483932, - "p90": 97.6639986038208, - "p95": 102.30399668216705, - "p99": 120.31999975442886 - }, - "roundtrip": { - "p50": 152.6080071926117, - "p90": 178.24000120162964, - "p95": 190.72000682353973, - "p99": 231.99999332427979 - }, - "isolatedSum": { - "p50": 174.52800273895264, - "p90": 211.13599836826324, - "p95": 222.49599546194077, - "p99": 274.01600033044815 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 44564480, - "combineLogicalBytes": 44564480, - "fanoutMean": 5.3125, - "recvTokensMax": 699, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-ab8f0534", - "identity": "h200|deepep|4096|8|128|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||75530960a30b452", - "colorKey": "h200_3a47b6c9", - "comparisonKey": "40ee6d196d286895", - "schemaVersion": 3, - "generatedAt": "2026-06-26T23:53:38.574880+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_6", - "sku": "h200", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "runtime-visible-v1", - "topologyClass": 
"h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · bf16", - "model": "Qwen3.5", - "shape": { - "hidden": 4096, - "topk": 8, - "experts": 128, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "75530960a30b452", - "workloadId": "set:8:d1b92539bddfb570", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28271743900", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271743900", - "createdAt": "2026-06-26T23:53:38.574880+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 68.64000111818314, - "p90": 89.56799656152725, - "p95": 96.41599655151367, - "p99": 126.36800110340118 - }, - "combine": { - "p50": 58.04799869656563, - "p90": 69.60000097751617, - "p95": 74.52800124883652, - "p99": 91.80799871683121 - }, - "roundtrip": { - "p50": 112.73600161075592, - "p90": 135.93600690364838, - "p95": 145.7280069589615, - "p99": 215.26400744915009 - }, - "isolatedSum": { - "p50": 126.68799981474876, - "p90": 159.16799753904343, - "p95": 170.9439978003502, - "p99": 218.1759998202324 - }, - 
"roundtripMeasured": true, - "dispatchLogicalBytes": 344064, - "combineLogicalBytes": 344064, - "fanoutMean": 5.25, - "recvTokensMax": 6, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 69.34399902820587, - "p90": 88.22400122880936, - "p95": 94.68799829483032, - "p99": 116.15999788045883 - }, - "combine": { - "p50": 58.94400179386139, - "p90": 68.70400160551071, - "p95": 72.03199714422226, - "p99": 83.52000266313553 - }, - "roundtrip": { - "p50": 112.89600282907486, - "p90": 138.3039951324463, - "p95": 150.52799880504608, - "p99": 196.51199877262115 - }, - "isolatedSum": { - "p50": 128.28800082206726, - "p90": 156.92800283432007, - "p95": 166.71999543905258, - "p99": 199.68000054359436 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 704512, - "combineLogicalBytes": 704512, - "fanoutMean": 5.375, - "recvTokensMax": 12, - "stragglerRank": 2, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 70.46400010585785, - "p90": 84.63999629020691, - "p95": 92.0960009098053, - "p99": 110.78400164842606 - }, - "combine": { - "p50": 60.28800085186958, - "p90": 70.91200351715088, - "p95": 75.16799867153168, - "p99": 87.5839963555336 - }, - "roundtrip": { - "p50": 114.20799791812897, - "p90": 135.68000495433807, - "p95": 147.64800667762756, - "p99": 195.5520063638687 - }, - "isolatedSum": { - "p50": 130.75200095772743, - "p90": 155.5519998073578, - "p95": 167.26399958133698, - "p99": 198.36799800395966 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1384448, - "combineLogicalBytes": 1384448, - "fanoutMean": 5.28125, - "recvTokensMax": 26, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 72.38399982452393, - "p90": 103.71199995279312, - "p95": 118.72000247240067, 
- "p99": 215.61600267887115 - }, - "combine": { - "p50": 61.055999249219894, - "p90": 76.03199779987335, - "p95": 81.7599967122078, - "p99": 112.57600039243698 - }, - "roundtrip": { - "p50": 115.84000289440155, - "p90": 143.51999759674072, - "p95": 151.67999267578125, - "p99": 190.46400487422943 - }, - "isolatedSum": { - "p50": 133.43999907374382, - "p90": 179.74399775266647, - "p95": 200.47999918460846, - "p99": 328.19200307130814 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2744320, - "combineLogicalBytes": 2744320, - "fanoutMean": 5.234375, - "recvTokensMax": 49, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 75.23199915885925, - "p90": 102.04800218343735, - "p95": 107.87200182676315, - "p99": 130.20800054073334 - }, - "combine": { - "p50": 61.792001128196716, - "p90": 71.16799801588058, - "p95": 76.64000242948532, - "p99": 86.84799820184708 - }, - "roundtrip": { - "p50": 116.92799627780914, - "p90": 138.2399946451187, - "p95": 147.96799421310425, - "p99": 179.967999458313 - }, - "isolatedSum": { - "p50": 137.02400028705597, - "p90": 173.21600019931793, - "p95": 184.51200425624847, - "p99": 217.0559987425804 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 5464064, - "combineLogicalBytes": 5464064, - "fanoutMean": 5.2109375, - "recvTokensMax": 94, - "stragglerRank": 1, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 74.75200295448303, - "p90": 90.52799642086029, - "p95": 98.75199943780899, - "p99": 135.48800349235535 - }, - "combine": { - "p50": 63.74400109052658, - "p90": 71.71200215816498, - "p95": 78.78399640321732, - "p99": 91.07200056314468 - }, - "roundtrip": { - "p50": 119.9679970741272, - "p90": 145.47200500965118, - "p95": 149.50400590896606, - "p99": 165.8879965543747 - }, - "isolatedSum": { - "p50": 138.4960040450096, - "p90": 
162.23999857902527, - "p95": 177.5359958410263, - "p99": 226.56000405550003 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 11124736, - "combineLogicalBytes": 11124736, - "fanoutMean": 5.3046875, - "recvTokensMax": 186, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 64, - "globalTokens": 512, - "dispatch": { - "p50": 84.60800349712372, - "p90": 103.13600301742554, - "p95": 112.22399771213531, - "p99": 138.11199367046356 - }, - "combine": { - "p50": 72.03199714422226, - "p90": 82.78399705886841, - "p95": 89.56799656152725, - "p99": 104.92800176143646 - }, - "roundtrip": { - "p50": 131.48799538612366, - "p90": 145.50399780273438, - "p95": 155.8080017566681, - "p99": 189.66400623321533 - }, - "isolatedSum": { - "p50": 156.64000064134598, - "p90": 185.92000007629395, - "p95": 201.79199427366257, - "p99": 243.03999543190002 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 22192128, - "combineLogicalBytes": 22192128, - "fanoutMean": 5.291015625, - "recvTokensMax": 358, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 94.2080020904541, - "p90": 120.2239990234375, - "p95": 133.82400572299957, - "p99": 215.68000316619873 - }, - "combine": { - "p50": 82.8159973025322, - "p90": 92.70399808883667, - "p95": 96.12800180912018, - "p99": 107.04000294208527 - }, - "roundtrip": { - "p50": 152.22400426864624, - "p90": 168.32000017166138, - "p95": 176.2239933013916, - "p99": 196.03200256824493 - }, - "isolatedSum": { - "p50": 177.0239993929863, - "p90": 212.92799711227417, - "p95": 229.95200753211975, - "p99": 322.720006108284 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 44564480, - "combineLogicalBytes": 44564480, - "fanoutMean": 5.3125, - "recvTokensMax": 699, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-3d690e39", 
- "identity": "h200|deepep|5120|8|160|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||be1b44a963bd4ef", - "colorKey": "h200_3a47b6c9", - "comparisonKey": "540c08b08c068f8c", - "schemaVersion": 3, - "generatedAt": "2026-06-26T23:54:06.885074+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_4", - "sku": "h200", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "runtime-visible-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · bf16", - "model": "shape 5120/8/160", - "shape": { - "hidden": 5120, - "topk": 8, - "experts": 160, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "be1b44a963bd4ef", - "workloadId": "set:8:34e5874082f8ea8f", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28271759919", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271759919", - "createdAt": "2026-06-26T23:54:06.885074+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - 
"rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 70.49600034952164, - "p90": 102.1760031580925, - "p95": 111.90400272607803, - "p99": 133.34399461746216 - }, - "combine": { - "p50": 60.5119988322258, - "p90": 72.9919970035553, - "p95": 79.55200225114822, - "p99": 90.55999666452408 - }, - "roundtrip": { - "p50": 113.8560026884079, - "p90": 143.5839980840683, - "p95": 150.94399452209473, - "p99": 190.14400243759155 - }, - "isolatedSum": { - "p50": 131.00799918174744, - "p90": 175.1680001616478, - "p95": 191.45600497722626, - "p99": 223.90399128198624 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 430080, - "combineLogicalBytes": 430080, - "fanoutMean": 5.25, - "recvTokensMax": 8, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 73.88799637556076, - "p90": 109.43999886512756, - "p95": 123.74400347471237, - "p99": 176.2239933013916 - }, - "combine": { - "p50": 62.463998794555664, - "p90": 76.4480009675026, - "p95": 81.37600123882294, - "p99": 89.6959975361824 - }, - "roundtrip": { - "p50": 118.40000003576279, - "p90": 146.7839926481247, - "p95": 154.88000214099884, - "p99": 198.0160027742386 - }, - "isolatedSum": { - "p50": 136.35199517011642, - "p90": 185.88799983263016, - "p95": 205.1200047135353, - "p99": 265.919990837574 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 880640, - "combineLogicalBytes": 880640, - "fanoutMean": 5.375, - "recvTokensMax": 13, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 72.12799787521362, - "p90": 100.8640006184578, - "p95": 107.84000158309937, - "p99": 182.5920045375824 - }, - "combine": { - "p50": 62.24000081419945, - "p90": 77.504001557827, - "p95": 82.36800134181976, - "p99": 100.22400319576263 - }, - "roundtrip": { - "p50": 116.64000153541565, - "p90": 
148.3840048313141, - "p95": 158.49600732326508, - "p99": 193.34399700164795 - }, - "isolatedSum": { - "p50": 134.36799868941307, - "p90": 178.3680021762848, - "p95": 190.20800292491913, - "p99": 282.81600773334503 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1740800, - "combineLogicalBytes": 1740800, - "fanoutMean": 5.3125, - "recvTokensMax": 25, - "stragglerRank": 3, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 72.60800153017044, - "p90": 101.1200025677681, - "p95": 114.1119971871376, - "p99": 128.06400656700134 - }, - "combine": { - "p50": 63.74400109052658, - "p90": 79.26400005817413, - "p95": 85.50400286912918, - "p99": 120.03199756145477 - }, - "roundtrip": { - "p50": 117.53600090742111, - "p90": 147.74399995803833, - "p95": 156.8319946527481, - "p99": 184.54399704933167 - }, - "isolatedSum": { - "p50": 136.35200262069702, - "p90": 180.38400262594223, - "p95": 199.61600005626678, - "p99": 248.09600412845612 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 3471360, - "combineLogicalBytes": 3471360, - "fanoutMean": 5.296875, - "recvTokensMax": 50, - "stragglerRank": 3, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 75.9039968252182, - "p90": 101.79200023412704, - "p95": 111.77600175142288, - "p99": 127.9039978981018 - }, - "combine": { - "p50": 64.41599875688553, - "p90": 79.68000322580338, - "p95": 84.06399935483932, - "p99": 103.61599922180176 - }, - "roundtrip": { - "p50": 124.09599870443344, - "p90": 154.91199493408203, - "p95": 167.35999286174774, - "p99": 218.6560034751892 - }, - "isolatedSum": { - "p50": 140.31999558210373, - "p90": 181.47200345993042, - "p95": 195.8400011062622, - "p99": 231.51999711990356 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 6912000, - "combineLogicalBytes": 6912000, - "fanoutMean": 5.2734375, - "recvTokensMax": 
93, - "stragglerRank": 1, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 77.63200253248215, - "p90": 102.08000242710114, - "p95": 110.1439967751503, - "p99": 138.5280042886734 - }, - "combine": { - "p50": 68.4799998998642, - "p90": 83.45600217580795, - "p95": 89.50400352478027, - "p99": 97.82399982213974 - }, - "roundtrip": { - "p50": 122.81599640846252, - "p90": 153.50399911403656, - "p95": 163.13600540161133, - "p99": 190.5599981546402 - }, - "isolatedSum": { - "p50": 146.11200243234634, - "p90": 185.5360046029091, - "p95": 199.64800029993057, - "p99": 236.35200411081314 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 13977600, - "combineLogicalBytes": 13977600, - "fanoutMean": 5.33203125, - "recvTokensMax": 179, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 64, - "globalTokens": 512, - "dispatch": { - "p50": 91.90399944782257, - "p90": 113.08799684047699, - "p95": 123.52000176906586, - "p99": 162.9759967327118 - }, - "combine": { - "p50": 77.15199887752533, - "p90": 91.13600105047226, - "p95": 97.59999811649323, - "p99": 112.06399649381638 - }, - "roundtrip": { - "p50": 140.47999680042267, - "p90": 166.75199568271637, - "p95": 175.9359985589981, - "p99": 250.20799040794373 - }, - "isolatedSum": { - "p50": 169.0559983253479, - "p90": 204.22399789094925, - "p95": 221.11999988555908, - "p99": 275.03999322652817 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 27975680, - "combineLogicalBytes": 27975680, - "fanoutMean": 5.3359375, - "recvTokensMax": 355, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 99.07200187444687, - "p90": 122.27199971675873, - "p95": 127.42400169372559, - "p99": 146.7519998550415 - }, - "combine": { - "p50": 90.87999910116196, - "p90": 105.3759977221489, - 
"p95": 109.37599837779999, - "p99": 125.37600100040436 - }, - "roundtrip": { - "p50": 166.4319932460785, - "p90": 186.5919977426529, - "p95": 193.12000274658203, - "p99": 222.01600670814514 - }, - "isolatedSum": { - "p50": 189.95200097560883, - "p90": 227.64799743890762, - "p95": 236.80000007152557, - "p99": 272.12800085544586 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 55674880, - "combineLogicalBytes": 55674880, - "fanoutMean": 5.3095703125, - "recvTokensMax": 699, - "stragglerRank": 1, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-59f585e0", - "identity": "h200|deepep|6144|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", - "colorKey": "h200_d982b749", - "comparisonKey": "6df8e885c58ea75d", - "schemaVersion": 3, - "generatedAt": "2026-06-27T11:13:46.508858+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_12", - "sku": "h200", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · bf16", - "model": "MiniMax-M3", - "shape": { - "hidden": 6144, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - 
}, - "routingConsistent": true, - "traceSignature": "ac583971f94b176", - "workloadId": "set:8:2e0df6a62cd0143e", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28287495061", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28287495061", - "createdAt": "2026-06-27T11:13:46.508858+00:00", - "sha": "df7fddee0a275156d4c72fa006cd2b73bce72613" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 71.96799665689468, - "p90": 97.9200005531311, - "p95": 106.1440035700798, - "p99": 119.10399794578552 - }, - "combine": { - "p50": 65.95200300216675, - "p90": 76.12799853086472, - "p95": 81.56800270080566, - "p99": 110.07999628782272 - }, - "roundtrip": { - "p50": 118.6240017414093, - "p90": 145.50399780273438, - "p95": 153.9520025253296, - "p99": 180.63999712467194 - }, - "isolatedSum": { - "p50": 137.91999965906143, - "p90": 174.04799908399582, - "p95": 187.71200627088547, - "p99": 229.18399423360825 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 540672, - "combineLogicalBytes": 540672, - "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 72.95999675989151, - "p90": 100.09600222110748, - "p95": 107.84000158309937, - "p99": 141.76000654697418 - }, - "combine": { - "p50": 66.17599725723267, - "p90": 77.37600058317184, - "p95": 85.02399921417236, - "p99": 103.13600301742554 - }, - "roundtrip": { - "p50": 120.60800194740295, - "p90": 148.41599762439728, - "p95": 158.1439971923828, - "p99": 177.5359958410263 - }, - "isolatedSum": { - "p50": 139.13599401712418, - "p90": 177.47200280427933, - "p95": 192.86400079727173, - "p99": 
244.89600956439972 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1056768, - "combineLogicalBytes": 1056768, - "fanoutMean": 5.375, - "recvTokensMax": 13, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 73.60000163316727, - "p90": 98.49599748849869, - "p95": 112.76800185441971, - "p99": 134.8479986190796 - }, - "combine": { - "p50": 67.00800359249115, - "p90": 76.54400169849396, - "p95": 85.50400286912918, - "p99": 110.17599701881409 - }, - "roundtrip": { - "p50": 118.8800036907196, - "p90": 144.57599818706512, - "p95": 156.44800662994385, - "p99": 188.83199989795685 - }, - "isolatedSum": { - "p50": 140.60800522565842, - "p90": 175.03999918699265, - "p95": 198.2720047235489, - "p99": 245.02399563789368 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2125824, - "combineLogicalBytes": 2125824, - "fanoutMean": 5.40625, - "recvTokensMax": 29, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 73.18399846553802, - "p90": 94.24000233411789, - "p95": 103.00800204277039, - "p99": 124.57600235939026 - }, - "combine": { - "p50": 67.1359971165657, - "p90": 76.67200267314911, - "p95": 87.42400258779526, - "p99": 107.26399719715118 - }, - "roundtrip": { - "p50": 122.3360002040863, - "p90": 178.39999496936798, - "p95": 188.54400515556335, - "p99": 224.31999444961548 - }, - "isolatedSum": { - "p50": 140.31999558210373, - "p90": 170.912005007267, - "p95": 190.43200463056564, - "p99": 231.83999955654144 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4263936, - "combineLogicalBytes": 4263936, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 74.27199929952621, - "p90": 
96.76799923181534, - "p95": 109.18399691581726, - "p99": 126.97599828243256 - }, - "combine": { - "p50": 68.1919977068901, - "p90": 80.51200211048126, - "p95": 88.51200342178345, - "p99": 103.84000092744827 - }, - "roundtrip": { - "p50": 122.75200337171555, - "p90": 148.70400726795197, - "p95": 161.3440066576004, - "p99": 200.6080001592636 - }, - "isolatedSum": { - "p50": 142.46399700641632, - "p90": 177.2800013422966, - "p95": 197.6960003376007, - "p99": 230.81599920988083 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 8503296, - "combineLogicalBytes": 8503296, - "fanoutMean": 5.40625, - "recvTokensMax": 92, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 85.79199761152267, - "p90": 118.30399930477142, - "p95": 128.28800082206726, - "p99": 147.87200093269348 - }, - "combine": { - "p50": 74.40000027418137, - "p90": 87.55200356245041, - "p95": 92.0960009098053, - "p99": 110.07999628782272 - }, - "roundtrip": { - "p50": 130.72000443935394, - "p90": 155.20000457763672, - "p95": 167.4560010433197, - "p99": 208.48000049591064 - }, - "isolatedSum": { - "p50": 160.19199788570404, - "p90": 205.85600286722183, - "p95": 220.38400173187256, - "p99": 257.9519972205162 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 16908288, - "combineLogicalBytes": 16908288, - "fanoutMean": 5.375, - "recvTokensMax": 182, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 64, - "globalTokens": 512, - "dispatch": { - "p50": 91.07200056314468, - "p90": 110.81600189208984, - "p95": 117.53600090742111, - "p99": 137.1839940547943 - }, - "combine": { - "p50": 83.26400071382523, - "p90": 94.36800330877304, - "p95": 99.71199929714203, - "p99": 128.00000607967377 - }, - "roundtrip": { - "p50": 149.9200016260147, - "p90": 169.5680022239685, - "p95": 179.29600179195404, - "p99": 200.41599869728088 - }, - 
"isolatedSum": { - "p50": 174.3360012769699, - "p90": 205.18400520086288, - "p95": 217.24800020456314, - "p99": 265.1840001344681 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 33423360, - "combineLogicalBytes": 33423360, - "fanoutMean": 5.3125, - "recvTokensMax": 367, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 110.55999994277954, - "p90": 124.09599870443344, - "p95": 132.192000746727, - "p99": 179.00800704956055 - }, - "combine": { - "p50": 96.41599655151367, - "p90": 108.51199924945831, - "p95": 115.84000289440155, - "p99": 140.79999923706055 - }, - "roundtrip": { - "p50": 180.38399517536163, - "p90": 201.02399587631226, - "p95": 209.75999534130096, - "p99": 226.6560047864914 - }, - "isolatedSum": { - "p50": 206.9759964942932, - "p90": 232.60799795389175, - "p95": 248.03200364112854, - "p99": 319.8080062866211 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 66576384, - "combineLogicalBytes": 66576384, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-e3311b84", - "identity": "h200|deepep|6144|8|256|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", - "colorKey": "h200_3a47b6c9", - "comparisonKey": "fc31c0a33afa32cc", - "schemaVersion": 3, - "generatedAt": "2026-06-26T23:54:56.726240+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_7", - "sku": "h200", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "runtime-visible-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · bf16", - "model": "MiniMax-M3", - "shape": { - 
"hidden": 6144, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "ac583971f94b176", - "workloadId": "set:8:2e0df6a62cd0143e", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28271775418", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271775418", - "createdAt": "2026-06-26T23:54:56.726240+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 73.05599749088287, - "p90": 102.7199998497963, - "p95": 111.35999858379364, - "p99": 123.00799787044525 - }, - "combine": { - "p50": 65.92000275850296, - "p90": 79.77599650621414, - "p95": 88.44800293445587, - "p99": 126.30400061607361 - }, - "roundtrip": { - "p50": 118.78400295972824, - "p90": 148.28799664974213, - "p95": 155.8080017566681, - "p99": 184.64000523090363 - }, - "isolatedSum": { - "p50": 138.97600024938583, - "p90": 182.49599635601044, - "p95": 199.8080015182495, - "p99": 249.31199848651886 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 540672, - "combineLogicalBytes": 540672, - "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 5, - "correct": 
true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 74.5920017361641, - "p90": 107.07200318574905, - "p95": 120.51200121641159, - "p99": 142.87999272346497 - }, - "combine": { - "p50": 67.03999638557434, - "p90": 84.73599702119827, - "p95": 92.12800115346909, - "p99": 114.07999694347382 - }, - "roundtrip": { - "p50": 120.38400024175644, - "p90": 157.18400478363037, - "p95": 169.24799978733063, - "p99": 195.68000733852386 - }, - "isolatedSum": { - "p50": 141.63199812173843, - "p90": 191.80800020694733, - "p95": 212.64000236988068, - "p99": 256.9599896669388 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1056768, - "combineLogicalBytes": 1056768, - "fanoutMean": 5.375, - "recvTokensMax": 13, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 72.95999675989151, - "p90": 101.05600208044052, - "p95": 114.68800157308578, - "p99": 137.472003698349 - }, - "combine": { - "p50": 66.14399701356888, - "p90": 79.23199981451035, - "p95": 84.06399935483932, - "p99": 93.50399672985077 - }, - "roundtrip": { - "p50": 120.99199742078781, - "p90": 154.81600165367126, - "p95": 165.95199704170227, - "p99": 220.41599452495575 - }, - "isolatedSum": { - "p50": 139.1039937734604, - "p90": 180.28800189495087, - "p95": 198.7520009279251, - "p99": 230.97600042819977 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2125824, - "combineLogicalBytes": 2125824, - "fanoutMean": 5.40625, - "recvTokensMax": 29, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 73.08799773454666, - "p90": 102.78400033712387, - "p95": 110.88000237941742, - "p99": 142.17600226402283 - }, - "combine": { - "p50": 67.90400296449661, - "p90": 83.29600095748901, - "p95": 89.31200206279755, - "p99": 102.30399668216705 - }, - 
"roundtrip": { - "p50": 120.95999717712402, - "p90": 156.73600137233734, - "p95": 165.56799411773682, - "p99": 189.43999707698822 - }, - "isolatedSum": { - "p50": 140.99200069904327, - "p90": 186.08000129461288, - "p95": 200.19200444221497, - "p99": 244.47999894618988 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4263936, - "combineLogicalBytes": 4263936, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 1, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 74.81600344181061, - "p90": 102.65599936246872, - "p95": 110.68800091743469, - "p99": 122.49600142240524 - }, - "combine": { - "p50": 68.2239979505539, - "p90": 86.14400029182434, - "p95": 90.4960036277771, - "p99": 105.95200210809708 - }, - "roundtrip": { - "p50": 121.44000083208084, - "p90": 152.25599706172943, - "p95": 161.40800714492798, - "p99": 200.9280025959015 - }, - "isolatedSum": { - "p50": 143.0400013923645, - "p90": 188.79999965429306, - "p95": 201.1840045452118, - "p99": 228.44800353050232 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 8503296, - "combineLogicalBytes": 8503296, - "fanoutMean": 5.40625, - "recvTokensMax": 92, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 82.49600231647491, - "p90": 114.01599645614624, - "p95": 123.74400347471237, - "p99": 148.3519971370697 - }, - "combine": { - "p50": 74.14399832487106, - "p90": 88.60799670219421, - "p95": 94.11200135946274, - "p99": 106.81600123643875 - }, - "roundtrip": { - "p50": 128.54400277137756, - "p90": 162.33600676059723, - "p95": 178.20799350738525, - "p99": 222.30400145053864 - }, - "isolatedSum": { - "p50": 156.64000064134598, - "p90": 202.62399315834045, - "p95": 217.8560048341751, - "p99": 255.16799837350845 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 16908288, - 
"combineLogicalBytes": 16908288, - "fanoutMean": 5.375, - "recvTokensMax": 182, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 64, - "globalTokens": 512, - "dispatch": { - "p50": 94.36800330877304, - "p90": 133.02400708198547, - "p95": 138.49599659442902, - "p99": 182.20800161361694 - }, - "combine": { - "p50": 81.44000172615051, - "p90": 95.42399644851685, - "p95": 100.5759984254837, - "p99": 123.74400347471237 - }, - "roundtrip": { - "p50": 151.2320041656494, - "p90": 172.03199863433838, - "p95": 182.17599391937256, - "p99": 404.1599929332733 - }, - "isolatedSum": { - "p50": 175.80800503492355, - "p90": 228.44800353050232, - "p95": 239.07199501991272, - "p99": 305.9520050883293 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 33423360, - "combineLogicalBytes": 33423360, - "fanoutMean": 5.3125, - "recvTokensMax": 367, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 109.0880036354065, - "p90": 134.68800485134125, - "p95": 142.752006649971, - "p99": 173.3119934797287 - }, - "combine": { - "p50": 97.43999689817429, - "p90": 114.97599631547928, - "p95": 121.08799815177917, - "p99": 138.75199854373932 - }, - "roundtrip": { - "p50": 180.1919937133789, - "p90": 205.56800067424774, - "p95": 210.07999777793884, - "p99": 237.7600073814392 - }, - "isolatedSum": { - "p50": 206.52800053358078, - "p90": 249.66400116682053, - "p95": 263.8400048017502, - "p99": 312.063992023468 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 66576384, - "combineLogicalBytes": 66576384, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-a3bb3bd5", - "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|fp8-saturation|none|none|0|tuned||8c8497a77d9085d", - 
"colorKey": "h200_d982b749", - "comparisonKey": "1e550a8055ce0039", - "schemaVersion": 3, - "generatedAt": "2026-06-27T00:06:16.783949+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_12", - "sku": "h200", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · bf16", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "fp8-saturation", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "8c8497a77d9085d", - "workloadId": "set:4:d8d49658059863f2", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28272139795", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272139795", - "createdAt": "2026-06-27T00:06:16.783949+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 82.65600353479385, - "p90": 
133.59999656677246, - "p95": 142.59199798107147, - "p99": 158.4320068359375 - }, - "combine": { - "p50": 76.38400048017502, - "p90": 99.61599856615067, - "p95": 103.84000092744827, - "p99": 158.1760048866272 - }, - "roundtrip": { - "p50": 128.35200130939484, - "p90": 157.21599757671356, - "p95": 169.63200271129608, - "p99": 325.6959915161133 - }, - "isolatedSum": { - "p50": 159.04000401496887, - "p90": 233.21599513292313, - "p95": 246.43199890851974, - "p99": 316.6080117225647 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 630784, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 1, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 73.60000163316727, - "p90": 94.81599926948547, - "p95": 101.82400047779083, - "p99": 127.32799351215363 - }, - "combine": { - "p50": 70.23999840021133, - "p90": 99.16800260543823, - "p95": 101.34399682283401, - "p99": 121.34400010108948 - }, - "roundtrip": { - "p50": 130.5599957704544, - "p90": 186.46399676799774, - "p95": 191.3280040025711, - "p99": 227.48799622058868 - }, - "isolatedSum": { - "p50": 143.8400000333786, - "p90": 193.9840018749237, - "p95": 203.16799730062485, - "p99": 248.6719936132431 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 3, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 84.70399677753448, - "p90": 100.44799745082855, - "p95": 106.55999928712845, - "p99": 121.18399888277054 - }, - "combine": { - "p50": 77.47200131416321, - "p90": 89.47200328111649, - "p95": 95.32800316810608, - "p99": 106.1440035700798 - }, - "roundtrip": { - "p50": 137.37599551677704, - "p90": 158.49600732326508, - "p95": 167.42399334907532, - "p99": 188.54400515556335 - }, - "isolatedSum": { 
- "p50": 162.1759980916977, - "p90": 189.92000073194504, - "p95": 201.88800245523453, - "p99": 227.32800245285034 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 19726336, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 182, - "stragglerRank": 3, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 117.69600212574005, - "p90": 133.15199315547943, - "p95": 140.25600254535675, - "p99": 154.7199934720993 - }, - "combine": { - "p50": 105.82400113344193, - "p90": 123.55200201272964, - "p95": 129.50399518013, - "p99": 141.85599982738495 - }, - "roundtrip": { - "p50": 196.83200120925903, - "p90": 213.69600296020508, - "p95": 222.04799950122833, - "p99": 265.8880054950714 - }, - "isolatedSum": { - "p50": 223.52000325918198, - "p90": 256.7039951682091, - "p95": 269.75999772548676, - "p99": 296.57599329948425 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 3, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-9ca51f4f", - "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", - "colorKey": "h200_d982b749", - "comparisonKey": "a8d7aa1ea70e9702", - "schemaVersion": 3, - "generatedAt": "2026-06-27T10:26:23.408406+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_7", - "sku": "h200", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · bf16", - "model": "DeepSeek-V3/V4", - "shape": { - 
"hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "ac583971f94b176", - "workloadId": "set:8:d8d49658059863f2", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "2.0.0+af9a040", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28286432534", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28286432534", - "createdAt": "2026-06-27T10:26:23.408406+00:00", - "sha": "91c7acf59a5e524f37742922ec67721d86a03f6b" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 72.80000299215317, - "p90": 86.7839977145195, - "p95": 94.08000111579895, - "p99": 129.12000715732574 - }, - "combine": { - "p50": 69.82400268316269, - "p90": 75.68000257015228, - "p95": 78.75200361013412, - "p99": 84.927998483181 - }, - "roundtrip": { - "p50": 124.64000284671783, - "p90": 133.88800621032715, - "p95": 138.65600526332855, - "p99": 154.62400019168854 - }, - "isolatedSum": { - "p50": 142.62400567531586, - "p90": 162.46400028467178, - "p95": 172.83200472593307, - "p99": 214.04800564050674 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 630784, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 7, - 
"correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 72.25599884986877, - "p90": 81.53600245714188, - "p95": 87.64799684286118, - "p99": 115.68000167608261 - }, - "combine": { - "p50": 69.7920024394989, - "p90": 73.72800260782242, - "p95": 78.40000092983246, - "p99": 85.40800213813782 - }, - "roundtrip": { - "p50": 123.74400347471237, - "p90": 142.84799993038177, - "p95": 166.30400717258453, - "p99": 190.2720034122467 - }, - "isolatedSum": { - "p50": 142.04800128936768, - "p90": 155.2640050649643, - "p95": 166.04799777269363, - "p99": 201.08800381422043 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1232896, - "combineLogicalBytes": 1232896, - "fanoutMean": 5.375, - "recvTokensMax": 13, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 73.11999797821045, - "p90": 78.87999713420868, - "p95": 81.82399719953537, - "p99": 103.71199995279312 - }, - "combine": { - "p50": 70.46400010585785, - "p90": 76.92799717187881, - "p95": 79.48800176382065, - "p99": 91.07200056314468 - }, - "roundtrip": { - "p50": 126.08000636100769, - "p90": 152.41600573062897, - "p95": 166.55999422073364, - "p99": 194.0159946680069 - }, - "isolatedSum": { - "p50": 143.5839980840683, - "p90": 155.8079943060875, - "p95": 161.31199896335602, - "p99": 194.7840005159378 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2480128, - "combineLogicalBytes": 2480128, - "fanoutMean": 5.40625, - "recvTokensMax": 29, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 74.49600100517273, - "p90": 88.41600269079208, - "p95": 94.65599805116653, - "p99": 108.92800241708755 - }, - "combine": { - "p50": 71.07199728488922, - "p90": 86.68799698352814, - "p95": 92.22400188446045, - "p99": 99.42399710416794 - }, 
- "roundtrip": { - "p50": 125.47199428081512, - "p90": 137.9839926958084, - "p95": 149.98400211334229, - "p99": 160.35200655460358 - }, - "isolatedSum": { - "p50": 145.56799829006195, - "p90": 175.10399967432022, - "p95": 186.87999993562698, - "p99": 208.3519995212555 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 3, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 79.52000200748444, - "p90": 95.10400146245956, - "p95": 101.56799852848053, - "p99": 126.11199915409088 - }, - "combine": { - "p50": 74.5920017361641, - "p90": 89.4400030374527, - "p95": 96.79999947547913, - "p99": 107.13600367307663 - }, - "roundtrip": { - "p50": 130.17599284648895, - "p90": 145.05599439144135, - "p95": 155.2319973707199, - "p99": 176.32000148296356 - }, - "isolatedSum": { - "p50": 154.11200374364853, - "p90": 184.54400449991226, - "p95": 198.36799800395966, - "p99": 233.2480028271675 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 9920512, - "combineLogicalBytes": 9920512, - "fanoutMean": 5.40625, - "recvTokensMax": 92, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 81.15199953317642, - "p90": 91.67999774217606, - "p95": 97.24800288677216, - "p99": 135.3919953107834 - }, - "combine": { - "p50": 78.59200239181519, - "p90": 85.66399663686752, - "p95": 87.90399879217148, - "p99": 94.78399902582169 - }, - "roundtrip": { - "p50": 136.7039978504181, - "p90": 147.74399995803833, - "p95": 156.51200711727142, - "p99": 208.48000049591064 - }, - "isolatedSum": { - "p50": 159.7440019249916, - "p90": 177.34399437904358, - "p95": 185.15200167894363, - "p99": 230.17599433660507 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 19726336, - "combineLogicalBytes": 
19726336, - "fanoutMean": 5.375, - "recvTokensMax": 182, - "stragglerRank": 3, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 64, - "globalTokens": 512, - "dispatch": { - "p50": 96.54399752616882, - "p90": 109.72800105810165, - "p95": 117.63200163841248, - "p99": 141.59999787807465 - }, - "combine": { - "p50": 88.57599645853043, - "p90": 97.82399982213974, - "p95": 102.04800218343735, - "p99": 136.4160031080246 - }, - "roundtrip": { - "p50": 163.26400637626648, - "p90": 180.92800676822662, - "p95": 191.13600254058838, - "p99": 213.56800198554993 - }, - "isolatedSum": { - "p50": 185.11999398469925, - "p90": 207.5520008802414, - "p95": 219.68000382184982, - "p99": 278.01600098609924 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 38993920, - "combineLogicalBytes": 38993920, - "fanoutMean": 5.3125, - "recvTokensMax": 367, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 117.79200285673141, - "p90": 128.00000607967377, - "p95": 133.63200426101685, - "p99": 155.74400126934052 - }, - "combine": { - "p50": 104.89600151777267, - "p90": 113.15199732780457, - "p95": 118.75200271606445, - "p99": 133.40799510478973 - }, - "roundtrip": { - "p50": 196.79999351501465, - "p90": 208.8959962129593, - "p95": 215.10399878025055, - "p99": 228.35199534893036 - }, - "isolatedSum": { - "p50": 222.6880043745041, - "p90": 241.15200340747833, - "p95": 252.3840069770813, - "p99": 289.15199637413025 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-b7604172", - "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||b029c1a6fded400", - "colorKey": "h200_d982b749", - 
"comparisonKey": "4dde4e46080a91eb", - "schemaVersion": 3, - "generatedAt": "2026-06-27T00:14:07.082435+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_2", - "sku": "h200", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · bf16", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "b029c1a6fded400", - "workloadId": "set:3:07d544ac2af401ec", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28272379468", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272379468", - "createdAt": "2026-06-27T00:14:07.082435+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 72.86400347948074, - "p90": 82.24000036716461, - "p95": 88.73599767684937, - 
"p99": 117.66400188207626 - }, - "combine": { - "p50": 70.01599669456482, - "p90": 75.39200037717819, - "p95": 80.6720033288002, - "p99": 96.0640013217926 - }, - "roundtrip": { - "p50": 123.90399724245071, - "p90": 139.74399864673615, - "p95": 148.47999811172485, - "p99": 178.75200510025024 - }, - "isolatedSum": { - "p50": 142.88000017404556, - "p90": 157.6320007443428, - "p95": 169.40800100564957, - "p99": 213.72800320386887 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 81.66400343179703, - "p90": 93.53599697351456, - "p95": 100.70399940013885, - "p99": 128.09599936008453 - }, - "combine": { - "p50": 78.11199873685837, - "p90": 84.51200276613235, - "p95": 89.02399986982346, - "p99": 123.6800029873848 - }, - "roundtrip": { - "p50": 135.13599336147308, - "p90": 146.7200070619583, - "p95": 153.9199948310852, - "p99": 176.89600586891174 - }, - "isolatedSum": { - "p50": 159.7760021686554, - "p90": 178.0479997396469, - "p95": 189.7279992699623, - "p99": 251.77600234746933 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 19726336, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 182, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 117.63200163841248, - "p90": 127.3919939994812, - "p95": 134.0479999780655, - "p99": 154.94400262832642 - }, - "combine": { - "p50": 104.67199981212616, - "p90": 115.42399972677231, - "p95": 121.98399752378464, - "p99": 159.93599593639374 - }, - "roundtrip": { - "p50": 196.25599682331085, - "p90": 206.08000457286835, - "p95": 214.08000588417053, - "p99": 245.27999758720398 - }, - "isolatedSum": { - "p50": 222.30400145053864, - "p90": 
242.8159937262535, - "p95": 256.0319975018501, - "p99": 314.87999856472015 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-875c4f49", - "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||c774c8e4abb34da", - "colorKey": "h200_d982b749", - "comparisonKey": "c8b8b28ca3d145bb", - "schemaVersion": 3, - "generatedAt": "2026-06-27T00:54:14.463003+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_0", - "sku": "h200", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · bf16", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "c774c8e4abb34da", - "workloadId": "set:5:d8d49658059863f2", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - 
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28273509838", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28273509838", - "createdAt": "2026-06-27T00:54:14.463003+00:00", - "sha": "2c15d9415503e9ccb84cd49cf446a122796efc1e" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 71.3919997215271, - "p90": 87.5839963555336, - "p95": 96.3520035147667, - "p99": 139.55199718475342 - }, - "combine": { - "p50": 68.09599697589874, - "p90": 79.55200225114822, - "p95": 84.95999872684479, - "p99": 111.32799834012985 - }, - "roundtrip": { - "p50": 119.55200135707855, - "p90": 147.20000326633453, - "p95": 157.18400478363037, - "p99": 204.6079933643341 - }, - "isolatedSum": { - "p50": 139.48799669742584, - "p90": 167.13599860668182, - "p95": 181.31200224161148, - "p99": 250.87999552488327 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 630784, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 72.9919970035553, - "p90": 94.36800330877304, - "p95": 103.13600301742554, - "p99": 130.68799674510956 - }, - "combine": { - "p50": 68.12799721956253, - "p90": 80.9599980711937, - "p95": 88.19200098514557, - "p99": 105.15200346708298 - }, - "roundtrip": { - "p50": 121.5360015630722, - "p90": 147.16799557209015, - "p95": 157.98400342464447, - "p99": 185.92000007629395 - }, - "isolatedSum": { - "p50": 141.11999422311783, - "p90": 175.32800137996674, - "p95": 191.3280040025711, - "p99": 235.84000021219254 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1232896, - "combineLogicalBytes": 1232896, - "fanoutMean": 5.375, - "recvTokensMax": 13, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - 
"tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 73.02399724721909, - "p90": 95.48799693584442, - "p95": 104.86400127410889, - "p99": 133.08799266815186 - }, - "combine": { - "p50": 68.76800209283829, - "p90": 80.57600259780884, - "p95": 86.30400151014328, - "p99": 105.92000186443329 - }, - "roundtrip": { - "p50": 120.12799829244614, - "p90": 145.56799829006195, - "p95": 155.64799308776855, - "p99": 182.68799781799316 - }, - "isolatedSum": { - "p50": 141.79199934005737, - "p90": 176.06399953365326, - "p95": 191.16800278425217, - "p99": 239.00799453258514 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2480128, - "combineLogicalBytes": 2480128, - "fanoutMean": 5.40625, - "recvTokensMax": 29, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 73.66400212049484, - "p90": 93.75999867916107, - "p95": 98.65599870681763, - "p99": 113.18399757146835 - }, - "combine": { - "p50": 68.76800209283829, - "p90": 80.54400235414505, - "p95": 82.49600231647491, - "p99": 91.77599847316742 - }, - "roundtrip": { - "p50": 121.08799815177917, - "p90": 145.9839940071106, - "p95": 156.99200332164764, - "p99": 216.35200083255768 - }, - "isolatedSum": { - "p50": 142.43200421333313, - "p90": 174.30400103330612, - "p95": 181.15200102329254, - "p99": 204.95999604463577 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 73.91999661922455, - "p90": 96.67199850082397, - "p95": 103.2319962978363, - "p99": 125.34399330615997 - }, - "combine": { - "p50": 70.75200229883194, - "p90": 84.03199911117554, - "p95": 89.59999680519104, - "p99": 103.87200117111206 - }, - "roundtrip": { - "p50": 123.9359974861145, - 
"p90": 155.8080017566681, - "p95": 170.49600183963776, - "p99": 205.6960016489029 - }, - "isolatedSum": { - "p50": 144.6719989180565, - "p90": 180.7039976119995, - "p95": 192.83199310302734, - "p99": 229.21599447727203 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 9920512, - "combineLogicalBytes": 9920512, - "fanoutMean": 5.40625, - "recvTokensMax": 92, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-19b41153", - "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|small-amplitude|none|none|0|tuned||8c8497a77d9085d", - "colorKey": "h200_d982b749", - "comparisonKey": "fb9666d12f9a34f8", - "schemaVersion": 3, - "generatedAt": "2026-06-27T00:05:55.021886+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_0", - "sku": "h200", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · bf16", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "small-amplitude", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "8c8497a77d9085d", - "workloadId": 
"set:4:d8d49658059863f2", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28272132556", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272132556", - "createdAt": "2026-06-27T00:05:55.021886+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 72.31999933719635, - "p90": 95.0080007314682, - "p95": 99.93600100278854, - "p99": 117.69600212574005 - }, - "combine": { - "p50": 68.00000369548798, - "p90": 79.55200225114822, - "p95": 85.79199761152267, - "p99": 114.04799669981003 - }, - "roundtrip": { - "p50": 120.70400267839432, - "p90": 148.60799908638, - "p95": 156.54399991035461, - "p99": 199.0399956703186 - }, - "isolatedSum": { - "p50": 140.32000303268433, - "p90": 174.56000298261642, - "p95": 185.72799861431122, - "p99": 231.74399882555008 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 630784, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 73.02399724721909, - "p90": 95.87199985980988, - "p95": 102.91200131177902, - "p99": 124.35200065374374 - }, - "combine": { - "p50": 68.67200136184692, - "p90": 82.75199681520462, - "p95": 89.53599631786346, - "p99": 112.96000331640244 - }, - "roundtrip": { - "p50": 123.10399860143661, - "p90": 151.39199793338776, - "p95": 160.19199788570404, - "p99": 189.69599902629852 - }, - "isolatedSum": { - "p50": 141.695998609066, - "p90": 178.6239966750145, - "p95": 192.4479976296425, - "p99": 237.31200397014618 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, - 
"combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 87.52000331878662, - "p90": 135.23200154304504, - "p95": 142.04800128936768, - "p99": 161.21600568294525 - }, - "combine": { - "p50": 77.504001557827, - "p90": 92.38400310277939, - "p95": 97.120001912117, - "p99": 111.77600175142288 - }, - "roundtrip": { - "p50": 135.77599823474884, - "p90": 158.81599485874176, - "p95": 168.92799735069275, - "p99": 212.67199516296387 - }, - "isolatedSum": { - "p50": 165.02400487661362, - "p90": 227.61600464582443, - "p95": 239.16800320148468, - "p99": 272.99200743436813 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 19726336, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 182, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 116.67200177907944, - "p90": 136.83199882507324, - "p95": 145.79200744628906, - "p99": 161.6320013999939 - }, - "combine": { - "p50": 105.76000064611435, - "p90": 121.63200229406357, - "p95": 128.06400656700134, - "p99": 140.60799777507782 - }, - "roundtrip": { - "p50": 195.93599438667297, - "p90": 217.3759937286377, - "p95": 223.4240025281906, - "p99": 252.9279887676239 - }, - "isolatedSum": { - "p50": 222.4320024251938, - "p90": 258.4640011191368, - "p95": 273.8560140132904, - "p99": 302.2399991750717 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-6b3584db", - "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|wide-dynamic-range|none|none|0|tuned||8c8497a77d9085d", - 
"colorKey": "h200_d982b749", - "comparisonKey": "0dade16dc8be5c94", - "schemaVersion": 3, - "generatedAt": "2026-06-27T00:06:19.346761+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_11", - "sku": "h200", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · bf16", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "wide-dynamic-range", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "8c8497a77d9085d", - "workloadId": "set:4:d8d49658059863f2", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28272136313", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272136313", - "createdAt": "2026-06-27T00:06:19.346761+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 75.13599842786789, - "p90": 
99.84000027179718, - "p95": 112.38399893045425, - "p99": 175.48799514770508 - }, - "combine": { - "p50": 69.95200365781784, - "p90": 81.95199817419052, - "p95": 87.3280018568039, - "p99": 117.95199662446976 - }, - "roundtrip": { - "p50": 127.51999497413635, - "p90": 157.9200029373169, - "p95": 171.7119961977005, - "p99": 223.26399385929108 - }, - "isolatedSum": { - "p50": 145.08800208568573, - "p90": 181.7919984459877, - "p95": 199.71200078725815, - "p99": 293.43999177217484 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 630784, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 76.03199779987335, - "p90": 112.73600161075592, - "p95": 125.37600100040436, - "p99": 209.4080001115799 - }, - "combine": { - "p50": 70.91200351715088, - "p90": 86.30400151014328, - "p95": 95.13600170612335, - "p99": 123.16799908876419 - }, - "roundtrip": { - "p50": 125.11999905109406, - "p90": 156.99200332164764, - "p95": 177.47199535369873, - "p99": 251.64800882339478 - }, - "isolatedSum": { - "p50": 146.94400131702423, - "p90": 199.0400031208992, - "p95": 220.5120027065277, - "p99": 332.5759992003441 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 84.28800106048584, - "p90": 108.15999656915665, - "p95": 115.29599875211716, - "p99": 152.70400047302246 - }, - "combine": { - "p50": 78.36800068616867, - "p90": 91.87199920415878, - "p95": 98.55999797582626, - "p99": 110.17599701881409 - }, - "roundtrip": { - "p50": 138.46400380134583, - "p90": 167.23200678825378, - "p95": 179.45599555969238, - "p99": 238.91200125217438 - }, - "isolatedSum": { 
- "p50": 162.6560017466545, - "p90": 200.03199577331543, - "p95": 213.85599672794342, - "p99": 262.87999749183655 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 19726336, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 182, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 116.09599739313126, - "p90": 137.56799697875977, - "p95": 140.76800644397736, - "p99": 157.47199952602386 - }, - "combine": { - "p50": 104.35199737548828, - "p90": 122.97599762678146, - "p95": 125.50400197505951, - "p99": 148.5760062932968 - }, - "roundtrip": { - "p50": 198.7520009279251, - "p90": 219.2319929599762, - "p95": 227.58400440216064, - "p99": 269.3440020084381 - }, - "isolatedSum": { - "p50": 220.44799476861954, - "p90": 260.54399460554123, - "p95": 266.27200841903687, - "p99": 306.0480058193207 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 1, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-f4f3e72f", - "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|zeros|none|none|0|tuned||8c8497a77d9085d", - "colorKey": "h200_d982b749", - "comparisonKey": "c5d592397744e4a1", - "schemaVersion": 3, - "generatedAt": "2026-06-27T00:05:52.426268+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_2", - "sku": "h200", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · bf16", - "model": "DeepSeek-V3/V4", - "shape": { - 
"hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "zeros", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "8c8497a77d9085d", - "workloadId": "set:4:d8d49658059863f2", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28272129001", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272129001", - "createdAt": "2026-06-27T00:05:52.426268+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 73.7600028514862, - "p90": 98.59199821949005, - "p95": 107.77600109577179, - "p99": 133.31200182437897 - }, - "combine": { - "p50": 70.592001080513, - "p90": 84.54400300979614, - "p95": 90.43200314044952, - "p99": 139.26400244235992 - }, - "roundtrip": { - "p50": 125.59999525547028, - "p90": 159.87199544906616, - "p95": 172.57599532604218, - "p99": 367.2960102558136 - }, - "isolatedSum": { - "p50": 144.3520039319992, - "p90": 183.1360012292862, - "p95": 198.2080042362213, - "p99": 272.5760042667389 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 630784, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 7, - "correct": true, 
- "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 74.87999647855759, - "p90": 107.04000294208527, - "p95": 128.80000472068787, - "p99": 359.391987323761 - }, - "combine": { - "p50": 70.49600034952164, - "p90": 84.06399935483932, - "p95": 89.88799899816513, - "p99": 102.9760017991066 - }, - "roundtrip": { - "p50": 124.70400333404541, - "p90": 155.10399639606476, - "p95": 165.72800278663635, - "p99": 202.7519941329956 - }, - "isolatedSum": { - "p50": 145.37599682807922, - "p90": 191.1040022969246, - "p95": 218.688003718853, - "p99": 462.3679891228676 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 2, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 83.90399813652039, - "p90": 107.07200318574905, - "p95": 115.9679964184761, - "p99": 136.51199638843536 - }, - "combine": { - "p50": 78.33600044250488, - "p90": 91.93599969148636, - "p95": 97.69599884748459, - "p99": 108.83200168609619 - }, - "roundtrip": { - "p50": 137.2160017490387, - "p90": 170.23999989032745, - "p95": 181.37599527835846, - "p99": 215.36000072956085 - }, - "isolatedSum": { - "p50": 162.23999857902527, - "p90": 199.0080028772354, - "p95": 213.6639952659607, - "p99": 245.34399807453156 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 19726336, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 182, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 115.99999666213989, - "p90": 135.71199774742126, - "p95": 143.8400000333786, - "p99": 168.67199540138245 - }, - "combine": { - "p50": 104.73600029945374, - "p90": 121.47200107574463, - "p95": 125.47199428081512, - "p99": 163.00800442695618 - }, 
- "roundtrip": { - "p50": 196.6720074415207, - "p90": 216.19200706481934, - "p95": 220.5120027065277, - "p99": 240.1919960975647 - }, - "isolatedSum": { - "p50": 220.73599696159363, - "p90": 257.1839988231659, - "p95": 269.3119943141937, - "p99": 331.6799998283386 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-eb6d6f9b", - "identity": "h200|deepep|7168|8|256|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", - "colorKey": "h200_3a47b6c9", - "comparisonKey": "4a72e21e2f542236", - "schemaVersion": 3, - "generatedAt": "2026-06-26T23:49:45.031759+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_7", - "sku": "h200", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "runtime-visible-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · bf16", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "ac583971f94b176", 
- "workloadId": "set:8:d8d49658059863f2", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28271615137", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271615137", - "createdAt": "2026-06-26T23:49:45.031759+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 70.97599655389786, - "p90": 85.50400286912918, - "p95": 95.36000341176987, - "p99": 316.79999828338623 - }, - "combine": { - "p50": 68.70400160551071, - "p90": 74.72000271081924, - "p95": 78.72000336647034, - "p99": 94.2080020904541 - }, - "roundtrip": { - "p50": 122.56000190973282, - "p90": 143.26399564743042, - "p95": 153.1199961900711, - "p99": 172.2240000963211 - }, - "isolatedSum": { - "p50": 139.67999815940857, - "p90": 160.22400557994843, - "p95": 174.0800067782402, - "p99": 411.00800037384033 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 630784, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 72.12799787521362, - "p90": 85.24800091981888, - "p95": 91.93599969148636, - "p99": 119.48800086975098 - }, - "combine": { - "p50": 68.57600063085556, - "p90": 72.83200323581696, - "p95": 77.15199887752533, - "p99": 83.45600217580795 - }, - "roundtrip": { - "p50": 120.83200365304947, - "p90": 129.2160004377365, - "p95": 133.215993642807, - "p99": 145.75999975204468 - }, - "isolatedSum": { - "p50": 140.70399850606918, - "p90": 158.08000415563583, - "p95": 169.0879985690117, - "p99": 202.94400304555893 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 
1232896, - "combineLogicalBytes": 1232896, - "fanoutMean": 5.375, - "recvTokensMax": 13, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 73.63200187683105, - "p90": 90.84799885749817, - "p95": 103.64799946546555, - "p99": 133.02400708198547 - }, - "combine": { - "p50": 70.52800059318542, - "p90": 80.86399734020233, - "p95": 87.74399757385254, - "p99": 105.6319996714592 - }, - "roundtrip": { - "p50": 123.64800274372101, - "p90": 149.59999918937683, - "p95": 158.33599865436554, - "p99": 186.0480010509491 - }, - "isolatedSum": { - "p50": 144.16000247001648, - "p90": 171.7119961977005, - "p95": 191.39199703931808, - "p99": 238.65600675344467 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2480128, - "combineLogicalBytes": 2480128, - "fanoutMean": 5.40625, - "recvTokensMax": 29, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 73.18399846553802, - "p90": 83.03999900817871, - "p95": 94.91200000047684, - "p99": 104.09600287675858 - }, - "combine": { - "p50": 69.2799985408783, - "p90": 77.82399654388428, - "p95": 83.10399949550629, - "p99": 110.04800349473953 - }, - "roundtrip": { - "p50": 123.52000176906586, - "p90": 143.19999516010284, - "p95": 152.0960032939911, - "p99": 205.08800446987152 - }, - "isolatedSum": { - "p50": 142.46399700641632, - "p90": 160.863995552063, - "p95": 178.01599949598312, - "p99": 214.1440063714981 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 71.16799801588058, - "p90": 83.36000144481659, - "p95": 94.11200135946274, - "p99": 106.46399855613708 - }, - 
"combine": { - "p50": 70.04799693822861, - "p90": 78.07999849319458, - "p95": 83.20000022649765, - "p99": 95.71199864149094 - }, - "roundtrip": { - "p50": 124.54400211572647, - "p90": 144.0960019826889, - "p95": 155.008003115654, - "p99": 204.3839991092682 - }, - "isolatedSum": { - "p50": 141.2159949541092, - "p90": 161.43999993801117, - "p95": 177.3120015859604, - "p99": 202.17599719762802 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 9920512, - "combineLogicalBytes": 9920512, - "fanoutMean": 5.40625, - "recvTokensMax": 92, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 80.60800284147263, - "p90": 89.59999680519104, - "p95": 94.81599926948547, - "p99": 117.53600090742111 - }, - "combine": { - "p50": 77.08799839019775, - "p90": 81.95199817419052, - "p95": 87.3280018568039, - "p99": 95.0080007314682 - }, - "roundtrip": { - "p50": 135.19999384880066, - "p90": 148.47999811172485, - "p95": 156.63999319076538, - "p99": 188.57599794864655 - }, - "isolatedSum": { - "p50": 157.69600123167038, - "p90": 171.55199497938156, - "p95": 182.14400112628937, - "p99": 212.5440016388893 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 19726336, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 182, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 64, - "globalTokens": 512, - "dispatch": { - "p50": 94.68799829483032, - "p90": 114.97599631547928, - "p95": 122.36800044775009, - "p99": 148.03199470043182 - }, - "combine": { - "p50": 87.39200234413147, - "p90": 97.59999811649323, - "p95": 102.9760017991066, - "p99": 113.95200341939926 - }, - "roundtrip": { - "p50": 158.87999534606934, - "p90": 176.15999281406403, - "p95": 185.2159947156906, - "p99": 225.600004196167 - }, - "isolatedSum": { - "p50": 182.0800006389618, - "p90": 212.5759944319725, - "p95": 
225.3440022468567, - "p99": 261.9839981198311 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 38993920, - "combineLogicalBytes": 38993920, - "fanoutMean": 5.3125, - "recvTokensMax": 367, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 116.22399836778641, - "p90": 125.18399953842163, - "p95": 132.32000172138214, - "p99": 140.83200693130493 - }, - "combine": { - "p50": 105.34399747848511, - "p90": 111.32799834012985, - "p95": 116.28799885511398, - "p99": 123.83999675512314 - }, - "roundtrip": { - "p50": 197.60000705718994, - "p90": 207.2640061378479, - "p95": 214.81600403785706, - "p99": 241.05599522590637 - }, - "isolatedSum": { - "p50": 221.56799584627151, - "p90": 236.51199787855148, - "p95": 248.60800057649612, - "p99": 264.67200368642807 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-55459bb6", - "identity": "h200|deepep|7168|8|384|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||d6c49ae98878760", - "colorKey": "h200_d982b749", - "comparisonKey": "a923f4d59c22dd5b", - "schemaVersion": 3, - "generatedAt": "2026-06-27T11:14:11.699427+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_3", - "sku": "h200", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · bf16", - "model": "Kimi-K2", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 384, - "routing": "uniform", - 
"routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "d6c49ae98878760", - "workloadId": "set:8:9a27d0df4b17fa09", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28287501303", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28287501303", - "createdAt": "2026-06-27T11:14:11.699427+00:00", - "sha": "df7fddee0a275156d4c72fa006cd2b73bce72613" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 72.89600372314453, - "p90": 89.9519994854927, - "p95": 103.16800326108932, - "p99": 117.63200163841248 - }, - "combine": { - "p50": 68.80000233650208, - "p90": 81.34400099515915, - "p95": 87.26400136947632, - "p99": 99.84000027179718 - }, - "roundtrip": { - "p50": 125.2799928188324, - "p90": 158.52800011634827, - "p95": 173.2800006866455, - "p99": 205.02400398254395 - }, - "isolatedSum": { - "p50": 141.6960060596466, - "p90": 171.29600048065186, - "p95": 190.43200463056564, - "p99": 217.47200191020966 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 602112, - "combineLogicalBytes": 602112, - "fanoutMean": 5.25, - "recvTokensMax": 8, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, 
- "globalTokens": 16, - "dispatch": { - "p50": 76.31999999284744, - "p90": 108.83200168609619, - "p95": 118.65600198507309, - "p99": 149.9519944190979 - }, - "combine": { - "p50": 70.23999840021133, - "p90": 85.85599809885025, - "p95": 93.98400038480759, - "p99": 114.656001329422 - }, - "roundtrip": { - "p50": 127.77599692344666, - "p90": 164.44799304008484, - "p95": 174.23999309539795, - "p99": 215.96799790859222 - }, - "isolatedSum": { - "p50": 146.55999839305878, - "p90": 194.68799978494644, - "p95": 212.64000236988068, - "p99": 264.6079957485199 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1218560, - "combineLogicalBytes": 1218560, - "fanoutMean": 5.3125, - "recvTokensMax": 14, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 76.89599692821503, - "p90": 109.8560020327568, - "p95": 119.07199770212173, - "p99": 162.11199760437012 - }, - "combine": { - "p50": 71.00799679756165, - "p90": 83.90399813652039, - "p95": 90.08000046014786, - "p99": 109.15199667215347 - }, - "roundtrip": { - "p50": 130.43199479579926, - "p90": 159.39199924468994, - "p95": 176.57600343227386, - "p99": 223.51999580860138 - }, - "isolatedSum": { - "p50": 147.90399372577667, - "p90": 193.7600001692772, - "p95": 209.1519981622696, - "p99": 271.2639942765236 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2408448, - "combineLogicalBytes": 2408448, - "fanoutMean": 5.25, - "recvTokensMax": 26, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 77.02399790287018, - "p90": 101.31199657917023, - "p95": 112.44799941778183, - "p99": 129.85600531101227 - }, - "combine": { - "p50": 71.71200215816498, - "p90": 84.927998483181, - "p95": 90.97599983215332, - "p99": 101.95200145244598 - }, - "roundtrip": { - "p50": 128.76799702644348, - "p90": 155.13600409030914, - 
"p95": 164.32000696659088, - "p99": 192.51200556755066 - }, - "isolatedSum": { - "p50": 148.73600006103516, - "p90": 186.23999506235123, - "p95": 203.42399924993515, - "p99": 231.80800676345825 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4831232, - "combineLogicalBytes": 4831232, - "fanoutMean": 5.265625, - "recvTokensMax": 48, - "stragglerRank": 1, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 79.6160027384758, - "p90": 105.47199845314026, - "p95": 115.23199826478958, - "p99": 141.53599739074707 - }, - "combine": { - "p50": 72.41600006818771, - "p90": 86.14400029182434, - "p95": 92.00000017881393, - "p99": 109.40799862146378 - }, - "roundtrip": { - "p50": 131.20000064373016, - "p90": 157.27999806404114, - "p95": 168.64000260829926, - "p99": 207.74400234222412 - }, - "isolatedSum": { - "p50": 152.0320028066635, - "p90": 191.6159987449646, - "p95": 207.23199844360352, - "p99": 250.94399601221085 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 9848832, - "combineLogicalBytes": 9848832, - "fanoutMean": 5.3671875, - "recvTokensMax": 91, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 88.86399865150452, - "p90": 114.72000181674957, - "p95": 122.8799968957901, - "p99": 132.47999548912048 - }, - "combine": { - "p50": 79.26400005817413, - "p90": 90.01599997282028, - "p95": 98.01600128412247, - "p99": 109.92000252008438 - }, - "roundtrip": { - "p50": 139.77600634098053, - "p90": 164.2879992723465, - "p95": 177.63200402259827, - "p99": 194.59199905395508 - }, - "isolatedSum": { - "p50": 168.12799870967865, - "p90": 204.73600178956985, - "p95": 220.89599817991257, - "p99": 242.39999800920486 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 19496960, - "combineLogicalBytes": 19496960, - "fanoutMean": 5.3125, - "recvTokensMax": 178, - 
"stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 64, - "globalTokens": 512, - "dispatch": { - "p50": 98.88000041246414, - "p90": 118.01599711179733, - "p95": 126.81600451469421, - "p99": 194.2719966173172 - }, - "combine": { - "p50": 88.22400122880936, - "p90": 102.27199643850327, - "p95": 108.2879975438118, - "p99": 123.07199835777283 - }, - "roundtrip": { - "p50": 163.55200111865997, - "p90": 186.11200153827667, - "p95": 196.83200120925903, - "p99": 244.3840056657791 - }, - "isolatedSum": { - "p50": 187.1040016412735, - "p90": 220.2879935503006, - "p95": 235.104002058506, - "p99": 317.34399497509 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 38836224, - "combineLogicalBytes": 38836224, - "fanoutMean": 5.291015625, - "recvTokensMax": 372, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 119.03999745845795, - "p90": 139.42399621009827, - "p95": 145.4080045223236, - "p99": 166.24000668525696 - }, - "combine": { - "p50": 106.36799782514572, - "p90": 121.37600034475327, - "p95": 126.3359934091568, - "p99": 143.0400013923645 - }, - "roundtrip": { - "p50": 199.072003364563, - "p90": 215.26400744915009, - "p95": 224.48000311851501, - "p99": 243.3599978685379 - }, - "isolatedSum": { - "p50": 225.40799528360367, - "p90": 260.79999655485153, - "p95": 271.7439979314804, - "p99": 309.28000807762146 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 77514752, - "combineLogicalBytes": 77514752, - "fanoutMean": 5.2802734375, - "recvTokensMax": 707, - "stragglerRank": 1, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-dea4952a", - "identity": "h200|deepep|7168|8|384|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||d6c49ae98878760", - "colorKey": "h200_3a47b6c9", - "comparisonKey": "f2cda8ef40003c42", - "schemaVersion": 3, - 
"generatedAt": "2026-06-26T23:53:13.205485+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_7", - "sku": "h200", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "runtime-visible-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · bf16", - "model": "Kimi-K2", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 384, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "d6c49ae98878760", - "workloadId": "set:8:9a27d0df4b17fa09", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28271728983", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271728983", - "createdAt": "2026-06-26T23:53:13.205485+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 73.53600114583969, - "p90": 98.14400225877762, - "p95": 107.45599865913391, - "p99": 121.63200229406357 - }, - "combine": { - "p50": 68.2239979505539, 
- "p90": 82.24000036716461, - "p95": 87.26400136947632, - "p99": 110.07999628782272 - }, - "roundtrip": { - "p50": 125.59999525547028, - "p90": 155.39200603961945, - "p95": 163.68000209331512, - "p99": 201.6959935426712 - }, - "isolatedSum": { - "p50": 141.75999909639359, - "p90": 180.38400262594223, - "p95": 194.72000002861023, - "p99": 231.7119985818863 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 602112, - "combineLogicalBytes": 602112, - "fanoutMean": 5.25, - "recvTokensMax": 8, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 75.87199658155441, - "p90": 103.2319962978363, - "p95": 112.15999722480774, - "p99": 193.05600225925446 - }, - "combine": { - "p50": 68.60800087451935, - "p90": 83.5840031504631, - "p95": 90.30400216579437, - "p99": 129.60000336170197 - }, - "roundtrip": { - "p50": 123.23199957609177, - "p90": 153.31199765205383, - "p95": 164.38399255275726, - "p99": 185.37600338459015 - }, - "isolatedSum": { - "p50": 144.47999745607376, - "p90": 186.8159994482994, - "p95": 202.4639993906021, - "p99": 322.6560056209564 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1218560, - "combineLogicalBytes": 1218560, - "fanoutMean": 5.3125, - "recvTokensMax": 14, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 74.87999647855759, - "p90": 96.92800045013428, - "p95": 105.85600137710571, - "p99": 121.15199863910675 - }, - "combine": { - "p50": 69.2799985408783, - "p90": 82.56000280380249, - "p95": 90.30400216579437, - "p99": 102.04800218343735 - }, - "roundtrip": { - "p50": 125.2799928188324, - "p90": 152.28800475597382, - "p95": 160.8320027589798, - "p99": 174.55999553203583 - }, - "isolatedSum": { - "p50": 144.15999501943588, - "p90": 179.48800325393677, - "p95": 196.16000354290009, - "p99": 223.2000008225441 - }, - 
"roundtripMeasured": true, - "dispatchLogicalBytes": 2408448, - "combineLogicalBytes": 2408448, - "fanoutMean": 5.25, - "recvTokensMax": 26, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 76.54400169849396, - "p90": 104.2879968881607, - "p95": 111.42399907112122, - "p99": 138.5599970817566 - }, - "combine": { - "p50": 70.52800059318542, - "p90": 85.66399663686752, - "p95": 91.67999774217606, - "p99": 102.59199887514114 - }, - "roundtrip": { - "p50": 126.39999389648438, - "p90": 154.55999970436096, - "p95": 166.97600483894348, - "p99": 208.67200195789337 - }, - "isolatedSum": { - "p50": 147.07200229167938, - "p90": 189.95199352502823, - "p95": 203.10399681329727, - "p99": 241.15199595689774 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4831232, - "combineLogicalBytes": 4831232, - "fanoutMean": 5.265625, - "recvTokensMax": 48, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 78.40000092983246, - "p90": 99.07200187444687, - "p95": 105.98400235176086, - "p99": 126.3359934091568 - }, - "combine": { - "p50": 71.61600142717361, - "p90": 85.40800213813782, - "p95": 90.27200192213058, - "p99": 109.40799862146378 - }, - "roundtrip": { - "p50": 129.02399897575378, - "p90": 156.2879979610443, - "p95": 166.143998503685, - "p99": 196.51199877262115 - }, - "isolatedSum": { - "p50": 150.01600235700607, - "p90": 184.4800040125847, - "p95": 196.25600427389145, - "p99": 235.74399203062057 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 9848832, - "combineLogicalBytes": 9848832, - "fanoutMean": 5.3671875, - "recvTokensMax": 91, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 85.40800213813782, - "p90": 105.12000322341919, - "p95": 
114.04799669981003, - "p99": 131.71200454235077 - }, - "combine": { - "p50": 77.91999727487564, - "p90": 90.59199690818787, - "p95": 96.63999825716019, - "p99": 105.18400371074677 - }, - "roundtrip": { - "p50": 137.2160017490387, - "p90": 163.07200491428375, - "p95": 172.35200107097626, - "p99": 208.064004778862 - }, - "isolatedSum": { - "p50": 163.32799941301346, - "p90": 195.71200013160706, - "p95": 210.68799495697021, - "p99": 236.89600825309753 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 19496960, - "combineLogicalBytes": 19496960, - "fanoutMean": 5.3125, - "recvTokensMax": 178, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 64, - "globalTokens": 512, - "dispatch": { - "p50": 99.55199807882309, - "p90": 126.65599584579468, - "p95": 135.00800728797913, - "p99": 167.10400581359863 - }, - "combine": { - "p50": 89.24800157546997, - "p90": 106.1440035700798, - "p95": 111.23199760913849, - "p99": 126.65599584579468 - }, - "roundtrip": { - "p50": 162.9759967327118, - "p90": 185.88800728321075, - "p95": 193.6960071325302, - "p99": 255.87201118469238 - }, - "isolatedSum": { - "p50": 188.79999965429306, - "p90": 232.79999941587448, - "p95": 246.24000489711761, - "p99": 293.7600016593933 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 38836224, - "combineLogicalBytes": 38836224, - "fanoutMean": 5.291015625, - "recvTokensMax": 372, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 118.367999792099, - "p90": 135.42400300502777, - "p95": 143.5520052909851, - "p99": 181.88799917697906 - }, - "combine": { - "p50": 105.34399747848511, - "p90": 119.99999731779099, - "p95": 126.78399682044983, - "p99": 139.0399932861328 - }, - "roundtrip": { - "p50": 197.53600656986237, - "p90": 215.83999693393707, - "p95": 224.48000311851501, - "p99": 253.1839907169342 - }, - "isolatedSum": { - 
"p50": 223.7119972705841, - "p90": 255.42400032281876, - "p95": 270.33600211143494, - "p99": 320.9279924631119 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 77514752, - "combineLogicalBytes": 77514752, - "fanoutMean": 5.2802734375, - "recvTokensMax": 707, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-14a4cdc0", - "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|alternating-groups@s1|8|decode|normal|none|none|1|tuned||f8662de0b3559f9", - "colorKey": "h200_b02e4015", - "comparisonKey": "7784b2ab75c0721c", - "schemaVersion": 3, - "generatedAt": "2026-06-27T00:13:31.348412+00:00", - "status": "valid", - "publicationStatus": "comparable-experimental", - "runner": "h200-dgxc-slurm_7", - "sku": "h200", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · bf16 · alternating-groups@s1", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "alternating-groups", - "routingLabel": "alternating-groups@s1", - "routingStep": 1, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "f8662de0b3559f9", - "workloadId": null, - "workloadSource": "seeded-runtime", - 
"eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28272358996", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272358996", - "createdAt": "2026-06-27T00:13:31.348412+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 70.75200229883194, - "p90": 98.39999675750732, - "p95": 108.57599973678589, - "p99": 136.03200018405914 - }, - "combine": { - "p50": 67.52000004053116, - "p90": 79.83999699354172, - "p95": 84.09599959850311, - "p99": 104.09600287675858 - }, - "roundtrip": { - "p50": 122.8799968957901, - "p90": 146.62399888038635, - "p95": 155.32800555229187, - "p99": 178.3359944820404 - }, - "isolatedSum": { - "p50": 138.2720023393631, - "p90": 178.23999375104904, - "p95": 192.671999335289, - "p99": 240.12800306081772 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 3297280, - "combineLogicalBytes": 3297280, - "fanoutMean": 3.59375, - "recvTokensMax": 61, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 84.44800227880478, - "p90": 110.30399799346924, - "p95": 138.20800185203552, - "p99": 196.22400403022766 - }, - "combine": { - "p50": 75.16799867153168, - "p90": 85.34400165081024, - "p95": 91.00800007581711, - "p99": 101.02400183677673 - }, - "roundtrip": { - "p50": 135.3919953107834, - "p90": 156.3200056552887, - "p95": 166.4000004529953, - "p99": 198.36799800395966 - }, - "isolatedSum": { - "p50": 159.61600095033646, - "p90": 195.64799964427948, - "p95": 229.21600192785263, - "p99": 297.2480058670044 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 13275136, - "combineLogicalBytes": 13275136, - "fanoutMean": 3.6171875, - 
"recvTokensMax": 236, - "stragglerRank": 2, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 128.48000228405, - "p90": 142.91200041770935, - "p95": 151.36000514030457, - "p99": 290.0159955024719 - }, - "combine": { - "p50": 111.455999314785, - "p90": 123.6800029873848, - "p95": 127.93600559234619, - "p99": 143.71199905872345 - }, - "roundtrip": { - "p50": 210.81599593162537, - "p90": 223.26399385929108, - "p95": 229.34399545192719, - "p99": 257.79199600219727 - }, - "isolatedSum": { - "p50": 239.936001598835, - "p90": 266.59200340509415, - "p95": 279.29601073265076, - "p99": 433.7279945611954 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 53172224, - "combineLogicalBytes": 53172224, - "fanoutMean": 3.6220703125, - "recvTokensMax": 934, - "stragglerRank": 2, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-4bdc0b92", - "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|alternating-groups@s2|8|decode|normal|none|none|2|tuned||3cd13eac5b27759", - "colorKey": "h200_ad2e3b5c", - "comparisonKey": "7784b2ab75c0721c", - "schemaVersion": 3, - "generatedAt": "2026-06-27T00:13:31.907403+00:00", - "status": "valid", - "publicationStatus": "comparable-experimental", - "runner": "h200-dgxc-slurm_4", - "sku": "h200", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · bf16 · alternating-groups@s2", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "alternating-groups", - "routingLabel": "alternating-groups@s2", - "routingStep": 2, - "unevenTokens": "none", - "eplbEnabled": false, - 
"dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "3cd13eac5b27759", - "workloadId": null, - "workloadSource": "seeded-runtime", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28272362308", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272362308", - "createdAt": "2026-06-27T00:13:31.907403+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 71.32799923419952, - "p90": 93.82399916648865, - "p95": 109.72800105810165, - "p99": 145.1520025730133 - }, - "combine": { - "p50": 66.880002617836, - "p90": 72.25599884986877, - "p95": 80.32000064849854, - "p99": 91.39200299978256 - }, - "roundtrip": { - "p50": 123.48800152540207, - "p90": 140.51200449466705, - "p95": 156.8319946527481, - "p99": 195.64799964427948 - }, - "isolatedSum": { - "p50": 138.20800185203552, - "p90": 166.07999801635742, - "p95": 190.0480017066002, - "p99": 236.54400557279587 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 3297280, - "combineLogicalBytes": 3297280, - "fanoutMean": 3.59375, - "recvTokensMax": 61, - "stragglerRank": 1, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 81.4720019698143, - "p90": 136.48000359535217, - "p95": 
151.13599598407745, - "p99": 198.04799556732178 - }, - "combine": { - "p50": 75.80800354480743, - "p90": 89.47200328111649, - "p95": 102.91200131177902, - "p99": 122.36800044775009 - }, - "roundtrip": { - "p50": 134.5279961824417, - "p90": 149.31200444698334, - "p95": 162.9440039396286, - "p99": 204.73599433898926 - }, - "isolatedSum": { - "p50": 157.28000551462173, - "p90": 225.95200687646866, - "p95": 254.04799729585648, - "p99": 320.41599601507187 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 13275136, - "combineLogicalBytes": 13275136, - "fanoutMean": 3.6171875, - "recvTokensMax": 236, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 124.60800260305405, - "p90": 131.80799782276154, - "p95": 137.2160017490387, - "p99": 164.35199975967407 - }, - "combine": { - "p50": 111.00800335407257, - "p90": 119.39200013875961, - "p95": 125.5359947681427, - "p99": 155.03999590873718 - }, - "roundtrip": { - "p50": 208.41600000858307, - "p90": 218.6560034751892, - "p95": 229.72799837589264, - "p99": 263.3279860019684 - }, - "isolatedSum": { - "p50": 235.61600595712662, - "p90": 251.19999796152115, - "p95": 262.7519965171814, - "p99": 319.39199566841125 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 53172224, - "combineLogicalBytes": 53172224, - "fanoutMean": 3.6220703125, - "recvTokensMax": 934, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-fcadbf18", - "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|alternating-groups@s3|8|decode|normal|none|none|3|tuned||f8662de0b3559f9", - "colorKey": "h200_ae2e3cef", - "comparisonKey": "7784b2ab75c0721c", - "schemaVersion": 3, - "generatedAt": "2026-06-27T00:13:36.495887+00:00", - "status": "valid", - "publicationStatus": "comparable-experimental", - "runner": "h200-dgxc-slurm_1", - "sku": "h200", - "backend": "deepep", 
- "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · bf16 · alternating-groups@s3", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "alternating-groups", - "routingLabel": "alternating-groups@s3", - "routingStep": 3, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "f8662de0b3559f9", - "workloadId": null, - "workloadSource": "seeded-runtime", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28272365812", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272365812", - "createdAt": "2026-06-27T00:13:36.495887+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 71.48800045251846, - "p90": 94.46399658918381, - "p95": 99.29600358009338, - "p99": 110.23999750614166 - }, - "combine": { - "p50": 67.10399687290192, - "p90": 80.09599894285202, - "p95": 84.54400300979614, - "p99": 108.31999778747559 - }, - "roundtrip": { - "p50": 119.61600184440613, - "p90": 
148.83199334144592, - "p95": 158.01599621772766, - "p99": 279.9359858036041 - }, - "isolatedSum": { - "p50": 138.59199732542038, - "p90": 174.55999553203583, - "p95": 183.84000658988953, - "p99": 218.55999529361725 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 3297280, - "combineLogicalBytes": 3297280, - "fanoutMean": 3.59375, - "recvTokensMax": 61, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 80.83199709653854, - "p90": 99.45599734783173, - "p95": 105.05600273609161, - "p99": 118.07999759912491 - }, - "combine": { - "p50": 75.23199915885925, - "p90": 87.52000331878662, - "p95": 92.0960009098053, - "p99": 108.51199924945831 - }, - "roundtrip": { - "p50": 133.91999900341034, - "p90": 154.78399395942688, - "p95": 162.04799711704254, - "p99": 176.1920005083084 - }, - "isolatedSum": { - "p50": 156.0639962553978, - "p90": 186.97600066661835, - "p95": 197.1520036458969, - "p99": 226.59199684858322 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 13275136, - "combineLogicalBytes": 13275136, - "fanoutMean": 3.6171875, - "recvTokensMax": 236, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 125.15200674533844, - "p90": 135.96799969673157, - "p95": 141.79199934005737, - "p99": 205.34400641918182 - }, - "combine": { - "p50": 109.72800105810165, - "p90": 120.15999853610992, - "p95": 123.36000055074692, - "p99": 136.7039978504181 - }, - "roundtrip": { - "p50": 207.96799659729004, - "p90": 225.50399601459503, - "p95": 231.77599906921387, - "p99": 246.20799720287323 - }, - "isolatedSum": { - "p50": 234.8800078034401, - "p90": 256.1279982328415, - "p95": 265.1519998908043, - "p99": 342.0480042695999 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 53172224, - "combineLogicalBytes": 53172224, - "fanoutMean": 3.6220703125, - 
"recvTokensMax": 934, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-f361a9a4", - "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|decode|normal|none|none|0|tuned||2279937619f3971", - "colorKey": "h200_b5c683eb", - "comparisonKey": "d82096ba4baa0cd5", - "schemaVersion": 3, - "generatedAt": "2026-06-26T23:56:27.284944+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_8", - "sku": "h200", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · bf16 · balanced", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "balanced", - "routingLabel": "balanced", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "2279937619f3971", - "workloadId": "set:4:7af12818400d6348", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28271830346", - "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271830346", - "createdAt": "2026-06-26T23:56:27.284944+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 76.73600316047668, - "p90": 126.24000012874603, - "p95": 134.46399569511414, - "p99": 156.63999319076538 - }, - "combine": { - "p50": 72.41600006818771, - "p90": 83.71199667453766, - "p95": 87.07199990749359, - "p99": 99.13600236177444 - }, - "roundtrip": { - "p50": 128.38399410247803, - "p90": 148.03199470043182, - "p95": 154.62400019168854, - "p99": 179.6479970216751 - }, - "isolatedSum": { - "p50": 149.1520032286644, - "p90": 209.9519968032837, - "p95": 221.53599560260773, - "p99": 255.77599555253983 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 917504, - "combineLogicalBytes": 917504, - "fanoutMean": 8, - "recvTokensMax": 8, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 75.83999633789062, - "p90": 87.99999952316284, - "p95": 98.11200201511383, - "p99": 113.02399635314941 - }, - "combine": { - "p50": 71.84000313282013, - "p90": 79.6160027384758, - "p95": 85.56800335645676, - "p99": 95.87199985980988 - }, - "roundtrip": { - "p50": 126.81600451469421, - "p90": 139.67999815940857, - "p95": 149.63200688362122, - "p99": 170.20800709724426 - }, - "isolatedSum": { - "p50": 147.67999947071075, - "p90": 167.61600226163864, - "p95": 183.6800053715706, - "p99": 208.8959962129593 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 7340032, - "combineLogicalBytes": 7340032, - "fanoutMean": 8, - "recvTokensMax": 64, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 92.73599833250046, - "p90": 105.50399869680405, - "p95": 112.41599917411804, - "p99": 132.60799646377563 - }, - 
"combine": { - "p50": 81.98399841785431, - "p90": 93.56799721717834, - "p95": 99.58399832248688, - "p99": 112.57600039243698 - }, - "roundtrip": { - "p50": 148.70400726795197, - "p90": 168.7999963760376, - "p95": 180.7679980993271, - "p99": 196.6720074415207 - }, - "isolatedSum": { - "p50": 174.71999675035477, - "p90": 199.0719959139824, - "p95": 211.99999749660492, - "p99": 245.18399685621262 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 29360128, - "combineLogicalBytes": 29360128, - "fanoutMean": 8, - "recvTokensMax": 256, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 139.00800049304962, - "p90": 175.99999904632568, - "p95": 181.34400248527527, - "p99": 197.91999459266663 - }, - "combine": { - "p50": 127.20000743865967, - "p90": 150.68799257278442, - "p95": 153.6639928817749, - "p99": 160.5439931154251 - }, - "roundtrip": { - "p50": 232.92799293994904, - "p90": 266.04801416397095, - "p95": 271.5199887752533, - "p99": 294.20799016952515 - }, - "isolatedSum": { - "p50": 266.2080079317093, - "p90": 326.6879916191101, - "p95": 335.00799536705017, - "p99": 358.46398770809174 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 117440512, - "combineLogicalBytes": 117440512, - "fanoutMean": 8, - "recvTokensMax": 1024, - "stragglerRank": 3, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-d65f5a76", - "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|decode|normal|none|none|0|tuned||ffa946582edb500", - "colorKey": "h200_b5c683eb", - "comparisonKey": "d82096ba4baa0cd5", - "schemaVersion": 3, - "generatedAt": "2026-06-27T00:02:47.642624+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_6", - "sku": "h200", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - 
"comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · bf16 · balanced", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "balanced", - "routingLabel": "balanced", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "ffa946582edb500", - "workloadId": "set:8:7af12818400d6348", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28272028751", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272028751", - "createdAt": "2026-06-27T00:02:47.642624+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 75.99999755620956, - "p90": 106.175996363163, - "p95": 117.60000139474869, - "p99": 352.512001991272 - }, - "combine": { - "p50": 70.68800181150436, - "p90": 85.9839990735054, - "p95": 90.52799642086029, - "p99": 104.12800312042236 - }, - "roundtrip": { - "p50": 124.60800260305405, - "p90": 158.62399339675903, - "p95": 166.46400094032288, - "p99": 186.27199530601501 - }, - "isolatedSum": { - "p50": 
146.68799936771393, - "p90": 192.1599954366684, - "p95": 208.12799781560898, - "p99": 456.64000511169434 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 917504, - "combineLogicalBytes": 917504, - "fanoutMean": 8, - "recvTokensMax": 8, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 74.65600222349167, - "p90": 98.91200065612793, - "p95": 104.09600287675858, - "p99": 114.84800279140472 - }, - "combine": { - "p50": 70.65600156784058, - "p90": 87.20000088214874, - "p95": 91.32800251245499, - "p99": 106.46399855613708 - }, - "roundtrip": { - "p50": 125.59999525547028, - "p90": 161.02400422096252, - "p95": 170.78399658203125, - "p99": 197.05599546432495 - }, - "isolatedSum": { - "p50": 145.31200379133224, - "p90": 186.11200153827667, - "p95": 195.42400538921356, - "p99": 221.3120013475418 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1835008, - "combineLogicalBytes": 1835008, - "fanoutMean": 8, - "recvTokensMax": 16, - "stragglerRank": 3, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 74.20799881219864, - "p90": 102.24000364542007, - "p95": 111.35999858379364, - "p99": 129.63199615478516 - }, - "combine": { - "p50": 71.87200337648392, - "p90": 88.22400122880936, - "p95": 94.52799707651138, - "p99": 115.26399850845337 - }, - "roundtrip": { - "p50": 125.08800625801086, - "p90": 153.53600680828094, - "p95": 163.87200355529785, - "p99": 176.86399817466736 - }, - "isolatedSum": { - "p50": 146.08000218868256, - "p90": 190.46400487422943, - "p95": 205.88799566030502, - "p99": 244.89599466323853 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 3670016, - "combineLogicalBytes": 3670016, - "fanoutMean": 8, - "recvTokensMax": 32, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - 
"globalTokens": 64, - "dispatch": { - "p50": 74.30399954319, - "p90": 98.49599748849869, - "p95": 106.59199953079224, - "p99": 120.19199877977371 - }, - "combine": { - "p50": 71.16799801588058, - "p90": 86.36800199747086, - "p95": 90.52799642086029, - "p99": 109.40799862146378 - }, - "roundtrip": { - "p50": 124.64000284671783, - "p90": 156.73600137233734, - "p95": 164.48000073432922, - "p99": 189.15200233459473 - }, - "isolatedSum": { - "p50": 145.4719975590706, - "p90": 184.86399948596954, - "p95": 197.11999595165253, - "p99": 229.5999974012375 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 7340032, - "combineLogicalBytes": 7340032, - "fanoutMean": 8, - "recvTokensMax": 64, - "stragglerRank": 3, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 75.26399940252304, - "p90": 102.59199887514114, - "p95": 109.76000130176544, - "p99": 125.59999525547028 - }, - "combine": { - "p50": 76.92799717187881, - "p90": 91.23200178146362, - "p95": 94.94400024414062, - "p99": 105.82400113344193 - }, - "roundtrip": { - "p50": 128.7039965391159, - "p90": 160.51200032234192, - "p95": 171.07200622558594, - "p99": 223.13599288463593 - }, - "isolatedSum": { - "p50": 152.19199657440186, - "p90": 193.82400065660477, - "p95": 204.70400154590607, - "p99": 231.4239963889122 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 14680064, - "combineLogicalBytes": 14680064, - "fanoutMean": 8, - "recvTokensMax": 128, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 90.59199690818787, - "p90": 110.52799969911575, - "p95": 119.00799721479416, - "p99": 143.39199662208557 - }, - "combine": { - "p50": 81.53600245714188, - "p90": 98.11200201511383, - "p95": 105.79200088977814, - "p99": 123.4240010380745 - }, - "roundtrip": { - "p50": 145.4080045223236, - "p90": 173.0239987373352, - "p95": 
180.4479956626892, - "p99": 203.45599949359894 - }, - "isolatedSum": { - "p50": 172.12799936532974, - "p90": 208.64000171422958, - "p95": 224.7999981045723, - "p99": 266.81599766016006 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 29360128, - "combineLogicalBytes": 29360128, - "fanoutMean": 8, - "recvTokensMax": 256, - "stragglerRank": 3, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 64, - "globalTokens": 512, - "dispatch": { - "p50": 110.84800213575363, - "p90": 130.17599284648895, - "p95": 137.56799697875977, - "p99": 212.12799847126007 - }, - "combine": { - "p50": 95.13600170612335, - "p90": 114.20799791812897, - "p95": 124.57600235939026, - "p99": 243.42399835586548 - }, - "roundtrip": { - "p50": 178.14399302005768, - "p90": 205.24799823760986, - "p95": 233.40800404548645, - "p99": 432.2560131549835 - }, - "isolatedSum": { - "p50": 205.98400384187698, - "p90": 244.38399076461792, - "p95": 262.14399933815, - "p99": 455.55199682712555 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 58720256, - "combineLogicalBytes": 58720256, - "fanoutMean": 8, - "recvTokensMax": 512, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 133.44000279903412, - "p90": 152.48000621795654, - "p95": 157.95199573040009, - "p99": 172.2240000963211 - }, - "combine": { - "p50": 125.72799623012543, - "p90": 140.60799777507782, - "p95": 145.31199634075165, - "p99": 176.7359972000122 - }, - "roundtrip": { - "p50": 237.2480034828186, - "p90": 255.51998615264893, - "p95": 262.65600323677063, - "p99": 295.9040105342865 - }, - "isolatedSum": { - "p50": 259.16799902915955, - "p90": 293.08800399303436, - "p95": 303.26399207115173, - "p99": 348.9599972963333 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 117440512, - "combineLogicalBytes": 117440512, - "fanoutMean": 8, - "recvTokensMax": 1024, - "stragglerRank": 5, - 
"correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-26bc6c27", - "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|8|decode|normal|none|none|0|tuned||d02a66236b524b8", - "colorKey": "h200_d0dfa19a", - "comparisonKey": "5d5c9be2dc9b5f1f", - "schemaVersion": 3, - "generatedAt": "2026-06-26T23:56:33.428125+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_10", - "sku": "h200", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · bf16 · balanced-rank-local", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "balanced-rank-local", - "routingLabel": "balanced-rank-local", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "d02a66236b524b8", - "workloadId": "set:4:2eebbed158fe1320", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28271837870", - "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271837870", - "createdAt": "2026-06-26T23:56:33.428125+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 65.95200300216675, - "p90": 102.7199998497963, - "p95": 115.55200070142746, - "p99": 166.6560024023056 - }, - "combine": { - "p50": 58.6559996008873, - "p90": 72.4480003118515, - "p95": 78.59200239181519, - "p99": 95.64799815416336 - }, - "roundtrip": { - "p50": 112.44799941778183, - "p90": 152.70400047302246, - "p95": 159.2320054769516, - "p99": 181.2479943037033 - }, - "isolatedSum": { - "p50": 124.60800260305405, - "p90": 175.1680001616478, - "p95": 194.14400309324265, - "p99": 262.30400055646896 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 114688, - "combineLogicalBytes": 114688, - "fanoutMean": 1, - "recvTokensMax": 4, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 67.9360032081604, - "p90": 85.66399663686752, - "p95": 95.0080007314682, - "p99": 111.00800335407257 - }, - "combine": { - "p50": 59.93599817156792, - "p90": 70.88000327348709, - "p95": 77.18399912118912, - "p99": 92.03200042247772 - }, - "roundtrip": { - "p50": 112.2559979557991, - "p90": 138.11199367046356, - "p95": 150.2400040626526, - "p99": 209.6319943666458 - }, - "isolatedSum": { - "p50": 127.87200137972832, - "p90": 156.54399991035461, - "p95": 172.19199985265732, - "p99": 203.0400037765503 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 917504, - "combineLogicalBytes": 917504, - "fanoutMean": 1, - "recvTokensMax": 8, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 82.97599852085114, - "p90": 103.71199995279312, - "p95": 112.15999722480774, - "p99": 135.23200154304504 - }, - "combine": { 
- "p50": 69.76000219583511, - "p90": 85.05599945783615, - "p95": 93.88799965381622, - "p99": 128.60800325870514 - }, - "roundtrip": { - "p50": 125.56800246238708, - "p90": 148.70400726795197, - "p95": 165.92000424861908, - "p99": 200.3519982099533 - }, - "isolatedSum": { - "p50": 152.73600071668625, - "p90": 188.76799941062927, - "p95": 206.04799687862396, - "p99": 263.8400048017502 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 3670016, - "combineLogicalBytes": 3670016, - "fanoutMean": 1, - "recvTokensMax": 32, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 83.39200168848038, - "p90": 109.27999764680862, - "p95": 120.28799951076508, - "p99": 153.18399667739868 - }, - "combine": { - "p50": 69.50400024652481, - "p90": 82.87999778985977, - "p95": 90.27200192213058, - "p99": 100.89600086212158 - }, - "roundtrip": { - "p50": 128.67200374603271, - "p90": 153.53600680828094, - "p95": 162.62400150299072, - "p99": 190.65600633621216 - }, - "isolatedSum": { - "p50": 152.8960019350052, - "p90": 192.1599954366684, - "p95": 210.56000143289566, - "p99": 254.07999753952026 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 14680064, - "combineLogicalBytes": 14680064, - "fanoutMean": 1, - "recvTokensMax": 128, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-b2e52442", - "identity": "h200|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|balanced+eplb|8|decode|normal|none|none|0|tuned||f0e66a15078595b", - "colorKey": "h200_06544e53", - "comparisonKey": "57040e121807e028", - "schemaVersion": 3, - "generatedAt": "2026-06-27T00:02:47.649756+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_10", - "sku": "h200", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - 
"comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · bf16 · balanced+eplb", - "model": "DeepSeek-V3 (EPLB physical)", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 288, - "routing": "balanced", - "routingLabel": "balanced+eplb", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": true, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "f0e66a15078595b", - "workloadId": "set:8:7af12818400d6348", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": 1, - "eplbImbalanceAfter": 1, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28272031884", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272031884", - "createdAt": "2026-06-27T00:02:47.649756+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 68.15999746322632, - "p90": 100.60799866914749, - "p95": 110.72000116109848, - "p99": 138.75199854373932 - }, - "combine": { - "p50": 60.70400029420853, - "p90": 72.86400347948074, - "p95": 79.3600007891655, - "p99": 86.11200004816055 - }, - "roundtrip": { - "p50": 116.92799627780914, - "p90": 150.2079963684082, - "p95": 158.6879938840866, - "p99": 184.83200669288635 - }, - "isolatedSum": 
{ - "p50": 128.86399775743484, - "p90": 173.47200214862823, - "p95": 190.08000195026398, - "p99": 224.86399859189987 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 229376, - "combineLogicalBytes": 229376, - "fanoutMean": 2, - "recvTokensMax": 3, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 71.29599899053574, - "p90": 104.54399883747101, - "p95": 113.8560026884079, - "p99": 152.99199521541595 - }, - "combine": { - "p50": 61.983998864889145, - "p90": 78.97599786520004, - "p95": 83.5840031504631, - "p99": 98.49599748849869 - }, - "roundtrip": { - "p50": 116.5120005607605, - "p90": 149.88799393177032, - "p95": 163.71199488639832, - "p99": 195.45599818229675 - }, - "isolatedSum": { - "p50": 133.27999785542488, - "p90": 183.51999670267105, - "p95": 197.440005838871, - "p99": 251.48799270391464 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 458752, - "combineLogicalBytes": 458752, - "fanoutMean": 2, - "recvTokensMax": 6, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 72.4480003118515, - "p90": 95.04000097513199, - "p95": 104.63999956846237, - "p99": 125.40799379348755 - }, - "combine": { - "p50": 61.664000153541565, - "p90": 73.02399724721909, - "p95": 81.82399719953537, - "p99": 99.58399832248688 - }, - "roundtrip": { - "p50": 117.08799749612808, - "p90": 144.41600441932678, - "p95": 157.72800147533417, - "p99": 314.88001346588135 - }, - "isolatedSum": { - "p50": 134.11200046539307, - "p90": 168.06399822235107, - "p95": 186.46399676799774, - "p99": 224.99199211597443 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 917504, - "combineLogicalBytes": 917504, - "fanoutMean": 2, - "recvTokensMax": 12, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - 
"globalTokens": 64, - "dispatch": { - "p50": 73.95199686288834, - "p90": 99.2640033364296, - "p95": 105.8880016207695, - "p99": 122.27199971675873 - }, - "combine": { - "p50": 62.6240000128746, - "p90": 84.25600081682205, - "p95": 90.11200070381165, - "p99": 102.78400033712387 - }, - "roundtrip": { - "p50": 116.15999788045883, - "p90": 150.36800503730774, - "p95": 161.69600188732147, - "p99": 189.08800184726715 - }, - "isolatedSum": { - "p50": 136.57599687576294, - "p90": 183.52000415325165, - "p95": 196.00000232458115, - "p99": 225.0560000538826 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1835008, - "combineLogicalBytes": 1835008, - "fanoutMean": 2, - "recvTokensMax": 24, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 75.87199658155441, - "p90": 96.70399874448776, - "p95": 106.01600259542465, - "p99": 122.94399738311768 - }, - "combine": { - "p50": 62.94400244951248, - "p90": 78.65600287914276, - "p95": 84.73599702119827, - "p99": 96.6079980134964 - }, - "roundtrip": { - "p50": 117.15199798345566, - "p90": 145.11999487876892, - "p95": 153.47200632095337, - "p99": 190.75199961662292 - }, - "isolatedSum": { - "p50": 138.8159990310669, - "p90": 175.36000162363052, - "p95": 190.75199961662292, - "p99": 219.55199539661407 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 3670016, - "combineLogicalBytes": 3670016, - "fanoutMean": 2, - "recvTokensMax": 48, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 77.82399654388428, - "p90": 99.32799637317657, - "p95": 108.22399705648422, - "p99": 131.52000308036804 - }, - "combine": { - "p50": 66.3359984755516, - "p90": 80.35200089216232, - "p95": 87.74399757385254, - "p99": 170.23999989032745 - }, - "roundtrip": { - "p50": 119.90399658679962, - "p90": 146.7519998550415, - "p95": 
154.4959992170334, - "p99": 167.4879938364029 - }, - "isolatedSum": { - "p50": 144.15999501943588, - "p90": 179.6799972653389, - "p95": 195.96799463033676, - "p99": 301.7600029706955 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 7340032, - "combineLogicalBytes": 7340032, - "fanoutMean": 2, - "recvTokensMax": 96, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 64, - "globalTokens": 512, - "dispatch": { - "p50": 83.39200168848038, - "p90": 107.93600231409073, - "p95": 117.47200042009354, - "p99": 157.82399475574493 - }, - "combine": { - "p50": 70.17599791288376, - "p90": 82.36800134181976, - "p95": 89.59999680519104, - "p99": 102.7199998497963 - }, - "roundtrip": { - "p50": 127.51999497413635, - "p90": 154.7199934720993, - "p95": 170.04799842834473, - "p99": 201.27999782562256 - }, - "isolatedSum": { - "p50": 153.56799960136414, - "p90": 190.3040036559105, - "p95": 207.07199722528458, - "p99": 260.54399460554123 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 14680064, - "combineLogicalBytes": 14680064, - "fanoutMean": 2, - "recvTokensMax": 192, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 96.19200229644775, - "p90": 114.04799669981003, - "p95": 123.83999675512314, - "p99": 167.4560010433197 - }, - "combine": { - "p50": 84.48000252246857, - "p90": 95.87199985980988, - "p95": 99.93600100278854, - "p99": 113.92000317573547 - }, - "roundtrip": { - "p50": 156.3200056552887, - "p90": 175.64800381660461, - "p95": 185.56800484657288, - "p99": 221.15199267864227 - }, - "isolatedSum": { - "p50": 180.67200481891632, - "p90": 209.9199965596199, - "p95": 223.77599775791168, - "p99": 281.3760042190552 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 29360128, - "combineLogicalBytes": 29360128, - "fanoutMean": 2, - "recvTokensMax": 384, - "stragglerRank": 5, - "correct": 
true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-9febd1e2", - "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-moving@s1|8|decode|normal|none|none|1|tuned||6288a1aa76c20e7", - "colorKey": "h200_9779cb2d", - "comparisonKey": "65013819dd1ccf9e", - "schemaVersion": 3, - "generatedAt": "2026-06-27T00:12:58.540972+00:00", - "status": "valid", - "publicationStatus": "comparable-experimental", - "runner": "h200-dgxc-slurm_5", - "sku": "h200", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · bf16 · hotspot-moving@s1", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "hotspot-moving", - "routingLabel": "hotspot-moving@s1", - "routingStep": 1, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "6288a1aa76c20e7", - "workloadId": null, - "workloadSource": "seeded-runtime", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28272345418", - "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272345418", - "createdAt": "2026-06-27T00:12:58.540972+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 74.97599720954895, - "p90": 90.91199934482574, - "p95": 99.32799637317657, - "p99": 128.83199751377106 - }, - "combine": { - "p50": 70.27199864387512, - "p90": 80.1599994301796, - "p95": 89.21600133180618, - "p99": 107.07200318574905 - }, - "roundtrip": { - "p50": 125.47199428081512, - "p90": 145.6959992647171, - "p95": 153.31199765205383, - "p99": 184.54399704933167 - }, - "isolatedSum": { - "p50": 145.24799585342407, - "p90": 171.07199877500534, - "p95": 188.54399770498276, - "p99": 235.9040006995201 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4859904, - "combineLogicalBytes": 4859904, - "fanoutMean": 5.296875, - "recvTokensMax": 64, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 83.26400071382523, - "p90": 95.93600034713745, - "p95": 101.05600208044052, - "p99": 118.65600198507309 - }, - "combine": { - "p50": 78.8159966468811, - "p90": 86.75199747085571, - "p95": 92.03200042247772, - "p99": 111.84000223875046 - }, - "roundtrip": { - "p50": 139.13600146770477, - "p90": 150.68799257278442, - "p95": 155.20000457763672, - "p99": 181.05599284172058 - }, - "isolatedSum": { - "p50": 162.07999736070633, - "p90": 182.68799781799316, - "p95": 193.08800250291824, - "p99": 230.49600422382355 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 19525632, - "combineLogicalBytes": 19525632, - "fanoutMean": 5.3203125, - "recvTokensMax": 256, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 124.95999783277512, - "p90": 135.51999628543854, - "p95": 140.54399728775024, - "p99": 
153.3759981393814 - }, - "combine": { - "p50": 118.30399930477142, - "p90": 126.0479986667633, - "p95": 131.00799918174744, - "p99": 152.5759994983673 - }, - "roundtrip": { - "p50": 222.27199375629425, - "p90": 233.5679978132248, - "p95": 239.3600046634674, - "p99": 254.55999374389648 - }, - "isolatedSum": { - "p50": 243.26399713754654, - "p90": 261.56799495220184, - "p95": 271.5519964694977, - "p99": 305.9519976377487 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 78102528, - "combineLogicalBytes": 78102528, - "fanoutMean": 5.3203125, - "recvTokensMax": 1024, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-f5a9f57f", - "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-moving@s2|8|decode|normal|none|none|2|tuned||675e15b52e37958", - "colorKey": "h200_9479c674", - "comparisonKey": "65013819dd1ccf9e", - "schemaVersion": 3, - "generatedAt": "2026-06-27T00:13:12.398873+00:00", - "status": "valid", - "publicationStatus": "comparable-experimental", - "runner": "h200-dgxc-slurm_13", - "sku": "h200", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · bf16 · hotspot-moving@s2", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "hotspot-moving", - "routingLabel": "hotspot-moving@s2", - "routingStep": 2, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": 
"backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "675e15b52e37958", - "workloadId": null, - "workloadSource": "seeded-runtime", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28272348704", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272348704", - "createdAt": "2026-06-27T00:13:12.398873+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 75.87199658155441, - "p90": 93.59999746084213, - "p95": 100.19200295209885, - "p99": 114.56000059843063 - }, - "combine": { - "p50": 71.35999947786331, - "p90": 79.64800298213959, - "p95": 85.63199639320374, - "p99": 97.79199957847595 - }, - "roundtrip": { - "p50": 129.2160004377365, - "p90": 148.5760062932968, - "p95": 158.84800255298615, - "p99": 188.22400271892548 - }, - "isolatedSum": { - "p50": 147.23199605941772, - "p90": 173.24800044298172, - "p95": 185.82399934530258, - "p99": 212.35200017690659 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4859904, - "combineLogicalBytes": 4859904, - "fanoutMean": 5.296875, - "recvTokensMax": 64, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 88.0960002541542, - "p90": 110.78400164842606, - "p95": 121.72800302505493, - "p99": 175.61599612236023 - }, - "combine": { - "p50": 80.70400357246399, - "p90": 92.3520028591156, - "p95": 98.88000041246414, - "p99": 121.34400010108948 - }, - "roundtrip": { - "p50": 141.37600362300873, - "p90": 164.19200599193573, - "p95": 172.95999825000763, 
- "p99": 193.7599927186966 - }, - "isolatedSum": { - "p50": 168.8000038266182, - "p90": 203.13600450754166, - "p95": 220.60800343751907, - "p99": 296.9599962234497 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 19525632, - "combineLogicalBytes": 19525632, - "fanoutMean": 5.3203125, - "recvTokensMax": 256, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 128.92800569534302, - "p90": 140.3840035200119, - "p95": 146.65600657463074, - "p99": 171.10399901866913 - }, - "combine": { - "p50": 120.28799951076508, - "p90": 132.38400220870972, - "p95": 136.76799833774567, - "p99": 159.36000645160675 - }, - "roundtrip": { - "p50": 224.2880016565323, - "p90": 240.1919960975647, - "p95": 248.1279969215393, - "p99": 276.8320143222809 - }, - "isolatedSum": { - "p50": 249.2160052061081, - "p90": 272.7680057287216, - "p95": 283.4240049123764, - "p99": 330.4640054702759 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 78102528, - "combineLogicalBytes": 78102528, - "fanoutMean": 5.3203125, - "recvTokensMax": 1024, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-13ab64c2", - "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-moving@s3|8|decode|normal|none|none|3|tuned||82b2963fc322419", - "colorKey": "h200_9579c807", - "comparisonKey": "65013819dd1ccf9e", - "schemaVersion": 3, - "generatedAt": "2026-06-27T00:13:19.903361+00:00", - "status": "valid", - "publicationStatus": "comparable-experimental", - "runner": "h200-dgxc-slurm_6", - "sku": "h200", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 
EP8 · deepep · bf16 · hotspot-moving@s3", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "hotspot-moving", - "routingLabel": "hotspot-moving@s3", - "routingStep": 3, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "82b2963fc322419", - "workloadId": null, - "workloadSource": "seeded-runtime", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28272352256", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272352256", - "createdAt": "2026-06-27T00:13:19.903361+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 74.40000027418137, - "p90": 94.7519987821579, - "p95": 101.9200012087822, - "p99": 123.36000055074692 - }, - "combine": { - "p50": 70.20799815654755, - "p90": 82.17599987983704, - "p95": 89.37600255012512, - "p99": 105.56799918413162 - }, - "roundtrip": { - "p50": 125.34399330615997, - "p90": 150.04800260066986, - "p95": 162.6559942960739, - "p99": 177.88800597190857 - }, - "isolatedSum": { - "p50": 144.6079984307289, - "p90": 176.92799866199493, - "p95": 191.29600375890732, - "p99": 228.92799973487854 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4859904, - "combineLogicalBytes": 4859904, 
- "fanoutMean": 5.296875, - "recvTokensMax": 64, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 86.46400272846222, - "p90": 103.00800204277039, - "p95": 111.7440015077591, - "p99": 129.95199859142303 - }, - "combine": { - "p50": 79.26400005817413, - "p90": 90.97599983215332, - "p95": 96.47999703884125, - "p99": 115.9679964184761 - }, - "roundtrip": { - "p50": 139.8400068283081, - "p90": 156.6080003976822, - "p95": 163.96799683570862, - "p99": 176.35199427604675 - }, - "isolatedSum": { - "p50": 165.72800278663635, - "p90": 193.9840018749237, - "p95": 208.22399854660034, - "p99": 245.91999500989914 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 19525632, - "combineLogicalBytes": 19525632, - "fanoutMean": 5.3203125, - "recvTokensMax": 256, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 124.83199685811996, - "p90": 138.59200477600098, - "p95": 144.44799721240997, - "p99": 233.88800024986267 - }, - "combine": { - "p50": 119.07199770212173, - "p90": 130.8159977197647, - "p95": 139.71200585365295, - "p99": 152.5759994983673 - }, - "roundtrip": { - "p50": 222.24000096321106, - "p90": 239.84000086784363, - "p95": 250.65600872039795, - "p99": 283.4239900112152 - }, - "isolatedSum": { - "p50": 243.9039945602417, - "p90": 269.4080024957657, - "p95": 284.1600030660629, - "p99": 386.46399974823 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 78102528, - "combineLogicalBytes": 78102528, - "fanoutMean": 5.3203125, - "recvTokensMax": 1024, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-7c6f809c", - "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|decode|normal|none|none|0|tuned||2ad5ef98d328fa1", - "colorKey": "h200_189562cd", - 
"comparisonKey": "6b812f29e2dcdef6", - "schemaVersion": 3, - "generatedAt": "2026-06-26T23:57:16.217396+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_1", - "sku": "h200", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · bf16 · hotspot-single", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "hotspot-single", - "routingLabel": "hotspot-single", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "2ad5ef98d328fa1", - "workloadId": "set:4:286be993cd819ed9", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28271859196", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271859196", - "createdAt": "2026-06-26T23:57:16.217396+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 71.71200215816498, - "p90": 98.30400347709656, - 
"p95": 109.69600081443787, - "p99": 295.48799991607666 - }, - "combine": { - "p50": 67.6800012588501, - "p90": 82.07999914884567, - "p95": 88.16000074148178, - "p99": 110.04800349473953 - }, - "roundtrip": { - "p50": 121.95199728012085, - "p90": 153.24799716472626, - "p95": 161.53599321842194, - "p99": 211.16800606250763 - }, - "isolatedSum": { - "p50": 139.39200341701508, - "p90": 180.38400262594223, - "p95": 197.85600155591965, - "p99": 405.5360034108162 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 602112, - "combineLogicalBytes": 602112, - "fanoutMean": 5.25, - "recvTokensMax": 8, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 74.17599856853485, - "p90": 98.08000177145004, - "p95": 108.0000028014183, - "p99": 146.14400267601013 - }, - "combine": { - "p50": 69.63200122117996, - "p90": 83.13599973917007, - "p95": 89.02399986982346, - "p99": 103.20000350475311 - }, - "roundtrip": { - "p50": 125.40799379348755, - "p90": 153.50399911403656, - "p95": 165.12000560760498, - "p99": 192.83199310302734 - }, - "isolatedSum": { - "p50": 143.8079997897148, - "p90": 181.21600151062012, - "p95": 197.02400267124176, - "p99": 249.34400618076324 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4859904, - "combineLogicalBytes": 4859904, - "fanoutMean": 5.296875, - "recvTokensMax": 64, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 81.02399855852127, - "p90": 105.76000064611435, - "p95": 114.46399986743927, - "p99": 129.72800433635712 - }, - "combine": { - "p50": 77.2159993648529, - "p90": 89.34400230646133, - "p95": 95.8079993724823, - "p99": 114.97599631547928 - }, - "roundtrip": { - "p50": 137.472003698349, - "p90": 158.91200304031372, - "p95": 166.20799899101257, - "p99": 185.08799374103546 - }, - "isolatedSum": { - "p50": 
158.23999792337418, - "p90": 195.10400295257568, - "p95": 210.27199923992157, - "p99": 244.7040006518364 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 19525632, - "combineLogicalBytes": 19525632, - "fanoutMean": 5.3203125, - "recvTokensMax": 256, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 124.25599992275238, - "p90": 137.02400028705597, - "p95": 144.51199769973755, - "p99": 166.6879951953888 - }, - "combine": { - "p50": 118.30399930477142, - "p90": 130.14400005340576, - "p95": 135.71199774742126, - "p99": 157.6319932937622 - }, - "roundtrip": { - "p50": 220.06399929523468, - "p90": 239.42400515079498, - "p95": 246.17600440979004, - "p99": 313.6639893054962 - }, - "isolatedSum": { - "p50": 242.5599992275238, - "p90": 267.16800034046173, - "p95": 280.2239954471588, - "p99": 324.319988489151 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 78102528, - "combineLogicalBytes": 78102528, - "fanoutMean": 5.3203125, - "recvTokensMax": 1024, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-13c27f2d", - "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|decode|normal|none|none|0|tuned||b6caf944f6bb621", - "colorKey": "h200_189562cd", - "comparisonKey": "6b812f29e2dcdef6", - "schemaVersion": 3, - "generatedAt": "2026-06-27T00:05:10.730241+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_9", - "sku": "h200", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · bf16 · hotspot-single", - "model": "DeepSeek-V3/V4", - 
"shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "hotspot-single", - "routingLabel": "hotspot-single", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "b6caf944f6bb621", - "workloadId": "set:8:286be993cd819ed9", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28272100552", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272100552", - "createdAt": "2026-06-27T00:05:10.730241+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 74.14399832487106, - "p90": 98.84800016880035, - "p95": 106.36799782514572, - "p99": 130.46400249004364 - }, - "combine": { - "p50": 68.15999746322632, - "p90": 80.19199967384338, - "p95": 86.30400151014328, - "p99": 99.16800260543823 - }, - "roundtrip": { - "p50": 122.17599898576736, - "p90": 154.4319987297058, - "p95": 165.98400473594666, - "p99": 216.44799411296844 - }, - "isolatedSum": { - "p50": 142.30399578809738, - "p90": 179.03999984264374, - "p95": 192.671999335289, - "p99": 229.63200509548187 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 602112, - "combineLogicalBytes": 602112, - "fanoutMean": 5.25, - "recvTokensMax": 8, - 
"stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 76.19199901819229, - "p90": 103.5199984908104, - "p95": 114.3679991364479, - "p99": 145.9520012140274 - }, - "combine": { - "p50": 69.2799985408783, - "p90": 83.96799862384796, - "p95": 90.11200070381165, - "p99": 99.7759997844696 - }, - "roundtrip": { - "p50": 125.02400577068329, - "p90": 152.3520052433014, - "p95": 163.58399391174316, - "p99": 191.16799533367157 - }, - "isolatedSum": { - "p50": 145.4719975590706, - "p90": 187.48799711465836, - "p95": 204.47999984025955, - "p99": 245.728000998497 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1247232, - "combineLogicalBytes": 1247232, - "fanoutMean": 5.4375, - "recvTokensMax": 16, - "stragglerRank": 2, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 79.16799932718277, - "p90": 122.56000190973282, - "p95": 143.8719928264618, - "p99": 228.03199291229248 - }, - "combine": { - "p50": 70.04799693822861, - "p90": 85.1840004324913, - "p95": 89.9519994854927, - "p99": 98.4639972448349 - }, - "roundtrip": { - "p50": 130.0159990787506, - "p90": 166.17600619792938, - "p95": 180.80000579357147, - "p99": 225.63199698925018 - }, - "isolatedSum": { - "p50": 149.21599626541138, - "p90": 207.74400234222412, - "p95": 233.8239923119545, - "p99": 326.4959901571274 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2451456, - "combineLogicalBytes": 2451456, - "fanoutMean": 5.34375, - "recvTokensMax": 32, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 75.52000135183334, - "p90": 99.71199929714203, - "p95": 106.62399977445602, - "p99": 121.24799937009811 - }, - "combine": { - "p50": 70.592001080513, - "p90": 88.19200098514557, - "p95": 93.31200271844864, - "p99": 
122.49600142240524 - }, - "roundtrip": { - "p50": 127.29600071907043, - "p90": 156.44800662994385, - "p95": 164.2879992723465, - "p99": 200.76799392700195 - }, - "isolatedSum": { - "p50": 146.11200243234634, - "p90": 187.9040002822876, - "p95": 199.93600249290466, - "p99": 243.74400079250336 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4859904, - "combineLogicalBytes": 4859904, - "fanoutMean": 5.296875, - "recvTokensMax": 64, - "stragglerRank": 2, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 75.42400062084198, - "p90": 99.32799637317657, - "p95": 107.16799646615982, - "p99": 116.44800007343292 - }, - "combine": { - "p50": 72.7040022611618, - "p90": 89.59999680519104, - "p95": 95.551997423172, - "p99": 149.1200029850006 - }, - "roundtrip": { - "p50": 129.5360028743744, - "p90": 163.42400014400482, - "p95": 173.18400740623474, - "p99": 210.36800742149353 - }, - "isolatedSum": { - "p50": 148.12800288200378, - "p90": 188.92799317836761, - "p95": 202.71999388933182, - "p99": 265.56800305843353 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 9748480, - "combineLogicalBytes": 9748480, - "fanoutMean": 5.3125, - "recvTokensMax": 128, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 84.89599823951721, - "p90": 109.31199789047241, - "p95": 117.15199798345566, - "p99": 152.92799472808838 - }, - "combine": { - "p50": 78.75200361013412, - "p90": 95.36000341176987, - "p95": 99.10400211811066, - "p99": 120.06399780511856 - }, - "roundtrip": { - "p50": 140.73599874973297, - "p90": 167.29600727558136, - "p95": 174.01599884033203, - "p99": 211.07199788093567 - }, - "isolatedSum": { - "p50": 163.64800184965134, - "p90": 204.67200130224228, - "p95": 216.25600010156631, - "p99": 272.99199253320694 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 
19525632, - "combineLogicalBytes": 19525632, - "fanoutMean": 5.3203125, - "recvTokensMax": 256, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 64, - "globalTokens": 512, - "dispatch": { - "p50": 98.91200065612793, - "p90": 116.19199812412262, - "p95": 121.31199985742569, - "p99": 146.84799313545227 - }, - "combine": { - "p50": 91.36000275611877, - "p90": 105.50399869680405, - "p95": 109.92000252008438, - "p99": 130.65600395202637 - }, - "roundtrip": { - "p50": 168.7999963760376, - "p90": 190.8479928970337, - "p95": 195.23200392723083, - "p99": 233.69599878787994 - }, - "isolatedSum": { - "p50": 190.2720034122467, - "p90": 221.69599682092667, - "p95": 231.23200237751007, - "p99": 277.50399708747864 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 38621184, - "combineLogicalBytes": 38621184, - "fanoutMean": 5.26171875, - "recvTokensMax": 512, - "stragglerRank": 3, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 125.72799623012543, - "p90": 143.16800236701965, - "p95": 147.90399372577667, - "p99": 170.71999609470367 - }, - "combine": { - "p50": 120.06399780511856, - "p90": 136.48000359535217, - "p95": 141.9840008020401, - "p99": 148.44800531864166 - }, - "roundtrip": { - "p50": 224.09600019454956, - "p90": 247.8400021791458, - "p95": 254.68799471855164, - "p99": 276.38399600982666 - }, - "isolatedSum": { - "p50": 245.791994035244, - "p90": 279.6480059623718, - "p95": 289.8879945278168, - "p99": 319.16800141334534 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 78102528, - "combineLogicalBytes": 78102528, - "fanoutMean": 5.3203125, - "recvTokensMax": 1024, - "stragglerRank": 3, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-c4fd916e", - "identity": 
"h200|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|hotspot-single+eplb|8|decode|normal|none|none|0|tuned||e41f5099a9733ac", - "colorKey": "h200_80a72891", - "comparisonKey": "abe9d0af26c5a0c0", - "schemaVersion": 3, - "generatedAt": "2026-06-27T00:05:13.797855+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_1", - "sku": "h200", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · bf16 · hotspot-single+eplb", - "model": "DeepSeek-V3 (EPLB physical)", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 288, - "routing": "hotspot-single", - "routingLabel": "hotspot-single+eplb", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": true, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "e41f5099a9733ac", - "workloadId": "set:8:286be993cd819ed9", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": 1.830078125, - "eplbImbalanceAfter": 1.0007595486111112, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28272103776", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272103776", - "createdAt": 
"2026-06-27T00:05:13.797855+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 72.86400347948074, - "p90": 99.10400211811066, - "p95": 107.35999792814255, - "p99": 136.48000359535217 - }, - "combine": { - "p50": 67.87200272083282, - "p90": 82.30400085449219, - "p95": 87.55200356245041, - "p99": 92.12800115346909 - }, - "roundtrip": { - "p50": 121.31199985742569, - "p90": 150.62400698661804, - "p95": 160.76800227165222, - "p99": 204.8639953136444 - }, - "isolatedSum": { - "p50": 140.73600620031357, - "p90": 181.40800297260284, - "p95": 194.91200149059296, - "p99": 228.60800474882126 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 630784, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 74.43200051784515, - "p90": 101.34399682283401, - "p95": 109.66400057077408, - "p99": 138.43199610710144 - }, - "combine": { - "p50": 67.90400296449661, - "p90": 80.76799660921097, - "p95": 85.37600189447403, - "p99": 95.13600170612335 - }, - "roundtrip": { - "p50": 121.56800180673599, - "p90": 151.67999267578125, - "p95": 162.23999857902527, - "p99": 191.64800643920898 - }, - "isolatedSum": { - "p50": 142.33600348234177, - "p90": 182.11199343204498, - "p95": 195.0400024652481, - "p99": 233.5679978132248 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1175552, - "combineLogicalBytes": 1175552, - "fanoutMean": 5.125, - "recvTokensMax": 12, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 74.36800003051758, - "p90": 106.30399733781815, - "p95": 112.8000020980835, - "p99": 133.34399461746216 - }, - "combine": { - "p50": 69.31199878454208, - "p90": 85.75999736785889, - "p95": 
93.05600076913834, - "p99": 108.41599851846695 - }, - "roundtrip": { - "p50": 123.16799908876419, - "p90": 152.16000378131866, - "p95": 162.33600676059723, - "p99": 187.80800700187683 - }, - "isolatedSum": { - "p50": 143.67999881505966, - "p90": 192.06399470567703, - "p95": 205.85600286722183, - "p99": 241.7599931359291 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2451456, - "combineLogicalBytes": 2451456, - "fanoutMean": 5.34375, - "recvTokensMax": 23, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 73.72800260782242, - "p90": 94.94400024414062, - "p95": 102.30399668216705, - "p99": 121.2799996137619 - }, - "combine": { - "p50": 68.44799965620041, - "p90": 81.91999793052673, - "p95": 88.03199976682663, - "p99": 102.52799838781357 - }, - "roundtrip": { - "p50": 124.22399967908859, - "p90": 154.14400398731232, - "p95": 164.60800170898438, - "p99": 177.44000256061554 - }, - "isolatedSum": { - "p50": 142.17600226402283, - "p90": 176.86399817466736, - "p95": 190.33599644899368, - "p99": 223.80799800157547 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4730880, - "combineLogicalBytes": 4730880, - "fanoutMean": 5.15625, - "recvTokensMax": 44, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 72.12799787521362, - "p90": 98.55999797582626, - "p95": 106.01600259542465, - "p99": 130.62399625778198 - }, - "combine": { - "p50": 69.92000341415405, - "p90": 83.29600095748901, - "p95": 89.28000181913376, - "p99": 106.75200074911118 - }, - "roundtrip": { - "p50": 123.77600371837616, - "p90": 149.63200688362122, - "p95": 158.4639996290207, - "p99": 176.54399573802948 - }, - "isolatedSum": { - "p50": 142.04800128936768, - "p90": 181.85599893331528, - "p95": 195.2960044145584, - "p99": 237.37599700689316 - }, - "roundtripMeasured": true, - 
"dispatchLogicalBytes": 9691136, - "combineLogicalBytes": 9691136, - "fanoutMean": 5.28125, - "recvTokensMax": 88, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 84.48000252246857, - "p90": 110.75200140476227, - "p95": 119.61600184440613, - "p99": 152.41600573062897 - }, - "combine": { - "p50": 77.2479996085167, - "p90": 91.07200056314468, - "p95": 98.36799651384354, - "p99": 130.17599284648895 - }, - "roundtrip": { - "p50": 134.783998131752, - "p90": 159.04000401496887, - "p95": 166.97600483894348, - "p99": 194.36800479888916 - }, - "isolatedSum": { - "p50": 161.72800213098526, - "p90": 201.82400196790695, - "p95": 217.98399835824966, - "p99": 282.5919985771179 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 19568640, - "combineLogicalBytes": 19568640, - "fanoutMean": 5.33203125, - "recvTokensMax": 179, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 64, - "globalTokens": 512, - "dispatch": { - "p50": 97.02400118112564, - "p90": 121.2799996137619, - "p95": 137.95199990272522, - "p99": 238.87999355793 - }, - "combine": { - "p50": 90.94399958848953, - "p90": 106.97600245475769, - "p95": 113.98400366306305, - "p99": 139.3280029296875 - }, - "roundtrip": { - "p50": 161.05599701404572, - "p90": 182.17599391937256, - "p95": 191.23199582099915, - "p99": 230.27199506759644 - }, - "isolatedSum": { - "p50": 187.96800076961517, - "p90": 228.2560020685196, - "p95": 251.93600356578827, - "p99": 378.2079964876175 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 38750208, - "combineLogicalBytes": 38750208, - "fanoutMean": 5.279296875, - "recvTokensMax": 348, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 116.03199690580368, - "p90": 134.39999520778656, - "p95": 
142.5279974937439, - "p99": 206.11199736595154 - }, - "combine": { - "p50": 103.04000228643417, - "p90": 118.23999881744385, - "p95": 122.079998254776, - "p99": 137.69599795341492 - }, - "roundtrip": { - "p50": 195.99999487400055, - "p90": 214.33599293231964, - "p95": 224.5440036058426, - "p99": 265.02400636672974 - }, - "isolatedSum": { - "p50": 219.07199919223785, - "p90": 252.6399940252304, - "p95": 264.6079957485199, - "p99": 343.80799531936646 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 77342720, - "combineLogicalBytes": 77342720, - "fanoutMean": 5.2685546875, - "recvTokensMax": 687, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-34b2b051", - "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform·empty-rank|8|decode|normal|none|empty-rank|0|tuned||5621f0d4899ad7a", - "colorKey": "h200_2a7f12a0", - "comparisonKey": "4dde4e46080a91eb", - "schemaVersion": 3, - "generatedAt": "2026-06-27T00:14:22.620116+00:00", - "status": "valid", - "publicationStatus": "comparable-experimental", - "runner": "h200-dgxc-slurm_3", - "sku": "h200", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · bf16 · uniform·empty-rank", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform·empty-rank", - "routingStep": 0, - "unevenTokens": "empty-rank", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": 
"backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "5621f0d4899ad7a", - "workloadId": null, - "workloadSource": "seeded-runtime", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28272386143", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272386143", - "createdAt": "2026-06-27T00:14:22.620116+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 8, - "globalTokens": 63, - "dispatch": { - "p50": 73.15199822187424, - "p90": 92.76799857616425, - "p95": 100.28800368309021, - "p99": 131.58400356769562 - }, - "combine": { - "p50": 68.96000355482101, - "p90": 83.64800363779068, - "p95": 88.92799913883209, - "p99": 102.11200267076492 - }, - "roundtrip": { - "p50": 121.66400253772736, - "p90": 145.37599682807922, - "p95": 157.18400478363037, - "p99": 189.56799805164337 - }, - "isolatedSum": { - "p50": 142.11200177669525, - "p90": 176.41600221395493, - "p95": 189.2160028219223, - "p99": 233.69600623846054 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4888576, - "combineLogicalBytes": 4888576, - "fanoutMean": 5.412698268890381, - "recvTokensMax": 46, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 252, - "dispatch": { - "p50": 82.43200182914734, - "p90": 96.28800302743912, - "p95": 103.84000092744827, - "p99": 123.07199835777283 - }, - "combine": { - "p50": 76.60800218582153, - "p90": 86.65599673986435, - "p95": 92.28800237178802, - "p99": 107.84000158309937 - }, - "roundtrip": { - "p50": 134.49600338935852, - 
"p90": 156.031996011734, - "p95": 167.4879938364029, - "p99": 228.12800109386444 - }, - "isolatedSum": { - "p50": 159.04000401496887, - "p90": 182.94399976730347, - "p95": 196.1280032992363, - "p99": 230.9119999408722 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 19396608, - "combineLogicalBytes": 19396608, - "fanoutMean": 5.36904764175415, - "recvTokensMax": 180, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1022, - "dispatch": { - "p50": 115.90400338172913, - "p90": 130.49599528312683, - "p95": 136.86400651931763, - "p99": 152.319997549057 - }, - "combine": { - "p50": 108.92800241708755, - "p90": 121.31199985742569, - "p95": 126.8479973077774, - "p99": 144.06399428844452 - }, - "roundtrip": { - "p50": 201.08799636363983, - "p90": 216.5759950876236, - "p95": 222.33599424362183, - "p99": 238.5919988155365 - }, - "isolatedSum": { - "p50": 224.83200579881668, - "p90": 251.80799514055252, - "p95": 263.71200382709503, - "p99": 296.3839918375015 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 77529088, - "combineLogicalBytes": 77529088, - "fanoutMean": 5.2915849685668945, - "recvTokensMax": 722, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-2de6a2af", - "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform·linear|8|decode|normal|none|linear|0|tuned||b029c1a6fded400", - "colorKey": "h200_58b5650b", - "comparisonKey": "4dde4e46080a91eb", - "schemaVersion": 3, - "generatedAt": "2026-06-27T00:14:22.294115+00:00", - "status": "valid", - "publicationStatus": "comparable-experimental", - "runner": "h200-dgxc-slurm_9", - "sku": "h200", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", - 
"transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · bf16 · uniform·linear", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform·linear", - "routingStep": 0, - "unevenTokens": "linear", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "b029c1a6fded400", - "workloadId": null, - "workloadSource": "seeded-runtime", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28272382939", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272382939", - "createdAt": "2026-06-27T00:14:22.294115+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 75.19999891519547, - "p90": 97.18400239944458, - "p95": 107.84000158309937, - "p99": 136.1279934644699 - }, - "combine": { - "p50": 68.9919963479042, - "p90": 80.48000186681747, - "p95": 86.62399649620056, - "p99": 96.47999703884125 - }, - "roundtrip": { - "p50": 122.27199971675873, - "p90": 154.6880006790161, - "p95": 166.97600483894348, - "p99": 202.78400182724 - }, - "isolatedSum": { - "p50": 144.19199526309967, - "p90": 177.66400426626205, - "p95": 194.46399807929993, - "p99": 232.60799050331116 - }, - "roundtripMeasured": true, - 
"dispatchLogicalBytes": 4974592, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 85.69599688053131, - "p90": 105.8880016207695, - "p95": 113.63200098276138, - "p99": 147.2959965467453 - }, - "combine": { - "p50": 78.40000092983246, - "p90": 89.85599875450134, - "p95": 95.93600034713745, - "p99": 106.84800148010254 - }, - "roundtrip": { - "p50": 134.62400436401367, - "p90": 154.81600165367126, - "p95": 166.1120057106018, - "p99": 190.0160014629364 - }, - "isolatedSum": { - "p50": 164.09599781036377, - "p90": 195.74400037527084, - "p95": 209.56800132989883, - "p99": 254.14399802684784 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 19726336, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 182, - "stragglerRank": 3, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 117.76000261306763, - "p90": 139.13600146770477, - "p95": 149.9200016260147, - "p99": 190.94400107860565 - }, - "combine": { - "p50": 114.88000303506851, - "p90": 121.88799679279327, - "p95": 128.1599998474121, - "p99": 155.61600029468536 - }, - "roundtrip": { - "p50": 208.25600624084473, - "p90": 228.57600450515747, - "p95": 237.37600445747375, - "p99": 271.64798974990845 - }, - "isolatedSum": { - "p50": 232.64000564813614, - "p90": 261.02399826049805, - "p95": 278.0800014734268, - "p99": 346.560001373291 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-6ff3844b", - "identity": 
"h200|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|uniform+eplb|8|decode|normal|none|none|0|tuned||73351bbcd4d02de", - "colorKey": "h200_580d7b05", - "comparisonKey": "46ecc7ff5ccb7c5d", - "schemaVersion": 3, - "generatedAt": "2026-06-27T00:02:26.011362+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_11", - "sku": "h200", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · bf16 · uniform+eplb", - "model": "DeepSeek-V3 (EPLB physical)", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 288, - "routing": "uniform", - "routingLabel": "uniform+eplb", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": true, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "73351bbcd4d02de", - "workloadId": "set:8:d8d49658059863f2", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": 1.078125, - "eplbImbalanceAfter": 1.00048828125, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28272020269", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272020269", - "createdAt": "2026-06-27T00:02:26.011362+00:00", - "sha": 
"45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 76.76800340414047, - "p90": 99.23200309276581, - "p95": 110.59200018644333, - "p99": 139.71200585365295 - }, - "combine": { - "p50": 68.1919977068901, - "p90": 80.09599894285202, - "p95": 84.06399935483932, - "p99": 98.65599870681763 - }, - "roundtrip": { - "p50": 123.16799908876419, - "p90": 143.90400052070618, - "p95": 155.8080017566681, - "p99": 181.5679967403412 - }, - "isolatedSum": { - "p50": 144.96000111103058, - "p90": 179.32800203561783, - "p95": 194.65599954128265, - "p99": 238.36800456047058 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 516096, - "combineLogicalBytes": 516096, - "fanoutMean": 4.5, - "recvTokensMax": 6, - "stragglerRank": 2, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 76.09599828720093, - "p90": 102.55999863147736, - "p95": 112.70400136709213, - "p99": 138.5599970817566 - }, - "combine": { - "p50": 69.95200365781784, - "p90": 79.83999699354172, - "p95": 83.39200168848038, - "p99": 91.93599969148636 - }, - "roundtrip": { - "p50": 125.791996717453, - "p90": 143.96800100803375, - "p95": 156.67200088500977, - "p99": 176.5120029449463 - }, - "isolatedSum": { - "p50": 146.04800194501877, - "p90": 182.39999562501907, - "p95": 196.0960030555725, - "p99": 230.49599677324295 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1089536, - "combineLogicalBytes": 1089536, - "fanoutMean": 4.75, - "recvTokensMax": 11, - "stragglerRank": 1, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 77.05599814653397, - "p90": 99.0080013871193, - "p95": 106.6880002617836, - "p99": 139.77600634098053 - }, - "combine": { - "p50": 70.04799693822861, - "p90": 82.49600231647491, - "p95": 85.56800335645676, - "p99": 100.09600222110748 - }, - 
"roundtrip": { - "p50": 130.17599284648895, - "p90": 161.6320013999939, - "p95": 169.24799978733063, - "p99": 194.43200528621674 - }, - "isolatedSum": { - "p50": 147.10399508476257, - "p90": 181.5040037035942, - "p95": 192.25600361824036, - "p99": 239.872008562088 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2207744, - "combineLogicalBytes": 2207744, - "fanoutMean": 4.8125, - "recvTokensMax": 23, - "stragglerRank": 1, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 77.34400033950806, - "p90": 90.94399958848953, - "p95": 97.9200005531311, - "p99": 113.18399757146835 - }, - "combine": { - "p50": 71.19999825954437, - "p90": 79.9039974808693, - "p95": 84.06399935483932, - "p99": 113.02399635314941 - }, - "roundtrip": { - "p50": 130.0159990787506, - "p90": 153.08800339698792, - "p95": 165.24800658226013, - "p99": 195.3279972076416 - }, - "isolatedSum": { - "p50": 148.54399859905243, - "p90": 170.84799706935883, - "p95": 181.98399990797043, - "p99": 226.20799392461777 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4558848, - "combineLogicalBytes": 4558848, - "fanoutMean": 4.96875, - "recvTokensMax": 46, - "stragglerRank": 2, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 83.48800241947174, - "p90": 106.20799660682678, - "p95": 114.78400230407715, - "p99": 256.0960054397583 - }, - "combine": { - "p50": 72.9919970035553, - "p90": 86.17600053548813, - "p95": 91.51999652385712, - "p99": 108.83200168609619 - }, - "roundtrip": { - "p50": 132.9919993877411, - "p90": 166.24000668525696, - "p95": 176.35199427604675, - "p99": 203.5519927740097 - }, - "isolatedSum": { - "p50": 156.47999942302704, - "p90": 192.3839971423149, - "p95": 206.30399882793427, - "p99": 364.9280071258545 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 9347072, - "combineLogicalBytes": 9347072, 
- "fanoutMean": 5.09375, - "recvTokensMax": 86, - "stragglerRank": 1, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 89.1840010881424, - "p90": 104.51199859380722, - "p95": 112.44799941778183, - "p99": 135.5839967727661 - }, - "combine": { - "p50": 79.3600007891655, - "p90": 87.26400136947632, - "p95": 92.73599833250046, - "p99": 111.32799834012985 - }, - "roundtrip": { - "p50": 139.90400731563568, - "p90": 159.2639982700348, - "p95": 169.3439930677414, - "p99": 189.02400135993958 - }, - "isolatedSum": { - "p50": 168.5440018773079, - "p90": 191.77599996328354, - "p95": 205.1839977502823, - "p99": 246.91199511289597 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 18995200, - "combineLogicalBytes": 18995200, - "fanoutMean": 5.17578125, - "recvTokensMax": 178, - "stragglerRank": 1, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 64, - "globalTokens": 512, - "dispatch": { - "p50": 98.2080027461052, - "p90": 113.40799927711487, - "p95": 119.99999731779099, - "p99": 140.19200205802917 - }, - "combine": { - "p50": 89.12000060081482, - "p90": 98.7199991941452, - "p95": 102.7199998497963, - "p99": 111.455999314785 - }, - "roundtrip": { - "p50": 162.7199947834015, - "p90": 182.0800006389618, - "p95": 189.60000574588776, - "p99": 210.4640007019043 - }, - "isolatedSum": { - "p50": 187.32800334692, - "p90": 212.12799847126007, - "p95": 222.71999716758728, - "p99": 251.64800137281418 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 38291456, - "combineLogicalBytes": 38291456, - "fanoutMean": 5.216796875, - "recvTokensMax": 348, - "stragglerRank": 1, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 122.46400117874146, - "p90": 136.51199638843536, - "p95": 143.64799857139587, - "p99": 156.41599893569946 - }, - "combine": { - "p50": 
106.33599758148193, - "p90": 117.91999638080597, - "p95": 122.079998254776, - "p99": 132.09599256515503 - }, - "roundtrip": { - "p50": 200.15999674797058, - "p90": 217.72800385951996, - "p95": 223.29600155353546, - "p99": 246.87999486923218 - }, - "isolatedSum": { - "p50": 228.7999987602234, - "p90": 254.43199276924133, - "p95": 265.7279968261719, - "p99": 288.5119915008545 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 77113344, - "combineLogicalBytes": 77113344, - "fanoutMean": 5.2529296875, - "recvTokensMax": 685, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-f68ea439", - "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|decode|normal|none|none|0|tuned||14ded8461f2636c", - "colorKey": "h200_b6aa6110", - "comparisonKey": "5971fba5c9d29fa7", - "schemaVersion": 3, - "generatedAt": "2026-06-27T00:03:10.278228+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_5", - "sku": "h200", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · bf16 · zipf", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "zipf", - "routingLabel": "zipf", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - 
"nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "14ded8461f2636c", - "workloadId": "set:8:f5576e2b712d38c3", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28272042133", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272042133", - "createdAt": "2026-06-27T00:03:10.278228+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 71.68000191450119, - "p90": 93.44000369310379, - "p95": 102.68799960613251, - "p99": 140.1599943637848 - }, - "combine": { - "p50": 67.4239993095398, - "p90": 79.45600152015686, - "p95": 86.496002972126, - "p99": 106.01600259542465 - }, - "roundtrip": { - "p50": 119.4240003824234, - "p90": 146.59200608730316, - "p95": 155.07200360298157, - "p99": 181.34400248527527 - }, - "isolatedSum": { - "p50": 139.10400122404099, - "p90": 172.89600521326065, - "p95": 189.18400257825851, - "p99": 246.17599695920944 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 444416, - "combineLogicalBytes": 444416, - "fanoutMean": 3.875, - "recvTokensMax": 8, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 73.69600236415863, - "p90": 100.92800110578537, - "p95": 109.66400057077408, - "p99": 146.04799449443817 - }, - "combine": { - "p50": 68.28799843788147, - "p90": 80.76799660921097, - "p95": 85.69599688053131, - "p99": 152.8320014476776 - }, - "roundtrip": { - "p50": 121.15199863910675, - "p90": 147.77599275112152, - "p95": 155.71199357509613, - "p99": 193.7599927186966 - }, - "isolatedSum": { - "p50": 141.9840008020401, - "p90": 
181.69599771499634, - "p95": 195.3599974513054, - "p99": 298.8799959421158 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 845824, - "combineLogicalBytes": 845824, - "fanoutMean": 3.6875, - "recvTokensMax": 16, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 72.73600250482559, - "p90": 96.12800180912018, - "p95": 103.90400141477585, - "p99": 168.06399822235107 - }, - "combine": { - "p50": 66.91200286149979, - "p90": 78.65600287914276, - "p95": 82.2720006108284, - "p99": 94.71999853849411 - }, - "roundtrip": { - "p50": 118.9119964838028, - "p90": 143.8080072402954, - "p95": 155.71199357509613, - "p99": 209.6959948539734 - }, - "isolatedSum": { - "p50": 139.64800536632538, - "p90": 174.78400468826294, - "p95": 186.17600202560425, - "p99": 262.7839967608452 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1691648, - "combineLogicalBytes": 1691648, - "fanoutMean": 3.6875, - "recvTokensMax": 32, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 73.56800138950348, - "p90": 93.82399916648865, - "p95": 101.47199779748917, - "p99": 132.7359974384308 - }, - "combine": { - "p50": 67.6800012588501, - "p90": 79.6160027384758, - "p95": 83.23200047016144, - "p99": 101.21600329875946 - }, - "roundtrip": { - "p50": 119.26399916410446, - "p90": 145.24799585342407, - "p95": 154.4959992170334, - "p99": 191.71200692653656 - }, - "isolatedSum": { - "p50": 141.24800264835358, - "p90": 173.44000190496445, - "p95": 184.7039982676506, - "p99": 233.95200073719025 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 3354624, - "combineLogicalBytes": 3354624, - "fanoutMean": 3.65625, - "recvTokensMax": 64, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": 
{ - "p50": 77.66400277614594, - "p90": 104.12800312042236, - "p95": 114.30399864912033, - "p99": 140.6400054693222 - }, - "combine": { - "p50": 70.8480030298233, - "p90": 84.32000130414963, - "p95": 90.7519981265068, - "p99": 122.27199971675873 - }, - "roundtrip": { - "p50": 125.95200538635254, - "p90": 157.151997089386, - "p95": 166.81599617004395, - "p99": 207.23199844360352 - }, - "isolatedSum": { - "p50": 148.51200580596924, - "p90": 188.448004424572, - "p95": 205.05599677562714, - "p99": 262.91200518608093 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 6537216, - "combineLogicalBytes": 6537216, - "fanoutMean": 3.5625, - "recvTokensMax": 127, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 82.94399827718735, - "p90": 101.56799852848053, - "p95": 110.88000237941742, - "p99": 162.11199760437012 - }, - "combine": { - "p50": 76.31999999284744, - "p90": 87.67999708652496, - "p95": 90.68799763917923, - "p99": 98.33600372076035 - }, - "roundtrip": { - "p50": 135.71199774742126, - "p90": 155.20000457763672, - "p95": 165.6000018119812, - "p99": 222.27199375629425 - }, - "isolatedSum": { - "p50": 159.2639982700348, - "p90": 189.2479956150055, - "p95": 201.56800001859665, - "p99": 260.44800132513046 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 12859392, - "combineLogicalBytes": 12859392, - "fanoutMean": 3.50390625, - "recvTokensMax": 255, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 64, - "globalTokens": 512, - "dispatch": { - "p50": 96.76799923181534, - "p90": 109.11999642848969, - "p95": 116.5120005607605, - "p99": 174.01599884033203 - }, - "combine": { - "p50": 86.17600053548813, - "p90": 97.31200337409973, - "p95": 103.07200253009796, - "p99": 120.64000219106674 - }, - "roundtrip": { - "p50": 160.67199409008026, - "p90": 175.61599612236023, - "p95": 181.40800297260284, - 
"p99": 218.9439982175827 - }, - "isolatedSum": { - "p50": 182.94399976730347, - "p90": 206.43199980258942, - "p95": 219.58400309085846, - "p99": 294.6560010313988 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 25145344, - "combineLogicalBytes": 25145344, - "fanoutMean": 3.42578125, - "recvTokensMax": 510, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 122.3360002040863, - "p90": 132.47999548912048, - "p95": 135.51999628543854, - "p99": 155.90399503707886 - }, - "combine": { - "p50": 112.86400258541107, - "p90": 121.8239963054657, - "p95": 126.62400305271149, - "p99": 136.76799833774567 - }, - "roundtrip": { - "p50": 214.52799439430237, - "p90": 232.92799293994904, - "p95": 243.42399835586548, - "p99": 306.97599053382874 - }, - "isolatedSum": { - "p50": 235.20000278949738, - "p90": 254.30399179458618, - "p95": 262.14399933815, - "p99": 292.6719933748245 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 49946624, - "combineLogicalBytes": 49946624, - "fanoutMean": 3.40234375, - "recvTokensMax": 1022, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-9e42f709", - "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|decode|normal|none|none|0|tuned||1fa7fe74d0e30a3", - "colorKey": "h200_b6aa6110", - "comparisonKey": "5971fba5c9d29fa7", - "schemaVersion": 3, - "generatedAt": "2026-06-26T23:56:48.444120+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_5", - "sku": "h200", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · bf16 · 
zipf", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "zipf", - "routingLabel": "zipf", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "1fa7fe74d0e30a3", - "workloadId": "set:4:f5576e2b712d38c3", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28271844665", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271844665", - "createdAt": "2026-06-26T23:56:48.444120+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 71.3919997215271, - "p90": 101.1200025677681, - "p95": 115.1999980211258, - "p99": 144.44799721240997 - }, - "combine": { - "p50": 64.4799992442131, - "p90": 82.78399705886841, - "p95": 91.48799628019333, - "p99": 104.67199981212616 - }, - "roundtrip": { - "p50": 117.98399686813354, - "p90": 156.22399747371674, - "p95": 165.3120070695877, - "p99": 193.12000274658203 - }, - "isolatedSum": { - "p50": 135.8719989657402, - "p90": 183.9039996266365, - "p95": 206.68799430131912, - "p99": 249.11999702453613 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 444416, - "combineLogicalBytes": 444416, - "fanoutMean": 3.875, - 
"recvTokensMax": 8, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 73.08799773454666, - "p90": 95.77599912881851, - "p95": 105.66399991512299, - "p99": 147.32800424098969 - }, - "combine": { - "p50": 67.6800012588501, - "p90": 82.59200304746628, - "p95": 89.02399986982346, - "p99": 108.64000022411346 - }, - "roundtrip": { - "p50": 121.2799996137619, - "p90": 152.63999998569489, - "p95": 167.4560010433197, - "p99": 201.7280012369156 - }, - "isolatedSum": { - "p50": 140.76799899339676, - "p90": 178.3680021762848, - "p95": 194.68799978494644, - "p99": 255.96800446510315 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 3354624, - "combineLogicalBytes": 3354624, - "fanoutMean": 3.65625, - "recvTokensMax": 64, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 81.66400343179703, - "p90": 98.30400347709656, - "p95": 109.98400300741196, - "p99": 134.14399325847626 - }, - "combine": { - "p50": 76.31999999284744, - "p90": 89.21600133180618, - "p95": 95.90400010347366, - "p99": 118.6240017414093 - }, - "roundtrip": { - "p50": 136.00000739097595, - "p90": 157.53600001335144, - "p95": 172.7360039949417, - "p99": 212.25599944591522 - }, - "isolatedSum": { - "p50": 157.98400342464447, - "p90": 187.52000480890274, - "p95": 205.88800311088562, - "p99": 252.76799499988556 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 12859392, - "combineLogicalBytes": 12859392, - "fanoutMean": 3.50390625, - "recvTokensMax": 255, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 122.52800166606903, - "p90": 136.6720050573349, - "p95": 148.15999567508698, - "p99": 160.89600324630737 - }, - "combine": { - "p50": 112.03200370073318, - "p90": 
125.21600723266602, - "p95": 132.4480026960373, - "p99": 149.02399480342865 - }, - "roundtrip": { - "p50": 211.58400177955627, - "p90": 233.2800030708313, - "p95": 244.159996509552, - "p99": 292.03200340270996 - }, - "isolatedSum": { - "p50": 234.56000536680222, - "p90": 261.8880122900009, - "p95": 280.60799837112427, - "p99": 309.919998049736 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 49946624, - "combineLogicalBytes": 49946624, - "fanoutMean": 3.40234375, - "recvTokensMax": 1022, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-b1823392", - "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|decode|normal|none|none|0|tuned||22da8b58646609c", - "colorKey": "h200_c5b3365a", - "comparisonKey": "73e84f1c938d90c0", - "schemaVersion": 3, - "generatedAt": "2026-06-27T00:04:44.997855+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_13", - "sku": "h200", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · bf16 · zipf-heavy", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "zipf-heavy", - "routingLabel": "zipf-heavy", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - 
"gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "22da8b58646609c", - "workloadId": "set:8:6b84350720aa8233", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28272086516", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272086516", - "createdAt": "2026-06-27T00:04:44.997855+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 64.92800265550613, - "p90": 98.62399846315384, - "p95": 109.27999764680862, - "p99": 182.23999440670013 - }, - "combine": { - "p50": 60.92799827456474, - "p90": 75.42400062084198, - "p95": 80.6720033288002, - "p99": 96.54399752616882 - }, - "roundtrip": { - "p50": 116.57600104808807, - "p90": 152.44799852371216, - "p95": 162.81600296497345, - "p99": 179.51999604701996 - }, - "isolatedSum": { - "p50": 125.85600093007088, - "p90": 174.04799908399582, - "p95": 189.95200097560883, - "p99": 278.78399193286896 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 172032, - "combineLogicalBytes": 172032, - "fanoutMean": 1.5, - "recvTokensMax": 8, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 65.50399959087372, - "p90": 96.47999703884125, - "p95": 104.86400127410889, - "p99": 137.56799697875977 - }, - "combine": { - "p50": 59.808000922203064, - "p90": 72.83200323581696, - "p95": 78.84799689054489, - "p99": 92.19200164079666 - }, - "roundtrip": { - "p50": 110.97600311040878, - "p90": 140.00000059604645, - "p95": 150.87999403476715, - "p99": 177.72799730300903 - }, - "isolatedSum": { - "p50": 125.31200051307678, - "p90": 169.3120002746582, - 
"p95": 183.71199816465378, - "p99": 229.75999861955643 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 315392, - "combineLogicalBytes": 315392, - "fanoutMean": 1.375, - "recvTokensMax": 16, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 73.18399846553802, - "p90": 93.63199770450592, - "p95": 108.67200046777725, - "p99": 126.01600587368011 - }, - "combine": { - "p50": 62.20800057053566, - "p90": 70.52800059318542, - "p95": 78.07999849319458, - "p99": 100.51199793815613 - }, - "roundtrip": { - "p50": 116.67200177907944, - "p90": 144.1279947757721, - "p95": 158.91200304031372, - "p99": 186.17600202560425 - }, - "isolatedSum": { - "p50": 135.39199903607368, - "p90": 164.15999829769135, - "p95": 186.75199896097183, - "p99": 226.52800381183624 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 616448, - "combineLogicalBytes": 616448, - "fanoutMean": 1.34375, - "recvTokensMax": 32, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 72.64000177383423, - "p90": 98.39999675750732, - "p95": 103.93600165843964, - "p99": 132.28799402713776 - }, - "combine": { - "p50": 60.99199876189232, - "p90": 72.06399738788605, - "p95": 79.52000200748444, - "p99": 91.5519967675209 - }, - "roundtrip": { - "p50": 118.94399672746658, - "p90": 150.30400454998016, - "p95": 160.3199988603592, - "p99": 178.78399789333344 - }, - "isolatedSum": { - "p50": 133.63200053572655, - "p90": 170.46399414539337, - "p95": 183.45600366592407, - "p99": 223.83999079465866 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1376256, - "combineLogicalBytes": 1376256, - "fanoutMean": 1.5, - "recvTokensMax": 64, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 
72.4480003118515, - "p90": 85.9839990735054, - "p95": 96.99200093746185, - "p99": 122.17599898576736 - }, - "combine": { - "p50": 67.10399687290192, - "p90": 77.11999863386154, - "p95": 83.74399691820145, - "p99": 104.16000336408615 - }, - "roundtrip": { - "p50": 118.40000003576279, - "p90": 138.11199367046356, - "p95": 145.11999487876892, - "p99": 157.18400478363037 - }, - "isolatedSum": { - "p50": 139.55199718475342, - "p90": 163.10399770736694, - "p95": 180.7359978556633, - "p99": 226.33600234985352 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2781184, - "combineLogicalBytes": 2781184, - "fanoutMean": 1.515625, - "recvTokensMax": 128, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 76.86399668455124, - "p90": 97.31200337409973, - "p95": 106.36799782514572, - "p99": 120.25599926710129 - }, - "combine": { - "p50": 69.47200000286102, - "p90": 82.78399705886841, - "p95": 87.80799806118011, - "p99": 102.9760017991066 - }, - "roundtrip": { - "p50": 128.25599312782288, - "p90": 152.63999998569489, - "p95": 163.10399770736694, - "p99": 197.37599790096283 - }, - "isolatedSum": { - "p50": 146.33599668741226, - "p90": 180.09600043296814, - "p95": 194.17599588632584, - "p99": 223.23200106620789 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 5533696, - "combineLogicalBytes": 5533696, - "fanoutMean": 1.5078125, - "recvTokensMax": 256, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 64, - "globalTokens": 512, - "dispatch": { - "p50": 95.8079993724823, - "p90": 111.68000102043152, - "p95": 120.99199742078781, - "p99": 207.61600136756897 - }, - "combine": { - "p50": 81.53600245714188, - "p90": 93.75999867916107, - "p95": 102.24000364542007, - "p99": 131.1360001564026 - }, - "roundtrip": { - "p50": 155.96799552440643, - "p90": 171.23199999332428, - "p95": 179.9360066652298, - "p99": 
195.93599438667297 - }, - "isolatedSum": { - "p50": 177.34400182962418, - "p90": 205.4399996995926, - "p95": 223.23200106620789, - "p99": 338.75200152397156 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 11210752, - "combineLogicalBytes": 11210752, - "fanoutMean": 1.52734375, - "recvTokensMax": 512, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 118.49600076675415, - "p90": 133.40799510478973, - "p95": 137.472003698349, - "p99": 168.09600591659546 - }, - "combine": { - "p50": 108.51199924945831, - "p90": 121.37600034475327, - "p95": 125.18399953842163, - "p99": 135.74400544166565 - }, - "roundtrip": { - "p50": 205.76000213623047, - "p90": 222.78399765491486, - "p95": 227.84000635147095, - "p99": 288.2879972457886 - }, - "isolatedSum": { - "p50": 227.00800001621246, - "p90": 254.783995449543, - "p95": 262.65600323677063, - "p99": 303.8400113582611 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 22650880, - "combineLogicalBytes": 22650880, - "fanoutMean": 1.54296875, - "recvTokensMax": 1024, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-1cebdc77", - "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|decode|normal|none|none|0|tuned||47fddabb3277bec", - "colorKey": "h200_c5b3365a", - "comparisonKey": "73e84f1c938d90c0", - "schemaVersion": 3, - "generatedAt": "2026-06-26T23:57:04.169845+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_4", - "sku": "h200", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · bf16 · 
zipf-heavy", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "zipf-heavy", - "routingLabel": "zipf-heavy", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "47fddabb3277bec", - "workloadId": "set:4:6b84350720aa8233", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28271852422", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271852422", - "createdAt": "2026-06-26T23:57:04.169845+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 70.27199864387512, - "p90": 103.64799946546555, - "p95": 125.791996717453, - "p99": 208.15999805927277 - }, - "combine": { - "p50": 61.95199862122536, - "p90": 75.45600086450577, - "p95": 80.6720033288002, - "p99": 99.07200187444687 - }, - "roundtrip": { - "p50": 117.37599968910217, - "p90": 144.83200013637543, - "p95": 152.73599326610565, - "p99": 179.58399653434753 - }, - "isolatedSum": { - "p50": 132.22399726510048, - "p90": 179.1040003299713, - "p95": 206.4640000462532, - "p99": 307.23199993371964 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 172032, - "combineLogicalBytes": 172032, - "fanoutMean": 
1.5, - "recvTokensMax": 8, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 75.26399940252304, - "p90": 98.11200201511383, - "p95": 106.175996363163, - "p99": 138.3039951324463 - }, - "combine": { - "p50": 63.90400230884552, - "p90": 78.43200117349625, - "p95": 83.99999886751175, - "p99": 94.11200135946274 - }, - "roundtrip": { - "p50": 119.48800086975098, - "p90": 151.16800367832184, - "p95": 161.53599321842194, - "p99": 214.4320011138916 - }, - "isolatedSum": { - "p50": 139.16800171136856, - "p90": 176.54400318861008, - "p95": 190.17599523067474, - "p99": 232.41599649190903 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1376256, - "combineLogicalBytes": 1376256, - "fanoutMean": 1.5, - "recvTokensMax": 64, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 82.17599987983704, - "p90": 105.98400235176086, - "p95": 113.11999708414078, - "p99": 133.18400084972382 - }, - "combine": { - "p50": 72.15999811887741, - "p90": 88.76799792051315, - "p95": 93.28000247478485, - "p99": 116.57600104808807 - }, - "roundtrip": { - "p50": 134.49600338935852, - "p90": 162.432000041008, - "p95": 173.47200214862823, - "p99": 268.8640058040619 - }, - "isolatedSum": { - "p50": 154.33599799871445, - "p90": 194.75200027227402, - "p95": 206.39999955892563, - "p99": 249.7600018978119 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 5533696, - "combineLogicalBytes": 5533696, - "fanoutMean": 1.5078125, - "recvTokensMax": 256, - "stragglerRank": 1, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 119.58400160074234, - "p90": 131.96800649166107, - "p95": 136.3839954137802, - "p99": 154.59200739860535 - }, - "combine": { - "p50": 109.31199789047241, - "p90": 
120.67200243473053, - "p95": 125.69600343704224, - "p99": 135.3919953107834 - }, - "roundtrip": { - "p50": 207.58399367332458, - "p90": 222.91199862957, - "p95": 232.86400735378265, - "p99": 284.89598631858826 - }, - "isolatedSum": { - "p50": 228.89599949121475, - "p90": 252.6400089263916, - "p95": 262.07999885082245, - "p99": 289.98400270938873 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 22650880, - "combineLogicalBytes": 22650880, - "fanoutMean": 1.54296875, - "recvTokensMax": 1024, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-78ae7872", - "identity": "h200|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-heavy+eplb|8|decode|normal|none|none|0|tuned||5a3054422534366", - "colorKey": "h200_06aa1194", - "comparisonKey": "85dbd46cb77d1362", - "schemaVersion": 3, - "generatedAt": "2026-06-27T00:04:54.232728+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_5", - "sku": "h200", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · bf16 · zipf-heavy+eplb", - "model": "DeepSeek-V3 (EPLB physical)", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 288, - "routing": "zipf-heavy", - "routingLabel": "zipf-heavy+eplb", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": true, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - 
"kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "5a3054422534366", - "workloadId": "set:8:6b84350720aa8233", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": 7.40625, - "eplbImbalanceAfter": 1.0004417782738093, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28272090308", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272090308", - "createdAt": "2026-06-27T00:04:54.232728+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 69.21599805355072, - "p90": 83.55200290679932, - "p95": 92.83199906349182, - "p99": 110.75200140476227 - }, - "combine": { - "p50": 67.45599955320358, - "p90": 76.12799853086472, - "p95": 81.53600245714188, - "p99": 88.54400366544724 - }, - "roundtrip": { - "p50": 122.079998254776, - "p90": 140.4159963130951, - "p95": 148.25600385665894, - "p99": 178.3680021762848 - }, - "isolatedSum": { - "p50": 136.6719976067543, - "p90": 159.68000143766403, - "p95": 174.3680015206337, - "p99": 199.2960050702095 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 559104, - "combineLogicalBytes": 559104, - "fanoutMean": 4.875, - "recvTokensMax": 6, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 72.03199714422226, - "p90": 80.57600259780884, - "p95": 86.40000224113464, - "p99": 97.34400361776352 - }, - "combine": { - "p50": 67.61600077152252, - "p90": 75.13599842786789, - "p95": 79.0719985961914, - "p99": 86.40000224113464 - }, - "roundtrip": { - "p50": 120.7360029220581, - "p90": 138.49599659442902, - "p95": 162.01600432395935, - "p99": 265.21599292755127 - }, - "isolatedSum": { - "p50": 
139.64799791574478, - "p90": 155.71200102567673, - "p95": 165.47200083732605, - "p99": 183.74400585889816 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1175552, - "combineLogicalBytes": 1175552, - "fanoutMean": 5.125, - "recvTokensMax": 12, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 74.30399954319, - "p90": 86.91199868917465, - "p95": 100.12800246477127, - "p99": 123.48800152540207 - }, - "combine": { - "p50": 68.51200014352798, - "p90": 77.85599678754807, - "p95": 84.70399677753448, - "p99": 112.15999722480774 - }, - "roundtrip": { - "p50": 121.31199985742569, - "p90": 140.25600254535675, - "p95": 151.64799988269806, - "p99": 177.66399681568146 - }, - "isolatedSum": { - "p50": 142.815999686718, - "p90": 164.76799547672272, - "p95": 184.83199924230576, - "p99": 235.6479987502098 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2465792, - "combineLogicalBytes": 2465792, - "fanoutMean": 5.375, - "recvTokensMax": 25, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 74.65600222349167, - "p90": 88.28800171613693, - "p95": 97.98400104045868, - "p99": 121.2799996137619 - }, - "combine": { - "p50": 69.56800073385239, - "p90": 78.87999713420868, - "p95": 83.16799998283386, - "p99": 94.84799951314926 - }, - "roundtrip": { - "p50": 126.36800110340118, - "p90": 164.57599401474, - "p95": 172.44799435138702, - "p99": 196.22400403022766 - }, - "isolatedSum": { - "p50": 144.22400295734406, - "p90": 167.1679988503456, - "p95": 181.15200102329254, - "p99": 216.12799912691116 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4988928, - "combineLogicalBytes": 4988928, - "fanoutMean": 5.4375, - "recvTokensMax": 47, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - 
"globalTokens": 128, - "dispatch": { - "p50": 73.53600114583969, - "p90": 88.0960002541542, - "p95": 96.03200107812881, - "p99": 121.2799996137619 - }, - "combine": { - "p50": 70.39999961853027, - "p90": 78.91199737787247, - "p95": 86.36800199747086, - "p99": 98.9760011434555 - }, - "roundtrip": { - "p50": 125.47199428081512, - "p90": 143.96800100803375, - "p95": 153.6960005760193, - "p99": 172.8000044822693 - }, - "isolatedSum": { - "p50": 143.93600076436996, - "p90": 167.00799763202667, - "p95": 182.40000307559967, - "p99": 220.2560007572174 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 9791488, - "combineLogicalBytes": 9791488, - "fanoutMean": 5.3359375, - "recvTokensMax": 94, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 81.02399855852127, - "p90": 94.71999853849411, - "p95": 106.11200332641602, - "p99": 144.6399986743927 - }, - "combine": { - "p50": 76.7040029168129, - "p90": 88.54400366544724, - "p95": 96.76799923181534, - "p99": 107.00800269842148 - }, - "roundtrip": { - "p50": 135.29600203037262, - "p90": 158.78400206565857, - "p95": 170.84799706935883, - "p99": 241.43999814987183 - }, - "isolatedSum": { - "p50": 157.72800147533417, - "p90": 183.26400220394135, - "p95": 202.88000255823135, - "p99": 251.64800137281418 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 19410944, - "combineLogicalBytes": 19410944, - "fanoutMean": 5.2890625, - "recvTokensMax": 178, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 64, - "globalTokens": 512, - "dispatch": { - "p50": 92.96000003814697, - "p90": 105.47199845314026, - "p95": 115.80800265073776, - "p99": 153.56799960136414 - }, - "combine": { - "p50": 86.87999844551086, - "p90": 96.03200107812881, - "p95": 102.33599692583084, - "p99": 112.67200112342834 - }, - "roundtrip": { - "p50": 158.4320068359375, - "p90": 
171.26399278640747, - "p95": 179.967999458313, - "p99": 206.43199980258942 - }, - "isolatedSum": { - "p50": 179.83999848365784, - "p90": 201.50399953126907, - "p95": 218.1439995765686, - "p99": 266.2400007247925 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 38678528, - "combineLogicalBytes": 38678528, - "fanoutMean": 5.26953125, - "recvTokensMax": 360, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 117.18399822711945, - "p90": 127.68000364303589, - "p95": 131.3599944114685, - "p99": 140.44800400733948 - }, - "combine": { - "p50": 104.3199971318245, - "p90": 113.76000195741653, - "p95": 121.98399752378464, - "p99": 137.28000223636627 - }, - "roundtrip": { - "p50": 196.28800451755524, - "p90": 208.95999670028687, - "p95": 216.5759950876236, - "p99": 241.56799912452698 - }, - "isolatedSum": { - "p50": 221.50399535894394, - "p90": 241.44000560045242, - "p95": 253.34399193525314, - "p99": 277.72800624370575 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 77285376, - "combineLogicalBytes": 77285376, - "fanoutMean": 5.2646484375, - "recvTokensMax": 704, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-4fa5aaad", - "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-mild|8|decode|normal|none|none|0|tuned||f3df51be7d5c32b", - "colorKey": "h200_6a794fcd", - "comparisonKey": "50f5858697d33730", - "schemaVersion": 3, - "generatedAt": "2026-06-27T00:03:36.902996+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_1", - "sku": "h200", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 
8, - "epSize": 8, - "label": "H200 EP8 · deepep · bf16 · zipf-mild", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "zipf-mild", - "routingLabel": "zipf-mild", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "f3df51be7d5c32b", - "workloadId": "set:8:289b7f9c14292e96", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28272056705", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272056705", - "createdAt": "2026-06-27T00:03:36.902996+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 73.98399710655212, - "p90": 98.84800016880035, - "p95": 105.98400235176086, - "p99": 125.21600723266602 - }, - "combine": { - "p50": 68.96000355482101, - "p90": 81.66400343179703, - "p95": 86.496002972126, - "p99": 102.88000106811523 - }, - "roundtrip": { - "p50": 119.93599683046341, - "p90": 147.93600142002106, - "p95": 157.53600001335144, - "p99": 168.09600591659546 - }, - "isolatedSum": { - "p50": 142.94400066137314, - "p90": 180.51200360059738, - "p95": 192.48000532388687, - "p99": 228.09600830078125 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 
587776, - "combineLogicalBytes": 587776, - "fanoutMean": 5.125, - "recvTokensMax": 8, - "stragglerRank": 3, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 73.08799773454666, - "p90": 88.73599767684937, - "p95": 100.09600222110748, - "p99": 118.20799857378006 - }, - "combine": { - "p50": 68.35199892520905, - "p90": 77.08799839019775, - "p95": 82.84799754619598, - "p99": 91.61599725484848 - }, - "roundtrip": { - "p50": 123.3920007944107, - "p90": 151.296004652977, - "p95": 158.84800255298615, - "p99": 186.27199530601501 - }, - "isolatedSum": { - "p50": 141.4399966597557, - "p90": 165.82399606704712, - "p95": 182.94399976730347, - "p99": 209.82399582862854 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1103872, - "combineLogicalBytes": 1103872, - "fanoutMean": 4.8125, - "recvTokensMax": 16, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 76.51200145483017, - "p90": 102.88000106811523, - "p95": 109.11999642848969, - "p99": 128.31999361515045 - }, - "combine": { - "p50": 69.82400268316269, - "p90": 81.44000172615051, - "p95": 86.75199747085571, - "p99": 98.04800152778625 - }, - "roundtrip": { - "p50": 126.14400684833527, - "p90": 157.6640009880066, - "p95": 167.84000396728516, - "p99": 190.88000059127808 - }, - "isolatedSum": { - "p50": 146.33600413799286, - "p90": 184.32000279426575, - "p95": 195.8719938993454, - "p99": 226.3679951429367 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2250752, - "combineLogicalBytes": 2250752, - "fanoutMean": 4.90625, - "recvTokensMax": 31, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 75.9039968252182, - "p90": 97.4079966545105, - "p95": 101.88800096511841, - "p99": 117.60000139474869 - }, - "combine": { - 
"p50": 70.62400132417679, - "p90": 84.73599702119827, - "p95": 90.11200070381165, - "p99": 107.42399841547012 - }, - "roundtrip": { - "p50": 125.69600343704224, - "p90": 150.751993060112, - "p95": 158.30400586128235, - "p99": 175.4239946603775 - }, - "isolatedSum": { - "p50": 146.527998149395, - "p90": 182.14399367570877, - "p95": 192.00000166893005, - "p99": 225.0239998102188 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4472832, - "combineLogicalBytes": 4472832, - "fanoutMean": 4.875, - "recvTokensMax": 62, - "stragglerRank": 3, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 76.60800218582153, - "p90": 93.47199648618698, - "p95": 101.9200012087822, - "p99": 109.82400178909302 - }, - "combine": { - "p50": 71.26399874687195, - "p90": 84.09599959850311, - "p95": 88.32000195980072, - "p99": 100.89600086212158 - }, - "roundtrip": { - "p50": 128.25599312782288, - "p90": 152.96000242233276, - "p95": 160.76800227165222, - "p99": 201.92000269889832 - }, - "isolatedSum": { - "p50": 147.87200093269348, - "p90": 177.5679960846901, - "p95": 190.24000316858292, - "p99": 210.7200026512146 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 8888320, - "combineLogicalBytes": 8888320, - "fanoutMean": 4.84375, - "recvTokensMax": 124, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 83.8719978928566, - "p90": 102.55999863147736, - "p95": 108.92800241708755, - "p99": 121.76000326871872 - }, - "combine": { - "p50": 78.43200117349625, - "p90": 91.839998960495, - "p95": 96.57599776983261, - "p99": 108.12799632549286 - }, - "roundtrip": { - "p50": 138.46400380134583, - "p90": 160.19199788570404, - "p95": 168.09600591659546, - "p99": 186.14399433135986 - }, - "isolatedSum": { - "p50": 162.30399906635284, - "p90": 194.39999759197235, - "p95": 205.50400018692017, - "p99": 
229.88799959421158 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 17733632, - "combineLogicalBytes": 17733632, - "fanoutMean": 4.83203125, - "recvTokensMax": 248, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 64, - "globalTokens": 512, - "dispatch": { - "p50": 99.29600358009338, - "p90": 117.79200285673141, - "p95": 125.44000148773193, - "p99": 154.01600301265717 - }, - "combine": { - "p50": 90.14400094747543, - "p90": 102.91200131177902, - "p95": 110.17599701881409, - "p99": 119.35999989509583 - }, - "roundtrip": { - "p50": 166.75199568271637, - "p90": 185.7600063085556, - "p95": 193.02399456501007, - "p99": 220.60799598693848 - }, - "isolatedSum": { - "p50": 189.44000452756882, - "p90": 220.70400416851044, - "p95": 235.61599850654602, - "p99": 273.376002907753 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 35424256, - "combineLogicalBytes": 35424256, - "fanoutMean": 4.826171875, - "recvTokensMax": 492, - "stragglerRank": 2, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 124.4800016283989, - "p90": 135.93600690364838, - "p95": 141.184002161026, - "p99": 167.23200678825378 - }, - "combine": { - "p50": 115.68000167608261, - "p90": 127.29600071907043, - "p95": 131.99999928474426, - "p99": 150.78400075435638 - }, - "roundtrip": { - "p50": 216.95999801158905, - "p90": 232.80000686645508, - "p95": 238.27199637889862, - "p99": 261.02399826049805 - }, - "isolatedSum": { - "p50": 240.1600033044815, - "p90": 263.2320076227188, - "p95": 273.18400144577026, - "p99": 318.01600754261017 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 70160384, - "combineLogicalBytes": 70160384, - "fanoutMean": 4.779296875, - "recvTokensMax": 987, - "stragglerRank": 3, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-ffad9f17", - "identity": 
"h200|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-mild+eplb|8|decode|normal|none|none|0|tuned||16babcaf4204243", - "colorKey": "h200_b2ffaf91", - "comparisonKey": "b3b8e5cc27948267", - "schemaVersion": 3, - "generatedAt": "2026-06-27T00:03:43.326778+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_7", - "sku": "h200", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · bf16 · zipf-mild+eplb", - "model": "DeepSeek-V3 (EPLB physical)", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 288, - "routing": "zipf-mild", - "routingLabel": "zipf-mild+eplb", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": true, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "16babcaf4204243", - "workloadId": "set:8:289b7f9c14292e96", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": 2.61328125, - "eplbImbalanceAfter": 1.0009114583333334, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28272060649", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272060649", - "createdAt": "2026-06-27T00:03:43.326778+00:00", - 
"sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 72.9919970035553, - "p90": 96.73599898815155, - "p95": 102.7199998497963, - "p99": 128.83199751377106 - }, - "combine": { - "p50": 68.15999746322632, - "p90": 81.05599880218506, - "p95": 86.40000224113464, - "p99": 94.91200000047684 - }, - "roundtrip": { - "p50": 122.30399996042252, - "p90": 153.85599434375763, - "p95": 167.23200678825378, - "p99": 196.03200256824493 - }, - "isolatedSum": { - "p50": 141.15199446678162, - "p90": 177.7919977903366, - "p95": 189.12000209093094, - "p99": 223.7439975142479 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 602112, - "combineLogicalBytes": 602112, - "fanoutMean": 5.25, - "recvTokensMax": 7, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 73.02399724721909, - "p90": 95.77599912881851, - "p95": 103.74400019645691, - "p99": 121.72800302505493 - }, - "combine": { - "p50": 67.80800223350525, - "p90": 80.73599636554718, - "p95": 87.39200234413147, - "p99": 99.45599734783173 - }, - "roundtrip": { - "p50": 121.34400010108948, - "p90": 149.1840034723282, - "p95": 156.76799416542053, - "p99": 182.36799538135529 - }, - "isolatedSum": { - "p50": 140.83199948072433, - "p90": 176.5119954943657, - "p95": 191.13600254058838, - "p99": 221.18400037288666 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1189888, - "combineLogicalBytes": 1189888, - "fanoutMean": 5.1875, - "recvTokensMax": 12, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 73.40800017118454, - "p90": 92.54399687051773, - "p95": 101.15200281143188, - "p99": 184.28799510002136 - }, - "combine": { - "p50": 68.28799843788147, - "p90": 82.40000158548355, - "p95": 88.03199976682663, - "p99": 100.44799745082855 
- }, - "roundtrip": { - "p50": 124.38400089740753, - "p90": 158.59200060367584, - "p95": 172.2240000963211, - "p99": 259.42400097846985 - }, - "isolatedSum": { - "p50": 141.695998609066, - "p90": 174.94399845600128, - "p95": 189.18400257825851, - "p99": 284.7359925508499 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2408448, - "combineLogicalBytes": 2408448, - "fanoutMean": 5.25, - "recvTokensMax": 23, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 74.5600014925003, - "p90": 100.12800246477127, - "p95": 106.78400099277496, - "p99": 138.11199367046356 - }, - "combine": { - "p50": 69.08799707889557, - "p90": 81.28000050783157, - "p95": 86.81599795818329, - "p99": 96.67199850082397 - }, - "roundtrip": { - "p50": 123.23199957609177, - "p90": 151.58399939537048, - "p95": 159.87199544906616, - "p99": 174.6560037136078 - }, - "isolatedSum": { - "p50": 143.64799857139587, - "p90": 181.40800297260284, - "p95": 193.59999895095825, - "p99": 234.78399217128754 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4859904, - "combineLogicalBytes": 4859904, - "fanoutMean": 5.296875, - "recvTokensMax": 47, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 74.65600222349167, - "p90": 99.32799637317657, - "p95": 105.56799918413162, - "p99": 127.20000743865967 - }, - "combine": { - "p50": 69.88800317049026, - "p90": 83.10399949550629, - "p95": 88.639996945858, - "p99": 99.35999661684036 - }, - "roundtrip": { - "p50": 124.9919980764389, - "p90": 151.48800611495972, - "p95": 159.5200002193451, - "p99": 197.88800179958344 - }, - "isolatedSum": { - "p50": 144.54400539398193, - "p90": 182.43199586868286, - "p95": 194.20799612998962, - "p99": 226.56000405550003 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 9605120, - "combineLogicalBytes": 
9605120, - "fanoutMean": 5.234375, - "recvTokensMax": 93, - "stragglerRank": 3, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 80.35200089216232, - "p90": 101.02400183677673, - "p95": 105.6319996714592, - "p99": 116.7680025100708 - }, - "combine": { - "p50": 76.80000364780426, - "p90": 88.86399865150452, - "p95": 94.17600184679031, - "p99": 101.56799852848053 - }, - "roundtrip": { - "p50": 135.04000008106232, - "p90": 155.29599785804749, - "p95": 165.50399363040924, - "p99": 190.43199717998505 - }, - "isolatedSum": { - "p50": 157.15200453996658, - "p90": 189.88800048828125, - "p95": 199.8080015182495, - "p99": 218.33600103855133 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 19367936, - "combineLogicalBytes": 19367936, - "fanoutMean": 5.27734375, - "recvTokensMax": 182, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 64, - "globalTokens": 512, - "dispatch": { - "p50": 97.120001912117, - "p90": 111.00800335407257, - "p95": 117.11999773979187, - "p99": 134.39999520778656 - }, - "combine": { - "p50": 87.39200234413147, - "p90": 99.32799637317657, - "p95": 105.6319996714592, - "p99": 121.18399888277054 - }, - "roundtrip": { - "p50": 159.2320054769516, - "p90": 177.2480010986328, - "p95": 184.28799510002136, - "p99": 207.71199464797974 - }, - "isolatedSum": { - "p50": 184.51200425624847, - "p90": 210.33599972724915, - "p95": 222.75199741125107, - "p99": 255.5839940905571 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 38535168, - "combineLogicalBytes": 38535168, - "fanoutMean": 5.25, - "recvTokensMax": 358, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 115.23199826478958, - "p90": 132.4159950017929, - "p95": 140.47999680042267, - "p99": 171.64799571037292 - }, - "combine": { - "p50": 
102.84800082445145, - "p90": 114.07999694347382, - "p95": 119.1679984331131, - "p99": 129.60000336170197 - }, - "roundtrip": { - "p50": 195.90400159358978, - "p90": 210.11200547218323, - "p95": 217.15199947357178, - "p99": 243.74400079250336 - }, - "isolatedSum": { - "p50": 218.07999908924103, - "p90": 246.49599194526672, - "p95": 259.64799523353577, - "p99": 301.2479990720749 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 76869632, - "combineLogicalBytes": 76869632, - "fanoutMean": 5.236328125, - "recvTokensMax": 688, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-49529f9d", - "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-moderate|8|decode|normal|none|none|0|tuned||14ded8461f2636c", - "colorKey": "h200_f2b19f62", - "comparisonKey": "cc27e02aea0a210a", - "schemaVersion": 3, - "generatedAt": "2026-06-27T00:04:04.313162+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_11", - "sku": "h200", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · bf16 · zipf-moderate", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "zipf-moderate", - "routingLabel": "zipf-moderate", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - 
"placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "14ded8461f2636c", - "workloadId": "set:8:120a8dc1dba92ca9", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28272072315", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272072315", - "createdAt": "2026-06-27T00:04:04.313162+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 72.95999675989151, - "p90": 100.73599964380264, - "p95": 110.88000237941742, - "p99": 152.99199521541595 - }, - "combine": { - "p50": 65.2799978852272, - "p90": 80.9599980711937, - "p95": 85.28000116348267, - "p99": 102.1760031580925 - }, - "roundtrip": { - "p50": 121.08799815177917, - "p90": 155.20000457763672, - "p95": 166.27199947834015, - "p99": 225.11999309062958 - }, - "isolatedSum": { - "p50": 138.2399946451187, - "p90": 181.69599771499634, - "p95": 196.16000354290009, - "p99": 255.16799837350845 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 444416, - "combineLogicalBytes": 444416, - "fanoutMean": 3.875, - "recvTokensMax": 8, - "stragglerRank": 2, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 73.2479989528656, - "p90": 105.47199845314026, - "p95": 114.84800279140472, - "p99": 135.74400544166565 - }, - "combine": { - "p50": 67.61600077152252, - "p90": 79.83999699354172, - "p95": 83.5840031504631, - "p99": 92.99200028181076 - }, - "roundtrip": { - "p50": 119.64800208806992, - "p90": 145.56799829006195, - "p95": 150.91200172901154, - "p99": 165.18400609493256 - }, - "isolatedSum": { - "p50": 
140.86399972438812, - "p90": 185.31199544668198, - "p95": 198.43200594186783, - "p99": 228.7360057234764 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 845824, - "combineLogicalBytes": 845824, - "fanoutMean": 3.6875, - "recvTokensMax": 16, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 73.79200309515, - "p90": 102.88000106811523, - "p95": 112.0000034570694, - "p99": 131.8719983100891 - }, - "combine": { - "p50": 67.80800223350525, - "p90": 78.8159966468811, - "p95": 83.29600095748901, - "p99": 102.08000242710114 - }, - "roundtrip": { - "p50": 120.60800194740295, - "p90": 144.44799721240997, - "p95": 152.67199277877808, - "p99": 166.59200191497803 - }, - "isolatedSum": { - "p50": 141.60000532865524, - "p90": 181.69599771499634, - "p95": 195.2960044145584, - "p99": 233.95200073719025 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1691648, - "combineLogicalBytes": 1691648, - "fanoutMean": 3.6875, - "recvTokensMax": 32, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 73.79200309515, - "p90": 97.75999933481216, - "p95": 105.92000186443329, - "p99": 117.69600212574005 - }, - "combine": { - "p50": 68.06399673223495, - "p90": 81.56800270080566, - "p95": 87.39200234413147, - "p99": 104.44799810647964 - }, - "roundtrip": { - "p50": 121.31199985742569, - "p90": 153.98399531841278, - "p95": 162.78399527072906, - "p99": 199.5519995689392 - }, - "isolatedSum": { - "p50": 141.85599982738495, - "p90": 179.32800203561783, - "p95": 193.31200420856476, - "p99": 222.1440002322197 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 3354624, - "combineLogicalBytes": 3354624, - "fanoutMean": 3.65625, - "recvTokensMax": 64, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - 
"globalTokens": 128, - "dispatch": { - "p50": 73.44000041484833, - "p90": 97.69599884748459, - "p95": 103.74400019645691, - "p99": 117.15199798345566 - }, - "combine": { - "p50": 69.98399645090103, - "p90": 83.16799998283386, - "p95": 88.51200342178345, - "p99": 98.59199821949005 - }, - "roundtrip": { - "p50": 125.91999769210815, - "p90": 152.0320028066635, - "p95": 167.7439957857132, - "p99": 200.54399967193604 - }, - "isolatedSum": { - "p50": 143.42399686574936, - "p90": 180.86399883031845, - "p95": 192.25600361824036, - "p99": 215.7439962029457 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 6537216, - "combineLogicalBytes": 6537216, - "fanoutMean": 3.5625, - "recvTokensMax": 127, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 85.4400023818016, - "p90": 105.05600273609161, - "p95": 111.93600296974182, - "p99": 135.48800349235535 - }, - "combine": { - "p50": 76.12799853086472, - "p90": 88.60799670219421, - "p95": 92.41600334644318, - "p99": 124.06399846076965 - }, - "roundtrip": { - "p50": 136.4479959011078, - "p90": 159.04000401496887, - "p95": 166.81599617004395, - "p99": 204.12799715995789 - }, - "isolatedSum": { - "p50": 161.56800091266632, - "p90": 193.66399943828583, - "p95": 204.352006316185, - "p99": 259.552001953125 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 12859392, - "combineLogicalBytes": 12859392, - "fanoutMean": 3.50390625, - "recvTokensMax": 255, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 64, - "globalTokens": 512, - "dispatch": { - "p50": 99.55199807882309, - "p90": 115.84000289440155, - "p95": 124.79999661445618, - "p99": 159.5200002193451 - }, - "combine": { - "p50": 86.65599673986435, - "p90": 98.68799895048141, - "p95": 104.032002389431, - "p99": 120.28799951076508 - }, - "roundtrip": { - "p50": 162.23999857902527, - "p90": 
177.7919977903366, - "p95": 186.62400543689728, - "p99": 207.58399367332458 - }, - "isolatedSum": { - "p50": 186.20799481868744, - "p90": 214.52800184488297, - "p95": 228.83199900388718, - "p99": 279.80799973011017 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 25145344, - "combineLogicalBytes": 25145344, - "fanoutMean": 3.42578125, - "recvTokensMax": 510, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 123.16799908876419, - "p90": 138.08000087738037, - "p95": 149.59999918937683, - "p99": 160.35200655460358 - }, - "combine": { - "p50": 112.47999966144562, - "p90": 122.36800044775009, - "p95": 127.45599448680878, - "p99": 136.9280070066452 - }, - "roundtrip": { - "p50": 213.4079933166504, - "p90": 239.16800320148468, - "p95": 253.6959946155548, - "p99": 450.3040015697479 - }, - "isolatedSum": { - "p50": 235.6479987502098, - "p90": 260.44800132513046, - "p95": 277.0559936761856, - "p99": 297.2800135612488 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 49946624, - "combineLogicalBytes": 49946624, - "fanoutMean": 3.40234375, - "recvTokensMax": 1022, - "stragglerRank": 1, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-904f847b", - "identity": "h200|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-moderate+eplb|8|decode|normal|none|none|0|tuned||a8f501af7004836", - "colorKey": "h200_bac4102c", - "comparisonKey": "6234055b9069f2f2", - "schemaVersion": 3, - "generatedAt": "2026-06-27T00:04:21.213602+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_0", - "sku": "h200", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - 
"worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · bf16 · zipf-moderate+eplb", - "model": "DeepSeek-V3 (EPLB physical)", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 288, - "routing": "zipf-moderate", - "routingLabel": "zipf-moderate+eplb", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": true, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "a8f501af7004836", - "workloadId": "set:8:120a8dc1dba92ca9", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": 4.927734375, - "eplbImbalanceAfter": 1.0006103515625, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28272075655", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272075655", - "createdAt": "2026-06-27T00:04:21.213602+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 71.99999690055847, - "p90": 99.35999661684036, - "p95": 108.47999900579453, - "p99": 130.8480054140091 - }, - "combine": { - "p50": 67.1359971165657, - "p90": 80.64000308513641, - "p95": 84.44800227880478, - "p99": 108.12799632549286 - }, - "roundtrip": { - "p50": 121.08799815177917, - "p90": 149.4079977273941, - "p95": 161.24799847602844, - "p99": 199.8080015182495 - }, - "isolatedSum": { - "p50": 139.13599401712418, - "p90": 179.99999970197678, - "p95": 192.9280012845993, - "p99": 238.97600173950195 
- }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 616448, - "combineLogicalBytes": 616448, - "fanoutMean": 5.375, - "recvTokensMax": 7, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 73.98399710655212, - "p90": 101.79200023412704, - "p95": 111.7120012640953, - "p99": 146.33600413799286 - }, - "combine": { - "p50": 68.7360018491745, - "p90": 82.04799890518188, - "p95": 88.73599767684937, - "p99": 105.21599650382996 - }, - "roundtrip": { - "p50": 124.41600114107132, - "p90": 160.0320041179657, - "p95": 172.86400496959686, - "p99": 196.44799828529358 - }, - "isolatedSum": { - "p50": 142.71999895572662, - "p90": 183.83999913930893, - "p95": 200.44799894094467, - "p99": 251.55200064182281 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1204224, - "combineLogicalBytes": 1204224, - "fanoutMean": 5.25, - "recvTokensMax": 14, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 76.22399926185608, - "p90": 108.76800119876862, - "p95": 123.1359988451004, - "p99": 148.8640010356903 - }, - "combine": { - "p50": 68.7360018491745, - "p90": 82.14399963617325, - "p95": 88.54400366544724, - "p99": 105.02400249242783 - }, - "roundtrip": { - "p50": 124.25599992275238, - "p90": 160.0320041179657, - "p95": 170.01600563526154, - "p99": 244.89599466323853 - }, - "isolatedSum": { - "p50": 144.96000111103058, - "p90": 190.91200083494186, - "p95": 211.68000251054764, - "p99": 253.88800352811813 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2394112, - "combineLogicalBytes": 2394112, - "fanoutMean": 5.21875, - "recvTokensMax": 24, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 74.62400197982788, - "p90": 101.21600329875946, - "p95": 
111.10399663448334, - "p99": 145.47200500965118 - }, - "combine": { - "p50": 69.34399902820587, - "p90": 84.70399677753448, - "p95": 89.50400352478027, - "p99": 104.44799810647964 - }, - "roundtrip": { - "p50": 125.37600100040436, - "p90": 159.4880074262619, - "p95": 170.1119989156723, - "p99": 203.23200523853302 - }, - "isolatedSum": { - "p50": 143.96800100803375, - "p90": 185.92000007629395, - "p95": 200.6080001592636, - "p99": 249.92000311613083 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4630528, - "combineLogicalBytes": 4630528, - "fanoutMean": 5.046875, - "recvTokensMax": 45, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 72.73600250482559, - "p90": 97.75999933481216, - "p95": 108.03200304508209, - "p99": 141.9840008020401 - }, - "combine": { - "p50": 70.36799937486649, - "p90": 88.28800171613693, - "p95": 94.68799829483032, - "p99": 104.54399883747101 - }, - "roundtrip": { - "p50": 127.00800597667694, - "p90": 156.12800419330597, - "p95": 166.9439971446991, - "p99": 198.33600521087646 - }, - "isolatedSum": { - "p50": 143.10400187969208, - "p90": 186.0480010509491, - "p95": 202.72000133991241, - "p99": 246.5279996395111 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 9447424, - "combineLogicalBytes": 9447424, - "fanoutMean": 5.1484375, - "recvTokensMax": 91, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 81.98399841785431, - "p90": 106.65600001811981, - "p95": 116.22399836778641, - "p99": 165.69599509239197 - }, - "combine": { - "p50": 76.9599974155426, - "p90": 90.87999910116196, - "p95": 97.120001912117, - "p99": 118.23999881744385 - }, - "roundtrip": { - "p50": 135.74400544166565, - "p90": 164.48000073432922, - "p95": 176.70400440692902, - "p99": 220.22399306297302 - }, - "isolatedSum": { - "p50": 
158.9439958333969, - "p90": 197.53599911928177, - "p95": 213.3440002799034, - "p99": 283.9359939098358 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 19023872, - "combineLogicalBytes": 19023872, - "fanoutMean": 5.18359375, - "recvTokensMax": 178, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 64, - "globalTokens": 512, - "dispatch": { - "p50": 97.21600264310837, - "p90": 125.50400197505951, - "p95": 140.99200069904327, - "p99": 185.85599958896637 - }, - "combine": { - "p50": 87.77599781751633, - "p90": 105.53599894046783, - "p95": 113.0559965968132, - "p99": 125.63200294971466 - }, - "roundtrip": { - "p50": 159.7760021686554, - "p90": 186.65599822998047, - "p95": 201.53599977493286, - "p99": 221.69600427150726 - }, - "isolatedSum": { - "p50": 184.9920004606247, - "p90": 231.04000091552734, - "p95": 254.04799729585648, - "p99": 311.48800253868103 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 38148096, - "combineLogicalBytes": 38148096, - "fanoutMean": 5.197265625, - "recvTokensMax": 350, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 118.40000003576279, - "p90": 141.7279988527298, - "p95": 151.296004652977, - "p99": 174.84800517559052 - }, - "combine": { - "p50": 103.74400019645691, - "p90": 121.21599912643433, - "p95": 128.60800325870514, - "p99": 147.13600277900696 - }, - "roundtrip": { - "p50": 198.08000326156616, - "p90": 219.7760045528412, - "p95": 227.55199670791626, - "p99": 265.3760015964508 - }, - "isolatedSum": { - "p50": 222.1440002322197, - "p90": 262.9439979791641, - "p95": 279.90400791168213, - "p99": 321.9840079545975 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 76955648, - "combineLogicalBytes": 76955648, - "fanoutMean": 5.2421875, - "recvTokensMax": 687, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - 
] - }, - { - "id": "cx-06bd64b9", - "identity": "h200|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|decode|normal|none|none|0|tuned||a8f501af7004836", - "colorKey": "h200_1eda221e", - "comparisonKey": "00e2c45e1159b581", - "schemaVersion": 3, - "generatedAt": "2026-06-27T00:03:16.896756+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_3", - "sku": "h200", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · bf16 · zipf+eplb", - "model": "DeepSeek-V3 (EPLB physical)", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 288, - "routing": "zipf", - "routingLabel": "zipf+eplb", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": true, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "a8f501af7004836", - "workloadId": "set:8:f5576e2b712d38c3", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": 4.927734375, - "eplbImbalanceAfter": 1.0006103515625, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28272045914", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272045914", - "createdAt": 
"2026-06-27T00:03:16.896756+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 72.15999811887741, - "p90": 99.0080013871193, - "p95": 105.56799918413162, - "p99": 131.80799782276154 - }, - "combine": { - "p50": 68.70400160551071, - "p90": 83.23200047016144, - "p95": 88.8959988951683, - "p99": 117.40799993276596 - }, - "roundtrip": { - "p50": 121.60000205039978, - "p90": 151.8079936504364, - "p95": 162.88000345230103, - "p99": 197.63199985027313 - }, - "isolatedSum": { - "p50": 140.86399972438812, - "p90": 182.24000185728073, - "p95": 194.46399807929993, - "p99": 249.2159977555275 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 616448, - "combineLogicalBytes": 616448, - "fanoutMean": 5.375, - "recvTokensMax": 7, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 73.7600028514862, - "p90": 99.96800124645233, - "p95": 106.97600245475769, - "p99": 125.63200294971466 - }, - "combine": { - "p50": 67.58400052785873, - "p90": 79.52000200748444, - "p95": 84.35200154781342, - "p99": 95.61599791049957 - }, - "roundtrip": { - "p50": 121.95199728012085, - "p90": 150.52799880504608, - "p95": 158.9760035276413, - "p99": 188.51199746131897 - }, - "isolatedSum": { - "p50": 141.34400337934494, - "p90": 179.48800325393677, - "p95": 191.3280040025711, - "p99": 221.24800086021423 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1204224, - "combineLogicalBytes": 1204224, - "fanoutMean": 5.25, - "recvTokensMax": 14, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 74.23999905586243, - "p90": 98.36799651384354, - "p95": 105.8880016207695, - "p99": 117.60000139474869 - }, - "combine": { - "p50": 68.57600063085556, - "p90": 81.82399719953537, - "p95": 
86.496002972126, - "p99": 94.62399780750275 - }, - "roundtrip": { - "p50": 123.19999933242798, - "p90": 152.92799472808838, - "p95": 164.12800550460815, - "p99": 221.98399901390076 - }, - "isolatedSum": { - "p50": 142.815999686718, - "p90": 180.1919937133789, - "p95": 192.3840045928955, - "p99": 212.22399920225143 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2394112, - "combineLogicalBytes": 2394112, - "fanoutMean": 5.21875, - "recvTokensMax": 24, - "stragglerRank": 1, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 76.06399804353714, - "p90": 117.8240031003952, - "p95": 132.03200697898865, - "p99": 183.45600366592407 - }, - "combine": { - "p50": 69.37599927186966, - "p90": 85.02399921417236, - "p95": 89.66399729251862, - "p99": 100.3199964761734 - }, - "roundtrip": { - "p50": 123.16799908876419, - "p90": 152.8639942407608, - "p95": 160.96000373363495, - "p99": 184.1920018196106 - }, - "isolatedSum": { - "p50": 145.4399973154068, - "p90": 202.84800231456757, - "p95": 221.69600427150726, - "p99": 283.7760001420975 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4630528, - "combineLogicalBytes": 4630528, - "fanoutMean": 5.046875, - "recvTokensMax": 45, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 76.67200267314911, - "p90": 107.10400342941284, - "p95": 114.20799791812897, - "p99": 128.9599984884262 - }, - "combine": { - "p50": 72.25599884986877, - "p90": 88.76799792051315, - "p95": 96.00000083446503, - "p99": 114.75200206041336 - }, - "roundtrip": { - "p50": 128.31999361515045, - "p90": 158.6879938840866, - "p95": 168.89600455760956, - "p99": 192.89599359035492 - }, - "isolatedSum": { - "p50": 148.92800152301788, - "p90": 195.872001349926, - "p95": 210.207998752594, - "p99": 243.71200054883957 - }, - "roundtripMeasured": true, - 
"dispatchLogicalBytes": 9447424, - "combineLogicalBytes": 9447424, - "fanoutMean": 5.1484375, - "recvTokensMax": 91, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 83.39200168848038, - "p90": 103.80800068378448, - "p95": 109.43999886512756, - "p99": 126.71999633312225 - }, - "combine": { - "p50": 77.18399912118912, - "p90": 89.79199826717377, - "p95": 95.10400146245956, - "p99": 105.98400235176086 - }, - "roundtrip": { - "p50": 134.783998131752, - "p90": 157.79200196266174, - "p95": 167.13599860668182, - "p99": 210.94399690628052 - }, - "isolatedSum": { - "p50": 160.5760008096695, - "p90": 193.59999895095825, - "p95": 204.54400032758713, - "p99": 232.70399868488312 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 19023872, - "combineLogicalBytes": 19023872, - "fanoutMean": 5.18359375, - "recvTokensMax": 178, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 64, - "globalTokens": 512, - "dispatch": { - "p50": 93.56799721717834, - "p90": 113.63200098276138, - "p95": 120.2239990234375, - "p99": 133.4719955921173 - }, - "combine": { - "p50": 86.40000224113464, - "p90": 101.72799974679947, - "p95": 105.6319996714592, - "p99": 116.48000031709671 - }, - "roundtrip": { - "p50": 157.9200029373169, - "p90": 181.34400248527527, - "p95": 187.42400407791138, - "p99": 211.87199652194977 - }, - "isolatedSum": { - "p50": 179.967999458313, - "p90": 215.36000072956085, - "p95": 225.8559986948967, - "p99": 249.95199590921402 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 38148096, - "combineLogicalBytes": 38148096, - "fanoutMean": 5.197265625, - "recvTokensMax": 350, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 117.15199798345566, - "p90": 137.1839940547943, - "p95": 
142.33599603176117, - "p99": 165.79200327396393 - }, - "combine": { - "p50": 106.84800148010254, - "p90": 119.32799965143204, - "p95": 122.81599640846252, - "p99": 133.53599607944489 - }, - "roundtrip": { - "p50": 197.56799936294556, - "p90": 213.85599672794342, - "p95": 221.3120013475418, - "p99": 245.37600576877594 - }, - "isolatedSum": { - "p50": 223.9999994635582, - "p90": 256.51199370622635, - "p95": 265.1519924402237, - "p99": 299.3279993534088 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 76955648, - "combineLogicalBytes": 76955648, - "fanoutMean": 5.2421875, - "recvTokensMax": 687, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-0d6ef23b", - "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|ac583971f94b176", - "colorKey": "h200_c851a534", - "comparisonKey": "6b4f4d7f65293019", - "schemaVersion": 3, - "generatedAt": "2026-06-26T17:29:45.312905+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_2", - "sku": "h200", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "normalized", - "suite": "resource-constrained", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · bf16 (norm)", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": 0.18, - "achievedFraction": 0.1818, - "configuredUnits": 24, - "deviceUnits": 132, - "resourceClass": "resource-constrained", - "conformanceClass": 
"resource-conforming", - "fixedKernel": false, - "paretoEligible": true - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "ac583971f94b176", - "workloadId": "set:8:d8d49658059863f2", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28254392935", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254392935", - "createdAt": "2026-06-26T17:29:45.312905+00:00", - "sha": "60dec7d70f554e252fec87709e2be52752947db1" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 74.11199808120728, - "p90": 94.11200135946274, - "p95": 104.35199737548828, - "p99": 138.0160003900528 - }, - "combine": { - "p50": 68.41599941253662, - "p90": 78.72000336647034, - "p95": 83.48800241947174, - "p99": 105.72800040245056 - }, - "roundtrip": { - "p50": 124.4800016283989, - "p90": 144.31999623775482, - "p95": 156.3200056552887, - "p99": 193.53599846363068 - }, - "isolatedSum": { - "p50": 142.5279974937439, - "p90": 172.83200472593307, - "p95": 187.83999979496002, - "p99": 243.74400079250336 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 630784, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 74.33599978685379, - "p90": 99.42399710416794, - "p95": 109.66400057077408, - "p99": 131.71200454235077 - }, - "combine": { - "p50": 69.85600292682648, - "p90": 83.00799876451492, - "p95": 90.40000289678574, - "p99": 114.33599889278412 - }, - "roundtrip": { - "p50": 122.43200093507767, - "p90": 144.6080058813095, - "p95": 
154.62400019168854, - "p99": 173.69599640369415 - }, - "isolatedSum": { - "p50": 144.19200271368027, - "p90": 182.43199586868286, - "p95": 200.06400346755981, - "p99": 246.0480034351349 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1232896, - "combineLogicalBytes": 1232896, - "fanoutMean": 5.375, - "recvTokensMax": 13, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 74.97599720954895, - "p90": 95.29600292444229, - "p95": 104.12800312042236, - "p99": 139.74399864673615 - }, - "combine": { - "p50": 69.40799951553345, - "p90": 81.63200318813324, - "p95": 88.22400122880936, - "p99": 119.4240003824234 - }, - "roundtrip": { - "p50": 123.74400347471237, - "p90": 150.36800503730774, - "p95": 160.3199988603592, - "p99": 204.8960030078888 - }, - "isolatedSum": { - "p50": 144.3839967250824, - "p90": 176.92800611257553, - "p95": 192.35200434923172, - "p99": 259.16799902915955 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2480128, - "combineLogicalBytes": 2480128, - "fanoutMean": 5.40625, - "recvTokensMax": 29, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 74.78400319814682, - "p90": 92.25600212812424, - "p95": 102.91200131177902, - "p99": 123.16799908876419 - }, - "combine": { - "p50": 70.52800059318542, - "p90": 81.95199817419052, - "p95": 87.48800307512283, - "p99": 100.51199793815613 - }, - "roundtrip": { - "p50": 124.03199821710587, - "p90": 147.20000326633453, - "p95": 153.9199948310852, - "p99": 180.00000715255737 - }, - "isolatedSum": { - "p50": 145.31200379133224, - "p90": 174.20800030231476, - "p95": 190.40000438690186, - "p99": 223.67999702692032 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 0, - 
"correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 73.18399846553802, - "p90": 92.83199906349182, - "p95": 103.61599922180176, - "p99": 195.93599438667297 - }, - "combine": { - "p50": 71.32799923419952, - "p90": 86.33600175380707, - "p95": 92.03200042247772, - "p99": 120.80000340938568 - }, - "roundtrip": { - "p50": 129.72800433635712, - "p90": 161.31199896335602, - "p95": 172.86400496959686, - "p99": 215.10399878025055 - }, - "isolatedSum": { - "p50": 144.51199769973755, - "p90": 179.1680008172989, - "p95": 195.64799964427948, - "p99": 316.73599779605865 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 9920512, - "combineLogicalBytes": 9920512, - "fanoutMean": 5.40625, - "recvTokensMax": 92, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 82.2720006108284, - "p90": 100.80000013113022, - "p95": 108.92800241708755, - "p99": 134.88000631332397 - }, - "combine": { - "p50": 76.03199779987335, - "p90": 89.40800279378891, - "p95": 94.97600048780441, - "p99": 117.95199662446976 - }, - "roundtrip": { - "p50": 130.8480054140091, - "p90": 154.33600544929504, - "p95": 164.73600268363953, - "p99": 204.0639966726303 - }, - "isolatedSum": { - "p50": 158.30399841070175, - "p90": 190.20800292491913, - "p95": 203.90400290489197, - "p99": 252.83200293779373 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 19726336, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 182, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 64, - "globalTokens": 512, - "dispatch": { - "p50": 91.32800251245499, - "p90": 110.04800349473953, - "p95": 116.86400324106216, - "p99": 146.84799313545227 - }, - "combine": { - "p50": 87.2960016131401, - "p90": 98.36799651384354, - "p95": 104.70400005578995, - "p99": 
124.92799758911133 - }, - "roundtrip": { - "p50": 156.031996011734, - "p90": 173.24799299240112, - "p95": 180.38399517536163, - "p99": 215.39199352264404 - }, - "isolatedSum": { - "p50": 178.6240041255951, - "p90": 208.41600000858307, - "p95": 221.5680032968521, - "p99": 271.7759907245636 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 38993920, - "combineLogicalBytes": 38993920, - "fanoutMean": 5.3125, - "recvTokensMax": 367, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 116.03199690580368, - "p90": 129.7599971294403, - "p95": 136.57599687576294, - "p99": 149.24800395965576 - }, - "combine": { - "p50": 103.42399775981903, - "p90": 116.54400080442429, - "p95": 123.3920007944107, - "p99": 141.95199310779572 - }, - "roundtrip": { - "p50": 192.54399836063385, - "p90": 208.8959962129593, - "p95": 215.64799547195435, - "p99": 228.7359982728958 - }, - "isolatedSum": { - "p50": 219.4559946656227, - "p90": 246.3039979338646, - "p95": 259.96799767017365, - "p99": 291.1999970674515 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 3, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-0f126172", - "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|decode|normal|none|none|0|normalized|0.18|ffa946582edb500", - "colorKey": "h200_a1e795ec", - "comparisonKey": "467cf4a4daff1cff", - "schemaVersion": 3, - "generatedAt": "2026-06-26T17:30:47.472039+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_12", - "sku": "h200", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "normalized", - "suite": "resource-constrained", - "comparisonClass": "standardized", - "measurementContract": 
"layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · bf16 (norm) · balanced", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "balanced", - "routingLabel": "balanced", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": 0.18, - "achievedFraction": 0.1818, - "configuredUnits": 24, - "deviceUnits": 132, - "resourceClass": "resource-constrained", - "conformanceClass": "resource-conforming", - "fixedKernel": false, - "paretoEligible": true - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "ffa946582edb500", - "workloadId": "set:8:7af12818400d6348", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28254443915", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254443915", - "createdAt": "2026-06-26T17:30:47.472039+00:00", - "sha": "60dec7d70f554e252fec87709e2be52752947db1" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 73.95199686288834, - "p90": 88.0960002541542, - "p95": 97.24800288677216, - "p99": 108.25599730014801 - }, - "combine": { - "p50": 70.91200351715088, - "p90": 81.60000294446945, - "p95": 87.26400136947632, - "p99": 97.28000313043594 - }, - "roundtrip": { - "p50": 125.2480000257492, - "p90": 149.63200688362122, - "p95": 157.85600244998932, - "p99": 175.04000663757324 - }, - "isolatedSum": { - "p50": 144.86400038003922, - "p90": 169.69600319862366, 
- "p95": 184.51200425624847, - "p99": 205.53600043058395 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 917504, - "combineLogicalBytes": 917504, - "fanoutMean": 8, - "recvTokensMax": 8, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 74.23999905586243, - "p90": 91.00800007581711, - "p95": 98.88000041246414, - "p99": 130.23999333381653 - }, - "combine": { - "p50": 70.52800059318542, - "p90": 79.71200346946716, - "p95": 85.50400286912918, - "p99": 106.46399855613708 - }, - "roundtrip": { - "p50": 123.6800029873848, - "p90": 142.07999408245087, - "p95": 152.99199521541595, - "p99": 184.35199558734894 - }, - "isolatedSum": { - "p50": 144.76799964904785, - "p90": 170.72000354528427, - "p95": 184.38400328159332, - "p99": 236.7039918899536 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1835008, - "combineLogicalBytes": 1835008, - "fanoutMean": 8, - "recvTokensMax": 16, - "stragglerRank": 2, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 75.03999769687653, - "p90": 97.9200005531311, - "p95": 108.47999900579453, - "p99": 140.09599387645721 - }, - "combine": { - "p50": 70.11199742555618, - "p90": 81.34400099515915, - "p95": 86.496002972126, - "p99": 99.29600358009338 - }, - "roundtrip": { - "p50": 125.69600343704224, - "p90": 151.36000514030457, - "p95": 159.55199301242828, - "p99": 178.3359944820404 - }, - "isolatedSum": { - "p50": 145.1519951224327, - "p90": 179.26400154829025, - "p95": 194.97600197792053, - "p99": 239.3919974565506 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 3670016, - "combineLogicalBytes": 3670016, - "fanoutMean": 8, - "recvTokensMax": 32, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 73.56800138950348, - 
"p90": 94.17600184679031, - "p95": 102.62399911880493, - "p99": 126.14400684833527 - }, - "combine": { - "p50": 70.72000205516815, - "p90": 82.04799890518188, - "p95": 86.43200248479843, - "p99": 96.47999703884125 - }, - "roundtrip": { - "p50": 125.69600343704224, - "p90": 148.0640023946762, - "p95": 156.76799416542053, - "p99": 182.72000551223755 - }, - "isolatedSum": { - "p50": 144.28800344467163, - "p90": 176.2240007519722, - "p95": 189.05600160360336, - "p99": 222.6240038871765 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 7340032, - "combineLogicalBytes": 7340032, - "fanoutMean": 8, - "recvTokensMax": 64, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 74.46400076150894, - "p90": 90.71999788284302, - "p95": 96.44799679517746, - "p99": 112.19199746847153 - }, - "combine": { - "p50": 76.03199779987335, - "p90": 84.70399677753448, - "p95": 91.16800129413605, - "p99": 104.54399883747101 - }, - "roundtrip": { - "p50": 129.60000336170197, - "p90": 153.6960005760193, - "p95": 161.3440066576004, - "p99": 196.28800451755524 - }, - "isolatedSum": { - "p50": 150.4959985613823, - "p90": 175.4239946603775, - "p95": 187.6159980893135, - "p99": 216.73599630594254 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 14680064, - "combineLogicalBytes": 14680064, - "fanoutMean": 8, - "recvTokensMax": 128, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 83.20000022649765, - "p90": 100.12800246477127, - "p95": 107.45599865913391, - "p99": 122.3360002040863 - }, - "combine": { - "p50": 80.79999685287476, - "p90": 89.88799899816513, - "p95": 95.36000341176987, - "p99": 100.54399818181992 - }, - "roundtrip": { - "p50": 142.17600226402283, - "p90": 155.45600652694702, - "p95": 165.3439998626709, - "p99": 182.0800006389618 - }, - "isolatedSum": { 
- "p50": 163.9999970793724, - "p90": 190.0160014629364, - "p95": 202.81600207090378, - "p99": 222.87999838590622 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 29360128, - "combineLogicalBytes": 29360128, - "fanoutMean": 8, - "recvTokensMax": 256, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 64, - "globalTokens": 512, - "dispatch": { - "p50": 107.61599987745285, - "p90": 121.0239976644516, - "p95": 127.07200646400452, - "p99": 148.73600006103516 - }, - "combine": { - "p50": 95.87199985980988, - "p90": 105.3759977221489, - "p95": 112.60800063610077, - "p99": 123.29600006341934 - }, - "roundtrip": { - "p50": 176.67199671268463, - "p90": 191.80800020694733, - "p95": 203.5840004682541, - "p99": 225.98400712013245 - }, - "isolatedSum": { - "p50": 203.48799973726273, - "p90": 226.3999953866005, - "p95": 239.68000710010529, - "p99": 272.0320001244545 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 58720256, - "combineLogicalBytes": 58720256, - "fanoutMean": 8, - "recvTokensMax": 512, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 133.66399705410004, - "p90": 146.04799449443817, - "p95": 152.41600573062897, - "p99": 162.56000101566315 - }, - "combine": { - "p50": 118.52800101041794, - "p90": 127.68000364303589, - "p95": 130.91200590133667, - "p99": 144.67200636863708 - }, - "roundtrip": { - "p50": 225.92000663280487, - "p90": 240.48000574111938, - "p95": 251.3279914855957, - "p99": 700.223982334137 - }, - "isolatedSum": { - "p50": 252.19199806451797, - "p90": 273.72799813747406, - "p95": 283.32801163196564, - "p99": 307.23200738430023 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 117440512, - "combineLogicalBytes": 117440512, - "fanoutMean": 8, - "recvTokensMax": 1024, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - 
{ - "id": "cx-8e3ecfeb", - "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|decode|normal|none|none|0|normalized|0.18|14ded8461f2636c", - "colorKey": "h200_0a93a01f", - "comparisonKey": "c7e35a057338b2fa", - "schemaVersion": 3, - "generatedAt": "2026-06-26T17:31:04.173894+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_6", - "sku": "h200", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "normalized", - "suite": "resource-constrained", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · bf16 (norm) · zipf", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "zipf", - "routingLabel": "zipf", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": 0.18, - "achievedFraction": 0.1818, - "configuredUnits": 24, - "deviceUnits": 132, - "resourceClass": "resource-constrained", - "conformanceClass": "resource-conforming", - "fixedKernel": false, - "paretoEligible": true - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "14ded8461f2636c", - "workloadId": "set:8:f5576e2b712d38c3", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28254452252", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254452252", - "createdAt": 
"2026-06-26T17:31:04.173894+00:00", - "sha": "60dec7d70f554e252fec87709e2be52752947db1" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 74.27199929952621, - "p90": 108.83200168609619, - "p95": 118.49600076675415, - "p99": 155.5200070142746 - }, - "combine": { - "p50": 68.38399916887283, - "p90": 84.03199911117554, - "p95": 90.20800143480301, - "p99": 114.88000303506851 - }, - "roundtrip": { - "p50": 123.07199835777283, - "p90": 153.08800339698792, - "p95": 165.8560037612915, - "p99": 205.9199959039688 - }, - "isolatedSum": { - "p50": 142.65599846839905, - "p90": 192.86400079727173, - "p95": 208.70400220155716, - "p99": 270.4000100493431 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 444416, - "combineLogicalBytes": 444416, - "fanoutMean": 3.875, - "recvTokensMax": 8, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 73.95199686288834, - "p90": 97.82399982213974, - "p95": 106.6880002617836, - "p99": 132.9919993877411 - }, - "combine": { - "p50": 68.64000111818314, - "p90": 80.51200211048126, - "p95": 85.37600189447403, - "p99": 98.49599748849869 - }, - "roundtrip": { - "p50": 123.36000055074692, - "p90": 150.176003575325, - "p95": 158.4639996290207, - "p99": 181.63199722766876 - }, - "isolatedSum": { - "p50": 142.59199798107147, - "p90": 178.336001932621, - "p95": 192.06400215625763, - "p99": 231.48799687623978 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 845824, - "combineLogicalBytes": 845824, - "fanoutMean": 3.6875, - "recvTokensMax": 16, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 73.5040009021759, - "p90": 95.551997423172, - "p95": 104.86400127410889, - "p99": 123.4240010380745 - }, - "combine": { - "p50": 67.80800223350525, - "p90": 78.46400141716003, - "p95": 
84.95999872684479, - "p99": 125.2799928188324 - }, - "roundtrip": { - "p50": 122.78400361537933, - "p90": 150.65599977970123, - "p95": 159.07199680805206, - "p99": 200.51200687885284 - }, - "isolatedSum": { - "p50": 141.31200313568115, - "p90": 174.01599884033203, - "p95": 189.82400000095367, - "p99": 248.7039938569069 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1691648, - "combineLogicalBytes": 1691648, - "fanoutMean": 3.6875, - "recvTokensMax": 32, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 75.23199915885925, - "p90": 103.32799702882767, - "p95": 111.87200248241425, - "p99": 143.26399564743042 - }, - "combine": { - "p50": 69.60000097751617, - "p90": 85.79199761152267, - "p95": 91.71199798583984, - "p99": 124.12799894809723 - }, - "roundtrip": { - "p50": 126.36800110340118, - "p90": 160.12799739837646, - "p95": 167.64800250530243, - "p99": 193.2159960269928 - }, - "isolatedSum": { - "p50": 144.83200013637543, - "p90": 189.11999464035034, - "p95": 203.5840004682541, - "p99": 267.39199459552765 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 3354624, - "combineLogicalBytes": 3354624, - "fanoutMean": 3.65625, - "recvTokensMax": 64, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 77.40800082683563, - "p90": 104.63999956846237, - "p95": 113.43999952077866, - "p99": 144.0960019826889 - }, - "combine": { - "p50": 70.52800059318542, - "p90": 87.23200112581253, - "p95": 90.94399958848953, - "p99": 101.1200025677681 - }, - "roundtrip": { - "p50": 127.6479959487915, - "p90": 161.85599565505981, - "p95": 175.7120043039322, - "p99": 230.27199506759644 - }, - "isolatedSum": { - "p50": 147.93600142002106, - "p90": 191.8720006942749, - "p95": 204.3839991092682, - "p99": 245.216004550457 - }, - "roundtripMeasured": true, - 
"dispatchLogicalBytes": 6537216, - "combineLogicalBytes": 6537216, - "fanoutMean": 3.5625, - "recvTokensMax": 127, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 83.0719992518425, - "p90": 109.50399935245514, - "p95": 115.61600118875504, - "p99": 128.1599998474121 - }, - "combine": { - "p50": 77.34400033950806, - "p90": 91.64799749851227, - "p95": 95.61599791049957, - "p99": 112.73600161075592 - }, - "roundtrip": { - "p50": 132.60799646377563, - "p90": 157.0879966020584, - "p95": 165.0560051202774, - "p99": 194.20799612998962 - }, - "isolatedSum": { - "p50": 160.41599959135056, - "p90": 201.1519968509674, - "p95": 211.2319990992546, - "p99": 240.89600145816803 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 12859392, - "combineLogicalBytes": 12859392, - "fanoutMean": 3.50390625, - "recvTokensMax": 255, - "stragglerRank": 1, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 64, - "globalTokens": 512, - "dispatch": { - "p50": 97.15200215578079, - "p90": 111.35999858379364, - "p95": 121.31199985742569, - "p99": 134.8479986190796 - }, - "combine": { - "p50": 87.5839963555336, - "p90": 99.80800002813339, - "p95": 104.06400263309479, - "p99": 116.95999652147293 - }, - "roundtrip": { - "p50": 161.9199961423874, - "p90": 177.72799730300903, - "p95": 184.67199802398682, - "p99": 235.61599850654602 - }, - "isolatedSum": { - "p50": 184.7359985113144, - "p90": 211.16799861192703, - "p95": 225.37600249052048, - "p99": 251.80799514055252 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 25145344, - "combineLogicalBytes": 25145344, - "fanoutMean": 3.42578125, - "recvTokensMax": 510, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 122.97599762678146, - "p90": 147.10399508476257, - "p95": 
156.25600516796112, - "p99": 183.07200074195862 - }, - "combine": { - "p50": 110.49599945545197, - "p90": 123.87199699878693, - "p95": 129.40800189971924, - "p99": 150.751993060112 - }, - "roundtrip": { - "p50": 208.73600244522095, - "p90": 225.43999552726746, - "p95": 233.024001121521, - "p99": 256.415992975235 - }, - "isolatedSum": { - "p50": 233.47199708223343, - "p90": 270.9759920835495, - "p95": 285.66400706768036, - "p99": 333.8239938020706 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 49946624, - "combineLogicalBytes": 49946624, - "fanoutMean": 3.40234375, - "recvTokensMax": 1022, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-9efea369", - "identity": "h200|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|decode|normal|none|none|0|normalized|0.18|a8f501af7004836", - "colorKey": "h200_993777bf", - "comparisonKey": "cdec001c60a84b85", - "schemaVersion": 3, - "generatedAt": "2026-06-26T17:46:59.245966+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_6", - "sku": "h200", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "normalized", - "suite": "resource-constrained", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · bf16 (norm) · zipf+eplb", - "model": "DeepSeek-V3 (EPLB physical)", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 288, - "routing": "zipf", - "routingLabel": "zipf+eplb", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": true, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": 0.18, - "achievedFraction": 0.1818, - "configuredUnits": 24, - "deviceUnits": 132, - "resourceClass": "resource-constrained", - 
"conformanceClass": "resource-conforming", - "fixedKernel": false, - "paretoEligible": true - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "a8f501af7004836", - "workloadId": "set:8:f5576e2b712d38c3", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": 4.927734375, - "eplbImbalanceAfter": 1.0006103515625, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28255303840", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28255303840", - "createdAt": "2026-06-26T17:46:59.245966+00:00", - "sha": "36d3eb6c3c7386d3220c873d305410219c5c0f17" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 72.89600372314453, - "p90": 99.45599734783173, - "p95": 108.73600095510483, - "p99": 128.86400520801544 - }, - "combine": { - "p50": 67.19999760389328, - "p90": 78.3040001988411, - "p95": 82.46400207281113, - "p99": 102.65599936246872 - }, - "roundtrip": { - "p50": 119.32799965143204, - "p90": 147.77599275112152, - "p95": 155.07200360298157, - "p99": 171.03999853134155 - }, - "isolatedSum": { - "p50": 140.0960013270378, - "p90": 177.75999754667282, - "p95": 191.20000302791595, - "p99": 231.52000457048416 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 616448, - "combineLogicalBytes": 616448, - "fanoutMean": 5.375, - "recvTokensMax": 7, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 73.18399846553802, - "p90": 94.27200257778168, - "p95": 104.5759990811348, - "p99": 122.68800288438797 - }, - "combine": { - "p50": 68.09599697589874, - "p90": 81.15199953317642, - "p95": 86.17600053548813, - "p99": 113.3119985461235 - }, - "roundtrip": { - "p50": 120.31999975442886, 
- "p90": 147.45600521564484, - "p95": 157.82399475574493, - "p99": 190.08000195026398 - }, - "isolatedSum": { - "p50": 141.27999544143677, - "p90": 175.4240021109581, - "p95": 190.75199961662292, - "p99": 236.00000143051147 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1204224, - "combineLogicalBytes": 1204224, - "fanoutMean": 5.25, - "recvTokensMax": 14, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 78.62400263547897, - "p90": 130.5920034646988, - "p95": 144.54400539398193, - "p99": 178.847998380661 - }, - "combine": { - "p50": 69.08799707889557, - "p90": 80.51200211048126, - "p95": 87.87199854850769, - "p99": 104.19200360774994 - }, - "roundtrip": { - "p50": 124.70400333404541, - "p90": 154.14400398731232, - "p95": 165.15199840068817, - "p99": 194.68800723552704 - }, - "isolatedSum": { - "p50": 147.71199971437454, - "p90": 211.10400557518005, - "p95": 232.41600394248962, - "p99": 283.04000198841095 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2394112, - "combineLogicalBytes": 2394112, - "fanoutMean": 5.21875, - "recvTokensMax": 24, - "stragglerRank": 2, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 74.46400076150894, - "p90": 99.39199686050415, - "p95": 109.76000130176544, - "p99": 140.6400054693222 - }, - "combine": { - "p50": 68.76800209283829, - "p90": 83.64800363779068, - "p95": 90.14400094747543, - "p99": 115.35999923944473 - }, - "roundtrip": { - "p50": 124.54400211572647, - "p90": 155.7759940624237, - "p95": 170.56000232696533, - "p99": 186.91200017929077 - }, - "isolatedSum": { - "p50": 143.23200285434723, - "p90": 183.04000049829483, - "p95": 199.90400224924088, - "p99": 256.00000470876694 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4630528, - "combineLogicalBytes": 4630528, - "fanoutMean": 5.046875, - 
"recvTokensMax": 45, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 76.25599950551987, - "p90": 106.9440022110939, - "p95": 120.7360029220581, - "p99": 149.24800395965576 - }, - "combine": { - "p50": 70.52800059318542, - "p90": 85.24800091981888, - "p95": 90.04800021648407, - "p99": 104.5759990811348 - }, - "roundtrip": { - "p50": 129.98400628566742, - "p90": 161.05599701404572, - "p95": 173.8560050725937, - "p99": 205.21600544452667 - }, - "isolatedSum": { - "p50": 146.7840000987053, - "p90": 192.19200313091278, - "p95": 210.78400313854218, - "p99": 253.82400304079056 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 9447424, - "combineLogicalBytes": 9447424, - "fanoutMean": 5.1484375, - "recvTokensMax": 91, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 81.91999793052673, - "p90": 99.07200187444687, - "p95": 107.04000294208527, - "p99": 128.57599556446075 - }, - "combine": { - "p50": 76.03199779987335, - "p90": 89.63199704885483, - "p95": 96.54399752616882, - "p99": 106.08000308275223 - }, - "roundtrip": { - "p50": 129.08799946308136, - "p90": 156.76799416542053, - "p95": 167.29600727558136, - "p99": 217.3440009355545 - }, - "isolatedSum": { - "p50": 157.95199573040009, - "p90": 188.7039989233017, - "p95": 203.5840004682541, - "p99": 234.65599864721298 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 19023872, - "combineLogicalBytes": 19023872, - "fanoutMean": 5.18359375, - "recvTokensMax": 178, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 64, - "globalTokens": 512, - "dispatch": { - "p50": 101.79200023412704, - "p90": 136.6720050573349, - "p95": 146.36799693107605, - "p99": 175.10400712490082 - }, - "combine": { - "p50": 93.44000369310379, - "p90": 
112.76800185441971, - "p95": 117.15199798345566, - "p99": 131.71200454235077 - }, - "roundtrip": { - "p50": 165.43999314308167, - "p90": 204.44799959659576, - "p95": 212.38400042057037, - "p99": 240.03200232982635 - }, - "isolatedSum": { - "p50": 195.23200392723083, - "p90": 249.4400069117546, - "p95": 263.5199949145317, - "p99": 306.8160116672516 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 38148096, - "combineLogicalBytes": 38148096, - "fanoutMean": 5.197265625, - "recvTokensMax": 350, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 115.68000167608261, - "p90": 135.29600203037262, - "p95": 142.17600226402283, - "p99": 160.64000129699707 - }, - "combine": { - "p50": 104.96000200510025, - "p90": 118.04799735546112, - "p95": 122.68800288438797, - "p99": 147.64800667762756 - }, - "roundtrip": { - "p50": 194.97600197792053, - "p90": 212.64000236988068, - "p95": 220.19200026988983, - "p99": 234.78400707244873 - }, - "isolatedSum": { - "p50": 220.64000368118286, - "p90": 253.34399938583374, - "p95": 264.8640051484108, - "p99": 308.28800797462463 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 76955648, - "combineLogicalBytes": 76955648, - "fanoutMean": 5.2421875, - "recvTokensMax": 687, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-cee2e19b", - "identity": "h200|deepep|7168|8|256|bf16|normal|cached-layout-comm-only-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|ac583971f94b176", - "colorKey": "h200_edd92e38", - "comparisonKey": "4a9eb2a61bfd9462", - "schemaVersion": 3, - "generatedAt": "2026-06-26T17:30:08.901856+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_7", - "sku": "h200", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "normalized", - "suite": "resource-constrained", - 
"comparisonClass": "standardized", - "measurementContract": "cached-layout-comm-only-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · bf16 (norm) [cl]", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": 0.18, - "achievedFraction": 0.1818, - "configuredUnits": 24, - "deviceUnits": 132, - "resourceClass": "resource-constrained", - "conformanceClass": "resource-conforming", - "fixedKernel": false, - "paretoEligible": true - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "ac583971f94b176", - "workloadId": "set:8:d8d49658059863f2", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28254409438", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254409438", - "createdAt": "2026-06-26T17:30:08.901856+00:00", - "sha": "60dec7d70f554e252fec87709e2be52752947db1" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 62.97600269317627, - "p90": 86.40000224113464, - "p95": 94.14400160312653, - "p99": 136.9599997997284 - }, - "combine": { - "p50": 69.21599805355072, - "p90": 82.04799890518188, - "p95": 87.20000088214874, - "p99": 98.49599748849869 - }, - "roundtrip": { - "p50": 109.98400300741196, - "p90": 133.08799266815186, - "p95": 140.8960074186325, - "p99": 178.27199399471283 - }, - "isolatedSum": { - 
"p50": 132.192000746727, - "p90": 168.44800114631653, - "p95": 181.34400248527527, - "p99": 235.45599728822708 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 630784, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 64.38399851322174, - "p90": 88.73599767684937, - "p95": 94.87999975681305, - "p99": 119.48800086975098 - }, - "combine": { - "p50": 69.2799985408783, - "p90": 83.52000266313553, - "p95": 88.95999938249588, - "p99": 107.10400342941284 - }, - "roundtrip": { - "p50": 110.20799726247787, - "p90": 138.2720023393631, - "p95": 145.37599682807922, - "p99": 175.55199563503265 - }, - "isolatedSum": { - "p50": 133.66399705410004, - "p90": 172.2560003399849, - "p95": 183.83999913930893, - "p99": 226.59200429916382 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1232896, - "combineLogicalBytes": 1232896, - "fanoutMean": 5.375, - "recvTokensMax": 13, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 64.41599875688553, - "p90": 90.52799642086029, - "p95": 101.88800096511841, - "p99": 132.28799402713776 - }, - "combine": { - "p50": 70.62400132417679, - "p90": 85.34400165081024, - "p95": 90.71999788284302, - "p99": 102.27199643850327 - }, - "roundtrip": { - "p50": 113.43999952077866, - "p90": 141.79199934005737, - "p95": 148.22399616241455, - "p99": 183.58400464057922 - }, - "isolatedSum": { - "p50": 135.04000008106232, - "p90": 175.87199807167053, - "p95": 192.60799884796143, - "p99": 234.55999046564102 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2480128, - "combineLogicalBytes": 2480128, - "fanoutMean": 5.40625, - "recvTokensMax": 29, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, 
- "globalTokens": 64, - "dispatch": { - "p50": 63.1679967045784, - "p90": 82.75199681520462, - "p95": 87.96799927949905, - "p99": 107.744000852108 - }, - "combine": { - "p50": 69.85600292682648, - "p90": 85.1840004324913, - "p95": 90.46400338411331, - "p99": 100.99200159311295 - }, - "roundtrip": { - "p50": 112.44799941778183, - "p90": 139.20000195503235, - "p95": 152.38399803638458, - "p99": 206.7520022392273 - }, - "isolatedSum": { - "p50": 133.02399963140488, - "p90": 167.93599724769592, - "p95": 178.43200266361237, - "p99": 208.73600244522095 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 60.35200133919716, - "p90": 85.02399921417236, - "p95": 91.67999774217606, - "p99": 111.13599687814713 - }, - "combine": { - "p50": 70.3359991312027, - "p90": 86.87999844551086, - "p95": 89.82399851083755, - "p99": 99.35999661684036 - }, - "roundtrip": { - "p50": 116.03199690580368, - "p90": 141.34399592876434, - "p95": 148.3519971370697, - "p99": 184.9920004606247 - }, - "isolatedSum": { - "p50": 130.68800047039986, - "p90": 171.90399765968323, - "p95": 181.5039962530136, - "p99": 210.4959934949875 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 9920512, - "combineLogicalBytes": 9920512, - "fanoutMean": 5.40625, - "recvTokensMax": 92, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 71.74400240182877, - "p90": 90.71999788284302, - "p95": 96.73599898815155, - "p99": 118.23999881744385 - }, - "combine": { - "p50": 77.66400277614594, - "p90": 93.05600076913834, - "p95": 97.69599884748459, - "p99": 108.92800241708755 - }, - "roundtrip": { - "p50": 122.36800044775009, - "p90": 149.05600249767303, - 
"p95": 159.61599349975586, - "p99": 184.12800133228302 - }, - "isolatedSum": { - "p50": 149.4080051779747, - "p90": 183.77599865198135, - "p95": 194.43199783563614, - "p99": 227.1680012345314 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 19726336, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 182, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 64, - "globalTokens": 512, - "dispatch": { - "p50": 82.65600353479385, - "p90": 100.3199964761734, - "p95": 109.15199667215347, - "p99": 139.39200341701508 - }, - "combine": { - "p50": 91.45600348711014, - "p90": 106.52799904346466, - "p95": 114.30399864912033, - "p99": 132.22399353981018 - }, - "roundtrip": { - "p50": 147.42399752140045, - "p90": 165.3439998626709, - "p95": 174.20800030231476, - "p99": 198.65599274635315 - }, - "isolatedSum": { - "p50": 174.112007021904, - "p90": 206.84799551963806, - "p95": 223.4559953212738, - "p99": 271.61599695682526 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 38993920, - "combineLogicalBytes": 38993920, - "fanoutMean": 5.3125, - "recvTokensMax": 367, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 105.12000322341919, - "p90": 118.40000003576279, - "p95": 122.81599640846252, - "p99": 147.32800424098969 - }, - "combine": { - "p50": 104.73600029945374, - "p90": 122.11199849843979, - "p95": 126.75200402736664, - "p99": 138.84800672531128 - }, - "roundtrip": { - "p50": 184.38400328159332, - "p90": 200.41599869728088, - "p95": 207.96799659729004, - "p99": 272.44800329208374 - }, - "isolatedSum": { - "p50": 209.85600352287292, - "p90": 240.51199853420258, - "p95": 249.56800043582916, - "p99": 286.17601096630096 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 
723, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-8a74732f", - "identity": "h200|deepep|7168|8|256|bf16|normal|cached-layout-comm-only-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", - "colorKey": "h200_76bb7d5d", - "comparisonKey": "b4a52819ec3c25b8", - "schemaVersion": 3, - "generatedAt": "2026-06-26T23:49:31.596673+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_11", - "sku": "h200", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "cached-layout-comm-only-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · bf16 [cl]", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "ac583971f94b176", - "workloadId": "set:8:d8d49658059863f2", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28271608834", - "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271608834", - "createdAt": "2026-06-26T23:49:31.596673+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 62.144000083208084, - "p90": 86.62399649620056, - "p95": 98.49599748849869, - "p99": 125.5359947681427 - }, - "combine": { - "p50": 68.54400038719177, - "p90": 84.41600203514099, - "p95": 92.83199906349182, - "p99": 123.07199835777283 - }, - "roundtrip": { - "p50": 109.31199789047241, - "p90": 135.29600203037262, - "p95": 143.77599954605103, - "p99": 159.84000265598297 - }, - "isolatedSum": { - "p50": 130.68800047039986, - "p90": 171.03999853134155, - "p95": 191.3279965519905, - "p99": 248.60799312591553 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 630784, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 63.74400109052658, - "p90": 91.26400202512741, - "p95": 99.87200051546097, - "p99": 171.9679981470108 - }, - "combine": { - "p50": 70.81600278615952, - "p90": 194.75199282169342, - "p95": 206.94400370121002, - "p99": 256.9279968738556 - }, - "roundtrip": { - "p50": 110.04800349473953, - "p90": 140.1599943637848, - "p95": 147.13600277900696, - "p99": 161.50400042533875 - }, - "isolatedSum": { - "p50": 134.5600038766861, - "p90": 286.01599484682083, - "p95": 306.816004216671, - "p99": 428.8959950208664 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1232896, - "combineLogicalBytes": 1232896, - "fanoutMean": 5.375, - "recvTokensMax": 13, - "stragglerRank": 1, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 62.94400244951248, - "p90": 80.51200211048126, - "p95": 89.02399986982346, - "p99": 111.39199882745743 - }, - 
"combine": { - "p50": 68.38399916887283, - "p90": 79.8719972372055, - "p95": 88.54400366544724, - "p99": 100.54399818181992 - }, - "roundtrip": { - "p50": 111.16799712181091, - "p90": 139.80799913406372, - "p95": 148.41599762439728, - "p99": 167.07199811935425 - }, - "isolatedSum": { - "p50": 131.32800161838531, - "p90": 160.38399934768677, - "p95": 177.5680035352707, - "p99": 211.93599700927734 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2480128, - "combineLogicalBytes": 2480128, - "fanoutMean": 5.40625, - "recvTokensMax": 29, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 61.88800185918808, - "p90": 83.16799998283386, - "p95": 92.51199662685394, - "p99": 104.06400263309479 - }, - "combine": { - "p50": 68.67200136184692, - "p90": 82.84799754619598, - "p95": 88.639996945858, - "p99": 105.05600273609161 - }, - "roundtrip": { - "p50": 110.84800213575363, - "p90": 140.79999923706055, - "p95": 148.0640023946762, - "p99": 159.2639982700348 - }, - "isolatedSum": { - "p50": 130.560003221035, - "p90": 166.01599752902985, - "p95": 181.15199357271194, - "p99": 209.1200053691864 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 59.84000116586685, - "p90": 82.17599987983704, - "p95": 92.32000261545181, - "p99": 105.92000186443329 - }, - "combine": { - "p50": 69.72800195217133, - "p90": 84.19200032949448, - "p95": 90.68799763917923, - "p99": 106.91200196743011 - }, - "roundtrip": { - "p50": 112.12799698114395, - "p90": 134.62400436401367, - "p95": 145.9839940071106, - "p99": 164.09599781036377 - }, - "isolatedSum": { - "p50": 129.56800311803818, - "p90": 166.3680002093315, - "p95": 
183.00800025463104, - "p99": 212.8320038318634 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 9920512, - "combineLogicalBytes": 9920512, - "fanoutMean": 5.40625, - "recvTokensMax": 92, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 70.20799815654755, - "p90": 94.08000111579895, - "p95": 101.15200281143188, - "p99": 118.17599833011627 - }, - "combine": { - "p50": 76.64000242948532, - "p90": 91.2960022687912, - "p95": 97.43999689817429, - "p99": 105.27999699115753 - }, - "roundtrip": { - "p50": 123.77600371837616, - "p90": 148.3519971370697, - "p95": 155.29599785804749, - "p99": 175.135999917984 - }, - "isolatedSum": { - "p50": 146.84800058603287, - "p90": 185.37600338459015, - "p95": 198.59199970960617, - "p99": 223.4559953212738 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 19726336, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 182, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 64, - "globalTokens": 512, - "dispatch": { - "p50": 86.43200248479843, - "p90": 99.48799759149551, - "p95": 106.84800148010254, - "p99": 127.42400169372559 - }, - "combine": { - "p50": 85.82399785518646, - "p90": 96.63999825716019, - "p95": 104.76800054311752, - "p99": 113.21599781513214 - }, - "roundtrip": { - "p50": 147.8399932384491, - "p90": 164.5440012216568, - "p95": 169.95200514793396, - "p99": 197.53600656986237 - }, - "isolatedSum": { - "p50": 172.2560003399849, - "p90": 196.1279958486557, - "p95": 211.61600202322006, - "p99": 240.63999950885773 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 38993920, - "combineLogicalBytes": 38993920, - "fanoutMean": 5.3125, - "recvTokensMax": 367, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 
103.7760004401207, - "p90": 118.9119964838028, - "p95": 127.16799974441528, - "p99": 134.97599959373474 - }, - "combine": { - "p50": 105.15200346708298, - "p90": 119.00799721479416, - "p95": 124.35200065374374, - "p99": 139.55199718475342 - }, - "roundtrip": { - "p50": 185.2799952030182, - "p90": 201.7280012369156, - "p95": 207.39200711250305, - "p99": 224.95999932289124 - }, - "isolatedSum": { - "p50": 208.92800390720367, - "p90": 237.91999369859695, - "p95": 251.52000039815903, - "p99": 274.52799677848816 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-274a06b0", - "identity": "h200|deepep|7168|8|256|bf16|ll|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", - "colorKey": "h200_c9aeae24", - "comparisonKey": "0abd2163f516521c", - "schemaVersion": 3, - "generatedAt": "2026-06-26T23:50:44.931546+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_8", - "sku": "h200", - "backend": "deepep", - "phase": "decode", - "mode": "ll", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · bf16 LL", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": null, - "configuredUnits": null, - "deviceUnits": 132, - "resourceClass": "fixed-kernel", - 
"conformanceClass": "not-applicable", - "fixedKernel": true, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "ac583971f94b176", - "workloadId": "set:8:d8d49658059863f2", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28271645585", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271645585", - "createdAt": "2026-06-26T23:50:44.931546+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 37.567999213933945, - "p90": 48.0320006608963, - "p95": 52.41600051522255, - "p99": 62.33600154519081 - }, - "combine": { - "p50": 33.663999289274216, - "p90": 44.38399896025658, - "p95": 46.879999339580536, - "p99": 61.85600161552429 - }, - "roundtrip": { - "p50": 51.231998950242996, - "p90": 70.14399766921997, - "p95": 77.31200009584427, - "p99": 100.0640019774437 - }, - "isolatedSum": { - "p50": 71.23199850320816, - "p90": 92.41599962115288, - "p95": 99.29599985480309, - "p99": 124.1920031607151 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 630784, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 14, - "stragglerRank": 2, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 37.88800165057182, - "p90": 49.15200173854828, - "p95": 55.87200075387955, - "p99": 76.89599692821503 - }, - "combine": { - "p50": 32.896000891923904, - "p90": 43.83999854326248, - "p95": 47.07200080156326, - "p99": 67.74400174617767 - }, - "roundtrip": { - "p50": 51.00800096988678, - "p90": 67.9360032081604, - 
"p95": 74.20799881219864, - "p99": 96.83199971914291 - }, - "isolatedSum": { - "p50": 70.78400254249573, - "p90": 92.99200028181076, - "p95": 102.94400155544281, - "p99": 144.6399986743927 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1232896, - "combineLogicalBytes": 1232896, - "fanoutMean": 5.375, - "recvTokensMax": 21, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 37.53599897027016, - "p90": 44.95999962091446, - "p95": 51.61599814891815, - "p99": 66.30399823188782 - }, - "combine": { - "p50": 29.791999608278275, - "p90": 39.16800022125244, - "p95": 44.064000248909, - "p99": 53.63199859857559 - }, - "roundtrip": { - "p50": 51.13599821925163, - "p90": 63.519999384880066, - "p95": 71.77600264549255, - "p99": 81.34400099515915 - }, - "isolatedSum": { - "p50": 67.32799857854843, - "p90": 84.1279998421669, - "p95": 95.67999839782715, - "p99": 119.93599683046341 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2480128, - "combineLogicalBytes": 2480128, - "fanoutMean": 5.40625, - "recvTokensMax": 39, - "stragglerRank": 1, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 38.27200084924698, - "p90": 51.32799968123436, - "p95": 57.08799883723259, - "p99": 66.97600334882736 - }, - "combine": { - "p50": 34.623999148607254, - "p90": 44.03200000524521, - "p95": 46.62400111556053, - "p99": 54.55999821424484 - }, - "roundtrip": { - "p50": 55.39200082421303, - "p90": 67.58400052785873, - "p95": 75.42400062084198, - "p99": 95.0080007314682 - }, - "isolatedSum": { - "p50": 72.89599999785423, - "p90": 95.35999968647957, - "p95": 103.71199995279312, - "p99": 121.5360015630722 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 74, - "stragglerRank": 4, - "correct": true, 
- "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 38.816001266241074, - "p90": 54.17599901556969, - "p95": 57.72799998521805, - "p99": 75.00799745321274 - }, - "combine": { - "p50": 36.288000643253326, - "p90": 46.01600021123886, - "p95": 48.00000041723251, - "p99": 69.47200000286102 - }, - "roundtrip": { - "p50": 59.967998415231705, - "p90": 73.05599749088287, - "p95": 77.2159993648529, - "p99": 92.12800115346909 - }, - "isolatedSum": { - "p50": 75.1040019094944, - "p90": 100.19199922680855, - "p95": 105.72800040245056, - "p99": 144.47999745607376 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 9920512, - "combineLogicalBytes": 9920512, - "fanoutMean": 5.40625, - "recvTokensMax": 145, - "stragglerRank": 2, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 45.40799930691719, - "p90": 55.32800033688545, - "p95": 60.15999987721443, - "p99": 70.88000327348709 - }, - "combine": { - "p50": 43.87199878692627, - "p90": 53.53600159287453, - "p95": 55.32800033688545, - "p99": 67.9360032081604 - }, - "roundtrip": { - "p50": 72.35199958086014, - "p90": 82.8159973025322, - "p95": 86.01599931716919, - "p99": 98.88000041246414 - }, - "isolatedSum": { - "p50": 89.27999809384346, - "p90": 108.86400192975998, - "p95": 115.48800021409988, - "p99": 138.8160064816475 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 19726336, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 287, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 64, - "globalTokens": 512, - "dispatch": { - "p50": 55.296000093221664, - "p90": 66.6240006685257, - "p95": 70.36799937486649, - "p99": 88.16000074148178 - }, - "combine": { - "p50": 59.07199904322624, - "p90": 67.71200150251389, - "p95": 70.43199986219406, - "p99": 79.3600007891655 - }, - "roundtrip": { 
- "p50": 97.34400361776352, - "p90": 109.3439981341362, - "p95": 115.32799899578094, - "p99": 128.12800705432892 - }, - "isolatedSum": { - "p50": 114.3679991364479, - "p90": 134.33600217103958, - "p95": 140.79999923706055, - "p99": 167.52000153064728 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 38993920, - "combineLogicalBytes": 38993920, - "fanoutMean": 5.3125, - "recvTokensMax": 564, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 81.05599880218506, - "p90": 91.26400202512741, - "p95": 95.77599912881851, - "p99": 104.38399761915207 - }, - "combine": { - "p50": 86.40000224113464, - "p90": 98.36799651384354, - "p95": 102.84800082445145, - "p99": 111.96800321340561 - }, - "roundtrip": { - "p50": 148.44800531864166, - "p90": 162.88000345230103, - "p95": 168.16000640392303, - "p99": 178.24000120162964 - }, - "isolatedSum": { - "p50": 167.4560010433197, - "p90": 189.63199853897095, - "p95": 198.62399995326996, - "p99": 216.35200083255768 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 1104, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-81e223f4", - "identity": "h200|deepep|7168|8|256|bf16|ll|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", - "colorKey": "h200_7cfa04c4", - "comparisonKey": "72cd529af4968fe8", - "schemaVersion": 3, - "generatedAt": "2026-06-26T23:50:48.529187+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_6", - "sku": "h200", - "backend": "deepep", - "phase": "decode", - "mode": "ll", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "runtime-visible-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - 
"worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · bf16 LL", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": null, - "configuredUnits": null, - "deviceUnits": 132, - "resourceClass": "fixed-kernel", - "conformanceClass": "not-applicable", - "fixedKernel": true, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "ac583971f94b176", - "workloadId": "set:8:d8d49658059863f2", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28271650161", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271650161", - "createdAt": "2026-06-26T23:50:48.529187+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 38.55999931693077, - "p90": 52.25599929690361, - "p95": 57.69599974155426, - "p99": 68.70400160551071 - }, - "combine": { - "p50": 33.440001308918, - "p90": 46.23999819159508, - "p95": 50.36799982190132, - "p99": 62.912002205848694 - }, - "roundtrip": { - "p50": 52.70399898290634, - "p90": 70.43199986219406, - "p95": 77.85599678754807, - "p99": 90.27200192213058 - }, - "isolatedSum": { - "p50": 72.00000062584877, - "p90": 98.49599748849869, - "p95": 108.06399956345558, - "p99": 131.6160038113594 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 630784, - 
"combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 14, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 37.76000067591667, - "p90": 48.25599864125252, - "p95": 55.93600124120712, - "p99": 79.68000322580338 - }, - "combine": { - "p50": 32.80000016093254, - "p90": 41.120000183582306, - "p95": 44.863998889923096, - "p99": 49.8879998922348 - }, - "roundtrip": { - "p50": 52.83199995756149, - "p90": 65.88800251483917, - "p95": 71.80800288915634, - "p99": 80.60800284147263 - }, - "isolatedSum": { - "p50": 70.56000083684921, - "p90": 89.37599882483482, - "p95": 100.80000013113022, - "p99": 129.56800311803818 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1232896, - "combineLogicalBytes": 1232896, - "fanoutMean": 5.375, - "recvTokensMax": 21, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 38.495998829603195, - "p90": 52.44800075888634, - "p95": 56.543998420238495, - "p99": 76.4480009675026 - }, - "combine": { - "p50": 33.055998384952545, - "p90": 44.16000097990036, - "p95": 45.951999723911285, - "p99": 53.568001836538315 - }, - "roundtrip": { - "p50": 52.70399898290634, - "p90": 64.2239972949028, - "p95": 71.96799665689468, - "p99": 81.53600245714188 - }, - "isolatedSum": { - "p50": 71.55199721455574, - "p90": 96.6080017387867, - "p95": 102.49599814414978, - "p99": 130.0160028040409 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2480128, - "combineLogicalBytes": 2480128, - "fanoutMean": 5.40625, - "recvTokensMax": 39, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 39.07199949026108, - "p90": 52.960000932216644, - "p95": 56.992001831531525, - "p99": 65.43999910354614 - }, - "combine": { - "p50": 
34.04799848794937, - "p90": 44.19200122356415, - "p95": 46.1760014295578, - "p99": 57.472001761198044 - }, - "roundtrip": { - "p50": 54.11199852824211, - "p90": 68.60800087451935, - "p95": 74.78400319814682, - "p99": 85.28000116348267 - }, - "isolatedSum": { - "p50": 73.11999797821045, - "p90": 97.15200215578079, - "p95": 103.16800326108932, - "p99": 122.91200086474419 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 74, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 40.12800008058548, - "p90": 55.00800162553787, - "p95": 59.29600074887276, - "p99": 66.81600213050842 - }, - "combine": { - "p50": 38.047999143600464, - "p90": 49.82399940490723, - "p95": 52.799999713897705, - "p99": 63.19999694824219 - }, - "roundtrip": { - "p50": 61.5679994225502, - "p90": 75.48800110816956, - "p95": 82.36800134181976, - "p99": 96.89600020647049 - }, - "isolatedSum": { - "p50": 78.17599922418594, - "p90": 104.8320010304451, - "p95": 112.09600046277046, - "p99": 130.0159990787506 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 9920512, - "combineLogicalBytes": 9920512, - "fanoutMean": 5.40625, - "recvTokensMax": 145, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 46.23999819159508, - "p90": 56.57599866390228, - "p95": 62.30400130152702, - "p99": 70.8480030298233 - }, - "combine": { - "p50": 43.96799951791763, - "p90": 53.75999957323074, - "p95": 58.33600088953972, - "p99": 61.216000467538834 - }, - "roundtrip": { - "p50": 71.19999825954437, - "p90": 80.86399734020233, - "p95": 85.28000116348267, - "p99": 93.21600198745728 - }, - "isolatedSum": { - "p50": 90.20799770951271, - "p90": 110.33599823713303, - "p95": 120.64000219106674, - "p99": 
132.06400349736214 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 19726336, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 287, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 64, - "globalTokens": 512, - "dispatch": { - "p50": 56.60799890756607, - "p90": 77.53600180149078, - "p95": 85.31200140714645, - "p99": 192.03199446201324 - }, - "combine": { - "p50": 58.240000158548355, - "p90": 67.29599833488464, - "p95": 69.56800073385239, - "p99": 77.82399654388428 - }, - "roundtrip": { - "p50": 96.28800302743912, - "p90": 107.39199817180634, - "p95": 111.58400028944016, - "p99": 126.52799487113953 - }, - "isolatedSum": { - "p50": 114.84799906611443, - "p90": 144.83200013637543, - "p95": 154.88000214099884, - "p99": 269.8559910058975 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 38993920, - "combineLogicalBytes": 38993920, - "fanoutMean": 5.3125, - "recvTokensMax": 564, - "stragglerRank": 2, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 81.7599967122078, - "p90": 92.67199784517288, - "p95": 95.13600170612335, - "p99": 128.38399410247803 - }, - "combine": { - "p50": 86.27200126647949, - "p90": 94.91200000047684, - "p95": 97.120001912117, - "p99": 105.27999699115753 - }, - "roundtrip": { - "p50": 147.2959965467453, - "p90": 157.56799280643463, - "p95": 162.36799955368042, - "p99": 174.9120056629181 - }, - "isolatedSum": { - "p50": 168.0319979786873, - "p90": 187.58399784564972, - "p95": 192.25600361824036, - "p99": 233.66399109363556 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 1104, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-43b4144e", - "identity": 
"h200|deepep|7168|8|256|bf16|ll|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|ac583971f94b176", - "colorKey": "h200_0a1a73b3", - "comparisonKey": "14196b9d68f90910", - "schemaVersion": 3, - "generatedAt": "2026-06-26T17:30:32.638567+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_5", - "sku": "h200", - "backend": "deepep", - "phase": "decode", - "mode": "ll", - "resourceMode": "normalized", - "suite": "resource-constrained", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · bf16 LL (norm)", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": 0.18, - "achievedFraction": null, - "configuredUnits": null, - "deviceUnits": 132, - "resourceClass": "fixed-kernel", - "conformanceClass": "not-applicable", - "fixedKernel": true, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "ac583971f94b176", - "workloadId": "set:8:d8d49658059863f2", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28254426529", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254426529", - "createdAt": "2026-06-26T17:30:32.638567+00:00", - "sha": 
"60dec7d70f554e252fec87709e2be52752947db1" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 36.86400130391121, - "p90": 47.13600128889084, - "p95": 51.52000114321709, - "p99": 63.32799792289734 - }, - "combine": { - "p50": 33.440001308918, - "p90": 42.527999728918076, - "p95": 46.81599885225296, - "p99": 52.22399905323982 - }, - "roundtrip": { - "p50": 50.52800104022026, - "p90": 65.15199691057205, - "p95": 71.03999704122543, - "p99": 78.68800312280655 - }, - "isolatedSum": { - "p50": 70.30400261282921, - "p90": 89.66400101780891, - "p95": 98.33599999547005, - "p99": 115.55199697613716 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 630784, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 14, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 37.408001720905304, - "p90": 48.608001321554184, - "p95": 54.687999188899994, - "p99": 65.2799978852272 - }, - "combine": { - "p50": 32.735999673604965, - "p90": 42.59200021624565, - "p95": 45.05600035190582, - "p99": 51.35999992489815 - }, - "roundtrip": { - "p50": 51.4880008995533, - "p90": 66.72000139951706, - "p95": 72.54400104284286, - "p99": 85.08799970149994 - }, - "isolatedSum": { - "p50": 70.14400139451027, - "p90": 91.20000153779984, - "p95": 99.74399954080582, - "p99": 116.63999781012535 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1232896, - "combineLogicalBytes": 1232896, - "fanoutMean": 5.375, - "recvTokensMax": 21, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 37.79200091958046, - "p90": 49.27999898791313, - "p95": 54.91200089454651, - "p99": 61.08799949288368 - }, - "combine": { - "p50": 31.231999397277832, - "p90": 43.487999588251114, - "p95": 47.26399853825569, - "p99": 65.31199812889099 - }, - "roundtrip": 
{ - "p50": 51.58400163054466, - "p90": 68.89600306749344, - "p95": 73.95199686288834, - "p99": 91.61599725484848 - }, - "isolatedSum": { - "p50": 69.02400031685829, - "p90": 92.76799857616425, - "p95": 102.1759994328022, - "p99": 126.39999762177467 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2480128, - "combineLogicalBytes": 2480128, - "fanoutMean": 5.40625, - "recvTokensMax": 39, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 37.53599897027016, - "p90": 48.128001391887665, - "p95": 54.75199967622757, - "p99": 62.111999839544296 - }, - "combine": { - "p50": 34.46400165557861, - "p90": 44.544000178575516, - "p95": 47.231998294591904, - "p99": 57.37600103020668 - }, - "roundtrip": { - "p50": 54.687999188899994, - "p90": 67.4239993095398, - "p95": 73.44000041484833, - "p99": 91.96799993515015 - }, - "isolatedSum": { - "p50": 72.00000062584877, - "p90": 92.67200157046318, - "p95": 101.98399797081947, - "p99": 119.48800086975098 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 74, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 39.29600119590759, - "p90": 51.35999992489815, - "p95": 55.64799904823303, - "p99": 64.96000289916992 - }, - "combine": { - "p50": 36.67199984192848, - "p90": 46.62400111556053, - "p95": 50.56000128388405, - "p99": 60.38400158286095 - }, - "roundtrip": { - "p50": 60.47999858856201, - "p90": 74.5920017361641, - "p95": 79.3600007891655, - "p99": 87.87199854850769 - }, - "isolatedSum": { - "p50": 75.96800103783607, - "p90": 97.98400104045868, - "p95": 106.20800033211708, - "p99": 125.34400448203087 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 9920512, - "combineLogicalBytes": 9920512, - "fanoutMean": 
5.40625, - "recvTokensMax": 145, - "stragglerRank": 2, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 45.05600035190582, - "p90": 55.00800162553787, - "p95": 57.95200169086456, - "p99": 66.01600348949432 - }, - "combine": { - "p50": 44.28799822926521, - "p90": 53.05600166320801, - "p95": 55.904000997543335, - "p99": 61.3120011985302 - }, - "roundtrip": { - "p50": 72.64000177383423, - "p90": 84.16000008583069, - "p95": 88.03199976682663, - "p99": 106.30399733781815 - }, - "isolatedSum": { - "p50": 89.34399858117104, - "p90": 108.06400328874588, - "p95": 113.8560026884079, - "p99": 127.32800468802452 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 19726336, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 287, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 64, - "globalTokens": 512, - "dispatch": { - "p50": 55.23199960589409, - "p90": 65.63200056552887, - "p95": 71.48800045251846, - "p99": 79.55200225114822 - }, - "combine": { - "p50": 58.43200162053108, - "p90": 69.37599927186966, - "p95": 71.07199728488922, - "p99": 79.42400127649307 - }, - "roundtrip": { - "p50": 96.8639999628067, - "p90": 108.44799876213074, - "p95": 113.72800171375275, - "p99": 121.72800302505493 - }, - "isolatedSum": { - "p50": 113.66400122642517, - "p90": 135.00799983739853, - "p95": 142.55999773740768, - "p99": 158.9760035276413 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 38993920, - "combineLogicalBytes": 38993920, - "fanoutMean": 5.3125, - "recvTokensMax": 564, - "stragglerRank": 3, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 79.26400005817413, - "p90": 88.44800293445587, - "p95": 92.6399976015091, - "p99": 101.69599950313568 - }, - "combine": { - "p50": 86.01599931716919, - "p90": 
95.0080007314682, - "p95": 97.02400118112564, - "p99": 103.32799702882767 - }, - "roundtrip": { - "p50": 147.32800424098969, - "p90": 157.53600001335144, - "p95": 161.47199273109436, - "p99": 169.0240055322647 - }, - "isolatedSum": { - "p50": 165.27999937534332, - "p90": 183.45600366592407, - "p95": 189.66399878263474, - "p99": 205.02399653196335 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 1104, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-878f6103", - "identity": "h200|deepep|4096|8|128|fp8|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||75530960a30b452", - "colorKey": "h200_9979edfc", - "comparisonKey": "539cbdfe3675c8d8", - "schemaVersion": 3, - "generatedAt": "2026-06-27T11:14:31.220360+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_1", - "sku": "h200", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · fp8", - "model": "Qwen3.5", - "shape": { - "hidden": 4096, - "topk": 8, - "experts": 128, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "fp8", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - 
"scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "75530960a30b452", - "workloadId": "set:8:d1b92539bddfb570", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28287507619", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28287507619", - "createdAt": "2026-06-27T11:14:31.220360+00:00", - "sha": "df7fddee0a275156d4c72fa006cd2b73bce72613" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 66.6240006685257, - "p90": 96.6079980134964, - "p95": 105.40799796581268, - "p99": 139.8400068283081 - }, - "combine": { - "p50": 48.928000032901764, - "p90": 62.144000083208084, - "p95": 69.98399645090103, - "p99": 94.71999853849411 - }, - "roundtrip": { - "p50": 150.39999783039093, - "p90": 202.27199792861938, - "p95": 209.88799631595612, - "p99": 232.35200345516205 - }, - "isolatedSum": { - "p50": 115.55200070142746, - "p90": 158.75199809670448, - "p95": 175.39199441671371, - "p99": 234.56000536680222 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 172032, - "combineLogicalBytes": 344064, - "fanoutMean": 5.25, - "recvTokensMax": 6, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 63.13599646091461, - "p90": 90.81599861383438, - "p95": 99.84000027179718, - "p99": 112.86400258541107 - }, - "combine": { - "p50": 48.86399954557419, - "p90": 59.487998485565186, - "p95": 66.880002617836, - "p99": 72.67200201749802 - }, - "roundtrip": { - "p50": 141.82400703430176, - "p90": 184.1599941253662, - "p95": 192.1280026435852, - "p99": 211.64800226688385 - }, - "isolatedSum": { - "p50": 111.9999960064888, - "p90": 150.30399709939957, - "p95": 
166.72000288963318, - "p99": 185.5360046029091 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 352256, - "combineLogicalBytes": 704512, - "fanoutMean": 5.375, - "recvTokensMax": 12, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 64.60800021886826, - "p90": 93.28000247478485, - "p95": 99.07200187444687, - "p99": 110.46399921178818 - }, - "combine": { - "p50": 49.375999718904495, - "p90": 60.447998344898224, - "p95": 67.61600077152252, - "p99": 73.27999919652939 - }, - "roundtrip": { - "p50": 142.752006649971, - "p90": 189.69599902629852, - "p95": 199.13600385189056, - "p99": 217.3440009355545 - }, - "isolatedSum": { - "p50": 113.98399993777275, - "p90": 153.72800081968307, - "p95": 166.6880026459694, - "p99": 183.74399840831757 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 692224, - "combineLogicalBytes": 1384448, - "fanoutMean": 5.28125, - "recvTokensMax": 26, - "stragglerRank": 1, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 61.85600161552429, - "p90": 86.496002972126, - "p95": 95.551997423172, - "p99": 104.00000214576721 - }, - "combine": { - "p50": 50.08000135421753, - "p90": 60.28800085186958, - "p95": 66.91200286149979, - "p99": 77.40800082683563 - }, - "roundtrip": { - "p50": 143.51999759674072, - "p90": 185.5040043592453, - "p95": 194.17600333690643, - "p99": 225.63199698925018 - }, - "isolatedSum": { - "p50": 111.93600296974182, - "p90": 146.7840038239956, - "p95": 162.46400028467178, - "p99": 181.40800297260284 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1372160, - "combineLogicalBytes": 2744320, - "fanoutMean": 5.234375, - "recvTokensMax": 49, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 67.87200272083282, 
- "p90": 97.72799909114838, - "p95": 107.10400342941284, - "p99": 163.64799439907074 - }, - "combine": { - "p50": 51.29599943757057, - "p90": 61.824001371860504, - "p95": 69.88800317049026, - "p99": 75.6480023264885 - }, - "roundtrip": { - "p50": 146.33600413799286, - "p90": 189.31199610233307, - "p95": 197.4399983882904, - "p99": 221.18400037288666 - }, - "isolatedSum": { - "p50": 119.1680021584034, - "p90": 159.55200046300888, - "p95": 176.9920065999031, - "p99": 239.29599672555923 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2732032, - "combineLogicalBytes": 5464064, - "fanoutMean": 5.2109375, - "recvTokensMax": 94, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 67.45599955320358, - "p90": 91.61599725484848, - "p95": 100.54399818181992, - "p99": 115.48800021409988 - }, - "combine": { - "p50": 54.17599901556969, - "p90": 66.01600348949432, - "p95": 71.74400240182877, - "p99": 80.22399991750717 - }, - "roundtrip": { - "p50": 148.80000054836273, - "p90": 190.11199474334717, - "p95": 201.79200172424316, - "p99": 216.44799411296844 - }, - "isolatedSum": { - "p50": 121.63199856877327, - "p90": 157.6320007443428, - "p95": 172.28800058364868, - "p99": 195.71200013160706 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 5562368, - "combineLogicalBytes": 11124736, - "fanoutMean": 5.3046875, - "recvTokensMax": 186, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 64, - "globalTokens": 512, - "dispatch": { - "p50": 71.80800288915634, - "p90": 112.67200112342834, - "p95": 120.15999853610992, - "p99": 136.28800213336945 - }, - "combine": { - "p50": 62.3680017888546, - "p90": 75.32799988985062, - "p95": 80.25600016117096, - "p99": 88.03199976682663 - }, - "roundtrip": { - "p50": 162.52799332141876, - "p90": 212.89600431919098, - "p95": 224.41600263118744, - "p99": 245.40799856185913 - 
}, - "isolatedSum": { - "p50": 134.17600467801094, - "p90": 188.00000101327896, - "p95": 200.41599869728088, - "p99": 224.32000190019608 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 11096064, - "combineLogicalBytes": 22192128, - "fanoutMean": 5.291015625, - "recvTokensMax": 358, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 76.48000121116638, - "p90": 95.36000341176987, - "p95": 103.64799946546555, - "p99": 109.15199667215347 - }, - "combine": { - "p50": 72.28799909353256, - "p90": 84.57600325345993, - "p95": 91.07200056314468, - "p99": 94.2080020904541 - }, - "roundtrip": { - "p50": 167.58400201797485, - "p90": 208.8959962129593, - "p95": 216.5759950876236, - "p99": 233.08800160884857 - }, - "isolatedSum": { - "p50": 148.76800030469894, - "p90": 179.9360066652298, - "p95": 194.72000002861023, - "p99": 203.35999876260757 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 22282240, - "combineLogicalBytes": 44564480, - "fanoutMean": 5.3125, - "recvTokensMax": 699, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-b5299c0b", - "identity": "h200|deepep|4096|8|128|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||75530960a30b452", - "colorKey": "h200_87683f6c", - "comparisonKey": "0d3b5b81799f76d5", - "schemaVersion": 3, - "generatedAt": "2026-06-26T23:53:33.916655+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_2", - "sku": "h200", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "runtime-visible-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · fp8", - "model": "Qwen3.5", - "shape": { - 
"hidden": 4096, - "topk": 8, - "experts": 128, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "fp8", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "75530960a30b452", - "workloadId": "set:8:d1b92539bddfb570", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28271736220", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271736220", - "createdAt": "2026-06-26T23:53:33.916655+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 221.15199267864227, - "p90": 287.26398944854736, - "p95": 315.39198756217957, - "p99": 401.98400616645813 - }, - "combine": { - "p50": 47.87199944257736, - "p90": 66.27199798822403, - "p95": 73.91999661922455, - "p99": 92.51199662685394 - }, - "roundtrip": { - "p50": 246.75199389457703, - "p90": 302.2400140762329, - "p95": 335.61599254608154, - "p99": 400.160014629364 - }, - "isolatedSum": { - "p50": 269.02399212121964, - "p90": 353.5359874367714, - "p95": 389.3119841814041, - "p99": 494.4960027933121 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 172032, - "combineLogicalBytes": 344064, - "fanoutMean": 5.25, - "recvTokensMax": 6, - "stragglerRank": 0, - "correct": 
true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 264.6079957485199, - "p90": 342.3680067062378, - "p95": 371.0399866104126, - "p99": 447.00801372528076 - }, - "combine": { - "p50": 54.46400120854378, - "p90": 68.03199648857117, - "p95": 74.8480036854744, - "p99": 88.83199840784073 - }, - "roundtrip": { - "p50": 257.2160065174103, - "p90": 336.4480137825012, - "p95": 375.10401010513306, - "p99": 443.93599033355713 - }, - "isolatedSum": { - "p50": 319.0719969570637, - "p90": 410.40000319480896, - "p95": 445.887990295887, - "p99": 535.8400121331215 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 352256, - "combineLogicalBytes": 704512, - "fanoutMean": 5.375, - "recvTokensMax": 12, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 210.14399826526642, - "p90": 260.0319981575012, - "p95": 276.99199318885803, - "p99": 401.856005191803 - }, - "combine": { - "p50": 49.02400076389313, - "p90": 61.983998864889145, - "p95": 68.57600063085556, - "p99": 82.43200182914734 - }, - "roundtrip": { - "p50": 252.73600220680237, - "p90": 308.51200222969055, - "p95": 325.76000690460205, - "p99": 404.2240083217621 - }, - "isolatedSum": { - "p50": 259.16799902915955, - "p90": 322.01599702239037, - "p95": 345.5679938197136, - "p99": 484.2880070209503 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 692224, - "combineLogicalBytes": 1384448, - "fanoutMean": 5.28125, - "recvTokensMax": 26, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 229.40799593925476, - "p90": 285.91999411582947, - "p95": 302.97601222991943, - "p99": 384.799987077713 - }, - "combine": { - "p50": 50.6879985332489, - "p90": 65.95200300216675, - "p95": 71.48800045251846, - "p99": 85.56800335645676 - }, - "roundtrip": { 
- "p50": 262.7840042114258, - "p90": 331.9680094718933, - "p95": 359.6160113811493, - "p99": 441.0560131072998 - }, - "isolatedSum": { - "p50": 280.09599447250366, - "p90": 351.8719971179962, - "p95": 374.4640126824379, - "p99": 470.36799043416977 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1372160, - "combineLogicalBytes": 2744320, - "fanoutMean": 5.234375, - "recvTokensMax": 49, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 211.67999505996704, - "p90": 262.0159983634949, - "p95": 281.5360128879547, - "p99": 434.4319999217987 - }, - "combine": { - "p50": 50.87999999523163, - "p90": 67.74400174617767, - "p95": 72.76800274848938, - "p99": 100.47999769449234 - }, - "roundtrip": { - "p50": 261.1199915409088, - "p90": 332.5119912624359, - "p95": 354.8800051212311, - "p99": 414.2720103263855 - }, - "isolatedSum": { - "p50": 262.55999505519867, - "p90": 329.76000010967255, - "p95": 354.3040156364441, - "p99": 534.911997616291 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2732032, - "combineLogicalBytes": 5464064, - "fanoutMean": 5.2109375, - "recvTokensMax": 94, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 210.68799495697021, - "p90": 258.91199707984924, - "p95": 279.87200021743774, - "p99": 326.1440098285675 - }, - "combine": { - "p50": 53.85600030422211, - "p90": 68.67200136184692, - "p95": 72.51200079917908, - "p99": 91.90399944782257 - }, - "roundtrip": { - "p50": 265.6959891319275, - "p90": 326.2079954147339, - "p95": 351.52000188827515, - "p99": 446.3360011577606 - }, - "isolatedSum": { - "p50": 264.5439952611923, - "p90": 327.58399844169617, - "p95": 352.3840010166168, - "p99": 418.0480092763901 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 5562368, - "combineLogicalBytes": 11124736, - 
"fanoutMean": 5.3046875, - "recvTokensMax": 186, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 64, - "globalTokens": 512, - "dispatch": { - "p50": 209.6640020608902, - "p90": 265.21599292755127, - "p95": 291.0720109939575, - "p99": 366.14400148391724 - }, - "combine": { - "p50": 61.43999844789505, - "p90": 73.91999661922455, - "p95": 79.42400127649307, - "p99": 92.06400066614151 - }, - "roundtrip": { - "p50": 262.2399926185608, - "p90": 317.7280128002167, - "p95": 350.7840037345886, - "p99": 447.9680061340332 - }, - "isolatedSum": { - "p50": 271.10400050878525, - "p90": 339.1359895467758, - "p95": 370.4960122704506, - "p99": 458.20800215005875 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 11096064, - "combineLogicalBytes": 22192128, - "fanoutMean": 5.291015625, - "recvTokensMax": 358, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 217.8879976272583, - "p90": 276.99199318885803, - "p95": 317.05600023269653, - "p99": 742.6559925079346 - }, - "combine": { - "p50": 72.67200201749802, - "p90": 88.54400366544724, - "p95": 92.47999638319016, - "p99": 113.02399635314941 - }, - "roundtrip": { - "p50": 273.44000339508057, - "p90": 323.5520124435425, - "p95": 345.0239896774292, - "p99": 420.3520119190216 - }, - "isolatedSum": { - "p50": 290.5599996447563, - "p90": 365.53599685430527, - "p95": 409.5359966158867, - "p99": 855.679988861084 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 22282240, - "combineLogicalBytes": 44564480, - "fanoutMean": 5.3125, - "recvTokensMax": 699, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-a3751d3c", - "identity": "h200|deepep|5120|8|160|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||be1b44a963bd4ef", - "colorKey": "h200_87683f6c", - "comparisonKey": 
"972ab14012f6276a", - "schemaVersion": 3, - "generatedAt": "2026-06-26T23:53:56.538326+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_11", - "sku": "h200", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "runtime-visible-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · fp8", - "model": "shape 5120/8/160", - "shape": { - "hidden": 5120, - "topk": 8, - "experts": 160, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "fp8", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "be1b44a963bd4ef", - "workloadId": "set:8:34e5874082f8ea8f", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28271751941", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271751941", - "createdAt": "2026-06-26T23:53:56.538326+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 217.3759937286377, - "p90": 269.1839933395386, - "p95": 295.1360046863556, - "p99": 
345.69600224494934 - }, - "combine": { - "p50": 50.592001527547836, - "p90": 66.46399945020676, - "p95": 71.74400240182877, - "p99": 89.34400230646133 - }, - "roundtrip": { - "p50": 245.60000002384186, - "p90": 292.64000058174133, - "p95": 306.0480058193207, - "p99": 346.8160033226013 - }, - "isolatedSum": { - "p50": 267.96799525618553, - "p90": 335.64799278974533, - "p95": 366.88000708818436, - "p99": 435.0400045514107 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 215040, - "combineLogicalBytes": 430080, - "fanoutMean": 5.25, - "recvTokensMax": 8, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 206.2399983406067, - "p90": 254.94399666786194, - "p95": 270.4960107803345, - "p99": 337.21598982810974 - }, - "combine": { - "p50": 51.263999193906784, - "p90": 65.72800129652023, - "p95": 70.52800059318542, - "p99": 75.58400183916092 - }, - "roundtrip": { - "p50": 245.15199661254883, - "p90": 296.31999135017395, - "p95": 316.1279857158661, - "p99": 367.3279881477356 - }, - "isolatedSum": { - "p50": 257.5039975345135, - "p90": 320.6719979643822, - "p95": 341.0240113735199, - "p99": 412.79999166727066 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 440320, - "combineLogicalBytes": 880640, - "fanoutMean": 5.375, - "recvTokensMax": 13, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 220.38400173187256, - "p90": 289.15199637413025, - "p95": 331.5519988536835, - "p99": 1036.1599922180176 - }, - "combine": { - "p50": 52.191998809576035, - "p90": 65.21599739789963, - "p95": 68.96000355482101, - "p99": 77.88799703121185 - }, - "roundtrip": { - "p50": 248.79999458789825, - "p90": 299.71200227737427, - "p95": 314.5279884338379, - "p99": 352.09599137306213 - }, - "isolatedSum": { - "p50": 272.5760005414486, - "p90": 354.3679937720299, - "p95": 
400.5120024085045, - "p99": 1114.0479892492294 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 870400, - "combineLogicalBytes": 1740800, - "fanoutMean": 5.3125, - "recvTokensMax": 25, - "stragglerRank": 3, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 221.91999852657318, - "p90": 292.4480140209198, - "p95": 316.3520097732544, - "p99": 412.76800632476807 - }, - "combine": { - "p50": 54.84800040721893, - "p90": 71.61600142717361, - "p95": 80.64000308513641, - "p99": 102.1760031580925 - }, - "roundtrip": { - "p50": 249.24799799919128, - "p90": 305.5360019207001, - "p95": 325.1520097255707, - "p99": 406.9119989871979 - }, - "isolatedSum": { - "p50": 276.7679989337921, - "p90": 364.0640154480934, - "p95": 396.9920128583908, - "p99": 514.9440094828606 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1735680, - "combineLogicalBytes": 3471360, - "fanoutMean": 5.296875, - "recvTokensMax": 50, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 209.75999534130096, - "p90": 260.73598861694336, - "p95": 279.7119915485382, - "p99": 349.98399019241333 - }, - "combine": { - "p50": 54.88000065088272, - "p90": 69.34399902820587, - "p95": 73.91999661922455, - "p99": 101.08800232410431 - }, - "roundtrip": { - "p50": 254.36800718307495, - "p90": 305.2160143852234, - "p95": 330.55999875068665, - "p99": 445.72800397872925 - }, - "isolatedSum": { - "p50": 264.6399959921837, - "p90": 330.07998764514923, - "p95": 353.63198816776276, - "p99": 451.07199251651764 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 3456000, - "combineLogicalBytes": 6912000, - "fanoutMean": 5.2734375, - "recvTokensMax": 93, - "stragglerRank": 2, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 
212.16000616550446, - "p90": 261.34398579597473, - "p95": 274.4959890842438, - "p99": 355.9679985046387 - }, - "combine": { - "p50": 59.487998485565186, - "p90": 75.9039968252182, - "p95": 79.29600030183792, - "p99": 111.13599687814713 - }, - "roundtrip": { - "p50": 262.4320089817047, - "p90": 318.33600997924805, - "p95": 339.4559919834137, - "p99": 384.0320110321045 - }, - "isolatedSum": { - "p50": 271.64800465106964, - "p90": 337.24798262119293, - "p95": 353.7919893860817, - "p99": 467.1039953827858 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 6988800, - "combineLogicalBytes": 13977600, - "fanoutMean": 5.33203125, - "recvTokensMax": 179, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 64, - "globalTokens": 512, - "dispatch": { - "p50": 213.44000101089478, - "p90": 259.99999046325684, - "p95": 280.2880108356476, - "p99": 418.08000206947327 - }, - "combine": { - "p50": 67.26399809122086, - "p90": 79.1039988398552, - "p95": 86.94399893283844, - "p99": 97.59999811649323 - }, - "roundtrip": { - "p50": 273.98398518562317, - "p90": 361.2799942493439, - "p95": 384.0959966182709, - "p99": 485.24799942970276 - }, - "isolatedSum": { - "p50": 280.70399910211563, - "p90": 339.10398930311203, - "p95": 367.232009768486, - "p99": 515.6800001859665 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 13987840, - "combineLogicalBytes": 27975680, - "fanoutMean": 5.3359375, - "recvTokensMax": 355, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 217.53600239753723, - "p90": 271.9680070877075, - "p95": 288.8000011444092, - "p99": 367.71199107170105 - }, - "combine": { - "p50": 80.73599636554718, - "p90": 95.90400010347366, - "p95": 99.16800260543823, - "p99": 122.56000190973282 - }, - "roundtrip": { - "p50": 289.6000146865845, - "p90": 337.69598603248596, - "p95": 350.847989320755, - "p99": 
431.4559996128082 - }, - "isolatedSum": { - "p50": 298.2719987630844, - "p90": 367.8720071911812, - "p95": 387.9680037498474, - "p99": 490.27199298143387 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 27837440, - "combineLogicalBytes": 55674880, - "fanoutMean": 5.3095703125, - "recvTokensMax": 699, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-73819dd3", - "identity": "h200|deepep|6144|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", - "colorKey": "h200_9979edfc", - "comparisonKey": "3ee03cee0282c011", - "schemaVersion": 3, - "generatedAt": "2026-06-27T11:13:48.278988+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_6", - "sku": "h200", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · fp8", - "model": "MiniMax-M3", - "shape": { - "hidden": 6144, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "fp8", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "ac583971f94b176", - "workloadId": "set:8:2e0df6a62cd0143e", - "workloadSource": "canonical-serialized", - 
"eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28287496212", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28287496212", - "createdAt": "2026-06-27T11:13:48.278988+00:00", - "sha": "df7fddee0a275156d4c72fa006cd2b73bce72613" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 70.14399766921997, - "p90": 101.59999877214432, - "p95": 114.72000181674957, - "p99": 231.32799565792084 - }, - "combine": { - "p50": 56.8000003695488, - "p90": 69.18399780988693, - "p95": 76.64000242948532, - "p99": 99.64799880981445 - }, - "roundtrip": { - "p50": 154.30399775505066, - "p90": 196.383997797966, - "p95": 217.47200191020966, - "p99": 263.90400528907776 - }, - "isolatedSum": { - "p50": 126.94399803876877, - "p90": 170.78399658203125, - "p95": 191.3600042462349, - "p99": 330.9759944677353 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 270336, - "combineLogicalBytes": 540672, - "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 68.60800087451935, - "p90": 98.33600372076035, - "p95": 105.66399991512299, - "p99": 159.45599973201752 - }, - "combine": { - "p50": 56.60799890756607, - "p90": 71.55200093984604, - "p95": 75.48800110816956, - "p99": 96.22400254011154 - }, - "roundtrip": { - "p50": 158.07999670505524, - "p90": 207.39200711250305, - "p95": 222.3680019378662, - "p99": 268.15998554229736 - }, - "isolatedSum": { - "p50": 125.21599978208542, - "p90": 169.88800466060638, - "p95": 181.15200102329254, - "p99": 255.68000227212906 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 528384, - "combineLogicalBytes": 1056768, - "fanoutMean": 5.375, - "recvTokensMax": 13, 
- "stragglerRank": 1, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 66.3679987192154, - "p90": 87.23200112581253, - "p95": 100.76799988746643, - "p99": 133.5040032863617 - }, - "combine": { - "p50": 56.41600117087364, - "p90": 65.72800129652023, - "p95": 73.27999919652939, - "p99": 81.98399841785431 - }, - "roundtrip": { - "p50": 150.4960060119629, - "p90": 190.36799669265747, - "p95": 200.32000541687012, - "p99": 249.37599897384644 - }, - "isolatedSum": { - "p50": 122.78399989008904, - "p90": 152.96000242233276, - "p95": 174.04799908399582, - "p99": 215.488001704216 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1062912, - "combineLogicalBytes": 2125824, - "fanoutMean": 5.40625, - "recvTokensMax": 29, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 67.80800223350525, - "p90": 100.80000013113022, - "p95": 111.00800335407257, - "p99": 136.99199259281158 - }, - "combine": { - "p50": 57.08799883723259, - "p90": 72.22399860620499, - "p95": 76.60800218582153, - "p99": 86.33600175380707 - }, - "roundtrip": { - "p50": 153.56799960136414, - "p90": 201.50400698184967, - "p95": 210.24000644683838, - "p99": 257.3759853839874 - }, - "isolatedSum": { - "p50": 124.89600107073784, - "p90": 173.0239987373352, - "p95": 187.6160055398941, - "p99": 223.32799434661865 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2131968, - "combineLogicalBytes": 4263936, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 65.76000154018402, - "p90": 92.6079973578453, - "p95": 103.13600301742554, - "p99": 142.81600713729858 - }, - "combine": { - "p50": 58.400001376867294, - "p90": 68.70400160551071, - "p95": 77.40800082683563, 
- "p99": 84.22400057315826 - }, - "roundtrip": { - "p50": 162.75200247764587, - "p90": 214.30400013923645, - "p95": 235.167995095253, - "p99": 264.70398902893066 - }, - "isolatedSum": { - "p50": 124.16000291705132, - "p90": 161.31199896335602, - "p95": 180.54400384426117, - "p99": 227.04000771045685 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4251648, - "combineLogicalBytes": 8503296, - "fanoutMean": 5.40625, - "recvTokensMax": 92, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 69.31199878454208, - "p90": 97.59999811649323, - "p95": 107.77600109577179, - "p99": 145.37599682807922 - }, - "combine": { - "p50": 64.67200070619583, - "p90": 80.35200089216232, - "p95": 85.4720026254654, - "p99": 101.88800096511841 - }, - "roundtrip": { - "p50": 164.32000696659088, - "p90": 208.48000049591064, - "p95": 222.88000583648682, - "p99": 259.71201062202454 - }, - "isolatedSum": { - "p50": 133.98399949073792, - "p90": 177.95199900865555, - "p95": 193.24800372123718, - "p99": 247.26399779319763 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 8454144, - "combineLogicalBytes": 16908288, - "fanoutMean": 5.375, - "recvTokensMax": 182, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 64, - "globalTokens": 512, - "dispatch": { - "p50": 77.31200009584427, - "p90": 101.69599950313568, - "p95": 117.76000261306763, - "p99": 361.5039885044098 - }, - "combine": { - "p50": 72.35199958086014, - "p90": 85.63199639320374, - "p95": 92.51199662685394, - "p99": 103.64799946546555 - }, - "roundtrip": { - "p50": 168.2880073785782, - "p90": 209.9200040102005, - "p95": 219.67999637126923, - "p99": 266.84799790382385 - }, - "isolatedSum": { - "p50": 149.6639996767044, - "p90": 187.32799589633942, - "p95": 210.27199923992157, - "p99": 465.15198796987534 - }, - "roundtripMeasured": true, - 
"dispatchLogicalBytes": 16711680, - "combineLogicalBytes": 33423360, - "fanoutMean": 5.3125, - "recvTokensMax": 367, - "stragglerRank": 1, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 84.70399677753448, - "p90": 109.21599715948105, - "p95": 114.78400230407715, - "p99": 161.47199273109436 - }, - "combine": { - "p50": 88.25600147247314, - "p90": 105.0880029797554, - "p95": 113.11999708414078, - "p99": 147.42399752140045 - }, - "roundtrip": { - "p50": 195.5839991569519, - "p90": 248.28800559043884, - "p95": 262.4959945678711, - "p99": 325.56799054145813 - }, - "isolatedSum": { - "p50": 172.95999825000763, - "p90": 214.30400013923645, - "p95": 227.90399938821793, - "p99": 308.8959902524948 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 33288192, - "combineLogicalBytes": 66576384, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-1bedbd87", - "identity": "h200|deepep|6144|8|256|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", - "colorKey": "h200_87683f6c", - "comparisonKey": "73242cc56a07dc73", - "schemaVersion": 3, - "generatedAt": "2026-06-26T23:54:22.337969+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_8", - "sku": "h200", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "runtime-visible-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · fp8", - "model": "MiniMax-M3", - "shape": { - "hidden": 6144, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - 
"dispatchDtype": "fp8", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "ac583971f94b176", - "workloadId": "set:8:2e0df6a62cd0143e", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28271767522", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271767522", - "createdAt": "2026-06-26T23:54:22.337969+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 217.43999421596527, - "p90": 302.7519881725311, - "p95": 334.4320058822632, - "p99": 396.06401324272156 - }, - "combine": { - "p50": 55.1999993622303, - "p90": 72.03199714422226, - "p95": 78.23999971151352, - "p99": 108.09600353240967 - }, - "roundtrip": { - "p50": 251.71199440956116, - "p90": 317.27999448776245, - "p95": 335.10398864746094, - "p99": 397.92001247406006 - }, - "isolatedSum": { - "p50": 272.6399935781956, - "p90": 374.7839853167534, - "p95": 412.6720055937767, - "p99": 504.1600167751312 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 270336, - "combineLogicalBytes": 540672, - "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 206.81600272655487, - "p90": 269.6639895439148, - 
"p95": 289.6000146865845, - "p99": 343.23200583457947 - }, - "combine": { - "p50": 55.135998874902725, - "p90": 71.77600264549255, - "p95": 77.47200131416321, - "p99": 96.09600156545639 - }, - "roundtrip": { - "p50": 247.93599545955658, - "p90": 305.63199520111084, - "p95": 323.168009519577, - "p99": 380.12799620628357 - }, - "isolatedSum": { - "p50": 261.9520016014576, - "p90": 341.43999218940735, - "p95": 367.0720160007477, - "p99": 439.32800740003586 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 528384, - "combineLogicalBytes": 1056768, - "fanoutMean": 5.375, - "recvTokensMax": 13, - "stragglerRank": 3, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 211.04000508785248, - "p90": 283.32799673080444, - "p95": 302.65599489212036, - "p99": 377.6639997959137 - }, - "combine": { - "p50": 56.89600110054016, - "p90": 70.68800181150436, - "p95": 78.3040001988411, - "p99": 85.4400023818016 - }, - "roundtrip": { - "p50": 251.52000784873962, - "p90": 306.4959943294525, - "p95": 319.64799761772156, - "p99": 344.1599905490875 - }, - "isolatedSum": { - "p50": 267.93600618839264, - "p90": 354.0159985423088, - "p95": 380.95999509096146, - "p99": 463.1040021777153 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1062912, - "combineLogicalBytes": 2125824, - "fanoutMean": 5.40625, - "recvTokensMax": 29, - "stragglerRank": 3, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 204.92799580097198, - "p90": 272.09600806236267, - "p95": 291.29600524902344, - "p99": 364.3519878387451 - }, - "combine": { - "p50": 56.96000158786774, - "p90": 71.96799665689468, - "p95": 77.79199630022049, - "p99": 86.91199868917465 - }, - "roundtrip": { - "p50": 245.69599330425262, - "p90": 303.16799879074097, - "p95": 321.9519853591919, - "p99": 421.1199879646301 - }, - "isolatedSum": { - "p50": 261.8879973888397, 
- "p90": 344.06400471925735, - "p95": 369.0880015492439, - "p99": 451.26398652791977 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2131968, - "combineLogicalBytes": 4263936, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 3, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 211.61599457263947, - "p90": 274.3679881095886, - "p95": 311.2959861755371, - "p99": 390.8799886703491 - }, - "combine": { - "p50": 58.720000088214874, - "p90": 74.68800246715546, - "p95": 80.09599894285202, - "p99": 87.5839963555336 - }, - "roundtrip": { - "p50": 250.65600872039795, - "p90": 313.24800848960876, - "p95": 336.1920118331909, - "p99": 386.59200072288513 - }, - "isolatedSum": { - "p50": 270.33599466085434, - "p90": 349.0559905767441, - "p95": 391.39198511838913, - "p99": 478.4639850258827 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4251648, - "combineLogicalBytes": 8503296, - "fanoutMean": 5.40625, - "recvTokensMax": 92, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 204.92799580097198, - "p90": 262.62399554252625, - "p95": 280.5440127849579, - "p99": 327.4880051612854 - }, - "combine": { - "p50": 64.54399973154068, - "p90": 81.85599744319916, - "p95": 87.8399983048439, - "p99": 104.41599786281586 - }, - "roundtrip": { - "p50": 262.59198784828186, - "p90": 327.7440071105957, - "p95": 351.6159951686859, - "p99": 406.0800075531006 - }, - "isolatedSum": { - "p50": 269.47199553251266, - "p90": 344.4799929857254, - "p95": 368.3840110898018, - "p99": 431.90400302410126 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 8454144, - "combineLogicalBytes": 16908288, - "fanoutMean": 5.375, - "recvTokensMax": 182, - "stragglerRank": 1, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 64, - "globalTokens": 
512, - "dispatch": { - "p50": 247.5840002298355, - "p90": 392.5119936466217, - "p95": 406.14399313926697, - "p99": 443.5200095176697 - }, - "combine": { - "p50": 71.84000313282013, - "p90": 89.85599875450134, - "p95": 94.68799829483032, - "p99": 119.32799965143204 - }, - "roundtrip": { - "p50": 261.85598969459534, - "p90": 329.24801111221313, - "p95": 345.15199065208435, - "p99": 426.1760115623474 - }, - "isolatedSum": { - "p50": 319.42400336265564, - "p90": 482.36799240112305, - "p95": 500.8319914340973, - "p99": 562.8480091691017 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 16711680, - "combineLogicalBytes": 33423360, - "fanoutMean": 5.3125, - "recvTokensMax": 367, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 218.33600103855133, - "p90": 282.6240062713623, - "p95": 299.1040050983429, - "p99": 340.831995010376 - }, - "combine": { - "p50": 87.16800063848495, - "p90": 104.67199981212616, - "p95": 109.18399691581726, - "p99": 127.32799351215363 - }, - "roundtrip": { - "p50": 291.83998703956604, - "p90": 343.6479866504669, - "p95": 355.48800230026245, - "p99": 407.1680009365082 - }, - "isolatedSum": { - "p50": 305.5040016770363, - "p90": 387.29600608348846, - "p95": 408.28800201416016, - "p99": 468.1599885225296 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 33288192, - "combineLogicalBytes": 66576384, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 2, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-1d12a6ce", - "identity": "h200|deepep|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", - "colorKey": "h200_9979edfc", - "comparisonKey": "057f864d1542d54f", - "schemaVersion": 3, - "generatedAt": "2026-06-27T10:26:28.109691+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": 
"h200-dgxc-slurm_6", - "sku": "h200", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · fp8", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "fp8", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "ac583971f94b176", - "workloadId": "set:8:d8d49658059863f2", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "2.0.0+af9a040", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28286433802", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28286433802", - "createdAt": "2026-06-27T10:26:28.109691+00:00", - "sha": "91c7acf59a5e524f37742922ec67721d86a03f6b" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 66.94400310516357, - "p90": 102.65599936246872, - "p95": 111.55200004577637, - "p99": 136.06399297714233 - }, - "combine": { - "p50": 61.503998935222626, - "p90": 75.99999755620956, - "p95": 80.64000308513641, - "p99": 118.33599954843521 - }, - 
"roundtrip": { - "p50": 168.7999963760376, - "p90": 279.00800108909607, - "p95": 304.03199791908264, - "p99": 436.41600012779236 - }, - "isolatedSum": { - "p50": 128.4480020403862, - "p90": 178.65599691867828, - "p95": 192.19200313091278, - "p99": 254.39999252557755 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 315392, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 68.51200014352798, - "p90": 98.43199700117111, - "p95": 108.67200046777725, - "p99": 120.41600048542023 - }, - "combine": { - "p50": 61.69600039720535, - "p90": 78.14399898052216, - "p95": 82.0159986615181, - "p99": 97.9200005531311 - }, - "roundtrip": { - "p50": 167.04000532627106, - "p90": 214.88000452518463, - "p95": 225.63199698925018, - "p99": 264.8319900035858 - }, - "isolatedSum": { - "p50": 130.20800054073334, - "p90": 176.57599598169327, - "p95": 190.68799912929535, - "p99": 218.33600103855133 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 616448, - "combineLogicalBytes": 1232896, - "fanoutMean": 5.375, - "recvTokensMax": 13, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 67.77600198984146, - "p90": 99.5199978351593, - "p95": 105.27999699115753, - "p99": 120.7680031657219 - }, - "combine": { - "p50": 60.99199876189232, - "p90": 76.9599974155426, - "p95": 81.37600123882294, - "p99": 85.28000116348267 - }, - "roundtrip": { - "p50": 158.36800634860992, - "p90": 202.4639993906021, - "p95": 213.34399282932281, - "p99": 470.46399116516113 - }, - "isolatedSum": { - "p50": 128.76800075173378, - "p90": 176.4799952507019, - "p95": 186.65599822998047, - "p99": 206.04800432920456 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1240064, - "combineLogicalBytes": 2480128, - 
"fanoutMean": 5.40625, - "recvTokensMax": 29, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 68.44799965620041, - "p90": 100.3199964761734, - "p95": 105.21599650382996, - "p99": 126.43200159072876 - }, - "combine": { - "p50": 63.45599889755249, - "p90": 79.0719985961914, - "p95": 84.99199897050858, - "p99": 93.02400052547455 - }, - "roundtrip": { - "p50": 166.78400337696075, - "p90": 212.0639979839325, - "p95": 220.09600698947906, - "p99": 258.8160037994385 - }, - "isolatedSum": { - "p50": 131.9039985537529, - "p90": 179.3919950723648, - "p95": 190.20799547433853, - "p99": 219.4560021162033 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2487296, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 68.31999868154526, - "p90": 94.97600048780441, - "p95": 101.88800096511841, - "p99": 112.73600161075592 - }, - "combine": { - "p50": 63.80800157785416, - "p90": 75.58400183916092, - "p95": 82.97599852085114, - "p99": 96.70399874448776 - }, - "roundtrip": { - "p50": 165.47200083732605, - "p90": 223.29600155353546, - "p95": 241.98399484157562, - "p99": 347.9999899864197 - }, - "isolatedSum": { - "p50": 132.1280002593994, - "p90": 170.56000232696533, - "p95": 184.86399948596954, - "p99": 209.44000035524368 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4960256, - "combineLogicalBytes": 9920512, - "fanoutMean": 5.40625, - "recvTokensMax": 92, - "stragglerRank": 2, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 72.60800153017044, - "p90": 105.12000322341919, - "p95": 111.10399663448334, - "p99": 125.08800625801086 - }, - "combine": { - "p50": 70.23999840021133, - "p90": 
83.52000266313553, - "p95": 88.35200220346451, - "p99": 93.1520015001297 - }, - "roundtrip": { - "p50": 169.37600076198578, - "p90": 216.25599265098572, - "p95": 225.15200078487396, - "p99": 254.59200143814087 - }, - "isolatedSum": { - "p50": 142.84799993038177, - "p90": 188.64000588655472, - "p95": 199.45599883794785, - "p99": 218.24000775814056 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 9863168, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 182, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 64, - "globalTokens": 512, - "dispatch": { - "p50": 79.0719985961914, - "p90": 104.54399883747101, - "p95": 109.76000130176544, - "p99": 320.99199295043945 - }, - "combine": { - "p50": 80.51200211048126, - "p90": 98.39999675750732, - "p95": 101.85600072145462, - "p99": 225.53600370883942 - }, - "roundtrip": { - "p50": 180.2240014076233, - "p90": 218.4319943189621, - "p95": 229.312002658844, - "p99": 268.70399713516235 - }, - "isolatedSum": { - "p50": 159.58400070667267, - "p90": 202.94399559497833, - "p95": 211.61600202322006, - "p99": 546.5279966592789 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 19496960, - "combineLogicalBytes": 38993920, - "fanoutMean": 5.3125, - "recvTokensMax": 367, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 86.62399649620056, - "p90": 106.52799904346466, - "p95": 113.21599781513214, - "p99": 120.89599668979645 - }, - "combine": { - "p50": 98.1760025024414, - "p90": 115.13599753379822, - "p95": 118.6240017414093, - "p99": 130.5920034646988 - }, - "roundtrip": { - "p50": 210.207998752594, - "p90": 238.3359968662262, - "p95": 245.15199661254883, - "p99": 258.87998938560486 - }, - "isolatedSum": { - "p50": 184.79999899864197, - "p90": 221.66399657726288, - "p95": 231.83999955654144, - "p99": 251.48800015449524 - }, - 
"roundtripMeasured": true, - "dispatchLogicalBytes": 38836224, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-9a6e69f6", - "identity": "h200|deepep|7168|8|256|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", - "colorKey": "h200_87683f6c", - "comparisonKey": "c387c5e642249761", - "schemaVersion": 3, - "generatedAt": "2026-06-26T23:50:29.289162+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_5", - "sku": "h200", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "runtime-visible-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · fp8", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "fp8", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "ac583971f94b176", - "workloadId": "set:8:d8d49658059863f2", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - 
"repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28271636896", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271636896", - "createdAt": "2026-06-26T23:50:29.289162+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 228.70400547981262, - "p90": 269.6959972381592, - "p95": 279.5200049877167, - "p99": 338.1119966506958 - }, - "combine": { - "p50": 61.08799949288368, - "p90": 73.5040009021759, - "p95": 82.20800012350082, - "p99": 98.33600372076035 - }, - "roundtrip": { - "p50": 271.232008934021, - "p90": 306.94401264190674, - "p95": 324.2560029029846, - "p99": 374.65599179267883 - }, - "isolatedSum": { - "p50": 289.7920049726963, - "p90": 343.1999981403351, - "p95": 361.7280051112175, - "p99": 436.44800037145615 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 315392, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 216.06400609016418, - "p90": 246.33599817752838, - "p95": 261.3759934902191, - "p99": 341.40801429748535 - }, - "combine": { - "p50": 59.7120001912117, - "p90": 68.09599697589874, - "p95": 74.46400076150894, - "p99": 89.53599631786346 - }, - "roundtrip": { - "p50": 268.99200677871704, - "p90": 305.08801341056824, - "p95": 324.41601157188416, - "p99": 433.0880045890808 - }, - "isolatedSum": { - "p50": 275.7760062813759, - "p90": 314.4319951534271, - "p95": 335.83999425172806, - "p99": 430.9440106153488 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 616448, - "combineLogicalBytes": 1232896, - "fanoutMean": 5.375, - "recvTokensMax": 13, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 229.98400032520294, - "p90": 
283.07199478149414, - "p95": 300.00001192092896, - "p99": 371.2959885597229 - }, - "combine": { - "p50": 61.055999249219894, - "p90": 78.68800312280655, - "p95": 83.55200290679932, - "p99": 112.47999966144562 - }, - "roundtrip": { - "p50": 274.1119861602783, - "p90": 337.0879888534546, - "p95": 358.7520122528076, - "p99": 398.75200390815735 - }, - "isolatedSum": { - "p50": 291.03999957442284, - "p90": 361.7599979043007, - "p95": 383.55201482772827, - "p99": 483.7759882211685 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1240064, - "combineLogicalBytes": 2480128, - "fanoutMean": 5.40625, - "recvTokensMax": 29, - "stragglerRank": 3, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 218.87999773025513, - "p90": 251.55198574066162, - "p95": 265.855997800827, - "p99": 311.39200925827026 - }, - "combine": { - "p50": 62.111999839544296, - "p90": 71.6480016708374, - "p95": 77.11999863386154, - "p99": 90.40000289678574 - }, - "roundtrip": { - "p50": 266.9120132923126, - "p90": 300.57600140571594, - "p95": 317.8560137748718, - "p99": 357.02401399612427 - }, - "isolatedSum": { - "p50": 280.9919975697994, - "p90": 323.199987411499, - "p95": 342.97599643468857, - "p99": 401.792012155056 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2487296, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 219.29599344730377, - "p90": 267.61600375175476, - "p95": 287.00798749923706, - "p99": 346.8160033226013 - }, - "combine": { - "p50": 63.840001821517944, - "p90": 79.77599650621414, - "p95": 84.95999872684479, - "p99": 98.49599748849869 - }, - "roundtrip": { - "p50": 265.4719948768616, - "p90": 309.9519908428192, - "p95": 323.8399922847748, - "p99": 397.8559970855713 - }, - "isolatedSum": { - 
"p50": 283.1359952688217, - "p90": 347.3920002579689, - "p95": 371.96798622608185, - "p99": 445.3120008111 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4960256, - "combineLogicalBytes": 9920512, - "fanoutMean": 5.40625, - "recvTokensMax": 92, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 219.10400688648224, - "p90": 245.5040067434311, - "p95": 260.3200078010559, - "p99": 308.0959916114807 - }, - "combine": { - "p50": 69.50400024652481, - "p90": 78.33600044250488, - "p95": 83.96799862384796, - "p99": 95.8079993724823 - }, - "roundtrip": { - "p50": 275.2319872379303, - "p90": 308.9599907398224, - "p95": 331.07200264930725, - "p99": 425.6319999694824 - }, - "isolatedSum": { - "p50": 288.60800713300705, - "p90": 323.840007185936, - "p95": 344.28800642490387, - "p99": 403.903990983963 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 9863168, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 182, - "stragglerRank": 1, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 64, - "globalTokens": 512, - "dispatch": { - "p50": 221.27999365329742, - "p90": 263.90400528907776, - "p95": 282.20799565315247, - "p99": 368.51200461387634 - }, - "combine": { - "p50": 79.77599650621414, - "p90": 91.32800251245499, - "p95": 96.6079980134964, - "p99": 106.52799904346466 - }, - "roundtrip": { - "p50": 288.4159982204437, - "p90": 336.41600608825684, - "p95": 353.7920117378235, - "p99": 471.1360037326813 - }, - "isolatedSum": { - "p50": 301.05599015951157, - "p90": 355.23200780153275, - "p95": 378.81599366664886, - "p99": 475.040003657341 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 19496960, - "combineLogicalBytes": 38993920, - "fanoutMean": 5.3125, - "recvTokensMax": 367, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 
128, - "globalTokens": 1024, - "dispatch": { - "p50": 233.024001121521, - "p90": 284.4479978084564, - "p95": 301.63198709487915, - "p99": 392.5760090351105 - }, - "combine": { - "p50": 97.50399738550186, - "p90": 109.76000130176544, - "p95": 115.99999666213989, - "p99": 127.93600559234619 - }, - "roundtrip": { - "p50": 316.6399896144867, - "p90": 356.06399178504944, - "p95": 368.5759902000427, - "p99": 464.352011680603 - }, - "isolatedSum": { - "p50": 330.52799850702286, - "p90": 394.20799911022186, - "p95": 417.63198375701904, - "p99": 520.5120146274567 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 38836224, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-a04f9063", - "identity": "h200|deepep|7168|8|384|fp8|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||d6c49ae98878760", - "colorKey": "h200_9979edfc", - "comparisonKey": "7a8492db4d26e76b", - "schemaVersion": 3, - "generatedAt": "2026-06-27T11:14:07.695062+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_2", - "sku": "h200", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · fp8", - "model": "Kimi-K2", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 384, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "fp8", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - 
"resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "d6c49ae98878760", - "workloadId": "set:8:9a27d0df4b17fa09", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28287502149", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28287502149", - "createdAt": "2026-06-27T11:14:07.695062+00:00", - "sha": "df7fddee0a275156d4c72fa006cd2b73bce72613" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 73.02399724721909, - "p90": 93.91999989748001, - "p95": 107.42399841547012, - "p99": 139.20000195503235 - }, - "combine": { - "p50": 59.93599817156792, - "p90": 70.36799937486649, - "p95": 75.93599706888199, - "p99": 93.44000369310379 - }, - "roundtrip": { - "p50": 157.69599378108978, - "p90": 192.09599494934082, - "p95": 211.32799983024597, - "p99": 397.7600038051605 - }, - "isolatedSum": { - "p50": 132.959995418787, - "p90": 164.2879992723465, - "p95": 183.3599954843521, - "p99": 232.64000564813614 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 301056, - "combineLogicalBytes": 602112, - "fanoutMean": 5.25, - "recvTokensMax": 8, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 71.3919997215271, - "p90": 89.91999924182892, - "p95": 99.71199929714203, - "p99": 113.79200220108032 - }, - "combine": { - "p50": 58.720000088214874, - "p90": 72.09599763154984, - "p95": 77.60000228881836, - "p99": 88.70399743318558 - }, - "roundtrip": { - "p50": 
158.62399339675903, - "p90": 189.5039975643158, - "p95": 197.82400131225586, - "p99": 229.34399545192719 - }, - "isolatedSum": { - "p50": 130.11199980974197, - "p90": 162.01599687337875, - "p95": 177.3120015859604, - "p99": 202.4959996342659 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 609280, - "combineLogicalBytes": 1218560, - "fanoutMean": 5.3125, - "recvTokensMax": 14, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 72.7040022611618, - "p90": 106.04800283908844, - "p95": 114.81600254774094, - "p99": 146.84799313545227 - }, - "combine": { - "p50": 60.5119988322258, - "p90": 75.58400183916092, - "p95": 79.42400127649307, - "p99": 93.9520001411438 - }, - "roundtrip": { - "p50": 158.75199437141418, - "p90": 193.15199553966522, - "p95": 202.04800367355347, - "p99": 231.51999711990356 - }, - "isolatedSum": { - "p50": 133.2160010933876, - "p90": 181.63200467824936, - "p95": 194.240003824234, - "p99": 240.79999327659607 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1204224, - "combineLogicalBytes": 2408448, - "fanoutMean": 5.25, - "recvTokensMax": 26, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 70.0799971818924, - "p90": 88.22400122880936, - "p95": 97.59999811649323, - "p99": 165.82399606704712 - }, - "combine": { - "p50": 60.54399907588959, - "p90": 70.68800181150436, - "p95": 78.07999849319458, - "p99": 89.05600011348724 - }, - "roundtrip": { - "p50": 159.32799875736237, - "p90": 187.6160055398941, - "p95": 201.24800503253937, - "p99": 239.58399891853333 - }, - "isolatedSum": { - "p50": 130.62399625778198, - "p90": 158.91200304031372, - "p95": 175.6799966096878, - "p99": 254.87999618053436 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2415616, - "combineLogicalBytes": 4831232, - "fanoutMean": 5.265625, - 
"recvTokensMax": 48, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 73.69600236415863, - "p90": 100.38399696350098, - "p95": 108.19199681282043, - "p99": 146.14400267601013 - }, - "combine": { - "p50": 63.840001821517944, - "p90": 74.87999647855759, - "p95": 80.38400113582611, - "p99": 123.23199957609177 - }, - "roundtrip": { - "p50": 161.43999993801117, - "p90": 194.97600197792053, - "p95": 208.67200195789337, - "p99": 259.68000292778015 - }, - "isolatedSum": { - "p50": 137.53600418567657, - "p90": 175.26399344205856, - "p95": 188.57599794864655, - "p99": 269.3760022521019 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4924416, - "combineLogicalBytes": 9848832, - "fanoutMean": 5.3671875, - "recvTokensMax": 91, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 71.6480016708374, - "p90": 94.14400160312653, - "p95": 102.65599936246872, - "p99": 167.32800006866455 - }, - "combine": { - "p50": 69.37599927186966, - "p90": 80.32000064849854, - "p95": 88.19200098514557, - "p99": 124.22399967908859 - }, - "roundtrip": { - "p50": 167.55199432373047, - "p90": 192.32000410556793, - "p95": 208.54400098323822, - "p99": 261.50399446487427 - }, - "isolatedSum": { - "p50": 141.02400094270706, - "p90": 174.46400225162506, - "p95": 190.8480003476143, - "p99": 291.55199974775314 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 9748480, - "combineLogicalBytes": 19496960, - "fanoutMean": 5.3125, - "recvTokensMax": 178, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 64, - "globalTokens": 512, - "dispatch": { - "p50": 79.80799674987793, - "p90": 91.2960022687912, - "p95": 100.51199793815613, - "p99": 125.72799623012543 - }, - "combine": { - "p50": 78.11199873685837, - "p90": 
84.70399677753448, - "p95": 90.7839983701706, - "p99": 105.56799918413162 - }, - "roundtrip": { - "p50": 183.80799889564514, - "p90": 212.96000480651855, - "p95": 232.7679991722107, - "p99": 262.688010931015 - }, - "isolatedSum": { - "p50": 157.9199954867363, - "p90": 175.99999904632568, - "p95": 191.29599630832672, - "p99": 231.29599541425705 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 19418112, - "combineLogicalBytes": 38836224, - "fanoutMean": 5.291015625, - "recvTokensMax": 372, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 88.3840024471283, - "p90": 100.3199964761734, - "p95": 105.56799918413162, - "p99": 115.77600240707397 - }, - "combine": { - "p50": 97.24800288677216, - "p90": 107.10400342941284, - "p95": 112.28799819946289, - "p99": 124.38400089740753 - }, - "roundtrip": { - "p50": 212.16000616550446, - "p90": 234.3360036611557, - "p95": 243.93600225448608, - "p99": 303.5520017147064 - }, - "isolatedSum": { - "p50": 185.63200533390045, - "p90": 207.42399990558624, - "p95": 217.8559973835945, - "p99": 240.1600033044815 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 38757376, - "combineLogicalBytes": 77514752, - "fanoutMean": 5.2802734375, - "recvTokensMax": 707, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-180681db", - "identity": "h200|deepep|7168|8|384|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||d6c49ae98878760", - "colorKey": "h200_87683f6c", - "comparisonKey": "3006922c66758d92", - "schemaVersion": 3, - "generatedAt": "2026-06-26T23:53:15.049258+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_9", - "sku": "h200", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - 
"measurementContract": "runtime-visible-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · fp8", - "model": "Kimi-K2", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 384, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "fp8", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "d6c49ae98878760", - "workloadId": "set:8:9a27d0df4b17fa09", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28271721386", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271721386", - "createdAt": "2026-06-26T23:53:15.049258+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 212.44800090789795, - "p90": 272.8320062160492, - "p95": 292.32001304626465, - "p99": 382.752001285553 - }, - "combine": { - "p50": 58.75200033187866, - "p90": 73.40800017118454, - "p95": 78.5600021481514, - "p99": 96.12800180912018 - }, - "roundtrip": { - "p50": 247.26399779319763, - "p90": 306.36799335479736, - "p95": 325.1200020313263, - "p99": 389.8560106754303 - }, - "isolatedSum": { - "p50": 271.2000012397766, - "p90": 346.24000638723373, - "p95": 
370.88001519441605, - "p99": 478.88000309467316 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 301056, - "combineLogicalBytes": 602112, - "fanoutMean": 5.25, - "recvTokensMax": 8, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 212.09600567817688, - "p90": 273.69600534439087, - "p95": 297.791987657547, - "p99": 586.5920186042786 - }, - "combine": { - "p50": 58.17599967122078, - "p90": 74.81600344181061, - "p95": 79.71200346946716, - "p99": 97.120001912117 - }, - "roundtrip": { - "p50": 265.3760015964508, - "p90": 339.6799862384796, - "p95": 375.5840063095093, - "p99": 458.8159918785095 - }, - "isolatedSum": { - "p50": 270.27200534939766, - "p90": 348.5120087862015, - "p95": 377.50399112701416, - "p99": 683.7120205163956 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 609280, - "combineLogicalBytes": 1218560, - "fanoutMean": 5.3125, - "recvTokensMax": 14, - "stragglerRank": 3, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 197.6960003376007, - "p90": 252.8960108757019, - "p95": 267.64801144599915, - "p99": 318.59201192855835 - }, - "combine": { - "p50": 57.920001447200775, - "p90": 70.49600034952164, - "p95": 76.4160007238388, - "p99": 87.36000210046768 - }, - "roundtrip": { - "p50": 246.91200256347656, - "p90": 306.2080144882202, - "p95": 339.1680121421814, - "p99": 585.1519703865051 - }, - "isolatedSum": { - "p50": 255.61600178480148, - "p90": 323.39201122522354, - "p95": 344.06401216983795, - "p99": 405.95201402902603 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1204224, - "combineLogicalBytes": 2408448, - "fanoutMean": 5.25, - "recvTokensMax": 26, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 211.93599700927734, - "p90": 
265.1520073413849, - "p95": 276.6079902648926, - "p99": 336.5760147571564 - }, - "combine": { - "p50": 59.647999703884125, - "p90": 77.02399790287018, - "p95": 82.94399827718735, - "p99": 96.54399752616882 - }, - "roundtrip": { - "p50": 259.5840096473694, - "p90": 317.6639974117279, - "p95": 331.9680094718933, - "p99": 400.06399154663086 - }, - "isolatedSum": { - "p50": 271.58399671316147, - "p90": 342.17600524425507, - "p95": 359.5519885420799, - "p99": 433.1200122833252 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2415616, - "combineLogicalBytes": 4831232, - "fanoutMean": 5.265625, - "recvTokensMax": 48, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 214.01600539684296, - "p90": 275.90399980545044, - "p95": 303.9039969444275, - "p99": 374.30399656295776 - }, - "combine": { - "p50": 61.76000088453293, - "p90": 80.4160013794899, - "p95": 84.79999750852585, - "p99": 99.16800260543823 - }, - "roundtrip": { - "p50": 258.59200954437256, - "p90": 322.9120075702667, - "p95": 347.104012966156, - "p99": 422.39999771118164 - }, - "isolatedSum": { - "p50": 275.7760062813759, - "p90": 356.32000118494034, - "p95": 388.70399445295334, - "p99": 473.471999168396 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4924416, - "combineLogicalBytes": 9848832, - "fanoutMean": 5.3671875, - "recvTokensMax": 91, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 209.9200040102005, - "p90": 263.7439966201782, - "p95": 275.2639949321747, - "p99": 311.13600730895996 - }, - "combine": { - "p50": 67.58400052785873, - "p90": 84.09599959850311, - "p95": 87.42400258779526, - "p99": 103.90400141477585 - }, - "roundtrip": { - "p50": 263.5520100593567, - "p90": 318.30400228500366, - "p95": 334.5920145511627, - "p99": 403.80799770355225 - }, - "isolatedSum": { 
- "p50": 277.50400453805923, - "p90": 347.83999621868134, - "p95": 362.68799751996994, - "p99": 415.0400087237358 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 9748480, - "combineLogicalBytes": 19496960, - "fanoutMean": 5.3125, - "recvTokensMax": 178, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 64, - "globalTokens": 512, - "dispatch": { - "p50": 205.82400262355804, - "p90": 253.02401185035706, - "p95": 266.36800169944763, - "p99": 311.5200102329254 - }, - "combine": { - "p50": 78.40000092983246, - "p90": 92.76799857616425, - "p95": 98.04800152778625, - "p99": 111.07199639081955 - }, - "roundtrip": { - "p50": 272.7360129356384, - "p90": 325.50400495529175, - "p95": 342.6879942417145, - "p99": 378.6559998989105 - }, - "isolatedSum": { - "p50": 284.2240035533905, - "p90": 345.7920104265213, - "p95": 364.4160032272339, - "p99": 422.59200662374496 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 19418112, - "combineLogicalBytes": 38836224, - "fanoutMean": 5.291015625, - "recvTokensMax": 372, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 223.23200106620789, - "p90": 271.61601185798645, - "p95": 281.98400139808655, - "p99": 319.96798515319824 - }, - "combine": { - "p50": 96.25600278377533, - "p90": 112.44799941778183, - "p95": 115.61600118875504, - "p99": 127.36000120639801 - }, - "roundtrip": { - "p50": 324.864000082016, - "p90": 388.63998651504517, - "p95": 415.3279960155487, - "p99": 494.3999946117401 - }, - "isolatedSum": { - "p50": 319.4880038499832, - "p90": 384.0640112757683, - "p95": 397.6000025868416, - "p99": 447.32798635959625 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 38757376, - "combineLogicalBytes": 77514752, - "fanoutMean": 5.2802734375, - "recvTokensMax": 707, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - 
} - ] - }, - { - "id": "cx-b1b077c8", - "identity": "h200|deepep|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|ac583971f94b176", - "colorKey": "h200_3a17d46b", - "comparisonKey": "f29f35383c05d38b", - "schemaVersion": 3, - "generatedAt": "2026-06-26T17:30:04.228393+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_9", - "sku": "h200", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "normalized", - "suite": "resource-constrained", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · fp8 (norm)", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "fp8", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": 0.18, - "achievedFraction": 0.1818, - "configuredUnits": 24, - "deviceUnits": 132, - "resourceClass": "resource-constrained", - "conformanceClass": "resource-conforming", - "fixedKernel": false, - "paretoEligible": true - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "ac583971f94b176", - "workloadId": "set:8:d8d49658059863f2", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28254401482", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254401482", - "createdAt": 
"2026-06-26T17:30:04.228393+00:00", - "sha": "60dec7d70f554e252fec87709e2be52752947db1" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 70.23999840021133, - "p90": 92.38400310277939, - "p95": 101.88800096511841, - "p99": 121.15199863910675 - }, - "combine": { - "p50": 58.88000130653381, - "p90": 70.3359991312027, - "p95": 78.65600287914276, - "p99": 101.43999755382538 - }, - "roundtrip": { - "p50": 159.32799875736237, - "p90": 200.3840059041977, - "p95": 213.69600296020508, - "p99": 243.58400702476501 - }, - "isolatedSum": { - "p50": 129.11999970674515, - "p90": 162.7200022339821, - "p95": 180.54400384426117, - "p99": 222.59199619293213 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 315392, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 67.55200028419495, - "p90": 91.45600348711014, - "p95": 102.33599692583084, - "p99": 144.57599818706512 - }, - "combine": { - "p50": 59.42400172352791, - "p90": 71.6480016708374, - "p95": 81.24800026416779, - "p99": 105.43999820947647 - }, - "roundtrip": { - "p50": 156.12800419330597, - "p90": 199.13600385189056, - "p95": 215.32799303531647, - "p99": 382.4000060558319 - }, - "isolatedSum": { - "p50": 126.97600200772285, - "p90": 163.10400515794754, - "p95": 183.58399718999863, - "p99": 250.0159963965416 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 616448, - "combineLogicalBytes": 1232896, - "fanoutMean": 5.375, - "recvTokensMax": 13, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 69.72800195217133, - "p90": 88.54400366544724, - "p95": 98.24000298976898, - "p99": 228.60799729824066 - }, - "combine": { - "p50": 60.92799827456474, - "p90": 72.92799651622772, - "p95": 
77.7600035071373, - "p99": 90.91199934482574 - }, - "roundtrip": { - "p50": 160.67199409008026, - "p90": 186.20799481868744, - "p95": 196.44799828529358, - "p99": 242.14400351047516 - }, - "isolatedSum": { - "p50": 130.65600022673607, - "p90": 161.47200018167496, - "p95": 176.00000649690628, - "p99": 319.5199966430664 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1240064, - "combineLogicalBytes": 2480128, - "fanoutMean": 5.40625, - "recvTokensMax": 29, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 70.49600034952164, - "p90": 97.47199714183807, - "p95": 107.84000158309937, - "p99": 151.90400183200836 - }, - "combine": { - "p50": 61.47199869155884, - "p90": 76.89599692821503, - "p95": 85.28000116348267, - "p99": 107.64800012111664 - }, - "roundtrip": { - "p50": 155.8080017566681, - "p90": 187.45599687099457, - "p95": 205.24799823760986, - "p99": 242.88000166416168 - }, - "isolatedSum": { - "p50": 131.96799904108047, - "p90": 174.3679940700531, - "p95": 193.12000274658203, - "p99": 259.552001953125 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2487296, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 68.4799998998642, - "p90": 86.94399893283844, - "p95": 95.58399766683578, - "p99": 126.08000636100769 - }, - "combine": { - "p50": 63.391998410224915, - "p90": 77.34400033950806, - "p95": 86.62399649620056, - "p99": 119.55200135707855 - }, - "roundtrip": { - "p50": 164.2879992723465, - "p90": 188.09600174427032, - "p95": 203.64800095558167, - "p99": 272.7999985218048 - }, - "isolatedSum": { - "p50": 131.8719983100891, - "p90": 164.2879992723465, - "p95": 182.20799416303635, - "p99": 245.63200771808624 - }, - "roundtripMeasured": true, - 
"dispatchLogicalBytes": 4960256, - "combineLogicalBytes": 9920512, - "fanoutMean": 5.40625, - "recvTokensMax": 92, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 68.25599819421768, - "p90": 91.13600105047226, - "p95": 98.91200065612793, - "p99": 114.78400230407715 - }, - "combine": { - "p50": 66.27199798822403, - "p90": 78.84799689054489, - "p95": 85.40800213813782, - "p99": 92.73599833250046 - }, - "roundtrip": { - "p50": 165.0879979133606, - "p90": 203.45599949359894, - "p95": 221.15199267864227, - "p99": 462.911993265152 - }, - "isolatedSum": { - "p50": 134.5279961824417, - "p90": 169.98399794101715, - "p95": 184.32000279426575, - "p99": 207.5200006365776 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 9863168, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 182, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 64, - "globalTokens": 512, - "dispatch": { - "p50": 74.46400076150894, - "p90": 89.21600133180618, - "p95": 99.32799637317657, - "p99": 120.57600170373917 - }, - "combine": { - "p50": 80.44800162315369, - "p90": 89.75999802350998, - "p95": 94.65599805116653, - "p99": 122.30399996042252 - }, - "roundtrip": { - "p50": 183.45600366592407, - "p90": 210.78400313854218, - "p95": 228.5439968109131, - "p99": 287.4239981174469 - }, - "isolatedSum": { - "p50": 154.91200238466263, - "p90": 178.97599935531616, - "p95": 193.9839944243431, - "p99": 242.88000166416168 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 19496960, - "combineLogicalBytes": 38993920, - "fanoutMean": 5.3125, - "recvTokensMax": 367, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 88.67199718952179, - "p90": 102.7199998497963, - "p95": 111.93600296974182, - "p99": 
128.9920061826706 - }, - "combine": { - "p50": 96.83199971914291, - "p90": 108.86400192975998, - "p95": 114.43199962377548, - "p99": 124.1919994354248 - }, - "roundtrip": { - "p50": 208.99200439453125, - "p90": 229.34399545192719, - "p95": 239.9040013551712, - "p99": 260.22401452064514 - }, - "isolatedSum": { - "p50": 185.5039969086647, - "p90": 211.58400177955627, - "p95": 226.3680025935173, - "p99": 253.1840056180954 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 38836224, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-a2649fd4", - "identity": "h200|deepep|7168|8|256|fp8|normal|cached-layout-comm-only-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|ac583971f94b176", - "colorKey": "h200_50a9ee63", - "comparisonKey": "aae31d5755e4ce66", - "schemaVersion": 3, - "generatedAt": "2026-06-26T17:30:20.768220+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_1", - "sku": "h200", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "normalized", - "suite": "resource-constrained", - "comparisonClass": "standardized", - "measurementContract": "cached-layout-comm-only-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · fp8 (norm) [cl]", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "fp8", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": 0.18, - "achievedFraction": 0.1818, - "configuredUnits": 24, - "deviceUnits": 132, - "resourceClass": "resource-constrained", - "conformanceClass": "resource-conforming", - 
"fixedKernel": false, - "paretoEligible": true - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "ac583971f94b176", - "workloadId": "set:8:d8d49658059863f2", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28254418007", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254418007", - "createdAt": "2026-06-26T17:30:20.768220+00:00", - "sha": "60dec7d70f554e252fec87709e2be52752947db1" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 56.09599873423576, - "p90": 86.7839977145195, - "p95": 94.97600048780441, - "p99": 109.98400300741196 - }, - "combine": { - "p50": 60.864001512527466, - "p90": 79.64800298213959, - "p95": 85.7279971241951, - "p99": 109.24799740314484 - }, - "roundtrip": { - "p50": 148.60799908638, - "p90": 199.42399859428406, - "p95": 207.45599269866943, - "p99": 260.5440020561218 - }, - "isolatedSum": { - "p50": 116.96000024676323, - "p90": 166.4320006966591, - "p95": 180.7039976119995, - "p99": 219.2320004105568 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 315392, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 3, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 52.06400156021118, - "p90": 83.42400193214417, - "p95": 88.99199962615967, - "p99": 123.80799651145935 - }, - "combine": { - "p50": 59.808000922203064, - "p90": 77.91999727487564, - "p95": 84.48000252246857, - "p99": 130.78400492668152 - }, - "roundtrip": { - "p50": 145.82400023937225, - "p90": 194.91200149059296, - "p95": 215.10399878025055, - "p99": 
273.79199862480164 - }, - "isolatedSum": { - "p50": 111.87200248241425, - "p90": 161.3439992070198, - "p95": 173.47200214862823, - "p99": 254.59200143814087 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 616448, - "combineLogicalBytes": 1232896, - "fanoutMean": 5.375, - "recvTokensMax": 13, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 56.60799890756607, - "p90": 89.08800035715103, - "p95": 98.91200065612793, - "p99": 111.7440015077591 - }, - "combine": { - "p50": 60.7680007815361, - "p90": 78.52800190448761, - "p95": 84.22400057315826, - "p99": 97.95200079679489 - }, - "roundtrip": { - "p50": 143.74400675296783, - "p90": 192.7040070295334, - "p95": 212.0320051908493, - "p99": 294.46399211883545 - }, - "isolatedSum": { - "p50": 117.37599968910217, - "p90": 167.61600226163864, - "p95": 183.1360012292862, - "p99": 209.69600230455399 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1240064, - "combineLogicalBytes": 2480128, - "fanoutMean": 5.40625, - "recvTokensMax": 29, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 52.25599929690361, - "p90": 80.09599894285202, - "p95": 88.35200220346451, - "p99": 109.37599837779999 - }, - "combine": { - "p50": 60.736000537872314, - "p90": 79.48800176382065, - "p95": 85.60000360012054, - "p99": 108.64000022411346 - }, - "roundtrip": { - "p50": 141.12000167369843, - "p90": 183.87199938297272, - "p95": 195.23200392723083, - "p99": 286.24001145362854 - }, - "isolatedSum": { - "p50": 112.99199983477592, - "p90": 159.58400070667267, - "p95": 173.95200580358505, - "p99": 218.01599860191345 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2487296, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 
600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 53.47200110554695, - "p90": 77.60000228881836, - "p95": 85.05599945783615, - "p99": 93.9520001411438 - }, - "combine": { - "p50": 62.49599903821945, - "p90": 77.34400033950806, - "p95": 82.11199939250946, - "p99": 95.77599912881851 - }, - "roundtrip": { - "p50": 142.17600226402283, - "p90": 183.77600610256195, - "p95": 197.79199361801147, - "p99": 241.5360063314438 - }, - "isolatedSum": { - "p50": 115.9680001437664, - "p90": 154.94400262832642, - "p95": 167.1679988503456, - "p99": 189.7279992699623 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4960256, - "combineLogicalBytes": 9920512, - "fanoutMean": 5.40625, - "recvTokensMax": 92, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 55.39200082421303, - "p90": 81.05599880218506, - "p95": 89.15200084447861, - "p99": 109.6000000834465 - }, - "combine": { - "p50": 66.39999896287918, - "p90": 84.927998483181, - "p95": 88.3840024471283, - "p99": 101.3759970664978 - }, - "roundtrip": { - "p50": 148.15999567508698, - "p90": 191.23199582099915, - "p95": 200.57600736618042, - "p99": 228.4799963235855 - }, - "isolatedSum": { - "p50": 121.79199978709221, - "p90": 165.98399728536606, - "p95": 177.5360032916069, - "p99": 210.9759971499443 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 9863168, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 182, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 64, - "globalTokens": 512, - "dispatch": { - "p50": 65.08799642324448, - "p90": 90.97599983215332, - "p95": 100.63999891281128, - "p99": 148.28799664974213 - }, - "combine": { - "p50": 81.05599880218506, - "p90": 96.54399752616882, - "p95": 99.23200309276581, - "p99": 106.52799904346466 - }, - "roundtrip": { - "p50": 
171.424001455307, - "p90": 216.8000042438507, - "p95": 232.1919947862625, - "p99": 288.38399052619934 - }, - "isolatedSum": { - "p50": 146.14399522542953, - "p90": 187.51999735832214, - "p95": 199.8720020055771, - "p99": 254.8159956932068 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 19496960, - "combineLogicalBytes": 38993920, - "fanoutMean": 5.3125, - "recvTokensMax": 367, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 75.00799745321274, - "p90": 94.14400160312653, - "p95": 99.04000163078308, - "p99": 115.23199826478958 - }, - "combine": { - "p50": 97.34400361776352, - "p90": 115.84000289440155, - "p95": 119.03999745845795, - "p99": 133.56800377368927 - }, - "roundtrip": { - "p50": 197.79199361801147, - "p90": 227.80799865722656, - "p95": 237.8239929676056, - "p99": 276.8320143222809 - }, - "isolatedSum": { - "p50": 172.35200107097626, - "p90": 209.98400449752808, - "p95": 218.07999908924103, - "p99": 248.80000203847885 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 38836224, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-fdd09e42", - "identity": "h200|deepep|7168|8|256|fp8|normal|cached-layout-comm-only-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", - "colorKey": "h200_4f483b60", - "comparisonKey": "95dcff383339100e", - "schemaVersion": 3, - "generatedAt": "2026-06-26T23:50:13.723754+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_10", - "sku": "h200", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "cached-layout-comm-only-v1", - "topologyClass": "h200-nvlink-island", - "transport": 
"nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · fp8 [cl]", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "fp8", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "ac583971f94b176", - "workloadId": "set:8:d8d49658059863f2", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28271629782", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271629782", - "createdAt": "2026-06-26T23:50:13.723754+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 51.04000121355057, - "p90": 76.64000242948532, - "p95": 84.48000252246857, - "p99": 115.32799899578094 - }, - "combine": { - "p50": 59.20000001788139, - "p90": 77.47200131416321, - "p95": 87.13600039482117, - "p99": 133.85599851608276 - }, - "roundtrip": { - "p50": 140.73599874973297, - "p90": 177.18400061130524, - "p95": 189.60000574588776, - "p99": 239.3919974565506 - }, - "isolatedSum": { - "p50": 110.24000123143196, - "p90": 154.11200374364853, - "p95": 171.61600291728973, - "p99": 249.1839975118637 - }, - "roundtripMeasured": true, - 
"dispatchLogicalBytes": 315392, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 2, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 54.71999943256378, - "p90": 82.40000158548355, - "p95": 88.16000074148178, - "p99": 115.10399729013443 - }, - "combine": { - "p50": 60.19200012087822, - "p90": 74.78400319814682, - "p95": 81.44000172615051, - "p99": 106.84800148010254 - }, - "roundtrip": { - "p50": 147.13600277900696, - "p90": 190.75199961662292, - "p95": 217.79200434684753, - "p99": 253.79198789596558 - }, - "isolatedSum": { - "p50": 114.911999553442, - "p90": 157.18400478363037, - "p95": 169.6000024676323, - "p99": 221.95199877023697 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 616448, - "combineLogicalBytes": 1232896, - "fanoutMean": 5.375, - "recvTokensMax": 13, - "stragglerRank": 2, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 54.048001766204834, - "p90": 77.53600180149078, - "p95": 84.99199897050858, - "p99": 106.4319983124733 - }, - "combine": { - "p50": 60.70400029420853, - "p90": 75.83999633789062, - "p95": 82.36800134181976, - "p99": 106.84800148010254 - }, - "roundtrip": { - "p50": 144.31999623775482, - "p90": 184.4799965620041, - "p95": 193.9840018749237, - "p99": 240.83200097084045 - }, - "isolatedSum": { - "p50": 114.75200206041336, - "p90": 153.3759981393814, - "p95": 167.36000031232834, - "p99": 213.27999979257584 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1240064, - "combineLogicalBytes": 2480128, - "fanoutMean": 5.40625, - "recvTokensMax": 29, - "stragglerRank": 2, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 54.687999188899994, - "p90": 88.25600147247314, - "p95": 94.46399658918381, - "p99": 120.19199877977371 - 
}, - "combine": { - "p50": 61.824001371860504, - "p90": 77.02399790287018, - "p95": 83.26400071382523, - "p99": 101.88800096511841 - }, - "roundtrip": { - "p50": 140.35199582576752, - "p90": 180.09600043296814, - "p95": 193.53599846363068, - "p99": 230.5919975042343 - }, - "isolatedSum": { - "p50": 116.5120005607605, - "p90": 165.27999937534332, - "p95": 177.72799730300903, - "p99": 222.07999974489212 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2487296, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 54.17599901556969, - "p90": 81.11999928951263, - "p95": 88.8959988951683, - "p99": 129.4720023870468 - }, - "combine": { - "p50": 62.3680017888546, - "p90": 78.36800068616867, - "p95": 82.56000280380249, - "p99": 101.21600329875946 - }, - "roundtrip": { - "p50": 140.47999680042267, - "p90": 177.66399681568146, - "p95": 196.99199497699738, - "p99": 237.7600073814392 - }, - "isolatedSum": { - "p50": 116.54400080442429, - "p90": 159.4879999756813, - "p95": 171.4560016989708, - "p99": 230.68800568580627 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4960256, - "combineLogicalBytes": 9920512, - "fanoutMean": 5.40625, - "recvTokensMax": 92, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 57.24800005555153, - "p90": 79.64800298213959, - "p95": 85.91999858617783, - "p99": 104.67199981212616 - }, - "combine": { - "p50": 68.41599941253662, - "p90": 82.33600109815598, - "p95": 85.7279971241951, - "p99": 99.10400211811066 - }, - "roundtrip": { - "p50": 145.1520025730133, - "p90": 178.1120002269745, - "p95": 187.6479983329773, - "p99": 228.7359982728958 - }, - "isolatedSum": { - "p50": 125.66399946808815, - "p90": 161.98400408029556, - "p95": 
171.64799571037292, - "p99": 203.77600193023682 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 9863168, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 182, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 64, - "globalTokens": 512, - "dispatch": { - "p50": 66.30399823188782, - "p90": 83.90399813652039, - "p95": 90.17600119113922, - "p99": 149.1840034723282 - }, - "combine": { - "p50": 78.72000336647034, - "p90": 93.79199892282486, - "p95": 98.88000041246414, - "p99": 114.01599645614624 - }, - "roundtrip": { - "p50": 164.8319959640503, - "p90": 199.48799908161163, - "p95": 211.2639993429184, - "p99": 271.93599939346313 - }, - "isolatedSum": { - "p50": 145.02400159835815, - "p90": 177.69599705934525, - "p95": 189.05600160360336, - "p99": 263.1999999284744 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 19496960, - "combineLogicalBytes": 38993920, - "fanoutMean": 5.3125, - "recvTokensMax": 367, - "stragglerRank": 2, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 73.82400333881378, - "p90": 90.62399715185165, - "p95": 95.39200365543365, - "p99": 114.52800035476685 - }, - "combine": { - "p50": 97.24800288677216, - "p90": 112.31999844312668, - "p95": 115.77600240707397, - "p99": 130.49599528312683 - }, - "roundtrip": { - "p50": 199.77599382400513, - "p90": 228.32000255584717, - "p95": 247.29600548744202, - "p99": 297.88801074028015 - }, - "isolatedSum": { - "p50": 171.07200622558594, - "p90": 202.94399559497833, - "p95": 211.16800606250763, - "p99": 245.02399563789368 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 38836224, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-39796825", - "identity": 
"h200|deepep|7168|8|256|fp8|ll|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", - "colorKey": "h200_ff232ea5", - "comparisonKey": "643e1b15925a53af", - "schemaVersion": 3, - "generatedAt": "2026-06-26T23:51:34.222899+00:00", - "status": "valid", - "publicationStatus": "diagnostic", - "runner": "h200-dgxc-slurm_4", - "sku": "h200", - "backend": "deepep", - "phase": "decode", - "mode": "ll", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · fp8 LL", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "fp8", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": null, - "configuredUnits": null, - "deviceUnits": 132, - "resourceClass": "fixed-kernel", - "conformanceClass": "not-applicable", - "fixedKernel": true, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "ac583971f94b176", - "workloadId": "set:8:d8d49658059863f2", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28271653486", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271653486", - "createdAt": "2026-06-26T23:51:34.222899+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - 
"tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 29.08799983561039, - "p90": 36.41600161790848, - "p95": 44.28799822926521, - "p99": 63.551999628543854 - }, - "combine": { - "p50": 40.95999896526337, - "p90": 64.70400094985962, - "p95": 74.8480036854744, - "p99": 125.69600343704224 - }, - "roundtrip": { - "p50": 1856.8320274353027, - "p90": 1879.7760009765625, - "p95": 1894.495964050293, - "p99": 2116.607904434204 - }, - "isolatedSum": { - "p50": 70.04799880087376, - "p90": 101.1200025677681, - "p95": 119.13600191473961, - "p99": 189.2480030655861 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 315392, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 14, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 28.76799926161766, - "p90": 36.22400015592575, - "p95": 42.11200028657913, - "p99": 48.767998814582825 - }, - "combine": { - "p50": 36.06399893760681, - "p90": 45.75999826192856, - "p95": 52.2879995405674, - "p99": 84.1279998421669 - }, - "roundtrip": { - "p50": 1847.4880456924438, - "p90": 1861.0880374908447, - "p95": 1871.3279962539673, - "p99": 2004.607915878296 - }, - "isolatedSum": { - "p50": 64.83199819922447, - "p90": 81.98399841785431, - "p95": 94.39999982714653, - "p99": 132.89599865674973 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 616448, - "combineLogicalBytes": 1232896, - "fanoutMean": 5.375, - "recvTokensMax": 21, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 28.575999662280083, - "p90": 38.816001266241074, - "p95": 45.632001012563705, - "p99": 57.95200169086456 - }, - "combine": { - "p50": 41.69600084424019, - "p90": 59.93599817156792, - "p95": 68.06399673223495, - "p99": 170.30400037765503 - }, - "roundtrip": { - "p50": 1848.3840227127075, - "p90": 1869.920015335083, - 
"p95": 1881.9199800491333, - "p99": 1995.0400590896606 - }, - "isolatedSum": { - "p50": 70.27200050652027, - "p90": 98.75199943780899, - "p95": 113.69599774479866, - "p99": 228.2560020685196 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1240064, - "combineLogicalBytes": 2480128, - "fanoutMean": 5.40625, - "recvTokensMax": 39, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 29.37600016593933, - "p90": 37.21600025892258, - "p95": 50.65599828958511, - "p99": 62.65600025653839 - }, - "combine": { - "p50": 47.520000487565994, - "p90": 61.664000153541565, - "p95": 68.57600063085556, - "p99": 103.2319962978363 - }, - "roundtrip": { - "p50": 1859.2000007629395, - "p90": 1878.6879777908325, - "p95": 1886.1440420150757, - "p99": 1924.1600036621094 - }, - "isolatedSum": { - "p50": 76.89600065350533, - "p90": 98.88000041246414, - "p95": 119.23199892044067, - "p99": 165.8879965543747 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2487296, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 74, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 31.039999797940254, - "p90": 43.83999854326248, - "p95": 53.63199859857559, - "p99": 66.01600348949432 - }, - "combine": { - "p50": 52.25599929690361, - "p90": 69.43999975919724, - "p95": 82.40000158548355, - "p99": 131.99999928474426 - }, - "roundtrip": { - "p50": 1864.0960454940796, - "p90": 1884.160041809082, - "p95": 1898.1759548187256, - "p99": 1969.1519737243652 - }, - "isolatedSum": { - "p50": 83.29599909484386, - "p90": 113.27999830245972, - "p95": 136.03200018405914, - "p99": 198.0160027742386 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4960256, - "combineLogicalBytes": 9920512, - "fanoutMean": 5.40625, - "recvTokensMax": 145, - "stragglerRank": 0, 
- "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 33.79200026392937, - "p90": 45.27999833226204, - "p95": 49.31199923157692, - "p99": 58.14399942755699 - }, - "combine": { - "p50": 47.839999198913574, - "p90": 64.25599753856659, - "p95": 70.36799937486649, - "p99": 101.53599828481674 - }, - "roundtrip": { - "p50": 1865.056037902832, - "p90": 1881.5360069274902, - "p95": 1888.8959884643555, - "p99": 1917.7600145339966 - }, - "isolatedSum": { - "p50": 81.63199946284294, - "p90": 109.53599587082863, - "p95": 119.6799986064434, - "p99": 159.67999771237373 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 9863168, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 287, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 64, - "globalTokens": 512, - "dispatch": { - "p50": 40.44799879193306, - "p90": 49.6320016682148, - "p95": 52.799999713897705, - "p99": 64.96000289916992 - }, - "combine": { - "p50": 63.58399987220764, - "p90": 81.31200075149536, - "p95": 98.7199991941452, - "p99": 231.1680018901825 - }, - "roundtrip": { - "p50": 1885.632038116455, - "p90": 1903.3279418945312, - "p95": 1914.080023765564, - "p99": 2039.776086807251 - }, - "isolatedSum": { - "p50": 104.0319986641407, - "p90": 130.94400241971016, - "p95": 151.5199989080429, - "p99": 296.1280047893524 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 19496960, - "combineLogicalBytes": 38993920, - "fanoutMean": 5.3125, - "recvTokensMax": 564, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 51.552001386880875, - "p90": 60.60799956321716, - "p95": 62.65600025653839, - "p99": 73.82400333881378 - }, - "combine": { - "p50": 86.81599795818329, - "p90": 96.19200229644775, - "p95": 108.47999900579453, - "p99": 
146.7839926481247 - }, - "roundtrip": { - "p50": 1922.6560592651367, - "p90": 1938.4959936141968, - "p95": 1957.0879936218262, - "p99": 2130.3679943084717 - }, - "isolatedSum": { - "p50": 138.36799934506416, - "p90": 156.80000185966492, - "p95": 171.13599926233292, - "p99": 220.60799598693848 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 38836224, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 1104, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-dbb437b5", - "identity": "h200|deepep|7168|8|256|fp8|ll|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", - "colorKey": "h200_7ec76e6d", - "comparisonKey": "9a87b27b98bf2d7a", - "schemaVersion": 3, - "generatedAt": "2026-06-26T23:51:35.330044+00:00", - "status": "valid", - "publicationStatus": "diagnostic", - "runner": "h200-dgxc-slurm_13", - "sku": "h200", - "backend": "deepep", - "phase": "decode", - "mode": "ll", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "runtime-visible-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · fp8 LL", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "fp8", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": null, - "configuredUnits": null, - "deviceUnits": 132, - "resourceClass": "fixed-kernel", - "conformanceClass": "not-applicable", - "fixedKernel": true, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - 
"traceSignature": "ac583971f94b176", - "workloadId": "set:8:d8d49658059863f2", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28271656517", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271656517", - "createdAt": "2026-06-26T23:51:35.330044+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 28.76799926161766, - "p90": 38.88000175356865, - "p95": 44.73600164055824, - "p99": 61.15199998021126 - }, - "combine": { - "p50": 36.768000572919846, - "p90": 48.287998884916306, - "p95": 57.53599852323532, - "p99": 90.81599861383438 - }, - "roundtrip": { - "p50": 1847.7439880371094, - "p90": 1855.6159734725952, - "p95": 1860.543966293335, - "p99": 1893.2160139083862 - }, - "isolatedSum": { - "p50": 65.5359998345375, - "p90": 87.16800063848495, - "p95": 102.27200016379356, - "p99": 151.96799859404564 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 315392, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 14, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 28.991999104619026, - "p90": 33.376000821590424, - "p95": 37.02399879693985, - "p99": 41.05599969625473 - }, - "combine": { - "p50": 37.59999945759773, - "p90": 49.375999718904495, - "p95": 58.62399935722351, - "p99": 235.83999276161194 - }, - "roundtrip": { - "p50": 1847.6799726486206, - "p90": 1855.936050415039, - "p95": 1861.4720106124878, - "p99": 1959.007978439331 - }, - "isolatedSum": { - "p50": 66.59199856221676, - "p90": 82.75200054049492, - "p95": 95.64799815416336, - "p99": 276.89599245786667 - }, - 
"roundtripMeasured": true, - "dispatchLogicalBytes": 616448, - "combineLogicalBytes": 1232896, - "fanoutMean": 5.375, - "recvTokensMax": 21, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 28.16000021994114, - "p90": 52.41600051522255, - "p95": 58.59199911355972, - "p99": 83.23200047016144 - }, - "combine": { - "p50": 36.959998309612274, - "p90": 48.06400090456009, - "p95": 54.59199845790863, - "p99": 94.59199756383896 - }, - "roundtrip": { - "p50": 1848.3200073242188, - "p90": 1858.62398147583, - "p95": 1864.5440340042114, - "p99": 1925.9519577026367 - }, - "isolatedSum": { - "p50": 65.11999852955341, - "p90": 100.48000141978264, - "p95": 113.18399757146835, - "p99": 177.8239980340004 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1240064, - "combineLogicalBytes": 2480128, - "fanoutMean": 5.40625, - "recvTokensMax": 39, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 29.7279991209507, - "p90": 36.3520011305809, - "p95": 41.08799993991852, - "p99": 52.191998809576035 - }, - "combine": { - "p50": 37.88800165057182, - "p90": 50.52800104022026, - "p95": 61.24800071120262, - "p99": 175.7120043039322 - }, - "roundtrip": { - "p50": 1849.4080305099487, - "p90": 1862.7519607543945, - "p95": 1875.4240274429321, - "p99": 1930.5599927902222 - }, - "isolatedSum": { - "p50": 67.61600077152252, - "p90": 86.88000217080116, - "p95": 102.33600065112114, - "p99": 227.90400311350822 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2487296, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 74, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 31.775999814271927, - "p90": 37.856001406908035, - "p95": 
43.007999658584595, - "p99": 52.2879995405674 - }, - "combine": { - "p50": 41.280001401901245, - "p90": 52.319999784231186, - "p95": 64.41599875688553, - "p99": 140.28799533843994 - }, - "roundtrip": { - "p50": 1854.848027229309, - "p90": 1876.3200044631958, - "p95": 1915.3599739074707, - "p99": 1982.6879501342773 - }, - "isolatedSum": { - "p50": 73.05600121617317, - "p90": 90.17600119113922, - "p95": 107.42399841547012, - "p99": 192.57599487900734 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4960256, - "combineLogicalBytes": 9920512, - "fanoutMean": 5.40625, - "recvTokensMax": 145, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 33.344000577926636, - "p90": 36.159999668598175, - "p95": 38.30400109291077, - "p99": 46.14400118589401 - }, - "combine": { - "p50": 46.30399867892265, - "p90": 56.223999708890915, - "p95": 66.49599969387054, - "p99": 109.24799740314484 - }, - "roundtrip": { - "p50": 1862.8159761428833, - "p90": 1875.2959966659546, - "p95": 1890.6559944152832, - "p99": 1946.6559886932373 - }, - "isolatedSum": { - "p50": 79.64799925684929, - "p90": 92.38399937748909, - "p95": 104.80000078678131, - "p99": 155.39199858903885 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 9863168, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 287, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 64, - "globalTokens": 512, - "dispatch": { - "p50": 39.68000039458275, - "p90": 51.58400163054466, - "p95": 57.72799998521805, - "p99": 97.63199836015701 - }, - "combine": { - "p50": 60.70400029420853, - "p90": 75.29599964618683, - "p95": 94.2080020904541, - "p99": 319.7759985923767 - }, - "roundtrip": { - "p50": 1882.3360204696655, - "p90": 1892.0639753341675, - "p95": 1907.5520038604736, - "p99": 1997.3440170288086 - }, - "isolatedSum": { - "p50": 
100.38400068879128, - "p90": 126.88000127673149, - "p95": 151.93600207567215, - "p99": 417.4079969525337 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 19496960, - "combineLogicalBytes": 38993920, - "fanoutMean": 5.3125, - "recvTokensMax": 564, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 51.552001386880875, - "p90": 55.07199838757515, - "p95": 59.007998555898666, - "p99": 66.11199676990509 - }, - "combine": { - "p50": 86.43200248479843, - "p90": 93.08800101280212, - "p95": 100.89600086212158, - "p99": 167.10400581359863 - }, - "roundtrip": { - "p50": 1921.3759899139404, - "p90": 1930.4640293121338, - "p95": 1935.968041419983, - "p99": 1968.6399698257446 - }, - "isolatedSum": { - "p50": 137.9840038716793, - "p90": 148.15999940037727, - "p95": 159.90399941802025, - "p99": 233.21600258350372 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 38836224, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 1104, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-1caa7ff5", - "identity": "h200|deepep|7168|8|256|fp8|ll|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|ac583971f94b176", - "colorKey": "h200_df102230", - "comparisonKey": "2ce1d8f2e79d5005", - "schemaVersion": 3, - "generatedAt": "2026-06-26T17:31:08.227503+00:00", - "status": "valid", - "publicationStatus": "diagnostic", - "runner": "h200-dgxc-slurm_3", - "sku": "h200", - "backend": "deepep", - "phase": "decode", - "mode": "ll", - "resourceMode": "normalized", - "suite": "resource-constrained", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · fp8 LL (norm)", - "model": "DeepSeek-V3/V4", - 
"shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "fp8", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": 0.18, - "achievedFraction": null, - "configuredUnits": null, - "deviceUnits": 132, - "resourceClass": "fixed-kernel", - "conformanceClass": "not-applicable", - "fixedKernel": true, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "ac583971f94b176", - "workloadId": "set:8:d8d49658059863f2", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28254435010", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254435010", - "createdAt": "2026-06-26T17:31:08.227503+00:00", - "sha": "60dec7d70f554e252fec87709e2be52752947db1" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 28.736000880599022, - "p90": 42.24000126123428, - "p95": 44.76799815893173, - "p99": 50.97600072622299 - }, - "combine": { - "p50": 37.087999284267426, - "p90": 44.256001710891724, - "p95": 49.6320016682148, - "p99": 65.60000032186508 - }, - "roundtrip": { - "p50": 1824.4800567626953, - "p90": 1831.7760229110718, - "p95": 1838.3680582046509, - "p99": 1884.1919898986816 - }, - "isolatedSum": { - "p50": 65.82400016486645, - "p90": 86.496002972126, - "p95": 94.39999982714653, - "p99": 116.57600104808807 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 315392, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 14, - "stragglerRank": 5, - 
"correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 28.00000086426735, - "p90": 33.31200033426285, - "p95": 37.66399994492531, - "p99": 50.36799982190132 - }, - "combine": { - "p50": 36.86400130391121, - "p90": 45.27999833226204, - "p95": 51.29599943757057, - "p99": 124.1919994354248 - }, - "roundtrip": { - "p50": 1824.9599933624268, - "p90": 1835.4239463806152, - "p95": 1843.8400030136108, - "p99": 1961.7279767990112 - }, - "isolatedSum": { - "p50": 64.86400216817856, - "p90": 78.59199866652489, - "p95": 88.95999938249588, - "p99": 174.55999925732613 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 616448, - "combineLogicalBytes": 1232896, - "fanoutMean": 5.375, - "recvTokensMax": 21, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 28.48000079393387, - "p90": 33.59999880194664, - "p95": 36.41600161790848, - "p99": 42.33599826693535 - }, - "combine": { - "p50": 37.53599897027016, - "p90": 47.839999198913574, - "p95": 62.144000083208084, - "p99": 136.4479959011078 - }, - "roundtrip": { - "p50": 1825.8240222930908, - "p90": 1833.9519500732422, - "p95": 1842.0480489730835, - "p99": 1925.0880479812622 - }, - "isolatedSum": { - "p50": 66.01599976420403, - "p90": 81.43999800086021, - "p95": 98.56000170111656, - "p99": 178.78399416804314 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1240064, - "combineLogicalBytes": 2480128, - "fanoutMean": 5.40625, - "recvTokensMax": 39, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 28.28799933195114, - "p90": 32.00000151991844, - "p95": 34.55999866127968, - "p99": 39.744000881910324 - }, - "combine": { - "p50": 37.43999823927879, - "p90": 46.78399860858917, - "p95": 53.69599908590317, - "p99": 124.64000284671783 - }, 
- "roundtrip": { - "p50": 1826.3360261917114, - "p90": 1834.1439962387085, - "p95": 1840.1600122451782, - "p99": 1865.6320571899414 - }, - "isolatedSum": { - "p50": 65.72799757122993, - "p90": 78.78400012850761, - "p95": 88.25599774718285, - "p99": 164.38400372862816 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2487296, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 74, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 31.136000528931618, - "p90": 34.94400158524513, - "p95": 37.856001406908035, - "p99": 46.39999940991402 - }, - "combine": { - "p50": 39.264000952243805, - "p90": 44.28799822926521, - "p95": 46.46399989724159, - "p99": 77.85599678754807 - }, - "roundtrip": { - "p50": 1830.4959535598755, - "p90": 1838.304042816162, - "p95": 1842.78404712677, - "p99": 1957.919955253601 - }, - "isolatedSum": { - "p50": 70.40000148117542, - "p90": 79.23199981451035, - "p95": 84.32000130414963, - "p99": 124.25599619746208 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4960256, - "combineLogicalBytes": 9920512, - "fanoutMean": 5.40625, - "recvTokensMax": 145, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 32.896000891923904, - "p90": 35.96799820661545, - "p95": 39.135999977588654, - "p99": 45.56800052523613 - }, - "combine": { - "p50": 45.791998505592346, - "p90": 54.016001522541046, - "p95": 83.0719992518425, - "p99": 153.56799960136414 - }, - "roundtrip": { - "p50": 1840.1600122451782, - "p90": 1847.5840091705322, - "p95": 1853.9199829101562, - "p99": 1896.1600065231323 - }, - "isolatedSum": { - "p50": 78.68799939751625, - "p90": 89.9839997291565, - "p95": 122.20799922943115, - "p99": 199.13600012660027 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 9863168, - "combineLogicalBytes": 
19726336, - "fanoutMean": 5.375, - "recvTokensMax": 287, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 64, - "globalTokens": 512, - "dispatch": { - "p50": 38.84800150990486, - "p90": 42.43199899792671, - "p95": 47.16800153255463, - "p99": 62.144000083208084 - }, - "combine": { - "p50": 59.67999994754791, - "p90": 66.14399701356888, - "p95": 83.16799998283386, - "p99": 121.21599912643433 - }, - "roundtrip": { - "p50": 1859.5199584960938, - "p90": 1866.495966911316, - "p95": 1875.264048576355, - "p99": 1916.1280393600464 - }, - "isolatedSum": { - "p50": 98.52800145745277, - "p90": 108.57599601149559, - "p95": 130.3360015153885, - "p99": 183.3599992096424 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 19496960, - "combineLogicalBytes": 38993920, - "fanoutMean": 5.3125, - "recvTokensMax": 564, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 51.80799961090088, - "p90": 55.135998874902725, - "p95": 59.776000678539276, - "p99": 68.83200258016586 - }, - "combine": { - "p50": 86.40000224113464, - "p90": 92.03200042247772, - "p95": 95.74399888515472, - "p99": 156.41599893569946 - }, - "roundtrip": { - "p50": 1899.392008781433, - "p90": 1905.2480459213257, - "p95": 1909.440040588379, - "p99": 1973.3760356903076 - }, - "isolatedSum": { - "p50": 138.20800185203552, - "p90": 147.16799929738045, - "p95": 155.519999563694, - "p99": 225.24800151586533 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 38836224, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 1104, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-858b05cb", - "identity": "h200|deepep|7168|8|256|fp8-directcast|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", - "colorKey": "h200_2b594dfd", - 
"comparisonKey": "a4b473bf0791db70", - "schemaVersion": 3, - "generatedAt": "2026-06-27T15:56:11.323618+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_8", - "sku": "h200", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · fp8-directcast", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "fp8-directcast", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "ac583971f94b176", - "workloadId": "set:8:d8d49658059863f2", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28294159741", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28294159741", - "createdAt": "2026-06-27T15:56:11.323618+00:00", - "sha": "42eddb48c3eed35214c5ad50da1aa6527363ff70" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 66.75200164318085, - "p90": 95.23200243711472, - "p95": 
102.75200009346008, - "p99": 119.13599818944931 - }, - "combine": { - "p50": 59.007998555898666, - "p90": 76.1599987745285, - "p95": 82.0159986615181, - "p99": 103.00800204277039 - }, - "roundtrip": { - "p50": 152.54400670528412, - "p90": 193.12000274658203, - "p95": 204.8960030078888, - "p99": 230.68800568580627 - }, - "isolatedSum": { - "p50": 125.76000019907951, - "p90": 171.39200121164322, - "p95": 184.76799875497818, - "p99": 222.1440002322197 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 630784, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 63.58399987220764, - "p90": 91.00800007581711, - "p95": 99.80800002813339, - "p99": 118.52800101041794 - }, - "combine": { - "p50": 58.94400179386139, - "p90": 70.592001080513, - "p95": 77.82399654388428, - "p99": 87.77599781751633 - }, - "roundtrip": { - "p50": 151.32799744606018, - "p90": 191.96799397468567, - "p95": 202.4639993906021, - "p99": 234.17599499225616 - }, - "isolatedSum": { - "p50": 122.52800166606903, - "p90": 161.6000011563301, - "p95": 177.63199657201767, - "p99": 206.30399882793427 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1232896, - "combineLogicalBytes": 1232896, - "fanoutMean": 5.375, - "recvTokensMax": 13, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 66.01600348949432, - "p90": 93.9520001411438, - "p95": 105.59999942779541, - "p99": 121.8239963054657 - }, - "combine": { - "p50": 60.35200133919716, - "p90": 74.72000271081924, - "p95": 78.5600021481514, - "p99": 88.73599767684937 - }, - "roundtrip": { - "p50": 154.84799444675446, - "p90": 194.5600062608719, - "p95": 203.19999754428864, - "p99": 230.335995554924 - }, - "isolatedSum": { - "p50": 126.36800482869148, - "p90": 
168.67200285196304, - "p95": 184.1600015759468, - "p99": 210.55999398231506 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2480128, - "combineLogicalBytes": 2480128, - "fanoutMean": 5.40625, - "recvTokensMax": 29, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 67.10399687290192, - "p90": 94.4959968328476, - "p95": 104.76800054311752, - "p99": 123.00799787044525 - }, - "combine": { - "p50": 61.08799949288368, - "p90": 78.04799824953079, - "p95": 82.17599987983704, - "p99": 98.75199943780899 - }, - "roundtrip": { - "p50": 155.93600273132324, - "p90": 198.2399970293045, - "p95": 208.03199708461761, - "p99": 242.8479939699173 - }, - "isolatedSum": { - "p50": 128.1919963657856, - "p90": 172.5439950823784, - "p95": 186.94400042295456, - "p99": 221.75999730825424 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 67.16799736022949, - "p90": 97.31200337409973, - "p95": 106.9440022110939, - "p99": 129.37599420547485 - }, - "combine": { - "p50": 61.5679994225502, - "p90": 77.44000107049942, - "p95": 81.66400343179703, - "p99": 91.64799749851227 - }, - "roundtrip": { - "p50": 154.4319987297058, - "p90": 195.3279972076416, - "p95": 206.68800175189972, - "p99": 227.7120053768158 - }, - "isolatedSum": { - "p50": 128.7359967827797, - "p90": 174.75200444459915, - "p95": 188.60800564289093, - "p99": 221.02399170398712 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 9920512, - "combineLogicalBytes": 9920512, - "fanoutMean": 5.40625, - "recvTokensMax": 92, - "stragglerRank": 2, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - 
"dispatch": { - "p50": 67.80800223350525, - "p90": 92.19200164079666, - "p95": 102.4319976568222, - "p99": 133.7279975414276 - }, - "combine": { - "p50": 67.74400174617767, - "p90": 82.84799754619598, - "p95": 87.61599659919739, - "p99": 97.120001912117 - }, - "roundtrip": { - "p50": 159.13599729537964, - "p90": 200.06400346755981, - "p95": 211.84000372886658, - "p99": 244.6720004081726 - }, - "isolatedSum": { - "p50": 135.55200397968292, - "p90": 175.03999918699265, - "p95": 190.0479942560196, - "p99": 230.84799945354462 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 19726336, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 182, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 64, - "globalTokens": 512, - "dispatch": { - "p50": 77.60000228881836, - "p90": 101.59999877214432, - "p95": 109.11999642848969, - "p99": 125.91999769210815 - }, - "combine": { - "p50": 78.68800312280655, - "p90": 94.08000111579895, - "p95": 100.47999769449234, - "p99": 115.52000045776367 - }, - "roundtrip": { - "p50": 180.16000092029572, - "p90": 224.95999932289124, - "p95": 240.79999327659607, - "p99": 329.75998520851135 - }, - "isolatedSum": { - "p50": 156.2880054116249, - "p90": 195.67999988794327, - "p95": 209.59999412298203, - "p99": 241.43999814987183 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 38993920, - "combineLogicalBytes": 38993920, - "fanoutMean": 5.3125, - "recvTokensMax": 367, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 86.7839977145195, - "p90": 109.79200154542923, - "p95": 122.78400361537933, - "p99": 158.11200439929962 - }, - "combine": { - "p50": 96.3520035147667, - "p90": 111.84000223875046, - "p95": 115.77600240707397, - "p99": 128.22400033473969 - }, - "roundtrip": { - "p50": 209.88799631595612, - "p90": 239.1359955072403, - "p95": 
253.9840042591095, - "p99": 331.84000849723816 - }, - "isolatedSum": { - "p50": 183.1360012292862, - "p90": 221.6320037841797, - "p95": 238.5600060224533, - "p99": 286.3360047340393 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-339f09b5", - "identity": "h200|deepep|7168|8|256|fp8-pertoken|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", - "colorKey": "h200_7351c157", - "comparisonKey": "156f1708b9a7b98d", - "schemaVersion": 3, - "generatedAt": "2026-06-27T15:56:14.997520+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_10", - "sku": "h200", - "backend": "deepep", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · fp8-pertoken", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "fp8-pertoken", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "ac583971f94b176", - "workloadId": "set:8:d8d49658059863f2", - 
"workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28294163450", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28294163450", - "createdAt": "2026-06-27T15:56:14.997520+00:00", - "sha": "42eddb48c3eed35214c5ad50da1aa6527363ff70" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 66.81600213050842, - "p90": 88.67199718952179, - "p95": 102.9760017991066, - "p99": 120.60800194740295 - }, - "combine": { - "p50": 59.29600074887276, - "p90": 72.86400347948074, - "p95": 78.75200361013412, - "p99": 86.84799820184708 - }, - "roundtrip": { - "p50": 154.6880006790161, - "p90": 198.2720047235489, - "p95": 219.55199539661407, - "p99": 281.69599175453186 - }, - "isolatedSum": { - "p50": 126.11200287938118, - "p90": 161.53600066900253, - "p95": 181.72800540924072, - "p99": 207.45600014925003 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 630784, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 70.43199986219406, - "p90": 104.73600029945374, - "p95": 119.32799965143204, - "p99": 193.7279999256134 - }, - "combine": { - "p50": 59.10399928689003, - "p90": 71.32799923419952, - "p95": 80.28800040483475, - "p99": 100.16000270843506 - }, - "roundtrip": { - "p50": 155.03999590873718, - "p90": 205.53599298000336, - "p95": 231.58399760723114, - "p99": 357.08799958229065 - }, - "isolatedSum": { - "p50": 129.5359991490841, - "p90": 176.06399953365326, - "p95": 199.61600005626678, - "p99": 293.88800263404846 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1232896, - "combineLogicalBytes": 1232896, 
- "fanoutMean": 5.375, - "recvTokensMax": 13, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 68.92800331115723, - "p90": 96.38399630784988, - "p95": 109.66400057077408, - "p99": 130.97600638866425 - }, - "combine": { - "p50": 61.02399900555611, - "p90": 75.68000257015228, - "p95": 83.61600339412689, - "p99": 102.78400033712387 - }, - "roundtrip": { - "p50": 158.04800391197205, - "p90": 202.94399559497833, - "p95": 213.53599429130554, - "p99": 251.19999051094055 - }, - "isolatedSum": { - "p50": 129.95200231671333, - "p90": 172.06399887800217, - "p95": 193.28000396490097, - "p99": 233.76000672578812 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2480128, - "combineLogicalBytes": 2480128, - "fanoutMean": 5.40625, - "recvTokensMax": 29, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 66.01600348949432, - "p90": 96.25600278377533, - "p95": 106.72000050544739, - "p99": 128.86400520801544 - }, - "combine": { - "p50": 60.19200012087822, - "p90": 72.92799651622772, - "p95": 79.03999835252762, - "p99": 88.19200098514557 - }, - "roundtrip": { - "p50": 153.85599434375763, - "p90": 197.56799936294556, - "p95": 215.64799547195435, - "p99": 285.2480113506317 - }, - "isolatedSum": { - "p50": 126.20800361037254, - "p90": 169.18399930000305, - "p95": 185.759998857975, - "p99": 217.056006193161 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 66.23999774456024, - "p90": 92.99200028181076, - "p95": 102.81600058078766, - "p99": 128.9599984884262 - }, - "combine": { - "p50": 63.1679967045784, - "p90": 
78.36800068616867, - "p95": 84.35200154781342, - "p99": 111.00800335407257 - }, - "roundtrip": { - "p50": 161.79199516773224, - "p90": 204.48000729084015, - "p95": 219.26400065422058, - "p99": 282.4319899082184 - }, - "isolatedSum": { - "p50": 129.40799444913864, - "p90": 171.36000096797943, - "p95": 187.16800212860107, - "p99": 239.96800184249878 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 9920512, - "combineLogicalBytes": 9920512, - "fanoutMean": 5.40625, - "recvTokensMax": 92, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 71.68000191450119, - "p90": 102.75200009346008, - "p95": 115.68000167608261, - "p99": 132.89600610733032 - }, - "combine": { - "p50": 68.7360018491745, - "p90": 83.42400193214417, - "p95": 88.25600147247314, - "p99": 106.72000050544739 - }, - "roundtrip": { - "p50": 166.04800522327423, - "p90": 211.64800226688385, - "p95": 225.79200565814972, - "p99": 305.7920038700104 - }, - "isolatedSum": { - "p50": 140.4160037636757, - "p90": 186.17600202560425, - "p95": 203.93600314855576, - "p99": 239.6160066127777 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 19726336, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 182, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 64, - "globalTokens": 512, - "dispatch": { - "p50": 77.95199751853943, - "p90": 101.69599950313568, - "p95": 111.32799834012985, - "p99": 139.0720009803772 - }, - "combine": { - "p50": 79.26400005817413, - "p90": 92.57599711418152, - "p95": 98.91200065612793, - "p99": 126.36800110340118 - }, - "roundtrip": { - "p50": 175.48799514770508, - "p90": 220.32000124454498, - "p95": 231.64799809455872, - "p99": 279.4559895992279 - }, - "isolatedSum": { - "p50": 157.21599757671356, - "p90": 194.2719966173172, - "p95": 210.23999899625778, - "p99": 265.4400020837784 - }, - 
"roundtripMeasured": true, - "dispatchLogicalBytes": 38993920, - "combineLogicalBytes": 38993920, - "fanoutMean": 5.3125, - "recvTokensMax": 367, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 87.07199990749359, - "p90": 109.56799983978271, - "p95": 121.21599912643433, - "p99": 166.20799899101257 - }, - "combine": { - "p50": 96.6079980134964, - "p90": 113.66400122642517, - "p95": 119.64800208806992, - "p99": 157.1200042963028 - }, - "roundtrip": { - "p50": 212.44800090789795, - "p90": 258.36798548698425, - "p95": 284.41599011421204, - "p99": 348.9600121974945 - }, - "isolatedSum": { - "p50": 183.67999792099, - "p90": 223.23200106620789, - "p95": 240.86400121450424, - "p99": 323.32800328731537 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-16f8b2e1", - "identity": "h200|deepep|4096|8|128|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||dc27c5e0894e569", - "colorKey": "h200_d982b749", - "comparisonKey": "465ef3841664f1ea", - "schemaVersion": 3, - "generatedAt": "2026-06-27T11:14:26.678836+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_13", - "sku": "h200", - "backend": "deepep", - "phase": "prefill", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · bf16", - "model": "Qwen3.5", - "shape": { - "hidden": 4096, - "topk": 8, - "experts": 128, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - 
"unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "dc27c5e0894e569", - "workloadId": "set:6:76d8142d69406335", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28287506806", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28287506806", - "createdAt": "2026-06-27T11:14:26.678836+00:00", - "sha": "df7fddee0a275156d4c72fa006cd2b73bce72613" - }, - "rows": [ - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 89.79199826717377, - "p90": 110.84800213575363, - "p95": 119.00799721479416, - "p99": 141.92000031471252 - }, - "combine": { - "p50": 83.3280012011528, - "p90": 95.96800059080124, - "p95": 100.38399696350098, - "p99": 112.35199868679047 - }, - "roundtrip": { - "p50": 150.81599354743958, - "p90": 175.64800381660461, - "p95": 183.96799266338348, - "p99": 206.59199357032776 - }, - "isolatedSum": { - "p50": 173.11999946832657, - "p90": 206.81600272655487, - "p95": 219.39199417829514, - "p99": 254.271999001503 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 44564480, - "combineLogicalBytes": 44564480, - "fanoutMean": 5.3125, - "recvTokensMax": 699, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 256, - "globalTokens": 2048, - 
"dispatch": { - "p50": 110.23999750614166, - "p90": 128.48000228405, - "p95": 134.88000631332397, - "p99": 166.143998503685 - }, - "combine": { - "p50": 104.86400127410889, - "p90": 116.95999652147293, - "p95": 122.52800166606903, - "p99": 139.3280029296875 - }, - "roundtrip": { - "p50": 193.95199418067932, - "p90": 219.32800114154816, - "p95": 232.16000199317932, - "p99": 261.79200410842896 - }, - "isolatedSum": { - "p50": 215.10399878025055, - "p90": 245.43999880552292, - "p95": 257.408007979393, - "p99": 305.4720014333725 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 89726976, - "combineLogicalBytes": 89726976, - "fanoutMean": 5.34814453125, - "recvTokensMax": 1385, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 512, - "globalTokens": 4096, - "dispatch": { - "p50": 146.2080031633377, - "p90": 167.64800250530243, - "p95": 177.37600207328796, - "p99": 212.38400042057037 - }, - "combine": { - "p50": 152.63999998569489, - "p90": 164.48000073432922, - "p95": 170.68800330162048, - "p99": 188.960000872612 - }, - "roundtrip": { - "p50": 272.99201488494873, - "p90": 291.1359965801239, - "p95": 302.0159900188446, - "p99": 328.575998544693 - }, - "isolatedSum": { - "p50": 298.8480031490326, - "p90": 332.12800323963165, - "p95": 348.06400537490845, - "p99": 401.3440012931824 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 179503104, - "combineLogicalBytes": 179503104, - "fanoutMean": 5.349609375, - "recvTokensMax": 2772, - "stragglerRank": 1, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 1024, - "globalTokens": 8192, - "dispatch": { - "p50": 215.488001704216, - "p90": 237.34399676322937, - "p95": 246.94399535655975, - "p99": 288.03199529647827 - }, - "combine": { - "p50": 248.35200607776642, - "p90": 259.71201062202454, - "p95": 266.4639949798584, - "p99": 279.00800108909607 - }, - "roundtrip": { - "p50": 438.4959936141968, - "p90": 
459.80799198150635, - "p95": 470.71999311447144, - "p99": 498.4000027179718 - }, - "isolatedSum": { - "p50": 463.8400077819824, - "p90": 497.0560073852539, - "p95": 513.4079903364182, - "p99": 567.0399963855743 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 359022592, - "combineLogicalBytes": 359022592, - "fanoutMean": 5.349853515625, - "recvTokensMax": 5558, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2048, - "globalTokens": 16384, - "dispatch": { - "p50": 349.5680093765259, - "p90": 368.4160113334656, - "p95": 377.82400846481323, - "p99": 437.824010848999 - }, - "combine": { - "p50": 416.703999042511, - "p90": 430.9439957141876, - "p95": 437.18400597572327, - "p99": 455.1680088043213 - }, - "roundtrip": { - "p50": 740.2560114860535, - "p90": 760.7359886169434, - "p95": 771.3599801063538, - "p99": 818.4639811515808 - }, - "isolatedSum": { - "p50": 766.2720084190369, - "p90": 799.3600070476532, - "p95": 815.0080144405365, - "p99": 892.9920196533203 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 716111872, - "combineLogicalBytes": 716111872, - "fanoutMean": 5.33544921875, - "recvTokensMax": 10982, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4096, - "globalTokens": 32768, - "dispatch": { - "p50": 630.8159828186035, - "p90": 655.0719738006592, - "p95": 665.440022945404, - "p99": 703.3920288085938 - }, - "combine": { - "p50": 754.7199726104736, - "p90": 771.1359858512878, - "p95": 779.6480059623718, - "p99": 856.9279909133911 - }, - "roundtrip": { - "p50": 1357.0560216903687, - "p90": 1393.8560485839844, - "p95": 1428.4160137176514, - "p99": 1616.320013999939 - }, - "isolatedSum": { - "p50": 1385.5359554290771, - "p90": 1426.207959651947, - "p95": 1445.0880289077759, - "p99": 1560.3200197219849 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1432395776, - "combineLogicalBytes": 1432395776, - "fanoutMean": 
5.336090087890625, - "recvTokensMax": 21939, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-5888aff1", - "identity": "h200|deepep|4096|8|128|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||dc27c5e0894e569", - "colorKey": "h200_3a47b6c9", - "comparisonKey": "a14fc35e02b01662", - "schemaVersion": 3, - "generatedAt": "2026-06-26T23:53:49.842184+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_12", - "sku": "h200", - "backend": "deepep", - "phase": "prefill", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "runtime-visible-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · bf16", - "model": "Qwen3.5", - "shape": { - "hidden": 4096, - "topk": 8, - "experts": 128, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "dc27c5e0894e569", - "workloadId": "set:6:76d8142d69406335", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28271748233", - "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271748233", - "createdAt": "2026-06-26T23:53:49.842184+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 91.96799993515015, - "p90": 112.70400136709213, - "p95": 120.70400267839432, - "p99": 143.8400000333786 - }, - "combine": { - "p50": 83.29600095748901, - "p90": 93.40800344944, - "p95": 99.29600358009338, - "p99": 117.44000017642975 - }, - "roundtrip": { - "p50": 151.2639969587326, - "p90": 170.78399658203125, - "p95": 179.32799458503723, - "p99": 211.93599700927734 - }, - "isolatedSum": { - "p50": 175.26400089263916, - "p90": 206.11200481653214, - "p95": 220.0000062584877, - "p99": 261.28000020980835 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 44564480, - "combineLogicalBytes": 44564480, - "fanoutMean": 5.3125, - "recvTokensMax": 699, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 256, - "globalTokens": 2048, - "dispatch": { - "p50": 111.7120012640953, - "p90": 129.82399761676788, - "p95": 141.59999787807465, - "p99": 159.58400070667267 - }, - "combine": { - "p50": 104.35199737548828, - "p90": 119.93599683046341, - "p95": 123.83999675512314, - "p99": 136.22400164604187 - }, - "roundtrip": { - "p50": 195.42400538921356, - "p90": 218.4000015258789, - "p95": 231.51999711990356, - "p99": 307.16800689697266 - }, - "isolatedSum": { - "p50": 216.0639986395836, - "p90": 249.7599944472313, - "p95": 265.4399946331978, - "p99": 295.80800235271454 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 89726976, - "combineLogicalBytes": 89726976, - "fanoutMean": 5.34814453125, - "recvTokensMax": 1385, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 512, - "globalTokens": 4096, - "dispatch": { - "p50": 147.23199605941772, - "p90": 165.12000560760498, - "p95": 
172.992005944252, - "p99": 204.6079933643341 - }, - "combine": { - "p50": 153.53600680828094, - "p90": 168.2240068912506, - "p95": 175.90400576591492, - "p99": 192.09599494934082 - }, - "roundtrip": { - "p50": 270.8800137042999, - "p90": 295.1680123806, - "p95": 303.77599596977234, - "p99": 446.8800127506256 - }, - "isolatedSum": { - "p50": 300.76800286769867, - "p90": 333.3440124988556, - "p95": 348.89601171016693, - "p99": 396.7039883136749 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 179503104, - "combineLogicalBytes": 179503104, - "fanoutMean": 5.349609375, - "recvTokensMax": 2772, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 1024, - "globalTokens": 8192, - "dispatch": { - "p50": 214.52799439430237, - "p90": 237.63200640678406, - "p95": 244.9920028448105, - "p99": 282.5919985771179 - }, - "combine": { - "p50": 249.08800423145294, - "p90": 261.0880136489868, - "p95": 267.8079903125763, - "p99": 287.7439856529236 - }, - "roundtrip": { - "p50": 438.27199935913086, - "p90": 458.24000239372253, - "p95": 469.88800168037415, - "p99": 508.1599950790405 - }, - "isolatedSum": { - "p50": 463.6159986257553, - "p90": 498.7200200557709, - "p95": 512.7999931573868, - "p99": 570.3359842300415 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 359022592, - "combineLogicalBytes": 359022592, - "fanoutMean": 5.349853515625, - "recvTokensMax": 5558, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2048, - "globalTokens": 16384, - "dispatch": { - "p50": 352.1279990673065, - "p90": 375.0720024108887, - "p95": 387.1999979019165, - "p99": 523.360013961792 - }, - "combine": { - "p50": 419.9039936065674, - "p90": 433.8560104370117, - "p95": 441.536009311676, - "p99": 501.6319751739502 - }, - "roundtrip": { - "p50": 744.5759773254395, - "p90": 766.4960026741028, - "p95": 777.3119807243347, - "p99": 837.7919793128967 - }, - "isolatedSum": { - 
"p50": 772.0319926738739, - "p90": 808.9280128479004, - "p95": 828.7360072135925, - "p99": 1024.9919891357422 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 716111872, - "combineLogicalBytes": 716111872, - "fanoutMean": 5.33544921875, - "recvTokensMax": 10982, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4096, - "globalTokens": 32768, - "dispatch": { - "p50": 629.6319961547852, - "p90": 648.9279866218567, - "p95": 656.2560200691223, - "p99": 715.1039838790894 - }, - "combine": { - "p50": 754.368007183075, - "p90": 767.1359777450562, - "p95": 774.5919823646545, - "p99": 917.5040125846863 - }, - "roundtrip": { - "p50": 1354.0480136871338, - "p90": 1376.4159679412842, - "p95": 1387.8079652786255, - "p99": 1428.8320541381836 - }, - "isolatedSum": { - "p50": 1384.00000333786, - "p90": 1416.0639643669128, - "p95": 1430.8480024337769, - "p99": 1632.6079964637756 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1432395776, - "combineLogicalBytes": 1432395776, - "fanoutMean": 5.336090087890625, - "recvTokensMax": 21939, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-b183f57f", - "identity": "h200|deepep|5120|8|160|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||0c022a63bbcbf42", - "colorKey": "h200_3a47b6c9", - "comparisonKey": "6953183723230449", - "schemaVersion": 3, - "generatedAt": "2026-06-26T23:54:18.715974+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_0", - "sku": "h200", - "backend": "deepep", - "phase": "prefill", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "runtime-visible-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · bf16", - "model": "shape 5120/8/160", - "shape": 
{ - "hidden": 5120, - "topk": 8, - "experts": 160, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "0c022a63bbcbf42", - "workloadId": "set:6:28c0c09b13ff0acf", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28271763623", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271763623", - "createdAt": "2026-06-26T23:54:18.715974+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 97.69599884748459, - "p90": 105.95200210809708, - "p95": 110.68800091743469, - "p99": 117.37599968910217 - }, - "combine": { - "p50": 90.33600240945816, - "p90": 95.64799815416336, - "p95": 98.65599870681763, - "p99": 108.03200304508209 - }, - "roundtrip": { - "p50": 164.32000696659088, - "p90": 174.01599884033203, - "p95": 181.0240000486374, - "p99": 201.56799256801605 - }, - "isolatedSum": { - "p50": 188.03200125694275, - "p90": 201.60000026226044, - "p95": 209.34399962425232, - "p99": 225.40800273418427 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 55674880, - "combineLogicalBytes": 55674880, - "fanoutMean": 5.3095703125, - "recvTokensMax": 699, - 
"stragglerRank": 3, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 256, - "globalTokens": 2048, - "dispatch": { - "p50": 124.79999661445618, - "p90": 143.96800100803375, - "p95": 151.96800231933594, - "p99": 176.57600343227386 - }, - "combine": { - "p50": 119.71200257539749, - "p90": 133.56800377368927, - "p95": 140.09599387645721, - "p99": 156.70399367809296 - }, - "roundtrip": { - "p50": 216.48000180721283, - "p90": 235.35999655723572, - "p95": 243.00800263881683, - "p99": 263.71198892593384 - }, - "isolatedSum": { - "p50": 244.51199918985367, - "p90": 277.536004781723, - "p95": 292.06399619579315, - "p99": 333.2799971103668 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 111104000, - "combineLogicalBytes": 111104000, - "fanoutMean": 5.2978515625, - "recvTokensMax": 1387, - "stragglerRank": 3, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 512, - "globalTokens": 4096, - "dispatch": { - "p50": 161.6320013999939, - "p90": 176.54399573802948, - "p95": 185.47199666500092, - "p99": 204.96000349521637 - }, - "combine": { - "p50": 177.47199535369873, - "p90": 187.74400651454926, - "p95": 193.88799369335175, - "p99": 218.27200055122375 - }, - "roundtrip": { - "p50": 309.2159926891327, - "p90": 327.2320032119751, - "p95": 333.1199884414673, - "p99": 373.1519877910614 - }, - "isolatedSum": { - "p50": 339.1039967536926, - "p90": 364.28800225257874, - "p95": 379.35999035835266, - "p99": 423.2320040464401 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 223098880, - "combineLogicalBytes": 223098880, - "fanoutMean": 5.319091796875, - "recvTokensMax": 2762, - "stragglerRank": 3, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 1024, - "globalTokens": 8192, - "dispatch": { - "p50": 242.97599494457245, - "p90": 263.264000415802, - "p95": 271.10400795936584, - "p99": 296.54398560523987 - }, - "combine": { - "p50": 279.6800136566162, - "p90": 
291.55200719833374, - "p95": 296.7039942741394, - "p99": 321.82401418685913 - }, - "roundtrip": { - "p50": 498.30400943756104, - "p90": 516.0959959030151, - "p95": 529.4719934463501, - "p99": 696.6400146484375 - }, - "isolatedSum": { - "p50": 522.6560086011887, - "p90": 554.8160076141357, - "p95": 567.8080022335052, - "p99": 618.367999792099 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 446730240, - "combineLogicalBytes": 446730240, - "fanoutMean": 5.325439453125, - "recvTokensMax": 5518, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2048, - "globalTokens": 16384, - "dispatch": { - "p50": 402.52798795700073, - "p90": 414.46399688720703, - "p95": 420.28799653053284, - "p99": 450.72001218795776 - }, - "combine": { - "p50": 478.7839949131012, - "p90": 488.22399973869324, - "p95": 490.4960095882416, - "p99": 499.07198548316956 - }, - "roundtrip": { - "p50": 857.6639890670776, - "p90": 869.3439960479736, - "p95": 882.3680281639099, - "p99": 1592.25594997406 - }, - "isolatedSum": { - "p50": 881.3119828701019, - "p90": 902.6879966259003, - "p95": 910.7840061187744, - "p99": 949.7919976711273 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 893634560, - "combineLogicalBytes": 893634560, - "fanoutMean": 5.32647705078125, - "recvTokensMax": 11032, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4096, - "globalTokens": 32768, - "dispatch": { - "p50": 750.4640221595764, - "p90": 770.9119915962219, - "p95": 780.6079983711243, - "p99": 812.3199939727783 - }, - "combine": { - "p50": 873.1840252876282, - "p90": 885.6319785118103, - "p95": 893.4080004692078, - "p99": 941.9839978218079 - }, - "roundtrip": { - "p50": 1586.143970489502, - "p90": 1606.112003326416, - "p95": 1623.5840320587158, - "p99": 1662.7839803695679 - }, - "isolatedSum": { - "p50": 1623.6480474472046, - "p90": 1656.5439701080322, - "p95": 1674.015998840332, - "p99": 
1754.3039917945862 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1786265600, - "combineLogicalBytes": 1786265600, - "fanoutMean": 5.323486328125, - "recvTokensMax": 21895, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-dfdf595d", - "identity": "h200|deepep|6144|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", - "colorKey": "h200_d982b749", - "comparisonKey": "089552474e5d15cf", - "schemaVersion": 3, - "generatedAt": "2026-06-27T11:13:50.694218+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_7", - "sku": "h200", - "backend": "deepep", - "phase": "prefill", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · bf16", - "model": "MiniMax-M3", - "shape": { - "hidden": 6144, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "64d989e2e2a6b31", - "workloadId": "set:6:9f5e1e005a35e937", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": 
"sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28287495061", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28287495061", - "createdAt": "2026-06-27T11:13:50.694218+00:00", - "sha": "df7fddee0a275156d4c72fa006cd2b73bce72613" - }, - "rows": [ - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 107.13600367307663, - "p90": 125.66399574279785, - "p95": 131.23199343681335, - "p99": 139.29599523544312 - }, - "combine": { - "p50": 95.51999717950821, - "p90": 110.81600189208984, - "p95": 115.39199948310852, - "p99": 158.07999670505524 - }, - "roundtrip": { - "p50": 180.83199858665466, - "p90": 198.04799556732178, - "p95": 205.59999346733093, - "p99": 217.1200066804886 - }, - "isolatedSum": { - "p50": 202.65600085258484, - "p90": 236.4799976348877, - "p95": 246.62399291992188, - "p99": 297.37599194049835 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 66576384, - "combineLogicalBytes": 66576384, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 256, - "globalTokens": 2048, - "dispatch": { - "p50": 132.89600610733032, - "p90": 149.05600249767303, - "p95": 155.13600409030914, - "p99": 168.64000260829926 - }, - "combine": { - "p50": 128.03199887275696, - "p90": 142.91200041770935, - "p95": 147.71200716495514, - "p99": 169.27999258041382 - }, - "roundtrip": { - "p50": 236.89599335193634, - "p90": 251.23199820518494, - "p95": 261.6640031337738, - "p99": 302.68800258636475 - }, - "isolatedSum": { - "p50": 260.9280049800873, - "p90": 291.9680029153824, - "p95": 302.8480112552643, - "p99": 337.9199951887131 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 133619712, - "combineLogicalBytes": 133619712, - "fanoutMean": 5.3095703125, - "recvTokensMax": 1422, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 
600, - "trials": 3 - }, - { - "tokensPerRank": 512, - "globalTokens": 4096, - "dispatch": { - "p50": 182.68799781799316, - "p90": 202.81599462032318, - "p95": 213.1199985742569, - "p99": 237.98400163650513 - }, - "combine": { - "p50": 200.57600736618042, - "p90": 216.09599888324738, - "p95": 226.623997092247, - "p99": 267.36000180244446 - }, - "roundtrip": { - "p50": 357.31199383735657, - "p90": 381.3439905643463, - "p95": 394.8479890823364, - "p99": 424.127995967865 - }, - "isolatedSum": { - "p50": 383.2640051841736, - "p90": 418.91199350357056, - "p95": 439.7439956665039, - "p99": 505.3440034389496 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 267657216, - "combineLogicalBytes": 267657216, - "fanoutMean": 5.31787109375, - "recvTokensMax": 2779, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 1024, - "globalTokens": 8192, - "dispatch": { - "p50": 275.6800055503845, - "p90": 293.0560111999512, - "p95": 299.19999837875366, - "p99": 318.04800033569336 - }, - "combine": { - "p50": 319.8719918727875, - "p90": 332.41599798202515, - "p95": 340.2239978313446, - "p99": 369.4719970226288 - }, - "roundtrip": { - "p50": 570.2400207519531, - "p90": 585.919976234436, - "p95": 596.8000292778015, - "p99": 636.7040276527405 - }, - "isolatedSum": { - "p50": 595.551997423172, - "p90": 625.4720091819763, - "p95": 639.4239962100983, - "p99": 687.5199973583221 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 534380544, - "combineLogicalBytes": 534380544, - "fanoutMean": 5.30859375, - "recvTokensMax": 5505, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2048, - "globalTokens": 16384, - "dispatch": { - "p50": 462.2719883918762, - "p90": 478.87998819351196, - "p95": 489.79198932647705, - "p99": 548.7679839134216 - }, - "combine": { - "p50": 548.5119819641113, - "p90": 561.5040063858032, - "p95": 568.3199763298035, - "p99": 726.7199754714966 - }, - 
"roundtrip": { - "p50": 983.0080270767212, - "p90": 996.6400265693665, - "p95": 1016.3520574569702, - "p99": 1202.5279998779297 - }, - "isolatedSum": { - "p50": 1010.7839703559875, - "p90": 1040.3839945793152, - "p95": 1058.1119656562805, - "p99": 1275.4879593849182 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1066119168, - "combineLogicalBytes": 1066119168, - "fanoutMean": 5.29547119140625, - "recvTokensMax": 10952, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4096, - "globalTokens": 32768, - "dispatch": { - "p50": 868.2559728622437, - "p90": 894.5599794387817, - "p95": 901.9839763641357, - "p99": 927.3279905319214 - }, - "combine": { - "p50": 1004.7680139541626, - "p90": 1020.8319425582886, - "p95": 1037.503957748413, - "p99": 1106.7520380020142 - }, - "roundtrip": { - "p50": 1834.112048149109, - "p90": 1855.2639484405518, - "p95": 1866.6880130767822, - "p99": 2027.26411819458 - }, - "isolatedSum": { - "p50": 1873.0239868164062, - "p90": 1915.3919219970703, - "p95": 1939.4879341125488, - "p99": 2034.0800285339355 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2131722240, - "combineLogicalBytes": 2131722240, - "fanoutMean": 5.294189453125, - "recvTokensMax": 21781, - "stragglerRank": 1, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-96267e21", - "identity": "h200|deepep|6144|8|256|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", - "colorKey": "h200_3a47b6c9", - "comparisonKey": "27afbf0ad63e86ca", - "schemaVersion": 3, - "generatedAt": "2026-06-26T23:55:01.688428+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_9", - "sku": "h200", - "backend": "deepep", - "phase": "prefill", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "runtime-visible-v1", - 
"topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · bf16", - "model": "MiniMax-M3", - "shape": { - "hidden": 6144, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "64d989e2e2a6b31", - "workloadId": "set:6:9f5e1e005a35e937", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28271778692", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271778692", - "createdAt": "2026-06-26T23:55:01.688428+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 108.64000022411346, - "p90": 120.70400267839432, - "p95": 131.58400356769562, - "p99": 146.2399959564209 - }, - "combine": { - "p50": 95.71199864149094, - "p90": 103.67999970912933, - "p95": 112.73600161075592, - "p99": 121.50400131940842 - }, - "roundtrip": { - "p50": 181.0240000486374, - "p90": 199.2959976196289, - "p95": 207.16799795627594, - "p99": 244.9280023574829 - }, - "isolatedSum": { - "p50": 204.3519988656044, - "p90": 224.38400238752365, - "p95": 244.32000517845154, - "p99": 
267.7439972758293 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 66576384, - "combineLogicalBytes": 66576384, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 256, - "globalTokens": 2048, - "dispatch": { - "p50": 135.29600203037262, - "p90": 148.00000190734863, - "p95": 157.72800147533417, - "p99": 182.20800161361694 - }, - "combine": { - "p50": 128.31999361515045, - "p90": 139.74399864673615, - "p95": 145.7280069589615, - "p99": 158.75199437141418 - }, - "roundtrip": { - "p50": 235.6480062007904, - "p90": 248.6400008201599, - "p95": 259.16799902915955, - "p99": 301.60000920295715 - }, - "isolatedSum": { - "p50": 263.61599564552307, - "p90": 287.7440005540848, - "p95": 303.45600843429565, - "p99": 340.9599959850311 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 133619712, - "combineLogicalBytes": 133619712, - "fanoutMean": 5.3095703125, - "recvTokensMax": 1422, - "stragglerRank": 3, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 512, - "globalTokens": 4096, - "dispatch": { - "p50": 184.1920018196106, - "p90": 199.8080015182495, - "p95": 208.48000049591064, - "p99": 231.90400004386902 - }, - "combine": { - "p50": 198.62399995326996, - "p90": 212.0320051908493, - "p95": 221.18400037288666, - "p99": 289.7599935531616 - }, - "roundtrip": { - "p50": 349.4719862937927, - "p90": 366.3040101528168, - "p95": 376.8320083618164, - "p99": 431.2959909439087 - }, - "isolatedSum": { - "p50": 382.81600177288055, - "p90": 411.8400067090988, - "p95": 429.6640008687973, - "p99": 521.6639935970306 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 267657216, - "combineLogicalBytes": 267657216, - "fanoutMean": 5.31787109375, - "recvTokensMax": 2779, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 1024, - "globalTokens": 8192, - "dispatch": { - 
"p50": 278.0799865722656, - "p90": 296.671986579895, - "p95": 305.759996175766, - "p99": 346.8799889087677 - }, - "combine": { - "p50": 313.1519854068756, - "p90": 324.6079981327057, - "p95": 331.9680094718933, - "p99": 350.5600094795227 - }, - "roundtrip": { - "p50": 563.1999969482422, - "p90": 577.9839754104614, - "p95": 589.5040035247803, - "p99": 688.9920234680176 - }, - "isolatedSum": { - "p50": 591.2319719791412, - "p90": 621.2799847126007, - "p95": 637.7280056476593, - "p99": 697.4399983882904 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 534380544, - "combineLogicalBytes": 534380544, - "fanoutMean": 5.30859375, - "recvTokensMax": 5505, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2048, - "globalTokens": 16384, - "dispatch": { - "p50": 465.8240079879761, - "p90": 484.3200147151947, - "p95": 496.2559938430786, - "p99": 558.8799715042114 - }, - "combine": { - "p50": 544.3519949913025, - "p90": 560.1599812507629, - "p95": 564.9600028991699, - "p99": 624.0959763526917 - }, - "roundtrip": { - "p50": 981.0879826545715, - "p90": 996.3520169258118, - "p95": 1007.7439546585083, - "p99": 1077.1839618682861 - }, - "isolatedSum": { - "p50": 1010.1760029792786, - "p90": 1044.4799959659576, - "p95": 1061.2159967422485, - "p99": 1182.975947856903 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1066119168, - "combineLogicalBytes": 1066119168, - "fanoutMean": 5.29547119140625, - "recvTokensMax": 10952, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4096, - "globalTokens": 32768, - "dispatch": { - "p50": 858.2080006599426, - "p90": 877.9839873313904, - "p95": 884.0000033378601, - "p99": 925.6640076637268 - }, - "combine": { - "p50": 981.98401927948, - "p90": 994.4959878921509, - "p95": 1000.9280443191528, - "p99": 1111.9040250778198 - }, - "roundtrip": { - "p50": 1810.1119995117188, - "p90": 1826.0159492492676, - "p95": 
1833.7279558181763, - "p99": 1947.551965713501 - }, - "isolatedSum": { - "p50": 1840.1920199394226, - "p90": 1872.4799752235413, - "p95": 1884.928047657013, - "p99": 2037.5680327415466 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2131722240, - "combineLogicalBytes": 2131722240, - "fanoutMean": 5.294189453125, - "recvTokensMax": 21781, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-bc48bfe5", - "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||157ca81687ddb63", - "colorKey": "h200_d982b749", - "comparisonKey": "6da1f9e2ab025dbe", - "schemaVersion": 3, - "generatedAt": "2026-06-26T23:56:28.417730+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_0", - "sku": "h200", - "backend": "deepep", - "phase": "prefill", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · bf16", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "157ca81687ddb63", - "workloadId": "set:3:a426d66e479dc893", - 
"workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28271827040", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271827040", - "createdAt": "2026-06-26T23:56:28.417730+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 116.80000275373459, - "p90": 135.80800592899323, - "p95": 142.14399456977844, - "p99": 172.7679967880249 - }, - "combine": { - "p50": 104.35199737548828, - "p90": 121.56800180673599, - "p95": 125.72799623012543, - "p99": 150.65599977970123 - }, - "roundtrip": { - "p50": 195.77600061893463, - "p90": 216.22399985790253, - "p95": 222.9440063238144, - "p99": 267.67998933792114 - }, - "isolatedSum": { - "p50": 221.15200012922287, - "p90": 257.3760077357292, - "p95": 267.87199079990387, - "p99": 323.42399656772614 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 512, - "globalTokens": 4096, - "dispatch": { - "p50": 202.36800611019135, - "p90": 223.1999933719635, - "p95": 231.58399760723114, - "p99": 263.7439966201782 - }, - "combine": { - "p50": 223.93600642681122, - "p90": 236.32000386714935, - "p95": 241.88800156116486, - "p99": 258.7839961051941 - }, - "roundtrip": { - "p50": 399.58399534225464, - "p90": 417.279988527298, - "p95": 424.4160056114197, - "p99": 459.77601408958435 - }, - "isolatedSum": { - "p50": 426.30401253700256, - "p90": 459.51999723911285, - "p95": 473.471999168396, - "p99": 522.5279927253723 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 312266752, - 
"combineLogicalBytes": 312266752, - "fanoutMean": 5.31787109375, - "recvTokensMax": 2779, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2048, - "globalTokens": 16384, - "dispatch": { - "p50": 542.5919890403748, - "p90": 557.5039982795715, - "p95": 563.3280277252197, - "p99": 587.8080129623413 - }, - "combine": { - "p50": 619.1999912261963, - "p90": 634.5599889755249, - "p95": 646.3040113449097, - "p99": 683.8080286979675 - }, - "roundtrip": { - "p50": 1131.1999559402466, - "p90": 1146.720051765442, - "p95": 1155.743956565857, - "p99": 1289.952039718628 - }, - "isolatedSum": { - "p50": 1161.791980266571, - "p90": 1192.0639872550964, - "p95": 1209.6320390701294, - "p99": 1271.6160416603088 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1243805696, - "combineLogicalBytes": 1243805696, - "fanoutMean": 5.29547119140625, - "recvTokensMax": 10952, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-5553e87c", - "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", - "colorKey": "h200_d982b749", - "comparisonKey": "6d1b97a966875452", - "schemaVersion": 3, - "generatedAt": "2026-06-27T10:26:28.382976+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_4", - "sku": "h200", - "backend": "deepep", - "phase": "prefill", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · bf16", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - 
"dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "64d989e2e2a6b31", - "workloadId": "set:6:a426d66e479dc893", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "2.0.0+af9a040", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28286432534", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28286432534", - "createdAt": "2026-06-27T10:26:28.382976+00:00", - "sha": "91c7acf59a5e524f37742922ec67721d86a03f6b" - }, - "rows": [ - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 116.89600348472595, - "p90": 136.22400164604187, - "p95": 142.2719955444336, - "p99": 153.1199961900711 - }, - "combine": { - "p50": 106.11200332641602, - "p90": 122.17599898576736, - "p95": 125.85599720478058, - "p99": 131.77600502967834 - }, - "roundtrip": { - "p50": 195.77600061893463, - "p90": 213.95200490951538, - "p95": 220.15999257564545, - "p99": 227.77600586414337 - }, - "isolatedSum": { - "p50": 223.00800681114197, - "p90": 258.40000063180923, - "p95": 268.1279927492142, - "p99": 284.89600121974945 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 256, - "globalTokens": 2048, - "dispatch": { - "p50": 
146.88000082969666, - "p90": 168.06399822235107, - "p95": 172.54400253295898, - "p99": 178.17600071430206 - }, - "combine": { - "p50": 145.53600549697876, - "p90": 157.0879966020584, - "p95": 163.90399634838104, - "p99": 171.7119961977005 - }, - "roundtrip": { - "p50": 264.92801308631897, - "p90": 279.4879972934723, - "p95": 288.4800136089325, - "p99": 297.60000109672546 - }, - "isolatedSum": { - "p50": 292.4160063266754, - "p90": 325.1519948244095, - "p95": 336.44799888134, - "p99": 349.88799691200256 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 155889664, - "combineLogicalBytes": 155889664, - "fanoutMean": 5.3095703125, - "recvTokensMax": 1422, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 512, - "globalTokens": 4096, - "dispatch": { - "p50": 202.04800367355347, - "p90": 215.5199944972992, - "p95": 223.68000447750092, - "p99": 234.52800512313843 - }, - "combine": { - "p50": 224.35200214385986, - "p90": 235.23199558258057, - "p95": 237.15199530124664, - "p99": 253.91998887062073 - }, - "roundtrip": { - "p50": 403.23200821876526, - "p90": 414.88000750541687, - "p95": 423.2639968395233, - "p99": 433.79199504852295 - }, - "isolatedSum": { - "p50": 426.40000581741333, - "p90": 450.75199007987976, - "p95": 460.83199977874756, - "p99": 488.44799399375916 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 312266752, - "combineLogicalBytes": 312266752, - "fanoutMean": 5.31787109375, - "recvTokensMax": 2779, - "stragglerRank": 2, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 1024, - "globalTokens": 8192, - "dispatch": { - "p50": 314.62401151657104, - "p90": 331.13598823547363, - "p95": 335.80800890922546, - "p99": 343.80799531936646 - }, - "combine": { - "p50": 356.03201389312744, - "p90": 364.1600012779236, - "p95": 366.5600121021271, - "p99": 376.22401118278503 - }, - "roundtrip": { - "p50": 647.8400230407715, - "p90": 659.1359972953796, - "p95": 
664.9600267410278, - "p99": 687.2320175170898 - }, - "isolatedSum": { - "p50": 670.6560254096985, - "p90": 695.2959895133972, - "p95": 702.3680210113525, - "p99": 720.0320065021515 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 623443968, - "combineLogicalBytes": 623443968, - "fanoutMean": 5.30859375, - "recvTokensMax": 5505, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2048, - "globalTokens": 16384, - "dispatch": { - "p50": 531.0080051422119, - "p90": 542.527973651886, - "p95": 549.4719743728638, - "p99": 571.0399746894836 - }, - "combine": { - "p50": 619.871973991394, - "p90": 628.9920210838318, - "p95": 632.9280138015747, - "p99": 642.4639821052551 - }, - "roundtrip": { - "p50": 1122.8159666061401, - "p90": 1134.7839832305908, - "p95": 1140.8319473266602, - "p99": 1158.4320068359375 - }, - "isolatedSum": { - "p50": 1150.879979133606, - "p90": 1171.5199947357178, - "p95": 1182.3999881744385, - "p99": 1213.5039567947388 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1243805696, - "combineLogicalBytes": 1243805696, - "fanoutMean": 5.29547119140625, - "recvTokensMax": 10952, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4096, - "globalTokens": 32768, - "dispatch": { - "p50": 1004.5440196990967, - "p90": 1023.7760543823242, - "p95": 1030.2400588989258, - "p99": 1042.464017868042 - }, - "combine": { - "p50": 1125.6959438323975, - "p90": 1136.1279487609863, - "p95": 1140.544056892395, - "p99": 1155.4239988327026 - }, - "roundtrip": { - "p50": 2086.143970489502, - "p90": 2106.048107147217, - "p95": 2112.096071243286, - "p99": 2332.0000171661377 - }, - "isolatedSum": { - "p50": 2130.239963531494, - "p90": 2159.9040031433105, - "p95": 2170.784115791321, - "p99": 2197.8880167007446 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2487009280, - "combineLogicalBytes": 2487009280, - "fanoutMean": 5.294189453125, - 
"recvTokensMax": 21781, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-71f62108", - "identity": "h200|deepep|7168|8|256|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", - "colorKey": "h200_3a47b6c9", - "comparisonKey": "c80c3e7446de9680", - "schemaVersion": 3, - "generatedAt": "2026-06-26T23:50:05.486154+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_12", - "sku": "h200", - "backend": "deepep", - "phase": "prefill", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "runtime-visible-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · bf16", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "64d989e2e2a6b31", - "workloadId": "set:6:a426d66e479dc893", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28271618490", - "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271618490", - "createdAt": "2026-06-26T23:50:05.486154+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 118.68800222873688, - "p90": 127.3919939994812, - "p95": 133.31200182437897, - "p99": 144.57599818706512 - }, - "combine": { - "p50": 105.8880016207695, - "p90": 112.76800185441971, - "p95": 117.79200285673141, - "p99": 129.72800433635712 - }, - "roundtrip": { - "p50": 199.35999810695648, - "p90": 209.4399929046631, - "p95": 215.7440036535263, - "p99": 257.82400369644165 - }, - "isolatedSum": { - "p50": 224.57600384950638, - "p90": 240.1599958539009, - "p95": 251.10400468111038, - "p99": 274.30400252342224 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 1, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 256, - "globalTokens": 2048, - "dispatch": { - "p50": 145.4080045223236, - "p90": 154.91199493408203, - "p95": 159.2639982700348, - "p99": 170.6559956073761 - }, - "combine": { - "p50": 144.3520039319992, - "p90": 150.59199929237366, - "p95": 153.05599570274353, - "p99": 167.4879938364029 - }, - "roundtrip": { - "p50": 263.5200023651123, - "p90": 270.3680098056793, - "p95": 274.7200131416321, - "p99": 291.1039888858795 - }, - "isolatedSum": { - "p50": 289.7600084543228, - "p90": 305.5039942264557, - "p95": 312.3199939727783, - "p99": 338.143989443779 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 155889664, - "combineLogicalBytes": 155889664, - "fanoutMean": 5.3095703125, - "recvTokensMax": 1422, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 512, - "globalTokens": 4096, - "dispatch": { - "p50": 205.1839977502823, - "p90": 219.04000639915466, - "p95": 
227.743998169899, - "p99": 242.5920069217682 - }, - "combine": { - "p50": 221.50400280952454, - "p90": 232.96000063419342, - "p95": 239.58399891853333, - "p99": 263.0400061607361 - }, - "roundtrip": { - "p50": 397.8239893913269, - "p90": 412.03200817108154, - "p95": 421.08801007270813, - "p99": 463.8400077819824 - }, - "isolatedSum": { - "p50": 426.6880005598068, - "p90": 452.0000070333481, - "p95": 467.3279970884323, - "p99": 505.6320130825043 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 312266752, - "combineLogicalBytes": 312266752, - "fanoutMean": 5.31787109375, - "recvTokensMax": 2779, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 1024, - "globalTokens": 8192, - "dispatch": { - "p50": 316.0000145435333, - "p90": 324.0959942340851, - "p95": 328.99200916290283, - "p99": 351.6159951686859 - }, - "combine": { - "p50": 350.17600655555725, - "p90": 358.5599958896637, - "p95": 363.2960021495819, - "p99": 392.8639888763428 - }, - "roundtrip": { - "p50": 639.4559741020203, - "p90": 655.1039814949036, - "p95": 665.3760075569153, - "p99": 768.8000202178955 - }, - "isolatedSum": { - "p50": 666.1760210990906, - "p90": 682.6559901237488, - "p95": 692.2880113124847, - "p99": 744.4799840450287 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 623443968, - "combineLogicalBytes": 623443968, - "fanoutMean": 5.30859375, - "recvTokensMax": 5505, - "stragglerRank": 1, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2048, - "globalTokens": 16384, - "dispatch": { - "p50": 527.4559855461121, - "p90": 543.936014175415, - "p95": 551.3280034065247, - "p99": 568.5439705848694 - }, - "combine": { - "p50": 612.384021282196, - "p90": 627.3279786109924, - "p95": 639.519989490509, - "p99": 984.5119714736938 - }, - "roundtrip": { - "p50": 1111.6160154342651, - "p90": 1130.6240558624268, - "p95": 1139.7759914398193, - "p99": 1297.5679636001587 - }, - "isolatedSum": { - "p50": 
1139.840006828308, - "p90": 1171.2639927864075, - "p95": 1190.8479928970337, - "p99": 1553.0559420585632 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1243805696, - "combineLogicalBytes": 1243805696, - "fanoutMean": 5.29547119140625, - "recvTokensMax": 10952, - "stragglerRank": 1, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4096, - "globalTokens": 32768, - "dispatch": { - "p50": 994.4959878921509, - "p90": 1017.6960229873657, - "p95": 1024.7360467910767, - "p99": 1044.8640584945679 - }, - "combine": { - "p50": 1103.9680242538452, - "p90": 1115.7439947128296, - "p95": 1122.3039627075195, - "p99": 1306.1439990997314 - }, - "roundtrip": { - "p50": 2064.448118209839, - "p90": 2089.344024658203, - "p95": 2106.0800552368164, - "p99": 2285.504102706909 - }, - "isolatedSum": { - "p50": 2098.464012145996, - "p90": 2133.4400177001953, - "p95": 2147.040009498596, - "p99": 2351.0080575942993 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2487009280, - "combineLogicalBytes": 2487009280, - "fanoutMean": 5.294189453125, - "recvTokensMax": 21781, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-2e712f4f", - "identity": "h200|deepep|7168|8|384|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||cd50548525dafdf", - "colorKey": "h200_d982b749", - "comparisonKey": "cbe784eff055b137", - "schemaVersion": 3, - "generatedAt": "2026-06-27T11:14:16.208325+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_0", - "sku": "h200", - "backend": "deepep", - "phase": "prefill", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · bf16", - "model": "Kimi-K2", - 
"shape": { - "hidden": 7168, - "topk": 8, - "experts": 384, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "cd50548525dafdf", - "workloadId": "set:6:b23bc0c4b6402c69", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28287501303", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28287501303", - "createdAt": "2026-06-27T11:14:16.208325+00:00", - "sha": "df7fddee0a275156d4c72fa006cd2b73bce72613" - }, - "rows": [ - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 122.079998254776, - "p90": 145.60000598430634, - "p95": 166.4000004529953, - "p99": 212.38400042057037 - }, - "combine": { - "p50": 105.98400235176086, - "p90": 121.40800058841705, - "p95": 127.23200023174286, - "p99": 153.28000485897064 - }, - "roundtrip": { - "p50": 196.44799828529358, - "p90": 221.15199267864227, - "p95": 232.80000686645508, - "p99": 247.1040040254593 - }, - "isolatedSum": { - "p50": 228.06400060653687, - "p90": 267.0080065727234, - "p95": 293.63200068473816, - "p99": 365.664005279541 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 77514752, - "combineLogicalBytes": 77514752, - "fanoutMean": 5.2802734375, - "recvTokensMax": 707, - 
"stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 256, - "globalTokens": 2048, - "dispatch": { - "p50": 147.07200229167938, - "p90": 169.11999881267548, - "p95": 174.72000420093536, - "p99": 191.5840059518814 - }, - "combine": { - "p50": 142.5279974937439, - "p90": 154.59200739860535, - "p95": 160.19199788570404, - "p99": 169.95200514793396 - }, - "roundtrip": { - "p50": 261.75999641418457, - "p90": 279.4879972934723, - "p95": 287.07200288772583, - "p99": 312.99200654029846 - }, - "isolatedSum": { - "p50": 289.5999997854233, - "p90": 323.7120062112808, - "p95": 334.9120020866394, - "p99": 361.53601109981537 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 154570752, - "combineLogicalBytes": 154570752, - "fanoutMean": 5.2646484375, - "recvTokensMax": 1391, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 512, - "globalTokens": 4096, - "dispatch": { - "p50": 208.92800390720367, - "p90": 239.71199989318848, - "p95": 256.6080093383789, - "p99": 289.8240089416504 - }, - "combine": { - "p50": 226.84800624847412, - "p90": 248.9600032567978, - "p95": 259.8400115966797, - "p99": 303.6159873008728 - }, - "roundtrip": { - "p50": 399.9040126800537, - "p90": 421.7599928379059, - "p95": 431.2640130519867, - "p99": 470.91200947761536 - }, - "isolatedSum": { - "p50": 435.7760101556778, - "p90": 488.67200314998627, - "p95": 516.4480209350586, - "p99": 593.4399962425232 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 309772288, - "combineLogicalBytes": 309772288, - "fanoutMean": 5.275390625, - "recvTokensMax": 2754, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 1024, - "globalTokens": 8192, - "dispatch": { - "p50": 312.54398822784424, - "p90": 332.41599798202515, - "p95": 337.8239870071411, - "p99": 356.3520014286041 - }, - "combine": { - "p50": 352.03200578689575, - "p90": 
364.03200030326843, - "p95": 370.88000774383545, - "p99": 386.49600744247437 - }, - "roundtrip": { - "p50": 642.1759724617004, - "p90": 663.8720035552979, - "p95": 672.4159717559814, - "p99": 710.2400064468384 - }, - "isolatedSum": { - "p50": 664.57599401474, - "p90": 696.4479982852936, - "p95": 708.7039947509766, - "p99": 742.8480088710785 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 619501568, - "combineLogicalBytes": 619501568, - "fanoutMean": 5.2750244140625, - "recvTokensMax": 5469, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2048, - "globalTokens": 16384, - "dispatch": { - "p50": 524.2239832878113, - "p90": 540.3519868850708, - "p95": 545.7919836044312, - "p99": 580.672025680542 - }, - "combine": { - "p50": 604.1600108146667, - "p90": 617.6319718360901, - "p95": 624.8639822006226, - "p99": 663.2959842681885 - }, - "roundtrip": { - "p50": 1101.6960144042969, - "p90": 1123.4560012817383, - "p95": 1130.944013595581, - "p99": 1197.759985923767 - }, - "isolatedSum": { - "p50": 1128.383994102478, - "p90": 1157.983958721161, - "p95": 1170.6559658050537, - "p99": 1243.9680099487305 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1239375872, - "combineLogicalBytes": 1239375872, - "fanoutMean": 5.276611328125, - "recvTokensMax": 10883, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4096, - "globalTokens": 32768, - "dispatch": { - "p50": 980.1279902458191, - "p90": 1001.3760328292847, - "p95": 1012.0639801025391, - "p99": 1055.3920269012451 - }, - "combine": { - "p50": 1095.0080156326294, - "p90": 1110.5279922485352, - "p95": 1121.8559741973877, - "p99": 1219.1040515899658 - }, - "roundtrip": { - "p50": 2037.1840000152588, - "p90": 2063.6160373687744, - "p95": 2101.50408744812, - "p99": 2307.6798915863037 - }, - "isolatedSum": { - "p50": 2075.1360058784485, - "p90": 2111.90402507782, - "p95": 2133.9199542999268, - "p99": 
2274.496078491211 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2479669248, - "combineLogicalBytes": 2479669248, - "fanoutMean": 5.278564453125, - "recvTokensMax": 21730, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-a8fb4d9b", - "identity": "h200|deepep|7168|8|384|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||cd50548525dafdf", - "colorKey": "h200_3a47b6c9", - "comparisonKey": "f6581a3621ac6cd2", - "schemaVersion": 3, - "generatedAt": "2026-06-26T23:53:25.459367+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_13", - "sku": "h200", - "backend": "deepep", - "phase": "prefill", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "runtime-visible-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · bf16", - "model": "Kimi-K2", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 384, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "cd50548525dafdf", - "workloadId": "set:6:b23bc0c4b6402c69", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": 
"sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28271732597", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271732597", - "createdAt": "2026-06-26T23:53:25.459367+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 115.90400338172913, - "p90": 134.14399325847626, - "p95": 140.35199582576752, - "p99": 160.38399934768677 - }, - "combine": { - "p50": 104.09600287675858, - "p90": 119.71200257539749, - "p95": 124.64000284671783, - "p99": 145.31199634075165 - }, - "roundtrip": { - "p50": 195.64799964427948, - "p90": 212.8639966249466, - "p95": 219.9999988079071, - "p99": 230.3680032491684 - }, - "isolatedSum": { - "p50": 220.0000062584877, - "p90": 253.85599583387375, - "p95": 264.99199867248535, - "p99": 305.6959956884384 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 77514752, - "combineLogicalBytes": 77514752, - "fanoutMean": 5.2802734375, - "recvTokensMax": 707, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 256, - "globalTokens": 2048, - "dispatch": { - "p50": 145.53600549697876, - "p90": 162.30399906635284, - "p95": 170.3999936580658, - "p99": 184.64000523090363 - }, - "combine": { - "p50": 143.77599954605103, - "p90": 157.21599757671356, - "p95": 162.27200627326965, - "p99": 175.64800381660461 - }, - "roundtrip": { - "p50": 265.1199996471405, - "p90": 283.90398621559143, - "p95": 289.0239953994751, - "p99": 302.0159900188446 - }, - "isolatedSum": { - "p50": 289.3120050430298, - "p90": 319.5199966430664, - "p95": 332.67199993133545, - "p99": 360.28800904750824 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 154570752, - "combineLogicalBytes": 154570752, - "fanoutMean": 5.2646484375, - "recvTokensMax": 1391, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 
600, - "trials": 3 - }, - { - "tokensPerRank": 512, - "globalTokens": 4096, - "dispatch": { - "p50": 205.4399996995926, - "p90": 224.0000069141388, - "p95": 232.06399381160736, - "p99": 262.719988822937 - }, - "combine": { - "p50": 225.0880002975464, - "p90": 243.96799504756927, - "p95": 250.0160038471222, - "p99": 335.55200695991516 - }, - "roundtrip": { - "p50": 403.55199575424194, - "p90": 432.8959882259369, - "p95": 447.1360146999359, - "p99": 589.6000266075134 - }, - "isolatedSum": { - "p50": 430.527999997139, - "p90": 467.96800196170807, - "p95": 482.07999765872955, - "p99": 598.2719957828522 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 309772288, - "combineLogicalBytes": 309772288, - "fanoutMean": 5.275390625, - "recvTokensMax": 2754, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 1024, - "globalTokens": 8192, - "dispatch": { - "p50": 312.79999017715454, - "p90": 334.7199857234955, - "p95": 340.1919901371002, - "p99": 368.73599886894226 - }, - "combine": { - "p50": 356.1280071735382, - "p90": 367.45598912239075, - "p95": 372.6719915866852, - "p99": 395.77600359916687 - }, - "roundtrip": { - "p50": 643.1999802589417, - "p90": 657.3759913444519, - "p95": 663.7439727783203, - "p99": 708.1599831581116 - }, - "isolatedSum": { - "p50": 668.9279973506927, - "p90": 702.1759748458862, - "p95": 712.8639817237854, - "p99": 764.5120024681091 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 619501568, - "combineLogicalBytes": 619501568, - "fanoutMean": 5.2750244140625, - "recvTokensMax": 5469, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2048, - "globalTokens": 16384, - "dispatch": { - "p50": 523.2639908790588, - "p90": 541.9520139694214, - "p95": 552.2559881210327, - "p99": 611.3280057907104 - }, - "combine": { - "p50": 611.0079884529114, - "p90": 623.0080127716064, - "p95": 630.3359866142273, - "p99": 657.2480201721191 - }, 
- "roundtrip": { - "p50": 1108.7679862976074, - "p90": 1123.9999532699585, - "p95": 1132.3200464248657, - "p99": 1233.63196849823 - }, - "isolatedSum": { - "p50": 1134.2719793319702, - "p90": 1164.9600267410278, - "p95": 1182.59197473526, - "p99": 1268.5760259628296 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1239375872, - "combineLogicalBytes": 1239375872, - "fanoutMean": 5.276611328125, - "recvTokensMax": 10883, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4096, - "globalTokens": 32768, - "dispatch": { - "p50": 984.5119714736938, - "p90": 1019.4560289382935, - "p95": 1036.128044128418, - "p99": 1103.0399799346924 - }, - "combine": { - "p50": 1114.6559715270996, - "p90": 1129.472017288208, - "p95": 1136.896014213562, - "p99": 1180.3200244903564 - }, - "roundtrip": { - "p50": 2057.408094406128, - "p90": 2091.423988342285, - "p95": 2103.264093399048, - "p99": 2406.8479537963867 - }, - "isolatedSum": { - "p50": 2099.1679430007935, - "p90": 2148.9280462265015, - "p95": 2173.02405834198, - "p99": 2283.360004425049 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2479669248, - "combineLogicalBytes": 2479669248, - "fanoutMean": 5.278564453125, - "recvTokensMax": 21730, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-ad612267", - "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|prefill|normal|none|none|0|tuned||0a3064a2af0dd39", - "colorKey": "h200_b5c683eb", - "comparisonKey": "b18bebc70bf6167d", - "schemaVersion": 3, - "generatedAt": "2026-06-27T00:03:03.036669+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_13", - "sku": "h200", - "backend": "deepep", - "phase": "prefill", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - 
"topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · bf16 · balanced", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "balanced", - "routingLabel": "balanced", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "0a3064a2af0dd39", - "workloadId": "set:6:2dad1a73ff872905", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28272035224", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272035224", - "createdAt": "2026-06-27T00:03:03.036669+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 132.60799646377563, - "p90": 151.8400013446808, - "p95": 157.3760062456131, - "p99": 181.47200345993042 - }, - "combine": { - "p50": 125.40799379348755, - "p90": 146.59200608730316, - "p95": 152.73599326610565, - "p99": 228.5439968109131 - }, - "roundtrip": { - "p50": 230.20799458026886, - "p90": 244.51200664043427, - "p95": 253.4080147743225, - "p99": 302.2719919681549 - }, - "isolatedSum": { - "p50": 258.0159902572632, - "p90": 298.43200743198395, - "p95": 310.11199951171875, - 
"p99": 410.0160002708435 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 117440512, - "combineLogicalBytes": 117440512, - "fanoutMean": 8, - "recvTokensMax": 1024, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 256, - "globalTokens": 2048, - "dispatch": { - "p50": 175.99999904632568, - "p90": 196.83200120925903, - "p95": 202.2400051355362, - "p99": 229.5680046081543 - }, - "combine": { - "p50": 175.58400332927704, - "p90": 189.82400000095367, - "p95": 193.79200041294098, - "p99": 265.5999958515167 - }, - "roundtrip": { - "p50": 323.0719864368439, - "p90": 339.29601311683655, - "p95": 345.3119993209839, - "p99": 369.4399893283844 - }, - "isolatedSum": { - "p50": 351.5840023756027, - "p90": 386.6560012102127, - "p95": 396.0320055484772, - "p99": 495.168000459671 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 234881024, - "combineLogicalBytes": 234881024, - "fanoutMean": 8, - "recvTokensMax": 2048, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 512, - "globalTokens": 4096, - "dispatch": { - "p50": 275.35998821258545, - "p90": 301.472008228302, - "p95": 311.19999289512634, - "p99": 359.0080142021179 - }, - "combine": { - "p50": 268.5120105743408, - "p90": 284.38401222229004, - "p95": 289.3120050430298, - "p99": 321.6319978237152 - }, - "roundtrip": { - "p50": 519.9040174484253, - "p90": 549.2479801177979, - "p95": 559.6160292625427, - "p99": 602.4960279464722 - }, - "isolatedSum": { - "p50": 543.8719987869263, - "p90": 585.856020450592, - "p95": 600.5119979381561, - "p99": 680.6400120258331 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 469762048, - "combineLogicalBytes": 469762048, - "fanoutMean": 8, - "recvTokensMax": 4096, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 1024, - "globalTokens": 8192, - "dispatch": { - "p50": 458.75200629234314, - "p90": 
610.4320287704468, - "p95": 643.1999802589417, - "p99": 663.7120246887207 - }, - "combine": { - "p50": 451.3919949531555, - "p90": 462.911993265152, - "p95": 471.23199701309204, - "p99": 480.8639883995056 - }, - "roundtrip": { - "p50": 882.0160031318665, - "p90": 899.4879722595215, - "p95": 906.6879749298096, - "p99": 926.688015460968 - }, - "isolatedSum": { - "p50": 910.1440012454987, - "p90": 1073.3440220355988, - "p95": 1114.4319772720337, - "p99": 1144.5760130882263 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 939524096, - "combineLogicalBytes": 939524096, - "fanoutMean": 8, - "recvTokensMax": 8192, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2048, - "globalTokens": 16384, - "dispatch": { - "p50": 819.5520043373108, - "p90": 837.664008140564, - "p95": 856.3200235366821, - "p99": 920.5440282821655 - }, - "combine": { - "p50": 816.6080117225647, - "p90": 834.879994392395, - "p95": 846.9439744949341, - "p99": 919.264018535614 - }, - "roundtrip": { - "p50": 1605.247974395752, - "p90": 1634.1760158538818, - "p95": 1654.9760103225708, - "p99": 1745.8560466766357 - }, - "isolatedSum": { - "p50": 1636.1600160598755, - "p90": 1672.544002532959, - "p95": 1703.2639980316162, - "p99": 1839.8080468177795 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1879048192, - "combineLogicalBytes": 1879048192, - "fanoutMean": 8, - "recvTokensMax": 16384, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4096, - "globalTokens": 32768, - "dispatch": { - "p50": 1564.7679567337036, - "p90": 1586.0799551010132, - "p95": 1601.8879413604736, - "p99": 1723.0720520019531 - }, - "combine": { - "p50": 1521.9199657440186, - "p90": 1538.7840270996094, - "p95": 1547.104001045227, - "p99": 1626.911997795105 - }, - "roundtrip": { - "p50": 3057.663917541504, - "p90": 3078.3679485321045, - "p95": 3098.1760025024414, - "p99": 3246.783971786499 - }, - 
"isolatedSum": { - "p50": 3086.687922477722, - "p90": 3124.8639822006226, - "p95": 3148.9919424057007, - "p99": 3349.984049797058 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 3758096384, - "combineLogicalBytes": 3758096384, - "fanoutMean": 8, - "recvTokensMax": 32768, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-30f874f3", - "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|prefill|normal|none|none|0|tuned||9e6ac678a09f7f8", - "colorKey": "h200_b5c683eb", - "comparisonKey": "b18bebc70bf6167d", - "schemaVersion": 3, - "generatedAt": "2026-06-26T23:56:38.753854+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_11", - "sku": "h200", - "backend": "deepep", - "phase": "prefill", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · bf16 · balanced", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "balanced", - "routingLabel": "balanced", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "9e6ac678a09f7f8", - "workloadId": "set:3:2dad1a73ff872905", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": 
null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28271834221", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271834221", - "createdAt": "2026-06-26T23:56:38.753854+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 131.99999928474426, - "p90": 147.74399995803833, - "p95": 155.68000078201294, - "p99": 168.7680035829544 - }, - "combine": { - "p50": 126.01600587368011, - "p90": 139.74399864673615, - "p95": 146.08000218868256, - "p99": 156.73600137233734 - }, - "roundtrip": { - "p50": 229.8559993505478, - "p90": 251.583993434906, - "p95": 260.0319981575012, - "p99": 275.07200837135315 - }, - "isolatedSum": { - "p50": 258.0160051584244, - "p90": 287.4879986047745, - "p95": 301.7600029706955, - "p99": 325.50400495529175 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 117440512, - "combineLogicalBytes": 117440512, - "fanoutMean": 8, - "recvTokensMax": 1024, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 512, - "globalTokens": 4096, - "dispatch": { - "p50": 270.7520127296448, - "p90": 292.89600253105164, - "p95": 304.9600124359131, - "p99": 352.6400029659271 - }, - "combine": { - "p50": 268.5759961605072, - "p90": 281.76000714302063, - "p95": 287.200003862381, - "p99": 301.31199955940247 - }, - "roundtrip": { - "p50": 514.4960284233093, - "p90": 532.7360033988953, - "p95": 542.1119928359985, - "p99": 571.615993976593 - }, - "isolatedSum": { - "p50": 539.328008890152, - "p90": 574.6560096740723, - "p95": 592.1600162982941, - "p99": 653.9520025253296 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 469762048, - "combineLogicalBytes": 469762048, - "fanoutMean": 8, - "recvTokensMax": 4096, - 
"stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2048, - "globalTokens": 16384, - "dispatch": { - "p50": 822.0800161361694, - "p90": 845.1840281486511, - "p95": 862.5919818878174, - "p99": 1313.3759498596191 - }, - "combine": { - "p50": 820.032000541687, - "p90": 837.7919793128967, - "p95": 846.3680148124695, - "p99": 873.3440041542053 - }, - "roundtrip": { - "p50": 1605.9520244598389, - "p90": 1629.3439865112305, - "p95": 1645.1200246810913, - "p99": 1737.1840476989746 - }, - "isolatedSum": { - "p50": 1642.1120166778564, - "p90": 1682.9760074615479, - "p95": 1708.9599967002869, - "p99": 2186.7199540138245 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1879048192, - "combineLogicalBytes": 1879048192, - "fanoutMean": 8, - "recvTokensMax": 16384, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-a2c76343", - "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|8|prefill|normal|none|none|0|tuned||7aa44c7b86748b9", - "colorKey": "h200_d0dfa19a", - "comparisonKey": "4ade4ca52869383d", - "schemaVersion": 3, - "generatedAt": "2026-06-26T23:56:42.077253+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_7", - "sku": "h200", - "backend": "deepep", - "phase": "prefill", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · bf16 · balanced-rank-local", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "balanced-rank-local", - "routingLabel": "balanced-rank-local", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": 
"normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "7aa44c7b86748b9", - "workloadId": "set:3:388ff74baef05c72", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28271841288", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271841288", - "createdAt": "2026-06-26T23:56:42.077253+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 85.08799970149994, - "p90": 109.40799862146378, - "p95": 117.47200042009354, - "p99": 164.38399255275726 - }, - "combine": { - "p50": 71.45600020885468, - "p90": 86.496002972126, - "p95": 91.26400202512741, - "p99": 106.20799660682678 - }, - "roundtrip": { - "p50": 132.51200318336487, - "p90": 162.7199947834015, - "p95": 173.8560050725937, - "p99": 221.5680032968521 - }, - "isolatedSum": { - "p50": 156.54399991035461, - "p90": 195.90400159358978, - "p95": 208.73600244522095, - "p99": 270.59198915958405 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 14680064, - "combineLogicalBytes": 14680064, - "fanoutMean": 1, - "recvTokensMax": 128, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 512, - "globalTokens": 4096, - "dispatch": { - "p50": 111.00800335407257, - "p90": 130.43199479579926, - "p95": 136.80000603199005, - "p99": 
165.75999557971954 - }, - "combine": { - "p50": 118.1119978427887, - "p90": 134.62400436401367, - "p95": 139.67999815940857, - "p99": 149.6639996767044 - }, - "roundtrip": { - "p50": 202.30400562286377, - "p90": 223.83999824523926, - "p95": 241.85599386692047, - "p99": 296.25600576400757 - }, - "isolatedSum": { - "p50": 229.12000119686127, - "p90": 265.0559991598129, - "p95": 276.4800041913986, - "p99": 315.42399525642395 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 58720256, - "combineLogicalBytes": 58720256, - "fanoutMean": 1, - "recvTokensMax": 512, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2048, - "globalTokens": 16384, - "dispatch": { - "p50": 189.63199853897095, - "p90": 209.72800254821777, - "p95": 218.4319943189621, - "p99": 254.14401292800903 - }, - "combine": { - "p50": 284.960001707077, - "p90": 298.7520098686218, - "p95": 303.2959997653961, - "p99": 331.9999873638153 - }, - "roundtrip": { - "p50": 447.3919868469238, - "p90": 475.42399168014526, - "p95": 484.8639965057373, - "p99": 529.9519896507263 - }, - "isolatedSum": { - "p50": 474.592000246048, - "p90": 508.4800124168396, - "p95": 521.7279940843582, - "p99": 586.1440002918243 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 234881024, - "combineLogicalBytes": 234881024, - "fanoutMean": 1, - "recvTokensMax": 2048, - "stragglerRank": 3, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-79209ba6", - "identity": "h200|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|balanced+eplb|8|prefill|normal|none|none|0|tuned||df54a9510825f71", - "colorKey": "h200_06544e53", - "comparisonKey": "822fd37c7222ef9b", - "schemaVersion": 3, - "generatedAt": "2026-06-27T00:03:05.638717+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_8", - "sku": "h200", - "backend": "deepep", - "phase": "prefill", - "mode": "normal", - "resourceMode": "tuned", - 
"suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · bf16 · balanced+eplb", - "model": "DeepSeek-V3 (EPLB physical)", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 288, - "routing": "balanced", - "routingLabel": "balanced+eplb", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": true, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "df54a9510825f71", - "workloadId": "set:6:2dad1a73ff872905", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": 1, - "eplbImbalanceAfter": 1, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28272038593", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272038593", - "createdAt": "2026-06-27T00:03:05.638717+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 95.83999961614609, - "p90": 112.73600161075592, - "p95": 121.15199863910675, - "p99": 153.4080058336258 - }, - "combine": { - "p50": 83.99999886751175, - "p90": 97.6639986038208, - "p95": 104.22399640083313, - "p99": 116.89600348472595 - }, - "roundtrip": { - "p50": 154.1759967803955, - "p90": 176.32000148296356, - "p95": 183.45600366592407, - "p99": 
211.29600703716278 - }, - "isolatedSum": { - "p50": 179.83999848365784, - "p90": 210.40000021457672, - "p95": 225.37599503993988, - "p99": 270.30400931835175 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 29360128, - "combineLogicalBytes": 29360128, - "fanoutMean": 2, - "recvTokensMax": 384, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 256, - "globalTokens": 2048, - "dispatch": { - "p50": 115.55200070142746, - "p90": 135.68000495433807, - "p95": 143.39199662208557, - "p99": 163.26400637626648 - }, - "combine": { - "p50": 103.35999727249146, - "p90": 120.03199756145477, - "p95": 127.32799351215363, - "p99": 154.4319987297058 - }, - "roundtrip": { - "p50": 197.2160041332245, - "p90": 215.58399498462677, - "p95": 223.26399385929108, - "p99": 242.5599992275238 - }, - "isolatedSum": { - "p50": 218.91199797391891, - "p90": 255.71200251579285, - "p95": 270.7199901342392, - "p99": 317.6960051059723 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 58720256, - "combineLogicalBytes": 58720256, - "fanoutMean": 2, - "recvTokensMax": 768, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 512, - "globalTokens": 4096, - "dispatch": { - "p50": 144.6080058813095, - "p90": 169.0559983253479, - "p95": 176.7680048942566, - "p99": 208.064004778862 - }, - "combine": { - "p50": 140.47999680042267, - "p90": 155.74400126934052, - "p95": 161.6639941930771, - "p99": 184.1920018196106 - }, - "roundtrip": { - "p50": 262.9440128803253, - "p90": 282.24000334739685, - "p95": 290.6560003757477, - "p99": 320.0640082359314 - }, - "isolatedSum": { - "p50": 285.0880026817322, - "p90": 324.7999995946884, - "p95": 338.4319990873337, - "p99": 392.2560065984726 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 117440512, - "combineLogicalBytes": 117440512, - "fanoutMean": 2, - "recvTokensMax": 1536, - "stragglerRank": 5, - "correct": true, - 
"samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 1024, - "globalTokens": 8192, - "dispatch": { - "p50": 205.4080069065094, - "p90": 229.21599447727203, - "p95": 238.49600553512573, - "p99": 255.67999482154846 - }, - "combine": { - "p50": 214.7199958562851, - "p90": 231.90400004386902, - "p95": 236.86400055885315, - "p99": 255.64798712730408 - }, - "roundtrip": { - "p50": 399.4239866733551, - "p90": 420.48001289367676, - "p95": 429.6000003814697, - "p99": 593.7280058860779 - }, - "isolatedSum": { - "p50": 420.1280027627945, - "p90": 461.11999452114105, - "p95": 475.3600060939789, - "p99": 511.32798194885254 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 234881024, - "combineLogicalBytes": 234881024, - "fanoutMean": 2, - "recvTokensMax": 3072, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2048, - "globalTokens": 16384, - "dispatch": { - "p50": 331.743985414505, - "p90": 350.23999214172363, - "p95": 361.08800768852234, - "p99": 406.0479998588562 - }, - "combine": { - "p50": 360.54399609565735, - "p90": 375.90399384498596, - "p95": 382.78400897979736, - "p99": 404.4159948825836 - }, - "roundtrip": { - "p50": 664.0639901161194, - "p90": 679.9039840698242, - "p95": 693.5679912567139, - "p99": 743.1359887123108 - }, - "isolatedSum": { - "p50": 692.2879815101624, - "p90": 726.1439859867096, - "p95": 743.8720166683197, - "p99": 810.4639947414398 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 469762048, - "combineLogicalBytes": 469762048, - "fanoutMean": 2, - "recvTokensMax": 6144, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4096, - "globalTokens": 32768, - "dispatch": { - "p50": 580.5439949035645, - "p90": 593.3759808540344, - "p95": 602.3679971694946, - "p99": 637.6640200614929 - }, - "combine": { - "p50": 628.3519864082336, - "p90": 641.1839723587036, - "p95": 648.3839750289917, - "p99": 680.9279918670654 - }, 
- "roundtrip": { - "p50": 1181.7920207977295, - "p90": 1199.295997619629, - "p95": 1210.3359699249268, - "p99": 1255.4240226745605 - }, - "isolatedSum": { - "p50": 1208.895981311798, - "p90": 1234.559953212738, - "p95": 1250.7519721984863, - "p99": 1318.5920119285583 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 939524096, - "combineLogicalBytes": 939524096, - "fanoutMean": 2, - "recvTokensMax": 12288, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-c14326f0", - "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|prefill|normal|none|none|0|tuned||38fd0bcf7109c32", - "colorKey": "h200_189562cd", - "comparisonKey": "b9475bb176588857", - "schemaVersion": 3, - "generatedAt": "2026-06-26T23:57:32.803411+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_13", - "sku": "h200", - "backend": "deepep", - "phase": "prefill", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · bf16 · hotspot-single", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "hotspot-single", - "routingLabel": "hotspot-single", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - 
"routingConsistent": true, - "traceSignature": "38fd0bcf7109c32", - "workloadId": "set:3:b952d4a43d688b50", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28271862413", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271862413", - "createdAt": "2026-06-26T23:57:32.803411+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 125.11999905109406, - "p90": 140.00000059604645, - "p95": 149.88799393177032, - "p99": 171.23199999332428 - }, - "combine": { - "p50": 118.65600198507309, - "p90": 132.64000415802002, - "p95": 137.60000467300415, - "p99": 164.95999693870544 - }, - "roundtrip": { - "p50": 221.5680032968521, - "p90": 238.14399540424347, - "p95": 251.71199440956116, - "p99": 291.6480004787445 - }, - "isolatedSum": { - "p50": 243.77600103616714, - "p90": 272.64000475406647, - "p95": 287.4879986047745, - "p99": 336.1919969320297 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 78102528, - "combineLogicalBytes": 78102528, - "fanoutMean": 5.3203125, - "recvTokensMax": 1024, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 512, - "globalTokens": 4096, - "dispatch": { - "p50": 245.02399563789368, - "p90": 257.31199979782104, - "p95": 265.8880054950714, - "p99": 298.72000217437744 - }, - "combine": { - "p50": 263.68001103401184, - "p90": 275.32801032066345, - "p95": 281.9199860095978, - "p99": 299.1679906845093 - }, - "roundtrip": { - "p50": 481.9839894771576, - "p90": 495.6800043582916, - "p95": 506.1759948730469, - "p99": 808.3199858665466 - }, - "isolatedSum": { - "p50": 508.7040066719055, - "p90": 532.6400101184845, - "p95": 
547.8079915046692, - "p99": 597.8879928588867 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 311091200, - "combineLogicalBytes": 311091200, - "fanoutMean": 5.2978515625, - "recvTokensMax": 4096, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2048, - "globalTokens": 16384, - "dispatch": { - "p50": 725.1200079917908, - "p90": 748.0959892272949, - "p95": 763.0079984664917, - "p99": 812.0959997177124 - }, - "combine": { - "p50": 799.3280291557312, - "p90": 813.9839768409729, - "p95": 823.5200047492981, - "p99": 875.6160140037537 - }, - "roundtrip": { - "p50": 1494.3679571151733, - "p90": 1516.1919593811035, - "p95": 1528.2560586929321, - "p99": 1709.8560333251953 - }, - "isolatedSum": { - "p50": 1524.448037147522, - "p90": 1562.0799660682678, - "p95": 1586.5280032157898, - "p99": 1687.712013721466 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1241511936, - "combineLogicalBytes": 1241511936, - "fanoutMean": 5.28570556640625, - "recvTokensMax": 16384, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-17171887", - "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|prefill|normal|none|none|0|tuned||bfbb64a166e9f1c", - "colorKey": "h200_189562cd", - "comparisonKey": "b9475bb176588857", - "schemaVersion": 3, - "generatedAt": "2026-06-27T00:05:28.346517+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_4", - "sku": "h200", - "backend": "deepep", - "phase": "prefill", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · bf16 · hotspot-single", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - 
"experts": 256, - "routing": "hotspot-single", - "routingLabel": "hotspot-single", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "bfbb64a166e9f1c", - "workloadId": "set:6:b952d4a43d688b50", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28272106904", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272106904", - "createdAt": "2026-06-27T00:05:28.346517+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 124.41600114107132, - "p90": 159.9999964237213, - "p95": 167.39200055599213, - "p99": 211.42399311065674 - }, - "combine": { - "p50": 118.01599711179733, - "p90": 146.5280055999756, - "p95": 150.27199685573578, - "p99": 162.9759967327118 - }, - "roundtrip": { - "p50": 220.2560007572174, - "p90": 253.91998887062073, - "p95": 258.432000875473, - "p99": 271.42399549484253 - }, - "isolatedSum": { - "p50": 242.43199825286865, - "p90": 306.5280020236969, - "p95": 317.6639974117279, - "p99": 374.39998984336853 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 78102528, - "combineLogicalBytes": 78102528, - "fanoutMean": 5.3203125, - "recvTokensMax": 1024, - "stragglerRank": 7, - "correct": 
true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 256, - "globalTokens": 2048, - "dispatch": { - "p50": 162.432000041008, - "p90": 170.6240028142929, - "p95": 175.04000663757324, - "p99": 188.38399648666382 - }, - "combine": { - "p50": 165.0879979133606, - "p90": 175.7120043039322, - "p95": 179.83999848365784, - "p99": 191.77600741386414 - }, - "roundtrip": { - "p50": 301.66399478912354, - "p90": 317.3759877681732, - "p95": 322.6880133152008, - "p99": 333.69600772857666 - }, - "isolatedSum": { - "p50": 327.5199979543686, - "p90": 346.3360071182251, - "p95": 354.8800051212311, - "p99": 380.16000390052795 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 156090368, - "combineLogicalBytes": 156090368, - "fanoutMean": 5.31640625, - "recvTokensMax": 2048, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 512, - "globalTokens": 4096, - "dispatch": { - "p50": 238.97600173950195, - "p90": 247.0400035381317, - "p95": 250.0160038471222, - "p99": 259.39199328422546 - }, - "combine": { - "p50": 261.9200050830841, - "p90": 275.2000093460083, - "p95": 279.58399057388306, - "p99": 300.4480004310608 - }, - "roundtrip": { - "p50": 482.33601450920105, - "p90": 499.1999864578247, - "p95": 507.3919892311096, - "p99": 570.527970790863 - }, - "isolatedSum": { - "p50": 500.89600682258606, - "p90": 522.24001288414, - "p95": 529.5999944210052, - "p99": 559.8399937152863 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 311091200, - "combineLogicalBytes": 311091200, - "fanoutMean": 5.2978515625, - "recvTokensMax": 4096, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 1024, - "globalTokens": 8192, - "dispatch": { - "p50": 392.8639888763428, - "p90": 402.72000432014465, - "p95": 406.23998641967773, - "p99": 445.3760087490082 - }, - "combine": { - "p50": 443.1680142879486, - "p90": 455.80801367759705, - "p95": 461.5040123462677, - "p99": 
481.53600096702576 - }, - "roundtrip": { - "p50": 817.5680041313171, - "p90": 835.2320194244385, - "p95": 845.3760147094727, - "p99": 893.887996673584 - }, - "isolatedSum": { - "p50": 836.0320031642914, - "p90": 858.5280179977417, - "p95": 867.7439987659454, - "p99": 926.9120097160339 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 620648448, - "combineLogicalBytes": 620648448, - "fanoutMean": 5.2847900390625, - "recvTokensMax": 8192, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2048, - "globalTokens": 16384, - "dispatch": { - "p50": 718.783974647522, - "p90": 730.3680181503296, - "p95": 737.280011177063, - "p99": 808.1920146942139 - }, - "combine": { - "p50": 797.4399924278259, - "p90": 810.8800053596497, - "p95": 820.032000541687, - "p99": 849.3760228157043 - }, - "roundtrip": { - "p50": 1490.3680086135864, - "p90": 1507.5839757919312, - "p95": 1519.2960500717163, - "p99": 1630.944013595581 - }, - "isolatedSum": { - "p50": 1516.223967075348, - "p90": 1541.2480235099792, - "p95": 1557.31201171875, - "p99": 1657.5680375099182 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1241511936, - "combineLogicalBytes": 1241511936, - "fanoutMean": 5.28570556640625, - "recvTokensMax": 16384, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4096, - "globalTokens": 32768, - "dispatch": { - "p50": 1386.0160112380981, - "p90": 1401.0560512542725, - "p95": 1406.1440229415894, - "p99": 1621.7279434204102 - }, - "combine": { - "p50": 1483.199954032898, - "p90": 1497.5999593734741, - "p95": 1502.17604637146, - "p99": 1538.0480289459229 - }, - "roundtrip": { - "p50": 2845.855951309204, - "p90": 2863.840103149414, - "p95": 2879.647970199585, - "p99": 3068.063974380493 - }, - "isolatedSum": { - "p50": 2869.215965270996, - "p90": 2898.6560106277466, - "p95": 2908.3200693130493, - "p99": 3159.775972366333 - }, - "roundtripMeasured": true, - 
"dispatchLogicalBytes": 2484242432, - "combineLogicalBytes": 2484242432, - "fanoutMean": 5.288299560546875, - "recvTokensMax": 32768, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-f354b9c6", - "identity": "h200|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|hotspot-single+eplb|8|prefill|normal|none|none|0|tuned||29ae5ace13636f8", - "colorKey": "h200_80a72891", - "comparisonKey": "52b3ac7f405659bf", - "schemaVersion": 3, - "generatedAt": "2026-06-27T00:05:25.966329+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_7", - "sku": "h200", - "backend": "deepep", - "phase": "prefill", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · bf16 · hotspot-single+eplb", - "model": "DeepSeek-V3 (EPLB physical)", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 288, - "routing": "hotspot-single", - "routingLabel": "hotspot-single+eplb", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": true, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "29ae5ace13636f8", - "workloadId": "set:6:b952d4a43d688b50", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": 1.8466796875, - "eplbImbalanceAfter": 1.0002700343276514, - "backendVersion": "1.2.1", - 
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28272110404", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272110404", - "createdAt": "2026-06-27T00:05:25.966329+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 117.24799871444702, - "p90": 134.17600095272064, - "p95": 140.25600254535675, - "p99": 158.84800255298615 - }, - "combine": { - "p50": 107.68000036478043, - "p90": 119.39200013875961, - "p95": 123.99999797344208, - "p99": 129.82399761676788 - }, - "roundtrip": { - "p50": 196.60800695419312, - "p90": 215.16799926757812, - "p95": 223.07200729846954, - "p99": 271.232008934021 - }, - "isolatedSum": { - "p50": 224.92799907922745, - "p90": 253.56800109148026, - "p95": 264.2560005187988, - "p99": 288.672000169754 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 77701120, - "combineLogicalBytes": 77701120, - "fanoutMean": 5.29296875, - "recvTokensMax": 697, - "stragglerRank": 2, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 256, - "globalTokens": 2048, - "dispatch": { - "p50": 145.9520012140274, - "p90": 165.69599509239197, - "p95": 176.35199427604675, - "p99": 214.49600160121918 - }, - "combine": { - "p50": 143.61600577831268, - "p90": 153.28000485897064, - "p95": 157.3439985513687, - "p99": 169.91999745368958 - }, - "roundtrip": { - "p50": 263.7439966201782, - "p90": 279.1680097579956, - "p95": 287.07200288772583, - "p99": 316.0960078239441 - }, - "isolatedSum": { - "p50": 289.5680069923401, - "p90": 318.9759999513626, - "p95": 333.69599282741547, - "p99": 384.41599905490875 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 155187200, - "combineLogicalBytes": 155187200, - "fanoutMean": 5.28564453125, - "recvTokensMax": 1372, - "stragglerRank": 4, - "correct": true, - 
"samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 512, - "globalTokens": 4096, - "dispatch": { - "p50": 204.96000349521637, - "p90": 222.81600534915924, - "p95": 232.1919947862625, - "p99": 259.552001953125 - }, - "combine": { - "p50": 222.4320024251938, - "p90": 239.51999843120575, - "p95": 245.2480047941208, - "p99": 269.3760097026825 - }, - "roundtrip": { - "p50": 400.83199739456177, - "p90": 421.7279851436615, - "p95": 431.3279986381531, - "p99": 482.14399814605713 - }, - "isolatedSum": { - "p50": 427.39200592041016, - "p90": 462.336003780365, - "p95": 477.4399995803833, - "p99": 528.9280116558075 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 311162880, - "combineLogicalBytes": 311162880, - "fanoutMean": 5.299072265625, - "recvTokensMax": 2761, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 1024, - "globalTokens": 8192, - "dispatch": { - "p50": 313.76001238822937, - "p90": 342.24000573158264, - "p95": 361.11998558044434, - "p99": 480.3520143032074 - }, - "combine": { - "p50": 359.20000076293945, - "p90": 373.79199266433716, - "p95": 381.9519877433777, - "p99": 407.77599811553955 - }, - "roundtrip": { - "p50": 644.2880034446716, - "p90": 664.1600131988525, - "p95": 676.4799952507019, - "p99": 748.8639950752258 - }, - "isolatedSum": { - "p50": 672.9600131511688, - "p90": 716.0319983959198, - "p95": 743.071973323822, - "p99": 888.128012418747 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 619974656, - "combineLogicalBytes": 619974656, - "fanoutMean": 5.279052734375, - "recvTokensMax": 5481, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2048, - "globalTokens": 16384, - "dispatch": { - "p50": 538.3679866790771, - "p90": 557.1839809417725, - "p95": 566.0160183906555, - "p99": 608.0639958381653 - }, - "combine": { - "p50": 618.9759969711304, - "p90": 630.3359866142273, - "p95": 636.2559795379639, - "p99": 
653.5680294036865 - }, - "roundtrip": { - "p50": 1131.2960386276245, - "p90": 1151.263952255249, - "p95": 1159.0080261230469, - "p99": 1297.9520559310913 - }, - "isolatedSum": { - "p50": 1157.3439836502075, - "p90": 1187.5199675559998, - "p95": 1202.2719979286194, - "p99": 1261.6320252418518 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1240020992, - "combineLogicalBytes": 1240020992, - "fanoutMean": 5.27935791015625, - "recvTokensMax": 10883, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4096, - "globalTokens": 32768, - "dispatch": { - "p50": 984.2240214347839, - "p90": 1003.5840272903442, - "p95": 1015.2319669723511, - "p99": 1056.480050086975 - }, - "combine": { - "p50": 1093.9840078353882, - "p90": 1107.9679727554321, - "p95": 1119.9040412902832, - "p99": 1297.055959701538 - }, - "roundtrip": { - "p50": 2046.5600490570068, - "p90": 2070.3680515289307, - "p95": 2092.5118923187256, - "p99": 2573.024034500122 - }, - "isolatedSum": { - "p50": 2078.208029270172, - "p90": 2111.5520000457764, - "p95": 2135.1360082626343, - "p99": 2353.536009788513 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2480414720, - "combineLogicalBytes": 2480414720, - "fanoutMean": 5.2801513671875, - "recvTokensMax": 21702, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-db979d37", - "identity": "h200|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|uniform+eplb|8|prefill|normal|none|none|0|tuned||2225dbbdab9bf2d", - "colorKey": "h200_580d7b05", - "comparisonKey": "b1de1efab41abbdf", - "schemaVersion": 3, - "generatedAt": "2026-06-27T00:02:37.856020+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_0", - "sku": "h200", - "backend": "deepep", - "phase": "prefill", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": 
"layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · bf16 · uniform+eplb", - "model": "DeepSeek-V3 (EPLB physical)", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 288, - "routing": "uniform", - "routingLabel": "uniform+eplb", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": true, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "2225dbbdab9bf2d", - "workloadId": "set:6:a426d66e479dc893", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": 1.006072998046875, - "eplbImbalanceAfter": 1.0000152587890625, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28272024348", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272024348", - "createdAt": "2026-06-27T00:02:37.856020+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 115.87200313806534, - "p90": 125.88800489902496, - "p95": 131.71200454235077, - "p99": 142.46399700641632 - }, - "combine": { - "p50": 103.96800190210342, - "p90": 115.48800021409988, - "p95": 122.68800288438797, - "p99": 204.3199986219406 - }, - "roundtrip": { - "p50": 195.5839991569519, - "p90": 206.65599405765533, - "p95": 212.25599944591522, - "p99": 236.03199422359467 - }, - "isolatedSum": { - "p50": 
219.84000504016876, - "p90": 241.37600511312485, - "p95": 254.40000742673874, - "p99": 346.78399562835693 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 77041664, - "combineLogicalBytes": 77041664, - "fanoutMean": 5.248046875, - "recvTokensMax": 686, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 256, - "globalTokens": 2048, - "dispatch": { - "p50": 147.39200472831726, - "p90": 165.50399363040924, - "p95": 174.20800030231476, - "p99": 197.11999595165253 - }, - "combine": { - "p50": 146.7839926481247, - "p90": 158.55999290943146, - "p95": 162.9440039396286, - "p99": 175.20000040531158 - }, - "roundtrip": { - "p50": 266.7520046234131, - "p90": 286.24001145362854, - "p95": 293.1840121746063, - "p99": 322.33598828315735 - }, - "isolatedSum": { - "p50": 294.17599737644196, - "p90": 324.0639865398407, - "p95": 337.15200424194336, - "p99": 372.3199963569641 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 154542080, - "combineLogicalBytes": 154542080, - "fanoutMean": 5.263671875, - "recvTokensMax": 1365, - "stragglerRank": 2, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 512, - "globalTokens": 4096, - "dispatch": { - "p50": 203.74399423599243, - "p90": 221.11999988555908, - "p95": 229.95199263095856, - "p99": 253.08799743652344 - }, - "combine": { - "p50": 222.52799570560455, - "p90": 234.72000658512115, - "p95": 238.24000358581543, - "p99": 259.3280076980591 - }, - "roundtrip": { - "p50": 398.17601442337036, - "p90": 415.74400663375854, - "p95": 422.04800248146057, - "p99": 459.26401019096375 - }, - "isolatedSum": { - "p50": 426.271989941597, - "p90": 455.84000647068024, - "p95": 468.191996216774, - "p99": 512.4160051345825 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 310589440, - "combineLogicalBytes": 310589440, - "fanoutMean": 5.289306640625, - "recvTokensMax": 2746, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - 
"trials": 3 - }, - { - "tokensPerRank": 1024, - "globalTokens": 8192, - "dispatch": { - "p50": 310.36800146102905, - "p90": 327.93599367141724, - "p95": 333.72798562049866, - "p99": 371.8079924583435 - }, - "combine": { - "p50": 355.9679985046387, - "p90": 369.4719970226288, - "p95": 383.07198882102966, - "p99": 431.4880073070526 - }, - "roundtrip": { - "p50": 641.9199705123901, - "p90": 660.9920263290405, - "p95": 668.9280271530151, - "p99": 718.9760208129883 - }, - "isolatedSum": { - "p50": 666.3359999656677, - "p90": 697.407990694046, - "p95": 716.7999744415283, - "p99": 803.2959997653961 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 619171840, - "combineLogicalBytes": 619171840, - "fanoutMean": 5.272216796875, - "recvTokensMax": 5467, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2048, - "globalTokens": 16384, - "dispatch": { - "p50": 537.1519923210144, - "p90": 553.5680055618286, - "p95": 562.6559853553772, - "p99": 586.9759917259216 - }, - "combine": { - "p50": 612.1600270271301, - "p90": 625.0240206718445, - "p95": 633.8880062103271, - "p99": 660.863995552063 - }, - "roundtrip": { - "p50": 1119.968056678772, - "p90": 1136.064052581787, - "p95": 1145.2480554580688, - "p99": 1263.4880542755127 - }, - "isolatedSum": { - "p50": 1149.3120193481445, - "p90": 1178.592026233673, - "p95": 1196.5439915657043, - "p99": 1247.8399872779846 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1238945792, - "combineLogicalBytes": 1238945792, - "fanoutMean": 5.2747802734375, - "recvTokensMax": 10913, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4096, - "globalTokens": 32768, - "dispatch": { - "p50": 1003.8080215454102, - "p90": 1027.008056640625, - "p95": 1034.432053565979, - "p99": 1060.1919889450073 - }, - "combine": { - "p50": 1111.0399961471558, - "p90": 1125.8879899978638, - "p95": 1135.3280544281006, - "p99": 1165.727972984314 
- }, - "roundtrip": { - "p50": 2077.5039196014404, - "p90": 2101.6640663146973, - "p95": 2114.016056060791, - "p99": 2324.8000144958496 - }, - "isolatedSum": { - "p50": 2114.848017692566, - "p90": 2152.8960466384888, - "p95": 2169.7601079940796, - "p99": 2225.9199619293213 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2481747968, - "combineLogicalBytes": 2481747968, - "fanoutMean": 5.282989501953125, - "recvTokensMax": 21789, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-59b7e35e", - "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|prefill|normal|none|none|0|tuned||4caecd33bedf786", - "colorKey": "h200_b6aa6110", - "comparisonKey": "b89b8b0279afe699", - "schemaVersion": 3, - "generatedAt": "2026-06-26T23:56:59.891356+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_2", - "sku": "h200", - "backend": "deepep", - "phase": "prefill", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · bf16 · zipf", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "zipf", - "routingLabel": "zipf", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - 
"traceSignature": "4caecd33bedf786", - "workloadId": "set:3:830e36e88869e222", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28271848591", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271848591", - "createdAt": "2026-06-26T23:56:59.891356+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 122.46400117874146, - "p90": 133.85599851608276, - "p95": 141.15199446678162, - "p99": 168.12799870967865 - }, - "combine": { - "p50": 112.5440001487732, - "p90": 125.791996717453, - "p95": 132.1599930524826, - "p99": 143.327996134758 - }, - "roundtrip": { - "p50": 215.7440036535263, - "p90": 240.03200232982635, - "p95": 247.13599681854248, - "p99": 281.5360128879547 - }, - "isolatedSum": { - "p50": 235.00800132751465, - "p90": 259.64799523353577, - "p95": 273.3119875192642, - "p99": 311.45599484443665 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 49946624, - "combineLogicalBytes": 49946624, - "fanoutMean": 3.40234375, - "recvTokensMax": 1022, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 512, - "globalTokens": 4096, - "dispatch": { - "p50": 238.0480021238327, - "p90": 249.439999461174, - "p95": 253.34399938583374, - "p99": 271.39198780059814 - }, - "combine": { - "p50": 259.3280076980591, - "p90": 273.6639976501465, - "p95": 278.1440019607544, - "p99": 748.5759854316711 - }, - "roundtrip": { - "p50": 472.7039933204651, - "p90": 492.76798963546753, - "p95": 497.5360035896301, - "p99": 524.8640179634094 - }, - "isolatedSum": { - "p50": 497.3760098218918, - "p90": 523.1039971113205, - "p95": 531.4880013465881, - "p99": 1019.9679732322693 - }, 
- "roundtripMeasured": true, - "dispatchLogicalBytes": 201678848, - "combineLogicalBytes": 201678848, - "fanoutMean": 3.4345703125, - "recvTokensMax": 4094, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2048, - "globalTokens": 16384, - "dispatch": { - "p50": 671.0079908370972, - "p90": 682.7840209007263, - "p95": 691.6159987449646, - "p99": 782.4000120162964 - }, - "combine": { - "p50": 788.0319952964783, - "p90": 803.0400276184082, - "p95": 810.4000091552734, - "p99": 879.2639970779419 - }, - "roundtrip": { - "p50": 1432.5439929962158, - "p90": 1457.2800397872925, - "p95": 1470.2719449996948, - "p99": 1641.3120031356812 - }, - "isolatedSum": { - "p50": 1459.0399861335754, - "p90": 1485.8240485191345, - "p95": 1502.016007900238, - "p99": 1661.6640090942383 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 808822784, - "combineLogicalBytes": 808822784, - "fanoutMean": 3.44354248046875, - "recvTokensMax": 16380, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-520b6c38", - "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|prefill|normal|none|none|0|tuned||b5217e990b95f86", - "colorKey": "h200_b6aa6110", - "comparisonKey": "b89b8b0279afe699", - "schemaVersion": 3, - "generatedAt": "2026-06-27T00:03:30.997265+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_9", - "sku": "h200", - "backend": "deepep", - "phase": "prefill", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · bf16 · zipf", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "zipf", - "routingLabel": "zipf", - 
"routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "b5217e990b95f86", - "workloadId": "set:6:830e36e88869e222", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28272049186", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272049186", - "createdAt": "2026-06-27T00:03:30.997265+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 122.14399874210358, - "p90": 134.14399325847626, - "p95": 146.01600170135498, - "p99": 162.62400150299072 - }, - "combine": { - "p50": 112.92800307273865, - "p90": 121.11999839544296, - "p95": 126.68800354003906, - "p99": 141.50400459766388 - }, - "roundtrip": { - "p50": 214.30400013923645, - "p90": 228.28799486160278, - "p95": 232.57599771022797, - "p99": 247.48800694942474 - }, - "isolatedSum": { - "p50": 235.07200181484222, - "p90": 255.26399165391922, - "p95": 272.70400524139404, - "p99": 304.1280061006546 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 49946624, - "combineLogicalBytes": 49946624, - "fanoutMean": 3.40234375, - "recvTokensMax": 1022, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 256, 
- "globalTokens": 2048, - "dispatch": { - "p50": 170.43200135231018, - "p90": 187.71199882030487, - "p95": 196.0960030555725, - "p99": 223.00800681114197 - }, - "combine": { - "p50": 163.87200355529785, - "p90": 181.60000443458557, - "p95": 186.36800348758698, - "p99": 197.02400267124176 - }, - "roundtrip": { - "p50": 303.8400113582611, - "p90": 328.000009059906, - "p95": 333.0560028553009, - "p99": 366.2079870700836 - }, - "isolatedSum": { - "p50": 334.30400490760803, - "p90": 369.31200325489044, - "p95": 382.4640065431595, - "p99": 420.0320094823837 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 100509696, - "combineLogicalBytes": 100509696, - "fanoutMean": 3.42333984375, - "recvTokensMax": 2046, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 512, - "globalTokens": 4096, - "dispatch": { - "p50": 235.83999276161194, - "p90": 244.6720004081726, - "p95": 248.86399507522583, - "p99": 265.4080092906952 - }, - "combine": { - "p50": 259.90399718284607, - "p90": 269.6639895439148, - "p95": 276.06400847435, - "p99": 299.0399897098541 - }, - "roundtrip": { - "p50": 473.471999168396, - "p90": 492.12801456451416, - "p95": 498.3679950237274, - "p99": 528.544008731842 - }, - "isolatedSum": { - "p50": 495.743989944458, - "p90": 514.3359899520874, - "p95": 524.9280035495758, - "p99": 564.4479990005493 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 201678848, - "combineLogicalBytes": 201678848, - "fanoutMean": 3.4345703125, - "recvTokensMax": 4094, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 1024, - "globalTokens": 8192, - "dispatch": { - "p50": 381.087988615036, - "p90": 397.47199416160583, - "p95": 404.35200929641724, - "p99": 493.4079945087433 - }, - "combine": { - "p50": 437.27999925613403, - "p90": 450.8799910545349, - "p95": 458.3039879798889, - "p99": 476.25601291656494 - }, - "roundtrip": { - "p50": 790.5600070953369, - 
"p90": 804.9920201301575, - "p95": 813.9200210571289, - "p99": 841.5359854698181 - }, - "isolatedSum": { - "p50": 818.36798787117, - "p90": 848.3519852161407, - "p95": 862.6559972763062, - "p99": 969.6640074253082 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 405035008, - "combineLogicalBytes": 405035008, - "fanoutMean": 3.4488525390625, - "recvTokensMax": 8189, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2048, - "globalTokens": 16384, - "dispatch": { - "p50": 672.1280217170715, - "p90": 685.0879788398743, - "p95": 689.9200081825256, - "p99": 743.4560060501099 - }, - "combine": { - "p50": 783.1360101699829, - "p90": 793.0560111999512, - "p95": 796.6399788856506, - "p99": 806.5599799156189 - }, - "roundtrip": { - "p50": 1425.7919788360596, - "p90": 1442.0160055160522, - "p95": 1455.4879665374756, - "p99": 1550.75204372406 - }, - "isolatedSum": { - "p50": 1455.2640318870544, - "p90": 1478.1439900398254, - "p95": 1486.5599870681763, - "p99": 1550.0159859657288 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 808822784, - "combineLogicalBytes": 808822784, - "fanoutMean": 3.44354248046875, - "recvTokensMax": 16380, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4096, - "globalTokens": 32768, - "dispatch": { - "p50": 1269.1839933395386, - "p90": 1284.1919660568237, - "p95": 1291.8720245361328, - "p99": 1339.2640352249146 - }, - "combine": { - "p50": 1472.8000164031982, - "p90": 1489.8879528045654, - "p95": 1502.17604637146, - "p99": 1692.639946937561 - }, - "roundtrip": { - "p50": 2711.7760181427, - "p90": 2730.015993118286, - "p95": 2753.5040378570557, - "p99": 2926.464080810547 - }, - "isolatedSum": { - "p50": 2741.984009742737, - "p90": 2774.079918861389, - "p95": 2794.048070907593, - "p99": 3031.9039821624756 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1619795968, - "combineLogicalBytes": 1619795968, - 
"fanoutMean": 3.4481201171875, - "recvTokensMax": 32761, - "stragglerRank": 2, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-f5907eae", - "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|prefill|normal|none|none|0|tuned||3dd868cb33839a3", - "colorKey": "h200_c5b3365a", - "comparisonKey": "d19848fb38a35ed8", - "schemaVersion": 3, - "generatedAt": "2026-06-26T23:57:20.998823+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_3", - "sku": "h200", - "backend": "deepep", - "phase": "prefill", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · bf16 · zipf-heavy", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "zipf-heavy", - "routingLabel": "zipf-heavy", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "3dd868cb33839a3", - "workloadId": "set:3:1ca614e23cc66be1", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": 
"28271855852", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271855852", - "createdAt": "2026-06-26T23:57:20.998823+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 117.72800236940384, - "p90": 127.48800218105316, - "p95": 133.91999900341034, - "p99": 146.11199498176575 - }, - "combine": { - "p50": 107.29599744081497, - "p90": 117.3119992017746, - "p95": 122.43200093507767, - "p99": 134.11200046539307 - }, - "roundtrip": { - "p50": 205.85599541664124, - "p90": 220.09600698947906, - "p95": 228.5120040178299, - "p99": 244.09599602222443 - }, - "isolatedSum": { - "p50": 225.0239998102188, - "p90": 244.80000138282776, - "p95": 256.351999938488, - "p99": 280.2239954471588 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 22650880, - "combineLogicalBytes": 22650880, - "fanoutMean": 1.54296875, - "recvTokensMax": 1024, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 512, - "globalTokens": 4096, - "dispatch": { - "p50": 232.89600014686584, - "p90": 245.12000381946564, - "p95": 253.4399926662445, - "p99": 292.03200340270996 - }, - "combine": { - "p50": 245.34399807453156, - "p90": 260.25599241256714, - "p95": 269.27998661994934, - "p99": 297.37600684165955 - }, - "roundtrip": { - "p50": 454.68801259994507, - "p90": 472.6080000400543, - "p95": 486.6560101509094, - "p99": 522.4639773368835 - }, - "isolatedSum": { - "p50": 478.2399982213974, - "p90": 505.3759962320328, - "p95": 522.7199792861938, - "p99": 589.4080102443695 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 91521024, - "combineLogicalBytes": 91521024, - "fanoutMean": 1.55859375, - "recvTokensMax": 4096, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2048, - "globalTokens": 16384, - "dispatch": { - "p50": 662.335991859436, - "p90": 673.632025718689, 
- "p95": 681.2160015106201, - "p99": 744.5759773254395 - }, - "combine": { - "p50": 772.5759744644165, - "p90": 791.8720245361328, - "p95": 806.6239953041077, - "p99": 855.2640080451965 - }, - "roundtrip": { - "p50": 1405.9840440750122, - "p90": 1435.2960586547852, - "p95": 1455.7119607925415, - "p99": 1716.3519859313965 - }, - "isolatedSum": { - "p50": 1434.9119663238525, - "p90": 1465.5040502548218, - "p95": 1487.8399968147278, - "p99": 1599.839985370636 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 368062464, - "combineLogicalBytes": 368062464, - "fanoutMean": 1.5670166015625, - "recvTokensMax": 16384, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-75dcaec2", - "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|prefill|normal|none|none|0|tuned||bbcd1d9d8d1e4fe", - "colorKey": "h200_c5b3365a", - "comparisonKey": "d19848fb38a35ed8", - "schemaVersion": 3, - "generatedAt": "2026-06-27T00:04:55.820445+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_8", - "sku": "h200", - "backend": "deepep", - "phase": "prefill", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · bf16 · zipf-heavy", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "zipf-heavy", - "routingLabel": "zipf-heavy", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": 
"backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "bbcd1d9d8d1e4fe", - "workloadId": "set:6:1ca614e23cc66be1", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28272093905", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272093905", - "createdAt": "2026-06-27T00:04:55.820445+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 118.14399808645248, - "p90": 130.17599284648895, - "p95": 135.5839967727661, - "p99": 147.07200229167938 - }, - "combine": { - "p50": 108.83200168609619, - "p90": 120.57600170373917, - "p95": 127.55200266838074, - "p99": 140.73599874973297 - }, - "roundtrip": { - "p50": 206.65599405765533, - "p90": 219.04000639915466, - "p95": 224.48000311851501, - "p99": 242.0479953289032 - }, - "isolatedSum": { - "p50": 226.97599977254868, - "p90": 250.75199455022812, - "p95": 263.13599944114685, - "p99": 287.80800104141235 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 22650880, - "combineLogicalBytes": 22650880, - "fanoutMean": 1.54296875, - "recvTokensMax": 1024, - "stragglerRank": 1, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 256, - "globalTokens": 2048, - "dispatch": { - "p50": 155.45600652694702, - "p90": 165.95199704170227, - "p95": 173.92000555992126, - "p99": 202.39999890327454 - }, - "combine": { - "p50": 150.94399452209473, - "p90": 162.59199380874634, - "p95": 170.3680008649826, - "p99": 186.24000251293182 - }, - "roundtrip": { - "p50": 287.6800000667572, - "p90": 
302.94400453567505, - "p95": 309.7279965877533, - "p99": 357.7919900417328 - }, - "isolatedSum": { - "p50": 306.40000104904175, - "p90": 328.5439908504486, - "p95": 344.28800642490387, - "p99": 388.64000141620636 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 45688832, - "combineLogicalBytes": 45688832, - "fanoutMean": 1.55615234375, - "recvTokensMax": 2048, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 512, - "globalTokens": 4096, - "dispatch": { - "p50": 230.9119999408722, - "p90": 242.11199581623077, - "p95": 249.66399371623993, - "p99": 269.8880136013031 - }, - "combine": { - "p50": 247.16800451278687, - "p90": 260.5760097503662, - "p95": 264.6400034427643, - "p99": 289.66400027275085 - }, - "roundtrip": { - "p50": 456.86399936676025, - "p90": 473.28001260757446, - "p95": 481.1519980430603, - "p99": 534.8799824714661 - }, - "isolatedSum": { - "p50": 478.08000445365906, - "p90": 502.688005566597, - "p95": 514.3039971590042, - "p99": 559.552013874054 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 91521024, - "combineLogicalBytes": 91521024, - "fanoutMean": 1.55859375, - "recvTokensMax": 4096, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 1024, - "globalTokens": 8192, - "dispatch": { - "p50": 374.2719888687134, - "p90": 386.6559863090515, - "p95": 397.5679874420166, - "p99": 506.0480237007141 - }, - "combine": { - "p50": 423.1039881706238, - "p90": 436.0319972038269, - "p95": 440.8319890499115, - "p99": 470.97599506378174 - }, - "roundtrip": { - "p50": 771.232008934021, - "p90": 783.9679718017578, - "p95": 795.5520153045654, - "p99": 828.4800052642822 - }, - "isolatedSum": { - "p50": 797.3759770393372, - "p90": 822.6879835128784, - "p95": 838.3999764919281, - "p99": 977.0240187644958 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 183916544, - "combineLogicalBytes": 183916544, - "fanoutMean": 
1.5660400390625, - "recvTokensMax": 8192, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2048, - "globalTokens": 16384, - "dispatch": { - "p50": 661.8559956550598, - "p90": 673.9199757575989, - "p95": 679.6159744262695, - "p99": 697.5039839744568 - }, - "combine": { - "p50": 770.6559896469116, - "p90": 781.1520099639893, - "p95": 786.7839932441711, - "p99": 830.560028553009 - }, - "roundtrip": { - "p50": 1405.791997909546, - "p90": 1421.280026435852, - "p95": 1432.2559833526611, - "p99": 1481.6319942474365 - }, - "isolatedSum": { - "p50": 1432.5119853019714, - "p90": 1455.0719857215881, - "p95": 1466.3999676704407, - "p99": 1528.0640125274658 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 368062464, - "combineLogicalBytes": 368062464, - "fanoutMean": 1.5670166015625, - "recvTokensMax": 16384, - "stragglerRank": 1, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4096, - "globalTokens": 32768, - "dispatch": { - "p50": 1246.2400197982788, - "p90": 1261.631965637207, - "p95": 1269.5679664611816, - "p99": 1482.5600385665894 - }, - "combine": { - "p50": 1440.384030342102, - "p90": 1459.455966949463, - "p95": 1471.519947052002, - "p99": 1634.0479850769043 - }, - "roundtrip": { - "p50": 2662.400007247925, - "p90": 2688.096046447754, - "p95": 2712.4478816986084, - "p99": 2846.719980239868 - }, - "isolatedSum": { - "p50": 2686.624050140381, - "p90": 2721.08793258667, - "p95": 2741.0879135131836, - "p99": 3116.6080236434937 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 734720000, - "combineLogicalBytes": 734720000, - "fanoutMean": 1.56402587890625, - "recvTokensMax": 32768, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-9bcc6cfd", - "identity": "h200|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-heavy+eplb|8|prefill|normal|none|none|0|tuned||46855e7fa6754eb", - "colorKey": 
"h200_06aa1194", - "comparisonKey": "fe01776775c5fb5e", - "schemaVersion": 3, - "generatedAt": "2026-06-27T00:05:23.968491+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_3", - "sku": "h200", - "backend": "deepep", - "phase": "prefill", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · bf16 · zipf-heavy+eplb", - "model": "DeepSeek-V3 (EPLB physical)", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 288, - "routing": "zipf-heavy", - "routingLabel": "zipf-heavy+eplb", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": true, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "46855e7fa6754eb", - "workloadId": "set:6:1ca614e23cc66be1", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": 7.38995361328125, - "eplbImbalanceAfter": 1.0000210716610862, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28272097307", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272097307", - "createdAt": "2026-06-27T00:05:23.968491+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 128, - "globalTokens": 1024, - 
"dispatch": { - "p50": 117.79200285673141, - "p90": 122.94399738311768, - "p95": 127.96799838542938, - "p99": 138.33600282669067 - }, - "combine": { - "p50": 104.38399761915207, - "p90": 111.35999858379364, - "p95": 117.79200285673141, - "p99": 128.63999605178833 - }, - "roundtrip": { - "p50": 197.82400131225586, - "p90": 205.85599541664124, - "p95": 212.351992726326, - "p99": 252.86400318145752 - }, - "isolatedSum": { - "p50": 222.17600047588348, - "p90": 234.30399596691132, - "p95": 245.7600012421608, - "p99": 266.975998878479 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 79206400, - "combineLogicalBytes": 79206400, - "fanoutMean": 5.3955078125, - "recvTokensMax": 713, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 256, - "globalTokens": 2048, - "dispatch": { - "p50": 145.60000598430634, - "p90": 154.14400398731232, - "p95": 158.39999914169312, - "p99": 173.63199591636658 - }, - "combine": { - "p50": 145.6959992647171, - "p90": 150.56000649929047, - "p95": 155.2640050649643, - "p99": 165.56799411773682 - }, - "roundtrip": { - "p50": 267.520010471344, - "p90": 276.99199318885803, - "p95": 283.03998708724976, - "p99": 307.3599934577942 - }, - "isolatedSum": { - "p50": 291.29600524902344, - "p90": 304.7040104866028, - "p95": 313.6640042066574, - "p99": 339.1999900341034 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 159330304, - "combineLogicalBytes": 159330304, - "fanoutMean": 5.4267578125, - "recvTokensMax": 1436, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 512, - "globalTokens": 4096, - "dispatch": { - "p50": 204.79999482631683, - "p90": 213.85599672794342, - "p95": 218.27200055122375, - "p99": 238.52799832820892 - }, - "combine": { - "p50": 219.4879949092865, - "p90": 226.9439995288849, - "p95": 233.66400599479675, - "p99": 274.944007396698 - }, - "roundtrip": { - "p50": 400.160014629364, - "p90": 
409.7279906272888, - "p95": 419.16799545288086, - "p99": 445.6320106983185 - }, - "isolatedSum": { - "p50": 424.28798973560333, - "p90": 440.7999962568283, - "p95": 451.9360065460205, - "p99": 513.4720057249069 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 319535104, - "combineLogicalBytes": 319535104, - "fanoutMean": 5.441650390625, - "recvTokensMax": 2897, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 1024, - "globalTokens": 8192, - "dispatch": { - "p50": 317.7599906921387, - "p90": 327.87200808525085, - "p95": 340.06398916244507, - "p99": 393.3440148830414 - }, - "combine": { - "p50": 356.1600148677826, - "p90": 364.6079897880554, - "p95": 369.82399225234985, - "p99": 396.8319892883301 - }, - "roundtrip": { - "p50": 649.6959924697876, - "p90": 660.3519916534424, - "p95": 664.7040247917175, - "p99": 683.4239959716797 - }, - "isolatedSum": { - "p50": 673.9200055599213, - "p90": 692.4799978733063, - "p95": 709.8879814147949, - "p99": 790.1760041713715 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 638410752, - "combineLogicalBytes": 638410752, - "fanoutMean": 5.43603515625, - "recvTokensMax": 5815, - "stragglerRank": 1, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2048, - "globalTokens": 16384, - "dispatch": { - "p50": 541.1199927330017, - "p90": 549.8560070991516, - "p95": 555.4239749908447, - "p99": 643.6160206794739 - }, - "combine": { - "p50": 614.8800253868103, - "p90": 626.3039708137512, - "p95": 632.2240233421326, - "p99": 680.8639764785767 - }, - "roundtrip": { - "p50": 1131.7440271377563, - "p90": 1142.7839994430542, - "p95": 1148.192048072815, - "p99": 1196.768045425415 - }, - "isolatedSum": { - "p50": 1156.000018119812, - "p90": 1176.1599779129028, - "p95": 1187.6479983329773, - "p99": 1324.4799971580505 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1275144192, - "combineLogicalBytes": 1275144192, - "fanoutMean": 
5.42889404296875, - "recvTokensMax": 11606, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4096, - "globalTokens": 32768, - "dispatch": { - "p50": 1027.9680490493774, - "p90": 1046.720027923584, - "p95": 1055.4239749908447, - "p99": 1100.000023841858 - }, - "combine": { - "p50": 1124.384045600891, - "p90": 1135.9679698944092, - "p95": 1140.8640146255493, - "p99": 1170.9760427474976 - }, - "roundtrip": { - "p50": 2114.5920753479004, - "p90": 2138.495922088623, - "p95": 2152.127981185913, - "p99": 2480.2560806274414 - }, - "isolatedSum": { - "p50": 2152.3520946502686, - "p90": 2182.687997817993, - "p95": 2196.287989616394, - "p99": 2270.9760665893555 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2546374656, - "combineLogicalBytes": 2546374656, - "fanoutMean": 5.420562744140625, - "recvTokensMax": 23170, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-e075077e", - "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-mild|8|prefill|normal|none|none|0|tuned||cf93f8f6b52e428", - "colorKey": "h200_6a794fcd", - "comparisonKey": "b6c24dab2941895d", - "schemaVersion": 3, - "generatedAt": "2026-06-27T00:04:10.125267+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_2", - "sku": "h200", - "backend": "deepep", - "phase": "prefill", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · bf16 · zipf-mild", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "zipf-mild", - "routingLabel": "zipf-mild", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", 
- "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "cf93f8f6b52e428", - "workloadId": "set:6:a224603e5a1640b8", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28272065129", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272065129", - "createdAt": "2026-06-27T00:04:10.125267+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 126.52799487113953, - "p90": 140.3840035200119, - "p95": 146.17599546909332, - "p99": 177.08800733089447 - }, - "combine": { - "p50": 116.73600226640701, - "p90": 128.86400520801544, - "p95": 133.63200426101685, - "p99": 143.8719928264618 - }, - "roundtrip": { - "p50": 216.35200083255768, - "p90": 234.3360036611557, - "p95": 240.25599658489227, - "p99": 277.3120105266571 - }, - "isolatedSum": { - "p50": 243.26399713754654, - "p90": 269.24800872802734, - "p95": 279.80799973011017, - "p99": 320.96000015735626 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 70160384, - "combineLogicalBytes": 70160384, - "fanoutMean": 4.779296875, - "recvTokensMax": 987, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 256, - "globalTokens": 2048, - "dispatch": { - "p50": 163.96799683570862, - "p90": 176.256000995636, - 
"p95": 180.4479956626892, - "p99": 201.50400698184967 - }, - "combine": { - "p50": 160.41600704193115, - "p90": 173.0560064315796, - "p95": 178.3680021762848, - "p99": 186.75200641155243 - }, - "roundtrip": { - "p50": 298.94399642944336, - "p90": 319.487988948822, - "p95": 328.0960023403168, - "p99": 354.65601086616516 - }, - "isolatedSum": { - "p50": 324.38400387763977, - "p90": 349.3120074272156, - "p95": 358.815997838974, - "p99": 388.2560133934021 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 140879872, - "combineLogicalBytes": 140879872, - "fanoutMean": 4.79833984375, - "recvTokensMax": 1972, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 512, - "globalTokens": 4096, - "dispatch": { - "p50": 237.72799968719482, - "p90": 252.48000025749207, - "p95": 263.8719975948334, - "p99": 307.16800689697266 - }, - "combine": { - "p50": 262.1760070323944, - "p90": 279.1999876499176, - "p95": 284.7999930381775, - "p99": 311.8399977684021 - }, - "roundtrip": { - "p50": 477.82400250434875, - "p90": 500.70399045944214, - "p95": 516.5759921073914, - "p99": 701.632022857666 - }, - "isolatedSum": { - "p50": 499.90400671958923, - "p90": 531.6799879074097, - "p95": 548.6719906330109, - "p99": 619.0080046653748 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 282333184, - "combineLogicalBytes": 282333184, - "fanoutMean": 4.80810546875, - "recvTokensMax": 3936, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 1024, - "globalTokens": 8192, - "dispatch": { - "p50": 378.495991230011, - "p90": 390.04799723625183, - "p95": 399.58399534225464, - "p99": 429.6320080757141 - }, - "combine": { - "p50": 439.9360120296478, - "p90": 452.2880017757416, - "p95": 457.15200901031494, - "p99": 474.047988653183 - }, - "roundtrip": { - "p50": 797.4079847335815, - "p90": 816.32000207901, - "p95": 828.6399841308594, - "p99": 955.839991569519 - }, - "isolatedSum": { 
- "p50": 818.4320032596588, - "p90": 842.3359990119934, - "p95": 856.7360043525696, - "p99": 903.6799967288971 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 566716416, - "combineLogicalBytes": 566716416, - "fanoutMean": 4.8255615234375, - "recvTokensMax": 7855, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2048, - "globalTokens": 16384, - "dispatch": { - "p50": 679.4559955596924, - "p90": 694.208025932312, - "p95": 704.255998134613, - "p99": 742.8159713745117 - }, - "combine": { - "p50": 780.7040214538574, - "p90": 795.1679825782776, - "p95": 804.7360181808472, - "p99": 879.7439932823181 - }, - "roundtrip": { - "p50": 1432.0640563964844, - "p90": 1453.279972076416, - "p95": 1465.8559560775757, - "p99": 1602.3039817810059 - }, - "isolatedSum": { - "p50": 1460.1600170135498, - "p90": 1489.3760085105896, - "p95": 1508.9920163154602, - "p99": 1622.5599646568298 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1132285952, - "combineLogicalBytes": 1132285952, - "fanoutMean": 4.8206787109375, - "recvTokensMax": 15694, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4096, - "globalTokens": 32768, - "dispatch": { - "p50": 1287.071943283081, - "p90": 1304.8959970474243, - "p95": 1310.7839822769165, - "p99": 1432.2240352630615 - }, - "combine": { - "p50": 1463.6160135269165, - "p90": 1483.8080406188965, - "p95": 1511.7119550704956, - "p99": 1699.0400552749634 - }, - "roundtrip": { - "p50": 2723.9038944244385, - "p90": 2744.607925415039, - "p95": 2758.2719326019287, - "p99": 2967.616081237793 - }, - "isolatedSum": { - "p50": 2750.6879568099976, - "p90": 2788.704037666321, - "p95": 2822.495937347412, - "p99": 3131.264090538025 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2267840512, - "combineLogicalBytes": 2267840512, - "fanoutMean": 4.82763671875, - "recvTokensMax": 31357, - "stragglerRank": 6, - "correct": true, - 
"samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-f4768a96", - "identity": "h200|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-mild+eplb|8|prefill|normal|none|none|0|tuned||27ddc85ded0add9", - "colorKey": "h200_b2ffaf91", - "comparisonKey": "d826aaa5f1321f31", - "schemaVersion": 3, - "generatedAt": "2026-06-27T00:04:16.163335+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_12", - "sku": "h200", - "backend": "deepep", - "phase": "prefill", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · bf16 · zipf-mild+eplb", - "model": "DeepSeek-V3 (EPLB physical)", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 288, - "routing": "zipf-mild", - "routingLabel": "zipf-mild+eplb", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": true, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "27ddc85ded0add9", - "workloadId": "set:6:a224603e5a1640b8", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": 2.545684814453125, - "eplbImbalanceAfter": 1.0001495361328125, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28272068834", - "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272068834", - "createdAt": "2026-06-27T00:04:16.163335+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 118.07999759912491, - "p90": 128.63999605178833, - "p95": 134.62400436401367, - "p99": 156.2879979610443 - }, - "combine": { - "p50": 105.47199845314026, - "p90": 114.43199962377548, - "p95": 119.19999867677689, - "p99": 136.09600067138672 - }, - "roundtrip": { - "p50": 197.24799692630768, - "p90": 206.01600408554077, - "p95": 211.0079973936081, - "p99": 226.01599991321564 - }, - "isolatedSum": { - "p50": 223.55199605226517, - "p90": 243.0719956755638, - "p95": 253.82400304079056, - "p99": 292.38399863243103 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 78159872, - "combineLogicalBytes": 78159872, - "fanoutMean": 5.32421875, - "recvTokensMax": 702, - "stragglerRank": 3, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 256, - "globalTokens": 2048, - "dispatch": { - "p50": 149.08799529075623, - "p90": 157.27999806404114, - "p95": 161.56800091266632, - "p99": 172.83199727535248 - }, - "combine": { - "p50": 143.77599954605103, - "p90": 148.99200201034546, - "p95": 152.12799608707428, - "p99": 163.68000209331512 - }, - "roundtrip": { - "p50": 265.28000831604004, - "p90": 273.50398898124695, - "p95": 279.35999631881714, - "p99": 293.37599873542786 - }, - "isolatedSum": { - "p50": 292.86399483680725, - "p90": 306.2720000743866, - "p95": 313.6959969997406, - "p99": 336.5119993686676 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 156563456, - "combineLogicalBytes": 156563456, - "fanoutMean": 5.33251953125, - "recvTokensMax": 1393, - "stragglerRank": 3, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 512, - "globalTokens": 4096, - "dispatch": { - "p50": 206.2080055475235, - "p90": 216.99200570583344, - "p95": 
223.03999960422516, - "p99": 264.44798707962036 - }, - "combine": { - "p50": 225.40800273418427, - "p90": 233.37599635124207, - "p95": 238.65599930286407, - "p99": 253.56799364089966 - }, - "roundtrip": { - "p50": 404.4800102710724, - "p90": 415.2959883213043, - "p95": 423.552006483078, - "p99": 451.9039988517761 - }, - "isolatedSum": { - "p50": 431.61600828170776, - "p90": 450.3680020570755, - "p95": 461.69599890708923, - "p99": 518.01598072052 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 312410112, - "combineLogicalBytes": 312410112, - "fanoutMean": 5.3203125, - "recvTokensMax": 2773, - "stragglerRank": 3, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 1024, - "globalTokens": 8192, - "dispatch": { - "p50": 313.27998638153076, - "p90": 324.8960077762604, - "p95": 334.7199857234955, - "p99": 349.2160141468048 - }, - "combine": { - "p50": 357.05599188804626, - "p90": 370.59199810028076, - "p95": 381.4080059528351, - "p99": 418.43199729919434 - }, - "roundtrip": { - "p50": 643.7439918518066, - "p90": 656.0959815979004, - "p95": 666.2399768829346, - "p99": 702.9759883880615 - }, - "isolatedSum": { - "p50": 670.335978269577, - "p90": 695.4880058765411, - "p95": 716.1279916763306, - "p99": 767.6480114459991 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 622712832, - "combineLogicalBytes": 622712832, - "fanoutMean": 5.3023681640625, - "recvTokensMax": 5498, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2048, - "globalTokens": 16384, - "dispatch": { - "p50": 530.1439762115479, - "p90": 539.5519733428955, - "p95": 543.008029460907, - "p99": 568.9600110054016 - }, - "combine": { - "p50": 611.5840077400208, - "p90": 622.048020362854, - "p95": 629.2799711227417, - "p99": 677.5040030479431 - }, - "roundtrip": { - "p50": 1115.488052368164, - "p90": 1129.248023033142, - "p95": 1135.583996772766, - "p99": 1275.6479978561401 - }, - "isolatedSum": { - 
"p50": 1141.7279839515686, - "p90": 1161.5999937057495, - "p95": 1172.2880005836487, - "p99": 1246.4640140533447 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1245038592, - "combineLogicalBytes": 1245038592, - "fanoutMean": 5.30072021484375, - "recvTokensMax": 10955, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4096, - "globalTokens": 32768, - "dispatch": { - "p50": 986.1119985580444, - "p90": 1002.2720098495483, - "p95": 1011.0080242156982, - "p99": 1069.0239667892456 - }, - "combine": { - "p50": 1125.3440380096436, - "p90": 1136.6080045700073, - "p95": 1142.3360109329224, - "p99": 1163.8400554656982 - }, - "roundtrip": { - "p50": 2081.088066101074, - "p90": 2097.9840755462646, - "p95": 2111.0079288482666, - "p99": 2311.743974685669 - }, - "isolatedSum": { - "p50": 2111.456036567688, - "p90": 2138.8800144195557, - "p95": 2153.3440351486206, - "p99": 2232.864022254944 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2489460736, - "combineLogicalBytes": 2489460736, - "fanoutMean": 5.299407958984375, - "recvTokensMax": 21864, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-e1ecd1d4", - "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-moderate|8|prefill|normal|none|none|0|tuned||b5217e990b95f86", - "colorKey": "h200_f2b19f62", - "comparisonKey": "a7c9c0202574b9d0", - "schemaVersion": 3, - "generatedAt": "2026-06-27T00:04:45.749249+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_10", - "sku": "h200", - "backend": "deepep", - "phase": "prefill", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · bf16 · 
zipf-moderate", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "zipf-moderate", - "routingLabel": "zipf-moderate", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "b5217e990b95f86", - "workloadId": "set:6:6709a02c31933a9f", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28272079152", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272079152", - "createdAt": "2026-06-27T00:04:45.749249+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 122.94399738311768, - "p90": 134.20799374580383, - "p95": 138.87999951839447, - "p99": 150.87999403476715 - }, - "combine": { - "p50": 111.90400272607803, - "p90": 122.43200093507767, - "p95": 128.38399410247803, - "p99": 136.4479959011078 - }, - "roundtrip": { - "p50": 213.8880044221878, - "p90": 230.43200373649597, - "p95": 236.735999584198, - "p99": 261.4080011844635 - }, - "isolatedSum": { - "p50": 234.8480001091957, - "p90": 256.6399946808815, - "p95": 267.2639936208725, - "p99": 287.32798993587494 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 49946624, - "combineLogicalBytes": 
49946624, - "fanoutMean": 3.40234375, - "recvTokensMax": 1022, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 256, - "globalTokens": 2048, - "dispatch": { - "p50": 161.72799468040466, - "p90": 174.20800030231476, - "p95": 182.49599635601044, - "p99": 194.72000002861023 - }, - "combine": { - "p50": 158.27199816703796, - "p90": 174.8799979686737, - "p95": 179.58399653434753, - "p99": 191.26400351524353 - }, - "roundtrip": { - "p50": 296.9920039176941, - "p90": 319.0079927444458, - "p95": 327.2320032119751, - "p99": 340.03201127052307 - }, - "isolatedSum": { - "p50": 319.9999928474426, - "p90": 349.08799827098846, - "p95": 362.07999289035797, - "p99": 385.98400354385376 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 100509696, - "combineLogicalBytes": 100509696, - "fanoutMean": 3.42333984375, - "recvTokensMax": 2046, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 512, - "globalTokens": 4096, - "dispatch": { - "p50": 237.34399676322937, - "p90": 252.19199061393738, - "p95": 259.48798656463623, - "p99": 274.0800082683563 - }, - "combine": { - "p50": 260.44800877571106, - "p90": 278.2079875469208, - "p95": 284.7999930381775, - "p99": 298.880010843277 - }, - "roundtrip": { - "p50": 475.1040041446686, - "p90": 495.2319860458374, - "p95": 509.3119740486145, - "p99": 531.8080186843872 - }, - "isolatedSum": { - "p50": 497.79200553894043, - "p90": 530.3999781608582, - "p95": 544.2879796028137, - "p99": 572.9600191116333 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 201678848, - "combineLogicalBytes": 201678848, - "fanoutMean": 3.4345703125, - "recvTokensMax": 4094, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 1024, - "globalTokens": 8192, - "dispatch": { - "p50": 379.4879913330078, - "p90": 389.60000872612, - "p95": 395.6800103187561, - "p99": 409.92000699043274 - }, - 
"combine": { - "p50": 438.1760060787201, - "p90": 452.06400752067566, - "p95": 457.69599080085754, - "p99": 494.59201097488403 - }, - "roundtrip": { - "p50": 794.2079901695251, - "p90": 809.7919821739197, - "p95": 823.6799836158752, - "p99": 875.6160140037537 - }, - "isolatedSum": { - "p50": 817.6639974117279, - "p90": 841.6640162467957, - "p95": 853.3760011196136, - "p99": 904.5120179653168 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 405035008, - "combineLogicalBytes": 405035008, - "fanoutMean": 3.4488525390625, - "recvTokensMax": 8189, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2048, - "globalTokens": 16384, - "dispatch": { - "p50": 671.2319850921631, - "p90": 682.6879978179932, - "p95": 689.2480254173279, - "p99": 929.0879964828491 - }, - "combine": { - "p50": 786.7839932441711, - "p90": 799.1999983787537, - "p95": 804.2880296707153, - "p99": 833.6960077285767 - }, - "roundtrip": { - "p50": 1430.0800561904907, - "p90": 1449.9200582504272, - "p95": 1461.3120555877686, - "p99": 1667.8080558776855 - }, - "isolatedSum": { - "p50": 1458.0159783363342, - "p90": 1481.8879961967468, - "p95": 1493.5360550880432, - "p99": 1762.7840042114258 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 808822784, - "combineLogicalBytes": 808822784, - "fanoutMean": 3.44354248046875, - "recvTokensMax": 16380, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4096, - "globalTokens": 32768, - "dispatch": { - "p50": 1269.2480087280273, - "p90": 1284.5439910888672, - "p95": 1292.9919958114624, - "p99": 1424.064040184021 - }, - "combine": { - "p50": 1480.6400537490845, - "p90": 1504.7039985656738, - "p95": 1519.10400390625, - "p99": 1724.0320444107056 - }, - "roundtrip": { - "p50": 2719.4879055023193, - "p90": 2740.70405960083, - "p95": 2764.8000717163086, - "p99": 3076.0960578918457 - }, - "isolatedSum": { - "p50": 2749.888062477112, - "p90": 
2789.247989654541, - "p95": 2812.0959997177124, - "p99": 3148.0960845947266 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1619795968, - "combineLogicalBytes": 1619795968, - "fanoutMean": 3.4481201171875, - "recvTokensMax": 32761, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-f58892d6", - "identity": "h200|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-moderate+eplb|8|prefill|normal|none|none|0|tuned||2b57a75d27f5b39", - "colorKey": "h200_bac4102c", - "comparisonKey": "402825358de599a6", - "schemaVersion": 3, - "generatedAt": "2026-06-27T00:04:49.601548+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_6", - "sku": "h200", - "backend": "deepep", - "phase": "prefill", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · bf16 · zipf-moderate+eplb", - "model": "DeepSeek-V3 (EPLB physical)", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 288, - "routing": "zipf-moderate", - "routingLabel": "zipf-moderate+eplb", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": true, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "2b57a75d27f5b39", - "workloadId": "set:6:6709a02c31933a9f", - "workloadSource": "canonical-serialized", - 
"eplbImbalanceBefore": 4.895263671875, - "eplbImbalanceAfter": 1.0000902811686199, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28272082600", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272082600", - "createdAt": "2026-06-27T00:04:49.601548+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 117.72800236940384, - "p90": 136.25599443912506, - "p95": 140.8960074186325, - "p99": 185.34399569034576 - }, - "combine": { - "p50": 103.61599922180176, - "p90": 115.9679964184761, - "p95": 122.49600142240524, - "p99": 137.7599984407425 - }, - "roundtrip": { - "p50": 197.02400267124176, - "p90": 215.13600647449493, - "p95": 222.6240038871765, - "p99": 233.43999683856964 - }, - "isolatedSum": { - "p50": 221.3440015912056, - "p90": 252.22399085760117, - "p95": 263.39200884103775, - "p99": 323.10399413108826 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 77385728, - "combineLogicalBytes": 77385728, - "fanoutMean": 5.271484375, - "recvTokensMax": 691, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 256, - "globalTokens": 2048, - "dispatch": { - "p50": 145.21600306034088, - "p90": 166.1120057106018, - "p95": 175.1679927110672, - "p99": 194.91200149059296 - }, - "combine": { - "p50": 144.22400295734406, - "p90": 156.2879979610443, - "p95": 161.18399798870087, - "p99": 171.90399765968323 - }, - "roundtrip": { - "p50": 262.87999749183655, - "p90": 277.5999903678894, - "p95": 286.3999903202057, - "p99": 298.97600412368774 - }, - "isolatedSum": { - "p50": 289.44000601768494, - "p90": 322.4000036716461, - "p95": 336.35199069976807, - "p99": 366.8159991502762 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 155172864, - "combineLogicalBytes": 
155172864, - "fanoutMean": 5.28515625, - "recvTokensMax": 1378, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 512, - "globalTokens": 4096, - "dispatch": { - "p50": 203.3279985189438, - "p90": 218.36799383163452, - "p95": 226.1440008878708, - "p99": 242.8479939699173 - }, - "combine": { - "p50": 223.00800681114197, - "p90": 237.5359982252121, - "p95": 245.7599937915802, - "p99": 267.2959864139557 - }, - "roundtrip": { - "p50": 399.77601170539856, - "p90": 420.415997505188, - "p95": 433.1839978694916, - "p99": 505.40798902511597 - }, - "isolatedSum": { - "p50": 426.33600533008575, - "p90": 455.9039920568466, - "p95": 471.903994679451, - "p99": 510.143980383873 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 310546432, - "combineLogicalBytes": 310546432, - "fanoutMean": 5.28857421875, - "recvTokensMax": 2745, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 1024, - "globalTokens": 8192, - "dispatch": { - "p50": 312.1280074119568, - "p90": 327.7119994163513, - "p95": 334.879994392395, - "p99": 400.4479944705963 - }, - "combine": { - "p50": 352.7680039405823, - "p90": 362.527996301651, - "p95": 367.6159977912903, - "p99": 386.0799968242645 - }, - "roundtrip": { - "p50": 641.1839723587036, - "p90": 658.1119894981384, - "p95": 666.0159826278687, - "p99": 719.5199728012085 - }, - "isolatedSum": { - "p50": 664.8960113525391, - "p90": 690.2399957180023, - "p95": 702.4959921836853, - "p99": 786.5279912948608 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 620619776, - "combineLogicalBytes": 620619776, - "fanoutMean": 5.2845458984375, - "recvTokensMax": 5526, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2048, - "globalTokens": 16384, - "dispatch": { - "p50": 527.5200009346008, - "p90": 542.4320101737976, - "p95": 550.4639744758606, - "p99": 575.2959847450256 - }, - "combine": 
{ - "p50": 620.3839778900146, - "p90": 633.5999965667725, - "p95": 639.2639875411987, - "p99": 673.8560199737549 - }, - "roundtrip": { - "p50": 1121.1520433425903, - "p90": 1137.0879411697388, - "p95": 1147.3599672317505, - "p99": 1174.7519969940186 - }, - "isolatedSum": { - "p50": 1147.9039788246155, - "p90": 1176.03200674057, - "p95": 1189.7279620170593, - "p99": 1249.1520047187805 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1239175168, - "combineLogicalBytes": 1239175168, - "fanoutMean": 5.2757568359375, - "recvTokensMax": 11165, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4096, - "globalTokens": 32768, - "dispatch": { - "p50": 1003.3919811248779, - "p90": 1031.5200090408325, - "p95": 1040.4160022735596, - "p99": 1070.2400207519531 - }, - "combine": { - "p50": 1121.9840049743652, - "p90": 1135.7760429382324, - "p95": 1145.0239419937134, - "p99": 1167.8400039672852 - }, - "roundtrip": { - "p50": 2083.0399990081787, - "p90": 2113.568067550659, - "p95": 2122.431993484497, - "p99": 2277.791976928711 - }, - "isolatedSum": { - "p50": 2125.375986099243, - "p90": 2167.296051979065, - "p95": 2185.439944267273, - "p99": 2238.0800247192383 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2481604608, - "combineLogicalBytes": 2481604608, - "fanoutMean": 5.282684326171875, - "recvTokensMax": 22165, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-8c2088d8", - "identity": "h200|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|prefill|normal|none|none|0|tuned||2b57a75d27f5b39", - "colorKey": "h200_1eda221e", - "comparisonKey": "6ee0b18a3e276ae1", - "schemaVersion": 3, - "generatedAt": "2026-06-27T00:03:37.741116+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_4", - "sku": "h200", - "backend": "deepep", - "phase": "prefill", - "mode": "normal", - "resourceMode": "tuned", - 
"suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · bf16 · zipf+eplb", - "model": "DeepSeek-V3 (EPLB physical)", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 288, - "routing": "zipf", - "routingLabel": "zipf+eplb", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": true, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "2b57a75d27f5b39", - "workloadId": "set:6:830e36e88869e222", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": 4.895263671875, - "eplbImbalanceAfter": 1.0000902811686199, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28272052634", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272052634", - "createdAt": "2026-06-27T00:03:37.741116+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 118.65600198507309, - "p90": 133.56800377368927, - "p95": 139.1039937734604, - "p99": 146.97599411010742 - }, - "combine": { - "p50": 104.3199971318245, - "p90": 118.01599711179733, - "p95": 121.76000326871872, - "p99": 131.77600502967834 - }, - "roundtrip": { - "p50": 197.02400267124176, - "p90": 214.75200355052948, - "p95": 
219.67999637126923, - "p99": 230.97600042819977 - }, - "isolatedSum": { - "p50": 222.97599911689758, - "p90": 251.5840008854866, - "p95": 260.8639970421791, - "p99": 278.75199913978577 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 77385728, - "combineLogicalBytes": 77385728, - "fanoutMean": 5.271484375, - "recvTokensMax": 691, - "stragglerRank": 2, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 256, - "globalTokens": 2048, - "dispatch": { - "p50": 144.41600441932678, - "p90": 161.8880033493042, - "p95": 168.96000504493713, - "p99": 186.43200397491455 - }, - "combine": { - "p50": 143.19999516010284, - "p90": 153.08800339698792, - "p95": 157.4079990386963, - "p99": 164.60800170898438 - }, - "roundtrip": { - "p50": 262.87999749183655, - "p90": 275.32801032066345, - "p95": 282.4000120162964, - "p99": 291.00799560546875 - }, - "isolatedSum": { - "p50": 287.6159995794296, - "p90": 314.9760067462921, - "p95": 326.3680040836334, - "p99": 351.0400056838989 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 155172864, - "combineLogicalBytes": 155172864, - "fanoutMean": 5.28515625, - "recvTokensMax": 1378, - "stragglerRank": 2, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 512, - "globalTokens": 4096, - "dispatch": { - "p50": 203.0079960823059, - "p90": 220.768004655838, - "p95": 227.55199670791626, - "p99": 253.63200902938843 - }, - "combine": { - "p50": 219.4879949092865, - "p90": 227.52000391483307, - "p95": 231.23200237751007, - "p99": 248.79999458789825 - }, - "roundtrip": { - "p50": 397.0560133457184, - "p90": 409.5039963722229, - "p95": 413.4719967842102, - "p99": 425.82398653030396 - }, - "isolatedSum": { - "p50": 422.4959909915924, - "p90": 448.2880085706711, - "p95": 458.78399908542633, - "p99": 502.4320036172867 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 310546432, - "combineLogicalBytes": 310546432, - "fanoutMean": 5.28857421875, - "recvTokensMax": 
2745, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 1024, - "globalTokens": 8192, - "dispatch": { - "p50": 311.5839958190918, - "p90": 334.52799916267395, - "p95": 339.2319977283478, - "p99": 353.88800501823425 - }, - "combine": { - "p50": 350.20801424980164, - "p90": 362.0480000972748, - "p95": 365.9839928150177, - "p99": 423.71198534965515 - }, - "roundtrip": { - "p50": 636.7999911308289, - "p90": 650.1439809799194, - "p95": 654.2080044746399, - "p99": 711.4560008049011 - }, - "isolatedSum": { - "p50": 661.7920100688934, - "p90": 696.5759992599487, - "p95": 705.2159905433655, - "p99": 777.5999903678894 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 620619776, - "combineLogicalBytes": 620619776, - "fanoutMean": 5.2845458984375, - "recvTokensMax": 5526, - "stragglerRank": 1, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2048, - "globalTokens": 16384, - "dispatch": { - "p50": 528.3839702606201, - "p90": 545.7599759101868, - "p95": 551.9999861717224, - "p99": 572.2879767417908 - }, - "combine": { - "p50": 608.959972858429, - "p90": 620.9920048713684, - "p95": 626.1119842529297, - "p99": 657.0559740066528 - }, - "roundtrip": { - "p50": 1110.2720499038696, - "p90": 1125.0239610671997, - "p95": 1132.032036781311, - "p99": 1183.0079555511475 - }, - "isolatedSum": { - "p50": 1137.343943119049, - "p90": 1166.7519807815552, - "p95": 1178.111970424652, - "p99": 1229.3439507484436 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1239175168, - "combineLogicalBytes": 1239175168, - "fanoutMean": 5.2757568359375, - "recvTokensMax": 11165, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4096, - "globalTokens": 32768, - "dispatch": { - "p50": 994.4639801979065, - "p90": 1016.1600112915039, - "p95": 1023.9039659500122, - "p99": 1042.0479774475098 - }, - "combine": { - "p50": 1103.2960414886475, - "p90": 
1116.2559986114502, - "p95": 1121.7600107192993, - "p99": 1139.4879817962646 - }, - "roundtrip": { - "p50": 2056.544065475464, - "p90": 2077.9199600219727, - "p95": 2088.671922683716, - "p99": 2251.3279914855957 - }, - "isolatedSum": { - "p50": 2097.760021686554, - "p90": 2132.416009902954, - "p95": 2145.6639766693115, - "p99": 2181.5359592437744 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2481604608, - "combineLogicalBytes": 2481604608, - "fanoutMean": 5.282684326171875, - "recvTokensMax": 22165, - "stragglerRank": 1, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-8e568434", - "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|normalized|0.18|64d989e2e2a6b31", - "colorKey": "h200_c851a534", - "comparisonKey": "1f9e00010b0d6e5b", - "schemaVersion": 3, - "generatedAt": "2026-06-26T17:29:59.726916+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_11", - "sku": "h200", - "backend": "deepep", - "phase": "prefill", - "mode": "normal", - "resourceMode": "normalized", - "suite": "resource-constrained", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · bf16 (norm)", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": 0.18, - "achievedFraction": 0.1818, - "configuredUnits": 24, - "deviceUnits": 132, - "resourceClass": "resource-constrained", - "conformanceClass": "resource-conforming", - "fixedKernel": false, - "paretoEligible": true - }, - "placement": { - 
"kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "64d989e2e2a6b31", - "workloadId": "set:6:a426d66e479dc893", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28254392935", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254392935", - "createdAt": "2026-06-26T17:29:59.726916+00:00", - "sha": "60dec7d70f554e252fec87709e2be52752947db1" - }, - "rows": [ - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 116.44800007343292, - "p90": 126.97599828243256, - "p95": 137.92000710964203, - "p99": 159.96800363063812 - }, - "combine": { - "p50": 103.55199873447418, - "p90": 113.11999708414078, - "p95": 120.80000340938568, - "p99": 147.10399508476257 - }, - "roundtrip": { - "p50": 194.62400674819946, - "p90": 208.19200575351715, - "p95": 215.39199352264404, - "p99": 238.75199258327484 - }, - "isolatedSum": { - "p50": 219.9999988079071, - "p90": 240.09599536657333, - "p95": 258.7200105190277, - "p99": 307.0719987154007 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 256, - "globalTokens": 2048, - "dispatch": { - "p50": 146.97599411010742, - "p90": 163.07200491428375, - "p95": 171.77599668502808, - "p99": 191.42399728298187 - }, - "combine": { - "p50": 142.84799993038177, - "p90": 154.78399395942688, - "p95": 165.12000560760498, - "p99": 172.28800058364868 - }, - "roundtrip": { - "p50": 267.0080065727234, - "p90": 288.9600098133087, - "p95": 295.77600955963135, - "p99": 315.71200489997864 - }, - 
"isolatedSum": { - "p50": 289.8239940404892, - "p90": 317.85599887371063, - "p95": 336.89600229263306, - "p99": 363.71199786663055 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 155889664, - "combineLogicalBytes": 155889664, - "fanoutMean": 5.3095703125, - "recvTokensMax": 1422, - "stragglerRank": 2, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 512, - "globalTokens": 4096, - "dispatch": { - "p50": 207.68000185489655, - "p90": 228.64000499248505, - "p95": 236.92800104618073, - "p99": 267.90401339530945 - }, - "combine": { - "p50": 210.36800742149353, - "p90": 225.0239998102188, - "p95": 234.68799889087677, - "p99": 271.58400416374207 - }, - "roundtrip": { - "p50": 390.49598574638367, - "p90": 413.37600350379944, - "p95": 420.28799653053284, - "p99": 449.8240053653717 - }, - "isolatedSum": { - "p50": 418.0480092763901, - "p90": 453.66400480270386, - "p95": 471.6159999370575, - "p99": 539.4880175590515 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 312266752, - "combineLogicalBytes": 312266752, - "fanoutMean": 5.31787109375, - "recvTokensMax": 2779, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 1024, - "globalTokens": 8192, - "dispatch": { - "p50": 324.8960077762604, - "p90": 341.5679931640625, - "p95": 351.4559864997864, - "p99": 364.73599076271057 - }, - "combine": { - "p50": 328.0960023403168, - "p90": 339.6480083465576, - "p95": 345.95200419425964, - "p99": 362.8480136394501 - }, - "roundtrip": { - "p50": 628.9600133895874, - "p90": 643.231987953186, - "p95": 649.3120193481445, - "p99": 664.3199920654297 - }, - "isolatedSum": { - "p50": 652.9920101165771, - "p90": 681.2160015106201, - "p95": 697.407990694046, - "p99": 727.5840044021606 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 623443968, - "combineLogicalBytes": 623443968, - "fanoutMean": 5.30859375, - "recvTokensMax": 5505, - "stragglerRank": 6, - "correct": true, - 
"samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2048, - "globalTokens": 16384, - "dispatch": { - "p50": 566.3679838180542, - "p90": 581.0880064964294, - "p95": 587.2960090637207, - "p99": 609.1520190238953 - }, - "combine": { - "p50": 560.9920024871826, - "p90": 573.0559825897217, - "p95": 578.2399773597717, - "p99": 609.7279787063599 - }, - "roundtrip": { - "p50": 1097.3440408706665, - "p90": 1114.400029182434, - "p95": 1121.791958808899, - "p99": 1286.6239547729492 - }, - "isolatedSum": { - "p50": 1127.3599863052368, - "p90": 1154.1439890861511, - "p95": 1165.5359864234924, - "p99": 1218.8799977302551 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1243805696, - "combineLogicalBytes": 1243805696, - "fanoutMean": 5.29547119140625, - "recvTokensMax": 10952, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4096, - "globalTokens": 32768, - "dispatch": { - "p50": 1051.8079996109009, - "p90": 1067.8720474243164, - "p95": 1078.271985054016, - "p99": 1161.4079475402832 - }, - "combine": { - "p50": 1028.9920568466187, - "p90": 1044.0959930419922, - "p95": 1054.4320344924927, - "p99": 1218.783974647522 - }, - "roundtrip": { - "p50": 2049.3760108947754, - "p90": 2068.4800148010254, - "p95": 2079.200029373169, - "p99": 2593.600034713745 - }, - "isolatedSum": { - "p50": 2080.8000564575195, - "p90": 2111.9680404663086, - "p95": 2132.704019546509, - "p99": 2380.191922187805 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2487009280, - "combineLogicalBytes": 2487009280, - "fanoutMean": 5.294189453125, - "recvTokensMax": 21781, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-6764a75f", - "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|prefill|normal|none|none|0|normalized|0.18|0a3064a2af0dd39", - "colorKey": "h200_a1e795ec", - "comparisonKey": "5a22622d9db14749", - "schemaVersion": 3, - 
"generatedAt": "2026-06-26T17:30:54.944678+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_8", - "sku": "h200", - "backend": "deepep", - "phase": "prefill", - "mode": "normal", - "resourceMode": "normalized", - "suite": "resource-constrained", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · bf16 (norm) · balanced", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "balanced", - "routingLabel": "balanced", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": 0.18, - "achievedFraction": 0.1818, - "configuredUnits": 24, - "deviceUnits": 132, - "resourceClass": "resource-constrained", - "conformanceClass": "resource-conforming", - "fixedKernel": false, - "paretoEligible": true - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "0a3064a2af0dd39", - "workloadId": "set:6:2dad1a73ff872905", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28254443915", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254443915", - "createdAt": "2026-06-26T17:30:54.944678+00:00", - "sha": "60dec7d70f554e252fec87709e2be52752947db1" - }, - "rows": [ - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 133.88800621032715, - "p90": 147.16799557209015, - "p95": 159.5200002193451, - "p99": 
177.76000499725342 - }, - "combine": { - "p50": 119.39200013875961, - "p90": 131.80799782276154, - "p95": 139.74399864673615, - "p99": 152.48000621795654 - }, - "roundtrip": { - "p50": 227.64800488948822, - "p90": 249.05599653720856, - "p95": 255.74401021003723, - "p99": 274.3679881095886 - }, - "isolatedSum": { - "p50": 253.28000634908676, - "p90": 278.9759933948517, - "p95": 299.26399886608124, - "p99": 330.24001121520996 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 117440512, - "combineLogicalBytes": 117440512, - "fanoutMean": 8, - "recvTokensMax": 1024, - "stragglerRank": 1, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 256, - "globalTokens": 2048, - "dispatch": { - "p50": 176.54399573802948, - "p90": 188.4160041809082, - "p95": 203.07199656963348, - "p99": 299.8400032520294 - }, - "combine": { - "p50": 169.91999745368958, - "p90": 175.48799514770508, - "p95": 180.16000092029572, - "p99": 187.51999735832214 - }, - "roundtrip": { - "p50": 319.4560110569, - "p90": 328.7679851055145, - "p95": 336.32001280784607, - "p99": 355.0400137901306 - }, - "isolatedSum": { - "p50": 346.46399319171906, - "p90": 363.9039993286133, - "p95": 383.2319974899292, - "p99": 487.36000061035156 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 234881024, - "combineLogicalBytes": 234881024, - "fanoutMean": 8, - "recvTokensMax": 2048, - "stragglerRank": 1, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 512, - "globalTokens": 4096, - "dispatch": { - "p50": 269.567996263504, - "p90": 288.12798857688904, - "p95": 294.048011302948, - "p99": 315.3280019760132 - }, - "combine": { - "p50": 262.0159983634949, - "p90": 282.1120023727417, - "p95": 286.5920066833496, - "p99": 306.11199140548706 - }, - "roundtrip": { - "p50": 505.7920217514038, - "p90": 531.9039821624756, - "p95": 535.7760190963745, - "p99": 544.6720123291016 - }, - "isolatedSum": { - "p50": 531.5839946269989, - "p90": 
570.2399909496307, - "p95": 580.6400179862976, - "p99": 621.4399933815002 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 469762048, - "combineLogicalBytes": 469762048, - "fanoutMean": 8, - "recvTokensMax": 4096, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 1024, - "globalTokens": 8192, - "dispatch": { - "p50": 443.87200474739075, - "p90": 459.55199003219604, - "p95": 467.74399280548096, - "p99": 487.199991941452 - }, - "combine": { - "p50": 427.64800786972046, - "p90": 442.81598925590515, - "p95": 451.58401131629944, - "p99": 483.13599824905396 - }, - "roundtrip": { - "p50": 844.7999954223633, - "p90": 860.0640296936035, - "p95": 867.0719861984253, - "p99": 924.67200756073 - }, - "isolatedSum": { - "p50": 871.5200126171112, - "p90": 902.3679792881012, - "p95": 919.3280041217804, - "p99": 970.335990190506 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 939524096, - "combineLogicalBytes": 939524096, - "fanoutMean": 8, - "recvTokensMax": 8192, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2048, - "globalTokens": 16384, - "dispatch": { - "p50": 806.1119914054871, - "p90": 823.7119913101196, - "p95": 832.4480056762695, - "p99": 892.3199772834778 - }, - "combine": { - "p50": 758.9120268821716, - "p90": 777.1199941635132, - "p95": 790.3040051460266, - "p99": 827.3919820785522 - }, - "roundtrip": { - "p50": 1534.5920324325562, - "p90": 1550.75204372406, - "p95": 1561.3759756088257, - "p99": 1597.9520082473755 - }, - "isolatedSum": { - "p50": 1565.0240182876587, - "p90": 1600.8319854736328, - "p95": 1622.7520108222961, - "p99": 1719.71195936203 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1879048192, - "combineLogicalBytes": 1879048192, - "fanoutMean": 8, - "recvTokensMax": 16384, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4096, - "globalTokens": 
32768, - "dispatch": { - "p50": 1534.7520112991333, - "p90": 1552.4159669876099, - "p95": 1570.9120035171509, - "p99": 1686.7519617080688 - }, - "combine": { - "p50": 1415.2640104293823, - "p90": 1439.2000436782837, - "p95": 1449.120044708252, - "p99": 1643.1679725646973 - }, - "roundtrip": { - "p50": 2922.528028488159, - "p90": 2943.743944168091, - "p95": 2957.535982131958, - "p99": 3040.5759811401367 - }, - "isolatedSum": { - "p50": 2950.0160217285156, - "p90": 2991.6160106658936, - "p95": 3020.032048225403, - "p99": 3329.919934272766 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 3758096384, - "combineLogicalBytes": 3758096384, - "fanoutMean": 8, - "recvTokensMax": 32768, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-e63750d6", - "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|prefill|normal|none|none|0|normalized|0.18|b5217e990b95f86", - "colorKey": "h200_0a93a01f", - "comparisonKey": "f4911d0a95d49c62", - "schemaVersion": 3, - "generatedAt": "2026-06-26T17:31:03.582434+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_0", - "sku": "h200", - "backend": "deepep", - "phase": "prefill", - "mode": "normal", - "resourceMode": "normalized", - "suite": "resource-constrained", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · bf16 (norm) · zipf", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "zipf", - "routingLabel": "zipf", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": 0.18, - "achievedFraction": 0.1818, - "configuredUnits": 24, - 
"deviceUnits": 132, - "resourceClass": "resource-constrained", - "conformanceClass": "resource-conforming", - "fixedKernel": false, - "paretoEligible": true - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "b5217e990b95f86", - "workloadId": "set:6:830e36e88869e222", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28254452252", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254452252", - "createdAt": "2026-06-26T17:31:03.582434+00:00", - "sha": "60dec7d70f554e252fec87709e2be52752947db1" - }, - "rows": [ - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 120.86399644613266, - "p90": 133.53599607944489, - "p95": 138.5280042886734, - "p99": 154.01600301265717 - }, - "combine": { - "p50": 112.64000087976456, - "p90": 124.86399710178375, - "p95": 130.5599957704544, - "p99": 142.7839994430542 - }, - "roundtrip": { - "p50": 213.47199380397797, - "p90": 229.72799837589264, - "p95": 238.68800699710846, - "p99": 280.8000147342682 - }, - "isolatedSum": { - "p50": 233.50399732589722, - "p90": 258.39999318122864, - "p95": 269.0880000591278, - "p99": 296.80000245571136 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 49946624, - "combineLogicalBytes": 49946624, - "fanoutMean": 3.40234375, - "recvTokensMax": 1022, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 256, - "globalTokens": 2048, - "dispatch": { - "p50": 163.55200111865997, - "p90": 173.7920045852661, - "p95": 181.66400492191315, - "p99": 202.87999510765076 - }, - "combine": { - "p50": 156.54399991035461, - "p90": 170.9119975566864, - "p95": 178.20799350738525, - 
"p99": 194.62400674819946 - }, - "roundtrip": { - "p50": 297.1839904785156, - "p90": 314.65598940849304, - "p95": 321.02400064468384, - "p99": 352.28800773620605 - }, - "isolatedSum": { - "p50": 320.0960010290146, - "p90": 344.7040021419525, - "p95": 359.8719984292984, - "p99": 397.5040018558502 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 100509696, - "combineLogicalBytes": 100509696, - "fanoutMean": 3.42333984375, - "recvTokensMax": 2046, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 512, - "globalTokens": 4096, - "dispatch": { - "p50": 237.56800591945648, - "p90": 246.72000110149384, - "p95": 252.44799256324768, - "p99": 262.2720003128052 - }, - "combine": { - "p50": 242.3039972782135, - "p90": 256.99201226234436, - "p95": 264.5759880542755, - "p99": 294.17601227760315 - }, - "roundtrip": { - "p50": 457.5679898262024, - "p90": 477.27999091148376, - "p95": 485.6959879398346, - "p99": 519.9679732322693 - }, - "isolatedSum": { - "p50": 479.87200319767, - "p90": 503.7120133638382, - "p95": 517.0239806175232, - "p99": 556.4480125904083 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 201678848, - "combineLogicalBytes": 201678848, - "fanoutMean": 3.4345703125, - "recvTokensMax": 4094, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 1024, - "globalTokens": 8192, - "dispatch": { - "p50": 384.0320110321045, - "p90": 394.9120044708252, - "p95": 400.2879858016968, - "p99": 411.77600622177124 - }, - "combine": { - "p50": 408.2239866256714, - "p90": 420.22401094436646, - "p95": 427.39200592041016, - "p99": 457.5679898262024 - }, - "roundtrip": { - "p50": 765.9199833869934, - "p90": 785.9519720077515, - "p95": 798.2079982757568, - "p99": 844.543993473053 - }, - "isolatedSum": { - "p50": 792.2559976577759, - "p90": 815.1360154151917, - "p95": 827.6799917221069, - "p99": 869.3439960479736 - }, - "roundtripMeasured": true, - 
"dispatchLogicalBytes": 405035008, - "combineLogicalBytes": 405035008, - "fanoutMean": 3.4488525390625, - "recvTokensMax": 8189, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2048, - "globalTokens": 16384, - "dispatch": { - "p50": 669.6959733963013, - "p90": 682.3359727859497, - "p95": 689.0559792518616, - "p99": 731.8080067634583 - }, - "combine": { - "p50": 727.1360158920288, - "p90": 740.4800057411194, - "p95": 746.783971786499, - "p99": 762.8480195999146 - }, - "roundtrip": { - "p50": 1366.0800457000732, - "p90": 1389.631986618042, - "p95": 1405.6639671325684, - "p99": 1561.8239641189575 - }, - "isolatedSum": { - "p50": 1396.83198928833, - "p90": 1422.815978527069, - "p95": 1435.8399510383606, - "p99": 1494.6560263633728 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 808822784, - "combineLogicalBytes": 808822784, - "fanoutMean": 3.44354248046875, - "recvTokensMax": 16380, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4096, - "globalTokens": 32768, - "dispatch": { - "p50": 1259.1999769210815, - "p90": 1273.1839418411255, - "p95": 1278.5600423812866, - "p99": 1390.463948249817 - }, - "combine": { - "p50": 1366.8160438537598, - "p90": 1383.2319974899292, - "p95": 1391.2960290908813, - "p99": 1428.5119771957397 - }, - "roundtrip": { - "p50": 2598.0799198150635, - "p90": 2617.0880794525146, - "p95": 2628.2238960266113, - "p99": 2879.9679279327393 - }, - "isolatedSum": { - "p50": 2626.0160207748413, - "p90": 2656.4159393310547, - "p95": 2669.856071472168, - "p99": 2818.9759254455566 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1619795968, - "combineLogicalBytes": 1619795968, - "fanoutMean": 3.4481201171875, - "recvTokensMax": 32761, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-353049ec", - "identity": 
"h200|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|prefill|normal|none|none|0|normalized|0.18|2b57a75d27f5b39", - "colorKey": "h200_993777bf", - "comparisonKey": "cb74cc9ee6130bb2", - "schemaVersion": 3, - "generatedAt": "2026-06-26T17:47:04.200207+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_1", - "sku": "h200", - "backend": "deepep", - "phase": "prefill", - "mode": "normal", - "resourceMode": "normalized", - "suite": "resource-constrained", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · bf16 (norm) · zipf+eplb", - "model": "DeepSeek-V3 (EPLB physical)", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 288, - "routing": "zipf", - "routingLabel": "zipf+eplb", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": true, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": 0.18, - "achievedFraction": 0.1818, - "configuredUnits": 24, - "deviceUnits": 132, - "resourceClass": "resource-constrained", - "conformanceClass": "resource-conforming", - "fixedKernel": false, - "paretoEligible": true - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "2b57a75d27f5b39", - "workloadId": "set:6:830e36e88869e222", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": 4.895263671875, - "eplbImbalanceAfter": 1.0000902811686199, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28255303840", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28255303840", - "createdAt": 
"2026-06-26T17:47:04.200207+00:00", - "sha": "36d3eb6c3c7386d3220c873d305410219c5c0f17" - }, - "rows": [ - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 117.40799993276596, - "p90": 132.54399597644806, - "p95": 140.06400108337402, - "p99": 154.27200496196747 - }, - "combine": { - "p50": 104.3199971318245, - "p90": 118.04799735546112, - "p95": 123.99999797344208, - "p99": 158.75199437141418 - }, - "roundtrip": { - "p50": 193.9840018749237, - "p90": 207.68000185489655, - "p95": 215.61600267887115, - "p99": 244.6720004081726 - }, - "isolatedSum": { - "p50": 221.72799706459045, - "p90": 250.59199333190918, - "p95": 264.0639990568161, - "p99": 313.02399933338165 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 77385728, - "combineLogicalBytes": 77385728, - "fanoutMean": 5.271484375, - "recvTokensMax": 691, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 256, - "globalTokens": 2048, - "dispatch": { - "p50": 146.68799936771393, - "p90": 160.64000129699707, - "p95": 169.855996966362, - "p99": 192.06400215625763 - }, - "combine": { - "p50": 142.91200041770935, - "p90": 152.0320028066635, - "p95": 157.98400342464447, - "p99": 178.0479997396469 - }, - "roundtrip": { - "p50": 266.1440074443817, - "p90": 278.7199914455414, - "p95": 285.6000065803528, - "p99": 310.43198704719543 - }, - "isolatedSum": { - "p50": 289.5999997854233, - "p90": 312.6720041036606, - "p95": 327.84000039100647, - "p99": 370.11200189590454 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 155172864, - "combineLogicalBytes": 155172864, - "fanoutMean": 5.28515625, - "recvTokensMax": 1378, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 512, - "globalTokens": 4096, - "dispatch": { - "p50": 208.12800526618958, - "p90": 229.8559993505478, - "p95": 237.34399676322937, - "p99": 272.5760042667389 - }, - "combine": { - "p50": 210.62399446964264, - 
"p90": 222.75200486183167, - "p95": 228.99200022220612, - "p99": 251.45599246025085 - }, - "roundtrip": { - "p50": 391.4879858493805, - "p90": 413.05598616600037, - "p95": 424.54400658607483, - "p99": 474.047988653183 - }, - "isolatedSum": { - "p50": 418.7519997358322, - "p90": 452.60800421237946, - "p95": 466.3359969854355, - "p99": 524.0319967269897 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 310546432, - "combineLogicalBytes": 310546432, - "fanoutMean": 5.28857421875, - "recvTokensMax": 2745, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 1024, - "globalTokens": 8192, - "dispatch": { - "p50": 322.7840065956116, - "p90": 342.78398752212524, - "p95": 351.6800105571747, - "p99": 378.2399892807007 - }, - "combine": { - "p50": 330.1439881324768, - "p90": 345.0239896774292, - "p95": 349.8559892177582, - "p99": 379.13599610328674 - }, - "roundtrip": { - "p50": 626.2080073356628, - "p90": 646.8480229377747, - "p95": 661.1520051956177, - "p99": 823.4559893608093 - }, - "isolatedSum": { - "p50": 652.9279947280884, - "p90": 687.8079771995544, - "p95": 701.5359997749329, - "p99": 757.3759853839874 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 620619776, - "combineLogicalBytes": 620619776, - "fanoutMean": 5.2845458984375, - "recvTokensMax": 5526, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2048, - "globalTokens": 16384, - "dispatch": { - "p50": 569.7280168533325, - "p90": 585.7920050621033, - "p95": 596.2240099906921, - "p99": 690.7520294189453 - }, - "combine": { - "p50": 569.1199898719788, - "p90": 583.1040143966675, - "p95": 591.0400152206421, - "p99": 609.503984451294 - }, - "roundtrip": { - "p50": 1109.8560094833374, - "p90": 1127.8719902038574, - "p95": 1138.335943222046, - "p99": 1191.648006439209 - }, - "isolatedSum": { - "p50": 1138.8480067253113, - "p90": 1168.8960194587708, - "p95": 1187.2640252113342, - "p99": 
1300.2560138702393 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1239175168, - "combineLogicalBytes": 1239175168, - "fanoutMean": 5.2757568359375, - "recvTokensMax": 11165, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4096, - "globalTokens": 32768, - "dispatch": { - "p50": 1082.5920104980469, - "p90": 1103.16801071167, - "p95": 1116.927981376648, - "p99": 1311.8400573730469 - }, - "combine": { - "p50": 1018.3039903640747, - "p90": 1032.4480533599854, - "p95": 1047.5200414657593, - "p99": 1417.472004890442 - }, - "roundtrip": { - "p50": 2072.60799407959, - "p90": 2096.7679023742676, - "p95": 2112.7359867095947, - "p99": 2388.000011444092 - }, - "isolatedSum": { - "p50": 2100.8960008621216, - "p90": 2135.6160640716553, - "p95": 2164.448022842407, - "p99": 2729.3120622634888 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2481604608, - "combineLogicalBytes": 2481604608, - "fanoutMean": 5.282684326171875, - "recvTokensMax": 22165, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-5c3f9114", - "identity": "h200|deepep|7168|8|256|bf16|normal|cached-layout-comm-only-v1|uniform|8|prefill|normal|none|none|0|normalized|0.18|64d989e2e2a6b31", - "colorKey": "h200_edd92e38", - "comparisonKey": "696a49bd5b0de953", - "schemaVersion": 3, - "generatedAt": "2026-06-26T17:30:13.181201+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_4", - "sku": "h200", - "backend": "deepep", - "phase": "prefill", - "mode": "normal", - "resourceMode": "normalized", - "suite": "resource-constrained", - "comparisonClass": "standardized", - "measurementContract": "cached-layout-comm-only-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · bf16 (norm) [cl]", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - 
"experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": 0.18, - "achievedFraction": 0.1818, - "configuredUnits": 24, - "deviceUnits": 132, - "resourceClass": "resource-constrained", - "conformanceClass": "resource-conforming", - "fixedKernel": false, - "paretoEligible": true - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "64d989e2e2a6b31", - "workloadId": "set:6:a426d66e479dc893", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28254409438", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254409438", - "createdAt": "2026-06-26T17:30:13.181201+00:00", - "sha": "60dec7d70f554e252fec87709e2be52752947db1" - }, - "rows": [ - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 104.032002389431, - "p90": 116.12799763679504, - "p95": 120.83200365304947, - "p99": 131.00799918174744 - }, - "combine": { - "p50": 103.07200253009796, - "p90": 115.167997777462, - "p95": 120.95999717712402, - "p99": 125.76000392436981 - }, - "roundtrip": { - "p50": 182.23999440670013, - "p90": 196.48000597953796, - "p95": 200.095996260643, - "p99": 249.7600018978119 - }, - "isolatedSum": { - "p50": 207.10400491952896, - "p90": 231.29599541425705, - "p95": 241.7920008301735, - "p99": 256.76800310611725 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 5, - "correct": 
true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 256, - "globalTokens": 2048, - "dispatch": { - "p50": 136.03200018405914, - "p90": 151.96800231933594, - "p95": 158.4639996290207, - "p99": 170.68800330162048 - }, - "combine": { - "p50": 142.59199798107147, - "p90": 157.53600001335144, - "p95": 161.18399798870087, - "p99": 179.6800047159195 - }, - "roundtrip": { - "p50": 252.8960108757019, - "p90": 265.28000831604004, - "p95": 271.232008934021, - "p99": 293.4400141239166 - }, - "isolatedSum": { - "p50": 278.6239981651306, - "p90": 309.5040023326874, - "p95": 319.64799761772156, - "p99": 350.36800801754 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 155889664, - "combineLogicalBytes": 155889664, - "fanoutMean": 5.3095703125, - "recvTokensMax": 1422, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 512, - "globalTokens": 4096, - "dispatch": { - "p50": 194.87999379634857, - "p90": 210.33599972724915, - "p95": 215.87200462818146, - "p99": 243.9039945602417 - }, - "combine": { - "p50": 208.064004778862, - "p90": 222.04799950122833, - "p95": 230.14399409294128, - "p99": 255.42399287223816 - }, - "roundtrip": { - "p50": 378.84798645973206, - "p90": 394.9120044708252, - "p95": 405.5039882659912, - "p99": 434.27199125289917 - }, - "isolatedSum": { - "p50": 402.94399857521057, - "p90": 432.3839992284775, - "p95": 446.01599872112274, - "p99": 499.32798743247986 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 312266752, - "combineLogicalBytes": 312266752, - "fanoutMean": 5.31787109375, - "recvTokensMax": 2779, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 1024, - "globalTokens": 8192, - "dispatch": { - "p50": 312.99200654029846, - "p90": 334.1119885444641, - "p95": 342.9119884967804, - "p99": 389.15199041366577 - }, - "combine": { - "p50": 326.1120021343231, - "p90": 339.35999870300293, - "p95": 347.3280072212219, 
- "p99": 393.0560052394867 - }, - "roundtrip": { - "p50": 614.0159964561462, - "p90": 628.4800171852112, - "p95": 635.7759833335876, - "p99": 708.4479928016663 - }, - "isolatedSum": { - "p50": 639.1040086746216, - "p90": 673.471987247467, - "p95": 690.2399957180023, - "p99": 782.2079956531525 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 623443968, - "combineLogicalBytes": 623443968, - "fanoutMean": 5.30859375, - "recvTokensMax": 5505, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2048, - "globalTokens": 16384, - "dispatch": { - "p50": 549.3760108947754, - "p90": 563.264012336731, - "p95": 569.2480206489563, - "p99": 593.1519865989685 - }, - "combine": { - "p50": 560.8000159263611, - "p90": 573.2799768447876, - "p95": 579.8400044441223, - "p99": 591.871976852417 - }, - "roundtrip": { - "p50": 1080.9600353240967, - "p90": 1097.5359678268433, - "p95": 1106.0800552368164, - "p99": 1136.512041091919 - }, - "isolatedSum": { - "p50": 1110.1760268211365, - "p90": 1136.5439891815186, - "p95": 1149.0880250930786, - "p99": 1185.0239634513855 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1243805696, - "combineLogicalBytes": 1243805696, - "fanoutMean": 5.29547119140625, - "recvTokensMax": 10952, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4096, - "globalTokens": 32768, - "dispatch": { - "p50": 1017.7919864654541, - "p90": 1032.1600437164307, - "p95": 1039.6480560302734, - "p99": 1061.1519813537598 - }, - "combine": { - "p50": 1013.0879878997803, - "p90": 1025.823950767517, - "p95": 1031.775951385498, - "p99": 1097.7599620819092 - }, - "roundtrip": { - "p50": 2001.5358924865723, - "p90": 2015.7439708709717, - "p95": 2029.7598838806152, - "p99": 2119.1039085388184 - }, - "isolatedSum": { - "p50": 2030.8799743652344, - "p90": 2057.9839944839478, - "p95": 2071.4240074157715, - "p99": 2158.911943435669 - }, - "roundtripMeasured": 
true, - "dispatchLogicalBytes": 2487009280, - "combineLogicalBytes": 2487009280, - "fanoutMean": 5.294189453125, - "recvTokensMax": 21781, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-e1047fdc", - "identity": "h200|deepep|7168|8|256|bf16|normal|cached-layout-comm-only-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", - "colorKey": "h200_76bb7d5d", - "comparisonKey": "174936235ac15d2c", - "schemaVersion": 3, - "generatedAt": "2026-06-26T23:49:44.261568+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_2", - "sku": "h200", - "backend": "deepep", - "phase": "prefill", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "cached-layout-comm-only-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · bf16 [cl]", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "64d989e2e2a6b31", - "workloadId": "set:6:a426d66e479dc893", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": 
"sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28271611947", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271611947", - "createdAt": "2026-06-26T23:49:44.261568+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 104.3199971318245, - "p90": 121.50400131940842, - "p95": 125.50400197505951, - "p99": 141.76000654697418 - }, - "combine": { - "p50": 104.032002389431, - "p90": 119.71200257539749, - "p95": 123.96799772977829, - "p99": 145.4080045223236 - }, - "roundtrip": { - "p50": 184.4799965620041, - "p90": 197.24799692630768, - "p95": 202.11200416088104, - "p99": 221.91999852657318 - }, - "isolatedSum": { - "p50": 208.3519995212555, - "p90": 241.2160038948059, - "p95": 249.4719997048378, - "p99": 287.1680110692978 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 2, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 256, - "globalTokens": 2048, - "dispatch": { - "p50": 134.49600338935852, - "p90": 149.59999918937683, - "p95": 156.63999319076538, - "p99": 199.0080028772354 - }, - "combine": { - "p50": 143.71199905872345, - "p90": 156.51200711727142, - "p95": 161.6639941930771, - "p99": 174.14399981498718 - }, - "roundtrip": { - "p50": 254.88001108169556, - "p90": 277.50399708747864, - "p95": 284.09600257873535, - "p99": 315.20000100135803 - }, - "isolatedSum": { - "p50": 278.20800244808197, - "p90": 306.11200630664825, - "p95": 318.30398738384247, - "p99": 373.1520026922226 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 155889664, - "combineLogicalBytes": 155889664, - "fanoutMean": 5.3095703125, - "recvTokensMax": 1422, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 
600, - "trials": 3 - }, - { - "tokensPerRank": 512, - "globalTokens": 4096, - "dispatch": { - "p50": 192.89599359035492, - "p90": 207.39200711250305, - "p95": 213.53599429130554, - "p99": 229.8240065574646 - }, - "combine": { - "p50": 222.88000583648682, - "p90": 239.77600038051605, - "p95": 244.06400322914124, - "p99": 276.16000175476074 - }, - "roundtrip": { - "p50": 388.51198554039, - "p90": 405.08800745010376, - "p95": 412.6400053501129, - "p99": 470.43201327323914 - }, - "isolatedSum": { - "p50": 415.77599942684174, - "p90": 447.1680074930191, - "p95": 457.5999975204468, - "p99": 505.98400831222534 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 312266752, - "combineLogicalBytes": 312266752, - "fanoutMean": 5.31787109375, - "recvTokensMax": 2779, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 1024, - "globalTokens": 8192, - "dispatch": { - "p50": 304.32000756263733, - "p90": 328.2560110092163, - "p95": 334.6239924430847, - "p99": 354.8159897327423 - }, - "combine": { - "p50": 352.35199332237244, - "p90": 364.1279935836792, - "p95": 372.44799733161926, - "p99": 391.80800318717957 - }, - "roundtrip": { - "p50": 630.1760077476501, - "p90": 646.7840075492859, - "p95": 655.135989189148, - "p99": 679.5520186424255 - }, - "isolatedSum": { - "p50": 656.6720008850098, - "p90": 692.3840045928955, - "p95": 707.071989774704, - "p99": 746.6239929199219 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 623443968, - "combineLogicalBytes": 623443968, - "fanoutMean": 5.30859375, - "recvTokensMax": 5505, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2048, - "globalTokens": 16384, - "dispatch": { - "p50": 515.2000188827515, - "p90": 530.0800204277039, - "p95": 538.9119982719421, - "p99": 611.7119789123535 - }, - "combine": { - "p50": 611.2319827079773, - "p90": 623.5520243644714, - "p95": 633.2160234451294, - "p99": 764.1919851303101 - }, 
- "roundtrip": { - "p50": 1099.4880199432373, - "p90": 1118.4959411621094, - "p95": 1131.1999559402466, - "p99": 1154.2079448699951 - }, - "isolatedSum": { - "p50": 1126.4320015907288, - "p90": 1153.6320447921753, - "p95": 1172.1280217170715, - "p99": 1375.9039640426636 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1243805696, - "combineLogicalBytes": 1243805696, - "fanoutMean": 5.29547119140625, - "recvTokensMax": 10952, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4096, - "globalTokens": 32768, - "dispatch": { - "p50": 964.959979057312, - "p90": 992.2879934310913, - "p95": 1000.3199577331543, - "p99": 1034.4959497451782 - }, - "combine": { - "p50": 1105.7920455932617, - "p90": 1125.1840591430664, - "p95": 1137.5679969787598, - "p99": 1247.26402759552 - }, - "roundtrip": { - "p50": 2036.895990371704, - "p90": 2068.3839321136475, - "p95": 2084.383964538574, - "p99": 2168.4799194335938 - }, - "isolatedSum": { - "p50": 2070.7520246505737, - "p90": 2117.4720525741577, - "p95": 2137.887954711914, - "p99": 2281.7599773406982 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2487009280, - "combineLogicalBytes": 2487009280, - "fanoutMean": 5.294189453125, - "recvTokensMax": 21781, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-e384c8f8", - "identity": "h200|deepep|4096|8|128|fp8|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||dc27c5e0894e569", - "colorKey": "h200_9979edfc", - "comparisonKey": "ca4b77cbfe002bae", - "schemaVersion": 3, - "generatedAt": "2026-06-27T11:14:27.799131+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_11", - "sku": "h200", - "backend": "deepep", - "phase": "prefill", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - 
"topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · fp8", - "model": "Qwen3.5", - "shape": { - "hidden": 4096, - "topk": 8, - "experts": 128, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "fp8", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "dc27c5e0894e569", - "workloadId": "set:6:76d8142d69406335", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28287507619", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28287507619", - "createdAt": "2026-06-27T11:14:27.799131+00:00", - "sha": "df7fddee0a275156d4c72fa006cd2b73bce72613" - }, - "rows": [ - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 77.2159993648529, - "p90": 103.67999970912933, - "p95": 112.0000034570694, - "p99": 133.63200426101685 - }, - "combine": { - "p50": 73.72800260782242, - "p90": 91.71199798583984, - "p95": 96.99200093746185, - "p99": 107.45599865913391 - }, - "roundtrip": { - "p50": 171.1679995059967, - "p90": 215.87200462818146, - "p95": 231.36000335216522, - "p99": 281.3119888305664 - }, - "isolatedSum": { - "p50": 150.94400197267532, - "p90": 195.39199769496918, - "p95": 208.99200439453125, - "p99": 241.08800292015076 - 
}, - "roundtripMeasured": true, - "dispatchLogicalBytes": 22282240, - "combineLogicalBytes": 44564480, - "fanoutMean": 5.3125, - "recvTokensMax": 699, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 256, - "globalTokens": 2048, - "dispatch": { - "p50": 92.32000261545181, - "p90": 114.88000303506851, - "p95": 124.54400211572647, - "p99": 145.6959992647171 - }, - "combine": { - "p50": 98.78399968147278, - "p90": 115.99999666213989, - "p95": 121.76000326871872, - "p99": 152.92799472808838 - }, - "roundtrip": { - "p50": 223.29600155353546, - "p90": 252.16001272201538, - "p95": 263.90400528907776, - "p99": 281.72799944877625 - }, - "isolatedSum": { - "p50": 191.1040022969246, - "p90": 230.8799996972084, - "p95": 246.3040053844452, - "p99": 298.6239939928055 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 44863488, - "combineLogicalBytes": 89726976, - "fanoutMean": 5.34814453125, - "recvTokensMax": 1385, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 512, - "globalTokens": 4096, - "dispatch": { - "p50": 120.38400024175644, - "p90": 147.77599275112152, - "p95": 158.78400206565857, - "p99": 194.87999379634857 - }, - "combine": { - "p50": 148.44800531864166, - "p90": 163.71199488639832, - "p95": 171.6800034046173, - "p99": 186.8479996919632 - }, - "roundtrip": { - "p50": 343.9359962940216, - "p90": 367.64800548553467, - "p95": 382.9120099544525, - "p99": 435.84001064300537 - }, - "isolatedSum": { - "p50": 268.8320055603981, - "p90": 311.48798763751984, - "p95": 330.4640054702759, - "p99": 381.72799348831177 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 89751552, - "combineLogicalBytes": 179503104, - "fanoutMean": 5.349609375, - "recvTokensMax": 2772, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 1024, - "globalTokens": 8192, - "dispatch": { - "p50": 174.97600615024567, - 
"p90": 203.07199656963348, - "p95": 215.71199595928192, - "p99": 236.76800727844238 - }, - "combine": { - "p50": 243.68000030517578, - "p90": 257.6960027217865, - "p95": 264.16000723838806, - "p99": 295.26400566101074 - }, - "roundtrip": { - "p50": 581.7599892616272, - "p90": 607.3920130729675, - "p95": 614.687979221344, - "p99": 658.847987651825 - }, - "isolatedSum": { - "p50": 418.65600645542145, - "p90": 460.76799929142, - "p95": 479.87200319767, - "p99": 532.0320129394531 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 179511296, - "combineLogicalBytes": 359022592, - "fanoutMean": 5.349853515625, - "recvTokensMax": 5558, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2048, - "globalTokens": 16384, - "dispatch": { - "p50": 281.1200022697449, - "p90": 304.1599988937378, - "p95": 312.032014131546, - "p99": 346.3360071182251 - }, - "combine": { - "p50": 413.12000155448914, - "p90": 429.3760061264038, - "p95": 438.87999653816223, - "p99": 470.8159863948822 - }, - "roundtrip": { - "p50": 1013.4719610214233, - "p90": 1037.824034690857, - "p95": 1052.0960092544556, - "p99": 1194.1440105438232 - }, - "isolatedSum": { - "p50": 694.240003824234, - "p90": 733.5360050201416, - "p95": 750.9120106697083, - "p99": 817.1519935131073 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 358055936, - "combineLogicalBytes": 716111872, - "fanoutMean": 5.33544921875, - "recvTokensMax": 10982, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4096, - "globalTokens": 32768, - "dispatch": { - "p50": 500.7359981536865, - "p90": 529.9519896507263, - "p95": 540.992021560669, - "p99": 584.3200087547302 - }, - "combine": { - "p50": 754.8159956932068, - "p90": 771.6479897499084, - "p95": 786.4639759063721, - "p99": 983.8079810142517 - }, - "roundtrip": { - "p50": 1906.6879749298096, - "p90": 1934.656023979187, - "p95": 1949.887990951538, - "p99": 
2083.967924118042 - }, - "isolatedSum": { - "p50": 1255.5519938468933, - "p90": 1301.5999794006348, - "p95": 1327.455997467041, - "p99": 1568.127989768982 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 716197888, - "combineLogicalBytes": 1432395776, - "fanoutMean": 5.336090087890625, - "recvTokensMax": 21939, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-26de8d70", - "identity": "h200|deepep|4096|8|128|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||dc27c5e0894e569", - "colorKey": "h200_87683f6c", - "comparisonKey": "b7adcc489d58bf89", - "schemaVersion": 3, - "generatedAt": "2026-06-26T23:53:37.273038+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_5", - "sku": "h200", - "backend": "deepep", - "phase": "prefill", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "runtime-visible-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · fp8", - "model": "Qwen3.5", - "shape": { - "hidden": 4096, - "topk": 8, - "experts": 128, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "fp8", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "dc27c5e0894e569", - "workloadId": "set:6:76d8142d69406335", - "workloadSource": "canonical-serialized", - 
"eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28271739849", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271739849", - "createdAt": "2026-06-26T23:53:37.273038+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 233.2800030708313, - "p90": 296.25600576400757, - "p95": 315.45600295066833, - "p99": 387.84000277519226 - }, - "combine": { - "p50": 74.72000271081924, - "p90": 92.96000003814697, - "p95": 97.98400104045868, - "p99": 124.86399710178375 - }, - "roundtrip": { - "p50": 278.9759933948517, - "p90": 337.44001388549805, - "p95": 363.5840117931366, - "p99": 408.9600145816803 - }, - "isolatedSum": { - "p50": 308.00000578165054, - "p90": 389.21600580215454, - "p95": 413.440003991127, - "p99": 512.703999876976 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 22282240, - "combineLogicalBytes": 44564480, - "fanoutMean": 5.3125, - "recvTokensMax": 699, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 256, - "globalTokens": 2048, - "dispatch": { - "p50": 240.28800427913666, - "p90": 292.03200340270996, - "p95": 306.97599053382874, - "p99": 329.5679986476898 - }, - "combine": { - "p50": 98.30400347709656, - "p90": 115.07199704647064, - "p95": 119.00799721479416, - "p99": 131.9359987974167 - }, - "roundtrip": { - "p50": 325.408011674881, - "p90": 376.67199969291687, - "p95": 392.8639888763428, - "p99": 439.520001411438 - }, - "isolatedSum": { - "p50": 338.5920077562332, - "p90": 407.1040004491806, - "p95": 425.9839877486229, - "p99": 461.5039974451065 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 44863488, - "combineLogicalBytes": 89726976, - "fanoutMean": 5.34814453125, 
- "recvTokensMax": 1385, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 512, - "globalTokens": 4096, - "dispatch": { - "p50": 315.45600295066833, - "p90": 357.08799958229065, - "p95": 369.9199855327606, - "p99": 407.039999961853 - }, - "combine": { - "p50": 147.45600521564484, - "p90": 164.67200219631195, - "p95": 168.16000640392303, - "p99": 182.52800405025482 - }, - "roundtrip": { - "p50": 460.4479968547821, - "p90": 508.575975894928, - "p95": 523.360013961792, - "p99": 576.0959982872009 - }, - "isolatedSum": { - "p50": 462.91200816631317, - "p90": 521.7600017786026, - "p95": 538.0799919366837, - "p99": 589.5680040121078 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 89751552, - "combineLogicalBytes": 179503104, - "fanoutMean": 5.349609375, - "recvTokensMax": 2772, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 1024, - "globalTokens": 8192, - "dispatch": { - "p50": 458.2720100879669, - "p90": 501.5680193901062, - "p95": 517.632007598877, - "p99": 562.1119737625122 - }, - "combine": { - "p50": 241.2160038948059, - "p90": 252.06398963928223, - "p95": 257.34400749206543, - "p99": 279.83999252319336 - }, - "roundtrip": { - "p50": 681.9199919700623, - "p90": 713.4079933166504, - "p95": 728.8320064544678, - "p99": 805.8239817619324 - }, - "isolatedSum": { - "p50": 699.4880139827728, - "p90": 753.6320090293884, - "p95": 774.9760150909424, - "p99": 841.9519662857056 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 179511296, - "combineLogicalBytes": 359022592, - "fanoutMean": 5.349853515625, - "recvTokensMax": 5558, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2048, - "globalTokens": 16384, - "dispatch": { - "p50": 734.112024307251, - "p90": 769.8879837989807, - "p95": 783.7439775466919, - "p99": 899.9680280685425 - }, - "combine": { - "p50": 410.17600893974304, - 
"p90": 422.4640130996704, - "p95": 427.64800786972046, - "p99": 457.72799849510193 - }, - "roundtrip": { - "p50": 1137.4399662017822, - "p90": 1176.416039466858, - "p95": 1203.328013420105, - "p99": 1318.8159465789795 - }, - "isolatedSum": { - "p50": 1144.288033246994, - "p90": 1192.3519968986511, - "p95": 1211.3919854164124, - "p99": 1357.6960265636444 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 358055936, - "combineLogicalBytes": 716111872, - "fanoutMean": 5.33544921875, - "recvTokensMax": 10982, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4096, - "globalTokens": 32768, - "dispatch": { - "p50": 1373.792052268982, - "p90": 1396.7679738998413, - "p95": 1406.9440364837646, - "p99": 1577.5359869003296 - }, - "combine": { - "p50": 750.3679990768433, - "p90": 762.6879811286926, - "p95": 770.3359723091125, - "p99": 788.0319952964783 - }, - "roundtrip": { - "p50": 2134.335994720459, - "p90": 2161.439895629883, - "p95": 2178.2400608062744, - "p99": 2561.3439083099365 - }, - "isolatedSum": { - "p50": 2124.160051345825, - "p90": 2159.455955028534, - "p95": 2177.280008792877, - "p99": 2365.567982196808 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 716197888, - "combineLogicalBytes": 1432395776, - "fanoutMean": 5.336090087890625, - "recvTokensMax": 21939, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-2e0e49b4", - "identity": "h200|deepep|5120|8|160|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||0c022a63bbcbf42", - "colorKey": "h200_87683f6c", - "comparisonKey": "dcdf4b262ed1d48f", - "schemaVersion": 3, - "generatedAt": "2026-06-26T23:54:08.323229+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_10", - "sku": "h200", - "backend": "deepep", - "phase": "prefill", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - 
"comparisonClass": "standardized", - "measurementContract": "runtime-visible-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · fp8", - "model": "shape 5120/8/160", - "shape": { - "hidden": 5120, - "topk": 8, - "experts": 160, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "fp8", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "0c022a63bbcbf42", - "workloadId": "set:6:28c0c09b13ff0acf", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28271755854", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271755854", - "createdAt": "2026-06-26T23:54:08.323229+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 219.84000504016876, - "p90": 274.01599287986755, - "p95": 289.5039916038513, - "p99": 343.77598762512207 - }, - "combine": { - "p50": 81.08799904584885, - "p90": 91.90399944782257, - "p95": 99.55199807882309, - "p99": 105.79200088977814 - }, - "roundtrip": { - "p50": 288.57600688934326, - "p90": 340.2239978313446, - "p95": 353.95199060440063, - "p99": 388.0319893360138 - }, - "isolatedSum": { - "p50": 
300.9280040860176, - "p90": 365.9199923276901, - "p95": 389.0559896826744, - "p99": 449.5679885149002 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 27837440, - "combineLogicalBytes": 55674880, - "fanoutMean": 5.3095703125, - "recvTokensMax": 699, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 256, - "globalTokens": 2048, - "dispatch": { - "p50": 254.62400913238525, - "p90": 299.74400997161865, - "p95": 313.2160007953644, - "p99": 335.6480002403259 - }, - "combine": { - "p50": 112.60800063610077, - "p90": 124.57600235939026, - "p95": 128.31999361515045, - "p99": 137.472003698349 - }, - "roundtrip": { - "p50": 357.88801312446594, - "p90": 402.78398990631104, - "p95": 418.7839925289154, - "p99": 468.3839976787567 - }, - "isolatedSum": { - "p50": 367.232009768486, - "p90": 424.3200123310089, - "p95": 441.53599441051483, - "p99": 473.1200039386749 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 55552000, - "combineLogicalBytes": 111104000, - "fanoutMean": 5.2978515625, - "recvTokensMax": 1387, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 512, - "globalTokens": 4096, - "dispatch": { - "p50": 338.17601203918457, - "p90": 376.48001313209534, - "p95": 391.80800318717957, - "p99": 431.71200156211853 - }, - "combine": { - "p50": 170.43200135231018, - "p90": 182.8480064868927, - "p95": 187.77599930763245, - "p99": 198.46400618553162 - }, - "roundtrip": { - "p50": 509.5679759979248, - "p90": 558.2079887390137, - "p95": 577.6960253715515, - "p99": 617.7600026130676 - }, - "isolatedSum": { - "p50": 508.60801339149475, - "p90": 559.328019618988, - "p95": 579.584002494812, - "p99": 630.1760077476501 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 111549440, - "combineLogicalBytes": 223098880, - "fanoutMean": 5.319091796875, - "recvTokensMax": 2762, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 
3 - }, - { - "tokensPerRank": 1024, - "globalTokens": 8192, - "dispatch": { - "p50": 505.0879716873169, - "p90": 540.7040119171143, - "p95": 552.6720285415649, - "p99": 595.1679944992065 - }, - "combine": { - "p50": 273.75999093055725, - "p90": 285.66399216651917, - "p95": 291.4240062236786, - "p99": 313.05599212646484 - }, - "roundtrip": { - "p50": 780.2879810333252, - "p90": 834.7839713096619, - "p95": 867.3920035362244, - "p99": 1058.9760541915894 - }, - "isolatedSum": { - "p50": 778.8479626178741, - "p90": 826.3680040836334, - "p95": 844.0960347652435, - "p99": 908.2239866256714 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 223365120, - "combineLogicalBytes": 446730240, - "fanoutMean": 5.325439453125, - "recvTokensMax": 5518, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2048, - "globalTokens": 16384, - "dispatch": { - "p50": 859.1039776802063, - "p90": 874.3680119514465, - "p95": 884.447991847992, - "p99": 1000.8000135421753 - }, - "combine": { - "p50": 476.0960042476654, - "p90": 487.5839948654175, - "p95": 495.9680140018463, - "p99": 551.2639880180359 - }, - "roundtrip": { - "p50": 1315.2320384979248, - "p90": 1342.4960374832153, - "p95": 1364.9920225143433, - "p99": 1437.1839761734009 - }, - "isolatedSum": { - "p50": 1335.1999819278717, - "p90": 1361.952006816864, - "p95": 1380.4160058498383, - "p99": 1552.0640015602112 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 446817280, - "combineLogicalBytes": 893634560, - "fanoutMean": 5.32647705078125, - "recvTokensMax": 11032, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4096, - "globalTokens": 32768, - "dispatch": { - "p50": 1618.3040142059326, - "p90": 1638.8479471206665, - "p95": 1650.3679752349854, - "p99": 1797.8880405426025 - }, - "combine": { - "p50": 871.5839982032776, - "p90": 885.4719996452332, - "p95": 893.7280178070068, - "p99": 936.1280202865601 - }, - 
"roundtrip": { - "p50": 2472.0640182495117, - "p90": 2496.8960285186768, - "p95": 2517.6639556884766, - "p99": 2775.1998901367188 - }, - "isolatedSum": { - "p50": 2489.88801240921, - "p90": 2524.3199467658997, - "p95": 2544.095993041992, - "p99": 2734.0160608291626 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 893132800, - "combineLogicalBytes": 1786265600, - "fanoutMean": 5.323486328125, - "recvTokensMax": 21895, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-cd909950", - "identity": "h200|deepep|6144|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", - "colorKey": "h200_9979edfc", - "comparisonKey": "eb524229a3f58a63", - "schemaVersion": 3, - "generatedAt": "2026-06-27T11:14:00.891802+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_9", - "sku": "h200", - "backend": "deepep", - "phase": "prefill", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · fp8", - "model": "MiniMax-M3", - "shape": { - "hidden": 6144, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "fp8", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": 
"64d989e2e2a6b31", - "workloadId": "set:6:9f5e1e005a35e937", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28287496212", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28287496212", - "createdAt": "2026-06-27T11:14:00.891802+00:00", - "sha": "df7fddee0a275156d4c72fa006cd2b73bce72613" - }, - "rows": [ - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 84.54400300979614, - "p90": 112.44799941778183, - "p95": 123.03999811410904, - "p99": 166.81599617004395 - }, - "combine": { - "p50": 87.99999952316284, - "p90": 105.56799918413162, - "p95": 112.35199868679047, - "p99": 141.34399592876434 - }, - "roundtrip": { - "p50": 196.16000354290009, - "p90": 240.22400379180908, - "p95": 254.91198897361755, - "p99": 326.30398869514465 - }, - "isolatedSum": { - "p50": 172.54400253295898, - "p90": 218.01599860191345, - "p95": 235.3919968008995, - "p99": 308.1599920988083 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 33288192, - "combineLogicalBytes": 66576384, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 256, - "globalTokens": 2048, - "dispatch": { - "p50": 101.75999999046326, - "p90": 124.09599870443344, - "p95": 130.11200726032257, - "p99": 143.61600577831268 - }, - "combine": { - "p50": 120.41600048542023, - "p90": 135.13599336147308, - "p95": 138.5280042886734, - "p99": 143.8719928264618 - }, - "roundtrip": { - "p50": 278.0480086803436, - "p90": 296.9599962234497, - "p95": 302.91199684143066, - "p99": 346.3360071182251 - }, - "isolatedSum": { - "p50": 222.17600047588348, - "p90": 259.2319920659065, - "p95": 268.640011548996, - "p99": 287.4879986047745 - }, - 
"roundtripMeasured": true, - "dispatchLogicalBytes": 66809856, - "combineLogicalBytes": 133619712, - "fanoutMean": 5.3095703125, - "recvTokensMax": 1422, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 512, - "globalTokens": 4096, - "dispatch": { - "p50": 131.6159963607788, - "p90": 153.18399667739868, - "p95": 161.82400286197662, - "p99": 184.79999899864197 - }, - "combine": { - "p50": 191.00800156593323, - "p90": 205.1199972629547, - "p95": 210.62399446964264, - "p99": 231.87200725078583 - }, - "roundtrip": { - "p50": 444.19199228286743, - "p90": 466.5600061416626, - "p95": 479.13599014282227, - "p99": 664.7359728813171 - }, - "isolatedSum": { - "p50": 322.62399792671204, - "p90": 358.3039939403534, - "p95": 372.44799733161926, - "p99": 416.6720062494278 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 133828608, - "combineLogicalBytes": 267657216, - "fanoutMean": 5.31787109375, - "recvTokensMax": 2779, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 1024, - "globalTokens": 8192, - "dispatch": { - "p50": 195.64799964427948, - "p90": 217.21599996089935, - "p95": 222.78399765491486, - "p99": 280.0320088863373 - }, - "combine": { - "p50": 306.304007768631, - "p90": 318.39999556541443, - "p95": 326.6240060329437, - "p99": 356.9279909133911 - }, - "roundtrip": { - "p50": 739.0080094337463, - "p90": 759.2960000038147, - "p95": 774.0479707717896, - "p99": 813.5039806365967 - }, - "isolatedSum": { - "p50": 501.95200741291046, - "p90": 535.6159955263138, - "p95": 549.4080036878586, - "p99": 636.9599997997284 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 267190272, - "combineLogicalBytes": 534380544, - "fanoutMean": 5.30859375, - "recvTokensMax": 5505, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2048, - "globalTokens": 16384, - "dispatch": { - "p50": 324.319988489151, - 
"p90": 343.77598762512207, - "p95": 355.9359908103943, - "p99": 389.2799913883209 - }, - "combine": { - "p50": 538.1439924240112, - "p90": 549.2799878120422, - "p95": 556.9919943809509, - "p99": 602.7839779853821 - }, - "roundtrip": { - "p50": 1345.0239896774292, - "p90": 1376.4480352401733, - "p95": 1404.3519496917725, - "p99": 1568.6399936676025 - }, - "isolatedSum": { - "p50": 862.4639809131622, - "p90": 893.0559754371643, - "p95": 912.9279851913452, - "p99": 992.063969373703 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 533059584, - "combineLogicalBytes": 1066119168, - "fanoutMean": 5.29547119140625, - "recvTokensMax": 10952, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4096, - "globalTokens": 32768, - "dispatch": { - "p50": 583.4559798240662, - "p90": 596.5120196342468, - "p95": 603.9360165596008, - "p99": 710.3360295295715 - }, - "combine": { - "p50": 978.2400131225586, - "p90": 994.8480129241943, - "p95": 1007.7120065689087, - "p99": 1139.9359703063965 - }, - "roundtrip": { - "p50": 2591.327905654907, - "p90": 2624.3200302124023, - "p95": 2637.3119354248047, - "p99": 2756.351947784424 - }, - "isolatedSum": { - "p50": 1561.6959929466248, - "p90": 1591.3600325584412, - "p95": 1611.6480231285095, - "p99": 1850.271999835968 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1065861120, - "combineLogicalBytes": 2131722240, - "fanoutMean": 5.294189453125, - "recvTokensMax": 21781, - "stragglerRank": 3, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-92d6dac4", - "identity": "h200|deepep|6144|8|256|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", - "colorKey": "h200_87683f6c", - "comparisonKey": "5878390fb0ef3ac0", - "schemaVersion": 3, - "generatedAt": "2026-06-26T23:54:33.209811+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_1", - "sku": "h200", - 
"backend": "deepep", - "phase": "prefill", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "runtime-visible-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · fp8", - "model": "MiniMax-M3", - "shape": { - "hidden": 6144, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "fp8", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "64d989e2e2a6b31", - "workloadId": "set:6:9f5e1e005a35e937", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28271771597", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271771597", - "createdAt": "2026-06-26T23:54:33.209811+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 237.12000250816345, - "p90": 447.00801372528076, - "p95": 466.2080109119415, - "p99": 509.2800259590149 - }, - "combine": { - "p50": 89.59999680519104, - "p90": 118.20799857378006, - "p95": 120.38400024175644, - "p99": 131.55199587345123 - }, - "roundtrip": { - "p50": 299.51998591423035, - "p90": 
465.9839868545532, - "p95": 490.01601338386536, - "p99": 533.9199900627136 - }, - "isolatedSum": { - "p50": 326.7199993133545, - "p90": 565.2160122990608, - "p95": 586.592011153698, - "p99": 640.8320218324661 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 33288192, - "combineLogicalBytes": 66576384, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 256, - "globalTokens": 2048, - "dispatch": { - "p50": 272.96000719070435, - "p90": 312.19199299812317, - "p95": 322.7840065956116, - "p99": 376.6080141067505 - }, - "combine": { - "p50": 121.91999703645706, - "p90": 133.34399461746216, - "p95": 139.1039937734604, - "p99": 144.48000490665436 - }, - "roundtrip": { - "p50": 388.5760009288788, - "p90": 429.28001284599304, - "p95": 448.5439956188202, - "p99": 507.87198543548584 - }, - "isolatedSum": { - "p50": 394.8800042271614, - "p90": 445.5359876155853, - "p95": 461.88800036907196, - "p99": 521.0880190134048 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 66809856, - "combineLogicalBytes": 133619712, - "fanoutMean": 5.3095703125, - "recvTokensMax": 1422, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 512, - "globalTokens": 4096, - "dispatch": { - "p50": 375.61601400375366, - "p90": 427.4879992008209, - "p95": 443.77601146698, - "p99": 500.4799962043762 - }, - "combine": { - "p50": 192.9599940776825, - "p90": 205.08800446987152, - "p95": 213.47199380397797, - "p99": 237.92000114917755 - }, - "roundtrip": { - "p50": 553.5680055618286, - "p90": 599.2000102996826, - "p95": 623.583972454071, - "p99": 716.1920070648193 - }, - "isolatedSum": { - "p50": 568.5760080814362, - "p90": 632.5760036706924, - "p95": 657.248005270958, - "p99": 738.3999973535538 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 133828608, - "combineLogicalBytes": 267657216, - "fanoutMean": 5.31787109375, - 
"recvTokensMax": 2779, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 1024, - "globalTokens": 8192, - "dispatch": { - "p50": 557.6000213623047, - "p90": 596.7360138893127, - "p95": 607.3920130729675, - "p99": 644.9599862098694 - }, - "combine": { - "p50": 306.335985660553, - "p90": 316.3520097732544, - "p95": 320.51199674606323, - "p99": 334.52799916267395 - }, - "roundtrip": { - "p50": 853.1839847564697, - "p90": 880.8959722518921, - "p95": 895.3920006752014, - "p99": 966.7840003967285 - }, - "isolatedSum": { - "p50": 863.9360070228577, - "p90": 913.0880236625671, - "p95": 927.9040098190308, - "p99": 979.4879853725433 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 267190272, - "combineLogicalBytes": 534380544, - "fanoutMean": 5.30859375, - "recvTokensMax": 5505, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2048, - "globalTokens": 16384, - "dispatch": { - "p50": 987.8720045089722, - "p90": 1001.9840002059937, - "p95": 1013.2479667663574, - "p99": 1395.5520391464233 - }, - "combine": { - "p50": 540.9280061721802, - "p90": 573.7280249595642, - "p95": 584.6400260925293, - "p99": 626.0480284690857 - }, - "roundtrip": { - "p50": 1523.6799716949463, - "p90": 1545.408010482788, - "p95": 1558.1120252609253, - "p99": 1704.2880058288574 - }, - "isolatedSum": { - "p50": 1528.8000106811523, - "p90": 1575.7120251655579, - "p95": 1597.8879928588867, - "p99": 2021.600067615509 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 533059584, - "combineLogicalBytes": 1066119168, - "fanoutMean": 5.29547119140625, - "recvTokensMax": 10952, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4096, - "globalTokens": 32768, - "dispatch": { - "p50": 1865.3759956359863, - "p90": 1883.2000494003296, - "p95": 1893.02396774292, - "p99": 1925.7279634475708 - }, - "combine": { - "p50": 
981.823980808258, - "p90": 994.0800070762634, - "p95": 1002.7199983596802, - "p99": 1096.3200330734253 - }, - "roundtrip": { - "p50": 2907.2320461273193, - "p90": 2933.151960372925, - "p95": 2943.104028701782, - "p99": 3191.3599967956543 - }, - "isolatedSum": { - "p50": 2847.1999764442444, - "p90": 2877.280056476593, - "p95": 2895.7439661026, - "p99": 3022.047996520996 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1065861120, - "combineLogicalBytes": 2131722240, - "fanoutMean": 5.294189453125, - "recvTokensMax": 21781, - "stragglerRank": 2, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-e6cb64c3", - "identity": "h200|deepep|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", - "colorKey": "h200_9979edfc", - "comparisonKey": "73a640c71287a1ce", - "schemaVersion": 3, - "generatedAt": "2026-06-27T10:26:33.521456+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_5", - "sku": "h200", - "backend": "deepep", - "phase": "prefill", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · fp8", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "fp8", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - 
"nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "64d989e2e2a6b31", - "workloadId": "set:6:a426d66e479dc893", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "2.0.0+af9a040", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28286433802", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28286433802", - "createdAt": "2026-06-27T10:26:33.521456+00:00", - "sha": "91c7acf59a5e524f37742922ec67721d86a03f6b" - }, - "rows": [ - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 86.17600053548813, - "p90": 102.20800340175629, - "p95": 112.15999722480774, - "p99": 126.68800354003906 - }, - "combine": { - "p50": 96.44799679517746, - "p90": 110.97600311040878, - "p95": 116.83200299739838, - "p99": 120.44800072908401 - }, - "roundtrip": { - "p50": 209.98400449752808, - "p90": 236.95999383926392, - "p95": 250.40000677108765, - "p99": 302.11201310157776 - }, - "isolatedSum": { - "p50": 182.6239973306656, - "p90": 213.18400651216507, - "p95": 228.99200022220612, - "p99": 247.13600426912308 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 38836224, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 256, - "globalTokens": 2048, - "dispatch": { - "p50": 103.39199751615524, - "p90": 121.05599790811539, - "p95": 127.96799838542938, - "p99": 135.6479972600937 - }, - "combine": { - "p50": 137.79200613498688, - "p90": 151.07199549674988, - "p95": 155.13600409030914, - "p99": 164.89599645137787 - }, - "roundtrip": { - "p50": 314.2400085926056, - "p90": 329.50401306152344, - "p95": 339.26400542259216, - "p99": 374.36801195144653 - }, - "isolatedSum": { - 
"p50": 241.18400365114212, - "p90": 272.12799340486526, - "p95": 283.1040024757385, - "p99": 300.54399371147156 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 77944832, - "combineLogicalBytes": 155889664, - "fanoutMean": 5.3095703125, - "recvTokensMax": 1422, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 512, - "globalTokens": 4096, - "dispatch": { - "p50": 135.96799969673157, - "p90": 154.91199493408203, - "p95": 162.75200247764587, - "p99": 174.5920032262802 - }, - "combine": { - "p50": 218.62399578094482, - "p90": 232.80000686645508, - "p95": 239.99999463558197, - "p99": 370.59199810028076 - }, - "roundtrip": { - "p50": 495.2639937400818, - "p90": 509.2160105705261, - "p95": 516.9280171394348, - "p99": 547.6800203323364 - }, - "isolatedSum": { - "p50": 354.5919954776764, - "p90": 387.7120018005371, - "p95": 402.75199711322784, - "p99": 545.184001326561 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 156133376, - "combineLogicalBytes": 312266752, - "fanoutMean": 5.31787109375, - "recvTokensMax": 2779, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 1024, - "globalTokens": 8192, - "dispatch": { - "p50": 203.07199656963348, - "p90": 224.95999932289124, - "p95": 230.04800081253052, - "p99": 242.5920069217682 - }, - "combine": { - "p50": 351.967990398407, - "p90": 361.5039885044098, - "p95": 367.2640025615692, - "p99": 383.2319974899292 - }, - "roundtrip": { - "p50": 836.3519906997681, - "p90": 849.6959805488586, - "p95": 854.1439771652222, - "p99": 861.3759875297546 - }, - "isolatedSum": { - "p50": 555.0399869680405, - "p90": 586.463987827301, - "p95": 597.3120033740997, - "p99": 625.8240044116974 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 311721984, - "combineLogicalBytes": 623443968, - "fanoutMean": 5.30859375, - "recvTokensMax": 5505, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - 
"trials": 3 - }, - { - "tokensPerRank": 2048, - "globalTokens": 16384, - "dispatch": { - "p50": 333.2799971103668, - "p90": 350.14399886131287, - "p95": 359.96800661087036, - "p99": 417.4720048904419 - }, - "combine": { - "p50": 617.3120141029358, - "p90": 628.0959844589233, - "p95": 631.6159963607788, - "p99": 644.8959708213806 - }, - "roundtrip": { - "p50": 1508.4160566329956, - "p90": 1521.9520330429077, - "p95": 1531.7440032958984, - "p99": 1626.688003540039 - }, - "isolatedSum": { - "p50": 950.5920112133026, - "p90": 978.2399833202362, - "p95": 991.5840029716492, - "p99": 1062.3679757118225 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 621902848, - "combineLogicalBytes": 1243805696, - "fanoutMean": 5.29547119140625, - "recvTokensMax": 10952, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4096, - "globalTokens": 32768, - "dispatch": { - "p50": 604.9280166625977, - "p90": 614.6559715270996, - "p95": 619.488000869751, - "p99": 634.335994720459 - }, - "combine": { - "p50": 1122.1439838409424, - "p90": 1135.9360218048096, - "p95": 1145.7600593566895, - "p99": 1211.1680507659912 - }, - "roundtrip": { - "p50": 2860.6081008911133, - "p90": 2879.5840740203857, - "p95": 2889.3120288848877, - "p99": 3131.5200328826904 - }, - "isolatedSum": { - "p50": 1727.07200050354, - "p90": 1750.5919933319092, - "p95": 1765.2480602264404, - "p99": 1845.5040454864502 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1243504640, - "combineLogicalBytes": 2487009280, - "fanoutMean": 5.294189453125, - "recvTokensMax": 21781, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-4da6f6db", - "identity": "h200|deepep|7168|8|256|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", - "colorKey": "h200_87683f6c", - "comparisonKey": "90a8a7fc3b314f23", - "schemaVersion": 3, - "generatedAt": 
"2026-06-26T23:50:44.259181+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_3", - "sku": "h200", - "backend": "deepep", - "phase": "prefill", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "runtime-visible-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · fp8", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "fp8", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "64d989e2e2a6b31", - "workloadId": "set:6:a426d66e479dc893", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28271640687", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271640687", - "createdAt": "2026-06-26T23:50:44.259181+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 239.3600046634674, - "p90": 286.52799129486084, - "p95": 313.79199028015137, - "p99": 391.2000060081482 - }, - "combine": { - "p50": 97.21600264310837, - 
"p90": 110.59200018644333, - "p95": 116.67200177907944, - "p99": 134.783998131752 - }, - "roundtrip": { - "p50": 309.9519908428192, - "p90": 360.48001050949097, - "p95": 381.5680146217346, - "p99": 466.94400906562805 - }, - "isolatedSum": { - "p50": 336.5760073065758, - "p90": 397.11999148130417, - "p95": 430.4639920592308, - "p99": 525.9840041399002 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 38836224, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 256, - "globalTokens": 2048, - "dispatch": { - "p50": 291.0720109939575, - "p90": 340.5759930610657, - "p95": 355.19999265670776, - "p99": 430.30399084091187 - }, - "combine": { - "p50": 137.7599984407425, - "p90": 154.30399775505066, - "p95": 160.41600704193115, - "p99": 182.3360025882721 - }, - "roundtrip": { - "p50": 415.8079922199249, - "p90": 464.0960097312927, - "p95": 484.5759868621826, - "p99": 556.8320155143738 - }, - "isolatedSum": { - "p50": 428.8320094347, - "p90": 494.87999081611633, - "p95": 515.6159996986389, - "p99": 612.639993429184 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 77944832, - "combineLogicalBytes": 155889664, - "fanoutMean": 5.3095703125, - "recvTokensMax": 1422, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 512, - "globalTokens": 4096, - "dispatch": { - "p50": 408.28800201416016, - "p90": 486.4000082015991, - "p95": 495.7759976387024, - "p99": 554.3680191040039 - }, - "combine": { - "p50": 219.10400688648224, - "p90": 233.37599635124207, - "p95": 239.48800563812256, - "p99": 266.07999205589294 - }, - "roundtrip": { - "p50": 607.4560284614563, - "p90": 650.2400040626526, - "p95": 670.5920100212097, - "p99": 729.3760180473328 - }, - "isolatedSum": { - "p50": 627.3920089006424, - "p90": 719.7760045528412, - "p95": 735.264003276825, - "p99": 
820.4480111598969 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 156133376, - "combineLogicalBytes": 312266752, - "fanoutMean": 5.31787109375, - "recvTokensMax": 2779, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 1024, - "globalTokens": 8192, - "dispatch": { - "p50": 621.9840049743652, - "p90": 667.8720116615295, - "p95": 696.0639953613281, - "p99": 765.0880217552185 - }, - "combine": { - "p50": 346.8480110168457, - "p90": 362.08000779151917, - "p95": 368.47999691963196, - "p99": 384.89601016044617 - }, - "roundtrip": { - "p50": 955.2639722824097, - "p90": 1010.1120471954346, - "p95": 1039.4879579544067, - "p99": 1108.6399555206299 - }, - "isolatedSum": { - "p50": 968.8320159912109, - "p90": 1029.9520194530487, - "p95": 1064.54399228096, - "p99": 1149.9840319156647 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 311721984, - "combineLogicalBytes": 623443968, - "fanoutMean": 5.30859375, - "recvTokensMax": 5505, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2048, - "globalTokens": 16384, - "dispatch": { - "p50": 1107.7439785003662, - "p90": 1126.9439458847046, - "p95": 1137.887954711914, - "p99": 1176.8319606781006 - }, - "combine": { - "p50": 609.9200248718262, - "p90": 624.4159936904907, - "p95": 631.8399906158447, - "p99": 652.1919965744019 - }, - "roundtrip": { - "p50": 1692.2240257263184, - "p90": 1713.1520509719849, - "p95": 1732.5439453125, - "p99": 1810.7199668884277 - }, - "isolatedSum": { - "p50": 1717.6640033721924, - "p90": 1751.3599395751953, - "p95": 1769.7279453277588, - "p99": 1829.0239572525024 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 621902848, - "combineLogicalBytes": 1243805696, - "fanoutMean": 5.29547119140625, - "recvTokensMax": 10952, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4096, - "globalTokens": 32768, - 
"dispatch": { - "p50": 2100.4478931427, - "p90": 2129.312038421631, - "p95": 2148.47993850708, - "p99": 2358.464002609253 - }, - "combine": { - "p50": 1102.6560068130493, - "p90": 1120.0640201568604, - "p95": 1132.8959465026855, - "p99": 1158.560037612915 - }, - "roundtrip": { - "p50": 3193.376064300537, - "p90": 3219.615936279297, - "p95": 3229.9840450286865, - "p99": 3288.5758876800537 - }, - "isolatedSum": { - "p50": 3203.1038999557495, - "p90": 3249.376058578491, - "p95": 3281.3758850097656, - "p99": 3517.024040222168 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1243504640, - "combineLogicalBytes": 2487009280, - "fanoutMean": 5.294189453125, - "recvTokensMax": 21781, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-15326a90", - "identity": "h200|deepep|7168|8|384|fp8|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||cd50548525dafdf", - "colorKey": "h200_9979edfc", - "comparisonKey": "0bd4a1be28b155b0", - "schemaVersion": 3, - "generatedAt": "2026-06-27T11:14:15.177243+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_4", - "sku": "h200", - "backend": "deepep", - "phase": "prefill", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · fp8", - "model": "Kimi-K2", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 384, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "fp8", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": 
"backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "cd50548525dafdf", - "workloadId": "set:6:b23bc0c4b6402c69", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28287502149", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28287502149", - "createdAt": "2026-06-27T11:14:15.177243+00:00", - "sha": "df7fddee0a275156d4c72fa006cd2b73bce72613" - }, - "rows": [ - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 86.40000224113464, - "p90": 109.18399691581726, - "p95": 114.3999993801117, - "p99": 152.0960032939911 - }, - "combine": { - "p50": 96.99200093746185, - "p90": 110.55999994277954, - "p95": 116.83200299739838, - "p99": 123.64800274372101 - }, - "roundtrip": { - "p50": 211.42399311065674, - "p90": 238.11200261116028, - "p95": 247.8400021791458, - "p99": 270.81599831581116 - }, - "isolatedSum": { - "p50": 183.3920031785965, - "p90": 219.7439968585968, - "p95": 231.23200237751007, - "p99": 275.7440060377121 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 38757376, - "combineLogicalBytes": 77514752, - "fanoutMean": 5.2802734375, - "recvTokensMax": 707, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 256, - "globalTokens": 2048, - "dispatch": { - "p50": 104.38399761915207, - "p90": 124.89599734544754, - "p95": 131.71200454235077, - "p99": 141.66399836540222 - }, - "combine": { - "p50": 137.05599308013916, - "p90": 149.82399344444275, - "p95": 154.14400398731232, - "p99": 171.87200486660004 - }, - "roundtrip": { - 
"p50": 308.8639974594116, - "p90": 326.7520070075989, - "p95": 331.2320113182068, - "p99": 342.52798557281494 - }, - "isolatedSum": { - "p50": 241.43999069929123, - "p90": 274.7199907898903, - "p95": 285.8560085296631, - "p99": 313.53600323200226 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 77285376, - "combineLogicalBytes": 154570752, - "fanoutMean": 5.2646484375, - "recvTokensMax": 1391, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 512, - "globalTokens": 4096, - "dispatch": { - "p50": 141.12000167369843, - "p90": 168.47999393939972, - "p95": 177.47199535369873, - "p99": 233.43999683856964 - }, - "combine": { - "p50": 215.83999693393707, - "p90": 233.60000550746918, - "p95": 237.7600073814392, - "p99": 313.08799982070923 - }, - "roundtrip": { - "p50": 488.5759949684143, - "p90": 503.32802534103394, - "p95": 508.67199897766113, - "p99": 524.0640044212341 - }, - "isolatedSum": { - "p50": 356.9599986076355, - "p90": 402.0799994468689, - "p95": 415.23200273513794, - "p99": 546.5279966592789 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 154886144, - "combineLogicalBytes": 309772288, - "fanoutMean": 5.275390625, - "recvTokensMax": 2754, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 1024, - "globalTokens": 8192, - "dispatch": { - "p50": 205.79199492931366, - "p90": 228.89600694179535, - "p95": 234.46400463581085, - "p99": 248.89600276947021 - }, - "combine": { - "p50": 347.3599851131439, - "p90": 359.0080142021179, - "p95": 364.73599076271057, - "p99": 389.3119990825653 - }, - "roundtrip": { - "p50": 830.016016960144, - "p90": 851.2319922447205, - "p95": 861.8239760398865, - "p99": 894.0479755401611 - }, - "isolatedSum": { - "p50": 553.1519800424576, - "p90": 587.9040211439133, - "p95": 599.1999953985214, - "p99": 638.2080018520355 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 309750784, - 
"combineLogicalBytes": 619501568, - "fanoutMean": 5.2750244140625, - "recvTokensMax": 5469, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2048, - "globalTokens": 16384, - "dispatch": { - "p50": 340.1600122451782, - "p90": 360.1279854774475, - "p95": 373.7280070781708, - "p99": 421.4720129966736 - }, - "combine": { - "p50": 600.1920104026794, - "p90": 613.1839752197266, - "p95": 621.2480068206787, - "p99": 657.696008682251 - }, - "roundtrip": { - "p50": 1490.880012512207, - "p90": 1514.016032218933, - "p95": 1529.2479991912842, - "p99": 1652.6720523834229 - }, - "isolatedSum": { - "p50": 940.3520226478577, - "p90": 973.3119606971741, - "p95": 994.9760138988495, - "p99": 1079.1680216789246 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 619687936, - "combineLogicalBytes": 1239375872, - "fanoutMean": 5.276611328125, - "recvTokensMax": 10883, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4096, - "globalTokens": 32768, - "dispatch": { - "p50": 612.2879981994629, - "p90": 627.1359920501709, - "p95": 634.656012058258, - "p99": 680.351972579956 - }, - "combine": { - "p50": 1088.5440111160278, - "p90": 1107.0400476455688, - "p95": 1131.872057914734, - "p99": 1238.976001739502 - }, - "roundtrip": { - "p50": 2821.4080333709717, - "p90": 2847.007989883423, - "p95": 2862.6561164855957, - "p99": 3033.9200496673584 - }, - "isolatedSum": { - "p50": 1700.8320093154907, - "p90": 1734.1760396957397, - "p95": 1766.528069972992, - "p99": 1919.327974319458 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1239834624, - "combineLogicalBytes": 2479669248, - "fanoutMean": 5.278564453125, - "recvTokensMax": 21730, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-d2673258", - "identity": 
"h200|deepep|7168|8|384|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||cd50548525dafdf", - "colorKey": "h200_87683f6c", - "comparisonKey": "ae4528707b5ffd7f", - "schemaVersion": 3, - "generatedAt": "2026-06-26T23:53:16.316846+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_3", - "sku": "h200", - "backend": "deepep", - "phase": "prefill", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "runtime-visible-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · fp8", - "model": "Kimi-K2", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 384, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "fp8", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "cd50548525dafdf", - "workloadId": "set:6:b23bc0c4b6402c69", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28271725115", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271725115", - "createdAt": "2026-06-26T23:53:16.316846+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - 
"tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 221.27999365329742, - "p90": 242.20800399780273, - "p95": 255.3279995918274, - "p99": 294.94398832321167 - }, - "combine": { - "p50": 96.67199850082397, - "p90": 103.20000350475311, - "p95": 107.32799768447876, - "p99": 117.85600334405899 - }, - "roundtrip": { - "p50": 306.8479895591736, - "p90": 331.07200264930725, - "p95": 352.31998562812805, - "p99": 409.05600786209106 - }, - "isolatedSum": { - "p50": 317.9519921541214, - "p90": 345.40800750255585, - "p95": 362.65599727630615, - "p99": 412.79999166727066 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 38757376, - "combineLogicalBytes": 77514752, - "fanoutMean": 5.2802734375, - "recvTokensMax": 707, - "stragglerRank": 3, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 256, - "globalTokens": 2048, - "dispatch": { - "p50": 282.04798698425293, - "p90": 307.3279857635498, - "p95": 327.2320032119751, - "p99": 442.68798828125 - }, - "combine": { - "p50": 138.87999951839447, - "p90": 145.05599439144135, - "p95": 152.73599326610565, - "p99": 170.01600563526154 - }, - "roundtrip": { - "p50": 410.46398878097534, - "p90": 435.39199233055115, - "p95": 465.6960070133209, - "p99": 525.2479910850525 - }, - "isolatedSum": { - "p50": 420.9279865026474, - "p90": 452.38398015499115, - "p95": 479.96799647808075, - "p99": 612.7039939165115 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 77285376, - "combineLogicalBytes": 154570752, - "fanoutMean": 5.2646484375, - "recvTokensMax": 1391, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 512, - "globalTokens": 4096, - "dispatch": { - "p50": 390.9760117530823, - "p90": 407.8719913959503, - "p95": 414.3039882183075, - "p99": 448.2240080833435 - }, - "combine": { - "p50": 212.3199999332428, - "p90": 220.2560007572174, - "p95": 229.08799350261688, - "p99": 299.71200227737427 - }, - "roundtrip": { - "p50": 
589.3120169639587, - "p90": 609.9839806556702, - "p95": 625.5040168762207, - "p99": 686.6880059242249 - }, - "isolatedSum": { - "p50": 603.2960116863251, - "p90": 628.1279921531677, - "p95": 643.3919817209244, - "p99": 747.9360103607178 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 154886144, - "combineLogicalBytes": 309772288, - "fanoutMean": 5.275390625, - "recvTokensMax": 2754, - "stragglerRank": 3, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 1024, - "globalTokens": 8192, - "dispatch": { - "p50": 601.7919778823853, - "p90": 624.064028263092, - "p95": 640.0960087776184, - "p99": 705.2800059318542 - }, - "combine": { - "p50": 343.29599142074585, - "p90": 351.39200091362, - "p95": 357.02401399612427, - "p99": 386.01601123809814 - }, - "roundtrip": { - "p50": 930.400013923645, - "p90": 953.1520009040833, - "p95": 967.1040177345276, - "p99": 1069.5680379867554 - }, - "isolatedSum": { - "p50": 945.0879693031311, - "p90": 975.456029176712, - "p95": 997.1200227737427, - "p99": 1091.2960171699524 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 309750784, - "combineLogicalBytes": 619501568, - "fanoutMean": 5.2750244140625, - "recvTokensMax": 5469, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2048, - "globalTokens": 16384, - "dispatch": { - "p50": 1100.0959873199463, - "p90": 1113.9520406723022, - "p95": 1130.784034729004, - "p99": 1221.2159633636475 - }, - "combine": { - "p50": 596.3199734687805, - "p90": 606.9440245628357, - "p95": 612.6400232315063, - "p99": 648.5120058059692 - }, - "roundtrip": { - "p50": 1675.5199432373047, - "p90": 1687.999963760376, - "p95": 1695.3599452972412, - "p99": 2014.2719745635986 - }, - "isolatedSum": { - "p50": 1696.4159607887268, - "p90": 1720.896065235138, - "p95": 1743.4240579605103, - "p99": 1869.7279691696167 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 619687936, - "combineLogicalBytes": 
1239375872, - "fanoutMean": 5.276611328125, - "recvTokensMax": 10883, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4096, - "globalTokens": 32768, - "dispatch": { - "p50": 2087.3920917510986, - "p90": 2099.519968032837, - "p95": 2110.6879711151123, - "p99": 2213.7598991394043 - }, - "combine": { - "p50": 1087.4559879302979, - "p90": 1099.4240045547485, - "p95": 1103.5200357437134, - "p99": 1151.8080234527588 - }, - "roundtrip": { - "p50": 3166.016101837158, - "p90": 3187.0079040527344, - "p95": 3196.5761184692383, - "p99": 3422.0480918884277 - }, - "isolatedSum": { - "p50": 3174.8480796813965, - "p90": 3198.9439725875854, - "p95": 3214.2080068588257, - "p99": 3365.567922592163 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1239834624, - "combineLogicalBytes": 2479669248, - "fanoutMean": 5.278564453125, - "recvTokensMax": 21730, - "stragglerRank": 3, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-5a82a4d9", - "identity": "h200|deepep|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|normalized|0.18|64d989e2e2a6b31", - "colorKey": "h200_3a17d46b", - "comparisonKey": "680e15fb3428bab0", - "schemaVersion": 3, - "generatedAt": "2026-06-26T17:30:05.917629+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_10", - "sku": "h200", - "backend": "deepep", - "phase": "prefill", - "mode": "normal", - "resourceMode": "normalized", - "suite": "resource-constrained", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · fp8 (norm)", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": 
false, - "dispatchDtype": "fp8", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": 0.18, - "achievedFraction": 0.1818, - "configuredUnits": 24, - "deviceUnits": 132, - "resourceClass": "resource-constrained", - "conformanceClass": "resource-conforming", - "fixedKernel": false, - "paretoEligible": true - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "64d989e2e2a6b31", - "workloadId": "set:6:a426d66e479dc893", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28254401482", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254401482", - "createdAt": "2026-06-26T17:30:05.917629+00:00", - "sha": "60dec7d70f554e252fec87709e2be52752947db1" - }, - "rows": [ - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 86.81599795818329, - "p90": 108.2879975438118, - "p95": 115.26399850845337, - "p99": 141.79199934005737 - }, - "combine": { - "p50": 96.38399630784988, - "p90": 114.68800157308578, - "p95": 119.55200135707855, - "p99": 138.72000575065613 - }, - "roundtrip": { - "p50": 210.59200167655945, - "p90": 242.94400215148926, - "p95": 254.17599081993103, - "p99": 313.27998638153076 - }, - "isolatedSum": { - "p50": 183.19999426603317, - "p90": 222.97599911689758, - "p95": 234.81599986553192, - "p99": 280.5120050907135 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 38836224, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 256, - "globalTokens": 2048, - "dispatch": { - "p50": 
103.2319962978363, - "p90": 128.28800082206726, - "p95": 134.8160058259964, - "p99": 155.07200360298157 - }, - "combine": { - "p50": 133.66399705410004, - "p90": 149.79200065135956, - "p95": 157.21599757671356, - "p99": 173.37599396705627 - }, - "roundtrip": { - "p50": 304.22401428222656, - "p90": 332.41599798202515, - "p95": 337.92001008987427, - "p99": 353.2800078392029 - }, - "isolatedSum": { - "p50": 236.89599335193634, - "p90": 278.0800014734268, - "p95": 292.03200340270996, - "p99": 328.44799757003784 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 77944832, - "combineLogicalBytes": 155889664, - "fanoutMean": 5.3095703125, - "recvTokensMax": 1422, - "stragglerRank": 2, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 512, - "globalTokens": 4096, - "dispatch": { - "p50": 135.77599823474884, - "p90": 162.30399906635284, - "p95": 169.95200514793396, - "p99": 237.98400163650513 - }, - "combine": { - "p50": 203.2960057258606, - "p90": 220.41599452495575, - "p95": 226.55999660491943, - "p99": 257.31199979782104 - }, - "roundtrip": { - "p50": 476.9600033760071, - "p90": 496.63999676704407, - "p95": 511.55197620391846, - "p99": 544.7999835014343 - }, - "isolatedSum": { - "p50": 339.07200396060944, - "p90": 382.7199935913086, - "p95": 396.5120017528534, - "p99": 495.2960014343262 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 156133376, - "combineLogicalBytes": 312266752, - "fanoutMean": 5.31787109375, - "recvTokensMax": 2779, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 1024, - "globalTokens": 8192, - "dispatch": { - "p50": 196.57599925994873, - "p90": 218.87999773025513, - "p95": 225.3119945526123, - "p99": 253.7280023097992 - }, - "combine": { - "p50": 320.607990026474, - "p90": 335.2319896221161, - "p95": 344.4800078868866, - "p99": 365.9519851207733 - }, - "roundtrip": { - "p50": 794.7199940681458, - "p90": 817.6959753036499, - "p95": 
837.0879888534546, - "p99": 910.5280041694641 - }, - "isolatedSum": { - "p50": 517.1839892864227, - "p90": 554.1119873523712, - "p95": 569.7920024394989, - "p99": 619.6799874305725 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 311721984, - "combineLogicalBytes": 623443968, - "fanoutMean": 5.30859375, - "recvTokensMax": 5505, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2048, - "globalTokens": 16384, - "dispatch": { - "p50": 320.16000151634216, - "p90": 343.55199337005615, - "p95": 363.45601081848145, - "p99": 439.9999976158142 - }, - "combine": { - "p50": 554.8160076141357, - "p90": 569.7919726371765, - "p95": 577.6000022888184, - "p99": 639.3280029296875 - }, - "roundtrip": { - "p50": 1425.7279634475708, - "p90": 1448.3519792556763, - "p95": 1468.4480428695679, - "p99": 1752.8959512710571 - }, - "isolatedSum": { - "p50": 874.9760091304779, - "p90": 913.3439660072327, - "p95": 941.0560131072998, - "p99": 1079.3280005455017 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 621902848, - "combineLogicalBytes": 1243805696, - "fanoutMean": 5.29547119140625, - "recvTokensMax": 10952, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4096, - "globalTokens": 32768, - "dispatch": { - "p50": 572.4160075187683, - "p90": 584.447979927063, - "p95": 591.6479825973511, - "p99": 629.6640038490295 - }, - "combine": { - "p50": 1012.6080513000488, - "p90": 1025.696039199829, - "p95": 1030.2400588989258, - "p99": 1060.1279735565186 - }, - "roundtrip": { - "p50": 2698.7199783325195, - "p90": 2725.055932998657, - "p95": 2745.215892791748, - "p99": 2952.064037322998 - }, - "isolatedSum": { - "p50": 1585.0240588188171, - "p90": 1610.144019126892, - "p95": 1621.8880414962769, - "p99": 1689.791977405548 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1243504640, - "combineLogicalBytes": 2487009280, - "fanoutMean": 5.294189453125, - 
"recvTokensMax": 21781, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-da3555d5", - "identity": "h200|deepep|7168|8|256|fp8|normal|cached-layout-comm-only-v1|uniform|8|prefill|normal|none|none|0|normalized|0.18|64d989e2e2a6b31", - "colorKey": "h200_50a9ee63", - "comparisonKey": "ee1a607167629f55", - "schemaVersion": 3, - "generatedAt": "2026-06-26T17:30:23.809590+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_13", - "sku": "h200", - "backend": "deepep", - "phase": "prefill", - "mode": "normal", - "resourceMode": "normalized", - "suite": "resource-constrained", - "comparisonClass": "standardized", - "measurementContract": "cached-layout-comm-only-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · deepep · fp8 (norm) [cl]", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "fp8", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": 0.18, - "achievedFraction": 0.1818, - "configuredUnits": 24, - "deviceUnits": 132, - "resourceClass": "resource-constrained", - "conformanceClass": "resource-conforming", - "fixedKernel": false, - "paretoEligible": true - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "64d989e2e2a6b31", - "workloadId": "set:6:a426d66e479dc893", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": 
"28254418007", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254418007", - "createdAt": "2026-06-26T17:30:23.809590+00:00", - "sha": "60dec7d70f554e252fec87709e2be52752947db1" - }, - "rows": [ - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 73.69600236415863, - "p90": 84.63999629020691, - "p95": 90.08000046014786, - "p99": 106.6880002617836 - }, - "combine": { - "p50": 95.20000219345093, - "p90": 106.97600245475769, - "p95": 112.28799819946289, - "p99": 135.77599823474884 - }, - "roundtrip": { - "p50": 196.70400023460388, - "p90": 213.79199624061584, - "p95": 224.16000068187714, - "p99": 281.0240089893341 - }, - "isolatedSum": { - "p50": 168.89600455760956, - "p90": 191.6159987449646, - "p95": 202.36799865961075, - "p99": 242.46399849653244 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 38836224, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 256, - "globalTokens": 2048, - "dispatch": { - "p50": 91.71199798583984, - "p90": 108.0000028014183, - "p95": 111.87200248241425, - "p99": 124.57600235939026 - }, - "combine": { - "p50": 132.7359974384308, - "p90": 146.2399959564209, - "p95": 151.8400013446808, - "p99": 165.56799411773682 - }, - "roundtrip": { - "p50": 291.456013917923, - "p90": 308.57598781585693, - "p95": 313.34400177001953, - "p99": 330.78399300575256 - }, - "isolatedSum": { - "p50": 224.44799542427063, - "p90": 254.2399987578392, - "p95": 263.71200382709503, - "p99": 290.1439964771271 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 77944832, - "combineLogicalBytes": 155889664, - "fanoutMean": 5.3095703125, - "recvTokensMax": 1422, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 512, - "globalTokens": 4096, - "dispatch": { - "p50": 125.50400197505951, - "p90": 
144.3520039319992, - "p95": 149.85600113868713, - "p99": 213.6639952659607 - }, - "combine": { - "p50": 203.10400426387787, - "p90": 215.64799547195435, - "p95": 220.47999501228333, - "p99": 236.92800104618073 - }, - "roundtrip": { - "p50": 464.7040069103241, - "p90": 485.5999946594238, - "p95": 495.64799666404724, - "p99": 524.3520140647888 - }, - "isolatedSum": { - "p50": 328.6080062389374, - "p90": 359.99999940395355, - "p95": 370.33599615097046, - "p99": 450.5919963121414 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 156133376, - "combineLogicalBytes": 312266752, - "fanoutMean": 5.31787109375, - "recvTokensMax": 2779, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 1024, - "globalTokens": 8192, - "dispatch": { - "p50": 184.1599941253662, - "p90": 198.94400238990784, - "p95": 204.352006316185, - "p99": 232.12799429893494 - }, - "combine": { - "p50": 318.39999556541443, - "p90": 328.96000146865845, - "p95": 333.15199613571167, - "p99": 352.7359962463379 - }, - "roundtrip": { - "p50": 782.4640274047852, - "p90": 796.064019203186, - "p95": 802.4960160255432, - "p99": 826.4960050582886 - }, - "isolatedSum": { - "p50": 502.55998969078064, - "p90": 527.9040038585663, - "p95": 537.5040024518967, - "p99": 584.8639905452728 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 311721984, - "combineLogicalBytes": 623443968, - "fanoutMean": 5.30859375, - "recvTokensMax": 5505, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2048, - "globalTokens": 16384, - "dispatch": { - "p50": 304.3519854545593, - "p90": 320.8320140838623, - "p95": 336.2559974193573, - "p99": 371.42398953437805 - }, - "combine": { - "p50": 550.4000186920166, - "p90": 560.2880120277405, - "p95": 567.7760243415833, - "p99": 656.8959951400757 - }, - "roundtrip": { - "p50": 1410.4959964752197, - "p90": 1427.456021308899, - "p95": 1436.4160299301147, - "p99": 
1585.2479934692383 - }, - "isolatedSum": { - "p50": 854.7520041465759, - "p90": 881.1200261116028, - "p95": 904.0320217609406, - "p99": 1028.3199846744537 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 621902848, - "combineLogicalBytes": 1243805696, - "fanoutMean": 5.29547119140625, - "recvTokensMax": 10952, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4096, - "globalTokens": 32768, - "dispatch": { - "p50": 542.8479909896851, - "p90": 557.5680136680603, - "p95": 565.5360221862793, - "p99": 587.7760052680969 - }, - "combine": { - "p50": 1013.5680437088013, - "p90": 1026.4320373535156, - "p95": 1031.999945640564, - "p99": 1048.192024230957 - }, - "roundtrip": { - "p50": 2668.4160232543945, - "p90": 2694.3039894104004, - "p95": 2716.320037841797, - "p99": 3019.615888595581 - }, - "isolatedSum": { - "p50": 1556.4160346984863, - "p90": 1584.000051021576, - "p95": 1597.5359678268433, - "p99": 1635.968029499054 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1243504640, - "combineLogicalBytes": 2487009280, - "fanoutMean": 5.294189453125, - "recvTokensMax": 21781, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-4a1bc537", - "identity": "h200|deepep|7168|8|256|fp8|normal|cached-layout-comm-only-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", - "colorKey": "h200_4f483b60", - "comparisonKey": "ac62097ce902c24f", - "schemaVersion": 3, - "generatedAt": "2026-06-26T23:50:33.490755+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_1", - "sku": "h200", - "backend": "deepep", - "phase": "prefill", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "cached-layout-comm-only-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 
EP8 · deepep · fp8 [cl]", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "fp8", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "64d989e2e2a6b31", - "workloadId": "set:6:a426d66e479dc893", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "1.2.1", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28271633476", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271633476", - "createdAt": "2026-06-26T23:50:33.490755+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 75.71200281381607, - "p90": 95.29600292444229, - "p95": 102.11200267076492, - "p99": 128.83199751377106 - }, - "combine": { - "p50": 97.31200337409973, - "p90": 115.93600362539291, - "p95": 120.80000340938568, - "p99": 140.44800400733948 - }, - "roundtrip": { - "p50": 200.8959949016571, - "p90": 248.28800559043884, - "p95": 261.24799251556396, - "p99": 302.5600016117096 - }, - "isolatedSum": { - "p50": 173.0240061879158, - "p90": 211.2320065498352, - "p95": 222.9120060801506, - "p99": 269.28000152111053 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 38836224, - "combineLogicalBytes": 77672448, 
- "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 256, - "globalTokens": 2048, - "dispatch": { - "p50": 91.61599725484848, - "p90": 110.33599823713303, - "p95": 116.35199934244156, - "p99": 134.17600095272064 - }, - "combine": { - "p50": 136.76799833774567, - "p90": 151.5199989080429, - "p95": 159.04000401496887, - "p99": 170.6240028142929 - }, - "roundtrip": { - "p50": 299.45600032806396, - "p90": 324.38400387763977, - "p95": 331.07200264930725, - "p99": 365.7279908657074 - }, - "isolatedSum": { - "p50": 228.38399559259415, - "p90": 261.85599714517593, - "p95": 275.39200335741043, - "p99": 304.80000376701355 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 77944832, - "combineLogicalBytes": 155889664, - "fanoutMean": 5.3095703125, - "recvTokensMax": 1422, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 512, - "globalTokens": 4096, - "dispatch": { - "p50": 127.83999741077423, - "p90": 142.94399321079254, - "p95": 150.4960060119629, - "p99": 162.7199947834015 - }, - "combine": { - "p50": 214.62400257587433, - "p90": 226.78400576114655, - "p95": 231.51999711990356, - "p99": 242.14400351047516 - }, - "roundtrip": { - "p50": 483.5200011730194, - "p90": 497.2800016403198, - "p95": 504.5120120048523, - "p99": 540.831983089447 - }, - "isolatedSum": { - "p50": 342.46399998664856, - "p90": 369.7279989719391, - "p95": 382.01600313186646, - "p99": 404.86399829387665 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 156133376, - "combineLogicalBytes": 312266752, - "fanoutMean": 5.31787109375, - "recvTokensMax": 2779, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 1024, - "globalTokens": 8192, - "dispatch": { - "p50": 194.75199282169342, - "p90": 214.88000452518463, - "p95": 220.2879935503006, - "p99": 243.74400079250336 - }, - 
"combine": { - "p50": 346.3360071182251, - "p90": 362.8160059452057, - "p95": 374.4960129261017, - "p99": 426.56001448631287 - }, - "roundtrip": { - "p50": 824.5440125465393, - "p90": 852.5760173797607, - "p95": 862.2400164604187, - "p99": 896.6720104217529 - }, - "isolatedSum": { - "p50": 541.0879999399185, - "p90": 577.6960104703903, - "p95": 594.7840064764023, - "p99": 670.3040152788162 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 311721984, - "combineLogicalBytes": 623443968, - "fanoutMean": 5.30859375, - "recvTokensMax": 5505, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2048, - "globalTokens": 16384, - "dispatch": { - "p50": 325.0879943370819, - "p90": 342.52798557281494, - "p95": 348.9919900894165, - "p99": 374.9440014362335 - }, - "combine": { - "p50": 603.8720011711121, - "p90": 613.6959791183472, - "p95": 618.1120276451111, - "p99": 640.3520107269287 - }, - "roundtrip": { - "p50": 1486.36794090271, - "p90": 1510.7519626617432, - "p95": 1524.1600275039673, - "p99": 1566.3679838180542 - }, - "isolatedSum": { - "p50": 928.959995508194, - "p90": 956.2239646911621, - "p95": 967.1040177345276, - "p99": 1015.2960121631622 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 621902848, - "combineLogicalBytes": 1243805696, - "fanoutMean": 5.29547119140625, - "recvTokensMax": 10952, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4096, - "globalTokens": 32768, - "dispatch": { - "p50": 586.624026298523, - "p90": 618.9759969711304, - "p95": 627.6800036430359, - "p99": 654.7200083732605 - }, - "combine": { - "p50": 1108.8639497756958, - "p90": 1126.1119842529297, - "p95": 1134.2079639434814, - "p99": 1169.376015663147 - }, - "roundtrip": { - "p50": 2817.1839714050293, - "p90": 2849.3120670318604, - "p95": 2871.0079193115234, - "p99": 3254.4960975646973 - }, - "isolatedSum": { - "p50": 1695.4879760742188, - "p90": 
1745.08798122406, - "p95": 1761.8879675865173, - "p99": 1824.0960240364075 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1243504640, - "combineLogicalBytes": 2487009280, - "fanoutMean": 5.294189453125, - "recvTokensMax": 21781, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-8ae4b608", - "identity": "h200|nccl-ep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|16|decode|normal|none|none|0|normalized|0.18|22edb632bb1b9d9", - "colorKey": "h200_45246fb2", - "comparisonKey": "bd3ee598fb548c4d", - "schemaVersion": 3, - "generatedAt": "2026-06-28T15:33:05.143900+00:00", - "status": "valid", - "publicationStatus": "comparable-experimental", - "runner": "h200-dgxc-slurm_6", - "sku": "h200", - "backend": "nccl-ep", - "phase": "decode", - "mode": "normal", - "resourceMode": "normalized", - "suite": "resource-constrained", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-multinode-ib", - "transport": "rdma", - "worldSize": 16, - "epSize": 16, - "label": "H200 EP16 · nccl-ep · bf16 (norm)", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": 0.18, - "achievedFraction": null, - "configuredUnits": null, - "deviceUnits": 132, - "resourceClass": "resource-constrained", - "conformanceClass": "resource-conforming", - "fixedKernel": false, - "paretoEligible": true - }, - "placement": { - "kind": "packed", - "nodes": 2, - "gpusPerNode": 16, - "scaleUpDomain": 16 - }, - "routingConsistent": true, - "traceSignature": "22edb632bb1b9d9", - "workloadId": null, - "workloadSource": "seeded-runtime", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": 
null, - "backendVersion": null, - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28327088942", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28327088942", - "createdAt": "2026-06-28T15:33:05.143900+00:00", - "sha": "127785d43b1ea119c05a2b798bf0be56e5c9baa7" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 16, - "dispatch": { - "p50": 578.4000158309937, - "p90": 2543.3599948883057, - "p95": 2675.1999855041504, - "p99": 2675.1999855041504 - }, - "combine": { - "p50": 233.43999683856964, - "p90": 532.7680110931396, - "p95": 914.2079949378967, - "p99": 914.2079949378967 - }, - "roundtrip": { - "p50": 794.975996017456, - "p90": 861.2800240516663, - "p95": 1168.6400175094604, - "p99": 1168.6400175094604 - }, - "isolatedSum": { - "p50": 811.8400126695633, - "p90": 3076.1280059814453, - "p95": 3589.407980442047, - "p99": 3589.407980442047 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1505280, - "combineLogicalBytes": 1505280, - "fanoutMean": 6.5625, - "recvTokensMax": 12, - "stragglerRank": 1, - "correct": true, - "samplesPooled": 8, - "trials": 1 - }, - { - "tokensPerRank": 2, - "globalTokens": 32, - "dispatch": { - "p50": 547.2319722175598, - "p90": 880.2559971809387, - "p95": 977.3759841918945, - "p99": 977.3759841918945 - }, - "combine": { - "p50": 212.25599944591522, - "p90": 238.3359968662262, - "p95": 239.32799696922302, - "p99": 239.32799696922302 - }, - "roundtrip": { - "p50": 960.6080055236816, - "p90": 2553.6320209503174, - "p95": 2696.3839530944824, - "p99": 2696.3839530944824 - }, - "isolatedSum": { - "p50": 759.487971663475, - "p90": 1118.591994047165, - "p95": 1216.7039811611176, - "p99": 1216.7039811611176 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 3067904, - "combineLogicalBytes": 3067904, - "fanoutMean": 6.6875, - "recvTokensMax": 24, - "stragglerRank": 5, - "correct": true, - 
"samplesPooled": 8, - "trials": 1 - }, - { - "tokensPerRank": 4, - "globalTokens": 64, - "dispatch": { - "p50": 621.504008769989, - "p90": 645.1839804649353, - "p95": 711.0400199890137, - "p99": 711.0400199890137 - }, - "combine": { - "p50": 249.08800423145294, - "p90": 263.64800333976746, - "p95": 269.53598856925964, - "p99": 269.53598856925964 - }, - "roundtrip": { - "p50": 1369.53604221344, - "p90": 1802.5599718093872, - "p95": 1879.744052886963, - "p99": 1879.744052886963 - }, - "isolatedSum": { - "p50": 870.592013001442, - "p90": 908.8319838047028, - "p95": 980.5760085582733, - "p99": 980.5760085582733 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 5992448, - "combineLogicalBytes": 5992448, - "fanoutMean": 6.53125, - "recvTokensMax": 43, - "stragglerRank": 10, - "correct": true, - "samplesPooled": 8, - "trials": 1 - }, - { - "tokensPerRank": 8, - "globalTokens": 128, - "dispatch": { - "p50": 611.8080019950867, - "p90": 2058.079957962036, - "p95": 2190.5601024627686, - "p99": 2190.5601024627686 - }, - "combine": { - "p50": 238.46399784088135, - "p90": 636.1280083656311, - "p95": 679.2960166931152, - "p99": 679.2960166931152 - }, - "roundtrip": { - "p50": 799.5200157165527, - "p90": 1625.3759860992432, - "p95": 2821.2480545043945, - "p99": 2821.2480545043945 - }, - "isolatedSum": { - "p50": 850.271999835968, - "p90": 2694.2079663276672, - "p95": 2869.856119155884, - "p99": 2869.856119155884 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 12214272, - "combineLogicalBytes": 12214272, - "fanoutMean": 6.65625, - "recvTokensMax": 84, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 8, - "trials": 1 - }, - { - "tokensPerRank": 16, - "globalTokens": 256, - "dispatch": { - "p50": 631.8399906158447, - "p90": 645.6639766693115, - "p95": 672.3840236663818, - "p99": 672.3840236663818 - }, - "combine": { - "p50": 256.9279968738556, - "p90": 264.1279995441437, - "p95": 272.41599559783936, - "p99": 272.41599559783936 - }, - "roundtrip": { - 
"p50": 827.135980129242, - "p90": 967.136025428772, - "p95": 1139.7440433502197, - "p99": 1139.7440433502197 - }, - "isolatedSum": { - "p50": 888.7679874897003, - "p90": 909.7919762134552, - "p95": 944.8000192642212, - "p99": 944.8000192642212 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 24127488, - "combineLogicalBytes": 24127488, - "fanoutMean": 6.57421875, - "recvTokensMax": 154, - "stragglerRank": 1, - "correct": true, - "samplesPooled": 8, - "trials": 1 - }, - { - "tokensPerRank": 32, - "globalTokens": 512, - "dispatch": { - "p50": 782.8800082206726, - "p90": 2639.967918395996, - "p95": 2675.584077835083, - "p99": 2675.584077835083 - }, - "combine": { - "p50": 265.855997800827, - "p90": 287.200003862381, - "p95": 290.43200612068176, - "p99": 290.43200612068176 - }, - "roundtrip": { - "p50": 890.496015548706, - "p90": 1573.8240480422974, - "p95": 2191.551923751831, - "p99": 2191.551923751831 - }, - "isolatedSum": { - "p50": 1048.7360060214996, - "p90": 2927.167922258377, - "p95": 2966.0160839557648, - "p99": 2966.0160839557648 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 48140288, - "combineLogicalBytes": 48140288, - "fanoutMean": 6.55859375, - "recvTokensMax": 295, - "stragglerRank": 15, - "correct": true, - "samplesPooled": 8, - "trials": 1 - }, - { - "tokensPerRank": 64, - "globalTokens": 1024, - "dispatch": { - "p50": 679.3280243873596, - "p90": 740.6079769134521, - "p95": 822.9439854621887, - "p99": 822.9439854621887 - }, - "combine": { - "p50": 339.9040102958679, - "p90": 763.9359831809998, - "p95": 791.6160225868225, - "p99": 791.6160225868225 - }, - "roundtrip": { - "p50": 922.2720265388489, - "p90": 1468.127965927124, - "p95": 1530.8159589767456, - "p99": 1530.8159589767456 - }, - "isolatedSum": { - "p50": 1019.2320346832275, - "p90": 1504.543960094452, - "p95": 1614.5600080490112, - "p99": 1614.5600080490112 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 96165888, - "combineLogicalBytes": 96165888, - 
"fanoutMean": 6.55078125, - "recvTokensMax": 573, - "stragglerRank": 14, - "correct": true, - "samplesPooled": 8, - "trials": 1 - }, - { - "tokensPerRank": 128, - "globalTokens": 2048, - "dispatch": { - "p50": 808.1279993057251, - "p90": 833.5999846458435, - "p95": 1317.952036857605, - "p99": 1317.952036857605 - }, - "combine": { - "p50": 518.9120173454285, - "p90": 535.0080132484436, - "p95": 549.5679974555969, - "p99": 549.5679974555969 - }, - "roundtrip": { - "p50": 1294.9440479278564, - "p90": 1688.86399269104, - "p95": 2760.256052017212, - "p99": 2760.256052017212 - }, - "isolatedSum": { - "p50": 1327.0400166511536, - "p90": 1368.607997894287, - "p95": 1867.520034313202, - "p99": 1867.520034313202 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 191758336, - "combineLogicalBytes": 191758336, - "fanoutMean": 6.53125, - "recvTokensMax": 1126, - "stragglerRank": 15, - "correct": true, - "samplesPooled": 8, - "trials": 1 - } - ] - }, - { - "id": "cx-d2620b3b", - "identity": "h200|uccl|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", - "colorKey": "h200_c317e88d", - "comparisonKey": "8bbd7f30d0bdbd11", - "schemaVersion": 3, - "generatedAt": "2026-06-27T17:36:22.388714+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_9", - "sku": "h200", - "backend": "uccl", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · uccl · bf16", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - 
"combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "ac583971f94b176", - "workloadId": "set:8:d8d49658059863f2", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": null, - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28296668644", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28296668644", - "createdAt": "2026-06-27T17:36:22.388714+00:00", - "sha": "cfa1ec56258b94b4a173844810a163a832bcb07e" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 123.61600250005722, - "p90": 165.53600132465363, - "p95": 184.38400328159332, - "p99": 203.13599705696106 - }, - "combine": { - "p50": 83.93599838018417, - "p90": 102.33599692583084, - "p95": 113.76000195741653, - "p99": 124.89599734544754 - }, - "roundtrip": { - "p50": 184.32000279426575, - "p90": 227.52000391483307, - "p95": 243.3920055627823, - "p99": 272.38398790359497 - }, - "isolatedSum": { - "p50": 207.5520008802414, - "p90": 267.87199825048447, - "p95": 298.14400523900986, - "p99": 328.0319944024086 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 630784, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 114.84800279140472, - "p90": 152.6080071926117, - "p95": 160.0639969110489, - "p99": 180.9920072555542 - }, - 
"combine": { - "p50": 82.40000158548355, - "p90": 91.80799871683121, - "p95": 102.94400155544281, - "p99": 110.75200140476227 - }, - "roundtrip": { - "p50": 183.74399840831757, - "p90": 219.7120040655136, - "p95": 225.69599747657776, - "p99": 255.71200251579285 - }, - "isolatedSum": { - "p50": 197.24800437688828, - "p90": 244.4160059094429, - "p95": 263.0079984664917, - "p99": 291.74400866031647 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1232896, - "combineLogicalBytes": 1232896, - "fanoutMean": 5.375, - "recvTokensMax": 13, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 111.68000102043152, - "p90": 147.8080004453659, - "p95": 156.19200468063354, - "p99": 167.35999286174774 - }, - "combine": { - "p50": 84.1279998421669, - "p90": 91.96799993515015, - "p95": 107.55199939012527, - "p99": 117.85600334405899 - }, - "roundtrip": { - "p50": 196.44799828529358, - "p90": 245.2480047941208, - "p95": 256.3199996948242, - "p99": 278.0480086803436 - }, - "isolatedSum": { - "p50": 195.80800086259842, - "p90": 239.77600038051605, - "p95": 263.7440040707588, - "p99": 285.21599620580673 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2480128, - "combineLogicalBytes": 2480128, - "fanoutMean": 5.40625, - "recvTokensMax": 29, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 112.83200234174728, - "p90": 151.19999647140503, - "p95": 155.87200224399567, - "p99": 166.33599996566772 - }, - "combine": { - "p50": 84.25600081682205, - "p90": 98.39999675750732, - "p95": 109.56799983978271, - "p99": 117.8240031003952 - }, - "roundtrip": { - "p50": 184.9920004606247, - "p90": 221.82400524616241, - "p95": 229.98400032520294, - "p99": 244.35199797153473 - }, - "isolatedSum": { - "p50": 197.08800315856934, - "p90": 249.59999322891235, - "p95": 
265.4400020837784, - "p99": 284.1600030660629 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 120.28799951076508, - "p90": 159.93599593639374, - "p95": 176.64000391960144, - "p99": 217.02399849891663 - }, - "combine": { - "p50": 85.9839990735054, - "p90": 95.42399644851685, - "p95": 103.64799946546555, - "p99": 113.63200098276138 - }, - "roundtrip": { - "p50": 203.0400037765503, - "p90": 253.91998887062073, - "p95": 280.5759906768799, - "p99": 364.51199650764465 - }, - "isolatedSum": { - "p50": 206.27199858427048, - "p90": 255.35999238491058, - "p95": 280.288003385067, - "p99": 330.655999481678 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 9920512, - "combineLogicalBytes": 9920512, - "fanoutMean": 5.40625, - "recvTokensMax": 92, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 32, - "globalTokens": 256, - "dispatch": { - "p50": 123.58400225639343, - "p90": 147.96799421310425, - "p95": 156.38400614261627, - "p99": 169.5680022239685 - }, - "combine": { - "p50": 91.67999774217606, - "p90": 106.20799660682678, - "p95": 115.99999666213989, - "p99": 126.97599828243256 - }, - "roundtrip": { - "p50": 195.96800208091736, - "p90": 235.07200181484222, - "p95": 244.35199797153473, - "p99": 258.87998938560486 - }, - "isolatedSum": { - "p50": 215.2639999985695, - "p90": 254.17599081993103, - "p95": 272.38400280475616, - "p99": 296.54400050640106 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 19726336, - "combineLogicalBytes": 19726336, - "fanoutMean": 5.375, - "recvTokensMax": 182, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 64, - "globalTokens": 512, - "dispatch": { - "p50": 
136.00000739097595, - "p90": 157.24800527095795, - "p95": 164.89599645137787, - "p99": 197.37599790096283 - }, - "combine": { - "p50": 100.54399818181992, - "p90": 108.22399705648422, - "p95": 118.40000003576279, - "p99": 127.07200646400452 - }, - "roundtrip": { - "p50": 203.96800339221954, - "p90": 239.96800184249878, - "p95": 250.46399235725403, - "p99": 268.38400959968567 - }, - "isolatedSum": { - "p50": 236.54400557279587, - "p90": 265.47200232744217, - "p95": 283.29599648714066, - "p99": 324.44800436496735 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 38993920, - "combineLogicalBytes": 38993920, - "fanoutMean": 5.3125, - "recvTokensMax": 367, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 156.15999698638916, - "p90": 172.38399386405945, - "p95": 181.66400492191315, - "p99": 197.4720060825348 - }, - "combine": { - "p50": 119.1679984331131, - "p90": 133.18400084972382, - "p95": 142.84799993038177, - "p99": 152.96000242233276 - }, - "roundtrip": { - "p50": 237.69600689411163, - "p90": 256.0639977455139, - "p95": 266.01600646972656, - "p99": 278.2079875469208 - }, - "isolatedSum": { - "p50": 275.32799541950226, - "p90": 305.56799471378326, - "p95": 324.5120048522949, - "p99": 350.43200850486755 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-ec807828", - "identity": "h200|uccl|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", - "colorKey": "h200_c317e88d", - "comparisonKey": "4f6cbb2ad4892beb", - "schemaVersion": 3, - "generatedAt": "2026-06-27T17:36:28.990296+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "h200-dgxc-slurm_13", - "sku": 
"h200", - "backend": "uccl", - "phase": "prefill", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "epSize": 8, - "label": "H200 EP8 · uccl · bf16", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.1515, - "configuredUnits": 20, - "deviceUnits": 132, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 1, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "64d989e2e2a6b31", - "workloadId": "set:6:a426d66e479dc893", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": null, - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28296668644", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28296668644", - "createdAt": "2026-06-27T17:36:28.990296+00:00", - "sha": "cfa1ec56258b94b4a173844810a163a832bcb07e" - }, - "rows": [ - { - "tokensPerRank": 128, - "globalTokens": 1024, - "dispatch": { - "p50": 159.71200168132782, - "p90": 192.60799884796143, - "p95": 207.8399956226349, - "p99": 266.6560113430023 - }, - "combine": { - "p50": 120.92799693346024, - "p90": 134.20799374580383, - "p95": 145.9839940071106, - "p99": 155.7759940624237 - }, - "roundtrip": { - "p50": 235.00800132751465, - 
"p90": 250.94398856163025, - "p95": 275.55200457572937, - "p99": 301.66399478912354 - }, - "isolatedSum": { - "p50": 280.63999861478806, - "p90": 326.81599259376526, - "p95": 353.8239896297455, - "p99": 422.432005405426 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 77672448, - "combineLogicalBytes": 77672448, - "fanoutMean": 5.291015625, - "recvTokensMax": 723, - "stragglerRank": 3, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 256, - "globalTokens": 2048, - "dispatch": { - "p50": 185.85599958896637, - "p90": 202.78400182724, - "p95": 209.82399582862854, - "p99": 239.71199989318848 - }, - "combine": { - "p50": 160.89600324630737, - "p90": 168.86399686336517, - "p95": 174.27200078964233, - "p99": 189.88800048828125 - }, - "roundtrip": { - "p50": 307.20001459121704, - "p90": 324.5759904384613, - "p95": 329.3440043926239, - "p99": 353.0240058898926 - }, - "isolatedSum": { - "p50": 346.75200283527374, - "p90": 371.64799869060516, - "p95": 384.0959966182709, - "p99": 429.6000003814697 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 155889664, - "combineLogicalBytes": 155889664, - "fanoutMean": 5.3095703125, - "recvTokensMax": 1422, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 512, - "globalTokens": 4096, - "dispatch": { - "p50": 239.29600417613983, - "p90": 249.24799799919128, - "p95": 254.97600436210632, - "p99": 267.2320008277893 - }, - "combine": { - "p50": 236.80000007152557, - "p90": 243.93600225448608, - "p95": 246.72000110149384, - "p99": 257.1200132369995 - }, - "roundtrip": { - "p50": 436.2879991531372, - "p90": 448.3200013637543, - "p95": 454.52800393104553, - "p99": 473.2159972190857 - }, - "isolatedSum": { - "p50": 476.0960042476654, - "p90": 493.18400025367737, - "p95": 501.69600546360016, - "p99": 524.3520140647888 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 312266752, - "combineLogicalBytes": 312266752, - 
"fanoutMean": 5.31787109375, - "recvTokensMax": 2779, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 1024, - "globalTokens": 8192, - "dispatch": { - "p50": 351.74399614334106, - "p90": 368.99200081825256, - "p95": 383.35999846458435, - "p99": 419.23201084136963 - }, - "combine": { - "p50": 371.7440068721771, - "p90": 381.72799348831177, - "p95": 388.3199989795685, - "p99": 399.26400780677795 - }, - "roundtrip": { - "p50": 682.9439997673035, - "p90": 696.7359781265259, - "p95": 707.647979259491, - "p99": 768.2560086250305 - }, - "isolatedSum": { - "p50": 723.4880030155182, - "p90": 750.7199943065643, - "p95": 771.6799974441528, - "p99": 818.4960186481476 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 623443968, - "combineLogicalBytes": 623443968, - "fanoutMean": 5.30859375, - "recvTokensMax": 5505, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2048, - "globalTokens": 16384, - "dispatch": { - "p50": 584.7359895706177, - "p90": 603.0399799346924, - "p95": 611.1680269241333, - "p99": 635.0719928741455 - }, - "combine": { - "p50": 632.9600214958191, - "p90": 644.3520188331604, - "p95": 648.0640172958374, - "p99": 671.2639927864075 - }, - "roundtrip": { - "p50": 1173.792004585266, - "p90": 1189.3759965896606, - "p95": 1196.7999935150146, - "p99": 1212.448000907898 - }, - "isolatedSum": { - "p50": 1217.6960110664368, - "p90": 1247.3919987678528, - "p95": 1259.2320442199707, - "p99": 1306.335985660553 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1243805696, - "combineLogicalBytes": 1243805696, - "fanoutMean": 5.29547119140625, - "recvTokensMax": 10952, - "stragglerRank": 7, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4096, - "globalTokens": 32768, - "dispatch": { - "p50": 1028.6400318145752, - "p90": 1050.5599975585938, - "p95": 1060.0320100784302, - "p99": 1135.2959871292114 - }, - 
"combine": { - "p50": 1139.7119760513306, - "p90": 1153.1200408935547, - "p95": 1158.5919857025146, - "p99": 1179.0399551391602 - }, - "roundtrip": { - "p50": 2122.623920440674, - "p90": 2145.440101623535, - "p95": 2151.3919830322266, - "p99": 2202.49605178833 - }, - "isolatedSum": { - "p50": 2168.3520078659058, - "p90": 2203.6800384521484, - "p95": 2218.623995780945, - "p99": 2314.3359422683716 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2487009280, - "combineLogicalBytes": 2487009280, - "fanoutMean": 5.294189453125, - "recvTokensMax": 21781, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-279043f8", - "identity": "mi355x|mori|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|fp8-saturation|none|none|0|tuned||c774c8e4abb34da", - "colorKey": "mi355x_4ec24046", - "comparisonKey": "5776ea979804ef91", - "schemaVersion": 3, - "generatedAt": "2026-06-27T00:08:32.534640+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "mi355x-amds_05", - "sku": "mi355x", - "backend": "mori", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "mi355x-xgmi", - "transport": "xgmi", - "worldSize": 8, - "epSize": 8, - "label": "MI355X EP8 · mori · bf16", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "fp8-saturation", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.3125, - "configuredUnits": 80, - "deviceUnits": 256, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - 
"placement": { - "kind": "packed", - "nodes": 2, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "c774c8e4abb34da", - "workloadId": "set:5:d8d49658059863f2", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "image:rocm/sgl-dev:sglang-0.5.9-rocm720-mi35x-mori-0227-2", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28272169530", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272169530", - "createdAt": "2026-06-27T00:08:32.534640+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 40.19999876618385, - "p90": 43.000999838113785, - "p95": 44.56000030040741, - "p99": 47.880999743938446 - }, - "combine": { - "p50": 17.760999500751495, - "p90": 19.360000267624855, - "p95": 20.959999412298203, - "p99": 23.080000653862953 - }, - "roundtrip": { - "p50": 56.04099854826927, - "p90": 59.00000035762787, - "p95": 60.201000422239304, - "p99": 62.24000081419945 - }, - "isolatedSum": { - "p50": 57.96099826693535, - "p90": 62.36100010573864, - "p95": 65.51999971270561, - "p99": 70.9610003978014 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 630784, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 42.64099895954132, - "p90": 45.52000015974045, - "p95": 47.07999899983406, - "p99": 49.76100102066994 - }, - "combine": { - "p50": 16.599999740719795, - "p90": 18.60000006854534, - "p95": 19.79999989271164, - "p99": 23.080000653862953 - }, - "roundtrip": { - "p50": 58.96100029349327, - "p90": 62.39999830722809, - "p95": 64.32099640369415, - "p99": 
102.64100134372711 - }, - "isolatedSum": { - "p50": 59.240998700261116, - "p90": 64.12000022828579, - "p95": 66.8799988925457, - "p99": 72.84100167453289 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1232896, - "combineLogicalBytes": 1232896, - "fanoutMean": 5.375, - "recvTokensMax": 13, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 42.160000652074814, - "p90": 44.76099833846092, - "p95": 46.20100185275078, - "p99": 48.5600009560585 - }, - "combine": { - "p50": 19.759999588131905, - "p90": 21.27999998629093, - "p95": 22.5210003554821, - "p99": 25.200000032782555 - }, - "roundtrip": { - "p50": 62.001001089811325, - "p90": 65.32099843025208, - "p95": 66.16000086069107, - "p99": 69.15999948978424 - }, - "isolatedSum": { - "p50": 61.92000024020672, - "p90": 66.04099832475185, - "p95": 68.72200220823288, - "p99": 73.76000098884106 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2480128, - "combineLogicalBytes": 2480128, - "fanoutMean": 5.40625, - "recvTokensMax": 29, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 42.399998754262924, - "p90": 45.35999894142151, - "p95": 47.15999960899353, - "p99": 49.52000081539154 - }, - "combine": { - "p50": 20.880000665783882, - "p90": 23.08100089430809, - "p95": 24.04000051319599, - "p99": 26.441000401973724 - }, - "roundtrip": { - "p50": 62.52100318670273, - "p90": 65.64100086688995, - "p95": 66.56000018119812, - "p99": 68.84100288152695 - }, - "isolatedSum": { - "p50": 63.279999420046806, - "p90": 68.4409998357296, - "p95": 71.20000012218952, - "p99": 75.96100121736526 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - 
"trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 42.52000153064728, - "p90": 45.32000049948692, - "p95": 46.640001237392426, - "p99": 49.04000088572502 - }, - "combine": { - "p50": 25.599999353289604, - "p90": 27.799999341368675, - "p95": 29.239999130368233, - "p99": 31.520001590251923 - }, - "roundtrip": { - "p50": 67.63999909162521, - "p90": 70.60100138187408, - "p95": 71.68100029230118, - "p99": 74.36099648475647 - }, - "isolatedSum": { - "p50": 68.12000088393688, - "p90": 73.1199998408556, - "p95": 75.88000036776066, - "p99": 80.56000247597694 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 9920512, - "combineLogicalBytes": 9920512, - "fanoutMean": 5.40625, - "recvTokensMax": 92, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-60c60832", - "identity": "mi355x|mori|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||c774c8e4abb34da", - "colorKey": "mi355x_4ec24046", - "comparisonKey": "3677ee6ace04ac65", - "schemaVersion": 3, - "generatedAt": "2026-06-27T00:53:59.155172+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "mi355x-amds_05", - "sku": "mi355x", - "backend": "mori", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "mi355x-xgmi", - "transport": "xgmi", - "worldSize": 8, - "epSize": 8, - "label": "MI355X EP8 · mori · bf16", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.3125, - "configuredUnits": 
80, - "deviceUnits": 256, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 2, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "c774c8e4abb34da", - "workloadId": "set:5:d8d49658059863f2", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "image:rocm/sgl-dev:sglang-0.5.9-rocm720-mi35x-mori-0227-2", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28273516714", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28273516714", - "createdAt": "2026-06-27T00:53:59.155172+00:00", - "sha": "2c15d9415503e9ccb84cd49cf446a122796efc1e" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 40.6000018119812, - "p90": 43.76000165939331, - "p95": 45.239999890327454, - "p99": 54.71999943256378 - }, - "combine": { - "p50": 17.920000478625298, - "p90": 19.039999693632126, - "p95": 20.999999716877937, - "p99": 22.87999913096428 - }, - "roundtrip": { - "p50": 56.32000043988228, - "p90": 59.4400018453598, - "p95": 60.64099818468094, - "p99": 63.19999694824219 - }, - "isolatedSum": { - "p50": 58.5200022906065, - "p90": 62.800001353025436, - "p95": 66.23999960720539, - "p99": 77.59999856352806 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 630784, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 42.64000058174133, - "p90": 45.35999894142151, - "p95": 46.76000028848648, - "p99": 50.23999884724617 - }, - "combine": { - "p50": 16.759999096393585, - "p90": 18.68000067770481, - "p95": 
19.801000133156776, - "p99": 22.08000048995018 - }, - "roundtrip": { - "p50": 58.9199997484684, - "p90": 61.799999326467514, - "p95": 62.95999884605408, - "p99": 65.20000100135803 - }, - "isolatedSum": { - "p50": 59.39999967813492, - "p90": 64.03999961912632, - "p95": 66.56100042164326, - "p99": 72.31999933719635 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1232896, - "combineLogicalBytes": 1232896, - "fanoutMean": 5.375, - "recvTokensMax": 13, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 42.44000092148781, - "p90": 45.281000435352325, - "p95": 46.4400015771389, - "p99": 47.919999808073044 - }, - "combine": { - "p50": 19.999999552965164, - "p90": 21.99999988079071, - "p95": 23.360000923275948, - "p99": 25.72000026702881 - }, - "roundtrip": { - "p50": 61.91999837756157, - "p90": 65.20099937915802, - "p95": 66.3599967956543, - "p99": 67.84100085496902 - }, - "isolatedSum": { - "p50": 62.44000047445297, - "p90": 67.28100031614304, - "p95": 69.80000250041485, - "p99": 73.64000007510185 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2480128, - "combineLogicalBytes": 2480128, - "fanoutMean": 5.40625, - "recvTokensMax": 29, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 42.44000092148781, - "p90": 45.00100016593933, - "p95": 46.88100144267082, - "p99": 49.27999898791313 - }, - "combine": { - "p50": 20.880000665783882, - "p90": 22.840000689029694, - "p95": 24.240000173449516, - "p99": 26.399999856948853 - }, - "roundtrip": { - "p50": 62.401000410318375, - "p90": 65.48000127077103, - "p95": 66.28099828958511, - "p99": 68.00000369548798 - }, - "isolatedSum": { - "p50": 63.32000158727169, - "p90": 67.84100085496902, - "p95": 71.12100161612034, - "p99": 75.67999884486198 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 
4974592, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 42.520999908447266, - "p90": 45.1200008392334, - "p95": 46.59999907016754, - "p99": 49.04000088572502 - }, - "combine": { - "p50": 25.8799996227026, - "p90": 27.879999950528145, - "p95": 29.239999130368233, - "p99": 31.800001859664917 - }, - "roundtrip": { - "p50": 67.80099868774414, - "p90": 71.16000354290009, - "p95": 72.2000002861023, - "p99": 74.47999715805054 - }, - "isolatedSum": { - "p50": 68.40099953114986, - "p90": 73.00000078976154, - "p95": 75.83999820053577, - "p99": 80.84000274538994 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 9920512, - "combineLogicalBytes": 9920512, - "fanoutMean": 5.40625, - "recvTokensMax": 92, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-f513e0f0", - "identity": "mi355x|mori|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|small-amplitude|none|none|0|tuned||c774c8e4abb34da", - "colorKey": "mi355x_4ec24046", - "comparisonKey": "43eedfb9c3cc2b53", - "schemaVersion": 3, - "generatedAt": "2026-06-27T00:07:01.734617+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "mi355x-amds_01", - "sku": "mi355x", - "backend": "mori", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "mi355x-xgmi", - "transport": "xgmi", - "worldSize": 8, - "epSize": 8, - "label": "MI355X EP8 · mori · bf16", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - 
"activationProfile": "small-amplitude", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.3125, - "configuredUnits": 80, - "deviceUnits": 256, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 2, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "c774c8e4abb34da", - "workloadId": "set:5:d8d49658059863f2", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "image:rocm/sgl-dev:sglang-0.5.9-rocm720-mi35x-mori-0227-2", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28272162006", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272162006", - "createdAt": "2026-06-27T00:07:01.734617+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 40.44099897146225, - "p90": 43.72100159525871, - "p95": 45.1200008392334, - "p99": 51.600001752376556 - }, - "combine": { - "p50": 15.960000455379486, - "p90": 18.160000443458557, - "p95": 19.279999658465385, - "p99": 21.159999072551727 - }, - "roundtrip": { - "p50": 55.56099861860275, - "p90": 58.75999853014946, - "p95": 60.120001435279846, - "p99": 63.63999843597412 - }, - "isolatedSum": { - "p50": 56.400999426841736, - "p90": 61.88100203871727, - "p95": 64.40000049769878, - "p99": 72.76000082492828 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 630784, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 42.08099842071533, 
- "p90": 45.0810007750988, - "p95": 46.39999940991402, - "p99": 49.76100102066994 - }, - "combine": { - "p50": 16.00000075995922, - "p90": 18.60000006854534, - "p95": 19.55999992787838, - "p99": 21.920999512076378 - }, - "roundtrip": { - "p50": 58.32099914550781, - "p90": 61.64000183343887, - "p95": 63.600003719329834, - "p99": 67.59999692440033 - }, - "isolatedSum": { - "p50": 58.08099918067455, - "p90": 63.68100084364414, - "p95": 65.9599993377924, - "p99": 71.68200053274632 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1232896, - "combineLogicalBytes": 1232896, - "fanoutMean": 5.375, - "recvTokensMax": 13, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 41.839998215436935, - "p90": 44.920001178979874, - "p95": 46.28000035881996, - "p99": 49.40100014209747 - }, - "combine": { - "p50": 19.31999996304512, - "p90": 21.75999991595745, - "p95": 22.5600004196167, - "p99": 24.43999983370304 - }, - "roundtrip": { - "p50": 60.80099940299988, - "p90": 64.03999775648117, - "p95": 65.56099653244019, - "p99": 69.92000341415405 - }, - "isolatedSum": { - "p50": 61.159998178482056, - "p90": 66.68000109493732, - "p95": 68.84000077843666, - "p99": 73.84099997580051 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2480128, - "combineLogicalBytes": 2480128, - "fanoutMean": 5.40625, - "recvTokensMax": 29, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 42.080000042915344, - "p90": 45.20000144839287, - "p95": 46.64099961519241, - "p99": 48.43999817967415 - }, - "combine": { - "p50": 20.16099914908409, - "p90": 22.280000150203705, - "p95": 23.04000034928322, - "p99": 24.960000067949295 - }, - "roundtrip": { - "p50": 62.199998646974564, - "p90": 65.36100059747696, - "p95": 66.72099977731705, - "p99": 68.71999800205231 - }, - "isolatedSum": { - 
"p50": 62.240999191999435, - "p90": 67.48000159859657, - "p95": 69.68099996447563, - "p99": 73.39999824762344 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 42.24099963903427, - "p90": 45.239999890327454, - "p95": 46.36099934577942, - "p99": 48.40100184082985 - }, - "combine": { - "p50": 24.639999493956566, - "p90": 26.88100002706051, - "p95": 27.881000190973282, - "p99": 30.079999938607216 - }, - "roundtrip": { - "p50": 67.47999787330627, - "p90": 70.60100138187408, - "p95": 72.28100299835205, - "p99": 75.20099729299545 - }, - "isolatedSum": { - "p50": 66.88099913299084, - "p90": 72.12099991738796, - "p95": 74.2419995367527, - "p99": 78.48100177943707 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 9920512, - "combineLogicalBytes": 9920512, - "fanoutMean": 5.40625, - "recvTokensMax": 92, - "stragglerRank": 4, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-67074ab6", - "identity": "mi355x|mori|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|wide-dynamic-range|none|none|0|tuned||c774c8e4abb34da", - "colorKey": "mi355x_4ec24046", - "comparisonKey": "2ccb7553c969aafc", - "schemaVersion": 3, - "generatedAt": "2026-06-27T00:07:48.076161+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "mi355x-amds_06", - "sku": "mi355x", - "backend": "mori", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "mi355x-xgmi", - "transport": "xgmi", - "worldSize": 8, - "epSize": 8, - "label": "MI355X EP8 · mori · bf16", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 
8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "wide-dynamic-range", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.3125, - "configuredUnits": 80, - "deviceUnits": 256, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 2, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "c774c8e4abb34da", - "workloadId": "set:5:d8d49658059863f2", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "image:rocm/sgl-dev:sglang-0.5.9-rocm720-mi35x-mori-0227-2", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28272165928", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272165928", - "createdAt": "2026-06-27T00:07:48.076161+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 40.240999311208725, - "p90": 43.43999922275543, - "p95": 44.76099833846092, - "p99": 48.11999946832657 - }, - "combine": { - "p50": 16.839999705553055, - "p90": 18.319999799132347, - "p95": 19.600000232458115, - "p99": 23.399999365210533 - }, - "roundtrip": { - "p50": 56.120000779628754, - "p90": 59.48000028729439, - "p95": 60.76100096106529, - "p99": 65.24000316858292 - }, - "isolatedSum": { - "p50": 57.08099901676178, - "p90": 61.75999902188778, - "p95": 64.36099857091904, - "p99": 71.5199988335371 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 630784, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 7, 
- "stragglerRank": 2, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 42.44000092148781, - "p90": 45.48000171780586, - "p95": 46.51999846100807, - "p99": 49.19999837875366 - }, - "combine": { - "p50": 16.201000660657883, - "p90": 18.479999154806137, - "p95": 19.55999992787838, - "p99": 21.800000220537186 - }, - "roundtrip": { - "p50": 58.80099907517433, - "p90": 61.96000054478645, - "p95": 62.76000291109085, - "p99": 64.19999897480011 - }, - "isolatedSum": { - "p50": 58.64100158214569, - "p90": 63.960000872612, - "p95": 66.07999838888645, - "p99": 70.99999859929085 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1232896, - "combineLogicalBytes": 1232896, - "fanoutMean": 5.375, - "recvTokensMax": 13, - "stragglerRank": 2, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 42.04000160098076, - "p90": 44.280000030994415, - "p95": 45.921001583337784, - "p99": 49.28100109100342 - }, - "combine": { - "p50": 19.039999693632126, - "p90": 21.51999995112419, - "p95": 22.801000624895096, - "p99": 24.560000747442245 - }, - "roundtrip": { - "p50": 61.601001769304276, - "p90": 64.92000073194504, - "p95": 66.00099802017212, - "p99": 67.72000342607498 - }, - "isolatedSum": { - "p50": 61.080001294612885, - "p90": 65.7999999821186, - "p95": 68.72200220823288, - "p99": 73.84100183844566 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2480128, - "combineLogicalBytes": 2480128, - "fanoutMean": 5.40625, - "recvTokensMax": 29, - "stragglerRank": 2, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 42.27999970316887, - "p90": 45.00000178813934, - "p95": 46.23999819159508, - "p99": 48.16000163555145 - }, - "combine": { - "p50": 20.320000126957893, - "p90": 23.32100085914135, - "p95": 25.439999997615814, - "p99": 
57.88100138306618 - }, - "roundtrip": { - "p50": 62.3599998652935, - "p90": 65.0399997830391, - "p95": 66.0799965262413, - "p99": 68.00100207328796 - }, - "isolatedSum": { - "p50": 62.59999983012676, - "p90": 68.3210026472807, - "p95": 71.67999818921089, - "p99": 106.04100301861763 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 2, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 42.399998754262924, - "p90": 45.0810007750988, - "p95": 46.23999819159508, - "p99": 48.8400012254715 - }, - "combine": { - "p50": 25.120999664068222, - "p90": 27.2000003606081, - "p95": 28.161000460386276, - "p99": 30.319999903440475 - }, - "roundtrip": { - "p50": 67.63999909162521, - "p90": 70.79999893903732, - "p95": 71.68000191450119, - "p99": 73.72000068426132 - }, - "isolatedSum": { - "p50": 67.52099841833115, - "p90": 72.2810011357069, - "p95": 74.40099865198135, - "p99": 79.16000112891197 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 9920512, - "combineLogicalBytes": 9920512, - "fanoutMean": 5.40625, - "recvTokensMax": 92, - "stragglerRank": 2, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-23f1ecd4", - "identity": "mi355x|mori|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|zeros|none|none|0|tuned||c774c8e4abb34da", - "colorKey": "mi355x_4ec24046", - "comparisonKey": "1ab1f06166250146", - "schemaVersion": 3, - "generatedAt": "2026-06-27T00:06:16.763261+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "mi355x-amds_02", - "sku": "mi355x", - "backend": "mori", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "mi355x-xgmi", - 
"transport": "xgmi", - "worldSize": 8, - "epSize": 8, - "label": "MI355X EP8 · mori · bf16", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "zeros", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.3125, - "configuredUnits": 80, - "deviceUnits": 256, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 2, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "c774c8e4abb34da", - "workloadId": "set:5:d8d49658059863f2", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "image:rocm/sgl-dev:sglang-0.5.9-rocm720-mi35x-mori-0227-2", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28272158268", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272158268", - "createdAt": "2026-06-27T00:06:16.763261+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 40.240999311208725, - "p90": 43.5199998319149, - "p95": 44.920001178979874, - "p99": 54.32000011205673 - }, - "combine": { - "p50": 17.680000513792038, - "p90": 19.401000812649727, - "p95": 20.759999752044678, - "p99": 23.80100078880787 - }, - "roundtrip": { - "p50": 56.040000170469284, - "p90": 59.12100151181221, - "p95": 60.47999858856201, - "p99": 63.040003180503845 - }, - "isolatedSum": { - "p50": 57.92099982500076, - "p90": 62.92100064456463, - "p95": 65.68000093102455, - "p99": 
78.1210009008646 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 630784, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 1, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 42.319998145103455, - "p90": 44.87999901175499, - "p95": 46.480998396873474, - "p99": 49.320999532938 - }, - "combine": { - "p50": 16.720000654459, - "p90": 18.240999430418015, - "p95": 19.401000812649727, - "p99": 23.240000009536743 - }, - "roundtrip": { - "p50": 58.479998260736465, - "p90": 61.879999935626984, - "p95": 62.880001962184906, - "p99": 65.99999964237213 - }, - "isolatedSum": { - "p50": 59.039998799562454, - "p90": 63.120998442173004, - "p95": 65.8819992095232, - "p99": 72.56099954247475 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1232896, - "combineLogicalBytes": 1232896, - "fanoutMean": 5.375, - "recvTokensMax": 13, - "stragglerRank": 1, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 41.919998824596405, - "p90": 45.120999217033386, - "p95": 46.59999907016754, - "p99": 50.84000155329704 - }, - "combine": { - "p50": 19.79999989271164, - "p90": 21.27999998629093, - "p95": 23.16099964082241, - "p99": 25.400999933481216 - }, - "roundtrip": { - "p50": 61.51999905705452, - "p90": 64.40100073814392, - "p95": 65.80100208520889, - "p99": 68.24000179767609 - }, - "isolatedSum": { - "p50": 61.719998717308044, - "p90": 66.40099920332432, - "p95": 69.76099871098995, - "p99": 76.24100148677826 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2480128, - "combineLogicalBytes": 2480128, - "fanoutMean": 5.40625, - "recvTokensMax": 29, - "stragglerRank": 1, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 42.121000587940216, - "p90": 45.04000023007393, - "p95": 
46.31999880075455, - "p99": 50.641000270843506 - }, - "combine": { - "p50": 21.04100026190281, - "p90": 22.95999974012375, - "p95": 24.6799997985363, - "p99": 26.920000091195107 - }, - "roundtrip": { - "p50": 62.20100075006485, - "p90": 66.39999896287918, - "p95": 68.59999895095825, - "p99": 95.88100016117096 - }, - "isolatedSum": { - "p50": 63.162000849843025, - "p90": 67.99999997019768, - "p95": 70.99999859929085, - "p99": 77.56100036203861 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, - "combineLogicalBytes": 4974592, - "fanoutMean": 5.421875, - "recvTokensMax": 47, - "stragglerRank": 1, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 42.281001806259155, - "p90": 45.27999833226204, - "p95": 46.51999846100807, - "p99": 49.320001155138016 - }, - "combine": { - "p50": 25.919999927282333, - "p90": 28.080999851226807, - "p95": 29.559999704360962, - "p99": 32.35999867320061 - }, - "roundtrip": { - "p50": 67.31999665498734, - "p90": 70.2809989452362, - "p95": 71.40100002288818, - "p99": 74.16000217199326 - }, - "isolatedSum": { - "p50": 68.20100173354149, - "p90": 73.36099818348885, - "p95": 76.07999816536903, - "p99": 81.67999982833862 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 9920512, - "combineLogicalBytes": 9920512, - "fanoutMean": 5.40625, - "recvTokensMax": 92, - "stragglerRank": 1, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-83a44089", - "identity": "mi355x|mori|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|decode|normal|none|none|0|tuned||2c22646e864c27e", - "colorKey": "mi355x_eb5b377e", - "comparisonKey": "5bbe7a250a72d8b4", - "schemaVersion": 3, - "generatedAt": "2026-06-26T23:58:24.839410+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "mi355x-amds_01", - "sku": "mi355x", - "backend": "mori", - "phase": "decode", - "mode": "normal", - "resourceMode": 
"tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "mi355x-xgmi", - "transport": "xgmi", - "worldSize": 8, - "epSize": 8, - "label": "MI355X EP8 · mori · bf16 · balanced", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "balanced", - "routingLabel": "balanced", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.3125, - "configuredUnits": 80, - "deviceUnits": 256, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 2, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "2c22646e864c27e", - "workloadId": "set:5:7af12818400d6348", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "image:rocm/sgl-dev:sglang-0.5.9-rocm720-mi35x-mori-0227-2", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28271906612", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271906612", - "createdAt": "2026-06-26T23:58:24.839410+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 40.36099836230278, - "p90": 43.44100132584572, - "p95": 44.60100084543228, - "p99": 48.920001834630966 - }, - "combine": { - "p50": 16.3199994713068, - "p90": 18.880000337958336, - "p95": 19.88000050187111, - "p99": 21.880999207496643 - }, - "roundtrip": { - "p50": 57.20100179314613, - "p90": 60.63999980688095, - "p95": 
61.72100082039833, - "p99": 64.56000357866287 - }, - "isolatedSum": { - "p50": 56.68099783360958, - "p90": 62.321001663804054, - "p95": 64.48100134730339, - "p99": 70.80100104212761 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 917504, - "combineLogicalBytes": 917504, - "fanoutMean": 8, - "recvTokensMax": 8, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 42.64099895954132, - "p90": 45.680999755859375, - "p95": 47.2010001540184, - "p99": 49.47999864816666 - }, - "combine": { - "p50": 16.519999131560326, - "p90": 18.92000064253807, - "p95": 20.080000162124634, - "p99": 21.801000460982323 - }, - "roundtrip": { - "p50": 59.52100083231926, - "p90": 62.67999857664108, - "p95": 63.84100019931793, - "p99": 66.96099787950516 - }, - "isolatedSum": { - "p50": 59.160998091101646, - "p90": 64.60100039839745, - "p95": 67.28100031614304, - "p99": 71.28099910914898 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1835008, - "combineLogicalBytes": 1835008, - "fanoutMean": 8, - "recvTokensMax": 16, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 42.64000058174133, - "p90": 45.8809994161129, - "p95": 47.00100049376488, - "p99": 49.959998577833176 - }, - "combine": { - "p50": 20.759999752044678, - "p90": 23.600000888109207, - "p95": 24.480000138282776, - "p99": 26.760000735521317 - }, - "roundtrip": { - "p50": 64.12000209093094, - "p90": 67.08099693059921, - "p95": 67.88100302219391, - "p99": 70.36100327968597 - }, - "isolatedSum": { - "p50": 63.40000033378601, - "p90": 69.4810003042221, - "p95": 71.48100063204765, - "p99": 76.71999931335449 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 3670016, - "combineLogicalBytes": 3670016, - "fanoutMean": 8, - "recvTokensMax": 32, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 
600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 42.7200011909008, - "p90": 45.88000103831291, - "p95": 47.36100137233734, - "p99": 49.60000142455101 - }, - "combine": { - "p50": 22.679999470710754, - "p90": 25.280000641942024, - "p95": 26.159999892115593, - "p99": 27.240000665187836 - }, - "roundtrip": { - "p50": 65.72099775075912, - "p90": 68.64099949598312, - "p95": 69.64000314474106, - "p99": 72.2000002861023 - }, - "isolatedSum": { - "p50": 65.40000066161156, - "p90": 71.16000168025494, - "p95": 73.52100126445293, - "p99": 76.84000208973885 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 7340032, - "combineLogicalBytes": 7340032, - "fanoutMean": 8, - "recvTokensMax": 64, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 42.87999868392944, - "p90": 45.88000103831291, - "p95": 46.959999948740005, - "p99": 48.79999905824661 - }, - "combine": { - "p50": 28.119999915361404, - "p90": 30.44000081717968, - "p95": 31.401000916957855, - "p99": 33.640000969171524 - }, - "roundtrip": { - "p50": 71.80000096559525, - "p90": 75.15999674797058, - "p95": 76.39999687671661, - "p99": 78.31999659538269 - }, - "isolatedSum": { - "p50": 70.99999859929085, - "p90": 76.32000185549259, - "p95": 78.36100086569786, - "p99": 82.44000002741814 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 14680064, - "combineLogicalBytes": 14680064, - "fanoutMean": 8, - "recvTokensMax": 128, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-c1291ad7", - "identity": "mi355x|mori|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|8|decode|normal|none|none|0|tuned||15d7289bb70ed17", - "colorKey": "mi355x_ae729691", - "comparisonKey": "730c294e090417f2", - "schemaVersion": 3, - "generatedAt": "2026-06-26T23:59:10.167624+00:00", - "status": "valid", - 
"publicationStatus": "official", - "runner": "mi355x-amds_06", - "sku": "mi355x", - "backend": "mori", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "mi355x-xgmi", - "transport": "xgmi", - "worldSize": 8, - "epSize": 8, - "label": "MI355X EP8 · mori · bf16 · balanced-rank-local", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "balanced-rank-local", - "routingLabel": "balanced-rank-local", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.3125, - "configuredUnits": 80, - "deviceUnits": 256, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 2, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "15d7289bb70ed17", - "workloadId": "set:5:2eebbed158fe1320", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "image:rocm/sgl-dev:sglang-0.5.9-rocm720-mi35x-mori-0227-2", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28271910050", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271910050", - "createdAt": "2026-06-26T23:59:10.167624+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 36.80099919438362, - "p90": 39.80100154876709, - "p95": 40.76100140810013, - "p99": 43.63999888300896 - }, - "combine": { - 
"p50": 15.320000238716602, - "p90": 17.480000853538513, - "p95": 18.68000067770481, - "p99": 20.999999716877937 - }, - "roundtrip": { - "p50": 49.07999932765961, - "p90": 51.80000141263008, - "p95": 52.76099964976311, - "p99": 53.76100167632103 - }, - "isolatedSum": { - "p50": 52.12099943310022, - "p90": 57.2810024023056, - "p95": 59.44100208580494, - "p99": 64.6399985998869 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 114688, - "combineLogicalBytes": 114688, - "fanoutMean": 1, - "recvTokensMax": 4, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 38.07999938726425, - "p90": 40.39999842643738, - "p95": 41.20099917054176, - "p99": 42.80000180006027 - }, - "combine": { - "p50": 15.799999237060547, - "p90": 17.999999225139618, - "p95": 19.279999658465385, - "p99": 21.040000021457672 - }, - "roundtrip": { - "p50": 51.600001752376556, - "p90": 53.92000079154968, - "p95": 55.24099990725517, - "p99": 57.32100084424019 - }, - "isolatedSum": { - "p50": 53.8799986243248, - "p90": 58.399997651576996, - "p95": 60.48099882900715, - "p99": 63.840001821517944 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 229376, - "combineLogicalBytes": 229376, - "fanoutMean": 1, - "recvTokensMax": 4, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 32.71999955177307, - "p90": 35.5600006878376, - "p95": 36.559998989105225, - "p99": 39.000000804662704 - }, - "combine": { - "p50": 13.72000016272068, - "p90": 15.799999237060547, - "p95": 16.599999740719795, - "p99": 18.120000138878822 - }, - "roundtrip": { - "p50": 45.71999981999397, - "p90": 49.04000088572502, - "p95": 49.96100068092346, - "p99": 51.44000053405762 - }, - "isolatedSum": { - "p50": 46.43999971449375, - "p90": 51.35999992489815, - "p95": 53.15999872982502, - "p99": 57.12000094354153 - }, - 
"roundtripMeasured": true, - "dispatchLogicalBytes": 458752, - "combineLogicalBytes": 458752, - "fanoutMean": 1, - "recvTokensMax": 4, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 37.84099966287613, - "p90": 40.92000052332878, - "p95": 41.999999433755875, - "p99": 43.880000710487366 - }, - "combine": { - "p50": 14.919999986886978, - "p90": 17.27999933063984, - "p95": 18.039999529719353, - "p99": 19.55999992787838 - }, - "roundtrip": { - "p50": 52.241001278162, - "p90": 55.75999990105629, - "p95": 56.68000131845474, - "p99": 58.35999920964241 - }, - "isolatedSum": { - "p50": 52.76099964976311, - "p90": 58.19999985396862, - "p95": 60.03999896347523, - "p99": 63.440000638365746 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 917504, - "combineLogicalBytes": 917504, - "fanoutMean": 1, - "recvTokensMax": 8, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 38.24099898338318, - "p90": 40.92000052332878, - "p95": 41.839998215436935, - "p99": 44.16000097990036 - }, - "combine": { - "p50": 16.24000072479248, - "p90": 18.841000273823738, - "p95": 19.88000050187111, - "p99": 22.280000150203705 - }, - "roundtrip": { - "p50": 54.28000167012215, - "p90": 57.840000838041306, - "p95": 58.800000697374344, - "p99": 60.96100062131882 - }, - "isolatedSum": { - "p50": 54.48099970817566, - "p90": 59.76100079715252, - "p95": 61.719998717308044, - "p99": 66.44000113010406 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1835008, - "combineLogicalBytes": 1835008, - "fanoutMean": 1, - "recvTokensMax": 16, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-ace78f17", - "identity": 
"mi355x|mori|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|decode|normal|none|none|0|tuned||c8b7839b4895c1a", - "colorKey": "mi355x_62dc5cd4", - "comparisonKey": "316ae2638347880f", - "schemaVersion": 3, - "generatedAt": "2026-06-27T00:01:29.418642+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "mi355x-amds_00", - "sku": "mi355x", - "backend": "mori", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "mi355x-xgmi", - "transport": "xgmi", - "worldSize": 8, - "epSize": 8, - "label": "MI355X EP8 · mori · bf16 · hotspot-single", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "hotspot-single", - "routingLabel": "hotspot-single", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.3125, - "configuredUnits": 80, - "deviceUnits": 256, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 2, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "c8b7839b4895c1a", - "workloadId": "set:5:286be993cd819ed9", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "image:rocm/sgl-dev:sglang-0.5.9-rocm720-mi35x-mori-0227-2", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28271920340", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271920340", - "createdAt": 
"2026-06-27T00:01:29.418642+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 39.84000161290169, - "p90": 42.55999997258186, - "p95": 44.08000037074089, - "p99": 48.601001501083374 - }, - "combine": { - "p50": 16.200000420212746, - "p90": 17.960000783205032, - "p95": 19.07999999821186, - "p99": 21.640000864863396 - }, - "roundtrip": { - "p50": 55.44000118970871, - "p90": 58.27999860048294, - "p95": 59.20099839568138, - "p99": 60.920000076293945 - }, - "isolatedSum": { - "p50": 56.04000203311443, - "p90": 60.520000755786896, - "p95": 63.16000036895275, - "p99": 70.24100236594677 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 602112, - "combineLogicalBytes": 602112, - "fanoutMean": 5.25, - "recvTokensMax": 8, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 42.1609990298748, - "p90": 44.920001178979874, - "p95": 45.80099880695343, - "p99": 47.800999134778976 - }, - "combine": { - "p50": 16.07999950647354, - "p90": 18.401000648736954, - "p95": 19.279999658465385, - "p99": 20.880000665783882 - }, - "roundtrip": { - "p50": 58.35999920964241, - "p90": 61.56099960207939, - "p95": 62.60000169277191, - "p99": 64.7599995136261 - }, - "isolatedSum": { - "p50": 58.24099853634834, - "p90": 63.32100182771683, - "p95": 65.08099846541882, - "p99": 68.68099980056286 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1247232, - "combineLogicalBytes": 1247232, - "fanoutMean": 5.4375, - "recvTokensMax": 16, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 41.96000099182129, - "p90": 44.599998742341995, - "p95": 45.96000164747238, - "p99": 48.16100001335144 - }, - "combine": { - "p50": 19.401000812649727, - "p90": 21.880000829696655, - "p95": 
23.080000653862953, - "p99": 24.12099950015545 - }, - "roundtrip": { - "p50": 61.68099865317345, - "p90": 65.20099937915802, - "p95": 65.99999964237213, - "p99": 67.4000009894371 - }, - "isolatedSum": { - "p50": 61.361001804471016, - "p90": 66.47999957203865, - "p95": 69.04000230133533, - "p99": 72.28199951350689 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2451456, - "combineLogicalBytes": 2451456, - "fanoutMean": 5.34375, - "recvTokensMax": 32, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 41.80099815130234, - "p90": 44.2809984087944, - "p95": 45.559998601675034, - "p99": 48.39999973773956 - }, - "combine": { - "p50": 21.239999681711197, - "p90": 23.19999970495701, - "p95": 24.080000817775726, - "p99": 26.040000841021538 - }, - "roundtrip": { - "p50": 62.960997223854065, - "p90": 66.041000187397, - "p95": 66.91999733448029, - "p99": 68.71999800205231 - }, - "isolatedSum": { - "p50": 63.040997833013535, - "p90": 67.48099811375141, - "p95": 69.63999941945076, - "p99": 74.4400005787611 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4859904, - "combineLogicalBytes": 4859904, - "fanoutMean": 5.296875, - "recvTokensMax": 64, - "stragglerRank": 6, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 42.1609990298748, - "p90": 45.00000178813934, - "p95": 45.96000164747238, - "p99": 50.40000006556511 - }, - "combine": { - "p50": 26.599999517202377, - "p90": 28.68100069463253, - "p95": 29.96000088751316, - "p99": 31.720001250505447 - }, - "roundtrip": { - "p50": 69.20100003480911, - "p90": 71.76099717617035, - "p95": 72.7199986577034, - "p99": 74.16000217199326 - }, - "isolatedSum": { - "p50": 68.76099854707718, - "p90": 73.68100248277187, - "p95": 75.92000253498554, - "p99": 82.12000131607056 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 
9748480, - "combineLogicalBytes": 9748480, - "fanoutMean": 5.3125, - "recvTokensMax": 128, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-2129d47b", - "identity": "mi355x|mori|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|decode|normal|none|none|0|tuned||4d5546b3fb85130", - "colorKey": "mi355x_570d6605", - "comparisonKey": "1ea3da47c00f36f8", - "schemaVersion": 3, - "generatedAt": "2026-06-26T23:59:55.992554+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "mi355x-amds_07", - "sku": "mi355x", - "backend": "mori", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "mi355x-xgmi", - "transport": "xgmi", - "worldSize": 8, - "epSize": 8, - "label": "MI355X EP8 · mori · bf16 · zipf", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "zipf", - "routingLabel": "zipf", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.3125, - "configuredUnits": 80, - "deviceUnits": 256, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 2, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "4d5546b3fb85130", - "workloadId": "set:5:f5576e2b712d38c3", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "image:rocm/sgl-dev:sglang-0.5.9-rocm720-mi35x-mori-0227-2", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": 
"SemiAnalysisAI/InferenceX", - "run": { - "id": "28271913592", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271913592", - "createdAt": "2026-06-26T23:59:55.992554+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 39.03999924659729, - "p90": 41.76099970936775, - "p95": 43.40000078082085, - "p99": 47.15999960899353 - }, - "combine": { - "p50": 16.359999775886536, - "p90": 18.519999459385872, - "p95": 20.12000046670437, - "p99": 23.40099960565567 - }, - "roundtrip": { - "p50": 53.95999923348427, - "p90": 57.20100179314613, - "p95": 58.75999853014946, - "p99": 61.20099872350693 - }, - "isolatedSum": { - "p50": 55.399999022483826, - "p90": 60.280999168753624, - "p95": 63.520001247525215, - "p99": 70.5609992146492 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 444416, - "combineLogicalBytes": 444416, - "fanoutMean": 3.875, - "recvTokensMax": 8, - "stragglerRank": 3, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 40.640998631715775, - "p90": 43.99999976158142, - "p95": 44.840000569820404, - "p99": 48.0009987950325 - }, - "combine": { - "p50": 16.519999131560326, - "p90": 18.561000004410744, - "p95": 20.24099975824356, - "p99": 23.520000278949738 - }, - "roundtrip": { - "p50": 55.52000179886818, - "p90": 59.321001172065735, - "p95": 60.72099879384041, - "p99": 68.88099759817123 - }, - "isolatedSum": { - "p50": 57.1609977632761, - "p90": 62.560999765992165, - "p95": 65.08100032806396, - "p99": 71.52099907398224 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 845824, - "combineLogicalBytes": 845824, - "fanoutMean": 3.6875, - "recvTokensMax": 16, - "stragglerRank": 2, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 41.28099977970123, - "p90": 
44.16000097990036, - "p95": 45.00000178813934, - "p99": 47.68000170588493 - }, - "combine": { - "p50": 17.640000209212303, - "p90": 20.160000771284103, - "p95": 21.479999646544456, - "p99": 24.6799997985363 - }, - "roundtrip": { - "p50": 59.04100090265274, - "p90": 63.07999789714813, - "p95": 64.87999856472015, - "p99": 68.83999705314636 - }, - "isolatedSum": { - "p50": 58.920999988913536, - "p90": 64.32000175118446, - "p95": 66.4800014346838, - "p99": 72.36000150442123 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1691648, - "combineLogicalBytes": 1691648, - "fanoutMean": 3.6875, - "recvTokensMax": 32, - "stragglerRank": 2, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 41.919998824596405, - "p90": 44.801000505685806, - "p95": 46.84000089764595, - "p99": 50.880998373031616 - }, - "combine": { - "p50": 19.600000232458115, - "p90": 22.120000794529915, - "p95": 23.520000278949738, - "p99": 26.799999177455902 - }, - "roundtrip": { - "p50": 61.000000685453415, - "p90": 64.56000357866287, - "p95": 65.88099896907806, - "p99": 69.52100247144699 - }, - "isolatedSum": { - "p50": 61.51999905705452, - "p90": 66.92100130021572, - "p95": 70.36000117659569, - "p99": 77.68099755048752 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 3354624, - "combineLogicalBytes": 3354624, - "fanoutMean": 3.65625, - "recvTokensMax": 64, - "stragglerRank": 3, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 41.839998215436935, - "p90": 44.920001178979874, - "p95": 46.92000150680542, - "p99": 50.1599982380867 - }, - "combine": { - "p50": 24.481000378727913, - "p90": 27.720000594854355, - "p95": 30.561000108718872, - "p99": 59.321001172065735 - }, - "roundtrip": { - "p50": 66.23999774456024, - "p90": 69.36100125312805, - "p95": 70.47999650239944, - "p99": 73.36000353097916 - }, - "isolatedSum": { - 
"p50": 66.32099859416485, - "p90": 72.64000177383423, - "p95": 77.48100161552429, - "p99": 109.48099941015244 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 6537216, - "combineLogicalBytes": 6537216, - "fanoutMean": 3.5625, - "recvTokensMax": 127, - "stragglerRank": 3, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-47886ba2", - "identity": "mi355x|mori|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|decode|normal|none|none|0|tuned||5c00b1a0c13aa3e", - "colorKey": "mi355x_6fd30e97", - "comparisonKey": "41d88b5d4da0110a", - "schemaVersion": 3, - "generatedAt": "2026-06-27T00:00:43.491121+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "mi355x-amds_03", - "sku": "mi355x", - "backend": "mori", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "mi355x-xgmi", - "transport": "xgmi", - "worldSize": 8, - "epSize": 8, - "label": "MI355X EP8 · mori · bf16 · zipf-heavy", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "zipf-heavy", - "routingLabel": "zipf-heavy", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.3125, - "configuredUnits": 80, - "deviceUnits": 256, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 2, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "5c00b1a0c13aa3e", - "workloadId": "set:5:6b84350720aa8233", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": 
null, - "backendVersion": "image:rocm/sgl-dev:sglang-0.5.9-rocm720-mi35x-mori-0227-2", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28271916622", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271916622", - "createdAt": "2026-06-27T00:00:43.491121+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 38.32000121474266, - "p90": 40.28100147843361, - "p95": 41.600000113248825, - "p99": 46.31999880075455 - }, - "combine": { - "p50": 15.720000490546227, - "p90": 17.03999936580658, - "p95": 18.640000373125076, - "p99": 20.800000056624413 - }, - "roundtrip": { - "p50": 51.16099864244461, - "p90": 53.55999991297722, - "p95": 54.96000126004219, - "p99": 57.760998606681824 - }, - "isolatedSum": { - "p50": 54.04000170528889, - "p90": 57.32100084424019, - "p95": 60.2400004863739, - "p99": 67.11999885737896 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 172032, - "combineLogicalBytes": 172032, - "fanoutMean": 1.5, - "recvTokensMax": 8, - "stragglerRank": 2, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 40.47999903559685, - "p90": 42.64099895954132, - "p95": 44.47999969124794, - "p99": 48.760998994112015 - }, - "combine": { - "p50": 16.00099913775921, - "p90": 17.160000279545784, - "p95": 18.039999529719353, - "p99": 20.800000056624413 - }, - "roundtrip": { - "p50": 53.16000059247017, - "p90": 56.07999861240387, - "p95": 57.64099955558777, - "p99": 60.08100137114525 - }, - "isolatedSum": { - "p50": 56.480998173356056, - "p90": 59.800999239087105, - "p95": 62.51999922096729, - "p99": 69.56099905073643 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 315392, - "combineLogicalBytes": 315392, - "fanoutMean": 1.375, - "recvTokensMax": 16, - 
"stragglerRank": 2, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 41.200000792741776, - "p90": 43.241001665592194, - "p95": 44.52100023627281, - "p99": 48.280999064445496 - }, - "combine": { - "p50": 17.240000888705254, - "p90": 18.519999459385872, - "p95": 20.19999921321869, - "p99": 22.5210003554821 - }, - "roundtrip": { - "p50": 56.561000645160675, - "p90": 59.241000562906265, - "p95": 60.440998524427414, - "p99": 64.4410029053688 - }, - "isolatedSum": { - "p50": 58.44000168144703, - "p90": 61.761001124978065, - "p95": 64.7209994494915, - "p99": 70.8019994199276 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 616448, - "combineLogicalBytes": 616448, - "fanoutMean": 1.34375, - "recvTokensMax": 32, - "stragglerRank": 2, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 41.31999984383583, - "p90": 43.28100010752678, - "p95": 44.679999351501465, - "p99": 46.480000019073486 - }, - "combine": { - "p50": 18.8400000333786, - "p90": 20.041000097990036, - "p95": 21.240999922156334, - "p99": 24.441000074148178 - }, - "roundtrip": { - "p50": 58.761000633239746, - "p90": 61.43999844789505, - "p95": 63.1600022315979, - "p99": 65.52000343799591 - }, - "isolatedSum": { - "p50": 60.15999987721443, - "p90": 63.322000205516815, - "p95": 65.9209992736578, - "p99": 70.92100009322166 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1376256, - "combineLogicalBytes": 1376256, - "fanoutMean": 1.5, - "recvTokensMax": 64, - "stragglerRank": 2, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 41.40099883079529, - "p90": 43.480001389980316, - "p95": 44.440001249313354, - "p99": 46.00000008940697 - }, - "combine": { - "p50": 22.87999913096428, - "p90": 24.6799997985363, - "p95": 26.559999212622643, - "p99": 
29.40100058913231 - }, - "roundtrip": { - "p50": 63.19999694824219, - "p90": 65.76000154018402, - "p95": 67.28000193834305, - "p99": 69.64100152254105 - }, - "isolatedSum": { - "p50": 64.28099796175957, - "p90": 68.16000118851662, - "p95": 71.000000461936, - "p99": 75.40100067853928 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2781184, - "combineLogicalBytes": 2781184, - "fanoutMean": 1.515625, - "recvTokensMax": 128, - "stragglerRank": 2, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-8d163d45", - "identity": "mi355x|mori|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|decode|normal|none|none|0|tuned||d42040086b5de07", - "colorKey": "mi355x_65e339f9", - "comparisonKey": "2ba4cba3af48c2b3", - "schemaVersion": 3, - "generatedAt": "2026-06-26T23:39:01.384245+00:00", - "status": "valid", - "publicationStatus": "official", - "runner": "mi355x-amds_07", - "sku": "mi355x", - "backend": "mori", - "phase": "decode", - "mode": "normal", - "resourceMode": "tuned", - "suite": "backend-default", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "mi355x-xgmi", - "transport": "xgmi", - "worldSize": 8, - "epSize": 8, - "label": "MI355X EP8 · mori · bf16 · zipf+eplb", - "model": "DeepSeek-V3 (EPLB physical)", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 288, - "routing": "zipf", - "routingLabel": "zipf+eplb", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": true, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": null, - "achievedFraction": 0.3125, - "configuredUnits": 80, - "deviceUnits": 256, - "resourceClass": "backend-tuned", - "conformanceClass": "backend-default", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 2, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - 
"traceSignature": "d42040086b5de07", - "workloadId": "set:5:f5576e2b712d38c3", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": 4.875, - "eplbImbalanceAfter": 1.0033482142857144, - "backendVersion": "image:rocm/sgl-dev:sglang-0.5.9-rocm720-mi35x-mori-0227-2", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28271245352", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271245352", - "createdAt": "2026-06-26T23:39:01.384245+00:00", - "sha": "ee4ffe77871d0200cb4a78c96d3ae9f692e9af02" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 39.319999516010284, - "p90": 42.11999848484993, - "p95": 43.15999895334244, - "p99": 46.52100056409836 - }, - "combine": { - "p50": 15.399999916553497, - "p90": 17.601000145077705, - "p95": 18.75999942421913, - "p99": 21.320000290870667 - }, - "roundtrip": { - "p50": 54.23999950289726, - "p90": 57.440001517534256, - "p95": 58.921001851558685, - "p99": 60.95999851822853 - }, - "isolatedSum": { - "p50": 54.71999943256378, - "p90": 59.720998629927635, - "p95": 61.91999837756157, - "p99": 67.84100085496902 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 587776, - "combineLogicalBytes": 587776, - "fanoutMean": 5.125, - "recvTokensMax": 7, - "stragglerRank": 2, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 41.71999916434288, - "p90": 44.84099894762039, - "p95": 46.4400015771389, - "p99": 49.15999993681908 - }, - "combine": { - "p50": 15.599999576807022, - "p90": 17.839999869465828, - "p95": 19.88000050187111, - "p99": 22.5600004196167 - }, - "roundtrip": { - "p50": 57.08099901676178, - "p90": 60.67999824881554, - "p95": 61.59999966621399, - "p99": 63.48100304603577 - }, - "isolatedSum": { - "p50": 57.3199987411499, - "p90": 62.68099881708622, - "p95": 
66.32000207901001, - "p99": 71.72000035643578 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1161216, - "combineLogicalBytes": 1161216, - "fanoutMean": 5.0625, - "recvTokensMax": 13, - "stragglerRank": 2, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 41.88000038266182, - "p90": 44.08099874854088, - "p95": 45.120999217033386, - "p99": 48.239998519420624 - }, - "combine": { - "p50": 18.719999119639397, - "p90": 21.04100026190281, - "p95": 22.760000079870224, - "p99": 26.760000735521317 - }, - "roundtrip": { - "p50": 61.43999844789505, - "p90": 64.43999707698822, - "p95": 65.68100303411484, - "p99": 67.87999719381332 - }, - "isolatedSum": { - "p50": 60.599999502301216, - "p90": 65.12199901044369, - "p95": 67.88099929690361, - "p99": 74.99999925494194 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2394112, - "combineLogicalBytes": 2394112, - "fanoutMean": 5.21875, - "recvTokensMax": 23, - "stragglerRank": 2, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 41.839998215436935, - "p90": 44.79999840259552, - "p95": 46.23999819159508, - "p99": 48.36000129580498 - }, - "combine": { - "p50": 21.199999377131462, - "p90": 22.95999974012375, - "p95": 24.19999986886978, - "p99": 26.040000841021538 - }, - "roundtrip": { - "p50": 61.51999905705452, - "p90": 64.92000073194504, - "p95": 65.92000275850296, - "p99": 68.08000057935715 - }, - "isolatedSum": { - "p50": 63.0399975925684, - "p90": 67.75999814271927, - "p95": 70.43999806046486, - "p99": 74.40000213682652 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4845568, - "combineLogicalBytes": 4845568, - "fanoutMean": 5.28125, - "recvTokensMax": 45, - "stragglerRank": 2, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 
42.319998145103455, - "p90": 44.759999960660934, - "p95": 46.28000035881996, - "p99": 49.240998923778534 - }, - "combine": { - "p50": 24.879999458789825, - "p90": 27.079999446868896, - "p95": 28.440000489354134, - "p99": 56.88000097870827 - }, - "roundtrip": { - "p50": 66.3599967956543, - "p90": 69.95999813079834, - "p95": 70.91999799013138, - "p99": 73.00099730491638 - }, - "isolatedSum": { - "p50": 67.19999760389328, - "p90": 71.83999940752983, - "p95": 74.7200008481741, - "p99": 106.1209999024868 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 9676800, - "combineLogicalBytes": 9676800, - "fanoutMean": 5.2734375, - "recvTokensMax": 88, - "stragglerRank": 2, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-2d0599c0", - "identity": "mi355x|mori|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|c774c8e4abb34da", - "colorKey": "mi355x_2fa43515", - "comparisonKey": "2796ed88af4b14b0", - "schemaVersion": 3, - "generatedAt": "2026-06-26T15:40:45.756534+00:00", - "status": "valid", - "publicationStatus": "diagnostic", - "runner": "mi355x-amds_04", - "sku": "mi355x", - "backend": "mori", - "phase": "decode", - "mode": "normal", - "resourceMode": "normalized", - "suite": "resource-constrained", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "mi355x-xgmi", - "transport": "xgmi", - "worldSize": 8, - "epSize": 8, - "label": "MI355X EP8 · mori · bf16 (norm)", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - "activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": 0.18, - "achievedFraction": 0.3125, - "configuredUnits": 80, - "deviceUnits": 256, - "resourceClass": "unknown", - 
"conformanceClass": "minimum-functional", - "fixedKernel": false, - "paretoEligible": false - }, - "placement": { - "kind": "packed", - "nodes": 2, - "gpusPerNode": 8, - "scaleUpDomain": 8 - }, - "routingConsistent": true, - "traceSignature": "c774c8e4abb34da", - "workloadId": "set:5:d8d49658059863f2", - "workloadSource": "canonical-serialized", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": "image:rocm/sgl-dev:sglang-0.5.9-rocm720-mi35x-mori-0227-2", - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28247575150", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28247575150", - "createdAt": "2026-06-26T15:40:45.756534+00:00", - "sha": "fd23d02b65dba6f1ed963342b188022fc27263d1" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 8, - "dispatch": { - "p50": 40.55999964475632, - "p90": 43.15999895334244, - "p95": 44.881001114845276, - "p99": 47.55999892950058 - }, - "combine": { - "p50": 16.119999811053276, - "p90": 18.719999119639397, - "p95": 19.840000197291374, - "p99": 22.520000115036964 - }, - "roundtrip": { - "p50": 56.040000170469284, - "p90": 59.20000001788139, - "p95": 60.80099940299988, - "p99": 63.120998442173004 - }, - "isolatedSum": { - "p50": 56.67999945580959, - "p90": 61.879998072981834, - "p95": 64.72100131213665, - "p99": 70.07999904453754 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 630784, - "combineLogicalBytes": 630784, - "fanoutMean": 5.5, - "recvTokensMax": 7, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 2, - "globalTokens": 16, - "dispatch": { - "p50": 42.55999997258186, - "p90": 45.441001653671265, - "p95": 47.040000557899475, - "p99": 49.959998577833176 - }, - "combine": { - "p50": 16.16000011563301, - "p90": 18.360000103712082, - "p95": 19.600000232458115, - "p99": 22.63999916613102 - }, - 
"roundtrip": { - "p50": 58.83999913930893, - "p90": 61.88099831342697, - "p95": 63.48100304603577, - "p99": 65.40100276470184 - }, - "isolatedSum": { - "p50": 58.720000088214874, - "p90": 63.80100175738335, - "p95": 66.64000079035759, - "p99": 72.5999977439642 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1232896, - "combineLogicalBytes": 1232896, - "fanoutMean": 5.375, - "recvTokensMax": 13, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 4, - "globalTokens": 32, - "dispatch": { - "p50": 42.160000652074814, - "p90": 44.840000569820404, - "p95": 46.28000035881996, - "p99": 49.84100162982941 - }, - "combine": { - "p50": 19.039999693632126, - "p90": 22.1599992364645, - "p95": 23.48100021481514, - "p99": 54.63999882340431 - }, - "roundtrip": { - "p50": 61.59999966621399, - "p90": 64.71999734640121, - "p95": 65.76000154018402, - "p99": 68.36000084877014 - }, - "isolatedSum": { - "p50": 61.20000034570694, - "p90": 66.9999998062849, - "p95": 69.7610005736351, - "p99": 104.48100045323372 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 2480128, - "combineLogicalBytes": 2480128, - "fanoutMean": 5.40625, - "recvTokensMax": 29, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 8, - "globalTokens": 64, - "dispatch": { - "p50": 42.52000153064728, - "p90": 45.1200008392334, - "p95": 46.080999076366425, - "p99": 48.8400012254715 - }, - "combine": { - "p50": 20.479999482631683, - "p90": 22.520000115036964, - "p95": 23.479999974370003, - "p99": 25.800000876188278 - }, - "roundtrip": { - "p50": 62.67999857664108, - "p90": 65.5599981546402, - "p95": 66.880002617836, - "p99": 68.56100261211395 - }, - "isolatedSum": { - "p50": 63.00000101327896, - "p90": 67.64000095427036, - "p95": 69.56099905073643, - "p99": 74.64000210165977 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 4974592, - "combineLogicalBytes": 4974592, - "fanoutMean": 
5.421875, - "recvTokensMax": 47, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - }, - { - "tokensPerRank": 16, - "globalTokens": 128, - "dispatch": { - "p50": 42.67999902367592, - "p90": 45.27999833226204, - "p95": 46.799998730421066, - "p99": 49.720000475645065 - }, - "combine": { - "p50": 24.921000003814697, - "p90": 27.240000665187836, - "p95": 28.07999961078167, - "p99": 30.27999959886074 - }, - "roundtrip": { - "p50": 67.9209977388382, - "p90": 71.04100286960602, - "p95": 72.12000340223312, - "p99": 74.08100366592407 - }, - "isolatedSum": { - "p50": 67.60099902749062, - "p90": 72.51999899744987, - "p95": 74.87999834120274, - "p99": 80.0000000745058 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 9920512, - "combineLogicalBytes": 9920512, - "fanoutMean": 5.40625, - "recvTokensMax": 92, - "stragglerRank": 0, - "correct": true, - "samplesPooled": 600, - "trials": 3 - } - ] - }, - { - "id": "cx-cd519ebd", - "identity": "mi355x|nccl-ep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|16|decode|normal|none|none|0|normalized|0.18|1a4734625a379e3", - "colorKey": "mi355x_1180f01d", - "comparisonKey": "919b62d5ead26bb1", - "schemaVersion": 3, - "generatedAt": "2026-06-28T17:32:29.450290+00:00", - "status": "valid", - "publicationStatus": "comparable-experimental", - "runner": "mi355x-amds_04", - "sku": "mi355x", - "backend": "nccl-ep", - "phase": "decode", - "mode": "normal", - "resourceMode": "normalized", - "suite": "resource-constrained", - "comparisonClass": "standardized", - "measurementContract": "layout-and-dispatch-v1", - "topologyClass": "mi355x-multinode-rdma", - "transport": "rdma", - "worldSize": 16, - "epSize": 16, - "label": "MI355X EP16 · nccl-ep · bf16 (norm)", - "model": "DeepSeek-V3/V4", - "shape": { - "hidden": 7168, - "topk": 8, - "experts": 256, - "routing": "uniform", - "routingLabel": "uniform", - "routingStep": 0, - "unevenTokens": "none", - "eplbEnabled": false, - "dispatchDtype": "bf16", - 
"activationProfile": "normal", - "combineQuantMode": "none" - }, - "resourceProfile": { - "requestedFraction": 0.18, - "achievedFraction": null, - "configuredUnits": null, - "deviceUnits": 256, - "resourceClass": "resource-constrained", - "conformanceClass": "resource-conforming", - "fixedKernel": false, - "paretoEligible": true - }, - "placement": { - "kind": "packed", - "nodes": 2, - "gpusPerNode": 16, - "scaleUpDomain": 16 - }, - "routingConsistent": true, - "traceSignature": "1a4734625a379e3", - "workloadId": null, - "workloadSource": "seeded-runtime", - "eplbImbalanceBefore": null, - "eplbImbalanceAfter": null, - "backendVersion": null, - "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", - "repository": "SemiAnalysisAI/InferenceX", - "run": { - "id": "28328718973", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28328718973", - "createdAt": "2026-06-28T17:32:29.450290+00:00", - "sha": "41135333c6788fca7a4051185dfbb3a850649ed5" - }, - "rows": [ - { - "tokensPerRank": 1, - "globalTokens": 16, - "dispatch": { - "p50": 360.44201254844666, - "p90": 381.12300634384155, - "p95": 1227.2510528564453, - "p99": 1227.2510528564453 - }, - "combine": { - "p50": 120.64100056886673, - "p90": 125.08100271224976, - "p95": 169.5210039615631, - "p99": 169.5210039615631 - }, - "roundtrip": { - "p50": 445.8029866218567, - "p90": 475.7640063762665, - "p95": 482.00398683547974, - "p99": 482.00398683547974 - }, - "isolatedSum": { - "p50": 481.0830131173134, - "p90": 506.2040090560913, - "p95": 1396.7720568180084, - "p99": 1396.7720568180084 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 1505280, - "combineLogicalBytes": 1505280, - "fanoutMean": 6.5625, - "recvTokensMax": 12, - "stragglerRank": 15, - "correct": true, - "samplesPooled": 8, - "trials": 1 - }, - { - "tokensPerRank": 2, - "globalTokens": 32, - "dispatch": { - "p50": 345.1229929924011, - "p90": 452.7229964733124, - "p95": 493.32401156425476, - "p99": 
493.32401156425476 - }, - "combine": { - "p50": 124.20099973678589, - "p90": 149.48099851608276, - "p95": 168.08100044727325, - "p99": 168.08100044727325 - }, - "roundtrip": { - "p50": 448.28298687934875, - "p90": 470.24399042129517, - "p95": 487.1650040149689, - "p99": 487.1650040149689 - }, - "isolatedSum": { - "p50": 469.323992729187, - "p90": 602.2039949893951, - "p95": 661.405012011528, - "p99": 661.405012011528 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 3067904, - "combineLogicalBytes": 3067904, - "fanoutMean": 6.6875, - "recvTokensMax": 24, - "stragglerRank": 5, - "correct": true, - "samplesPooled": 8, - "trials": 1 - }, - { - "tokensPerRank": 4, - "globalTokens": 64, - "dispatch": { - "p50": 352.80299186706543, - "p90": 358.8019907474518, - "p95": 375.04300475120544, - "p99": 375.04300475120544 - }, - "combine": { - "p50": 128.24100255966187, - "p90": 134.04099643230438, - "p95": 137.12100684642792, - "p99": 137.12100684642792 - }, - "roundtrip": { - "p50": 448.76399636268616, - "p90": 456.76299929618835, - "p95": 464.20300006866455, - "p99": 464.20300006866455 - }, - "isolatedSum": { - "p50": 481.0439944267273, - "p90": 492.84298717975616, - "p95": 512.1640115976334, - "p99": 512.1640115976334 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 5992448, - "combineLogicalBytes": 5992448, - "fanoutMean": 6.53125, - "recvTokensMax": 43, - "stragglerRank": 1, - "correct": true, - "samplesPooled": 8, - "trials": 1 - }, - { - "tokensPerRank": 8, - "globalTokens": 128, - "dispatch": { - "p50": 430.6829869747162, - "p90": 1308.8120222091675, - "p95": 1478.0919551849365, - "p99": 1478.0919551849365 - }, - "combine": { - "p50": 140.1209980249405, - "p90": 159.64199602603912, - "p95": 194.28199529647827, - "p99": 194.28199529647827 - }, - "roundtrip": { - "p50": 471.68299555778503, - "p90": 499.44400787353516, - "p95": 1358.8520288467407, - "p99": 1358.8520288467407 - }, - "isolatedSum": { - "p50": 570.8039849996567, - "p90": 
1468.4540182352066, - "p95": 1672.3739504814148, - "p99": 1672.3739504814148 - }, - "roundtripMeasured": true, - "dispatchLogicalBytes": 12214272, - "combineLogicalBytes": 12214272, - "fanoutMean": 6.65625, - "recvTokensMax": 84, - "stragglerRank": 2, - "correct": true, - "samplesPooled": 8, - "trials": 1 - } - ] - } - ], - "failures": [ - { - "id": "cxf-6e691abd", - "identity": "h100|deepep|7168|8|256|fp8|ll|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|ac583971f94b176", - "generatedAt": "2026-06-26T17:32:59.549027+00:00", - "publicationStatus": "diagnostic", - "status": "valid", - "sku": "h100", - "backend": "deepep", - "phase": "decode", - "config": "fp8/ll/layout-and-dispatch", - "reason": "anomaly:roundtrip_gt_isolated_sum", - "returnCode": null, - "run": { - "id": "28254359089", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254359089", - "createdAt": "2026-06-26T17:32:59.549027+00:00", - "sha": "60dec7d70f554e252fec87709e2be52752947db1" - } - }, - { - "id": "cxf-25e7e895", - "identity": "h100|deepep|7168|8|256|fp8|ll|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", - "generatedAt": "2026-06-26T23:49:09.827299+00:00", - "publicationStatus": "diagnostic", - "status": "valid", - "sku": "h100", - "backend": "deepep", - "phase": "decode", - "config": "fp8/ll/layout-and-dispatch", - "reason": "anomaly:roundtrip_gt_isolated_sum", - "returnCode": null, - "run": { - "id": "28271594334", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271594334", - "createdAt": "2026-06-26T23:49:09.827299+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - } - }, - { - "id": "cxf-433580a5", - "identity": "h100|deepep|7168|8|256|fp8|ll|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", - "generatedAt": "2026-06-26T23:49:16.484836+00:00", - "publicationStatus": "diagnostic", - "status": "valid", - "sku": "h100", - "backend": "deepep", - 
"phase": "decode", - "config": "fp8/ll/runtime-visible", - "reason": "anomaly:roundtrip_gt_isolated_sum", - "returnCode": null, - "run": { - "id": "28271598000", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271598000", - "createdAt": "2026-06-26T23:49:16.484836+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - } - }, - { - "id": "cxf-bf8e2b86", - "identity": "h100|uccl||||unknown|normal|unknown|unknown||decode|normal|none|none|0|tuned||", - "generatedAt": "1970-01-01T00:00:00.000Z", - "publicationStatus": "failed", - "status": "failed", - "sku": "h100", - "backend": "uccl", - "phase": "decode", - "config": "unknown/normal/unknown", - "reason": "unknown", - "returnCode": 1, - "run": { - "id": null, - "url": null, - "createdAt": "1970-01-01T00:00:00.000Z", - "sha": null - } - }, - { - "id": "cxf-70961aef", - "identity": "h200|deepep|7168|8|256|fp8|ll|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|ac583971f94b176", - "generatedAt": "2026-06-26T17:31:08.227503+00:00", - "publicationStatus": "diagnostic", - "status": "valid", - "sku": "h200", - "backend": "deepep", - "phase": "decode", - "config": "fp8/ll/layout-and-dispatch", - "reason": "anomaly:roundtrip_gt_isolated_sum", - "returnCode": null, - "run": { - "id": "28254435010", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254435010", - "createdAt": "2026-06-26T17:31:08.227503+00:00", - "sha": "60dec7d70f554e252fec87709e2be52752947db1" - } - }, - { - "id": "cxf-e15f2b54", - "identity": "h200|deepep|7168|8|256|fp8|ll|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", - "generatedAt": "2026-06-26T23:51:34.222899+00:00", - "publicationStatus": "diagnostic", - "status": "valid", - "sku": "h200", - "backend": "deepep", - "phase": "decode", - "config": "fp8/ll/layout-and-dispatch", - "reason": "anomaly:roundtrip_gt_isolated_sum", - "returnCode": null, - "run": { - "id": "28271653486", - "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271653486", - "createdAt": "2026-06-26T23:51:34.222899+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - } - }, - { - "id": "cxf-33a53f33", - "identity": "h200|deepep|7168|8|256|fp8|ll|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", - "generatedAt": "2026-06-26T23:51:35.330044+00:00", - "publicationStatus": "diagnostic", - "status": "valid", - "sku": "h200", - "backend": "deepep", - "phase": "decode", - "config": "fp8/ll/runtime-visible", - "reason": "anomaly:roundtrip_gt_isolated_sum", - "returnCode": null, - "run": { - "id": "28271656517", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271656517", - "createdAt": "2026-06-26T23:51:35.330044+00:00", - "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" - } - }, - { - "id": "cxf-26d1baf4", - "identity": "mi355x|mori|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|c774c8e4abb34da", - "generatedAt": "2026-06-26T15:40:45.756534+00:00", - "publicationStatus": "diagnostic", - "status": "valid", - "sku": "mi355x", - "backend": "mori", - "phase": "decode", - "config": "bf16/normal/layout-and-dispatch", - "reason": "resource-nonconforming", - "returnCode": null, - "run": { - "id": "28247575150", - "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28247575150", - "createdAt": "2026-06-26T15:40:45.756534+00:00", - "sha": "fd23d02b65dba6f1ed963342b188022fc27263d1" - } - } - ], - "summaryCards": [ - { - "title": "Best backend · decode EP8", - "value": "flashinfer · B300", - "sub": "71 us RT p99 · mxfp8 · T=64" - }, - { - "title": "Best backend · prefill EP8", - "value": "flashinfer · B300", - "sub": "85 us RT p99 · nvfp4 · T=256" - }, - { - "title": "LL -> normal crossover", - "value": "T~128 tok/rank", - "sub": "H100 EP8 fp8 · normal RT p50 wins above this" - }, - { - "title": "Resource-normalized winner", - "value": "deepep · H100", - 
"sub": "113 us RT p99 · bf16 · T=64" - }, - { - "title": "Backend-default winner", - "value": "flashinfer · B300", - "sub": "71 us RT p99 · mxfp8 · T=64" - }, - { - "title": "Most unstable config", - "value": "H100 · deepep decode", - "sub": "3.27x p99 under zipf-heavy vs uniform", - "warning": true - }, - { - "title": "Invalid / diagnostic cases", - "value": "8", - "sub": "see Evidence failed table", - "warning": true, - "href": "#tab-evidence" - } - ], - "decision": { - "budgetsUs": [100, 250, 500], - "maxTokensUnderBudget": [ - { - "id": "cxb-3f6620d0", - "sku": "b300", - "backend": "deepep", - "phase": "decode", - "dispatchDtype": "bf16", - "epSize": 8, - "mode": "normal", - "budgets": { - "100": null, - "250": 128, - "500": 128 - } - }, - { - "id": "cxb-c27e2cad", - "sku": "b300", - "backend": "deepep", - "phase": "decode", - "dispatchDtype": "fp8", - "epSize": 8, - "mode": "normal", - "budgets": { - "100": null, - "250": 128, - "500": 128 - } - }, - { - "id": "cxb-567c4192", - "sku": "b300", - "backend": "deepep", - "phase": "decode", - "dispatchDtype": "fp8-directcast", - "epSize": 8, - "mode": "normal", - "budgets": { - "100": null, - "250": 128, - "500": 128 - } - }, - { - "id": "cxb-10314900", - "sku": "b300", - "backend": "deepep", - "phase": "decode", - "dispatchDtype": "fp8-pertoken", - "epSize": 8, - "mode": "normal", - "budgets": { - "100": null, - "250": 128, - "500": 128 - } - }, - { - "id": "cxb-238797ce", - "sku": "b300", - "backend": "deepep", - "phase": "prefill", - "dispatchDtype": "bf16", - "epSize": 8, - "mode": "normal", - "budgets": { - "100": null, - "250": 128, - "500": 512 - } - }, - { - "id": "cxb-67e5feea", - "sku": "b300", - "backend": "deepep", - "phase": "prefill", - "dispatchDtype": "fp8", - "epSize": 8, - "mode": "normal", - "budgets": { - "100": null, - "250": 128, - "500": 256 - } - }, - { - "id": "cxb-7cddf11f", - "sku": "b300", - "backend": "deepep-hybrid", - "phase": "decode", - "dispatchDtype": "bf16", - "epSize": 8, - 
"mode": "normal", - "budgets": { - "100": null, - "250": 32, - "500": 128 - } - }, - { - "id": "cxb-4a0e300c", - "sku": "b300", - "backend": "deepep-hybrid", - "phase": "prefill", - "dispatchDtype": "bf16", - "epSize": 8, - "mode": "normal", - "budgets": { - "100": null, - "250": null, - "500": 512 - } - }, - { - "id": "cxb-6136a9d3", - "sku": "b300", - "backend": "flashinfer", - "phase": "decode", - "dispatchDtype": "bf16", - "epSize": 8, - "mode": "normal", - "budgets": { - "100": 128, - "250": 128, - "500": 128 - } - }, - { - "id": "cxb-30070070", - "sku": "b300", - "backend": "flashinfer", - "phase": "decode", - "dispatchDtype": "fp8", - "epSize": 8, - "mode": "normal", - "budgets": { - "100": 128, - "250": 128, - "500": 128 - } - }, - { - "id": "cxb-9a73b5f5", - "sku": "b300", - "backend": "flashinfer", - "phase": "decode", - "dispatchDtype": "mxfp8", - "epSize": 8, - "mode": "normal", - "budgets": { - "100": 128, - "250": 128, - "500": 128 - } - }, - { - "id": "cxb-207d8ef2", - "sku": "b300", - "backend": "flashinfer", - "phase": "prefill", - "dispatchDtype": "bf16", - "epSize": 8, - "mode": "normal", - "budgets": { - "100": 128, - "250": 512, - "500": 1024 - } - }, - { - "id": "cxb-ae942e6d", - "sku": "b300", - "backend": "flashinfer", - "phase": "prefill", - "dispatchDtype": "fp8", - "epSize": 8, - "mode": "normal", - "budgets": { - "100": 256, - "250": 1024, - "500": 2048 - } - }, - { - "id": "cxb-dede56e2", - "sku": "b300", - "backend": "flashinfer", - "phase": "prefill", - "dispatchDtype": "mxfp8", - "epSize": 8, - "mode": "normal", - "budgets": { - "100": 256, - "250": 1024, - "500": 2048 - } - }, - { - "id": "cxb-85dec801", - "sku": "b300", - "backend": "flashinfer", - "phase": "prefill", - "dispatchDtype": "nvfp4", - "epSize": 8, - "mode": "normal", - "budgets": { - "100": 256, - "250": 1024, - "500": 2048 - } - }, - { - "id": "cxb-2fdde1de", - "sku": "b300", - "backend": "uccl", - "phase": "decode", - "dispatchDtype": "bf16", - "epSize": 8, - "mode": 
"normal", - "budgets": { - "100": null, - "250": 128, - "500": 128 - } - }, - { - "id": "cxb-8d828593", - "sku": "b300", - "backend": "uccl", - "phase": "prefill", - "dispatchDtype": "bf16", - "epSize": 8, - "mode": "normal", - "budgets": { - "100": null, - "250": 128, - "500": 512 - } - }, - { - "id": "cxb-7171c240", - "sku": "h100", - "backend": "deepep", - "phase": "decode", - "dispatchDtype": "bf16", - "epSize": 8, - "mode": "normal", - "budgets": { - "100": null, - "250": 128, - "500": 128 - } - }, - { - "id": "cxb-6f4d88a5", - "sku": "h100", - "backend": "deepep", - "phase": "decode", - "dispatchDtype": "bf16", - "epSize": 8, - "mode": "ll", - "budgets": { - "100": 32, - "250": 128, - "500": 128 - } - }, - { - "id": "cxb-416fcf7d", - "sku": "h100", - "backend": "deepep", - "phase": "decode", - "dispatchDtype": "fp8", - "epSize": 8, - "mode": "normal", - "budgets": { - "100": null, - "250": 128, - "500": 128 - } - }, - { - "id": "cxb-d35502c2", - "sku": "h100", - "backend": "deepep", - "phase": "decode", - "dispatchDtype": "fp8-directcast", - "epSize": 8, - "mode": "normal", - "budgets": { - "100": null, - "250": 128, - "500": 128 - } - }, - { - "id": "cxb-779ba710", - "sku": "h100", - "backend": "deepep", - "phase": "decode", - "dispatchDtype": "fp8-pertoken", - "epSize": 8, - "mode": "normal", - "budgets": { - "100": null, - "250": 128, - "500": 128 - } - }, - { - "id": "cxb-d524fd7e", - "sku": "h100", - "backend": "deepep", - "phase": "prefill", - "dispatchDtype": "bf16", - "epSize": 8, - "mode": "normal", - "budgets": { - "100": null, - "250": 128, - "500": 512 - } - }, - { - "id": "cxb-bf310e7a", - "sku": "h100", - "backend": "deepep", - "phase": "prefill", - "dispatchDtype": "fp8", - "epSize": 8, - "mode": "normal", - "budgets": { - "100": null, - "250": 128, - "500": 256 - } - }, - { - "id": "cxb-0f748c2f", - "sku": "h100", - "backend": "deepep-hybrid", - "phase": "decode", - "dispatchDtype": "bf16", - "epSize": 8, - "mode": "normal", - "budgets": { - 
"100": null, - "250": 2, - "500": 128 - } - }, - { - "id": "cxb-402bdadc", - "sku": "h100", - "backend": "deepep-hybrid", - "phase": "prefill", - "dispatchDtype": "bf16", - "epSize": 8, - "mode": "normal", - "budgets": { - "100": null, - "250": null, - "500": 512 - } - }, - { - "id": "cxb-f1858975", - "sku": "h100", - "backend": "flashinfer", - "phase": "decode", - "dispatchDtype": "bf16", - "epSize": 8, - "mode": "normal", - "budgets": { - "100": null, - "250": 128, - "500": 128 - } - }, - { - "id": "cxb-236b5900", - "sku": "h100", - "backend": "flashinfer", - "phase": "decode", - "dispatchDtype": "fp8", - "epSize": 8, - "mode": "normal", - "budgets": { - "100": null, - "250": 128, - "500": 128 - } - }, - { - "id": "cxb-0d201725", - "sku": "h100", - "backend": "flashinfer", - "phase": "decode", - "dispatchDtype": "mxfp8", - "epSize": 8, - "mode": "normal", - "budgets": { - "100": null, - "250": 128, - "500": 128 - } - }, - { - "id": "cxb-6fee4962", - "sku": "h100", - "backend": "flashinfer", - "phase": "prefill", - "dispatchDtype": "bf16", - "epSize": 8, - "mode": "normal", - "budgets": { - "100": 128, - "250": 256, - "500": 512 - } - }, - { - "id": "cxb-6d37a6fd", - "sku": "h100", - "backend": "flashinfer", - "phase": "prefill", - "dispatchDtype": "fp8", - "epSize": 8, - "mode": "normal", - "budgets": { - "100": null, - "250": 512, - "500": 1024 - } - }, - { - "id": "cxb-00728192", - "sku": "h100", - "backend": "flashinfer", - "phase": "prefill", - "dispatchDtype": "mxfp8", - "epSize": 8, - "mode": "normal", - "budgets": { - "100": null, - "250": 512, - "500": 1024 - } - }, - { - "id": "cxb-5657eb6e", - "sku": "h100", - "backend": "uccl", - "phase": "decode", - "dispatchDtype": "bf16", - "epSize": 8, - "mode": "normal", - "budgets": { - "100": null, - "250": 1, - "500": 128 - } - }, - { - "id": "cxb-8af55e63", - "sku": "h100", - "backend": "uccl", - "phase": "prefill", - "dispatchDtype": "bf16", - "epSize": 8, - "mode": "normal", - "budgets": { - "100": null, - 
"250": null, - "500": 512 - } - }, - { - "id": "cxb-a3bb3bd5", - "sku": "h200", - "backend": "deepep", - "phase": "decode", - "dispatchDtype": "bf16", - "epSize": 8, - "mode": "normal", - "budgets": { - "100": null, - "250": 128, - "500": 128 - } - }, - { - "id": "cxb-274a06b0", - "sku": "h200", - "backend": "deepep", - "phase": "decode", - "dispatchDtype": "bf16", - "epSize": 8, - "mode": "ll", - "budgets": { - "100": 32, - "250": 128, - "500": 128 - } - }, - { - "id": "cxb-1d12a6ce", - "sku": "h200", - "backend": "deepep", - "phase": "decode", - "dispatchDtype": "fp8", - "epSize": 8, - "mode": "normal", - "budgets": { - "100": null, - "250": 32, - "500": 128 - } - }, - { - "id": "cxb-858b05cb", - "sku": "h200", - "backend": "deepep", - "phase": "decode", - "dispatchDtype": "fp8-directcast", - "epSize": 8, - "mode": "normal", - "budgets": { - "100": null, - "250": 32, - "500": 128 - } - }, - { - "id": "cxb-339f09b5", - "sku": "h200", - "backend": "deepep", - "phase": "decode", - "dispatchDtype": "fp8-pertoken", - "epSize": 8, - "mode": "normal", - "budgets": { - "100": null, - "250": null, - "500": 128 - } - }, - { - "id": "cxb-bc48bfe5", - "sku": "h200", - "backend": "deepep", - "phase": "prefill", - "dispatchDtype": "bf16", - "epSize": 8, - "mode": "normal", - "budgets": { - "100": null, - "250": 128, - "500": 512 - } - }, - { - "id": "cxb-e6cb64c3", - "sku": "h200", - "backend": "deepep", - "phase": "prefill", - "dispatchDtype": "fp8", - "epSize": 8, - "mode": "normal", - "budgets": { - "100": null, - "250": null, - "500": 256 - } - }, - { - "id": "cxb-d2620b3b", - "sku": "h200", - "backend": "uccl", - "phase": "decode", - "dispatchDtype": "bf16", - "epSize": 8, - "mode": "normal", - "budgets": { - "100": null, - "250": 8, - "500": 128 - } - }, - { - "id": "cxb-ec807828", - "sku": "h200", - "backend": "uccl", - "phase": "prefill", - "dispatchDtype": "bf16", - "epSize": 8, - "mode": "normal", - "budgets": { - "100": null, - "250": null, - "500": 512 - } - }, - { 
- "id": "cxb-279043f8", - "sku": "mi355x", - "backend": "mori", - "phase": "decode", - "dispatchDtype": "bf16", - "epSize": 8, - "mode": "normal", - "budgets": { - "100": 16, - "250": 16, - "500": 16 - } - } - ], - "recommendations": [ - { - "id": "cxr-d2992d7c", - "sku": "b300", - "phase": "decode", - "atTokensPerRank": 64, - "lowestP99DispatchUs": 71.4, - "config": "mxfp8/normal/layout-and-dispatch-v1/uniform/tuned", - "epSize": 8 - }, - { - "id": "cxr-1c3060b2", - "sku": "b300", - "phase": "prefill", - "atTokensPerRank": 256, - "lowestP99DispatchUs": 85, - "config": "nvfp4/normal/layout-and-dispatch-v1/uniform/tuned", - "epSize": 8 - }, - { - "id": "cxr-8fcf986c", - "sku": "h100", - "phase": "decode", - "atTokensPerRank": 64, - "lowestP99DispatchUs": 53.1, - "config": "fp8/ll/layout-and-dispatch-v1/uniform/normalized", - "epSize": 8 - }, - { - "id": "cxr-466c0bc2", - "sku": "h100", - "phase": "prefill", - "atTokensPerRank": 256, - "lowestP99DispatchUs": 104.6, - "config": "fp8/normal/cached-layout-comm-only-v1/uniform/tuned", - "epSize": 8 - }, - { - "id": "cxr-c2fe14a3", - "sku": "h200", - "phase": "decode", - "atTokensPerRank": 64, - "lowestP99DispatchUs": 62.1, - "config": "fp8/ll/layout-and-dispatch-v1/uniform/normalized", - "epSize": 8 - }, - { - "id": "cxr-7e4f951f", - "sku": "h200", - "phase": "prefill", - "atTokensPerRank": 256, - "lowestP99DispatchUs": 124.6, - "config": "fp8/normal/cached-layout-comm-only-v1/uniform/normalized", - "epSize": 8 - } - ], - "llCrossover": [ - { - "sku": "h100", - "ep": 8, - "dtype": "bf16", - "stat": "p50", - "basis": "measured-roundtrip", - "normal_faster_at_T": "never-in-range" - }, - { - "sku": "h100", - "ep": 8, - "dtype": "bf16", - "stat": "p99", - "basis": "measured-roundtrip", - "normal_faster_at_T": "never-in-range" - }, - { - "sku": "h100", - "ep": 8, - "dtype": "bf16", - "stat": "p50", - "basis": "measured-roundtrip", - "normal_faster_at_T": "never-in-range" - }, - { - "sku": "h100", - "ep": 8, - "dtype": "bf16", 
- "stat": "p99", - "basis": "measured-roundtrip", - "normal_faster_at_T": "never-in-range" - }, - { - "sku": "h100", - "ep": 8, - "dtype": "bf16", - "stat": "p50", - "basis": "measured-roundtrip", - "normal_faster_at_T": "never-in-range" - }, - { - "sku": "h100", - "ep": 8, - "dtype": "bf16", - "stat": "p99", - "basis": "measured-roundtrip", - "normal_faster_at_T": "never-in-range" - }, - { - "sku": "h100", - "ep": 8, - "dtype": "fp8", - "stat": "p50", - "basis": "measured-roundtrip", - "normal_faster_at_T": 128 - }, - { - "sku": "h100", - "ep": 8, - "dtype": "fp8", - "stat": "p99", - "basis": "measured-roundtrip", - "normal_faster_at_T": 128 - }, - { - "sku": "h100", - "ep": 8, - "dtype": "fp8", - "stat": "p50", - "basis": "measured-roundtrip", - "normal_faster_at_T": 128 - }, - { - "sku": "h100", - "ep": 8, - "dtype": "fp8", - "stat": "p99", - "basis": "measured-roundtrip", - "normal_faster_at_T": 128 - }, - { - "sku": "h100", - "ep": 8, - "dtype": "fp8", - "stat": "p50", - "basis": "measured-roundtrip", - "normal_faster_at_T": 128 - }, - { - "sku": "h100", - "ep": 8, - "dtype": "fp8", - "stat": "p99", - "basis": "measured-roundtrip", - "normal_faster_at_T": 128 - }, - { - "sku": "h200", - "ep": 8, - "dtype": "bf16", - "stat": "p50", - "basis": "measured-roundtrip", - "normal_faster_at_T": "never-in-range" - }, - { - "sku": "h200", - "ep": 8, - "dtype": "bf16", - "stat": "p99", - "basis": "measured-roundtrip", - "normal_faster_at_T": "never-in-range" - }, - { - "sku": "h200", - "ep": 8, - "dtype": "bf16", - "stat": "p50", - "basis": "measured-roundtrip", - "normal_faster_at_T": "never-in-range" - }, - { - "sku": "h200", - "ep": 8, - "dtype": "bf16", - "stat": "p99", - "basis": "measured-roundtrip", - "normal_faster_at_T": "never-in-range" - }, - { - "sku": "h200", - "ep": 8, - "dtype": "bf16", - "stat": "p50", - "basis": "measured-roundtrip", - "normal_faster_at_T": "never-in-range" - }, - { - "sku": "h200", - "ep": 8, - "dtype": "bf16", - "stat": "p99", - 
"basis": "measured-roundtrip", - "normal_faster_at_T": "never-in-range" - }, - { - "sku": "h200", - "ep": 8, - "dtype": "fp8", - "stat": "p50", - "basis": "measured-roundtrip", - "normal_faster_at_T": 128 - }, - { - "sku": "h200", - "ep": 8, - "dtype": "fp8", - "stat": "p99", - "basis": "measured-roundtrip", - "normal_faster_at_T": 128 - }, - { - "sku": "h200", - "ep": 8, - "dtype": "fp8", - "stat": "p50", - "basis": "measured-roundtrip", - "normal_faster_at_T": 128 - }, - { - "sku": "h200", - "ep": 8, - "dtype": "fp8", - "stat": "p99", - "basis": "measured-roundtrip", - "normal_faster_at_T": 128 - }, - { - "sku": "h200", - "ep": 8, - "dtype": "fp8", - "stat": "p50", - "basis": "measured-roundtrip", - "normal_faster_at_T": 128 - }, - { - "sku": "h200", - "ep": 8, - "dtype": "fp8", - "stat": "p99", - "basis": "measured-roundtrip", - "normal_faster_at_T": 128 - } - ], - "resourcePareto": [ - { - "sku": "b300", - "phase": "decode", - "dtype": "bf16", - "T": 1, - "n_points": 2, - "curve": [ - { - "achieved_fraction": 0.1351, - "dispatch_p50": 81.2, - "dispatch_p99": 93, - "resource_class": "backend-tuned" - }, - { - "achieved_fraction": 0.1824, - "dispatch_p50": 57, - "dispatch_p99": 73.4, - "resource_class": "resource-constrained" - } - ] - }, - { - "sku": "b300", - "phase": "decode", - "dtype": "bf16", - "T": 2, - "n_points": 2, - "curve": [ - { - "achieved_fraction": 0.1351, - "dispatch_p50": 80.9, - "dispatch_p99": 89.8, - "resource_class": "backend-tuned" - }, - { - "achieved_fraction": 0.1824, - "dispatch_p50": 57, - "dispatch_p99": 73.2, - "resource_class": "resource-constrained" - } - ] - }, - { - "sku": "b300", - "phase": "decode", - "dtype": "bf16", - "T": 4, - "n_points": 2, - "curve": [ - { - "achieved_fraction": 0.1351, - "dispatch_p50": 81.4, - "dispatch_p99": 107, - "resource_class": "backend-tuned" - }, - { - "achieved_fraction": 0.1824, - "dispatch_p50": 57.8, - "dispatch_p99": 68.1, - "resource_class": "resource-constrained" - } - ] - }, - { - "sku": 
"b300", - "phase": "decode", - "dtype": "bf16", - "T": 8, - "n_points": 2, - "curve": [ - { - "achieved_fraction": 0.1351, - "dispatch_p50": 81.4, - "dispatch_p99": 93.3, - "resource_class": "backend-tuned" - }, - { - "achieved_fraction": 0.1824, - "dispatch_p50": 59.3, - "dispatch_p99": 68.2, - "resource_class": "resource-constrained" - } - ] - }, - { - "sku": "b300", - "phase": "decode", - "dtype": "bf16", - "T": 16, - "n_points": 2, - "curve": [ - { - "achieved_fraction": 0.1351, - "dispatch_p50": 82.8, - "dispatch_p99": 97.4, - "resource_class": "backend-tuned" - }, - { - "achieved_fraction": 0.1824, - "dispatch_p50": 62.8, - "dispatch_p99": 76.7, - "resource_class": "resource-constrained" - } - ] - }, - { - "sku": "b300", - "phase": "decode", - "dtype": "bf16", - "T": 32, - "n_points": 2, - "curve": [ - { - "achieved_fraction": 0.1351, - "dispatch_p50": 99.8, - "dispatch_p99": 106.8, - "resource_class": "backend-tuned" - }, - { - "achieved_fraction": 0.1824, - "dispatch_p50": 69.2, - "dispatch_p99": 81.7, - "resource_class": "resource-constrained" - } - ] - }, - { - "sku": "b300", - "phase": "decode", - "dtype": "bf16", - "T": 64, - "n_points": 2, - "curve": [ - { - "achieved_fraction": 0.1351, - "dispatch_p50": 111.1, - "dispatch_p99": 119, - "resource_class": "backend-tuned" - }, - { - "achieved_fraction": 0.1824, - "dispatch_p50": 82.5, - "dispatch_p99": 99.5, - "resource_class": "resource-constrained" - } - ] - }, - { - "sku": "b300", - "phase": "decode", - "dtype": "bf16", - "T": 128, - "n_points": 2, - "curve": [ - { - "achieved_fraction": 0.1351, - "dispatch_p50": 124.5, - "dispatch_p99": 138.8, - "resource_class": "backend-tuned" - }, - { - "achieved_fraction": 0.1824, - "dispatch_p50": 93.9, - "dispatch_p99": 105, - "resource_class": "resource-constrained" - } - ] - }, - { - "sku": "b300", - "phase": "decode", - "dtype": "fp8", - "T": 1, - "n_points": 2, - "curve": [ - { - "achieved_fraction": 0.1351, - "dispatch_p50": 56.6, - "dispatch_p99": 67.4, - 
"resource_class": "backend-tuned" - }, - { - "achieved_fraction": 0.1824, - "dispatch_p50": 56, - "dispatch_p99": 69.6, - "resource_class": "resource-constrained" - } - ] - }, - { - "sku": "b300", - "phase": "decode", - "dtype": "fp8", - "T": 2, - "n_points": 2, - "curve": [ - { - "achieved_fraction": 0.1351, - "dispatch_p50": 57.2, - "dispatch_p99": 67.6, - "resource_class": "backend-tuned" - }, - { - "achieved_fraction": 0.1824, - "dispatch_p50": 56.9, - "dispatch_p99": 68.5, - "resource_class": "resource-constrained" - } - ] - }, - { - "sku": "b300", - "phase": "decode", - "dtype": "fp8", - "T": 4, - "n_points": 2, - "curve": [ - { - "achieved_fraction": 0.1351, - "dispatch_p50": 59.2, - "dispatch_p99": 68, - "resource_class": "backend-tuned" - }, - { - "achieved_fraction": 0.1824, - "dispatch_p50": 57.6, - "dispatch_p99": 67.2, - "resource_class": "resource-constrained" - } - ] - }, - { - "sku": "b300", - "phase": "decode", - "dtype": "fp8", - "T": 8, - "n_points": 2, - "curve": [ - { - "achieved_fraction": 0.1351, - "dispatch_p50": 59.8, - "dispatch_p99": 69.4, - "resource_class": "backend-tuned" - }, - { - "achieved_fraction": 0.1824, - "dispatch_p50": 59.5, - "dispatch_p99": 73.2, - "resource_class": "resource-constrained" - } - ] - }, - { - "sku": "b300", - "phase": "decode", - "dtype": "fp8", - "T": 16, - "n_points": 2, - "curve": [ - { - "achieved_fraction": 0.1351, - "dispatch_p50": 61.2, - "dispatch_p99": 85.9, - "resource_class": "backend-tuned" - }, - { - "achieved_fraction": 0.1824, - "dispatch_p50": 60.7, - "dispatch_p99": 69.8, - "resource_class": "resource-constrained" - } - ] - }, - { - "sku": "b300", - "phase": "decode", - "dtype": "fp8", - "T": 32, - "n_points": 2, - "curve": [ - { - "achieved_fraction": 0.1351, - "dispatch_p50": 64, - "dispatch_p99": 75.7, - "resource_class": "backend-tuned" - }, - { - "achieved_fraction": 0.1824, - "dispatch_p50": 63.6, - "dispatch_p99": 72.5, - "resource_class": "resource-constrained" - } - ] - }, - { - 
"sku": "b300", - "phase": "decode", - "dtype": "fp8", - "T": 64, - "n_points": 2, - "curve": [ - { - "achieved_fraction": 0.1351, - "dispatch_p50": 72.4, - "dispatch_p99": 84, - "resource_class": "backend-tuned" - }, - { - "achieved_fraction": 0.1824, - "dispatch_p50": 72.6, - "dispatch_p99": 82.7, - "resource_class": "resource-constrained" - } - ] - }, - { - "sku": "b300", - "phase": "decode", - "dtype": "fp8", - "T": 128, - "n_points": 2, - "curve": [ - { - "achieved_fraction": 0.1351, - "dispatch_p50": 86.4, - "dispatch_p99": 98.1, - "resource_class": "backend-tuned" - }, - { - "achieved_fraction": 0.1824, - "dispatch_p50": 83.2, - "dispatch_p99": 90.2, - "resource_class": "resource-constrained" - } - ] - }, - { - "sku": "b300", - "phase": "prefill", - "dtype": "bf16", - "T": 128, - "n_points": 2, - "curve": [ - { - "achieved_fraction": 0.1351, - "dispatch_p50": 125.3, - "dispatch_p99": 135.5, - "resource_class": "backend-tuned" - }, - { - "achieved_fraction": 0.1824, - "dispatch_p50": 94.1, - "dispatch_p99": 116.4, - "resource_class": "resource-constrained" - } - ] - }, - { - "sku": "b300", - "phase": "prefill", - "dtype": "bf16", - "T": 256, - "n_points": 2, - "curve": [ - { - "achieved_fraction": 0.1351, - "dispatch_p50": 157.3, - "dispatch_p99": 174.4, - "resource_class": "backend-tuned" - }, - { - "achieved_fraction": 0.1824, - "dispatch_p50": 135.4, - "dispatch_p99": 151, - "resource_class": "resource-constrained" - } - ] - }, - { - "sku": "b300", - "phase": "prefill", - "dtype": "bf16", - "T": 512, - "n_points": 2, - "curve": [ - { - "achieved_fraction": 0.1351, - "dispatch_p50": 222.4, - "dispatch_p99": 234.7, - "resource_class": "backend-tuned" - }, - { - "achieved_fraction": 0.1824, - "dispatch_p50": 193.2, - "dispatch_p99": 206.7, - "resource_class": "resource-constrained" - } - ] - }, - { - "sku": "b300", - "phase": "prefill", - "dtype": "bf16", - "T": 1024, - "n_points": 2, - "curve": [ - { - "achieved_fraction": 0.1351, - "dispatch_p50": 346, - 
"dispatch_p99": 360.8, - "resource_class": "backend-tuned" - }, - { - "achieved_fraction": 0.1824, - "dispatch_p50": 326.2, - "dispatch_p99": 341.6, - "resource_class": "resource-constrained" - } - ] - }, - { - "sku": "b300", - "phase": "prefill", - "dtype": "bf16", - "T": 2048, - "n_points": 2, - "curve": [ - { - "achieved_fraction": 0.1351, - "dispatch_p50": 592, - "dispatch_p99": 609.6, - "resource_class": "backend-tuned" - }, - { - "achieved_fraction": 0.1824, - "dispatch_p50": 577.1, - "dispatch_p99": 591.3, - "resource_class": "resource-constrained" - } - ] - }, - { - "sku": "b300", - "phase": "prefill", - "dtype": "bf16", - "T": 4096, - "n_points": 2, - "curve": [ - { - "achieved_fraction": 0.1351, - "dispatch_p50": 1092.6, - "dispatch_p99": 1123.6, - "resource_class": "backend-tuned" - }, - { - "achieved_fraction": 0.1824, - "dispatch_p50": 1069.5, - "dispatch_p99": 1090.9, - "resource_class": "resource-constrained" - } - ] - }, - { - "sku": "b300", - "phase": "prefill", - "dtype": "fp8", - "T": 128, - "n_points": 2, - "curve": [ - { - "achieved_fraction": 0.1351, - "dispatch_p50": 86.3, - "dispatch_p99": 98.3, - "resource_class": "backend-tuned" - }, - { - "achieved_fraction": 0.1824, - "dispatch_p50": 83.5, - "dispatch_p99": 102.1, - "resource_class": "resource-constrained" - } - ] - }, - { - "sku": "b300", - "phase": "prefill", - "dtype": "fp8", - "T": 256, - "n_points": 2, - "curve": [ - { - "achieved_fraction": 0.1351, - "dispatch_p50": 118, - "dispatch_p99": 129.9, - "resource_class": "backend-tuned" - }, - { - "achieved_fraction": 0.1824, - "dispatch_p50": 116.7, - "dispatch_p99": 135.4, - "resource_class": "resource-constrained" - } - ] - }, - { - "sku": "b300", - "phase": "prefill", - "dtype": "fp8", - "T": 512, - "n_points": 2, - "curve": [ - { - "achieved_fraction": 0.1351, - "dispatch_p50": 176.9, - "dispatch_p99": 189.6, - "resource_class": "backend-tuned" - }, - { - "achieved_fraction": 0.1824, - "dispatch_p50": 177.8, - "dispatch_p99": 191.1, 
- "resource_class": "resource-constrained" - } - ] - }, - { - "sku": "b300", - "phase": "prefill", - "dtype": "fp8", - "T": 1024, - "n_points": 2, - "curve": [ - { - "achieved_fraction": 0.1351, - "dispatch_p50": 299.3, - "dispatch_p99": 312.9, - "resource_class": "backend-tuned" - }, - { - "achieved_fraction": 0.1824, - "dispatch_p50": 298, - "dispatch_p99": 319.1, - "resource_class": "resource-constrained" - } - ] - }, - { - "sku": "b300", - "phase": "prefill", - "dtype": "fp8", - "T": 2048, - "n_points": 2, - "curve": [ - { - "achieved_fraction": 0.1351, - "dispatch_p50": 535.1, - "dispatch_p99": 553.6, - "resource_class": "backend-tuned" - }, - { - "achieved_fraction": 0.1824, - "dispatch_p50": 541.5, - "dispatch_p99": 557.8, - "resource_class": "resource-constrained" - } - ] - }, - { - "sku": "b300", - "phase": "prefill", - "dtype": "fp8", - "T": 4096, - "n_points": 2, - "curve": [ - { - "achieved_fraction": 0.1351, - "dispatch_p50": 1012, - "dispatch_p99": 1036.7, - "resource_class": "backend-tuned" - }, - { - "achieved_fraction": 0.1824, - "dispatch_p50": 1019.6, - "dispatch_p99": 1045.3, - "resource_class": "resource-constrained" - } - ] - }, - { - "sku": "h100", - "phase": "decode", - "dtype": "bf16", - "T": 1, - "n_points": 4, - "curve": [ - { - "achieved_fraction": 0.0985, - "dispatch_p50": 97.2, - "dispatch_p99": 111.1, - "resource_class": "resource-constrained" - }, - { - "achieved_fraction": 0.1515, - "dispatch_p50": 127.7, - "dispatch_p99": 143.9, - "resource_class": "backend-tuned" - }, - { - "achieved_fraction": 0.1818, - "dispatch_p50": 95.7, - "dispatch_p99": 109.4, - "resource_class": "resource-constrained" - }, - { - "achieved_fraction": 0.5985, - "dispatch_p50": 96.3, - "dispatch_p99": 108.5, - "resource_class": "resource-constrained" - } - ] - }, - { - "sku": "h100", - "phase": "decode", - "dtype": "bf16", - "T": 2, - "n_points": 2, - "curve": [ - { - "achieved_fraction": 0.1515, - "dispatch_p50": 179.5, - "dispatch_p99": 194.9, - 
"resource_class": "backend-tuned" - }, - { - "achieved_fraction": 0.1818, - "dispatch_p50": 71.2, - "dispatch_p99": 107.9, - "resource_class": "resource-constrained" - } - ] - }, - { - "sku": "h100", - "phase": "decode", - "dtype": "bf16", - "T": 4, - "n_points": 2, - "curve": [ - { - "achieved_fraction": 0.1515, - "dispatch_p50": 130.9, - "dispatch_p99": 201, - "resource_class": "backend-tuned" - }, - { - "achieved_fraction": 0.1818, - "dispatch_p50": 95.2, - "dispatch_p99": 439.6, - "resource_class": "resource-constrained" - } - ] - }, - { - "sku": "h100", - "phase": "decode", - "dtype": "bf16", - "T": 8, - "n_points": 4, - "curve": [ - { - "achieved_fraction": 0.0985, - "dispatch_p50": 99.3, - "dispatch_p99": 113.5, - "resource_class": "resource-constrained" - }, - { - "achieved_fraction": 0.1515, - "dispatch_p50": 133.1, - "dispatch_p99": 479, - "resource_class": "backend-tuned" - }, - { - "achieved_fraction": 0.1818, - "dispatch_p50": 95.4, - "dispatch_p99": 113.5, - "resource_class": "resource-constrained" - }, - { - "achieved_fraction": 0.5985, - "dispatch_p50": 96.7, - "dispatch_p99": 112.2, - "resource_class": "resource-constrained" - } - ] - }, - { - "sku": "h100", - "phase": "decode", - "dtype": "bf16", - "T": 16, - "n_points": 2, - "curve": [ - { - "achieved_fraction": 0.1515, - "dispatch_p50": 129.6, - "dispatch_p99": 203.3, - "resource_class": "backend-tuned" - }, - { - "achieved_fraction": 0.1818, - "dispatch_p50": 95.7, - "dispatch_p99": 106.5, - "resource_class": "resource-constrained" - } - ] - }, - { - "sku": "h100", - "phase": "decode", - "dtype": "bf16", - "T": 32, - "n_points": 4, - "curve": [ - { - "achieved_fraction": 0.0985, - "dispatch_p50": 103.3, - "dispatch_p99": 121.4, - "resource_class": "resource-constrained" - }, - { - "achieved_fraction": 0.1515, - "dispatch_p50": 181.8, - "dispatch_p99": 324.5, - "resource_class": "backend-tuned" - }, - { - "achieved_fraction": 0.1818, - "dispatch_p50": 80.9, - "dispatch_p99": 113.2, - 
"resource_class": "resource-constrained" - }, - { - "achieved_fraction": 0.5985, - "dispatch_p50": 102.9, - "dispatch_p99": 114.4, - "resource_class": "resource-constrained" - } - ] - }, - { - "sku": "h100", - "phase": "decode", - "dtype": "bf16", - "T": 64, - "n_points": 2, - "curve": [ - { - "achieved_fraction": 0.1515, - "dispatch_p50": 147.7, - "dispatch_p99": 211.7, - "resource_class": "backend-tuned" - }, - { - "achieved_fraction": 0.1818, - "dispatch_p50": 103.5, - "dispatch_p99": 125.7, - "resource_class": "resource-constrained" - } - ] - }, - { - "sku": "h100", - "phase": "decode", - "dtype": "bf16", - "T": 128, - "n_points": 4, - "curve": [ - { - "achieved_fraction": 0.0985, - "dispatch_p50": 129.7, - "dispatch_p99": 143.9, - "resource_class": "resource-constrained" - }, - { - "achieved_fraction": 0.1515, - "dispatch_p50": 186.2, - "dispatch_p99": 208, - "resource_class": "backend-tuned" - }, - { - "achieved_fraction": 0.1818, - "dispatch_p50": 111.3, - "dispatch_p99": 139.9, - "resource_class": "resource-constrained" - }, - { - "achieved_fraction": 0.5985, - "dispatch_p50": 129.1, - "dispatch_p99": 142.1, - "resource_class": "resource-constrained" - } - ] - }, - { - "sku": "h100", - "phase": "decode", - "dtype": "fp8", - "T": 1, - "n_points": 2, - "curve": [ - { - "achieved_fraction": 0.1515, - "dispatch_p50": 98.8, - "dispatch_p99": 114.9, - "resource_class": "backend-tuned" - }, - { - "achieved_fraction": 0.1818, - "dispatch_p50": 98, - "dispatch_p99": 110.2, - "resource_class": "resource-constrained" - } - ] - }, - { - "sku": "h100", - "phase": "decode", - "dtype": "fp8", - "T": 2, - "n_points": 2, - "curve": [ - { - "achieved_fraction": 0.1515, - "dispatch_p50": 99.5, - "dispatch_p99": 111.6, - "resource_class": "backend-tuned" - }, - { - "achieved_fraction": 0.1818, - "dispatch_p50": 72.2, - "dispatch_p99": 105.1, - "resource_class": "resource-constrained" - } - ] - }, - { - "sku": "h100", - "phase": "decode", - "dtype": "fp8", - "T": 4, - 
"n_points": 2, - "curve": [ - { - "achieved_fraction": 0.1515, - "dispatch_p50": 99.3, - "dispatch_p99": 110.5, - "resource_class": "backend-tuned" - }, - { - "achieved_fraction": 0.1818, - "dispatch_p50": 72.3, - "dispatch_p99": 115.5, - "resource_class": "resource-constrained" - } - ] - }, - { - "sku": "h100", - "phase": "decode", - "dtype": "fp8", - "T": 8, - "n_points": 2, - "curve": [ - { - "achieved_fraction": 0.1515, - "dispatch_p50": 100.2, - "dispatch_p99": 111.9, - "resource_class": "backend-tuned" - }, - { - "achieved_fraction": 0.1818, - "dispatch_p50": 97.5, - "dispatch_p99": 113.4, - "resource_class": "resource-constrained" - } - ] - }, - { - "sku": "h100", - "phase": "decode", - "dtype": "fp8", - "T": 16, - "n_points": 2, - "curve": [ - { - "achieved_fraction": 0.1515, - "dispatch_p50": 98.9, - "dispatch_p99": 112.2, - "resource_class": "backend-tuned" - }, - { - "achieved_fraction": 0.1818, - "dispatch_p50": 97.1, - "dispatch_p99": 113.4, - "resource_class": "resource-constrained" - } - ] - }, - { - "sku": "h100", - "phase": "decode", - "dtype": "fp8", - "T": 32, - "n_points": 2, - "curve": [ - { - "achieved_fraction": 0.1515, - "dispatch_p50": 99.9, - "dispatch_p99": 181.1, - "resource_class": "backend-tuned" - }, - { - "achieved_fraction": 0.1818, - "dispatch_p50": 73.1, - "dispatch_p99": 112.1, - "resource_class": "resource-constrained" - } - ] - }, - { - "sku": "h100", - "phase": "decode", - "dtype": "fp8", - "T": 64, - "n_points": 2, - "curve": [ - { - "achieved_fraction": 0.1515, - "dispatch_p50": 101.4, - "dispatch_p99": 370.9, - "resource_class": "backend-tuned" - }, - { - "achieved_fraction": 0.1818, - "dispatch_p50": 78.7, - "dispatch_p99": 125.4, - "resource_class": "resource-constrained" - } - ] - }, - { - "sku": "h100", - "phase": "decode", - "dtype": "fp8", - "T": 128, - "n_points": 2, - "curve": [ - { - "achieved_fraction": 0.1515, - "dispatch_p50": 106.2, - "dispatch_p99": 117.6, - "resource_class": "backend-tuned" - }, - { - 
"achieved_fraction": 0.1818, - "dispatch_p50": 90.3, - "dispatch_p99": 117.1, - "resource_class": "resource-constrained" - } - ] - }, - { - "sku": "h100", - "phase": "prefill", - "dtype": "bf16", - "T": 128, - "n_points": 2, - "curve": [ - { - "achieved_fraction": 0.1515, - "dispatch_p50": 161.1, - "dispatch_p99": 170.8, - "resource_class": "backend-tuned" - }, - { - "achieved_fraction": 0.1818, - "dispatch_p50": 110.5, - "dispatch_p99": 166, - "resource_class": "resource-constrained" - } - ] - }, - { - "sku": "h100", - "phase": "prefill", - "dtype": "bf16", - "T": 256, - "n_points": 2, - "curve": [ - { - "achieved_fraction": 0.1515, - "dispatch_p50": 186.6, - "dispatch_p99": 197.3, - "resource_class": "backend-tuned" - }, - { - "achieved_fraction": 0.1818, - "dispatch_p50": 147.4, - "dispatch_p99": 154.3, - "resource_class": "resource-constrained" - } - ] - }, - { - "sku": "h100", - "phase": "prefill", - "dtype": "bf16", - "T": 512, - "n_points": 2, - "curve": [ - { - "achieved_fraction": 0.1515, - "dispatch_p50": 239.6, - "dispatch_p99": 250.6, - "resource_class": "backend-tuned" - }, - { - "achieved_fraction": 0.1818, - "dispatch_p50": 204.9, - "dispatch_p99": 226.4, - "resource_class": "resource-constrained" - } - ] - }, - { - "sku": "h100", - "phase": "prefill", - "dtype": "bf16", - "T": 1024, - "n_points": 2, - "curve": [ - { - "achieved_fraction": 0.1515, - "dispatch_p50": 346.9, - "dispatch_p99": 358.1, - "resource_class": "backend-tuned" - }, - { - "achieved_fraction": 0.1818, - "dispatch_p50": 319.9, - "dispatch_p99": 330.6, - "resource_class": "resource-constrained" - } - ] - }, - { - "sku": "h100", - "phase": "prefill", - "dtype": "bf16", - "T": 2048, - "n_points": 2, - "curve": [ - { - "achieved_fraction": 0.1515, - "dispatch_p50": 571.1, - "dispatch_p99": 621.2, - "resource_class": "backend-tuned" - }, - { - "achieved_fraction": 0.1818, - "dispatch_p50": 570.9, - "dispatch_p99": 593.9, - "resource_class": "resource-constrained" - } - ] - }, - { - 
"sku": "h100", - "phase": "prefill", - "dtype": "bf16", - "T": 4096, - "n_points": 2, - "curve": [ - { - "achieved_fraction": 0.1515, - "dispatch_p50": 1035.6, - "dispatch_p99": 1074.5, - "resource_class": "backend-tuned" - }, - { - "achieved_fraction": 0.1818, - "dispatch_p50": 1075.9, - "dispatch_p99": 1102.5, - "resource_class": "resource-constrained" - } - ] - }, - { - "sku": "h100", - "phase": "prefill", - "dtype": "fp8", - "T": 128, - "n_points": 2, - "curve": [ - { - "achieved_fraction": 0.1515, - "dispatch_p50": 90, - "dispatch_p99": 158.9, - "resource_class": "backend-tuned" - }, - { - "achieved_fraction": 0.1818, - "dispatch_p50": 89.6, - "dispatch_p99": 100.8, - "resource_class": "resource-constrained" - } - ] - }, - { - "sku": "h100", - "phase": "prefill", - "dtype": "fp8", - "T": 256, - "n_points": 2, - "curve": [ - { - "achieved_fraction": 0.1515, - "dispatch_p50": 107.5, - "dispatch_p99": 170.3, - "resource_class": "backend-tuned" - }, - { - "achieved_fraction": 0.1818, - "dispatch_p50": 106.2, - "dispatch_p99": 125.9, - "resource_class": "resource-constrained" - } - ] - }, - { - "sku": "h100", - "phase": "prefill", - "dtype": "fp8", - "T": 512, - "n_points": 2, - "curve": [ - { - "achieved_fraction": 0.1515, - "dispatch_p50": 147, - "dispatch_p99": 460.9, - "resource_class": "backend-tuned" - }, - { - "achieved_fraction": 0.1818, - "dispatch_p50": 138.5, - "dispatch_p99": 197.3, - "resource_class": "resource-constrained" - } - ] - }, - { - "sku": "h100", - "phase": "prefill", - "dtype": "fp8", - "T": 1024, - "n_points": 2, - "curve": [ - { - "achieved_fraction": 0.1515, - "dispatch_p50": 215.3, - "dispatch_p99": 223.5, - "resource_class": "backend-tuned" - }, - { - "achieved_fraction": 0.1818, - "dispatch_p50": 197.4, - "dispatch_p99": 216.3, - "resource_class": "resource-constrained" - } - ] - }, - { - "sku": "h100", - "phase": "prefill", - "dtype": "fp8", - "T": 2048, - "n_points": 2, - "curve": [ - { - "achieved_fraction": 0.1515, - 
"dispatch_p50": 354.8, - "dispatch_p99": 380.3, - "resource_class": "backend-tuned" - }, - { - "achieved_fraction": 0.1818, - "dispatch_p50": 318.7, - "dispatch_p99": 347.3, - "resource_class": "resource-constrained" - } - ] - }, - { - "sku": "h100", - "phase": "prefill", - "dtype": "fp8", - "T": 4096, - "n_points": 2, - "curve": [ - { - "achieved_fraction": 0.1515, - "dispatch_p50": 641.4, - "dispatch_p99": 655.5, - "resource_class": "backend-tuned" - }, - { - "achieved_fraction": 0.1818, - "dispatch_p50": 574.8, - "dispatch_p99": 604.8, - "resource_class": "resource-constrained" - } - ] - }, - { - "sku": "h200", - "phase": "decode", - "dtype": "bf16", - "T": 1, - "n_points": 2, - "curve": [ - { - "achieved_fraction": 0.1515, - "dispatch_p50": 123.6, - "dispatch_p99": 203.1, - "resource_class": "backend-tuned" - }, - { - "achieved_fraction": 0.1818, - "dispatch_p50": 74.1, - "dispatch_p99": 138, - "resource_class": "resource-constrained" - } - ] - }, - { - "sku": "h200", - "phase": "decode", - "dtype": "bf16", - "T": 2, - "n_points": 2, - "curve": [ - { - "achieved_fraction": 0.1515, - "dispatch_p50": 114.8, - "dispatch_p99": 181, - "resource_class": "backend-tuned" - }, - { - "achieved_fraction": 0.1818, - "dispatch_p50": 74.3, - "dispatch_p99": 131.7, - "resource_class": "resource-constrained" - } - ] - }, - { - "sku": "h200", - "phase": "decode", - "dtype": "bf16", - "T": 4, - "n_points": 2, - "curve": [ - { - "achieved_fraction": 0.1515, - "dispatch_p50": 111.7, - "dispatch_p99": 167.4, - "resource_class": "backend-tuned" - }, - { - "achieved_fraction": 0.1818, - "dispatch_p50": 75, - "dispatch_p99": 139.7, - "resource_class": "resource-constrained" - } - ] - }, - { - "sku": "h200", - "phase": "decode", - "dtype": "bf16", - "T": 8, - "n_points": 2, - "curve": [ - { - "achieved_fraction": 0.1515, - "dispatch_p50": 112.8, - "dispatch_p99": 166.3, - "resource_class": "backend-tuned" - }, - { - "achieved_fraction": 0.1818, - "dispatch_p50": 74.8, - "dispatch_p99": 
123.2, - "resource_class": "resource-constrained" - } - ] - }, - { - "sku": "h200", - "phase": "decode", - "dtype": "bf16", - "T": 16, - "n_points": 2, - "curve": [ - { - "achieved_fraction": 0.1515, - "dispatch_p50": 120.3, - "dispatch_p99": 217, - "resource_class": "backend-tuned" - }, - { - "achieved_fraction": 0.1818, - "dispatch_p50": 73.2, - "dispatch_p99": 195.9, - "resource_class": "resource-constrained" - } - ] - }, - { - "sku": "h200", - "phase": "decode", - "dtype": "bf16", - "T": 32, - "n_points": 2, - "curve": [ - { - "achieved_fraction": 0.1515, - "dispatch_p50": 123.6, - "dispatch_p99": 169.6, - "resource_class": "backend-tuned" - }, - { - "achieved_fraction": 0.1818, - "dispatch_p50": 82.3, - "dispatch_p99": 134.9, - "resource_class": "resource-constrained" - } - ] - }, - { - "sku": "h200", - "phase": "decode", - "dtype": "bf16", - "T": 64, - "n_points": 2, - "curve": [ - { - "achieved_fraction": 0.1515, - "dispatch_p50": 136, - "dispatch_p99": 197.4, - "resource_class": "backend-tuned" - }, - { - "achieved_fraction": 0.1818, - "dispatch_p50": 91.3, - "dispatch_p99": 146.8, - "resource_class": "resource-constrained" - } - ] - }, - { - "sku": "h200", - "phase": "decode", - "dtype": "bf16", - "T": 128, - "n_points": 2, - "curve": [ - { - "achieved_fraction": 0.1515, - "dispatch_p50": 156.2, - "dispatch_p99": 197.5, - "resource_class": "backend-tuned" - }, - { - "achieved_fraction": 0.1818, - "dispatch_p50": 116, - "dispatch_p99": 149.2, - "resource_class": "resource-constrained" - } - ] - }, - { - "sku": "h200", - "phase": "decode", - "dtype": "fp8", - "T": 1, - "n_points": 2, - "curve": [ - { - "achieved_fraction": 0.1515, - "dispatch_p50": 73, - "dispatch_p99": 139.2, - "resource_class": "backend-tuned" - }, - { - "achieved_fraction": 0.1818, - "dispatch_p50": 70.2, - "dispatch_p99": 121.2, - "resource_class": "resource-constrained" - } - ] - }, - { - "sku": "h200", - "phase": "decode", - "dtype": "fp8", - "T": 2, - "n_points": 2, - "curve": [ - { - 
"achieved_fraction": 0.1515, - "dispatch_p50": 71.4, - "dispatch_p99": 113.8, - "resource_class": "backend-tuned" - }, - { - "achieved_fraction": 0.1818, - "dispatch_p50": 67.6, - "dispatch_p99": 144.6, - "resource_class": "resource-constrained" - } - ] - }, - { - "sku": "h200", - "phase": "decode", - "dtype": "fp8", - "T": 4, - "n_points": 2, - "curve": [ - { - "achieved_fraction": 0.1515, - "dispatch_p50": 72.7, - "dispatch_p99": 146.8, - "resource_class": "backend-tuned" - }, - { - "achieved_fraction": 0.1818, - "dispatch_p50": 69.7, - "dispatch_p99": 228.6, - "resource_class": "resource-constrained" - } - ] - }, - { - "sku": "h200", - "phase": "decode", - "dtype": "fp8", - "T": 8, - "n_points": 2, - "curve": [ - { - "achieved_fraction": 0.1515, - "dispatch_p50": 70.1, - "dispatch_p99": 165.8, - "resource_class": "backend-tuned" - }, - { - "achieved_fraction": 0.1818, - "dispatch_p50": 70.5, - "dispatch_p99": 151.9, - "resource_class": "resource-constrained" - } - ] - }, - { - "sku": "h200", - "phase": "decode", - "dtype": "fp8", - "T": 16, - "n_points": 2, - "curve": [ - { - "achieved_fraction": 0.1515, - "dispatch_p50": 73.7, - "dispatch_p99": 146.1, - "resource_class": "backend-tuned" - }, - { - "achieved_fraction": 0.1818, - "dispatch_p50": 68.5, - "dispatch_p99": 126.1, - "resource_class": "resource-constrained" - } - ] - }, - { - "sku": "h200", - "phase": "decode", - "dtype": "fp8", - "T": 32, - "n_points": 2, - "curve": [ - { - "achieved_fraction": 0.1515, - "dispatch_p50": 71.6, - "dispatch_p99": 167.3, - "resource_class": "backend-tuned" - }, - { - "achieved_fraction": 0.1818, - "dispatch_p50": 68.3, - "dispatch_p99": 114.8, - "resource_class": "resource-constrained" - } - ] - }, - { - "sku": "h200", - "phase": "decode", - "dtype": "fp8", - "T": 64, - "n_points": 2, - "curve": [ - { - "achieved_fraction": 0.1515, - "dispatch_p50": 79.8, - "dispatch_p99": 125.7, - "resource_class": "backend-tuned" - }, - { - "achieved_fraction": 0.1818, - "dispatch_p50": 
74.5, - "dispatch_p99": 120.6, - "resource_class": "resource-constrained" - } - ] - }, - { - "sku": "h200", - "phase": "decode", - "dtype": "fp8", - "T": 128, - "n_points": 2, - "curve": [ - { - "achieved_fraction": 0.1515, - "dispatch_p50": 88.4, - "dispatch_p99": 115.8, - "resource_class": "backend-tuned" - }, - { - "achieved_fraction": 0.1818, - "dispatch_p50": 88.7, - "dispatch_p99": 129, - "resource_class": "resource-constrained" - } - ] - }, - { - "sku": "h200", - "phase": "prefill", - "dtype": "bf16", - "T": 128, - "n_points": 2, - "curve": [ - { - "achieved_fraction": 0.1515, - "dispatch_p50": 159.7, - "dispatch_p99": 266.7, - "resource_class": "backend-tuned" - }, - { - "achieved_fraction": 0.1818, - "dispatch_p50": 116.4, - "dispatch_p99": 160, - "resource_class": "resource-constrained" - } - ] - }, - { - "sku": "h200", - "phase": "prefill", - "dtype": "bf16", - "T": 256, - "n_points": 2, - "curve": [ - { - "achieved_fraction": 0.1515, - "dispatch_p50": 185.9, - "dispatch_p99": 239.7, - "resource_class": "backend-tuned" - }, - { - "achieved_fraction": 0.1818, - "dispatch_p50": 147, - "dispatch_p99": 191.4, - "resource_class": "resource-constrained" - } - ] - }, - { - "sku": "h200", - "phase": "prefill", - "dtype": "bf16", - "T": 512, - "n_points": 2, - "curve": [ - { - "achieved_fraction": 0.1515, - "dispatch_p50": 239.3, - "dispatch_p99": 267.2, - "resource_class": "backend-tuned" - }, - { - "achieved_fraction": 0.1818, - "dispatch_p50": 207.7, - "dispatch_p99": 267.9, - "resource_class": "resource-constrained" - } - ] - }, - { - "sku": "h200", - "phase": "prefill", - "dtype": "bf16", - "T": 1024, - "n_points": 2, - "curve": [ - { - "achieved_fraction": 0.1515, - "dispatch_p50": 351.7, - "dispatch_p99": 419.2, - "resource_class": "backend-tuned" - }, - { - "achieved_fraction": 0.1818, - "dispatch_p50": 324.9, - "dispatch_p99": 364.7, - "resource_class": "resource-constrained" - } - ] - }, - { - "sku": "h200", - "phase": "prefill", - "dtype": "bf16", - 
"T": 2048, - "n_points": 2, - "curve": [ - { - "achieved_fraction": 0.1515, - "dispatch_p50": 584.7, - "dispatch_p99": 635.1, - "resource_class": "backend-tuned" - }, - { - "achieved_fraction": 0.1818, - "dispatch_p50": 566.4, - "dispatch_p99": 609.2, - "resource_class": "resource-constrained" - } - ] - }, - { - "sku": "h200", - "phase": "prefill", - "dtype": "bf16", - "T": 4096, - "n_points": 2, - "curve": [ - { - "achieved_fraction": 0.1515, - "dispatch_p50": 1028.6, - "dispatch_p99": 1135.3, - "resource_class": "backend-tuned" - }, - { - "achieved_fraction": 0.1818, - "dispatch_p50": 1051.8, - "dispatch_p99": 1161.4, - "resource_class": "resource-constrained" - } - ] - }, - { - "sku": "h200", - "phase": "prefill", - "dtype": "fp8", - "T": 128, - "n_points": 2, - "curve": [ - { - "achieved_fraction": 0.1515, - "dispatch_p50": 86.4, - "dispatch_p99": 152.1, - "resource_class": "backend-tuned" - }, - { - "achieved_fraction": 0.1818, - "dispatch_p50": 86.8, - "dispatch_p99": 141.8, - "resource_class": "resource-constrained" - } - ] - }, - { - "sku": "h200", - "phase": "prefill", - "dtype": "fp8", - "T": 256, - "n_points": 2, - "curve": [ - { - "achieved_fraction": 0.1515, - "dispatch_p50": 104.4, - "dispatch_p99": 141.7, - "resource_class": "backend-tuned" - }, - { - "achieved_fraction": 0.1818, - "dispatch_p50": 103.2, - "dispatch_p99": 155.1, - "resource_class": "resource-constrained" - } - ] - }, - { - "sku": "h200", - "phase": "prefill", - "dtype": "fp8", - "T": 512, - "n_points": 2, - "curve": [ - { - "achieved_fraction": 0.1515, - "dispatch_p50": 141.1, - "dispatch_p99": 233.4, - "resource_class": "backend-tuned" - }, - { - "achieved_fraction": 0.1818, - "dispatch_p50": 135.8, - "dispatch_p99": 238, - "resource_class": "resource-constrained" - } - ] - }, - { - "sku": "h200", - "phase": "prefill", - "dtype": "fp8", - "T": 1024, - "n_points": 2, - "curve": [ - { - "achieved_fraction": 0.1515, - "dispatch_p50": 205.8, - "dispatch_p99": 248.9, - "resource_class": 
"backend-tuned" - }, - { - "achieved_fraction": 0.1818, - "dispatch_p50": 196.6, - "dispatch_p99": 253.7, - "resource_class": "resource-constrained" - } - ] - }, - { - "sku": "h200", - "phase": "prefill", - "dtype": "fp8", - "T": 2048, - "n_points": 2, - "curve": [ - { - "achieved_fraction": 0.1515, - "dispatch_p50": 340.2, - "dispatch_p99": 421.5, - "resource_class": "backend-tuned" - }, - { - "achieved_fraction": 0.1818, - "dispatch_p50": 320.2, - "dispatch_p99": 440, - "resource_class": "resource-constrained" - } - ] - }, - { - "sku": "h200", - "phase": "prefill", - "dtype": "fp8", - "T": 4096, - "n_points": 2, - "curve": [ - { - "achieved_fraction": 0.1515, - "dispatch_p50": 612.3, - "dispatch_p99": 680.4, - "resource_class": "backend-tuned" - }, - { - "achieved_fraction": 0.1818, - "dispatch_p50": 572.4, - "dispatch_p99": 629.7, - "resource_class": "resource-constrained" - } - ] - } - ], - "topologyPenalty": [ - { - "sku": "h200", - "phase": "decode", - "dtype": "bf16", - "T": 1, - "ep8_p50": 123.6, - "ep16_p50": 578.4, - "penalty_pct": 367.9 - }, - { - "sku": "h200", - "phase": "decode", - "dtype": "bf16", - "T": 2, - "ep8_p50": 114.8, - "ep16_p50": 547.2, - "penalty_pct": 376.5 - }, - { - "sku": "h200", - "phase": "decode", - "dtype": "bf16", - "T": 4, - "ep8_p50": 111.7, - "ep16_p50": 621.5, - "penalty_pct": 456.5 - }, - { - "sku": "h200", - "phase": "decode", - "dtype": "bf16", - "T": 8, - "ep8_p50": 112.8, - "ep16_p50": 611.8, - "penalty_pct": 442.2 - }, - { - "sku": "h200", - "phase": "decode", - "dtype": "bf16", - "T": 16, - "ep8_p50": 120.3, - "ep16_p50": 631.8, - "penalty_pct": 425.3 - }, - { - "sku": "h200", - "phase": "decode", - "dtype": "bf16", - "T": 32, - "ep8_p50": 123.6, - "ep16_p50": 782.9, - "penalty_pct": 533.5 - }, - { - "sku": "h200", - "phase": "decode", - "dtype": "bf16", - "T": 64, - "ep8_p50": 136, - "ep16_p50": 679.3, - "penalty_pct": 399.5 - }, - { - "sku": "h200", - "phase": "decode", - "dtype": "bf16", - "T": 128, - "ep8_p50": 
156.2, - "ep16_p50": 808.1, - "penalty_pct": 417.5 - }, - { - "sku": "mi355x", - "phase": "decode", - "dtype": "bf16", - "T": 1, - "ep8_p50": 40.6, - "ep16_p50": 360.4, - "penalty_pct": 788.7 - }, - { - "sku": "mi355x", - "phase": "decode", - "dtype": "bf16", - "T": 2, - "ep8_p50": 42.6, - "ep16_p50": 345.1, - "penalty_pct": 710.9 - }, - { - "sku": "mi355x", - "phase": "decode", - "dtype": "bf16", - "T": 4, - "ep8_p50": 42.2, - "ep16_p50": 352.8, - "penalty_pct": 736.8 - }, - { - "sku": "mi355x", - "phase": "decode", - "dtype": "bf16", - "T": 8, - "ep8_p50": 42.5, - "ep16_p50": 430.7, - "penalty_pct": 912.9 - } - ], - "skewPenalty": [ - { - "sku": "b300", - "ep": 8, - "phase": "decode", - "routing": "zipf", - "T": 1, - "p50_amplification": 0.694, - "p99_amplification": 0.867 - }, - { - "sku": "b300", - "ep": 8, - "phase": "decode", - "routing": "zipf", - "T": 2, - "p50_amplification": 0.695, - "p99_amplification": 0.811 - }, - { - "sku": "b300", - "ep": 8, - "phase": "decode", - "routing": "zipf", - "T": 4, - "p50_amplification": 0.697, - "p99_amplification": 0.683 - }, - { - "sku": "b300", - "ep": 8, - "phase": "decode", - "routing": "zipf", - "T": 8, - "p50_amplification": 0.716, - "p99_amplification": 0.76 - }, - { - "sku": "b300", - "ep": 8, - "phase": "decode", - "routing": "zipf", - "T": 16, - "p50_amplification": 0.716, - "p99_amplification": 0.881 - }, - { - "sku": "b300", - "ep": 8, - "phase": "decode", - "routing": "zipf", - "T": 32, - "p50_amplification": 0.743, - "p99_amplification": 0.837 - }, - { - "sku": "b300", - "ep": 8, - "phase": "decode", - "routing": "zipf", - "T": 64, - "p50_amplification": 0.718, - "p99_amplification": 0.756 - }, - { - "sku": "b300", - "ep": 8, - "phase": "decode", - "routing": "zipf", - "T": 128, - "p50_amplification": 0.813, - "p99_amplification": 0.898 - }, - { - "sku": "b300", - "ep": 8, - "phase": "decode", - "routing": "zipf", - "T": 1, - "p50_amplification": 0.698, - "p99_amplification": 0.753 - }, - { - "sku": "b300", 
- "ep": 8, - "phase": "decode", - "routing": "zipf", - "T": 8, - "p50_amplification": 0.723, - "p99_amplification": 0.798 - }, - { - "sku": "b300", - "ep": 8, - "phase": "decode", - "routing": "zipf", - "T": 32, - "p50_amplification": 0.76, - "p99_amplification": 0.82 - }, - { - "sku": "b300", - "ep": 8, - "phase": "decode", - "routing": "zipf", - "T": 128, - "p50_amplification": 0.828, - "p99_amplification": 0.816 - }, - { - "sku": "b300", - "ep": 8, - "phase": "decode", - "routing": "zipf-heavy", - "T": 1, - "p50_amplification": 0.722, - "p99_amplification": 0.819 - }, - { - "sku": "b300", - "ep": 8, - "phase": "decode", - "routing": "zipf-heavy", - "T": 2, - "p50_amplification": 0.707, - "p99_amplification": 0.777 - }, - { - "sku": "b300", - "ep": 8, - "phase": "decode", - "routing": "zipf-heavy", - "T": 4, - "p50_amplification": 0.705, - "p99_amplification": 0.634 - }, - { - "sku": "b300", - "ep": 8, - "phase": "decode", - "routing": "zipf-heavy", - "T": 8, - "p50_amplification": 0.713, - "p99_amplification": 0.806 - }, - { - "sku": "b300", - "ep": 8, - "phase": "decode", - "routing": "zipf-heavy", - "T": 16, - "p50_amplification": 0.709, - "p99_amplification": 0.817 - }, - { - "sku": "b300", - "ep": 8, - "phase": "decode", - "routing": "zipf-heavy", - "T": 32, - "p50_amplification": 0.705, - "p99_amplification": 0.887 - }, - { - "sku": "b300", - "ep": 8, - "phase": "decode", - "routing": "zipf-heavy", - "T": 64, - "p50_amplification": 0.707, - "p99_amplification": 0.736 - }, - { - "sku": "b300", - "ep": 8, - "phase": "decode", - "routing": "zipf-heavy", - "T": 128, - "p50_amplification": 0.736, - "p99_amplification": 0.729 - }, - { - "sku": "b300", - "ep": 8, - "phase": "decode", - "routing": "zipf-heavy", - "T": 1, - "p50_amplification": 0.699, - "p99_amplification": 0.752 - }, - { - "sku": "b300", - "ep": 8, - "phase": "decode", - "routing": "zipf-heavy", - "T": 8, - "p50_amplification": 0.699, - "p99_amplification": 0.708 - }, - { - "sku": "b300", - "ep": 
8, - "phase": "decode", - "routing": "zipf-heavy", - "T": 32, - "p50_amplification": 0.68, - "p99_amplification": 0.77 - }, - { - "sku": "b300", - "ep": 8, - "phase": "decode", - "routing": "zipf-heavy", - "T": 128, - "p50_amplification": 0.737, - "p99_amplification": 0.823 - }, - { - "sku": "b300", - "ep": 8, - "phase": "decode", - "routing": "zipf-heavy+eplb", - "T": 1, - "p50_amplification": 21.723, - "p99_amplification": 36.695 - }, - { - "sku": "b300", - "ep": 8, - "phase": "decode", - "routing": "zipf-heavy+eplb", - "T": 2, - "p50_amplification": 21.728, - "p99_amplification": 38.053 - }, - { - "sku": "b300", - "ep": 8, - "phase": "decode", - "routing": "zipf-heavy+eplb", - "T": 4, - "p50_amplification": 21.606, - "p99_amplification": 31.919 - }, - { - "sku": "b300", - "ep": 8, - "phase": "decode", - "routing": "zipf-heavy+eplb", - "T": 8, - "p50_amplification": 21.676, - "p99_amplification": 57.264 - }, - { - "sku": "b300", - "ep": 8, - "phase": "decode", - "routing": "zipf-heavy+eplb", - "T": 16, - "p50_amplification": 21.27, - "p99_amplification": 35.187 - }, - { - "sku": "b300", - "ep": 8, - "phase": "decode", - "routing": "zipf-heavy+eplb", - "T": 32, - "p50_amplification": 17.906, - "p99_amplification": 53.04 - }, - { - "sku": "b300", - "ep": 8, - "phase": "decode", - "routing": "zipf-heavy+eplb", - "T": 64, - "p50_amplification": 16.017, - "p99_amplification": 28.424 - }, - { - "sku": "b300", - "ep": 8, - "phase": "decode", - "routing": "zipf-heavy+eplb", - "T": 128, - "p50_amplification": 14.456, - "p99_amplification": 24.57 - }, - { - "sku": "b300", - "ep": 8, - "phase": "decode", - "routing": "zipf-mild", - "T": 1, - "p50_amplification": 0.707, - "p99_amplification": 0.797 - }, - { - "sku": "b300", - "ep": 8, - "phase": "decode", - "routing": "zipf-mild", - "T": 2, - "p50_amplification": 0.712, - "p99_amplification": 0.752 - }, - { - "sku": "b300", - "ep": 8, - "phase": "decode", - "routing": "zipf-mild", - "T": 4, - "p50_amplification": 0.722, - 
"p99_amplification": 0.662 - }, - { - "sku": "b300", - "ep": 8, - "phase": "decode", - "routing": "zipf-mild", - "T": 8, - "p50_amplification": 0.739, - "p99_amplification": 0.785 - }, - { - "sku": "b300", - "ep": 8, - "phase": "decode", - "routing": "zipf-mild", - "T": 16, - "p50_amplification": 0.767, - "p99_amplification": 0.905 - }, - { - "sku": "b300", - "ep": 8, - "phase": "decode", - "routing": "zipf-mild", - "T": 32, - "p50_amplification": 0.708, - "p99_amplification": 0.772 - }, - { - "sku": "b300", - "ep": 8, - "phase": "decode", - "routing": "zipf-mild", - "T": 64, - "p50_amplification": 0.788, - "p99_amplification": 0.832 - }, - { - "sku": "b300", - "ep": 8, - "phase": "decode", - "routing": "zipf-mild", - "T": 128, - "p50_amplification": 0.833, - "p99_amplification": 0.85 - }, - { - "sku": "b300", - "ep": 8, - "phase": "decode", - "routing": "zipf-mild+eplb", - "T": 1, - "p50_amplification": 0.685, - "p99_amplification": 0.747 - }, - { - "sku": "b300", - "ep": 8, - "phase": "decode", - "routing": "zipf-mild+eplb", - "T": 2, - "p50_amplification": 0.69, - "p99_amplification": 0.712 - }, - { - "sku": "b300", - "ep": 8, - "phase": "decode", - "routing": "zipf-mild+eplb", - "T": 4, - "p50_amplification": 0.694, - "p99_amplification": 0.609 - }, - { - "sku": "b300", - "ep": 8, - "phase": "decode", - "routing": "zipf-mild+eplb", - "T": 8, - "p50_amplification": 0.715, - "p99_amplification": 0.804 - }, - { - "sku": "b300", - "ep": 8, - "phase": "decode", - "routing": "zipf-mild+eplb", - "T": 16, - "p50_amplification": 0.722, - "p99_amplification": 0.739 - }, - { - "sku": "b300", - "ep": 8, - "phase": "decode", - "routing": "zipf-mild+eplb", - "T": 32, - "p50_amplification": 0.681, - "p99_amplification": 0.713 - }, - { - "sku": "b300", - "ep": 8, - "phase": "decode", - "routing": "zipf-mild+eplb", - "T": 64, - "p50_amplification": 0.777, - "p99_amplification": 0.867 - }, - { - "sku": "b300", - "ep": 8, - "phase": "decode", - "routing": "zipf-mild+eplb", - "T": 
128, - "p50_amplification": 0.744, - "p99_amplification": 0.791 - }, - { - "sku": "b300", - "ep": 8, - "phase": "decode", - "routing": "zipf-moderate", - "T": 1, - "p50_amplification": 0.697, - "p99_amplification": 0.741 - }, - { - "sku": "b300", - "ep": 8, - "phase": "decode", - "routing": "zipf-moderate", - "T": 2, - "p50_amplification": 0.703, - "p99_amplification": 0.718 - }, - { - "sku": "b300", - "ep": 8, - "phase": "decode", - "routing": "zipf-moderate", - "T": 4, - "p50_amplification": 0.717, - "p99_amplification": 0.623 - }, - { - "sku": "b300", - "ep": 8, - "phase": "decode", - "routing": "zipf-moderate", - "T": 8, - "p50_amplification": 0.721, - "p99_amplification": 0.745 - }, - { - "sku": "b300", - "ep": 8, - "phase": "decode", - "routing": "zipf-moderate", - "T": 16, - "p50_amplification": 0.723, - "p99_amplification": 0.868 - }, - { - "sku": "b300", - "ep": 8, - "phase": "decode", - "routing": "zipf-moderate", - "T": 32, - "p50_amplification": 0.746, - "p99_amplification": 0.763 - }, - { - "sku": "b300", - "ep": 8, - "phase": "decode", - "routing": "zipf-moderate", - "T": 64, - "p50_amplification": 0.716, - "p99_amplification": 0.866 - }, - { - "sku": "b300", - "ep": 8, - "phase": "decode", - "routing": "zipf-moderate", - "T": 128, - "p50_amplification": 0.823, - "p99_amplification": 0.912 - }, - { - "sku": "b300", - "ep": 8, - "phase": "decode", - "routing": "zipf-moderate+eplb", - "T": 1, - "p50_amplification": 0.712, - "p99_amplification": 0.809 - }, - { - "sku": "b300", - "ep": 8, - "phase": "decode", - "routing": "zipf-moderate+eplb", - "T": 2, - "p50_amplification": 0.716, - "p99_amplification": 0.706 - }, - { - "sku": "b300", - "ep": 8, - "phase": "decode", - "routing": "zipf-moderate+eplb", - "T": 4, - "p50_amplification": 0.734, - "p99_amplification": 0.686 - }, - { - "sku": "b300", - "ep": 8, - "phase": "decode", - "routing": "zipf-moderate+eplb", - "T": 8, - "p50_amplification": 0.74, - "p99_amplification": 0.87 - }, - { - "sku": "b300", - 
"ep": 8, - "phase": "decode", - "routing": "zipf-moderate+eplb", - "T": 16, - "p50_amplification": 0.796, - "p99_amplification": 0.781 - }, - { - "sku": "b300", - "ep": 8, - "phase": "decode", - "routing": "zipf-moderate+eplb", - "T": 32, - "p50_amplification": 0.702, - "p99_amplification": 0.751 - }, - { - "sku": "b300", - "ep": 8, - "phase": "decode", - "routing": "zipf-moderate+eplb", - "T": 64, - "p50_amplification": 0.796, - "p99_amplification": 0.801 - }, - { - "sku": "b300", - "ep": 8, - "phase": "decode", - "routing": "zipf-moderate+eplb", - "T": 128, - "p50_amplification": 0.762, - "p99_amplification": 0.77 - }, - { - "sku": "b300", - "ep": 8, - "phase": "decode", - "routing": "zipf+eplb", - "T": 1, - "p50_amplification": 0.714, - "p99_amplification": 0.778 - }, - { - "sku": "b300", - "ep": 8, - "phase": "decode", - "routing": "zipf+eplb", - "T": 2, - "p50_amplification": 0.72, - "p99_amplification": 0.825 - }, - { - "sku": "b300", - "ep": 8, - "phase": "decode", - "routing": "zipf+eplb", - "T": 4, - "p50_amplification": 0.757, - "p99_amplification": 0.868 - }, - { - "sku": "b300", - "ep": 8, - "phase": "decode", - "routing": "zipf+eplb", - "T": 8, - "p50_amplification": 0.741, - "p99_amplification": 0.849 - }, - { - "sku": "b300", - "ep": 8, - "phase": "decode", - "routing": "zipf+eplb", - "T": 16, - "p50_amplification": 0.806, - "p99_amplification": 0.813 - }, - { - "sku": "b300", - "ep": 8, - "phase": "decode", - "routing": "zipf+eplb", - "T": 32, - "p50_amplification": 0.703, - "p99_amplification": 0.776 - }, - { - "sku": "b300", - "ep": 8, - "phase": "decode", - "routing": "zipf+eplb", - "T": 64, - "p50_amplification": 0.807, - "p99_amplification": 0.86 - }, - { - "sku": "b300", - "ep": 8, - "phase": "decode", - "routing": "zipf+eplb", - "T": 128, - "p50_amplification": 0.763, - "p99_amplification": 0.785 - }, - { - "sku": "b300", - "ep": 8, - "phase": "decode", - "routing": "zipf", - "T": 1, - "p50_amplification": 0.688, - "p99_amplification": 0.915 
- }, - { - "sku": "b300", - "ep": 8, - "phase": "decode", - "routing": "zipf", - "T": 2, - "p50_amplification": 0.69, - "p99_amplification": 0.807 - }, - { - "sku": "b300", - "ep": 8, - "phase": "decode", - "routing": "zipf", - "T": 4, - "p50_amplification": 0.705, - "p99_amplification": 0.895 - }, - { - "sku": "b300", - "ep": 8, - "phase": "decode", - "routing": "zipf", - "T": 8, - "p50_amplification": 0.721, - "p99_amplification": 0.745 - }, - { - "sku": "b300", - "ep": 8, - "phase": "decode", - "routing": "zipf", - "T": 16, - "p50_amplification": 0.718, - "p99_amplification": 0.756 - }, - { - "sku": "b300", - "ep": 8, - "phase": "decode", - "routing": "zipf", - "T": 32, - "p50_amplification": 0.746, - "p99_amplification": 0.765 - }, - { - "sku": "b300", - "ep": 8, - "phase": "decode", - "routing": "zipf", - "T": 64, - "p50_amplification": 0.722, - "p99_amplification": 0.759 - }, - { - "sku": "b300", - "ep": 8, - "phase": "decode", - "routing": "zipf", - "T": 128, - "p50_amplification": 0.817, - "p99_amplification": 0.83 - }, - { - "sku": "b300", - "ep": 8, - "phase": "decode", - "routing": "zipf+eplb", - "T": 1, - "p50_amplification": 0.7, - "p99_amplification": 0.781 - }, - { - "sku": "b300", - "ep": 8, - "phase": "decode", - "routing": "zipf+eplb", - "T": 2, - "p50_amplification": 0.712, - "p99_amplification": 0.796 - }, - { - "sku": "b300", - "ep": 8, - "phase": "decode", - "routing": "zipf+eplb", - "T": 4, - "p50_amplification": 0.707, - "p99_amplification": 0.634 - }, - { - "sku": "b300", - "ep": 8, - "phase": "decode", - "routing": "zipf+eplb", - "T": 8, - "p50_amplification": 0.723, - "p99_amplification": 0.838 - }, - { - "sku": "b300", - "ep": 8, - "phase": "decode", - "routing": "zipf+eplb", - "T": 16, - "p50_amplification": 0.731, - "p99_amplification": 0.855 - }, - { - "sku": "b300", - "ep": 8, - "phase": "decode", - "routing": "zipf+eplb", - "T": 32, - "p50_amplification": 0.694, - "p99_amplification": 0.779 - }, - { - "sku": "b300", - "ep": 8, - 
"phase": "decode", - "routing": "zipf+eplb", - "T": 64, - "p50_amplification": 0.791, - "p99_amplification": 0.841 - }, - { - "sku": "b300", - "ep": 8, - "phase": "decode", - "routing": "zipf+eplb", - "T": 128, - "p50_amplification": 0.755, - "p99_amplification": 0.76 - }, - { - "sku": "b300", - "ep": 8, - "phase": "prefill", - "routing": "zipf", - "T": 128, - "p50_amplification": 0.829, - "p99_amplification": 0.934 - }, - { - "sku": "b300", - "ep": 8, - "phase": "prefill", - "routing": "zipf", - "T": 512, - "p50_amplification": 0.921, - "p99_amplification": 0.942 - }, - { - "sku": "b300", - "ep": 8, - "phase": "prefill", - "routing": "zipf", - "T": 2048, - "p50_amplification": 1.095, - "p99_amplification": 1.103 - }, - { - "sku": "b300", - "ep": 8, - "phase": "prefill", - "routing": "zipf", - "T": 128, - "p50_amplification": 0.8, - "p99_amplification": 0.839 - }, - { - "sku": "b300", - "ep": 8, - "phase": "prefill", - "routing": "zipf", - "T": 256, - "p50_amplification": 0.829, - "p99_amplification": 0.848 - }, - { - "sku": "b300", - "ep": 8, - "phase": "prefill", - "routing": "zipf", - "T": 512, - "p50_amplification": 0.931, - "p99_amplification": 0.942 - }, - { - "sku": "b300", - "ep": 8, - "phase": "prefill", - "routing": "zipf", - "T": 1024, - "p50_amplification": 1.005, - "p99_amplification": 1.01 - }, - { - "sku": "b300", - "ep": 8, - "phase": "prefill", - "routing": "zipf", - "T": 2048, - "p50_amplification": 1.083, - "p99_amplification": 1.227 - }, - { - "sku": "b300", - "ep": 8, - "phase": "prefill", - "routing": "zipf", - "T": 4096, - "p50_amplification": 1.146, - "p99_amplification": 1.14 - }, - { - "sku": "b300", - "ep": 8, - "phase": "prefill", - "routing": "zipf-heavy", - "T": 128, - "p50_amplification": 0.737, - "p99_amplification": 0.836 - }, - { - "sku": "b300", - "ep": 8, - "phase": "prefill", - "routing": "zipf-heavy", - "T": 512, - "p50_amplification": 0.81, - "p99_amplification": 0.873 - }, - { - "sku": "b300", - "ep": 8, - "phase": "prefill", 
- "routing": "zipf-heavy", - "T": 2048, - "p50_amplification": 0.963, - "p99_amplification": 1.019 - }, - { - "sku": "b300", - "ep": 8, - "phase": "prefill", - "routing": "zipf-heavy", - "T": 128, - "p50_amplification": 0.746, - "p99_amplification": 0.975 - }, - { - "sku": "b300", - "ep": 8, - "phase": "prefill", - "routing": "zipf-heavy", - "T": 256, - "p50_amplification": 0.811, - "p99_amplification": 0.829 - }, - { - "sku": "b300", - "ep": 8, - "phase": "prefill", - "routing": "zipf-heavy", - "T": 512, - "p50_amplification": 0.839, - "p99_amplification": 0.905 - }, - { - "sku": "b300", - "ep": 8, - "phase": "prefill", - "routing": "zipf-heavy", - "T": 1024, - "p50_amplification": 0.9, - "p99_amplification": 0.935 - }, - { - "sku": "b300", - "ep": 8, - "phase": "prefill", - "routing": "zipf-heavy", - "T": 2048, - "p50_amplification": 0.96, - "p99_amplification": 1.02 - }, - { - "sku": "b300", - "ep": 8, - "phase": "prefill", - "routing": "zipf-heavy", - "T": 4096, - "p50_amplification": 1.018, - "p99_amplification": 1.098 - }, - { - "sku": "b300", - "ep": 8, - "phase": "prefill", - "routing": "zipf-heavy+eplb", - "T": 128, - "p50_amplification": 0.753, - "p99_amplification": 0.786 - }, - { - "sku": "b300", - "ep": 8, - "phase": "prefill", - "routing": "zipf-heavy+eplb", - "T": 256, - "p50_amplification": 0.844, - "p99_amplification": 0.875 - }, - { - "sku": "b300", - "ep": 8, - "phase": "prefill", - "routing": "zipf-heavy+eplb", - "T": 512, - "p50_amplification": 0.866, - "p99_amplification": 0.913 - }, - { - "sku": "b300", - "ep": 8, - "phase": "prefill", - "routing": "zipf-heavy+eplb", - "T": 1024, - "p50_amplification": 0.945, - "p99_amplification": 1.093 - }, - { - "sku": "b300", - "ep": 8, - "phase": "prefill", - "routing": "zipf-heavy+eplb", - "T": 2048, - "p50_amplification": 0.983, - "p99_amplification": 1.102 - }, - { - "sku": "b300", - "ep": 8, - "phase": "prefill", - "routing": "zipf-heavy+eplb", - "T": 4096, - "p50_amplification": 0.993, - 
"p99_amplification": 0.991 - }, - { - "sku": "b300", - "ep": 8, - "phase": "prefill", - "routing": "zipf-mild", - "T": 128, - "p50_amplification": 0.82, - "p99_amplification": 0.813 - }, - { - "sku": "b300", - "ep": 8, - "phase": "prefill", - "routing": "zipf-mild", - "T": 256, - "p50_amplification": 0.889, - "p99_amplification": 0.876 - }, - { - "sku": "b300", - "ep": 8, - "phase": "prefill", - "routing": "zipf-mild", - "T": 512, - "p50_amplification": 0.961, - "p99_amplification": 0.957 - }, - { - "sku": "b300", - "ep": 8, - "phase": "prefill", - "routing": "zipf-mild", - "T": 1024, - "p50_amplification": 1.028, - "p99_amplification": 1.021 - }, - { - "sku": "b300", - "ep": 8, - "phase": "prefill", - "routing": "zipf-mild", - "T": 2048, - "p50_amplification": 1.113, - "p99_amplification": 1.115 - }, - { - "sku": "b300", - "ep": 8, - "phase": "prefill", - "routing": "zipf-mild", - "T": 4096, - "p50_amplification": 1.176, - "p99_amplification": 1.271 - }, - { - "sku": "b300", - "ep": 8, - "phase": "prefill", - "routing": "zipf-mild+eplb", - "T": 128, - "p50_amplification": 0.758, - "p99_amplification": 0.804 - }, - { - "sku": "b300", - "ep": 8, - "phase": "prefill", - "routing": "zipf-mild+eplb", - "T": 256, - "p50_amplification": 0.841, - "p99_amplification": 0.85 - }, - { - "sku": "b300", - "ep": 8, - "phase": "prefill", - "routing": "zipf-mild+eplb", - "T": 512, - "p50_amplification": 0.871, - "p99_amplification": 1.015 - }, - { - "sku": "b300", - "ep": 8, - "phase": "prefill", - "routing": "zipf-mild+eplb", - "T": 1024, - "p50_amplification": 0.943, - "p99_amplification": 0.968 - }, - { - "sku": "b300", - "ep": 8, - "phase": "prefill", - "routing": "zipf-mild+eplb", - "T": 2048, - "p50_amplification": 0.956, - "p99_amplification": 1.001 - }, - { - "sku": "b300", - "ep": 8, - "phase": "prefill", - "routing": "zipf-mild+eplb", - "T": 4096, - "p50_amplification": 0.979, - "p99_amplification": 0.987 - }, - { - "sku": "b300", - "ep": 8, - "phase": "prefill", - 
"routing": "zipf-moderate", - "T": 128, - "p50_amplification": 0.804, - "p99_amplification": 0.808 - }, - { - "sku": "b300", - "ep": 8, - "phase": "prefill", - "routing": "zipf-moderate", - "T": 256, - "p50_amplification": 0.864, - "p99_amplification": 0.87 - }, - { - "sku": "b300", - "ep": 8, - "phase": "prefill", - "routing": "zipf-moderate", - "T": 512, - "p50_amplification": 0.91, - "p99_amplification": 0.924 - }, - { - "sku": "b300", - "ep": 8, - "phase": "prefill", - "routing": "zipf-moderate", - "T": 1024, - "p50_amplification": 1.004, - "p99_amplification": 1.015 - }, - { - "sku": "b300", - "ep": 8, - "phase": "prefill", - "routing": "zipf-moderate", - "T": 2048, - "p50_amplification": 1.082, - "p99_amplification": 1.117 - }, - { - "sku": "b300", - "ep": 8, - "phase": "prefill", - "routing": "zipf-moderate", - "T": 4096, - "p50_amplification": 1.146, - "p99_amplification": 1.182 - }, - { - "sku": "b300", - "ep": 8, - "phase": "prefill", - "routing": "zipf-moderate+eplb", - "T": 128, - "p50_amplification": 0.758, - "p99_amplification": 0.82 - }, - { - "sku": "b300", - "ep": 8, - "phase": "prefill", - "routing": "zipf-moderate+eplb", - "T": 256, - "p50_amplification": 0.874, - "p99_amplification": 0.915 - }, - { - "sku": "b300", - "ep": 8, - "phase": "prefill", - "routing": "zipf-moderate+eplb", - "T": 512, - "p50_amplification": 0.87, - "p99_amplification": 0.884 - }, - { - "sku": "b300", - "ep": 8, - "phase": "prefill", - "routing": "zipf-moderate+eplb", - "T": 1024, - "p50_amplification": 0.939, - "p99_amplification": 0.971 - }, - { - "sku": "b300", - "ep": 8, - "phase": "prefill", - "routing": "zipf-moderate+eplb", - "T": 2048, - "p50_amplification": 0.971, - "p99_amplification": 1.051 - }, - { - "sku": "b300", - "ep": 8, - "phase": "prefill", - "routing": "zipf-moderate+eplb", - "T": 4096, - "p50_amplification": 0.975, - "p99_amplification": 0.981 - }, - { - "sku": "b300", - "ep": 8, - "phase": "prefill", - "routing": "zipf+eplb", - "T": 128, - 
"p50_amplification": 0.754, - "p99_amplification": 0.83 - }, - { - "sku": "b300", - "ep": 8, - "phase": "prefill", - "routing": "zipf+eplb", - "T": 256, - "p50_amplification": 0.873, - "p99_amplification": 0.861 - }, - { - "sku": "b300", - "ep": 8, - "phase": "prefill", - "routing": "zipf+eplb", - "T": 512, - "p50_amplification": 0.87, - "p99_amplification": 0.915 - }, - { - "sku": "b300", - "ep": 8, - "phase": "prefill", - "routing": "zipf+eplb", - "T": 1024, - "p50_amplification": 0.941, - "p99_amplification": 0.95 - }, - { - "sku": "b300", - "ep": 8, - "phase": "prefill", - "routing": "zipf+eplb", - "T": 2048, - "p50_amplification": 0.97, - "p99_amplification": 0.978 - }, - { - "sku": "b300", - "ep": 8, - "phase": "prefill", - "routing": "zipf+eplb", - "T": 4096, - "p50_amplification": 0.974, - "p99_amplification": 0.962 - }, - { - "sku": "b300", - "ep": 8, - "phase": "prefill", - "routing": "zipf", - "T": 128, - "p50_amplification": 0.809, - "p99_amplification": 0.826 - }, - { - "sku": "b300", - "ep": 8, - "phase": "prefill", - "routing": "zipf", - "T": 256, - "p50_amplification": 0.866, - "p99_amplification": 0.862 - }, - { - "sku": "b300", - "ep": 8, - "phase": "prefill", - "routing": "zipf", - "T": 512, - "p50_amplification": 0.917, - "p99_amplification": 0.952 - }, - { - "sku": "b300", - "ep": 8, - "phase": "prefill", - "routing": "zipf", - "T": 1024, - "p50_amplification": 1.008, - "p99_amplification": 1.01 - }, - { - "sku": "b300", - "ep": 8, - "phase": "prefill", - "routing": "zipf", - "T": 2048, - "p50_amplification": 1.084, - "p99_amplification": 1.083 - }, - { - "sku": "b300", - "ep": 8, - "phase": "prefill", - "routing": "zipf", - "T": 4096, - "p50_amplification": 1.146, - "p99_amplification": 1.136 - }, - { - "sku": "b300", - "ep": 8, - "phase": "prefill", - "routing": "zipf+eplb", - "T": 128, - "p50_amplification": 0.745, - "p99_amplification": 0.954 - }, - { - "sku": "b300", - "ep": 8, - "phase": "prefill", - "routing": "zipf+eplb", - "T": 256, - 
"p50_amplification": 0.866, - "p99_amplification": 0.889 - }, - { - "sku": "b300", - "ep": 8, - "phase": "prefill", - "routing": "zipf+eplb", - "T": 512, - "p50_amplification": 0.866, - "p99_amplification": 0.903 - }, - { - "sku": "b300", - "ep": 8, - "phase": "prefill", - "routing": "zipf+eplb", - "T": 1024, - "p50_amplification": 0.925, - "p99_amplification": 0.924 - }, - { - "sku": "b300", - "ep": 8, - "phase": "prefill", - "routing": "zipf+eplb", - "T": 2048, - "p50_amplification": 0.968, - "p99_amplification": 1.102 - }, - { - "sku": "b300", - "ep": 8, - "phase": "prefill", - "routing": "zipf+eplb", - "T": 4096, - "p50_amplification": 0.972, - "p99_amplification": 0.974 - }, - { - "sku": "h100", - "ep": 8, - "phase": "decode", - "routing": "zipf", - "T": 1, - "p50_amplification": 0.731, - "p99_amplification": 0.751 - }, - { - "sku": "h100", - "ep": 8, - "phase": "decode", - "routing": "zipf", - "T": 2, - "p50_amplification": 0.535, - "p99_amplification": 0.549 - }, - { - "sku": "h100", - "ep": 8, - "phase": "decode", - "routing": "zipf", - "T": 4, - "p50_amplification": 0.733, - "p99_amplification": 0.547 - }, - { - "sku": "h100", - "ep": 8, - "phase": "decode", - "routing": "zipf", - "T": 8, - "p50_amplification": 0.736, - "p99_amplification": 0.233 - }, - { - "sku": "h100", - "ep": 8, - "phase": "decode", - "routing": "zipf", - "T": 16, - "p50_amplification": 0.749, - "p99_amplification": 0.544 - }, - { - "sku": "h100", - "ep": 8, - "phase": "decode", - "routing": "zipf", - "T": 32, - "p50_amplification": 0.531, - "p99_amplification": 0.341 - }, - { - "sku": "h100", - "ep": 8, - "phase": "decode", - "routing": "zipf", - "T": 64, - "p50_amplification": 0.724, - "p99_amplification": 1.182 - }, - { - "sku": "h100", - "ep": 8, - "phase": "decode", - "routing": "zipf", - "T": 128, - "p50_amplification": 0.69, - "p99_amplification": 0.731 - }, - { - "sku": "h100", - "ep": 8, - "phase": "decode", - "routing": "zipf", - "T": 1, - "p50_amplification": 0.762, - 
"p99_amplification": 0.821 - }, - { - "sku": "h100", - "ep": 8, - "phase": "decode", - "routing": "zipf", - "T": 8, - "p50_amplification": 0.733, - "p99_amplification": 0.287 - }, - { - "sku": "h100", - "ep": 8, - "phase": "decode", - "routing": "zipf", - "T": 32, - "p50_amplification": 0.563, - "p99_amplification": 0.381 - }, - { - "sku": "h100", - "ep": 8, - "phase": "decode", - "routing": "zipf", - "T": 128, - "p50_amplification": 0.748, - "p99_amplification": 0.787 - }, - { - "sku": "h100", - "ep": 8, - "phase": "decode", - "routing": "zipf-heavy", - "T": 1, - "p50_amplification": 0.737, - "p99_amplification": 0.929 - }, - { - "sku": "h100", - "ep": 8, - "phase": "decode", - "routing": "zipf-heavy", - "T": 2, - "p50_amplification": 0.526, - "p99_amplification": 0.543 - }, - { - "sku": "h100", - "ep": 8, - "phase": "decode", - "routing": "zipf-heavy", - "T": 4, - "p50_amplification": 0.727, - "p99_amplification": 0.538 - }, - { - "sku": "h100", - "ep": 8, - "phase": "decode", - "routing": "zipf-heavy", - "T": 8, - "p50_amplification": 0.723, - "p99_amplification": 0.351 - }, - { - "sku": "h100", - "ep": 8, - "phase": "decode", - "routing": "zipf-heavy", - "T": 16, - "p50_amplification": 0.743, - "p99_amplification": 0.542 - }, - { - "sku": "h100", - "ep": 8, - "phase": "decode", - "routing": "zipf-heavy", - "T": 32, - "p50_amplification": 0.532, - "p99_amplification": 0.338 - }, - { - "sku": "h100", - "ep": 8, - "phase": "decode", - "routing": "zipf-heavy", - "T": 64, - "p50_amplification": 0.709, - "p99_amplification": 0.565 - }, - { - "sku": "h100", - "ep": 8, - "phase": "decode", - "routing": "zipf-heavy", - "T": 128, - "p50_amplification": 0.648, - "p99_amplification": 0.714 - }, - { - "sku": "h100", - "ep": 8, - "phase": "decode", - "routing": "zipf-heavy", - "T": 1, - "p50_amplification": 0.751, - "p99_amplification": 0.775 - }, - { - "sku": "h100", - "ep": 8, - "phase": "decode", - "routing": "zipf-heavy", - "T": 8, - "p50_amplification": 0.738, - 
"p99_amplification": 0.235 - }, - { - "sku": "h100", - "ep": 8, - "phase": "decode", - "routing": "zipf-heavy", - "T": 32, - "p50_amplification": 0.544, - "p99_amplification": 0.357 - }, - { - "sku": "h100", - "ep": 8, - "phase": "decode", - "routing": "zipf-heavy", - "T": 128, - "p50_amplification": 0.654, - "p99_amplification": 0.654 - }, - { - "sku": "h100", - "ep": 8, - "phase": "decode", - "routing": "zipf-heavy+eplb", - "T": 1, - "p50_amplification": 0.736, - "p99_amplification": 0.75 - }, - { - "sku": "h100", - "ep": 8, - "phase": "decode", - "routing": "zipf-heavy+eplb", - "T": 2, - "p50_amplification": 0.389, - "p99_amplification": 0.548 - }, - { - "sku": "h100", - "ep": 8, - "phase": "decode", - "routing": "zipf-heavy+eplb", - "T": 4, - "p50_amplification": 0.548, - "p99_amplification": 0.537 - }, - { - "sku": "h100", - "ep": 8, - "phase": "decode", - "routing": "zipf-heavy+eplb", - "T": 8, - "p50_amplification": 0.542, - "p99_amplification": 0.216 - }, - { - "sku": "h100", - "ep": 8, - "phase": "decode", - "routing": "zipf-heavy+eplb", - "T": 16, - "p50_amplification": 0.633, - "p99_amplification": 0.523 - }, - { - "sku": "h100", - "ep": 8, - "phase": "decode", - "routing": "zipf-heavy+eplb", - "T": 32, - "p50_amplification": 0.494, - "p99_amplification": 0.331 - }, - { - "sku": "h100", - "ep": 8, - "phase": "decode", - "routing": "zipf-heavy+eplb", - "T": 64, - "p50_amplification": 0.651, - "p99_amplification": 0.604 - }, - { - "sku": "h100", - "ep": 8, - "phase": "decode", - "routing": "zipf-heavy+eplb", - "T": 128, - "p50_amplification": 0.615, - "p99_amplification": 0.673 - }, - { - "sku": "h100", - "ep": 8, - "phase": "decode", - "routing": "zipf-mild", - "T": 1, - "p50_amplification": 0.762, - "p99_amplification": 0.783 - }, - { - "sku": "h100", - "ep": 8, - "phase": "decode", - "routing": "zipf-mild", - "T": 2, - "p50_amplification": 0.406, - "p99_amplification": 0.563 - }, - { - "sku": "h100", - "ep": 8, - "phase": "decode", - "routing": 
"zipf-mild", - "T": 4, - "p50_amplification": 0.583, - "p99_amplification": 0.545 - }, - { - "sku": "h100", - "ep": 8, - "phase": "decode", - "routing": "zipf-mild", - "T": 8, - "p50_amplification": 0.577, - "p99_amplification": 0.228 - }, - { - "sku": "h100", - "ep": 8, - "phase": "decode", - "routing": "zipf-mild", - "T": 16, - "p50_amplification": 0.741, - "p99_amplification": 0.569 - }, - { - "sku": "h100", - "ep": 8, - "phase": "decode", - "routing": "zipf-mild", - "T": 32, - "p50_amplification": 0.499, - "p99_amplification": 0.337 - }, - { - "sku": "h100", - "ep": 8, - "phase": "decode", - "routing": "zipf-mild", - "T": 64, - "p50_amplification": 0.687, - "p99_amplification": 0.656 - }, - { - "sku": "h100", - "ep": 8, - "phase": "decode", - "routing": "zipf-mild", - "T": 128, - "p50_amplification": 0.658, - "p99_amplification": 0.691 - }, - { - "sku": "h100", - "ep": 8, - "phase": "decode", - "routing": "zipf-mild+eplb", - "T": 1, - "p50_amplification": 0.772, - "p99_amplification": 0.787 - }, - { - "sku": "h100", - "ep": 8, - "phase": "decode", - "routing": "zipf-mild+eplb", - "T": 2, - "p50_amplification": 0.41, - "p99_amplification": 0.591 - }, - { - "sku": "h100", - "ep": 8, - "phase": "decode", - "routing": "zipf-mild+eplb", - "T": 4, - "p50_amplification": 0.742, - "p99_amplification": 0.551 - }, - { - "sku": "h100", - "ep": 8, - "phase": "decode", - "routing": "zipf-mild+eplb", - "T": 8, - "p50_amplification": 0.728, - "p99_amplification": 0.233 - }, - { - "sku": "h100", - "ep": 8, - "phase": "decode", - "routing": "zipf-mild+eplb", - "T": 16, - "p50_amplification": 0.748, - "p99_amplification": 0.556 - }, - { - "sku": "h100", - "ep": 8, - "phase": "decode", - "routing": "zipf-mild+eplb", - "T": 32, - "p50_amplification": 0.504, - "p99_amplification": 0.341 - }, - { - "sku": "h100", - "ep": 8, - "phase": "decode", - "routing": "zipf-mild+eplb", - "T": 64, - "p50_amplification": 0.709, - "p99_amplification": 0.581 - }, - { - "sku": "h100", - "ep": 8, - 
"phase": "decode", - "routing": "zipf-mild+eplb", - "T": 128, - "p50_amplification": 0.61, - "p99_amplification": 0.673 - }, - { - "sku": "h100", - "ep": 8, - "phase": "decode", - "routing": "zipf-moderate", - "T": 1, - "p50_amplification": 0.75, - "p99_amplification": 0.764 - }, - { - "sku": "h100", - "ep": 8, - "phase": "decode", - "routing": "zipf-moderate", - "T": 2, - "p50_amplification": 0.395, - "p99_amplification": 0.544 - }, - { - "sku": "h100", - "ep": 8, - "phase": "decode", - "routing": "zipf-moderate", - "T": 4, - "p50_amplification": 0.559, - "p99_amplification": 0.522 - }, - { - "sku": "h100", - "ep": 8, - "phase": "decode", - "routing": "zipf-moderate", - "T": 8, - "p50_amplification": 0.57, - "p99_amplification": 0.224 - }, - { - "sku": "h100", - "ep": 8, - "phase": "decode", - "routing": "zipf-moderate", - "T": 16, - "p50_amplification": 0.612, - "p99_amplification": 1.13 - }, - { - "sku": "h100", - "ep": 8, - "phase": "decode", - "routing": "zipf-moderate", - "T": 32, - "p50_amplification": 0.487, - "p99_amplification": 0.339 - }, - { - "sku": "h100", - "ep": 8, - "phase": "decode", - "routing": "zipf-moderate", - "T": 64, - "p50_amplification": 0.693, - "p99_amplification": 0.585 - }, - { - "sku": "h100", - "ep": 8, - "phase": "decode", - "routing": "zipf-moderate", - "T": 128, - "p50_amplification": 0.676, - "p99_amplification": 0.711 - }, - { - "sku": "h100", - "ep": 8, - "phase": "decode", - "routing": "zipf-moderate+eplb", - "T": 1, - "p50_amplification": 0.546, - "p99_amplification": 0.695 - }, - { - "sku": "h100", - "ep": 8, - "phase": "decode", - "routing": "zipf-moderate+eplb", - "T": 2, - "p50_amplification": 0.389, - "p99_amplification": 0.494 - }, - { - "sku": "h100", - "ep": 8, - "phase": "decode", - "routing": "zipf-moderate+eplb", - "T": 4, - "p50_amplification": 0.566, - "p99_amplification": 0.554 - }, - { - "sku": "h100", - "ep": 8, - "phase": "decode", - "routing": "zipf-moderate+eplb", - "T": 8, - "p50_amplification": 0.569, - 
"p99_amplification": 0.221 - }, - { - "sku": "h100", - "ep": 8, - "phase": "decode", - "routing": "zipf-moderate+eplb", - "T": 16, - "p50_amplification": 0.637, - "p99_amplification": 0.525 - }, - { - "sku": "h100", - "ep": 8, - "phase": "decode", - "routing": "zipf-moderate+eplb", - "T": 32, - "p50_amplification": 0.497, - "p99_amplification": 0.338 - }, - { - "sku": "h100", - "ep": 8, - "phase": "decode", - "routing": "zipf-moderate+eplb", - "T": 64, - "p50_amplification": 0.656, - "p99_amplification": 0.587 - }, - { - "sku": "h100", - "ep": 8, - "phase": "decode", - "routing": "zipf-moderate+eplb", - "T": 128, - "p50_amplification": 0.634, - "p99_amplification": 0.676 - }, - { - "sku": "h100", - "ep": 8, - "phase": "decode", - "routing": "zipf+eplb", - "T": 1, - "p50_amplification": 0.551, - "p99_amplification": 1.339 - }, - { - "sku": "h100", - "ep": 8, - "phase": "decode", - "routing": "zipf+eplb", - "T": 2, - "p50_amplification": 0.381, - "p99_amplification": 0.491 - }, - { - "sku": "h100", - "ep": 8, - "phase": "decode", - "routing": "zipf+eplb", - "T": 4, - "p50_amplification": 0.554, - "p99_amplification": 0.534 - }, - { - "sku": "h100", - "ep": 8, - "phase": "decode", - "routing": "zipf+eplb", - "T": 8, - "p50_amplification": 0.723, - "p99_amplification": 0.325 - }, - { - "sku": "h100", - "ep": 8, - "phase": "decode", - "routing": "zipf+eplb", - "T": 16, - "p50_amplification": 0.64, - "p99_amplification": 0.525 - }, - { - "sku": "h100", - "ep": 8, - "phase": "decode", - "routing": "zipf+eplb", - "T": 32, - "p50_amplification": 0.497, - "p99_amplification": 0.342 - }, - { - "sku": "h100", - "ep": 8, - "phase": "decode", - "routing": "zipf+eplb", - "T": 64, - "p50_amplification": 0.648, - "p99_amplification": 1.883 - }, - { - "sku": "h100", - "ep": 8, - "phase": "decode", - "routing": "zipf+eplb", - "T": 128, - "p50_amplification": 0.614, - "p99_amplification": 0.686 - }, - { - "sku": "h100", - "ep": 8, - "phase": "decode", - "routing": "zipf", - "T": 1, - 
"p50_amplification": 0.759, - "p99_amplification": 0.771 - }, - { - "sku": "h100", - "ep": 8, - "phase": "decode", - "routing": "zipf", - "T": 2, - "p50_amplification": 0.394, - "p99_amplification": 0.583 - }, - { - "sku": "h100", - "ep": 8, - "phase": "decode", - "routing": "zipf", - "T": 4, - "p50_amplification": 0.536, - "p99_amplification": 0.655 - }, - { - "sku": "h100", - "ep": 8, - "phase": "decode", - "routing": "zipf", - "T": 8, - "p50_amplification": 0.714, - "p99_amplification": 0.239 - }, - { - "sku": "h100", - "ep": 8, - "phase": "decode", - "routing": "zipf", - "T": 16, - "p50_amplification": 0.737, - "p99_amplification": 0.552 - }, - { - "sku": "h100", - "ep": 8, - "phase": "decode", - "routing": "zipf", - "T": 32, - "p50_amplification": 0.457, - "p99_amplification": 0.336 - }, - { - "sku": "h100", - "ep": 8, - "phase": "decode", - "routing": "zipf", - "T": 64, - "p50_amplification": 0.683, - "p99_amplification": 0.637 - }, - { - "sku": "h100", - "ep": 8, - "phase": "decode", - "routing": "zipf", - "T": 128, - "p50_amplification": 0.651, - "p99_amplification": 0.725 - }, - { - "sku": "h100", - "ep": 8, - "phase": "decode", - "routing": "zipf+eplb", - "T": 1, - "p50_amplification": 0.546, - "p99_amplification": 0.579 - }, - { - "sku": "h100", - "ep": 8, - "phase": "decode", - "routing": "zipf+eplb", - "T": 2, - "p50_amplification": 0.392, - "p99_amplification": 0.434 - }, - { - "sku": "h100", - "ep": 8, - "phase": "decode", - "routing": "zipf+eplb", - "T": 4, - "p50_amplification": 0.559, - "p99_amplification": 0.54 - }, - { - "sku": "h100", - "ep": 8, - "phase": "decode", - "routing": "zipf+eplb", - "T": 8, - "p50_amplification": 0.528, - "p99_amplification": 0.187 - }, - { - "sku": "h100", - "ep": 8, - "phase": "decode", - "routing": "zipf+eplb", - "T": 16, - "p50_amplification": 0.64, - "p99_amplification": 0.539 - }, - { - "sku": "h100", - "ep": 8, - "phase": "decode", - "routing": "zipf+eplb", - "T": 32, - "p50_amplification": 0.448, - 
"p99_amplification": 0.333 - }, - { - "sku": "h100", - "ep": 8, - "phase": "decode", - "routing": "zipf+eplb", - "T": 64, - "p50_amplification": 0.675, - "p99_amplification": 0.592 - }, - { - "sku": "h100", - "ep": 8, - "phase": "decode", - "routing": "zipf+eplb", - "T": 128, - "p50_amplification": 0.61, - "p99_amplification": 0.671 - }, - { - "sku": "h100", - "ep": 8, - "phase": "prefill", - "routing": "zipf", - "T": 128, - "p50_amplification": 0.778, - "p99_amplification": 0.802 - }, - { - "sku": "h100", - "ep": 8, - "phase": "prefill", - "routing": "zipf", - "T": 512, - "p50_amplification": 0.999, - "p99_amplification": 1.045 - }, - { - "sku": "h100", - "ep": 8, - "phase": "prefill", - "routing": "zipf", - "T": 2048, - "p50_amplification": 1.186, - "p99_amplification": 1.129 - }, - { - "sku": "h100", - "ep": 8, - "phase": "prefill", - "routing": "zipf", - "T": 128, - "p50_amplification": 0.774, - "p99_amplification": 0.778 - }, - { - "sku": "h100", - "ep": 8, - "phase": "prefill", - "routing": "zipf", - "T": 256, - "p50_amplification": 0.882, - "p99_amplification": 1.161 - }, - { - "sku": "h100", - "ep": 8, - "phase": "prefill", - "routing": "zipf", - "T": 512, - "p50_amplification": 0.993, - "p99_amplification": 0.998 - }, - { - "sku": "h100", - "ep": 8, - "phase": "prefill", - "routing": "zipf", - "T": 1024, - "p50_amplification": 1.089, - "p99_amplification": 1.091 - }, - { - "sku": "h100", - "ep": 8, - "phase": "prefill", - "routing": "zipf", - "T": 2048, - "p50_amplification": 1.183, - "p99_amplification": 1.128 - }, - { - "sku": "h100", - "ep": 8, - "phase": "prefill", - "routing": "zipf", - "T": 4096, - "p50_amplification": 1.23, - "p99_amplification": 1.21 - }, - { - "sku": "h100", - "ep": 8, - "phase": "prefill", - "routing": "zipf-heavy", - "T": 128, - "p50_amplification": 0.73, - "p99_amplification": 0.741 - }, - { - "sku": "h100", - "ep": 8, - "phase": "prefill", - "routing": "zipf-heavy", - "T": 512, - "p50_amplification": 0.987, - 
"p99_amplification": 1.026 - }, - { - "sku": "h100", - "ep": 8, - "phase": "prefill", - "routing": "zipf-heavy", - "T": 2048, - "p50_amplification": 1.154, - "p99_amplification": 1.092 - }, - { - "sku": "h100", - "ep": 8, - "phase": "prefill", - "routing": "zipf-heavy", - "T": 128, - "p50_amplification": 0.719, - "p99_amplification": 0.744 - }, - { - "sku": "h100", - "ep": 8, - "phase": "prefill", - "routing": "zipf-heavy", - "T": 256, - "p50_amplification": 0.854, - "p99_amplification": 0.858 - }, - { - "sku": "h100", - "ep": 8, - "phase": "prefill", - "routing": "zipf-heavy", - "T": 512, - "p50_amplification": 0.965, - "p99_amplification": 0.96 - }, - { - "sku": "h100", - "ep": 8, - "phase": "prefill", - "routing": "zipf-heavy", - "T": 1024, - "p50_amplification": 1.079, - "p99_amplification": 1.082 - }, - { - "sku": "h100", - "ep": 8, - "phase": "prefill", - "routing": "zipf-heavy", - "T": 2048, - "p50_amplification": 1.158, - "p99_amplification": 1.104 - }, - { - "sku": "h100", - "ep": 8, - "phase": "prefill", - "routing": "zipf-heavy", - "T": 4096, - "p50_amplification": 1.199, - "p99_amplification": 1.176 - }, - { - "sku": "h100", - "ep": 8, - "phase": "prefill", - "routing": "zipf-heavy+eplb", - "T": 128, - "p50_amplification": 0.687, - "p99_amplification": 0.714 - }, - { - "sku": "h100", - "ep": 8, - "phase": "prefill", - "routing": "zipf-heavy+eplb", - "T": 256, - "p50_amplification": 0.787, - "p99_amplification": 0.795 - }, - { - "sku": "h100", - "ep": 8, - "phase": "prefill", - "routing": "zipf-heavy+eplb", - "T": 512, - "p50_amplification": 0.839, - "p99_amplification": 0.834 - }, - { - "sku": "h100", - "ep": 8, - "phase": "prefill", - "routing": "zipf-heavy+eplb", - "T": 1024, - "p50_amplification": 0.891, - "p99_amplification": 0.887 - }, - { - "sku": "h100", - "ep": 8, - "phase": "prefill", - "routing": "zipf-heavy+eplb", - "T": 2048, - "p50_amplification": 0.933, - "p99_amplification": 0.898 - }, - { - "sku": "h100", - "ep": 8, - "phase": "prefill", 
- "routing": "zipf-heavy+eplb", - "T": 4096, - "p50_amplification": 0.989, - "p99_amplification": 0.995 - }, - { - "sku": "h100", - "ep": 8, - "phase": "prefill", - "routing": "zipf-mild", - "T": 128, - "p50_amplification": 0.768, - "p99_amplification": 0.783 - }, - { - "sku": "h100", - "ep": 8, - "phase": "prefill", - "routing": "zipf-mild", - "T": 256, - "p50_amplification": 0.859, - "p99_amplification": 0.864 - }, - { - "sku": "h100", - "ep": 8, - "phase": "prefill", - "routing": "zipf-mild", - "T": 512, - "p50_amplification": 0.973, - "p99_amplification": 0.969 - }, - { - "sku": "h100", - "ep": 8, - "phase": "prefill", - "routing": "zipf-mild", - "T": 1024, - "p50_amplification": 1.088, - "p99_amplification": 1.083 - }, - { - "sku": "h100", - "ep": 8, - "phase": "prefill", - "routing": "zipf-mild", - "T": 2048, - "p50_amplification": 1.184, - "p99_amplification": 1.275 - }, - { - "sku": "h100", - "ep": 8, - "phase": "prefill", - "routing": "zipf-mild", - "T": 4096, - "p50_amplification": 1.241, - "p99_amplification": 1.216 - }, - { - "sku": "h100", - "ep": 8, - "phase": "prefill", - "routing": "zipf-mild+eplb", - "T": 128, - "p50_amplification": 0.698, - "p99_amplification": 0.719 - }, - { - "sku": "h100", - "ep": 8, - "phase": "prefill", - "routing": "zipf-mild+eplb", - "T": 256, - "p50_amplification": 0.796, - "p99_amplification": 0.792 - }, - { - "sku": "h100", - "ep": 8, - "phase": "prefill", - "routing": "zipf-mild+eplb", - "T": 512, - "p50_amplification": 0.84, - "p99_amplification": 0.847 - }, - { - "sku": "h100", - "ep": 8, - "phase": "prefill", - "routing": "zipf-mild+eplb", - "T": 1024, - "p50_amplification": 0.877, - "p99_amplification": 1.339 - }, - { - "sku": "h100", - "ep": 8, - "phase": "prefill", - "routing": "zipf-mild+eplb", - "T": 2048, - "p50_amplification": 0.913, - "p99_amplification": 0.87 - }, - { - "sku": "h100", - "ep": 8, - "phase": "prefill", - "routing": "zipf-mild+eplb", - "T": 4096, - "p50_amplification": 0.956, - 
"p99_amplification": 0.956 - }, - { - "sku": "h100", - "ep": 8, - "phase": "prefill", - "routing": "zipf-moderate", - "T": 128, - "p50_amplification": 0.774, - "p99_amplification": 0.799 - }, - { - "sku": "h100", - "ep": 8, - "phase": "prefill", - "routing": "zipf-moderate", - "T": 256, - "p50_amplification": 0.878, - "p99_amplification": 0.885 - }, - { - "sku": "h100", - "ep": 8, - "phase": "prefill", - "routing": "zipf-moderate", - "T": 512, - "p50_amplification": 0.991, - "p99_amplification": 0.989 - }, - { - "sku": "h100", - "ep": 8, - "phase": "prefill", - "routing": "zipf-moderate", - "T": 1024, - "p50_amplification": 1.093, - "p99_amplification": 1.157 - }, - { - "sku": "h100", - "ep": 8, - "phase": "prefill", - "routing": "zipf-moderate", - "T": 2048, - "p50_amplification": 1.183, - "p99_amplification": 1.466 - }, - { - "sku": "h100", - "ep": 8, - "phase": "prefill", - "routing": "zipf-moderate", - "T": 4096, - "p50_amplification": 1.231, - "p99_amplification": 1.253 - }, - { - "sku": "h100", - "ep": 8, - "phase": "prefill", - "routing": "zipf-moderate+eplb", - "T": 128, - "p50_amplification": 0.709, - "p99_amplification": 0.732 - }, - { - "sku": "h100", - "ep": 8, - "phase": "prefill", - "routing": "zipf-moderate+eplb", - "T": 256, - "p50_amplification": 0.777, - "p99_amplification": 0.79 - }, - { - "sku": "h100", - "ep": 8, - "phase": "prefill", - "routing": "zipf-moderate+eplb", - "T": 512, - "p50_amplification": 0.836, - "p99_amplification": 0.831 - }, - { - "sku": "h100", - "ep": 8, - "phase": "prefill", - "routing": "zipf-moderate+eplb", - "T": 1024, - "p50_amplification": 0.874, - "p99_amplification": 0.875 - }, - { - "sku": "h100", - "ep": 8, - "phase": "prefill", - "routing": "zipf-moderate+eplb", - "T": 2048, - "p50_amplification": 0.912, - "p99_amplification": 0.871 - }, - { - "sku": "h100", - "ep": 8, - "phase": "prefill", - "routing": "zipf-moderate+eplb", - "T": 4096, - "p50_amplification": 0.971, - "p99_amplification": 0.978 - }, - { - "sku": 
"h100", - "ep": 8, - "phase": "prefill", - "routing": "zipf+eplb", - "T": 128, - "p50_amplification": 0.689, - "p99_amplification": 0.709 - }, - { - "sku": "h100", - "ep": 8, - "phase": "prefill", - "routing": "zipf+eplb", - "T": 256, - "p50_amplification": 0.774, - "p99_amplification": 0.778 - }, - { - "sku": "h100", - "ep": 8, - "phase": "prefill", - "routing": "zipf+eplb", - "T": 512, - "p50_amplification": 0.834, - "p99_amplification": 0.888 + { + "id": "cxr-f7274fdd", + "sku": "gb300", + "phase": "prefill", + "atTokensPerRank": 256, + "lowestP99DispatchUs": 141.9, + "config": "bf16/normal/layout-and-dispatch-v1/balanced+eplb/tuned", + "epSize": 8 + } + ], + "llCrossover": [ + { + "sku": "gb200", + "ep": 8, + "dtype": "bf16", + "stat": "p50", + "basis": "measured-roundtrip", + "normal_faster_at_T": 128 }, { - "sku": "h100", + "sku": "gb200", "ep": 8, - "phase": "prefill", - "routing": "zipf+eplb", - "T": 1024, - "p50_amplification": 0.875, - "p99_amplification": 0.876 + "dtype": "bf16", + "stat": "p99", + "basis": "measured-roundtrip", + "normal_faster_at_T": 128 }, { - "sku": "h100", + "sku": "gb200", "ep": 8, - "phase": "prefill", - "routing": "zipf+eplb", - "T": 2048, - "p50_amplification": 0.918, - "p99_amplification": 0.873 + "dtype": "bf16", + "stat": "p50", + "basis": "measured-roundtrip", + "normal_faster_at_T": 128 }, { - "sku": "h100", + "sku": "gb200", "ep": 8, - "phase": "prefill", - "routing": "zipf+eplb", - "T": 4096, - "p50_amplification": 0.985, - "p99_amplification": 0.997 + "dtype": "bf16", + "stat": "p99", + "basis": "measured-roundtrip", + "normal_faster_at_T": 128 }, { - "sku": "h100", + "sku": "gb200", "ep": 8, - "phase": "prefill", - "routing": "zipf", - "T": 128, - "p50_amplification": 0.739, - "p99_amplification": 0.765 + "dtype": "fp8", + "stat": "p50", + "basis": "measured-roundtrip", + "normal_faster_at_T": 128 }, { - "sku": "h100", + "sku": "gb200", "ep": 8, - "phase": "prefill", - "routing": "zipf", - "T": 256, - 
"p50_amplification": 0.854, - "p99_amplification": 0.86 + "dtype": "fp8", + "stat": "p99", + "basis": "measured-roundtrip", + "normal_faster_at_T": 128 }, { - "sku": "h100", + "sku": "gb200", "ep": 8, - "phase": "prefill", - "routing": "zipf", - "T": 512, - "p50_amplification": 0.98, - "p99_amplification": 0.983 + "dtype": "fp8", + "stat": "p50", + "basis": "measured-roundtrip", + "normal_faster_at_T": 128 }, { - "sku": "h100", + "sku": "gb200", "ep": 8, - "phase": "prefill", - "routing": "zipf", - "T": 1024, - "p50_amplification": 1.095, - "p99_amplification": 1.097 + "dtype": "fp8", + "stat": "p99", + "basis": "measured-roundtrip", + "normal_faster_at_T": 128 }, { - "sku": "h100", + "sku": "gb300", "ep": 8, - "phase": "prefill", - "routing": "zipf", - "T": 2048, - "p50_amplification": 1.162, - "p99_amplification": 1.1 + "dtype": "bf16", + "stat": "p50", + "basis": "measured-roundtrip", + "normal_faster_at_T": 128 }, { - "sku": "h100", + "sku": "gb300", "ep": 8, - "phase": "prefill", - "routing": "zipf", - "T": 4096, - "p50_amplification": 1.208, - "p99_amplification": 1.19 + "dtype": "bf16", + "stat": "p99", + "basis": "measured-roundtrip", + "normal_faster_at_T": 128 }, { - "sku": "h100", + "sku": "gb300", "ep": 8, - "phase": "prefill", - "routing": "zipf+eplb", - "T": 128, - "p50_amplification": 0.687, - "p99_amplification": 0.708 + "dtype": "bf16", + "stat": "p50", + "basis": "measured-roundtrip", + "normal_faster_at_T": 128 }, { - "sku": "h100", + "sku": "gb300", "ep": 8, - "phase": "prefill", - "routing": "zipf+eplb", - "T": 256, - "p50_amplification": 0.773, - "p99_amplification": 0.771 + "dtype": "bf16", + "stat": "p99", + "basis": "measured-roundtrip", + "normal_faster_at_T": 128 }, { - "sku": "h100", + "sku": "gb300", "ep": 8, - "phase": "prefill", - "routing": "zipf+eplb", - "T": 512, - "p50_amplification": 0.858, - "p99_amplification": 0.857 + "dtype": "fp8", + "stat": "p50", + "basis": "measured-roundtrip", + "normal_faster_at_T": 128 }, { - "sku": 
"h100", + "sku": "gb300", "ep": 8, - "phase": "prefill", - "routing": "zipf+eplb", - "T": 1024, - "p50_amplification": 0.943, - "p99_amplification": 0.944 + "dtype": "fp8", + "stat": "p99", + "basis": "measured-roundtrip", + "normal_faster_at_T": 128 }, { - "sku": "h100", + "sku": "gb300", "ep": 8, - "phase": "prefill", - "routing": "zipf+eplb", - "T": 2048, - "p50_amplification": 1.023, - "p99_amplification": 0.968 + "dtype": "fp8", + "stat": "p50", + "basis": "measured-roundtrip", + "normal_faster_at_T": 128 }, { - "sku": "h100", + "sku": "gb300", "ep": 8, - "phase": "prefill", - "routing": "zipf+eplb", - "T": 4096, - "p50_amplification": 1.069, - "p99_amplification": 1.055 - }, + "dtype": "fp8", + "stat": "p99", + "basis": "measured-roundtrip", + "normal_faster_at_T": 128 + } + ], + "resourcePareto": [], + "topologyPenalty": [], + "skewPenalty": [ { - "sku": "h200", + "sku": "gb200", "ep": 8, "phase": "decode", "routing": "zipf", "T": 1, - "p50_amplification": 0.58, - "p99_amplification": 0.69 + "p50_amplification": 1.018, + "p99_amplification": 1.026 }, { - "sku": "h200", + "sku": "gb200", "ep": 8, "phase": "decode", "routing": "zipf", "T": 2, - "p50_amplification": 0.642, - "p99_amplification": 0.807 + "p50_amplification": 1.015, + "p99_amplification": 0.927 }, { - "sku": "h200", + "sku": "gb200", "ep": 8, "phase": "decode", "routing": "zipf", "T": 4, - "p50_amplification": 0.651, - "p99_amplification": 1.004 + "p50_amplification": 1.041, + "p99_amplification": 1.125 }, { - "sku": "h200", + "sku": "gb200", "ep": 8, "phase": "decode", "routing": "zipf", "T": 8, - "p50_amplification": 0.652, - "p99_amplification": 0.798 + "p50_amplification": 1.017, + "p99_amplification": 1.1 }, { - "sku": "h200", + "sku": "gb200", "ep": 8, "phase": "decode", "routing": "zipf", "T": 16, - "p50_amplification": 0.646, - "p99_amplification": 0.648 + "p50_amplification": 0.991, + "p99_amplification": 0.985 }, { - "sku": "h200", + "sku": "gb200", "ep": 8, "phase": "decode", 
"routing": "zipf", "T": 32, - "p50_amplification": 0.671, - "p99_amplification": 0.956 + "p50_amplification": 0.999, + "p99_amplification": 0.995 }, { - "sku": "h200", + "sku": "gb200", "ep": 8, "phase": "decode", "routing": "zipf", "T": 64, - "p50_amplification": 0.712, - "p99_amplification": 0.882 - }, - { - "sku": "h200", - "ep": 8, - "phase": "decode", - "routing": "zipf", - "T": 128, - "p50_amplification": 0.783, - "p99_amplification": 0.789 - }, - { - "sku": "h200", - "ep": 8, - "phase": "decode", - "routing": "zipf", - "T": 1, - "p50_amplification": 0.578, - "p99_amplification": 0.711 - }, - { - "sku": "h200", - "ep": 8, - "phase": "decode", - "routing": "zipf", - "T": 8, - "p50_amplification": 0.648, - "p99_amplification": 0.886 - }, - { - "sku": "h200", - "ep": 8, - "phase": "decode", - "routing": "zipf", - "T": 32, - "p50_amplification": 0.661, - "p99_amplification": 0.791 + "p50_amplification": 0.968, + "p99_amplification": 1.062 }, { - "sku": "h200", + "sku": "gb200", "ep": 8, "phase": "decode", "routing": "zipf", "T": 128, - "p50_amplification": 0.785, - "p99_amplification": 0.815 + "p50_amplification": 1.01, + "p99_amplification": 0.96 }, { - "sku": "h200", + "sku": "gb200", "ep": 8, "phase": "decode", "routing": "zipf-heavy", "T": 1, - "p50_amplification": 0.525, - "p99_amplification": 0.897 - }, - { - "sku": "h200", - "ep": 8, - "phase": "decode", - "routing": "zipf-heavy", - "T": 2, - "p50_amplification": 0.57, - "p99_amplification": 0.76 - }, - { - "sku": "h200", - "ep": 8, - "phase": "decode", - "routing": "zipf-heavy", - "T": 4, - "p50_amplification": 0.655, - "p99_amplification": 0.753 + "p50_amplification": 0.998, + "p99_amplification": 0.975 }, { - "sku": "h200", + "sku": "gb200", "ep": 8, "phase": "decode", "routing": "zipf-heavy", "T": 8, - "p50_amplification": 0.644, - "p99_amplification": 0.795 - }, - { - "sku": "h200", - "ep": 8, - "phase": "decode", - "routing": "zipf-heavy", - "T": 16, - "p50_amplification": 0.602, - 
"p99_amplification": 0.563 - }, - { - "sku": "h200", - "ep": 8, - "phase": "decode", - "routing": "zipf-heavy", - "T": 32, - "p50_amplification": 0.622, - "p99_amplification": 0.709 - }, - { - "sku": "h200", - "ep": 8, - "phase": "decode", - "routing": "zipf-heavy", - "T": 64, - "p50_amplification": 0.704, - "p99_amplification": 1.052 - }, - { - "sku": "h200", - "ep": 8, - "phase": "decode", - "routing": "zipf-heavy", - "T": 128, - "p50_amplification": 0.759, - "p99_amplification": 0.851 - }, - { - "sku": "h200", - "ep": 8, - "phase": "decode", - "routing": "zipf-heavy", - "T": 1, - "p50_amplification": 0.568, + "p50_amplification": 0.963, "p99_amplification": 1.025 }, { - "sku": "h200", - "ep": 8, - "phase": "decode", - "routing": "zipf-heavy", - "T": 8, - "p50_amplification": 0.667, - "p99_amplification": 0.831 - }, - { - "sku": "h200", + "sku": "gb200", "ep": 8, "phase": "decode", "routing": "zipf-heavy", "T": 32, - "p50_amplification": 0.665, - "p99_amplification": 0.785 + "p50_amplification": 0.99, + "p99_amplification": 1.02 }, { - "sku": "h200", + "sku": "gb200", "ep": 8, "phase": "decode", "routing": "zipf-heavy", "T": 128, - "p50_amplification": 0.766, - "p99_amplification": 0.783 + "p50_amplification": 0.976, + "p99_amplification": 0.977 }, { - "sku": "h200", + "sku": "gb200", "ep": 8, "phase": "decode", "routing": "zipf-heavy+eplb", "T": 1, - "p50_amplification": 0.56, - "p99_amplification": 0.545 + "p50_amplification": 1.058, + "p99_amplification": 3.623 }, { - "sku": "h200", + "sku": "gb200", "ep": 8, "phase": "decode", "routing": "zipf-heavy+eplb", "T": 2, - "p50_amplification": 0.627, - "p99_amplification": 0.538 + "p50_amplification": 1.041, + "p99_amplification": 3.305 }, { - "sku": "h200", + "sku": "gb200", "ep": 8, "phase": "decode", "routing": "zipf-heavy+eplb", "T": 4, - "p50_amplification": 0.665, - "p99_amplification": 0.738 + "p50_amplification": 1.067, + "p99_amplification": 3.523 }, { - "sku": "h200", + "sku": "gb200", "ep": 8, "phase": 
"decode", "routing": "zipf-heavy+eplb", "T": 8, - "p50_amplification": 0.662, - "p99_amplification": 0.729 + "p50_amplification": 1.046, + "p99_amplification": 3.605 }, { - "sku": "h200", + "sku": "gb200", "ep": 8, "phase": "decode", "routing": "zipf-heavy+eplb", "T": 16, - "p50_amplification": 0.611, - "p99_amplification": 0.559 + "p50_amplification": 1.11, + "p99_amplification": 3.612 }, { - "sku": "h200", + "sku": "gb200", "ep": 8, "phase": "decode", "routing": "zipf-heavy+eplb", "T": 32, - "p50_amplification": 0.656, - "p99_amplification": 0.853 + "p50_amplification": 1.033, + "p99_amplification": 3.524 }, { - "sku": "h200", + "sku": "gb200", "ep": 8, "phase": "decode", "routing": "zipf-heavy+eplb", "T": 64, - "p50_amplification": 0.684, - "p99_amplification": 0.778 + "p50_amplification": 1.037, + "p99_amplification": 3.106 }, { - "sku": "h200", + "sku": "gb200", "ep": 8, "phase": "decode", "routing": "zipf-heavy+eplb", "T": 128, - "p50_amplification": 0.75, - "p99_amplification": 0.711 + "p50_amplification": 1.025, + "p99_amplification": 2.904 }, { - "sku": "h200", + "sku": "gb200", "ep": 8, "phase": "decode", "routing": "zipf-mild", "T": 1, - "p50_amplification": 0.598, - "p99_amplification": 0.616 + "p50_amplification": 0.985, + "p99_amplification": 1.001 }, { - "sku": "h200", + "sku": "gb200", "ep": 8, "phase": "decode", "routing": "zipf-mild", "T": 2, - "p50_amplification": 0.636, - "p99_amplification": 0.653 + "p50_amplification": 0.971, + "p99_amplification": 0.93 }, { - "sku": "h200", + "sku": "gb200", "ep": 8, "phase": "decode", "routing": "zipf-mild", "T": 4, - "p50_amplification": 0.685, - "p99_amplification": 0.767 + "p50_amplification": 1.003, + "p99_amplification": 1.048 }, { - "sku": "h200", + "sku": "gb200", "ep": 8, "phase": "decode", "routing": "zipf-mild", "T": 8, - "p50_amplification": 0.673, - "p99_amplification": 0.707 + "p50_amplification": 0.994, + "p99_amplification": 1.056 }, { - "sku": "h200", + "sku": "gb200", "ep": 8, "phase": 
"decode", "routing": "zipf-mild", "T": 16, - "p50_amplification": 0.637, - "p99_amplification": 0.506 + "p50_amplification": 0.983, + "p99_amplification": 0.942 }, { - "sku": "h200", + "sku": "gb200", "ep": 8, "phase": "decode", "routing": "zipf-mild", "T": 32, - "p50_amplification": 0.679, - "p99_amplification": 0.718 + "p50_amplification": 0.994, + "p99_amplification": 0.991 }, { - "sku": "h200", + "sku": "gb200", "ep": 8, "phase": "decode", "routing": "zipf-mild", "T": 64, - "p50_amplification": 0.73, - "p99_amplification": 0.78 + "p50_amplification": 1.008, + "p99_amplification": 0.974 }, { - "sku": "h200", + "sku": "gb200", "ep": 8, "phase": "decode", "routing": "zipf-mild", "T": 128, - "p50_amplification": 0.797, - "p99_amplification": 0.847 + "p50_amplification": 1.031, + "p99_amplification": 1.001 }, { - "sku": "h200", + "sku": "gb200", "ep": 8, "phase": "decode", "routing": "zipf-mild+eplb", "T": 1, - "p50_amplification": 0.59, - "p99_amplification": 0.634 + "p50_amplification": 1.064, + "p99_amplification": 1.117 }, { - "sku": "h200", + "sku": "gb200", "ep": 8, "phase": "decode", "routing": "zipf-mild+eplb", "T": 2, - "p50_amplification": 0.636, - "p99_amplification": 0.673 + "p50_amplification": 1.039, + "p99_amplification": 1 }, { - "sku": "h200", + "sku": "gb200", "ep": 8, "phase": "decode", "routing": "zipf-mild+eplb", "T": 4, - "p50_amplification": 0.657, - "p99_amplification": 1.101 + "p50_amplification": 1.061, + "p99_amplification": 1.165 }, { - "sku": "h200", + "sku": "gb200", "ep": 8, "phase": "decode", "routing": "zipf-mild+eplb", "T": 8, - "p50_amplification": 0.661, - "p99_amplification": 0.83 + "p50_amplification": 1.052, + "p99_amplification": 1.163 }, { - "sku": "h200", + "sku": "gb200", "ep": 8, "phase": "decode", "routing": "zipf-mild+eplb", "T": 16, - "p50_amplification": 0.621, - "p99_amplification": 0.586 + "p50_amplification": 1.033, + "p99_amplification": 1.056 }, { - "sku": "h200", + "sku": "gb200", "ep": 8, "phase": "decode", 
"routing": "zipf-mild+eplb", "T": 32, - "p50_amplification": 0.65, - "p99_amplification": 0.689 + "p50_amplification": 1.038, + "p99_amplification": 1.012 }, { - "sku": "h200", + "sku": "gb200", "ep": 8, "phase": "decode", "routing": "zipf-mild+eplb", "T": 64, - "p50_amplification": 0.714, - "p99_amplification": 0.681 + "p50_amplification": 1.02, + "p99_amplification": 0.969 }, { - "sku": "h200", + "sku": "gb200", "ep": 8, "phase": "decode", "routing": "zipf-mild+eplb", "T": 128, - "p50_amplification": 0.738, - "p99_amplification": 0.869 + "p50_amplification": 1.023, + "p99_amplification": 0.976 }, { - "sku": "h200", + "sku": "gb200", "ep": 8, "phase": "decode", "routing": "zipf-moderate", "T": 1, - "p50_amplification": 0.59, - "p99_amplification": 0.753 + "p50_amplification": 0.997, + "p99_amplification": 1.009 }, { - "sku": "h200", + "sku": "gb200", "ep": 8, "phase": "decode", "routing": "zipf-moderate", "T": 2, - "p50_amplification": 0.638, - "p99_amplification": 0.75 + "p50_amplification": 0.992, + "p99_amplification": 0.91 }, { - "sku": "h200", + "sku": "gb200", "ep": 8, "phase": "decode", "routing": "zipf-moderate", "T": 4, - "p50_amplification": 0.661, - "p99_amplification": 0.788 + "p50_amplification": 1.012, + "p99_amplification": 1.043 }, { - "sku": "h200", + "sku": "gb200", "ep": 8, "phase": "decode", "routing": "zipf-moderate", "T": 8, - "p50_amplification": 0.654, - "p99_amplification": 0.708 + "p50_amplification": 0.994, + "p99_amplification": 1.049 }, { - "sku": "h200", + "sku": "gb200", "ep": 8, "phase": "decode", "routing": "zipf-moderate", "T": 16, - "p50_amplification": 0.611, - "p99_amplification": 0.54 + "p50_amplification": 0.972, + "p99_amplification": 0.955 }, { - "sku": "h200", + "sku": "gb200", "ep": 8, "phase": "decode", "routing": "zipf-moderate", "T": 32, - "p50_amplification": 0.691, - "p99_amplification": 0.799 + "p50_amplification": 0.996, + "p99_amplification": 1.008 }, { - "sku": "h200", + "sku": "gb200", "ep": 8, "phase": 
"decode", "routing": "zipf-moderate", "T": 64, - "p50_amplification": 0.732, - "p99_amplification": 0.808 + "p50_amplification": 0.969, + "p99_amplification": 0.942 }, { - "sku": "h200", + "sku": "gb200", "ep": 8, "phase": "decode", "routing": "zipf-moderate", "T": 128, - "p50_amplification": 0.789, - "p99_amplification": 0.812 + "p50_amplification": 1.025, + "p99_amplification": 0.98 }, { - "sku": "h200", + "sku": "gb200", "ep": 8, "phase": "decode", "routing": "zipf-moderate+eplb", "T": 1, - "p50_amplification": 0.582, - "p99_amplification": 0.644 + "p50_amplification": 1.012, + "p99_amplification": 1.032 }, { - "sku": "h200", + "sku": "gb200", "ep": 8, "phase": "decode", "routing": "zipf-moderate+eplb", "T": 2, - "p50_amplification": 0.644, - "p99_amplification": 0.809 + "p50_amplification": 0.994, + "p99_amplification": 0.963 }, { - "sku": "h200", + "sku": "gb200", "ep": 8, "phase": "decode", "routing": "zipf-moderate+eplb", "T": 4, - "p50_amplification": 0.683, - "p99_amplification": 0.889 + "p50_amplification": 1.011, + "p99_amplification": 1.135 }, { - "sku": "h200", + "sku": "gb200", "ep": 8, "phase": "decode", "routing": "zipf-moderate+eplb", "T": 8, - "p50_amplification": 0.661, - "p99_amplification": 0.875 + "p50_amplification": 1.009, + "p99_amplification": 1.016 }, { - "sku": "h200", + "sku": "gb200", "ep": 8, "phase": "decode", "routing": "zipf-moderate+eplb", "T": 16, - "p50_amplification": 0.605, - "p99_amplification": 0.654 + "p50_amplification": 0.993, + "p99_amplification": 0.926 }, { - "sku": "h200", + "sku": "gb200", "ep": 8, "phase": "decode", "routing": "zipf-moderate+eplb", "T": 32, - "p50_amplification": 0.663, - "p99_amplification": 0.977 + "p50_amplification": 0.998, + "p99_amplification": 0.992 }, { - "sku": "h200", + "sku": "gb200", "ep": 8, "phase": "decode", "routing": "zipf-moderate+eplb", "T": 64, - "p50_amplification": 0.715, - "p99_amplification": 0.942 + "p50_amplification": 0.999, + "p99_amplification": 0.943 }, { - "sku": 
"h200", + "sku": "gb200", "ep": 8, "phase": "decode", "routing": "zipf-moderate+eplb", "T": 128, - "p50_amplification": 0.758, - "p99_amplification": 0.885 - }, - { - "sku": "h200", - "ep": 8, - "phase": "decode", - "routing": "zipf+eplb", - "T": 1, - "p50_amplification": 0.584, - "p99_amplification": 0.649 - }, - { - "sku": "h200", - "ep": 8, - "phase": "decode", - "routing": "zipf+eplb", - "T": 2, - "p50_amplification": 0.642, - "p99_amplification": 0.694 - }, - { - "sku": "h200", - "ep": 8, - "phase": "decode", - "routing": "zipf+eplb", - "T": 4, - "p50_amplification": 0.665, - "p99_amplification": 0.703 - }, - { - "sku": "h200", - "ep": 8, - "phase": "decode", - "routing": "zipf+eplb", - "T": 8, - "p50_amplification": 0.674, - "p99_amplification": 1.103 - }, - { - "sku": "h200", - "ep": 8, - "phase": "decode", - "routing": "zipf+eplb", - "T": 16, - "p50_amplification": 0.637, - "p99_amplification": 0.594 - }, - { - "sku": "h200", - "ep": 8, - "phase": "decode", - "routing": "zipf+eplb", - "T": 32, - "p50_amplification": 0.675, - "p99_amplification": 0.747 - }, - { - "sku": "h200", - "ep": 8, - "phase": "decode", - "routing": "zipf+eplb", - "T": 64, - "p50_amplification": 0.688, - "p99_amplification": 0.676 - }, - { - "sku": "h200", - "ep": 8, - "phase": "decode", - "routing": "zipf+eplb", - "T": 128, - "p50_amplification": 0.75, - "p99_amplification": 0.84 - }, - { - "sku": "h200", - "ep": 8, - "phase": "decode", - "routing": "zipf", - "T": 1, - "p50_amplification": 0.601, - "p99_amplification": 0.766 - }, - { - "sku": "h200", - "ep": 8, - "phase": "decode", - "routing": "zipf", - "T": 2, - "p50_amplification": 0.644, - "p99_amplification": 0.735 - }, - { - "sku": "h200", - "ep": 8, - "phase": "decode", - "routing": "zipf", - "T": 4, - "p50_amplification": 0.658, - "p99_amplification": 0.737 - }, - { - "sku": "h200", - "ep": 8, - "phase": "decode", - "routing": "zipf", - "T": 8, - "p50_amplification": 0.667, - "p99_amplification": 0.861 - }, - { - "sku": 
"h200", - "ep": 8, - "phase": "decode", - "routing": "zipf", - "T": 16, - "p50_amplification": 0.644, - "p99_amplification": 0.664 - }, - { - "sku": "h200", - "ep": 8, - "phase": "decode", - "routing": "zipf", - "T": 32, - "p50_amplification": 0.672, - "p99_amplification": 0.756 - }, - { - "sku": "h200", - "ep": 8, - "phase": "decode", - "routing": "zipf", - "T": 64, - "p50_amplification": 0.714, - "p99_amplification": 0.683 - }, - { - "sku": "h200", - "ep": 8, - "phase": "decode", - "routing": "zipf", - "T": 128, - "p50_amplification": 0.788, - "p99_amplification": 0.927 + "p50_amplification": 1, + "p99_amplification": 0.959 }, { - "sku": "h200", + "sku": "gb200", "ep": 8, "phase": "decode", "routing": "zipf+eplb", "T": 1, - "p50_amplification": 0.59, - "p99_amplification": 0.634 + "p50_amplification": 0.983, + "p99_amplification": 0.998 }, { - "sku": "h200", + "sku": "gb200", "ep": 8, "phase": "decode", "routing": "zipf+eplb", "T": 2, - "p50_amplification": 0.637, - "p99_amplification": 0.678 + "p50_amplification": 0.965, + "p99_amplification": 0.905 }, { - "sku": "h200", + "sku": "gb200", "ep": 8, "phase": "decode", "routing": "zipf+eplb", "T": 4, - "p50_amplification": 0.704, - "p99_amplification": 1.069 + "p50_amplification": 1.004, + "p99_amplification": 1.031 }, { - "sku": "h200", + "sku": "gb200", "ep": 8, "phase": "decode", "routing": "zipf+eplb", "T": 8, - "p50_amplification": 0.66, - "p99_amplification": 0.846 + "p50_amplification": 0.987, + "p99_amplification": 0.982 }, { - "sku": "h200", + "sku": "gb200", "ep": 8, "phase": "decode", "routing": "zipf+eplb", "T": 16, - "p50_amplification": 0.634, - "p99_amplification": 0.688 + "p50_amplification": 0.988, + "p99_amplification": 0.93 }, { - "sku": "h200", + "sku": "gb200", "ep": 8, "phase": "decode", "routing": "zipf+eplb", "T": 32, - "p50_amplification": 0.663, - "p99_amplification": 0.758 + "p50_amplification": 0.99, + "p99_amplification": 0.982 }, { - "sku": "h200", + "sku": "gb200", "ep": 8, "phase": 
"decode", "routing": "zipf+eplb", "T": 64, - "p50_amplification": 0.748, - "p99_amplification": 0.887 + "p50_amplification": 0.994, + "p99_amplification": 0.939 }, { - "sku": "h200", + "sku": "gb200", "ep": 8, "phase": "decode", "routing": "zipf+eplb", "T": 128, - "p50_amplification": 0.741, - "p99_amplification": 0.813 - }, - { - "sku": "h200", - "ep": 8, - "phase": "prefill", - "routing": "zipf", - "T": 128, - "p50_amplification": 0.767, - "p99_amplification": 0.631 - }, - { - "sku": "h200", - "ep": 8, - "phase": "prefill", - "routing": "zipf", - "T": 512, - "p50_amplification": 0.995, - "p99_amplification": 1.016 - }, - { - "sku": "h200", - "ep": 8, - "phase": "prefill", - "routing": "zipf", - "T": 2048, - "p50_amplification": 1.148, - "p99_amplification": 1.232 + "p50_amplification": 0.994, + "p99_amplification": 0.95 }, { - "sku": "h200", + "sku": "gb200", "ep": 8, "phase": "prefill", "routing": "zipf", "T": 128, - "p50_amplification": 0.765, - "p99_amplification": 0.61 + "p50_amplification": 1.008, + "p99_amplification": 2.891 }, { - "sku": "h200", + "sku": "gb200", "ep": 8, "phase": "prefill", "routing": "zipf", "T": 256, - "p50_amplification": 0.917, - "p99_amplification": 0.93 + "p50_amplification": 0.996, + "p99_amplification": 2.73 }, { - "sku": "h200", + "sku": "gb200", "ep": 8, "phase": "prefill", "routing": "zipf", "T": 512, - "p50_amplification": 0.986, - "p99_amplification": 0.993 + "p50_amplification": 0.981, + "p99_amplification": 2.16 }, { - "sku": "h200", + "sku": "gb200", "ep": 8, "phase": "prefill", "routing": "zipf", "T": 1024, - "p50_amplification": 1.083, - "p99_amplification": 1.177 + "p50_amplification": 1.054, + "p99_amplification": 1.848 }, { - "sku": "h200", + "sku": "gb200", "ep": 8, "phase": "prefill", "routing": "zipf", "T": 2048, - "p50_amplification": 1.149, - "p99_amplification": 1.171 + "p50_amplification": 1.138, + "p99_amplification": 1.581 }, { - "sku": "h200", + "sku": "gb200", "ep": 8, "phase": "prefill", "routing": "zipf", 
"T": 4096, - "p50_amplification": 1.234, - "p99_amplification": 1.18 + "p50_amplification": 1.195, + "p99_amplification": 1.393 }, { - "sku": "h200", + "sku": "gb200", "ep": 8, "phase": "prefill", - "routing": "zipf-heavy", + "routing": "zipf", "T": 128, - "p50_amplification": 0.737, - "p99_amplification": 0.548 + "p50_amplification": 0.997, + "p99_amplification": 0.989 }, { - "sku": "h200", + "sku": "gb200", "ep": 8, "phase": "prefill", - "routing": "zipf-heavy", + "routing": "zipf", "T": 512, - "p50_amplification": 0.973, - "p99_amplification": 1.093 + "p50_amplification": 1, + "p99_amplification": 0.985 }, { - "sku": "h200", + "sku": "gb200", "ep": 8, "phase": "prefill", - "routing": "zipf-heavy", + "routing": "zipf", "T": 2048, - "p50_amplification": 1.133, - "p99_amplification": 1.172 + "p50_amplification": 1.148, + "p99_amplification": 1.151 }, { - "sku": "h200", + "sku": "gb200", "ep": 8, "phase": "prefill", "routing": "zipf-heavy", "T": 128, - "p50_amplification": 0.74, - "p99_amplification": 0.552 - }, - { - "sku": "h200", - "ep": 8, - "phase": "prefill", - "routing": "zipf-heavy", - "T": 256, - "p50_amplification": 0.836, - "p99_amplification": 0.844 + "p50_amplification": 0.938, + "p99_amplification": 0.93 }, { - "sku": "h200", + "sku": "gb200", "ep": 8, "phase": "prefill", "routing": "zipf-heavy", "T": 512, - "p50_amplification": 0.965, - "p99_amplification": 1.01 - }, - { - "sku": "h200", - "ep": 8, - "phase": "prefill", - "routing": "zipf-heavy", - "T": 1024, - "p50_amplification": 1.064, - "p99_amplification": 1.207 + "p50_amplification": 0.874, + "p99_amplification": 0.87 }, { - "sku": "h200", + "sku": "gb200", "ep": 8, "phase": "prefill", "routing": "zipf-heavy", "T": 2048, - "p50_amplification": 1.132, - "p99_amplification": 1.098 - }, - { - "sku": "h200", - "ep": 8, - "phase": "prefill", - "routing": "zipf-heavy", - "T": 4096, - "p50_amplification": 1.212, - "p99_amplification": 1.306 + "p50_amplification": 1.006, + "p99_amplification": 1.016 }, 
{ - "sku": "h200", + "sku": "gb200", "ep": 8, "phase": "prefill", "routing": "zipf-heavy+eplb", "T": 128, - "p50_amplification": 0.738, - "p99_amplification": 0.519 + "p50_amplification": 1, + "p99_amplification": 2.961 }, { - "sku": "h200", + "sku": "gb200", "ep": 8, "phase": "prefill", "routing": "zipf-heavy+eplb", "T": 256, - "p50_amplification": 0.783, - "p99_amplification": 0.724 + "p50_amplification": 1.004, + "p99_amplification": 2.712 }, { - "sku": "h200", + "sku": "gb200", "ep": 8, "phase": "prefill", "routing": "zipf-heavy+eplb", "T": 512, - "p50_amplification": 0.856, - "p99_amplification": 0.893 + "p50_amplification": 0.983, + "p99_amplification": 2.311 }, { - "sku": "h200", + "sku": "gb200", "ep": 8, "phase": "prefill", "routing": "zipf-heavy+eplb", "T": 1024, - "p50_amplification": 0.903, - "p99_amplification": 0.938 + "p50_amplification": 1.007, + "p99_amplification": 1.885 }, { - "sku": "h200", + "sku": "gb200", "ep": 8, "phase": "prefill", "routing": "zipf-heavy+eplb", "T": 2048, - "p50_amplification": 0.925, - "p99_amplification": 1.013 + "p50_amplification": 1.013, + "p99_amplification": 1.555 }, { - "sku": "h200", + "sku": "gb200", "ep": 8, "phase": "prefill", "routing": "zipf-heavy+eplb", "T": 4096, - "p50_amplification": 0.999, - "p99_amplification": 0.969 + "p50_amplification": 1.02, + "p99_amplification": 1.311 }, { - "sku": "h200", + "sku": "gb200", "ep": 8, "phase": "prefill", "routing": "zipf-mild", "T": 128, - "p50_amplification": 0.792, - "p99_amplification": 0.664 + "p50_amplification": 1.038, + "p99_amplification": 0.987 }, { - "sku": "h200", + "sku": "gb200", "ep": 8, "phase": "prefill", "routing": "zipf-mild", "T": 256, - "p50_amplification": 0.882, - "p99_amplification": 0.841 + "p50_amplification": 1.039, + "p99_amplification": 1.031 }, { - "sku": "h200", + "sku": "gb200", "ep": 8, "phase": "prefill", "routing": "zipf-mild", "T": 512, - "p50_amplification": 0.993, - "p99_amplification": 1.149 + "p50_amplification": 1.048, + 
"p99_amplification": 1.043 }, { - "sku": "h200", + "sku": "gb200", "ep": 8, "phase": "prefill", "routing": "zipf-mild", "T": 1024, - "p50_amplification": 1.076, - "p99_amplification": 1.025 + "p50_amplification": 1.101, + "p99_amplification": 1.084 }, { - "sku": "h200", + "sku": "gb200", "ep": 8, "phase": "prefill", "routing": "zipf-mild", "T": 2048, - "p50_amplification": 1.162, - "p99_amplification": 1.17 + "p50_amplification": 1.145, + "p99_amplification": 1.152 }, { - "sku": "h200", + "sku": "gb200", "ep": 8, "phase": "prefill", "routing": "zipf-mild", "T": 4096, - "p50_amplification": 1.251, - "p99_amplification": 1.262 + "p50_amplification": 1.207, + "p99_amplification": 1.213 }, { - "sku": "h200", + "sku": "gb200", "ep": 8, "phase": "prefill", "routing": "zipf-mild+eplb", "T": 128, - "p50_amplification": 0.739, - "p99_amplification": 0.586 + "p50_amplification": 1.007, + "p99_amplification": 0.973 }, { - "sku": "h200", + "sku": "gb200", "ep": 8, "phase": "prefill", "routing": "zipf-mild+eplb", "T": 256, - "p50_amplification": 0.802, - "p99_amplification": 0.721 + "p50_amplification": 1.011, + "p99_amplification": 0.996 }, { - "sku": "h200", + "sku": "gb200", "ep": 8, "phase": "prefill", "routing": "zipf-mild+eplb", "T": 512, - "p50_amplification": 0.862, - "p99_amplification": 0.99 + "p50_amplification": 1.001, + "p99_amplification": 0.993 }, { - "sku": "h200", + "sku": "gb200", "ep": 8, "phase": "prefill", "routing": "zipf-mild+eplb", "T": 1024, - "p50_amplification": 0.891, - "p99_amplification": 0.833 + "p50_amplification": 1.004, + "p99_amplification": 1.008 }, { - "sku": "h200", + "sku": "gb200", "ep": 8, "phase": "prefill", "routing": "zipf-mild+eplb", "T": 2048, - "p50_amplification": 0.907, - "p99_amplification": 0.896 + "p50_amplification": 1.012, + "p99_amplification": 1.012 }, { - "sku": "h200", + "sku": "gb200", "ep": 8, "phase": "prefill", "routing": "zipf-mild+eplb", "T": 4096, - "p50_amplification": 0.959, - "p99_amplification": 0.942 + 
"p50_amplification": 1.006, + "p99_amplification": 1.005 }, { - "sku": "h200", + "sku": "gb200", "ep": 8, "phase": "prefill", "routing": "zipf-moderate", "T": 128, - "p50_amplification": 0.77, - "p99_amplification": 0.566 + "p50_amplification": 1.002, + "p99_amplification": 0.971 }, { - "sku": "h200", + "sku": "gb200", "ep": 8, "phase": "prefill", "routing": "zipf-moderate", "T": 256, - "p50_amplification": 0.87, - "p99_amplification": 0.812 + "p50_amplification": 0.982, + "p99_amplification": 0.973 }, { - "sku": "h200", + "sku": "gb200", "ep": 8, "phase": "prefill", "routing": "zipf-moderate", "T": 512, - "p50_amplification": 0.992, - "p99_amplification": 1.026 + "p50_amplification": 0.973, + "p99_amplification": 0.974 }, { - "sku": "h200", + "sku": "gb200", "ep": 8, "phase": "prefill", "routing": "zipf-moderate", "T": 1024, - "p50_amplification": 1.079, - "p99_amplification": 0.978 + "p50_amplification": 1.041, + "p99_amplification": 1.045 }, { - "sku": "h200", + "sku": "gb200", "ep": 8, "phase": "prefill", "routing": "zipf-moderate", "T": 2048, - "p50_amplification": 1.148, - "p99_amplification": 1.463 + "p50_amplification": 1.13, + "p99_amplification": 1.134 }, { - "sku": "h200", + "sku": "gb200", "ep": 8, "phase": "prefill", "routing": "zipf-moderate", "T": 4096, - "p50_amplification": 1.234, - "p99_amplification": 1.254 + "p50_amplification": 1.188, + "p99_amplification": 1.197 }, { - "sku": "h200", + "sku": "gb200", "ep": 8, "phase": "prefill", "routing": "zipf-moderate+eplb", "T": 128, - "p50_amplification": 0.737, - "p99_amplification": 0.695 + "p50_amplification": 1.003, + "p99_amplification": 0.958 }, { - "sku": "h200", + "sku": "gb200", "ep": 8, "phase": "prefill", "routing": "zipf-moderate+eplb", "T": 256, - "p50_amplification": 0.781, - "p99_amplification": 0.813 + "p50_amplification": 1.002, + "p99_amplification": 0.995 }, { - "sku": "h200", + "sku": "gb200", "ep": 8, "phase": "prefill", "routing": "zipf-moderate+eplb", "T": 512, - 
"p50_amplification": 0.85, - "p99_amplification": 0.909 + "p50_amplification": 0.987, + "p99_amplification": 0.998 }, { - "sku": "h200", + "sku": "gb200", "ep": 8, "phase": "prefill", "routing": "zipf-moderate+eplb", "T": 1024, - "p50_amplification": 0.887, - "p99_amplification": 0.955 + "p50_amplification": 1.002, + "p99_amplification": 1.012 }, { - "sku": "h200", + "sku": "gb200", "ep": 8, "phase": "prefill", "routing": "zipf-moderate+eplb", "T": 2048, - "p50_amplification": 0.902, - "p99_amplification": 0.906 + "p50_amplification": 1.008, + "p99_amplification": 1.011 }, { - "sku": "h200", + "sku": "gb200", "ep": 8, "phase": "prefill", "routing": "zipf-moderate+eplb", "T": 4096, - "p50_amplification": 0.975, - "p99_amplification": 0.943 + "p50_amplification": 1.004, + "p99_amplification": 1.002 }, { - "sku": "h200", + "sku": "gb200", "ep": 8, "phase": "prefill", "routing": "zipf+eplb", "T": 128, - "p50_amplification": 0.743, - "p99_amplification": 0.551 + "p50_amplification": 0.993, + "p99_amplification": 0.988 }, { - "sku": "h200", + "sku": "gb200", "ep": 8, "phase": "prefill", "routing": "zipf+eplb", "T": 256, - "p50_amplification": 0.777, - "p99_amplification": 0.778 + "p50_amplification": 0.997, + "p99_amplification": 1.015 }, { - "sku": "h200", + "sku": "gb200", "ep": 8, "phase": "prefill", "routing": "zipf+eplb", "T": 512, - "p50_amplification": 0.848, - "p99_amplification": 0.949 + "p50_amplification": 0.989, + "p99_amplification": 0.984 }, { - "sku": "h200", + "sku": "gb200", "ep": 8, "phase": "prefill", "routing": "zipf+eplb", "T": 1024, - "p50_amplification": 0.886, - "p99_amplification": 0.844 + "p50_amplification": 0.997, + "p99_amplification": 1.002 }, { - "sku": "h200", + "sku": "gb200", "ep": 8, "phase": "prefill", "routing": "zipf+eplb", "T": 2048, - "p50_amplification": 0.904, - "p99_amplification": 0.901 + "p50_amplification": 1.012, + "p99_amplification": 1.01 }, { - "sku": "h200", + "sku": "gb200", "ep": 8, "phase": "prefill", "routing": 
"zipf+eplb", "T": 4096, - "p50_amplification": 0.967, - "p99_amplification": 0.918 + "p50_amplification": 1.004, + "p99_amplification": 1.004 }, { - "sku": "h200", + "sku": "gb300", "ep": 8, - "phase": "prefill", + "phase": "decode", "routing": "zipf", - "T": 128, - "p50_amplification": 0.757, - "p99_amplification": 0.578 + "T": 1, + "p50_amplification": 0.993, + "p99_amplification": 0.848 }, { - "sku": "h200", + "sku": "gb300", "ep": 8, - "phase": "prefill", + "phase": "decode", "routing": "zipf", - "T": 256, - "p50_amplification": 0.88, - "p99_amplification": 0.846 + "T": 2, + "p50_amplification": 0.979, + "p99_amplification": 0.897 }, { - "sku": "h200", + "sku": "gb300", "ep": 8, - "phase": "prefill", + "phase": "decode", "routing": "zipf", - "T": 512, - "p50_amplification": 0.993, - "p99_amplification": 0.981 + "T": 4, + "p50_amplification": 1, + "p99_amplification": 1.016 }, { - "sku": "h200", + "sku": "gb300", "ep": 8, - "phase": "prefill", + "phase": "decode", "routing": "zipf", - "T": 1024, - "p50_amplification": 1.092, - "p99_amplification": 0.982 + "T": 8, + "p50_amplification": 1.015, + "p99_amplification": 1.004 }, { - "sku": "h200", + "sku": "gb300", "ep": 8, - "phase": "prefill", + "phase": "decode", "routing": "zipf", - "T": 2048, - "p50_amplification": 1.145, - "p99_amplification": 1.152 + "T": 16, + "p50_amplification": 1.013, + "p99_amplification": 0.923 }, { - "sku": "h200", + "sku": "gb300", "ep": 8, - "phase": "prefill", + "phase": "decode", "routing": "zipf", - "T": 4096, - "p50_amplification": 1.224, - "p99_amplification": 1.225 + "T": 32, + "p50_amplification": 1.005, + "p99_amplification": 1.004 }, { - "sku": "h200", + "sku": "gb300", "ep": 8, - "phase": "prefill", - "routing": "zipf+eplb", - "T": 128, - "p50_amplification": 0.735, - "p99_amplification": 0.579 + "phase": "decode", + "routing": "zipf", + "T": 64, + "p50_amplification": 0.995, + "p99_amplification": 1.031 }, { - "sku": "h200", + "sku": "gb300", "ep": 8, - "phase": "prefill", 
- "routing": "zipf+eplb", - "T": 256, - "p50_amplification": 0.789, - "p99_amplification": 0.801 + "phase": "decode", + "routing": "zipf", + "T": 128, + "p50_amplification": 1.013, + "p99_amplification": 1.011 }, { - "sku": "h200", + "sku": "gb300", "ep": 8, - "phase": "prefill", - "routing": "zipf+eplb", - "T": 512, - "p50_amplification": 0.87, - "p99_amplification": 1.02 + "phase": "decode", + "routing": "zipf-heavy", + "T": 1, + "p50_amplification": 0.962, + "p99_amplification": 1.296 }, { - "sku": "h200", + "sku": "gb300", "ep": 8, - "phase": "prefill", - "routing": "zipf+eplb", - "T": 1024, - "p50_amplification": 0.918, - "p99_amplification": 0.902 + "phase": "decode", + "routing": "zipf-heavy", + "T": 8, + "p50_amplification": 0.999, + "p99_amplification": 1.418 }, { - "sku": "h200", + "sku": "gb300", "ep": 8, - "phase": "prefill", - "routing": "zipf+eplb", - "T": 2048, - "p50_amplification": 0.974, - "p99_amplification": 1.088 + "phase": "decode", + "routing": "zipf-heavy", + "T": 32, + "p50_amplification": 1.01, + "p99_amplification": 1.571 }, { - "sku": "h200", + "sku": "gb300", "ep": 8, - "phase": "prefill", - "routing": "zipf+eplb", - "T": 4096, - "p50_amplification": 1.052, - "p99_amplification": 1.156 + "phase": "decode", + "routing": "zipf-heavy", + "T": 128, + "p50_amplification": 0.972, + "p99_amplification": 1.487 }, { - "sku": "mi355x", + "sku": "gb300", "ep": 8, "phase": "decode", - "routing": "zipf", + "routing": "zipf-heavy+eplb", "T": 1, - "p50_amplification": 0.963, - "p99_amplification": 0.992 + "p50_amplification": 1.036, + "p99_amplification": 0.875 }, { - "sku": "mi355x", + "sku": "gb300", "ep": 8, "phase": "decode", - "routing": "zipf", + "routing": "zipf-heavy+eplb", "T": 2, - "p50_amplification": 0.955, - "p99_amplification": 0.961 + "p50_amplification": 1.012, + "p99_amplification": 1.008 }, { - "sku": "mi355x", + "sku": "gb300", "ep": 8, "phase": "decode", - "routing": "zipf", + "routing": "zipf-heavy+eplb", "T": 4, - 
"p50_amplification": 0.979, - "p99_amplification": 0.957 + "p50_amplification": 1.021, + "p99_amplification": 1.099 }, { - "sku": "mi355x", + "sku": "gb300", "ep": 8, "phase": "decode", - "routing": "zipf", + "routing": "zipf-heavy+eplb", "T": 8, - "p50_amplification": 0.986, - "p99_amplification": 1.042 + "p50_amplification": 1.037, + "p99_amplification": 0.996 }, { - "sku": "mi355x", + "sku": "gb300", "ep": 8, "phase": "decode", - "routing": "zipf", + "routing": "zipf-heavy+eplb", "T": 16, - "p50_amplification": 0.98, - "p99_amplification": 1.009 - }, - { - "sku": "mi355x", - "ep": 8, - "phase": "decode", - "routing": "zipf-heavy", - "T": 1, - "p50_amplification": 0.945, - "p99_amplification": 0.974 - }, - { - "sku": "mi355x", - "ep": 8, - "phase": "decode", - "routing": "zipf-heavy", - "T": 2, - "p50_amplification": 0.951, - "p99_amplification": 0.976 + "p50_amplification": 1.06, + "p99_amplification": 1.008 }, { - "sku": "mi355x", + "sku": "gb300", "ep": 8, "phase": "decode", - "routing": "zipf-heavy", - "T": 4, - "p50_amplification": 0.977, - "p99_amplification": 0.969 + "routing": "zipf-heavy+eplb", + "T": 32, + "p50_amplification": 1.055, + "p99_amplification": 1.108 }, { - "sku": "mi355x", + "sku": "gb300", "ep": 8, "phase": "decode", - "routing": "zipf-heavy", - "T": 8, - "p50_amplification": 0.972, - "p99_amplification": 0.952 + "routing": "zipf-heavy+eplb", + "T": 64, + "p50_amplification": 1.016, + "p99_amplification": 1.145 }, { - "sku": "mi355x", + "sku": "gb300", "ep": 8, "phase": "decode", - "routing": "zipf-heavy", - "T": 16, - "p50_amplification": 0.97, - "p99_amplification": 0.925 + "routing": "zipf-heavy+eplb", + "T": 128, + "p50_amplification": 1.02, + "p99_amplification": 1.04 }, { - "sku": "mi355x", + "sku": "gb300", "ep": 8, "phase": "decode", - "routing": "zipf+eplb", + "routing": "zipf-mild", "T": 1, - "p50_amplification": 0.969, - "p99_amplification": 0.978 + "p50_amplification": 0.947, + "p99_amplification": 0.787 }, { - "sku": "mi355x", 
+ "sku": "gb300", "ep": 8, "phase": "decode", - "routing": "zipf+eplb", + "routing": "zipf-mild", "T": 2, - "p50_amplification": 0.98, - "p99_amplification": 0.984 + "p50_amplification": 0.937, + "p99_amplification": 0.874 }, { - "sku": "mi355x", + "sku": "gb300", "ep": 8, "phase": "decode", - "routing": "zipf+eplb", + "routing": "zipf-mild", "T": 4, - "p50_amplification": 0.993, - "p99_amplification": 0.968 + "p50_amplification": 0.946, + "p99_amplification": 1.009 }, { - "sku": "mi355x", + "sku": "gb300", "ep": 8, "phase": "decode", - "routing": "zipf+eplb", + "routing": "zipf-mild", "T": 8, - "p50_amplification": 0.984, - "p99_amplification": 0.99 + "p50_amplification": 0.966, + "p99_amplification": 0.924 }, { - "sku": "mi355x", + "sku": "gb300", "ep": 8, - "phase": "decode", - "routing": "zipf+eplb", - "T": 16, - "p50_amplification": 0.992, - "p99_amplification": 0.99 - } - ] - }, - "nccl": [ - { - "id": "cxn-a8203ce9", - "identity": "nccl|b300|all_gather|b300-nvlink-island|nvlink|8|nccl-tests-v1", - "op": "all_gather", - "sku": "b300", - "runner": "b300-nv_03", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "nodes": 1, - "dtype": "float", - "comparisonClass": "standardized", - "comparisonKey": "e6eafb7204b78dd3", - "measurementContract": "nccl-tests-v1", - "avgBusBandwidthGbps": 186.922, - "status": "valid", - "valid": true, - "colorKey": "b300_a8203ce9", - "label": "B300 · b300-nvlink-island · nvlink (ws8)", - "generatedAt": "2026-06-27T11:18:41.342024+00:00", - "run": { - "id": null, - "url": null, - "createdAt": "2026-06-27T11:18:41.342024+00:00", - "sha": null - }, - "rows": [ - { - "sizeBytes": 128, - "dtype": "float", - "latencyUs": 27.36, - "algBandwidthGbps": 0, - "busBandwidthGbps": 0, - "outOfPlaceUs": 27.36, - "inPlaceUs": 27.26, - "correct": true - }, - { - "sizeBytes": 256, - "dtype": "float", - "latencyUs": 26.88, - "algBandwidthGbps": 0.01, - "busBandwidthGbps": 0.01, - "outOfPlaceUs": 26.88, - "inPlaceUs": 
26.89, - "correct": true - }, - { - "sizeBytes": 512, - "dtype": "float", - "latencyUs": 27.11, - "algBandwidthGbps": 0.02, - "busBandwidthGbps": 0.02, - "outOfPlaceUs": 27.11, - "inPlaceUs": 27.07, - "correct": true - }, - { - "sizeBytes": 1024, - "dtype": "float", - "latencyUs": 26.64, - "algBandwidthGbps": 0.04, - "busBandwidthGbps": 0.03, - "outOfPlaceUs": 26.64, - "inPlaceUs": 26.87, - "correct": true - }, - { - "sizeBytes": 2048, - "dtype": "float", - "latencyUs": 27.03, - "algBandwidthGbps": 0.08, - "busBandwidthGbps": 0.07, - "outOfPlaceUs": 27.03, - "inPlaceUs": 26.8, - "correct": true - }, - { - "sizeBytes": 4096, - "dtype": "float", - "latencyUs": 26.95, - "algBandwidthGbps": 0.15, - "busBandwidthGbps": 0.13, - "outOfPlaceUs": 26.95, - "inPlaceUs": 27.51, - "correct": true - }, - { - "sizeBytes": 8192, - "dtype": "float", - "latencyUs": 26.84, - "algBandwidthGbps": 0.31, - "busBandwidthGbps": 0.27, - "outOfPlaceUs": 27.05, - "inPlaceUs": 26.84, - "correct": true - }, - { - "sizeBytes": 16384, - "dtype": "float", - "latencyUs": 27.2, - "algBandwidthGbps": 0.6, - "busBandwidthGbps": 0.53, - "outOfPlaceUs": 27.2, - "inPlaceUs": 26.86, - "correct": true - }, - { - "sizeBytes": 32768, - "dtype": "float", - "latencyUs": 26.68, - "algBandwidthGbps": 1.23, - "busBandwidthGbps": 1.07, - "outOfPlaceUs": 26.98, - "inPlaceUs": 26.68, - "correct": true - }, - { - "sizeBytes": 65536, - "dtype": "float", - "latencyUs": 26.75, - "algBandwidthGbps": 2.45, - "busBandwidthGbps": 2.14, - "outOfPlaceUs": 26.89, - "inPlaceUs": 26.75, - "correct": true - }, - { - "sizeBytes": 131072, - "dtype": "float", - "latencyUs": 27.63, - "algBandwidthGbps": 4.74, - "busBandwidthGbps": 4.15, - "outOfPlaceUs": 27.63, - "inPlaceUs": 27.81, - "correct": true - }, - { - "sizeBytes": 262144, - "dtype": "float", - "latencyUs": 28.34, - "algBandwidthGbps": 9.25, - "busBandwidthGbps": 8.09, - "outOfPlaceUs": 28.34, - "inPlaceUs": 28.46, - "correct": true - }, - { - "sizeBytes": 524288, - "dtype": 
"float", - "latencyUs": 29.45, - "algBandwidthGbps": 17.8, - "busBandwidthGbps": 15.58, - "outOfPlaceUs": 29.49, - "inPlaceUs": 29.45, - "correct": true - }, - { - "sizeBytes": 1048576, - "dtype": "float", - "latencyUs": 31.36, - "algBandwidthGbps": 33.43, - "busBandwidthGbps": 29.25, - "outOfPlaceUs": 31.51, - "inPlaceUs": 31.36, - "correct": true - }, - { - "sizeBytes": 2097152, - "dtype": "float", - "latencyUs": 35.8, - "algBandwidthGbps": 58.58, - "busBandwidthGbps": 51.26, - "outOfPlaceUs": 35.94, - "inPlaceUs": 35.8, - "correct": true - }, - { - "sizeBytes": 4194304, - "dtype": "float", - "latencyUs": 36.17, - "algBandwidthGbps": 115.95, - "busBandwidthGbps": 101.45, - "outOfPlaceUs": 36.29, - "inPlaceUs": 36.17, - "correct": true - }, - { - "sizeBytes": 8388608, - "dtype": "float", - "latencyUs": 36.99, - "algBandwidthGbps": 226.76, - "busBandwidthGbps": 198.42, - "outOfPlaceUs": 37.02, - "inPlaceUs": 36.99, - "correct": true - }, - { - "sizeBytes": 16777216, - "dtype": "float", - "latencyUs": 47.07, - "algBandwidthGbps": 356.41, - "busBandwidthGbps": 311.86, - "outOfPlaceUs": 47.08, - "inPlaceUs": 47.07, - "correct": true - }, - { - "sizeBytes": 33554432, - "dtype": "float", - "latencyUs": 74.95, - "algBandwidthGbps": 447.68, - "busBandwidthGbps": 391.72, - "outOfPlaceUs": 75.78, - "inPlaceUs": 74.95, - "correct": true - }, - { - "sizeBytes": 67108864, - "dtype": "float", - "latencyUs": 138.64, - "algBandwidthGbps": 484.06, - "busBandwidthGbps": 423.55, - "outOfPlaceUs": 139.26, - "inPlaceUs": 138.64, - "correct": true - }, - { - "sizeBytes": 134217728, - "dtype": "float", - "latencyUs": 211.47, - "algBandwidthGbps": 634.68, - "busBandwidthGbps": 555.34, - "outOfPlaceUs": 211.47, - "inPlaceUs": 211.53, - "correct": true - }, - { - "sizeBytes": 268435456, - "dtype": "float", - "latencyUs": 399.32, - "algBandwidthGbps": 672.24, - "busBandwidthGbps": 588.21, - "outOfPlaceUs": 399.32, - "inPlaceUs": 399.95, - "correct": true - }, - { - "sizeBytes": 536870912, - 
"dtype": "float", - "latencyUs": 779.11, - "algBandwidthGbps": 689.08, - "busBandwidthGbps": 602.95, - "outOfPlaceUs": 779.96, - "inPlaceUs": 779.11, - "correct": true - }, - { - "sizeBytes": 1073741824, - "dtype": "float", - "latencyUs": 1532.87, - "algBandwidthGbps": 700.48, - "busBandwidthGbps": 612.92, - "outOfPlaceUs": 1533.45, - "inPlaceUs": 1532.87, - "correct": true - }, - { - "sizeBytes": 2147483648, - "dtype": "float", - "latencyUs": 3010.48, - "algBandwidthGbps": 713.34, - "busBandwidthGbps": 624.17, - "outOfPlaceUs": 3010.48, - "inPlaceUs": 3011.29, - "correct": true - }, - { - "sizeBytes": 4294967296, - "dtype": "float", - "latencyUs": 5911.41, - "algBandwidthGbps": 726.55, - "busBandwidthGbps": 635.74, - "outOfPlaceUs": 5949.57, - "inPlaceUs": 5911.41, - "correct": true - }, - { - "sizeBytes": 8589934592, - "dtype": "float", - "latencyUs": 11675.3, - "algBandwidthGbps": 735.74, - "busBandwidthGbps": 643.77, - "outOfPlaceUs": 11728.1, - "inPlaceUs": 11675.3, - "correct": true - } - ] - }, - { - "id": "cxn-17454439", - "identity": "nccl|h100|all_gather|h100-nvlink-island|nvlink|8|nccl-tests-v1", - "op": "all_gather", - "sku": "h100", - "runner": "h100-dgxc-slurm_09", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "nodes": 1, - "dtype": "float", - "comparisonClass": "standardized", - "comparisonKey": "dacea770825df094", - "measurementContract": "nccl-tests-v1", - "avgBusBandwidthGbps": 110.587, - "status": "valid", - "valid": true, - "colorKey": "h100_17454439", - "label": "H100 · h100-nvlink-island · nvlink (ws8)", - "generatedAt": "2026-06-27T11:18:57.699787+00:00", - "run": { - "id": null, - "url": null, - "createdAt": "2026-06-27T11:18:57.699787+00:00", - "sha": null - }, - "rows": [ - { - "sizeBytes": 128, - "dtype": "float", - "latencyUs": 40.4, - "algBandwidthGbps": 0, - "busBandwidthGbps": 0, - "outOfPlaceUs": 40.4, - "inPlaceUs": 39.34, - "correct": true - }, - { - "sizeBytes": 256, - "dtype": "float", - 
"latencyUs": 38.62, - "algBandwidthGbps": 0.01, - "busBandwidthGbps": 0.01, - "outOfPlaceUs": 38.62, - "inPlaceUs": 38.09, - "correct": true - }, - { - "sizeBytes": 512, - "dtype": "float", - "latencyUs": 38.41, - "algBandwidthGbps": 0.01, - "busBandwidthGbps": 0.01, - "outOfPlaceUs": 38.41, - "inPlaceUs": 38.32, - "correct": true - }, - { - "sizeBytes": 1024, - "dtype": "float", - "latencyUs": 38.68, - "algBandwidthGbps": 0.03, - "busBandwidthGbps": 0.02, - "outOfPlaceUs": 38.68, - "inPlaceUs": 37.58, - "correct": true - }, - { - "sizeBytes": 2048, - "dtype": "float", - "latencyUs": 37.29, - "algBandwidthGbps": 0.05, - "busBandwidthGbps": 0.05, - "outOfPlaceUs": 37.29, - "inPlaceUs": 37.12, - "correct": true - }, - { - "sizeBytes": 4096, - "dtype": "float", - "latencyUs": 37.53, - "algBandwidthGbps": 0.11, - "busBandwidthGbps": 0.1, - "outOfPlaceUs": 37.53, - "inPlaceUs": 37.17, - "correct": true - }, - { - "sizeBytes": 8192, - "dtype": "float", - "latencyUs": 37.52, - "algBandwidthGbps": 0.22, - "busBandwidthGbps": 0.19, - "outOfPlaceUs": 37.52, - "inPlaceUs": 37.53, - "correct": true - }, - { - "sizeBytes": 16384, - "dtype": "float", - "latencyUs": 37.13, - "algBandwidthGbps": 0.44, - "busBandwidthGbps": 0.39, - "outOfPlaceUs": 37.13, - "inPlaceUs": 37.09, - "correct": true - }, - { - "sizeBytes": 32768, - "dtype": "float", - "latencyUs": 37.43, - "algBandwidthGbps": 0.88, - "busBandwidthGbps": 0.77, - "outOfPlaceUs": 37.43, - "inPlaceUs": 37.42, - "correct": true - }, - { - "sizeBytes": 65536, - "dtype": "float", - "latencyUs": 37.64, - "algBandwidthGbps": 1.74, - "busBandwidthGbps": 1.52, - "outOfPlaceUs": 37.64, - "inPlaceUs": 37.63, - "correct": true - }, - { - "sizeBytes": 131072, - "dtype": "float", - "latencyUs": 38.19, - "algBandwidthGbps": 3.43, - "busBandwidthGbps": 3, - "outOfPlaceUs": 38.48, - "inPlaceUs": 38.19, - "correct": true - }, - { - "sizeBytes": 262144, - "dtype": "float", - "latencyUs": 39.66, - "algBandwidthGbps": 6.61, - 
"busBandwidthGbps": 5.78, - "outOfPlaceUs": 39.66, - "inPlaceUs": 40.15, - "correct": true - }, - { - "sizeBytes": 524288, - "dtype": "float", - "latencyUs": 41.79, - "algBandwidthGbps": 12.55, - "busBandwidthGbps": 10.98, - "outOfPlaceUs": 42.17, - "inPlaceUs": 41.79, - "correct": true - }, - { - "sizeBytes": 1048576, - "dtype": "float", - "latencyUs": 43.89, - "algBandwidthGbps": 23.89, - "busBandwidthGbps": 20.9, - "outOfPlaceUs": 45.09, - "inPlaceUs": 43.89, - "correct": true - }, - { - "sizeBytes": 2097152, - "dtype": "float", - "latencyUs": 44.32, - "algBandwidthGbps": 47.31, - "busBandwidthGbps": 41.4, - "outOfPlaceUs": 44.55, - "inPlaceUs": 44.32, - "correct": true - }, - { - "sizeBytes": 4194304, - "dtype": "float", - "latencyUs": 44.97, - "algBandwidthGbps": 93.27, - "busBandwidthGbps": 81.61, - "outOfPlaceUs": 44.97, - "inPlaceUs": 45, - "correct": true - }, - { - "sizeBytes": 8388608, - "dtype": "float", - "latencyUs": 45.6, - "algBandwidthGbps": 183.98, - "busBandwidthGbps": 160.98, - "outOfPlaceUs": 46.08, - "inPlaceUs": 45.6, - "correct": true - }, - { - "sizeBytes": 16777216, - "dtype": "float", - "latencyUs": 67.94, - "algBandwidthGbps": 246.95, - "busBandwidthGbps": 216.08, - "outOfPlaceUs": 70.1, - "inPlaceUs": 67.94, - "correct": true - }, - { - "sizeBytes": 33554432, - "dtype": "float", - "latencyUs": 122.71, - "algBandwidthGbps": 273.44, - "busBandwidthGbps": 239.26, - "outOfPlaceUs": 125.34, - "inPlaceUs": 122.71, - "correct": true - }, - { - "sizeBytes": 67108864, - "dtype": "float", - "latencyUs": 206.56, - "algBandwidthGbps": 324.88, - "busBandwidthGbps": 284.27, - "outOfPlaceUs": 210.98, - "inPlaceUs": 206.56, - "correct": true - }, - { - "sizeBytes": 134217728, - "dtype": "float", - "latencyUs": 390.25, - "algBandwidthGbps": 343.93, - "busBandwidthGbps": 300.94, - "outOfPlaceUs": 396.19, - "inPlaceUs": 390.25, - "correct": true - }, - { - "sizeBytes": 268435456, - "dtype": "float", - "latencyUs": 728.52, - "algBandwidthGbps": 368.47, - 
"busBandwidthGbps": 322.41, - "outOfPlaceUs": 733.59, - "inPlaceUs": 728.52, - "correct": true - }, - { - "sizeBytes": 536870912, - "dtype": "float", - "latencyUs": 1394.3, - "algBandwidthGbps": 385.05, - "busBandwidthGbps": 336.92, - "outOfPlaceUs": 1397.39, - "inPlaceUs": 1394.3, - "correct": true - }, - { - "sizeBytes": 1073741824, - "dtype": "float", - "latencyUs": 2705.03, - "algBandwidthGbps": 396.94, - "busBandwidthGbps": 347.33, - "outOfPlaceUs": 2729.3, - "inPlaceUs": 2705.03, - "correct": true - }, - { - "sizeBytes": 2147483648, - "dtype": "float", - "latencyUs": 5306.37, - "algBandwidthGbps": 404.7, - "busBandwidthGbps": 354.11, - "outOfPlaceUs": 5374.68, - "inPlaceUs": 5306.37, - "correct": true - }, - { - "sizeBytes": 4294967296, - "dtype": "float", - "latencyUs": 10451.7, - "algBandwidthGbps": 410.93, - "busBandwidthGbps": 359.57, - "outOfPlaceUs": 10616.4, - "inPlaceUs": 10451.7, - "correct": true - }, - { - "sizeBytes": 8589934592, - "dtype": "float", - "latencyUs": 20734.1, - "algBandwidthGbps": 414.29, - "busBandwidthGbps": 362.5, - "outOfPlaceUs": 21013.2, - "inPlaceUs": 20734.1, - "correct": true - } - ] - }, - { - "id": "cxn-cc1fe619", - "identity": "nccl|h200|all_gather|h200-nvlink-island|nvlink|8|nccl-tests-v1", - "op": "all_gather", - "sku": "h200", - "runner": "h200-dgxc-slurm_2", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "nodes": 1, - "dtype": "float", - "comparisonClass": "standardized", - "comparisonKey": "e2f081a269356db7", - "measurementContract": "nccl-tests-v1", - "avgBusBandwidthGbps": 111.028, - "status": "valid", - "valid": true, - "colorKey": "h200_cc1fe619", - "label": "H200 · h200-nvlink-island · nvlink (ws8)", - "generatedAt": "2026-06-27T11:19:06.426368+00:00", - "run": { - "id": null, - "url": null, - "createdAt": "2026-06-27T11:19:06.426368+00:00", - "sha": null - }, - "rows": [ - { - "sizeBytes": 128, - "dtype": "float", - "latencyUs": 40.82, - "algBandwidthGbps": 0, - 
"busBandwidthGbps": 0, - "outOfPlaceUs": 40.82, - "inPlaceUs": 37.72, - "correct": true - }, - { - "sizeBytes": 256, - "dtype": "float", - "latencyUs": 39.21, - "algBandwidthGbps": 0.01, - "busBandwidthGbps": 0.01, - "outOfPlaceUs": 39.21, - "inPlaceUs": 38.7, - "correct": true - }, - { - "sizeBytes": 512, - "dtype": "float", - "latencyUs": 39.32, - "algBandwidthGbps": 0.01, - "busBandwidthGbps": 0.01, - "outOfPlaceUs": 39.32, - "inPlaceUs": 38.61, - "correct": true - }, - { - "sizeBytes": 1024, - "dtype": "float", - "latencyUs": 38.35, - "algBandwidthGbps": 0.03, - "busBandwidthGbps": 0.02, - "outOfPlaceUs": 38.35, - "inPlaceUs": 37.52, - "correct": true - }, - { - "sizeBytes": 2048, - "dtype": "float", - "latencyUs": 38.87, - "algBandwidthGbps": 0.05, - "busBandwidthGbps": 0.05, - "outOfPlaceUs": 38.87, - "inPlaceUs": 37.95, - "correct": true - }, - { - "sizeBytes": 4096, - "dtype": "float", - "latencyUs": 39.96, - "algBandwidthGbps": 0.1, - "busBandwidthGbps": 0.09, - "outOfPlaceUs": 39.96, - "inPlaceUs": 38.92, - "correct": true - }, - { - "sizeBytes": 8192, - "dtype": "float", - "latencyUs": 38.44, - "algBandwidthGbps": 0.21, - "busBandwidthGbps": 0.19, - "outOfPlaceUs": 38.44, - "inPlaceUs": 38.97, - "correct": true - }, - { - "sizeBytes": 16384, - "dtype": "float", - "latencyUs": 39.02, - "algBandwidthGbps": 0.42, - "busBandwidthGbps": 0.37, - "outOfPlaceUs": 39.02, - "inPlaceUs": 38.61, - "correct": true - }, - { - "sizeBytes": 32768, - "dtype": "float", - "latencyUs": 37.96, - "algBandwidthGbps": 0.86, - "busBandwidthGbps": 0.76, - "outOfPlaceUs": 37.96, - "inPlaceUs": 39.63, - "correct": true - }, - { - "sizeBytes": 65536, - "dtype": "float", - "latencyUs": 38.78, - "algBandwidthGbps": 1.69, - "busBandwidthGbps": 1.48, - "outOfPlaceUs": 38.78, - "inPlaceUs": 38.91, - "correct": true - }, - { - "sizeBytes": 131072, - "dtype": "float", - "latencyUs": 39.77, - "algBandwidthGbps": 3.3, - "busBandwidthGbps": 2.88, - "outOfPlaceUs": 39.77, - "inPlaceUs": 40.11, 
- "correct": true - }, - { - "sizeBytes": 262144, - "dtype": "float", - "latencyUs": 40.4, - "algBandwidthGbps": 6.49, - "busBandwidthGbps": 5.68, - "outOfPlaceUs": 40.56, - "inPlaceUs": 40.4, - "correct": true - }, - { - "sizeBytes": 524288, - "dtype": "float", - "latencyUs": 42.21, - "algBandwidthGbps": 12.42, - "busBandwidthGbps": 10.87, - "outOfPlaceUs": 42.21, - "inPlaceUs": 48.64, - "correct": true - }, - { - "sizeBytes": 1048576, - "dtype": "float", - "latencyUs": 44.25, - "algBandwidthGbps": 23.7, - "busBandwidthGbps": 20.73, - "outOfPlaceUs": 46.55, - "inPlaceUs": 44.25, - "correct": true - }, - { - "sizeBytes": 2097152, - "dtype": "float", - "latencyUs": 44.67, - "algBandwidthGbps": 46.95, - "busBandwidthGbps": 41.08, - "outOfPlaceUs": 45.93, - "inPlaceUs": 44.67, - "correct": true - }, - { - "sizeBytes": 4194304, - "dtype": "float", - "latencyUs": 45.73, - "algBandwidthGbps": 91.71, - "busBandwidthGbps": 80.25, - "outOfPlaceUs": 45.73, - "inPlaceUs": 50.3, - "correct": true - }, - { - "sizeBytes": 8388608, - "dtype": "float", - "latencyUs": 49.87, - "algBandwidthGbps": 168.19, - "busBandwidthGbps": 147.17, - "outOfPlaceUs": 49.87, - "inPlaceUs": 49.89, - "correct": true - }, - { - "sizeBytes": 16777216, - "dtype": "float", - "latencyUs": 66.01, - "algBandwidthGbps": 254.16, - "busBandwidthGbps": 222.39, - "outOfPlaceUs": 66.91, - "inPlaceUs": 66.01, - "correct": true - }, - { - "sizeBytes": 33554432, - "dtype": "float", - "latencyUs": 119.98, - "algBandwidthGbps": 279.66, - "busBandwidthGbps": 244.7, - "outOfPlaceUs": 123.43, - "inPlaceUs": 119.98, - "correct": true - }, - { - "sizeBytes": 67108864, - "dtype": "float", - "latencyUs": 203.19, - "algBandwidthGbps": 330.27, - "busBandwidthGbps": 288.99, - "outOfPlaceUs": 207.29, - "inPlaceUs": 203.19, - "correct": true - }, - { - "sizeBytes": 134217728, - "dtype": "float", - "latencyUs": 376.8, - "algBandwidthGbps": 356.2, - "busBandwidthGbps": 311.68, - "outOfPlaceUs": 380.65, - "inPlaceUs": 376.8, - 
"correct": true - }, - { - "sizeBytes": 268435456, - "dtype": "float", - "latencyUs": 719.69, - "algBandwidthGbps": 372.99, - "busBandwidthGbps": 326.36, - "outOfPlaceUs": 725.33, - "inPlaceUs": 719.69, - "correct": true - }, - { - "sizeBytes": 536870912, - "dtype": "float", - "latencyUs": 1381.87, - "algBandwidthGbps": 388.51, - "busBandwidthGbps": 339.95, - "outOfPlaceUs": 1395.46, - "inPlaceUs": 1381.87, - "correct": true - }, - { - "sizeBytes": 1073741824, - "dtype": "float", - "latencyUs": 2707.03, - "algBandwidthGbps": 396.65, - "busBandwidthGbps": 347.07, - "outOfPlaceUs": 2726.86, - "inPlaceUs": 2707.03, - "correct": true - }, - { - "sizeBytes": 2147483648, - "dtype": "float", - "latencyUs": 5309.69, - "algBandwidthGbps": 404.45, - "busBandwidthGbps": 353.89, - "outOfPlaceUs": 5364.37, - "inPlaceUs": 5309.69, - "correct": true - }, - { - "sizeBytes": 4294967296, - "dtype": "float", - "latencyUs": 10464.7, - "algBandwidthGbps": 410.42, - "busBandwidthGbps": 359.12, - "outOfPlaceUs": 10637.1, - "inPlaceUs": 10464.7, - "correct": true - }, - { - "sizeBytes": 8589934592, - "dtype": "float", - "latencyUs": 20742.5, - "algBandwidthGbps": 414.12, - "busBandwidthGbps": 362.36, - "outOfPlaceUs": 21038.3, - "inPlaceUs": 20742.5, - "correct": true - } - ] - }, - { - "id": "cxn-e1de3b53", - "identity": "nccl|mi355x|all_gather|mi355x-xgmi|xgmi|8|nccl-tests-v1", - "op": "all_gather", - "sku": "mi355x", - "runner": "mi355x-amds_01", - "topologyClass": "mi355x-xgmi", - "transport": "xgmi", - "worldSize": 8, - "nodes": 1, - "dtype": "float", - "comparisonClass": "standardized", - "comparisonKey": "8f8417874bf37410", - "measurementContract": "nccl-tests-v1", - "avgBusBandwidthGbps": 114.277, - "status": "valid", - "valid": true, - "colorKey": "mi355x_e1de3b53", - "label": "MI355X · mi355x-xgmi · xgmi (ws8)", - "generatedAt": "2026-06-29T02:39:13.078018+00:00", - "run": { - "id": null, - "url": null, - "createdAt": "2026-06-29T02:39:13.078018+00:00", - "sha": null - }, - 
"rows": [ - { - "sizeBytes": 128, - "dtype": "float", - "latencyUs": 187.5, - "algBandwidthGbps": 0, - "busBandwidthGbps": 0, - "outOfPlaceUs": 187.5, - "inPlaceUs": 204.9, - "correct": true - }, - { - "sizeBytes": 256, - "dtype": "float", - "latencyUs": 176.3, - "algBandwidthGbps": 0, - "busBandwidthGbps": 0, - "outOfPlaceUs": 176.3, - "inPlaceUs": 202.7, - "correct": true - }, - { - "sizeBytes": 512, - "dtype": "float", - "latencyUs": 176.5, - "algBandwidthGbps": 0, - "busBandwidthGbps": 0, - "outOfPlaceUs": 176.5, - "inPlaceUs": 204, - "correct": true - }, - { - "sizeBytes": 1024, - "dtype": "float", - "latencyUs": 173.8, - "algBandwidthGbps": 0.01, - "busBandwidthGbps": 0.01, - "outOfPlaceUs": 173.8, - "inPlaceUs": 170.9, - "correct": true - }, - { - "sizeBytes": 2048, - "dtype": "float", - "latencyUs": 177.9, - "algBandwidthGbps": 0.01, - "busBandwidthGbps": 0.01, - "outOfPlaceUs": 177.9, - "inPlaceUs": 171.6, - "correct": true - }, - { - "sizeBytes": 4096, - "dtype": "float", - "latencyUs": 175.4, - "algBandwidthGbps": 0.02, - "busBandwidthGbps": 0.02, - "outOfPlaceUs": 175.4, - "inPlaceUs": 171.3, - "correct": true - }, - { - "sizeBytes": 8192, - "dtype": "float", - "latencyUs": 173.1, - "algBandwidthGbps": 0.05, - "busBandwidthGbps": 0.04, - "outOfPlaceUs": 210, - "inPlaceUs": 173.1, - "correct": true - }, - { - "sizeBytes": 16384, - "dtype": "float", - "latencyUs": 172.7, - "algBandwidthGbps": 0.09, - "busBandwidthGbps": 0.08, - "outOfPlaceUs": 210.5, - "inPlaceUs": 172.7, - "correct": true - }, - { - "sizeBytes": 32768, - "dtype": "float", - "latencyUs": 173.2, - "algBandwidthGbps": 0.19, - "busBandwidthGbps": 0.17, - "outOfPlaceUs": 210.5, - "inPlaceUs": 173.2, - "correct": true - }, - { - "sizeBytes": 65536, - "dtype": "float", - "latencyUs": 177.8, - "algBandwidthGbps": 0.37, - "busBandwidthGbps": 0.32, - "outOfPlaceUs": 215.9, - "inPlaceUs": 177.8, - "correct": true - }, - { - "sizeBytes": 131072, - "dtype": "float", - "latencyUs": 126.3, - 
"algBandwidthGbps": 1.04, - "busBandwidthGbps": 0.91, - "outOfPlaceUs": 223.8, - "inPlaceUs": 126.3, - "correct": true - }, - { - "sizeBytes": 262144, - "dtype": "float", - "latencyUs": 136.7, - "algBandwidthGbps": 1.92, - "busBandwidthGbps": 1.68, - "outOfPlaceUs": 139.9, - "inPlaceUs": 136.7, - "correct": true - }, - { - "sizeBytes": 524288, - "dtype": "float", - "latencyUs": 137.9, - "algBandwidthGbps": 3.8, - "busBandwidthGbps": 3.33, - "outOfPlaceUs": 140.8, - "inPlaceUs": 137.9, - "correct": true - }, - { - "sizeBytes": 1048576, - "dtype": "float", - "latencyUs": 124.5, - "algBandwidthGbps": 8.42, - "busBandwidthGbps": 7.37, - "outOfPlaceUs": 124.5, - "inPlaceUs": 142, - "correct": true - }, - { - "sizeBytes": 2097152, - "dtype": "float", - "latencyUs": 129.4, - "algBandwidthGbps": 16.21, - "busBandwidthGbps": 14.18, - "outOfPlaceUs": 129.4, - "inPlaceUs": 148.6, - "correct": true - }, - { - "sizeBytes": 4194304, - "dtype": "float", - "latencyUs": 140.7, - "algBandwidthGbps": 29.82, - "busBandwidthGbps": 26.09, - "outOfPlaceUs": 140.7, - "inPlaceUs": 158.2, - "correct": true - }, - { - "sizeBytes": 8388608, - "dtype": "float", - "latencyUs": 141.3, - "algBandwidthGbps": 59.35, - "busBandwidthGbps": 51.93, - "outOfPlaceUs": 141.3, - "inPlaceUs": 158.8, - "correct": true - }, - { - "sizeBytes": 16777216, - "dtype": "float", - "latencyUs": 88.34, - "algBandwidthGbps": 189.91, - "busBandwidthGbps": 166.17, - "outOfPlaceUs": 128.5, - "inPlaceUs": 88.34, - "correct": true - }, - { - "sizeBytes": 33554432, - "dtype": "float", - "latencyUs": 104.3, - "algBandwidthGbps": 321.59, - "busBandwidthGbps": 281.39, - "outOfPlaceUs": 142.4, - "inPlaceUs": 104.3, - "correct": true - }, - { - "sizeBytes": 67108864, - "dtype": "float", - "latencyUs": 174.1, - "algBandwidthGbps": 385.36, - "busBandwidthGbps": 337.19, - "outOfPlaceUs": 174.1, - "inPlaceUs": 174.4, - "correct": true - }, - { - "sizeBytes": 134217728, - "dtype": "float", - "latencyUs": 320.4, - "algBandwidthGbps": 
418.92, - "busBandwidthGbps": 366.55, - "outOfPlaceUs": 320.9, - "inPlaceUs": 320.4, - "correct": true - }, - { - "sizeBytes": 268435456, - "dtype": "float", - "latencyUs": 616.6, - "algBandwidthGbps": 435.32, - "busBandwidthGbps": 380.9, - "outOfPlaceUs": 625.8, - "inPlaceUs": 616.6, - "correct": true - }, - { - "sizeBytes": 536870912, - "dtype": "float", - "latencyUs": 1206.3, - "algBandwidthGbps": 445.05, - "busBandwidthGbps": 389.42, - "outOfPlaceUs": 1207.7, - "inPlaceUs": 1206.3, - "correct": true - }, - { - "sizeBytes": 1073741824, - "dtype": "float", - "latencyUs": 2396.4, - "algBandwidthGbps": 448.06, - "busBandwidthGbps": 392.06, - "outOfPlaceUs": 2396.4, - "inPlaceUs": 2399, - "correct": true - }, - { - "sizeBytes": 2147483648, - "dtype": "float", - "latencyUs": 4736.7, - "algBandwidthGbps": 453.37, - "busBandwidthGbps": 396.7, - "outOfPlaceUs": 4750.2, - "inPlaceUs": 4736.7, - "correct": true - }, - { - "sizeBytes": 4294967296, - "dtype": "float", - "latencyUs": 9395.7, - "algBandwidthGbps": 457.12, - "busBandwidthGbps": 399.98, - "outOfPlaceUs": 9395.7, - "inPlaceUs": 9416.8, - "correct": true - }, - { - "sizeBytes": 8589934592, - "dtype": "float", - "latencyUs": 18643, - "algBandwidthGbps": 460.77, - "busBandwidthGbps": 403.17, - "outOfPlaceUs": 18643, - "inPlaceUs": 18899, - "correct": true - } - ] - }, - { - "id": "cxn-940e3e1c", - "identity": "nccl|b300|all_reduce|b300-nvlink-island|nvlink|8|nccl-tests-v1", - "op": "all_reduce", - "sku": "b300", - "runner": "b300-nv_03", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "nodes": 1, - "dtype": "float", - "comparisonClass": "standardized", - "comparisonKey": "139076c9959b0653", - "measurementContract": "nccl-tests-v1", - "avgBusBandwidthGbps": 218.816, - "status": "valid", - "valid": true, - "colorKey": "b300_940e3e1c", - "label": "B300 · b300-nvlink-island · nvlink (ws8)", - "generatedAt": "2026-06-27T11:18:24.142157+00:00", - "run": { - "id": null, - "url": null, 
- "createdAt": "2026-06-27T11:18:24.142157+00:00", - "sha": null - }, - "rows": [ - { - "sizeBytes": 8, - "dtype": "float", - "latencyUs": 28.3, - "algBandwidthGbps": 0, - "busBandwidthGbps": 0, - "outOfPlaceUs": 28.3, - "inPlaceUs": 27.59, - "correct": true - }, - { - "sizeBytes": 16, - "dtype": "float", - "latencyUs": 27.27, - "algBandwidthGbps": 0, - "busBandwidthGbps": 0, - "outOfPlaceUs": 27.27, - "inPlaceUs": 27.06, - "correct": true - }, - { - "sizeBytes": 32, - "dtype": "float", - "latencyUs": 27.25, - "algBandwidthGbps": 0, - "busBandwidthGbps": 0, - "outOfPlaceUs": 27.25, - "inPlaceUs": 27.3, - "correct": true - }, - { - "sizeBytes": 64, - "dtype": "float", - "latencyUs": 27.32, - "algBandwidthGbps": 0, - "busBandwidthGbps": 0, - "outOfPlaceUs": 27.32, - "inPlaceUs": 27.28, - "correct": true - }, - { - "sizeBytes": 128, - "dtype": "float", - "latencyUs": 27.42, - "algBandwidthGbps": 0, - "busBandwidthGbps": 0.01, - "outOfPlaceUs": 27.42, - "inPlaceUs": 27.59, - "correct": true - }, - { - "sizeBytes": 256, - "dtype": "float", - "latencyUs": 27.26, - "algBandwidthGbps": 0.01, - "busBandwidthGbps": 0.02, - "outOfPlaceUs": 27.26, - "inPlaceUs": 27.32, - "correct": true - }, - { - "sizeBytes": 512, - "dtype": "float", - "latencyUs": 27.16, - "algBandwidthGbps": 0.02, - "busBandwidthGbps": 0.03, - "outOfPlaceUs": 27.16, - "inPlaceUs": 27.38, - "correct": true - }, - { - "sizeBytes": 1024, - "dtype": "float", - "latencyUs": 27.33, - "algBandwidthGbps": 0.04, - "busBandwidthGbps": 0.07, - "outOfPlaceUs": 27.33, - "inPlaceUs": 27.14, - "correct": true - }, - { - "sizeBytes": 2048, - "dtype": "float", - "latencyUs": 27.36, - "algBandwidthGbps": 0.07, - "busBandwidthGbps": 0.13, - "outOfPlaceUs": 27.36, - "inPlaceUs": 27.33, - "correct": true - }, - { - "sizeBytes": 4096, - "dtype": "float", - "latencyUs": 27.3, - "algBandwidthGbps": 0.15, - "busBandwidthGbps": 0.26, - "outOfPlaceUs": 27.3, - "inPlaceUs": 27.35, - "correct": true - }, - { - "sizeBytes": 8192, - 
"dtype": "float", - "latencyUs": 27.52, - "algBandwidthGbps": 0.3, - "busBandwidthGbps": 0.52, - "outOfPlaceUs": 27.52, - "inPlaceUs": 27.59, - "correct": true - }, - { - "sizeBytes": 16384, - "dtype": "float", - "latencyUs": 27.64, - "algBandwidthGbps": 0.59, - "busBandwidthGbps": 1.04, - "outOfPlaceUs": 27.64, - "inPlaceUs": 27.61, - "correct": true - }, - { - "sizeBytes": 32768, - "dtype": "float", - "latencyUs": 27.76, - "algBandwidthGbps": 1.18, - "busBandwidthGbps": 2.07, - "outOfPlaceUs": 27.76, - "inPlaceUs": 27.85, - "correct": true - }, - { - "sizeBytes": 65536, - "dtype": "float", - "latencyUs": 27.7, - "algBandwidthGbps": 2.37, - "busBandwidthGbps": 4.14, - "outOfPlaceUs": 28.19, - "inPlaceUs": 27.7, - "correct": true - }, - { - "sizeBytes": 131072, - "dtype": "float", - "latencyUs": 28.21, - "algBandwidthGbps": 4.65, - "busBandwidthGbps": 8.13, - "outOfPlaceUs": 28.59, - "inPlaceUs": 28.21, - "correct": true - }, - { - "sizeBytes": 262144, - "dtype": "float", - "latencyUs": 28.56, - "algBandwidthGbps": 9.18, - "busBandwidthGbps": 16.06, - "outOfPlaceUs": 29.16, - "inPlaceUs": 28.56, - "correct": true - }, - { - "sizeBytes": 524288, - "dtype": "float", - "latencyUs": 29.89, - "algBandwidthGbps": 17.54, - "busBandwidthGbps": 30.7, - "outOfPlaceUs": 29.89, - "inPlaceUs": 29.93, - "correct": true - }, - { - "sizeBytes": 1048576, - "dtype": "float", - "latencyUs": 32.16, - "algBandwidthGbps": 32.61, - "busBandwidthGbps": 57.06, - "outOfPlaceUs": 32.16, - "inPlaceUs": 32.67, - "correct": true - }, - { - "sizeBytes": 2097152, - "dtype": "float", - "latencyUs": 37.47, - "algBandwidthGbps": 55.97, - "busBandwidthGbps": 97.94, - "outOfPlaceUs": 37.47, - "inPlaceUs": 38.07, - "correct": true - }, - { - "sizeBytes": 4194304, - "dtype": "float", - "latencyUs": 56.79, - "algBandwidthGbps": 73.86, - "busBandwidthGbps": 129.26, - "outOfPlaceUs": 56.88, - "inPlaceUs": 56.79, - "correct": true - }, - { - "sizeBytes": 8388608, - "dtype": "float", - "latencyUs": 77.08, - 
"algBandwidthGbps": 108.83, - "busBandwidthGbps": 190.45, - "outOfPlaceUs": 78.24, - "inPlaceUs": 77.08, - "correct": true - }, - { - "sizeBytes": 16777216, - "dtype": "float", - "latencyUs": 104.77, - "algBandwidthGbps": 160.14, - "busBandwidthGbps": 280.24, - "outOfPlaceUs": 106.93, - "inPlaceUs": 104.77, - "correct": true - }, - { - "sizeBytes": 33554432, - "dtype": "float", - "latencyUs": 166.18, - "algBandwidthGbps": 201.91, - "busBandwidthGbps": 353.34, - "outOfPlaceUs": 168.44, - "inPlaceUs": 166.18, - "correct": true - }, - { - "sizeBytes": 67108864, - "dtype": "float", - "latencyUs": 274.52, - "algBandwidthGbps": 244.46, - "busBandwidthGbps": 427.8, - "outOfPlaceUs": 274.52, - "inPlaceUs": 275.23, - "correct": true - }, - { - "sizeBytes": 134217728, - "dtype": "float", - "latencyUs": 391.34, - "algBandwidthGbps": 342.97, - "busBandwidthGbps": 600.19, - "outOfPlaceUs": 391.34, - "inPlaceUs": 392.6, - "correct": true - }, - { - "sizeBytes": 268435456, - "dtype": "float", - "latencyUs": 711.09, - "algBandwidthGbps": 377.5, - "busBandwidthGbps": 660.62, - "outOfPlaceUs": 711.09, - "inPlaceUs": 712.3, - "correct": true - }, - { - "sizeBytes": 536870912, - "dtype": "float", - "latencyUs": 1324.96, - "algBandwidthGbps": 405.2, - "busBandwidthGbps": 709.1, - "outOfPlaceUs": 1324.96, - "inPlaceUs": 1327.33, - "correct": true - }, - { - "sizeBytes": 1073741824, - "dtype": "float", - "latencyUs": 2553.96, - "algBandwidthGbps": 420.42, - "busBandwidthGbps": 735.74, - "outOfPlaceUs": 2558.96, - "inPlaceUs": 2553.96, - "correct": true - }, - { - "sizeBytes": 2147483648, - "dtype": "float", - "latencyUs": 4571.5, - "algBandwidthGbps": 469.75, - "busBandwidthGbps": 822.07, - "outOfPlaceUs": 4576.46, - "inPlaceUs": 4571.5, - "correct": true - }, - { - "sizeBytes": 4294967296, - "dtype": "float", - "latencyUs": 9024.56, - "algBandwidthGbps": 475.92, - "busBandwidthGbps": 832.86, - "outOfPlaceUs": 9034.78, - "inPlaceUs": 9024.56, - "correct": true - }, - { - "sizeBytes": 
8589934592, - "dtype": "float", - "latencyUs": 17971.9, - "algBandwidthGbps": 477.96, - "busBandwidthGbps": 836.44, - "outOfPlaceUs": 17991.5, - "inPlaceUs": 17971.9, - "correct": true - } - ] - }, - { - "id": "cxn-fd5a787b", - "identity": "allreduce-fw|b300|flashinfer-oneshot|b300-nvlink-island|nvlink|8|allreduce-fw-v1", - "op": "all_reduce", - "sku": "b300", - "runner": "b300-nv_11", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "nodes": null, - "dtype": "bf16", - "comparisonClass": null, - "comparisonKey": "81bfaa10f5beda36", - "measurementContract": "allreduce-fw-v1", - "avgBusBandwidthGbps": null, - "status": "valid", - "valid": true, - "colorKey": "b300_fd5a787b", - "label": "B300 · flashinfer-oneshot (fw-AR · ws8)", - "generatedAt": "2026-06-28T01:47:48.908164+00:00", - "run": { - "id": null, - "url": null, - "createdAt": "2026-06-28T01:47:48.908164+00:00", - "sha": null - }, - "rows": [ - { - "sizeBytes": 4096, - "dtype": "bf16", - "latencyUs": 11.661, - "algBandwidthGbps": 0.351, - "busBandwidthGbps": 0.615, - "outOfPlaceUs": null, - "inPlaceUs": null, - "correct": true - }, - { - "sizeBytes": 16384, - "dtype": "bf16", - "latencyUs": 11.601, - "algBandwidthGbps": 1.412, - "busBandwidthGbps": 2.472, - "outOfPlaceUs": null, - "inPlaceUs": null, - "correct": true - }, - { - "sizeBytes": 65536, - "dtype": "bf16", - "latencyUs": 12.381, - "algBandwidthGbps": 5.293, - "busBandwidthGbps": 9.263, - "outOfPlaceUs": null, - "inPlaceUs": null, - "correct": true - }, - { - "sizeBytes": 262144, - "dtype": "bf16", - "latencyUs": 14.274, - "algBandwidthGbps": 18.365, - "busBandwidthGbps": 32.139, - "outOfPlaceUs": null, - "inPlaceUs": null, - "correct": true - }, - { - "sizeBytes": 1048576, - "dtype": "bf16", - "latencyUs": 23.854, - "algBandwidthGbps": 43.958, - "busBandwidthGbps": 76.926, - "outOfPlaceUs": null, - "inPlaceUs": null, - "correct": true - }, - { - "sizeBytes": 4194304, - "dtype": "bf16", - "latencyUs": 75.394, - 
"algBandwidthGbps": 55.632, - "busBandwidthGbps": 97.356, - "outOfPlaceUs": null, - "inPlaceUs": null, - "correct": true - }, - { - "sizeBytes": 16777216, - "dtype": "bf16", - "latencyUs": 244.644, - "algBandwidthGbps": 68.578, - "busBandwidthGbps": 120.011, - "outOfPlaceUs": null, - "inPlaceUs": null, - "correct": true - }, - { - "sizeBytes": 67108864, - "dtype": "bf16", - "latencyUs": 956.149, - "algBandwidthGbps": 70.187, - "busBandwidthGbps": 122.827, - "outOfPlaceUs": null, - "inPlaceUs": null, - "correct": true - } - ] - }, - { - "id": "cxn-087af4ad", - "identity": "allreduce-fw|b300|flashinfer-twoshot|b300-nvlink-island|nvlink|8|allreduce-fw-v1", - "op": "all_reduce", - "sku": "b300", - "runner": "b300-nv_11", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "nodes": null, - "dtype": "bf16", - "comparisonClass": null, - "comparisonKey": "183298dcd11c3e1e", - "measurementContract": "allreduce-fw-v1", - "avgBusBandwidthGbps": null, - "status": "valid", - "valid": true, - "colorKey": "b300_087af4ad", - "label": "B300 · flashinfer-twoshot (fw-AR · ws8)", - "generatedAt": "2026-06-28T01:47:48.908164+00:00", - "run": { - "id": null, - "url": null, - "createdAt": "2026-06-28T01:47:48.908164+00:00", - "sha": null - }, - "rows": [ - { - "sizeBytes": 65536, - "dtype": "bf16", - "latencyUs": 385.191, - "algBandwidthGbps": 0.17, - "busBandwidthGbps": 0.298, - "outOfPlaceUs": null, - "inPlaceUs": null, - "correct": true - }, - { - "sizeBytes": 262144, - "dtype": "bf16", - "latencyUs": 118.644, - "algBandwidthGbps": 2.209, - "busBandwidthGbps": 3.867, - "outOfPlaceUs": null, - "inPlaceUs": null, - "correct": true - }, - { - "sizeBytes": 1048576, - "dtype": "bf16", - "latencyUs": 47.46, - "algBandwidthGbps": 22.094, - "busBandwidthGbps": 38.664, - "outOfPlaceUs": null, - "inPlaceUs": null, - "correct": true - }, - { - "sizeBytes": 4194304, - "dtype": "bf16", - "latencyUs": 43.002, - "algBandwidthGbps": 97.537, - "busBandwidthGbps": 
170.69, - "outOfPlaceUs": null, - "inPlaceUs": null, - "correct": true - }, - { - "sizeBytes": 16777216, - "dtype": "bf16", - "latencyUs": 90.81, - "algBandwidthGbps": 184.75, - "busBandwidthGbps": 323.313, - "outOfPlaceUs": null, - "inPlaceUs": null, - "correct": true - }, - { - "sizeBytes": 67108864, - "dtype": "bf16", - "latencyUs": 353.165, - "algBandwidthGbps": 190.021, - "busBandwidthGbps": 332.537, - "outOfPlaceUs": null, - "inPlaceUs": null, - "correct": true - } - ] - }, - { - "id": "cxn-345c72e8", - "identity": "allreduce-fw|b300|nccl|b300-nvlink-island|nvlink|8|allreduce-fw-v1", - "op": "all_reduce", - "sku": "b300", - "runner": "b300-nv_11", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "nodes": null, - "dtype": "bf16", - "comparisonClass": null, - "comparisonKey": "9c254fab92b5fac7", - "measurementContract": "allreduce-fw-v1", - "avgBusBandwidthGbps": null, - "status": "valid", - "valid": true, - "colorKey": "b300_345c72e8", - "label": "B300 · nccl (fw-AR · ws8)", - "generatedAt": "2026-06-28T01:47:48.908164+00:00", - "run": { - "id": null, - "url": null, - "createdAt": "2026-06-28T01:47:48.908164+00:00", - "sha": null - }, - "rows": [ - { - "sizeBytes": 1024, - "dtype": "bf16", - "latencyUs": 51, - "algBandwidthGbps": 0.02, - "busBandwidthGbps": 0.035, - "outOfPlaceUs": null, - "inPlaceUs": null, - "correct": true - }, - { - "sizeBytes": 4096, - "dtype": "bf16", - "latencyUs": 29.788, - "algBandwidthGbps": 0.138, - "busBandwidthGbps": 0.241, - "outOfPlaceUs": null, - "inPlaceUs": null, - "correct": true - }, - { - "sizeBytes": 16384, - "dtype": "bf16", - "latencyUs": 25.746, - "algBandwidthGbps": 0.636, - "busBandwidthGbps": 1.114, - "outOfPlaceUs": null, - "inPlaceUs": null, - "correct": true - }, - { - "sizeBytes": 65536, - "dtype": "bf16", - "latencyUs": 43.559, - "algBandwidthGbps": 1.505, - "busBandwidthGbps": 2.633, - "outOfPlaceUs": null, - "inPlaceUs": null, - "correct": true - }, - { - "sizeBytes": 
262144, - "dtype": "bf16", - "latencyUs": 27.737, - "algBandwidthGbps": 9.451, - "busBandwidthGbps": 16.539, - "outOfPlaceUs": null, - "inPlaceUs": null, - "correct": true - }, - { - "sizeBytes": 1048576, - "dtype": "bf16", - "latencyUs": 29.05, - "algBandwidthGbps": 36.096, - "busBandwidthGbps": 63.168, - "outOfPlaceUs": null, - "inPlaceUs": null, - "correct": true - }, - { - "sizeBytes": 4194304, - "dtype": "bf16", - "latencyUs": 52.692, - "algBandwidthGbps": 79.601, - "busBandwidthGbps": 139.301, - "outOfPlaceUs": null, - "inPlaceUs": null, - "correct": true - }, - { - "sizeBytes": 16777216, - "dtype": "bf16", - "latencyUs": 95.558, - "algBandwidthGbps": 175.571, - "busBandwidthGbps": 307.25, - "outOfPlaceUs": null, - "inPlaceUs": null, - "correct": true - }, - { - "sizeBytes": 67108864, - "dtype": "bf16", - "latencyUs": 302.87, - "algBandwidthGbps": 221.577, - "busBandwidthGbps": 387.759, - "outOfPlaceUs": null, - "inPlaceUs": null, - "correct": true - } - ] - }, - { - "id": "cxn-18cb0223", - "identity": "allreduce-fw|h100|flashinfer-oneshot|h100-nvlink-island|nvlink|8|allreduce-fw-v1", - "op": "all_reduce", - "sku": "h100", - "runner": "h100-dgxc-slurm_17", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "nodes": null, - "dtype": "bf16", - "comparisonClass": null, - "comparisonKey": "2876f45736ca183e", - "measurementContract": "allreduce-fw-v1", - "avgBusBandwidthGbps": null, - "status": "valid", - "valid": true, - "colorKey": "h100_18cb0223", - "label": "H100 · flashinfer-oneshot (fw-AR · ws8)", - "generatedAt": "2026-06-28T01:47:32.393320+00:00", - "run": { - "id": null, - "url": null, - "createdAt": "2026-06-28T01:47:32.393320+00:00", - "sha": null - }, - "rows": [ - { - "sizeBytes": 4096, - "dtype": "bf16", - "latencyUs": 19.209, - "algBandwidthGbps": 0.213, - "busBandwidthGbps": 0.373, - "outOfPlaceUs": null, - "inPlaceUs": null, - "correct": true - }, - { - "sizeBytes": 16384, - "dtype": "bf16", - "latencyUs": 
18.247, - "algBandwidthGbps": 0.898, - "busBandwidthGbps": 1.571, - "outOfPlaceUs": null, - "inPlaceUs": null, - "correct": true - }, - { - "sizeBytes": 65536, - "dtype": "bf16", - "latencyUs": 18.258, - "algBandwidthGbps": 3.589, - "busBandwidthGbps": 6.282, - "outOfPlaceUs": null, - "inPlaceUs": null, - "correct": true - }, - { - "sizeBytes": 262144, - "dtype": "bf16", - "latencyUs": 17.969, - "algBandwidthGbps": 14.589, - "busBandwidthGbps": 25.531, - "outOfPlaceUs": null, - "inPlaceUs": null, - "correct": true - }, - { - "sizeBytes": 1048576, - "dtype": "bf16", - "latencyUs": 32.62, - "algBandwidthGbps": 32.145, - "busBandwidthGbps": 56.254, - "outOfPlaceUs": null, - "inPlaceUs": null, - "correct": true - }, - { - "sizeBytes": 4194304, - "dtype": "bf16", - "latencyUs": 119.14, - "algBandwidthGbps": 35.205, - "busBandwidthGbps": 61.609, - "outOfPlaceUs": null, - "inPlaceUs": null, - "correct": true - }, - { - "sizeBytes": 16777216, - "dtype": "bf16", - "latencyUs": 464.128, - "algBandwidthGbps": 36.148, - "busBandwidthGbps": 63.259, - "outOfPlaceUs": null, - "inPlaceUs": null, - "correct": true - }, - { - "sizeBytes": 67108864, - "dtype": "bf16", - "latencyUs": 1854.815, - "algBandwidthGbps": 36.181, - "busBandwidthGbps": 63.317, - "outOfPlaceUs": null, - "inPlaceUs": null, - "correct": true - } - ] - }, - { - "id": "cxn-5a5e256d", - "identity": "allreduce-fw|h100|flashinfer-twoshot|h100-nvlink-island|nvlink|8|allreduce-fw-v1", - "op": "all_reduce", - "sku": "h100", - "runner": "h100-dgxc-slurm_17", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "nodes": null, - "dtype": "bf16", - "comparisonClass": null, - "comparisonKey": "3914980c40380611", - "measurementContract": "allreduce-fw-v1", - "avgBusBandwidthGbps": null, - "status": "valid", - "valid": true, - "colorKey": "h100_5a5e256d", - "label": "H100 · flashinfer-twoshot (fw-AR · ws8)", - "generatedAt": "2026-06-28T01:47:32.393320+00:00", - "run": { - "id": null, - "url": 
null, - "createdAt": "2026-06-28T01:47:32.393320+00:00", - "sha": null - }, - "rows": [ - { - "sizeBytes": 65536, - "dtype": "bf16", - "latencyUs": 261.726, - "algBandwidthGbps": 0.25, - "busBandwidthGbps": 0.438, - "outOfPlaceUs": null, - "inPlaceUs": null, - "correct": true - }, - { - "sizeBytes": 262144, - "dtype": "bf16", - "latencyUs": 85.069, - "algBandwidthGbps": 3.082, - "busBandwidthGbps": 5.393, - "outOfPlaceUs": null, - "inPlaceUs": null, - "correct": true - }, - { - "sizeBytes": 1048576, - "dtype": "bf16", - "latencyUs": 36.265, - "algBandwidthGbps": 28.914, - "busBandwidthGbps": 50.6, - "outOfPlaceUs": null, - "inPlaceUs": null, - "correct": true - }, - { - "sizeBytes": 4194304, - "dtype": "bf16", - "latencyUs": 53.77, - "algBandwidthGbps": 78.004, - "busBandwidthGbps": 136.507, - "outOfPlaceUs": null, - "inPlaceUs": null, - "correct": true - }, - { - "sizeBytes": 16777216, - "dtype": "bf16", - "latencyUs": 144.773, - "algBandwidthGbps": 115.886, - "busBandwidthGbps": 202.801, - "outOfPlaceUs": null, - "inPlaceUs": null, - "correct": true - }, - { - "sizeBytes": 67108864, - "dtype": "bf16", - "latencyUs": 584.195, - "algBandwidthGbps": 114.874, - "busBandwidthGbps": 201.03, - "outOfPlaceUs": null, - "inPlaceUs": null, - "correct": true - } - ] - }, - { - "id": "cxn-4676ac48", - "identity": "nccl|h100|all_reduce|h100-nvlink-island|nvlink|8|nccl-tests-v1", - "op": "all_reduce", - "sku": "h100", - "runner": "h100-dgxc-slurm_09", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "nodes": 1, - "dtype": "float", - "comparisonClass": "standardized", - "comparisonKey": "059665d8b168a0d7", - "measurementContract": "nccl-tests-v1", - "avgBusBandwidthGbps": 145.585, - "status": "valid", - "valid": true, - "colorKey": "h100_4676ac48", - "label": "H100 · h100-nvlink-island · nvlink (ws8)", - "generatedAt": "2026-06-27T11:18:41.017727+00:00", - "run": { - "id": null, - "url": null, - "createdAt": 
"2026-06-27T11:18:41.017727+00:00", - "sha": null - }, - "rows": [ - { - "sizeBytes": 8, - "dtype": "float", - "latencyUs": 108.66, - "algBandwidthGbps": 0, - "busBandwidthGbps": 0, - "outOfPlaceUs": 108.66, - "inPlaceUs": 38.99, - "correct": true - }, - { - "sizeBytes": 16, - "dtype": "float", - "latencyUs": 39.33, - "algBandwidthGbps": 0, - "busBandwidthGbps": 0, - "outOfPlaceUs": 39.33, - "inPlaceUs": 38.7, - "correct": true - }, - { - "sizeBytes": 32, - "dtype": "float", - "latencyUs": 73.95, - "algBandwidthGbps": 0, - "busBandwidthGbps": 0, - "outOfPlaceUs": 73.95, - "inPlaceUs": 38.72, - "correct": true - }, - { - "sizeBytes": 64, - "dtype": "float", - "latencyUs": 39.17, - "algBandwidthGbps": 0, - "busBandwidthGbps": 0, - "outOfPlaceUs": 39.17, - "inPlaceUs": 38.71, - "correct": true - }, - { - "sizeBytes": 128, - "dtype": "float", - "latencyUs": 39.12, - "algBandwidthGbps": 0, - "busBandwidthGbps": 0.01, - "outOfPlaceUs": 39.12, - "inPlaceUs": 38.4, - "correct": true - }, - { - "sizeBytes": 256, - "dtype": "float", - "latencyUs": 38.88, - "algBandwidthGbps": 0.01, - "busBandwidthGbps": 0.01, - "outOfPlaceUs": 38.88, - "inPlaceUs": 38.41, - "correct": true - }, - { - "sizeBytes": 512, - "dtype": "float", - "latencyUs": 39.08, - "algBandwidthGbps": 0.01, - "busBandwidthGbps": 0.02, - "outOfPlaceUs": 39.08, - "inPlaceUs": 38.59, - "correct": true - }, - { - "sizeBytes": 1024, - "dtype": "float", - "latencyUs": 38.88, - "algBandwidthGbps": 0.03, - "busBandwidthGbps": 0.05, - "outOfPlaceUs": 38.88, - "inPlaceUs": 38.83, - "correct": true - }, - { - "sizeBytes": 2048, - "dtype": "float", - "latencyUs": 39.58, - "algBandwidthGbps": 0.05, - "busBandwidthGbps": 0.09, - "outOfPlaceUs": 39.58, - "inPlaceUs": 39.25, - "correct": true - }, - { - "sizeBytes": 4096, - "dtype": "float", - "latencyUs": 38.69, - "algBandwidthGbps": 0.11, - "busBandwidthGbps": 0.19, - "outOfPlaceUs": 38.94, - "inPlaceUs": 38.69, - "correct": true - }, - { - "sizeBytes": 8192, - "dtype": 
"float", - "latencyUs": 38.69, - "algBandwidthGbps": 0.21, - "busBandwidthGbps": 0.37, - "outOfPlaceUs": 38.69, - "inPlaceUs": 39.4, - "correct": true - }, - { - "sizeBytes": 16384, - "dtype": "float", - "latencyUs": 39.08, - "algBandwidthGbps": 0.42, - "busBandwidthGbps": 0.73, - "outOfPlaceUs": 39.08, - "inPlaceUs": 39.06, - "correct": true - }, - { - "sizeBytes": 32768, - "dtype": "float", - "latencyUs": 38.26, - "algBandwidthGbps": 0.86, - "busBandwidthGbps": 1.5, - "outOfPlaceUs": 39.3, - "inPlaceUs": 38.26, - "correct": true - }, - { - "sizeBytes": 65536, - "dtype": "float", - "latencyUs": 38.71, - "algBandwidthGbps": 1.69, - "busBandwidthGbps": 2.96, - "outOfPlaceUs": 38.95, - "inPlaceUs": 38.71, - "correct": true - }, - { - "sizeBytes": 131072, - "dtype": "float", - "latencyUs": 39.68, - "algBandwidthGbps": 3.3, - "busBandwidthGbps": 5.78, - "outOfPlaceUs": 40.2, - "inPlaceUs": 39.68, - "correct": true - }, - { - "sizeBytes": 262144, - "dtype": "float", - "latencyUs": 40.97, - "algBandwidthGbps": 6.4, - "busBandwidthGbps": 11.2, - "outOfPlaceUs": 41.31, - "inPlaceUs": 40.97, - "correct": true - }, - { - "sizeBytes": 524288, - "dtype": "float", - "latencyUs": 43.56, - "algBandwidthGbps": 12.04, - "busBandwidthGbps": 21.06, - "outOfPlaceUs": 43.56, - "inPlaceUs": 43.68, - "correct": true - }, - { - "sizeBytes": 1048576, - "dtype": "float", - "latencyUs": 46.56, - "algBandwidthGbps": 22.52, - "busBandwidthGbps": 39.42, - "outOfPlaceUs": 46.76, - "inPlaceUs": 46.56, - "correct": true - }, - { - "sizeBytes": 2097152, - "dtype": "float", - "latencyUs": 44.32, - "algBandwidthGbps": 47.32, - "busBandwidthGbps": 82.81, - "outOfPlaceUs": 44.44, - "inPlaceUs": 44.32, - "correct": true - }, - { - "sizeBytes": 4194304, - "dtype": "float", - "latencyUs": 55.78, - "algBandwidthGbps": 75.19, - "busBandwidthGbps": 131.58, - "outOfPlaceUs": 56.11, - "inPlaceUs": 55.78, - "correct": true - }, - { - "sizeBytes": 8388608, - "dtype": "float", - "latencyUs": 83.88, - 
"algBandwidthGbps": 100, - "busBandwidthGbps": 175.01, - "outOfPlaceUs": 85.22, - "inPlaceUs": 83.88, - "correct": true - }, - { - "sizeBytes": 16777216, - "dtype": "float", - "latencyUs": 125.29, - "algBandwidthGbps": 133.91, - "busBandwidthGbps": 234.34, - "outOfPlaceUs": 125.73, - "inPlaceUs": 125.29, - "correct": true - }, - { - "sizeBytes": 33554432, - "dtype": "float", - "latencyUs": 200.68, - "algBandwidthGbps": 167.2, - "busBandwidthGbps": 292.6, - "outOfPlaceUs": 200.82, - "inPlaceUs": 200.68, - "correct": true - }, - { - "sizeBytes": 67108864, - "dtype": "float", - "latencyUs": 325, - "algBandwidthGbps": 206.49, - "busBandwidthGbps": 361.36, - "outOfPlaceUs": 325, - "inPlaceUs": 325.69, - "correct": true - }, - { - "sizeBytes": 134217728, - "dtype": "float", - "latencyUs": 585.92, - "algBandwidthGbps": 229.07, - "busBandwidthGbps": 400.87, - "outOfPlaceUs": 585.97, - "inPlaceUs": 585.92, - "correct": true - }, - { - "sizeBytes": 268435456, - "dtype": "float", - "latencyUs": 1110.23, - "algBandwidthGbps": 241.78, - "busBandwidthGbps": 423.12, - "outOfPlaceUs": 1111.7, - "inPlaceUs": 1110.23, - "correct": true - }, - { - "sizeBytes": 536870912, - "dtype": "float", - "latencyUs": 2145.48, - "algBandwidthGbps": 250.23, - "busBandwidthGbps": 437.91, - "outOfPlaceUs": 2145.48, - "inPlaceUs": 2147.26, - "correct": true - }, - { - "sizeBytes": 1073741824, - "dtype": "float", - "latencyUs": 4026.19, - "algBandwidthGbps": 266.69, - "busBandwidthGbps": 466.71, - "outOfPlaceUs": 4026.19, - "inPlaceUs": 4031.14, - "correct": true - }, - { - "sizeBytes": 2147483648, - "dtype": "float", - "latencyUs": 7957.67, - "algBandwidthGbps": 269.86, - "busBandwidthGbps": 472.26, - "outOfPlaceUs": 7958.73, - "inPlaceUs": 7957.67, - "correct": true - }, - { - "sizeBytes": 4294967296, - "dtype": "float", - "latencyUs": 15778.7, - "algBandwidthGbps": 272.2, - "busBandwidthGbps": 476.35, - "outOfPlaceUs": 15778.7, - "inPlaceUs": 15787, - "correct": true - }, - { - "sizeBytes": 
8589934592, - "dtype": "float", - "latencyUs": 31394.3, - "algBandwidthGbps": 273.61, - "busBandwidthGbps": 478.83, - "outOfPlaceUs": 31404.3, - "inPlaceUs": 31394.3, - "correct": true - } - ] - }, - { - "id": "cxn-ae07ad9c", - "identity": "allreduce-fw|h100|nccl|h100-nvlink-island|nvlink|8|allreduce-fw-v1", - "op": "all_reduce", - "sku": "h100", - "runner": "h100-dgxc-slurm_17", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "nodes": null, - "dtype": "bf16", - "comparisonClass": null, - "comparisonKey": "aa6fba4338779d59", - "measurementContract": "allreduce-fw-v1", - "avgBusBandwidthGbps": null, - "status": "valid", - "valid": true, - "colorKey": "h100_ae07ad9c", - "label": "H100 · nccl (fw-AR · ws8)", - "generatedAt": "2026-06-28T01:47:32.393320+00:00", - "run": { - "id": null, - "url": null, - "createdAt": "2026-06-28T01:47:32.393320+00:00", - "sha": null - }, - "rows": [ - { - "sizeBytes": 1024, - "dtype": "bf16", - "latencyUs": 32.458, - "algBandwidthGbps": 0.032, - "busBandwidthGbps": 0.055, - "outOfPlaceUs": null, - "inPlaceUs": null, - "correct": true - }, - { - "sizeBytes": 4096, - "dtype": "bf16", - "latencyUs": 30.771, - "algBandwidthGbps": 0.133, - "busBandwidthGbps": 0.233, - "outOfPlaceUs": null, - "inPlaceUs": null, - "correct": true - }, - { - "sizeBytes": 16384, - "dtype": "bf16", - "latencyUs": 31.116, - "algBandwidthGbps": 0.527, - "busBandwidthGbps": 0.921, - "outOfPlaceUs": null, - "inPlaceUs": null, - "correct": true - }, - { - "sizeBytes": 65536, - "dtype": "bf16", - "latencyUs": 61.512, - "algBandwidthGbps": 1.065, - "busBandwidthGbps": 1.864, - "outOfPlaceUs": null, - "inPlaceUs": null, - "correct": true - }, - { - "sizeBytes": 262144, - "dtype": "bf16", - "latencyUs": 30.758, - "algBandwidthGbps": 8.523, - "busBandwidthGbps": 14.915, - "outOfPlaceUs": null, - "inPlaceUs": null, - "correct": true - }, - { - "sizeBytes": 1048576, - "dtype": "bf16", - "latencyUs": 33.86, - "algBandwidthGbps": 30.968, - 
"busBandwidthGbps": 54.194, - "outOfPlaceUs": null, - "inPlaceUs": null, - "correct": true - }, - { - "sizeBytes": 4194304, - "dtype": "bf16", - "latencyUs": 84.309, - "algBandwidthGbps": 49.749, - "busBandwidthGbps": 87.061, - "outOfPlaceUs": null, - "inPlaceUs": null, - "correct": true - }, - { - "sizeBytes": 16777216, - "dtype": "bf16", - "latencyUs": 118.376, - "algBandwidthGbps": 141.728, - "busBandwidthGbps": 248.024, - "outOfPlaceUs": null, - "inPlaceUs": null, - "correct": true - }, - { - "sizeBytes": 67108864, - "dtype": "bf16", - "latencyUs": 322.062, - "algBandwidthGbps": 208.372, - "busBandwidthGbps": 364.652, - "outOfPlaceUs": null, - "inPlaceUs": null, - "correct": true - } - ] - }, - { - "id": "cxn-83a9e484", - "identity": "nccl|h200|all_reduce|h200-nvlink-island|nvlink|8|nccl-tests-v1", - "op": "all_reduce", - "sku": "h200", - "runner": "h200-dgxc-slurm_2", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "worldSize": 8, - "nodes": 1, - "dtype": "float", - "comparisonClass": "standardized", - "comparisonKey": "9171bd1206f1d15c", - "measurementContract": "nccl-tests-v1", - "avgBusBandwidthGbps": 147.096, - "status": "valid", - "valid": true, - "colorKey": "h200_83a9e484", - "label": "H200 · h200-nvlink-island · nvlink (ws8)", - "generatedAt": "2026-06-27T11:18:51.255960+00:00", - "run": { - "id": null, - "url": null, - "createdAt": "2026-06-27T11:18:51.255960+00:00", - "sha": null - }, - "rows": [ - { - "sizeBytes": 8, - "dtype": "float", - "latencyUs": 46.19, - "algBandwidthGbps": 0, - "busBandwidthGbps": 0, - "outOfPlaceUs": 46.19, - "inPlaceUs": 45.4, - "correct": true - }, - { - "sizeBytes": 16, - "dtype": "float", - "latencyUs": 46.3, - "algBandwidthGbps": 0, - "busBandwidthGbps": 0, - "outOfPlaceUs": 46.3, - "inPlaceUs": 49.15, - "correct": true - }, - { - "sizeBytes": 32, - "dtype": "float", - "latencyUs": 43.48, - "algBandwidthGbps": 0, - "busBandwidthGbps": 0, - "outOfPlaceUs": 43.48, - "inPlaceUs": 41.06, - "correct": 
true - }, - { - "sizeBytes": 64, - "dtype": "float", - "latencyUs": 40.84, - "algBandwidthGbps": 0, - "busBandwidthGbps": 0, - "outOfPlaceUs": 40.84, - "inPlaceUs": 40.88, - "correct": true - }, - { - "sizeBytes": 128, - "dtype": "float", - "latencyUs": 39.89, - "algBandwidthGbps": 0, - "busBandwidthGbps": 0.01, - "outOfPlaceUs": 39.89, - "inPlaceUs": 43.96, - "correct": true - }, - { - "sizeBytes": 256, - "dtype": "float", - "latencyUs": 43.77, - "algBandwidthGbps": 0.01, - "busBandwidthGbps": 0.01, - "outOfPlaceUs": 43.77, - "inPlaceUs": 44.61, - "correct": true - }, - { - "sizeBytes": 512, - "dtype": "float", - "latencyUs": 40.31, - "algBandwidthGbps": 0.01, - "busBandwidthGbps": 0.02, - "outOfPlaceUs": 40.31, - "inPlaceUs": 41.46, - "correct": true - }, - { - "sizeBytes": 1024, - "dtype": "float", - "latencyUs": 40.55, - "algBandwidthGbps": 0.03, - "busBandwidthGbps": 0.04, - "outOfPlaceUs": 40.55, - "inPlaceUs": 41.64, - "correct": true - }, - { - "sizeBytes": 2048, - "dtype": "float", - "latencyUs": 40.83, - "algBandwidthGbps": 0.05, - "busBandwidthGbps": 0.09, - "outOfPlaceUs": 43.27, - "inPlaceUs": 40.83, - "correct": true - }, - { - "sizeBytes": 4096, - "dtype": "float", - "latencyUs": 41.81, - "algBandwidthGbps": 0.1, - "busBandwidthGbps": 0.17, - "outOfPlaceUs": 41.81, - "inPlaceUs": 41.67, - "correct": true - }, - { - "sizeBytes": 8192, - "dtype": "float", - "latencyUs": 41.55, - "algBandwidthGbps": 0.2, - "busBandwidthGbps": 0.35, - "outOfPlaceUs": 41.69, - "inPlaceUs": 41.55, - "correct": true - }, - { - "sizeBytes": 16384, - "dtype": "float", - "latencyUs": 40.78, - "algBandwidthGbps": 0.4, - "busBandwidthGbps": 0.7, - "outOfPlaceUs": 44.62, - "inPlaceUs": 40.78, - "correct": true - }, - { - "sizeBytes": 32768, - "dtype": "float", - "latencyUs": 40.5, - "algBandwidthGbps": 0.81, - "busBandwidthGbps": 1.42, - "outOfPlaceUs": 41.35, - "inPlaceUs": 40.5, - "correct": true - }, - { - "sizeBytes": 65536, - "dtype": "float", - "latencyUs": 41.2, - 
"algBandwidthGbps": 1.59, - "busBandwidthGbps": 2.78, - "outOfPlaceUs": 45.65, - "inPlaceUs": 41.2, - "correct": true - }, - { - "sizeBytes": 131072, - "dtype": "float", - "latencyUs": 44.7, - "algBandwidthGbps": 2.93, - "busBandwidthGbps": 5.13, - "outOfPlaceUs": 44.7, - "inPlaceUs": 45.23, - "correct": true - }, - { - "sizeBytes": 262144, - "dtype": "float", - "latencyUs": 42.45, - "algBandwidthGbps": 6.18, - "busBandwidthGbps": 10.81, - "outOfPlaceUs": 43.35, - "inPlaceUs": 42.45, - "correct": true - }, - { - "sizeBytes": 524288, - "dtype": "float", - "latencyUs": 45.27, - "algBandwidthGbps": 11.58, - "busBandwidthGbps": 20.27, - "outOfPlaceUs": 45.27, - "inPlaceUs": 50.92, - "correct": true - }, - { - "sizeBytes": 1048576, - "dtype": "float", - "latencyUs": 47.39, - "algBandwidthGbps": 22.13, - "busBandwidthGbps": 38.72, - "outOfPlaceUs": 47.39, - "inPlaceUs": 48.94, - "correct": true - }, - { - "sizeBytes": 2097152, - "dtype": "float", - "latencyUs": 45.86, - "algBandwidthGbps": 45.73, - "busBandwidthGbps": 80.02, - "outOfPlaceUs": 48.22, - "inPlaceUs": 45.86, - "correct": true - }, - { - "sizeBytes": 4194304, - "dtype": "float", - "latencyUs": 54.76, - "algBandwidthGbps": 76.6, - "busBandwidthGbps": 134.05, - "outOfPlaceUs": 54.92, - "inPlaceUs": 54.76, - "correct": true - }, - { - "sizeBytes": 8388608, - "dtype": "float", - "latencyUs": 82.71, - "algBandwidthGbps": 101.42, - "busBandwidthGbps": 177.49, - "outOfPlaceUs": 83.49, - "inPlaceUs": 82.71, - "correct": true - }, - { - "sizeBytes": 16777216, - "dtype": "float", - "latencyUs": 124.23, - "algBandwidthGbps": 135.05, - "busBandwidthGbps": 236.34, - "outOfPlaceUs": 125.83, - "inPlaceUs": 124.23, - "correct": true - }, - { - "sizeBytes": 33554432, - "dtype": "float", - "latencyUs": 199.57, - "algBandwidthGbps": 168.13, - "busBandwidthGbps": 294.23, - "outOfPlaceUs": 199.57, - "inPlaceUs": 199.89, - "correct": true - }, - { - "sizeBytes": 67108864, - "dtype": "float", - "latencyUs": 321.61, - 
"algBandwidthGbps": 208.67, - "busBandwidthGbps": 365.16, - "outOfPlaceUs": 321.61, - "inPlaceUs": 322.55, - "correct": true - }, - { - "sizeBytes": 134217728, - "dtype": "float", - "latencyUs": 571.39, - "algBandwidthGbps": 234.9, - "busBandwidthGbps": 411.07, - "outOfPlaceUs": 573.19, - "inPlaceUs": 571.39, - "correct": true - }, - { - "sizeBytes": 268435456, - "dtype": "float", - "latencyUs": 1073.14, - "algBandwidthGbps": 250.14, - "busBandwidthGbps": 437.75, - "outOfPlaceUs": 1073.14, - "inPlaceUs": 1076.11, - "correct": true - }, - { - "sizeBytes": 536870912, - "dtype": "float", - "latencyUs": 2090.9, - "algBandwidthGbps": 256.77, - "busBandwidthGbps": 449.34, - "outOfPlaceUs": 2091.74, - "inPlaceUs": 2090.9, - "correct": true - }, - { - "sizeBytes": 1073741824, - "dtype": "float", - "latencyUs": 4010.65, - "algBandwidthGbps": 267.72, - "busBandwidthGbps": 468.51, - "outOfPlaceUs": 4013.31, - "inPlaceUs": 4010.65, - "correct": true - }, - { - "sizeBytes": 2147483648, - "dtype": "float", - "latencyUs": 7917.63, - "algBandwidthGbps": 271.23, - "busBandwidthGbps": 474.65, - "outOfPlaceUs": 7920.22, - "inPlaceUs": 7917.63, - "correct": true - }, - { - "sizeBytes": 4294967296, - "dtype": "float", - "latencyUs": 15691.2, - "algBandwidthGbps": 273.72, - "busBandwidthGbps": 479.01, - "outOfPlaceUs": 15691.2, - "inPlaceUs": 15701, - "correct": true - }, - { - "sizeBytes": 8589934592, - "dtype": "float", - "latencyUs": 31241, - "algBandwidthGbps": 274.96, - "busBandwidthGbps": 481.17, - "outOfPlaceUs": 31280.5, - "inPlaceUs": 31241, - "correct": true - } - ] - }, - { - "id": "cxn-be6147f8", - "identity": "nccl|mi355x|all_reduce|mi355x-xgmi|xgmi|8|nccl-tests-v1", - "op": "all_reduce", - "sku": "mi355x", - "runner": "mi355x-amds_01", - "topologyClass": "mi355x-xgmi", - "transport": "xgmi", - "worldSize": 8, - "nodes": 1, - "dtype": "float", - "comparisonClass": "standardized", - "comparisonKey": "643cf957198f1634", - "measurementContract": "nccl-tests-v1", - 
"avgBusBandwidthGbps": 132.947, - "status": "valid", - "valid": true, - "colorKey": "mi355x_be6147f8", - "label": "MI355X · mi355x-xgmi · xgmi (ws8)", - "generatedAt": "2026-06-29T02:37:18.096029+00:00", - "run": { - "id": null, - "url": null, - "createdAt": "2026-06-29T02:37:18.096029+00:00", - "sha": null - }, - "rows": [ - { - "sizeBytes": 8, - "dtype": "float", - "latencyUs": 70.23, - "algBandwidthGbps": 0, - "busBandwidthGbps": 0, - "outOfPlaceUs": 70.23, - "inPlaceUs": 55.94, - "correct": true - }, - { - "sizeBytes": 16, - "dtype": "float", - "latencyUs": 58.71, - "algBandwidthGbps": 0, - "busBandwidthGbps": 0, - "outOfPlaceUs": 58.71, - "inPlaceUs": 59.19, - "correct": true - }, - { - "sizeBytes": 32, - "dtype": "float", - "latencyUs": 58.37, - "algBandwidthGbps": 0, - "busBandwidthGbps": 0, - "outOfPlaceUs": 58.37, - "inPlaceUs": 58.26, - "correct": true - }, - { - "sizeBytes": 64, - "dtype": "float", - "latencyUs": 58.11, - "algBandwidthGbps": 0, - "busBandwidthGbps": 0, - "outOfPlaceUs": 58.11, - "inPlaceUs": 67.17, - "correct": true - }, - { - "sizeBytes": 128, - "dtype": "float", - "latencyUs": 58.88, - "algBandwidthGbps": 0, - "busBandwidthGbps": 0, - "outOfPlaceUs": 58.88, - "inPlaceUs": 68.44, - "correct": true - }, - { - "sizeBytes": 256, - "dtype": "float", - "latencyUs": 57.68, - "algBandwidthGbps": 0, - "busBandwidthGbps": 0.01, - "outOfPlaceUs": 57.68, - "inPlaceUs": 68.1, - "correct": true - }, - { - "sizeBytes": 512, - "dtype": "float", - "latencyUs": 57.2, - "algBandwidthGbps": 0.01, - "busBandwidthGbps": 0.02, - "outOfPlaceUs": 57.2, - "inPlaceUs": 68.38, - "correct": true - }, - { - "sizeBytes": 1024, - "dtype": "float", - "latencyUs": 42.33, - "algBandwidthGbps": 0.02, - "busBandwidthGbps": 0.04, - "outOfPlaceUs": 42.33, - "inPlaceUs": 56.59, - "correct": true - }, - { - "sizeBytes": 2048, - "dtype": "float", - "latencyUs": 56.27, - "algBandwidthGbps": 0.04, - "busBandwidthGbps": 0.06, - "outOfPlaceUs": 56.27, - "inPlaceUs": 57.42, - 
"correct": true - }, - { - "sizeBytes": 4096, - "dtype": "float", - "latencyUs": 56.73, - "algBandwidthGbps": 0.07, - "busBandwidthGbps": 0.13, - "outOfPlaceUs": 56.73, - "inPlaceUs": 57.52, - "correct": true - }, - { - "sizeBytes": 8192, - "dtype": "float", - "latencyUs": 46.68, - "algBandwidthGbps": 0.18, - "busBandwidthGbps": 0.31, - "outOfPlaceUs": 56.91, - "inPlaceUs": 46.68, - "correct": true - }, - { - "sizeBytes": 16384, - "dtype": "float", - "latencyUs": 56.73, - "algBandwidthGbps": 0.29, - "busBandwidthGbps": 0.51, - "outOfPlaceUs": 56.73, - "inPlaceUs": 58.19, - "correct": true - }, - { - "sizeBytes": 32768, - "dtype": "float", - "latencyUs": 57.45, - "algBandwidthGbps": 0.57, - "busBandwidthGbps": 1, - "outOfPlaceUs": 57.45, - "inPlaceUs": 58.35, - "correct": true - }, - { - "sizeBytes": 65536, - "dtype": "float", - "latencyUs": 48.73, - "algBandwidthGbps": 1.34, - "busBandwidthGbps": 2.35, - "outOfPlaceUs": 57.81, - "inPlaceUs": 48.73, - "correct": true - }, - { - "sizeBytes": 131072, - "dtype": "float", - "latencyUs": 61.94, - "algBandwidthGbps": 2.12, - "busBandwidthGbps": 3.7, - "outOfPlaceUs": 61.94, - "inPlaceUs": 72.74, - "correct": true - }, - { - "sizeBytes": 262144, - "dtype": "float", - "latencyUs": 62.66, - "algBandwidthGbps": 4.18, - "busBandwidthGbps": 7.32, - "outOfPlaceUs": 71.87, - "inPlaceUs": 62.66, - "correct": true - }, - { - "sizeBytes": 524288, - "dtype": "float", - "latencyUs": 72.34, - "algBandwidthGbps": 7.25, - "busBandwidthGbps": 12.68, - "outOfPlaceUs": 73.07, - "inPlaceUs": 72.34, - "correct": true - }, - { - "sizeBytes": 1048576, - "dtype": "float", - "latencyUs": 65.42, - "algBandwidthGbps": 16.03, - "busBandwidthGbps": 28.05, - "outOfPlaceUs": 65.42, - "inPlaceUs": 76.09, - "correct": true - }, - { - "sizeBytes": 2097152, - "dtype": "float", - "latencyUs": 72.46, - "algBandwidthGbps": 28.94, - "busBandwidthGbps": 50.65, - "outOfPlaceUs": 73.93, - "inPlaceUs": 72.46, - "correct": true - }, - { - "sizeBytes": 4194304, - 
"dtype": "float", - "latencyUs": 62.19, - "algBandwidthGbps": 67.44, - "busBandwidthGbps": 118.02, - "outOfPlaceUs": 63.37, - "inPlaceUs": 62.19, - "correct": true - }, - { - "sizeBytes": 8388608, - "dtype": "float", - "latencyUs": 76.29, - "algBandwidthGbps": 109.96, - "busBandwidthGbps": 192.43, - "outOfPlaceUs": 80.46, - "inPlaceUs": 76.29, - "correct": true - }, - { - "sizeBytes": 16777216, - "dtype": "float", - "latencyUs": 119, - "algBandwidthGbps": 141.02, - "busBandwidthGbps": 246.78, - "outOfPlaceUs": 119, - "inPlaceUs": 127.6, - "correct": true - }, - { - "sizeBytes": 33554432, - "dtype": "float", - "latencyUs": 183, - "algBandwidthGbps": 183.36, - "busBandwidthGbps": 320.89, - "outOfPlaceUs": 184.4, - "inPlaceUs": 183, - "correct": true - }, - { - "sizeBytes": 67108864, - "dtype": "float", - "latencyUs": 323.3, - "algBandwidthGbps": 207.56, - "busBandwidthGbps": 363.23, - "outOfPlaceUs": 323.9, - "inPlaceUs": 323.3, - "correct": true - }, - { - "sizeBytes": 134217728, - "dtype": "float", - "latencyUs": 613.1, - "algBandwidthGbps": 218.91, - "busBandwidthGbps": 383.09, - "outOfPlaceUs": 623.4, - "inPlaceUs": 613.1, - "correct": true - }, - { - "sizeBytes": 268435456, - "dtype": "float", - "latencyUs": 1191.6, - "algBandwidthGbps": 225.28, - "busBandwidthGbps": 394.24, - "outOfPlaceUs": 1191.6, - "inPlaceUs": 1192.7, - "correct": true - }, - { - "sizeBytes": 536870912, - "dtype": "float", - "latencyUs": 2349.2, - "algBandwidthGbps": 228.54, - "busBandwidthGbps": 399.94, - "outOfPlaceUs": 2349.2, - "inPlaceUs": 2353.4, - "correct": true - }, - { - "sizeBytes": 1073741824, - "dtype": "float", - "latencyUs": 4668.6, - "algBandwidthGbps": 229.99, - "busBandwidthGbps": 402.49, - "outOfPlaceUs": 4668.6, - "inPlaceUs": 4671.6, - "correct": true - }, - { - "sizeBytes": 2147483648, - "dtype": "float", - "latencyUs": 9245.8, - "algBandwidthGbps": 232.27, - "busBandwidthGbps": 406.47, - "outOfPlaceUs": 9245.8, - "inPlaceUs": 9250.8, - "correct": true - }, - { - 
"sizeBytes": 4294967296, - "dtype": "float", - "latencyUs": 18524, - "algBandwidthGbps": 231.86, - "busBandwidthGbps": 405.76, - "outOfPlaceUs": 18543, - "inPlaceUs": 18524, - "correct": true - }, - { - "sizeBytes": 8589934592, - "dtype": "float", - "latencyUs": 37129, - "algBandwidthGbps": 231.36, - "busBandwidthGbps": 404.87, - "outOfPlaceUs": 37129, - "inPlaceUs": 37136, - "correct": true - } - ] - }, - { - "id": "cxn-300783f6", - "identity": "allreduce-fw|mi355x|nccl|mi355x-xgmi|xgmi|8|allreduce-fw-v1", - "op": "all_reduce", - "sku": "mi355x", - "runner": "mi355x-amds_02", - "topologyClass": "mi355x-xgmi", - "transport": "xgmi", - "worldSize": 8, - "nodes": null, - "dtype": "bf16", - "comparisonClass": null, - "comparisonKey": "cffcc3132d487de4", - "measurementContract": "allreduce-fw-v1", - "avgBusBandwidthGbps": null, - "status": "valid", - "valid": true, - "colorKey": "mi355x_300783f6", - "label": "MI355X · nccl (fw-AR · ws8)", - "generatedAt": "2026-06-28T05:14:21.326557+00:00", - "run": { - "id": null, - "url": null, - "createdAt": "2026-06-28T05:14:21.326557+00:00", - "sha": null - }, - "rows": [ - { - "sizeBytes": 1024, - "dtype": "bf16", - "latencyUs": 43.632, - "algBandwidthGbps": 0.023, - "busBandwidthGbps": 0.041, - "outOfPlaceUs": null, - "inPlaceUs": null, - "correct": true - }, - { - "sizeBytes": 4096, - "dtype": "bf16", - "latencyUs": 28.193, - "algBandwidthGbps": 0.145, - "busBandwidthGbps": 0.254, - "outOfPlaceUs": null, - "inPlaceUs": null, - "correct": true - }, - { - "sizeBytes": 16384, - "dtype": "bf16", - "latencyUs": 26.58, - "algBandwidthGbps": 0.616, - "busBandwidthGbps": 1.079, - "outOfPlaceUs": null, - "inPlaceUs": null, - "correct": true - }, - { - "sizeBytes": 65536, - "dtype": "bf16", - "latencyUs": 26.654, - "algBandwidthGbps": 2.459, - "busBandwidthGbps": 4.303, - "outOfPlaceUs": null, - "inPlaceUs": null, - "correct": true - }, - { - "sizeBytes": 262144, - "dtype": "bf16", - "latencyUs": 28.918, - "algBandwidthGbps": 9.065, - 
"busBandwidthGbps": 15.864, - "outOfPlaceUs": null, - "inPlaceUs": null, - "correct": true - }, - { - "sizeBytes": 1048576, - "dtype": "bf16", - "latencyUs": 35.083, - "algBandwidthGbps": 29.889, - "busBandwidthGbps": 52.305, - "outOfPlaceUs": null, - "inPlaceUs": null, - "correct": true - }, - { - "sizeBytes": 4194304, - "dtype": "bf16", - "latencyUs": 56.62, - "algBandwidthGbps": 74.078, - "busBandwidthGbps": 129.636, - "outOfPlaceUs": null, - "inPlaceUs": null, - "correct": true - }, - { - "sizeBytes": 16777216, - "dtype": "bf16", - "latencyUs": 115.37, - "algBandwidthGbps": 145.42, - "busBandwidthGbps": 254.486, - "outOfPlaceUs": null, - "inPlaceUs": null, - "correct": true - }, - { - "sizeBytes": 67108864, - "dtype": "bf16", - "latencyUs": 361.633, - "algBandwidthGbps": 185.572, - "busBandwidthGbps": 324.75, - "outOfPlaceUs": null, - "inPlaceUs": null, - "correct": true - } - ] - }, - { - "id": "cxn-35eb6655", - "identity": "nccl|mi355x|alltoall|mi355x-xgmi|xgmi|8|nccl-tests-v1", - "op": "alltoall", - "sku": "mi355x", - "runner": "mi355x-amds_01", - "topologyClass": "mi355x-xgmi", - "transport": "xgmi", - "worldSize": 8, - "nodes": 1, - "dtype": "float", - "comparisonClass": "standardized", - "comparisonKey": "67a9b0532a278ee9", - "measurementContract": "nccl-tests-v1", - "avgBusBandwidthGbps": 108.705, - "status": "valid", - "valid": true, - "colorKey": "mi355x_35eb6655", - "label": "MI355X · mi355x-xgmi · xgmi (ws8)", - "generatedAt": "2026-06-29T02:42:52.989210+00:00", - "run": { - "id": null, - "url": null, - "createdAt": "2026-06-29T02:42:52.989210+00:00", - "sha": null - }, - "rows": [ - { - "sizeBytes": 128, - "dtype": "float", - "latencyUs": 35.84, - "algBandwidthGbps": 0, - "busBandwidthGbps": 0, - "outOfPlaceUs": 35.84, - "inPlaceUs": 77.06, - "correct": true - }, - { - "sizeBytes": 256, - "dtype": "float", - "latencyUs": 49.55, - "algBandwidthGbps": 0.01, - "busBandwidthGbps": 0, - "outOfPlaceUs": 49.55, - "inPlaceUs": 72.32, - "correct": true - }, 
- { - "sizeBytes": 512, - "dtype": "float", - "latencyUs": 49.25, - "algBandwidthGbps": 0.01, - "busBandwidthGbps": 0.01, - "outOfPlaceUs": 49.25, - "inPlaceUs": 82.36, - "correct": true - }, - { - "sizeBytes": 1024, - "dtype": "float", - "latencyUs": 38.87, - "algBandwidthGbps": 0.03, - "busBandwidthGbps": 0.02, - "outOfPlaceUs": 38.87, - "inPlaceUs": 71.67, - "correct": true - }, - { - "sizeBytes": 2048, - "dtype": "float", - "latencyUs": 38.2, - "algBandwidthGbps": 0.05, - "busBandwidthGbps": 0.05, - "outOfPlaceUs": 38.2, - "inPlaceUs": 81.13, - "correct": true - }, - { - "sizeBytes": 4096, - "dtype": "float", - "latencyUs": 38.76, - "algBandwidthGbps": 0.11, - "busBandwidthGbps": 0.09, - "outOfPlaceUs": 38.76, - "inPlaceUs": 71.12, - "correct": true - }, - { - "sizeBytes": 8192, - "dtype": "float", - "latencyUs": 48.95, - "algBandwidthGbps": 0.17, - "busBandwidthGbps": 0.15, - "outOfPlaceUs": 48.95, - "inPlaceUs": 70.91, - "correct": true - }, - { - "sizeBytes": 16384, - "dtype": "float", - "latencyUs": 50.41, - "algBandwidthGbps": 0.32, - "busBandwidthGbps": 0.28, - "outOfPlaceUs": 50.41, - "inPlaceUs": 81.47, - "correct": true - }, - { - "sizeBytes": 32768, - "dtype": "float", - "latencyUs": 50.09, - "algBandwidthGbps": 0.65, - "busBandwidthGbps": 0.57, - "outOfPlaceUs": 50.09, - "inPlaceUs": 71.15, - "correct": true - }, - { - "sizeBytes": 65536, - "dtype": "float", - "latencyUs": 49.62, - "algBandwidthGbps": 1.32, - "busBandwidthGbps": 1.16, - "outOfPlaceUs": 49.62, - "inPlaceUs": 82.19, - "correct": true - }, - { - "sizeBytes": 131072, - "dtype": "float", - "latencyUs": 49.18, - "algBandwidthGbps": 2.67, - "busBandwidthGbps": 2.33, - "outOfPlaceUs": 49.18, - "inPlaceUs": 75.31, - "correct": true - }, - { - "sizeBytes": 262144, - "dtype": "float", - "latencyUs": 62.15, - "algBandwidthGbps": 4.22, - "busBandwidthGbps": 3.69, - "outOfPlaceUs": 62.15, - "inPlaceUs": 80.96, - "correct": true - }, - { - "sizeBytes": 524288, - "dtype": "float", - "latencyUs": 
62.11, - "algBandwidthGbps": 8.44, - "busBandwidthGbps": 7.39, - "outOfPlaceUs": 62.11, - "inPlaceUs": 90.5, - "correct": true - }, - { - "sizeBytes": 1048576, - "dtype": "float", - "latencyUs": 53.07, - "algBandwidthGbps": 19.76, - "busBandwidthGbps": 17.29, - "outOfPlaceUs": 53.07, - "inPlaceUs": 72.69, - "correct": true - }, - { - "sizeBytes": 2097152, - "dtype": "float", - "latencyUs": 63.56, - "algBandwidthGbps": 32.99, - "busBandwidthGbps": 28.87, - "outOfPlaceUs": 63.56, - "inPlaceUs": 85.67, - "correct": true - }, - { - "sizeBytes": 4194304, - "dtype": "float", - "latencyUs": 77.11, - "algBandwidthGbps": 54.39, - "busBandwidthGbps": 47.59, - "outOfPlaceUs": 77.11, - "inPlaceUs": 91.21, - "correct": true - }, - { - "sizeBytes": 8388608, - "dtype": "float", - "latencyUs": 69.63, - "algBandwidthGbps": 120.48, - "busBandwidthGbps": 105.42, - "outOfPlaceUs": 69.63, - "inPlaceUs": 83.26, - "correct": true - }, - { - "sizeBytes": 16777216, - "dtype": "float", - "latencyUs": 71.3, - "algBandwidthGbps": 235.3, - "busBandwidthGbps": 205.89, - "outOfPlaceUs": 71.3, - "inPlaceUs": 93.33, - "correct": true - }, - { - "sizeBytes": 33554432, - "dtype": "float", - "latencyUs": 104.3, - "algBandwidthGbps": 321.79, - "busBandwidthGbps": 281.57, - "outOfPlaceUs": 104.3, - "inPlaceUs": 108.6, - "correct": true - }, - { - "sizeBytes": 67108864, - "dtype": "float", - "latencyUs": 195.8, - "algBandwidthGbps": 342.66, - "busBandwidthGbps": 299.83, - "outOfPlaceUs": 195.8, - "inPlaceUs": 196.2, - "correct": true - }, - { - "sizeBytes": 134217728, - "dtype": "float", - "latencyUs": 365.1, - "algBandwidthGbps": 367.59, - "busBandwidthGbps": 321.64, - "outOfPlaceUs": 365.1, - "inPlaceUs": 365.9, - "correct": true - }, - { - "sizeBytes": 268435456, - "dtype": "float", - "latencyUs": 697.9, - "algBandwidthGbps": 384.61, - "busBandwidthGbps": 336.54, - "outOfPlaceUs": 698.5, - "inPlaceUs": 697.9, - "correct": true - }, - { - "sizeBytes": 536870912, - "dtype": "float", - "latencyUs": 
1353.3, - "algBandwidthGbps": 396.7, - "busBandwidthGbps": 347.11, - "outOfPlaceUs": 1353.3, - "inPlaceUs": 1355.9, - "correct": true - }, - { - "sizeBytes": 1073741824, - "dtype": "float", - "latencyUs": 2675.6, - "algBandwidthGbps": 401.32, - "busBandwidthGbps": 351.15, - "outOfPlaceUs": 2675.6, - "inPlaceUs": 2679, - "correct": true - }, - { - "sizeBytes": 2147483648, - "dtype": "float", - "latencyUs": 5296.7, - "algBandwidthGbps": 405.43, - "busBandwidthGbps": 354.76, - "outOfPlaceUs": 5301, - "inPlaceUs": 5296.7, - "correct": true - }, - { - "sizeBytes": 4294967296, - "dtype": "float", - "latencyUs": 10543, - "algBandwidthGbps": 407.38, - "busBandwidthGbps": 356.46, - "outOfPlaceUs": 10543, - "inPlaceUs": 10668, - "correct": true - }, - { - "sizeBytes": 8589934592, - "dtype": "float", - "latencyUs": 21021, - "algBandwidthGbps": 408.63, - "busBandwidthGbps": 357.55, - "outOfPlaceUs": 21021, - "inPlaceUs": 21415, - "correct": true - } - ] - }, - { - "id": "cxn-9383336f", - "identity": "nccl|mi355x|reduce_scatter|mi355x-xgmi|xgmi|8|nccl-tests-v1", - "op": "reduce_scatter", - "sku": "mi355x", - "runner": "mi355x-amds_01", - "topologyClass": "mi355x-xgmi", - "transport": "xgmi", - "worldSize": 8, - "nodes": 1, - "dtype": "float", - "comparisonClass": "standardized", - "comparisonKey": "fd5d1a361a3ebfa3", - "measurementContract": "nccl-tests-v1", - "avgBusBandwidthGbps": 116.588, - "status": "valid", - "valid": true, - "colorKey": "mi355x_9383336f", - "label": "MI355X · mi355x-xgmi · xgmi (ws8)", - "generatedAt": "2026-06-29T02:40:54.838353+00:00", - "run": { - "id": null, - "url": null, - "createdAt": "2026-06-29T02:40:54.838353+00:00", - "sha": null - }, - "rows": [ - { - "sizeBytes": 128, - "dtype": "float", - "latencyUs": 63.68, - "algBandwidthGbps": 0, - "busBandwidthGbps": 0, - "outOfPlaceUs": 63.68, - "inPlaceUs": 71.14, - "correct": true - }, - { - "sizeBytes": 256, - "dtype": "float", - "latencyUs": 58.12, - "algBandwidthGbps": 0, - "busBandwidthGbps": 0, - 
"outOfPlaceUs": 58.12, - "inPlaceUs": 69.38, - "correct": true - }, - { - "sizeBytes": 512, - "dtype": "float", - "latencyUs": 59.18, - "algBandwidthGbps": 0.01, - "busBandwidthGbps": 0.01, - "outOfPlaceUs": 59.18, - "inPlaceUs": 70.39, - "correct": true - }, - { - "sizeBytes": 1024, - "dtype": "float", - "latencyUs": 58.61, - "algBandwidthGbps": 0.02, - "busBandwidthGbps": 0.02, - "outOfPlaceUs": 58.61, - "inPlaceUs": 59.64, - "correct": true - }, - { - "sizeBytes": 2048, - "dtype": "float", - "latencyUs": 58.93, - "algBandwidthGbps": 0.03, - "busBandwidthGbps": 0.03, - "outOfPlaceUs": 58.93, - "inPlaceUs": 60.04, - "correct": true - }, - { - "sizeBytes": 4096, - "dtype": "float", - "latencyUs": 59.87, - "algBandwidthGbps": 0.07, - "busBandwidthGbps": 0.06, - "outOfPlaceUs": 59.87, - "inPlaceUs": 59.13, - "correct": true - }, - { - "sizeBytes": 8192, - "dtype": "float", - "latencyUs": 59.51, - "algBandwidthGbps": 0.14, - "busBandwidthGbps": 0.12, - "outOfPlaceUs": 68.98, - "inPlaceUs": 59.51, - "correct": true - }, - { - "sizeBytes": 16384, - "dtype": "float", - "latencyUs": 59.64, - "algBandwidthGbps": 0.27, - "busBandwidthGbps": 0.24, - "outOfPlaceUs": 69.54, - "inPlaceUs": 59.64, - "correct": true - }, - { - "sizeBytes": 32768, - "dtype": "float", - "latencyUs": 59.88, - "algBandwidthGbps": 0.55, - "busBandwidthGbps": 0.48, - "outOfPlaceUs": 70.63, - "inPlaceUs": 59.88, - "correct": true - }, - { - "sizeBytes": 65536, - "dtype": "float", - "latencyUs": 61.5, - "algBandwidthGbps": 1.07, - "busBandwidthGbps": 0.93, - "outOfPlaceUs": 72.73, - "inPlaceUs": 61.5, - "correct": true - }, - { - "sizeBytes": 131072, - "dtype": "float", - "latencyUs": 65.15, - "algBandwidthGbps": 2.01, - "busBandwidthGbps": 1.76, - "outOfPlaceUs": 74.45, - "inPlaceUs": 65.15, - "correct": true - }, - { - "sizeBytes": 262144, - "dtype": "float", - "latencyUs": 69.72, - "algBandwidthGbps": 3.76, - "busBandwidthGbps": 3.29, - "outOfPlaceUs": 70.1, - "inPlaceUs": 69.72, - "correct": true - 
}, - { - "sizeBytes": 524288, - "dtype": "float", - "latencyUs": 68.53, - "algBandwidthGbps": 7.65, - "busBandwidthGbps": 6.69, - "outOfPlaceUs": 68.53, - "inPlaceUs": 68.85, - "correct": true - }, - { - "sizeBytes": 1048576, - "dtype": "float", - "latencyUs": 72, - "algBandwidthGbps": 14.56, - "busBandwidthGbps": 12.74, - "outOfPlaceUs": 72, - "inPlaceUs": 83.69, - "correct": true - }, - { - "sizeBytes": 2097152, - "dtype": "float", - "latencyUs": 76.95, - "algBandwidthGbps": 27.25, - "busBandwidthGbps": 23.85, - "outOfPlaceUs": 76.95, - "inPlaceUs": 86.59, - "correct": true - }, - { - "sizeBytes": 4194304, - "dtype": "float", - "latencyUs": 76.39, - "algBandwidthGbps": 54.91, - "busBandwidthGbps": 48.04, - "outOfPlaceUs": 76.39, - "inPlaceUs": 87.44, - "correct": true - }, - { - "sizeBytes": 8388608, - "dtype": "float", - "latencyUs": 78.25, - "algBandwidthGbps": 107.21, - "busBandwidthGbps": 93.8, - "outOfPlaceUs": 78.25, - "inPlaceUs": 89.31, - "correct": true - }, - { - "sizeBytes": 16777216, - "dtype": "float", - "latencyUs": 82.07, - "algBandwidthGbps": 204.42, - "busBandwidthGbps": 178.87, - "outOfPlaceUs": 82.07, - "inPlaceUs": 86.71, - "correct": true - }, - { - "sizeBytes": 33554432, - "dtype": "float", - "latencyUs": 115.6, - "algBandwidthGbps": 290.35, - "busBandwidthGbps": 254.06, - "outOfPlaceUs": 117.1, - "inPlaceUs": 115.6, - "correct": true - }, - { - "sizeBytes": 67108864, - "dtype": "float", - "latencyUs": 187.1, - "algBandwidthGbps": 358.71, - "busBandwidthGbps": 313.88, - "outOfPlaceUs": 192.5, - "inPlaceUs": 187.1, - "correct": true - }, - { - "sizeBytes": 134217728, - "dtype": "float", - "latencyUs": 323.5, - "algBandwidthGbps": 414.87, - "busBandwidthGbps": 363.02, - "outOfPlaceUs": 342.8, - "inPlaceUs": 323.5, - "correct": true - }, - { - "sizeBytes": 268435456, - "dtype": "float", - "latencyUs": 604.1, - "algBandwidthGbps": 444.38, - "busBandwidthGbps": 388.83, - "outOfPlaceUs": 658.6, - "inPlaceUs": 604.1, - "correct": true - }, - { - 
"sizeBytes": 536870912, - "dtype": "float", - "latencyUs": 1173.6, - "algBandwidthGbps": 457.48, - "busBandwidthGbps": 400.29, - "outOfPlaceUs": 1259.9, - "inPlaceUs": 1173.6, - "correct": true - }, - { - "sizeBytes": 1073741824, - "dtype": "float", - "latencyUs": 2311.8, - "algBandwidthGbps": 464.46, - "busBandwidthGbps": 406.4, - "outOfPlaceUs": 2481, - "inPlaceUs": 2311.8, - "correct": true - }, - { - "sizeBytes": 2147483648, - "dtype": "float", - "latencyUs": 4567.5, - "algBandwidthGbps": 470.17, - "busBandwidthGbps": 411.4, - "outOfPlaceUs": 4853.2, - "inPlaceUs": 4567.5, - "correct": true - }, - { - "sizeBytes": 4294967296, - "dtype": "float", - "latencyUs": 9220.5, - "algBandwidthGbps": 465.81, - "busBandwidthGbps": 407.58, - "outOfPlaceUs": 9610.1, - "inPlaceUs": 9220.5, - "correct": true - }, - { - "sizeBytes": 8589934592, - "dtype": "float", - "latencyUs": 19037, - "algBandwidthGbps": 451.22, - "busBandwidthGbps": 394.82, - "outOfPlaceUs": 19087, - "inPlaceUs": 19037, - "correct": true - } - ] - } - ], - "offload": [ - { - "id": "cxt-2254035a", - "identity": "offload|b300|b300-nvlink-island|nvlink|d2h|pageable|us", - "cohortIdentity": "offload|b300|b300-nvlink-island|nvlink", - "family": "offload", - "sku": "b300", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", - "operation": "d2h", - "subtype": "pageable", - "valid": true, - "status": "valid", - "note": "peak 58 GB/s · copy/compute overlap 26% · 8 NUMA node(s)", - "peakBandwidthGbps": 57.71, - "latencyUnit": "us", - "colorKey": "b300_2254035a", - "label": "B300 · d2h · pageable", - "generatedAt": "2026-06-27T13:14:13.476946+00:00", - "run": { - "id": null, - "url": null, - "createdAt": "2026-06-27T13:14:13.476946+00:00", - "sha": null - }, - "rows": [ - { - "sizeBytes": 4096, - "bandwidthGbps": 0.319, - "latency": 12.8224, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 16384, - "bandwidthGbps": 1.197, - "latency": 13.6896, - "sizeClass": null, - "correct": null - }, - { 
- "sizeBytes": 65536, - "bandwidthGbps": 4.07, - "latency": 16.1008, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 262144, - "bandwidthGbps": 10.171, - "latency": 25.7744, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 1048576, - "bandwidthGbps": 16.232, - "latency": 64.5984, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 4194304, - "bandwidthGbps": 22.845, - "latency": 183.6016, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 16777216, - "bandwidthGbps": 25.057, - "latency": 669.5584, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 67108864, - "bandwidthGbps": 25.741, - "latency": 2607.0801, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 268435456, - "bandwidthGbps": 25.884, - "latency": 10370.5231, - "sizeClass": null, - "correct": null - } - ] - }, - { - "id": "cxt-ec9c695d", - "identity": "offload|b300|b300-nvlink-island|nvlink|d2h|pinned|us", - "cohortIdentity": "offload|b300|b300-nvlink-island|nvlink", - "family": "offload", - "sku": "b300", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", - "operation": "d2h", - "subtype": "pinned", - "valid": true, - "status": "valid", - "note": "peak 58 GB/s · copy/compute overlap 26% · 8 NUMA node(s)", - "peakBandwidthGbps": 57.71, - "latencyUnit": "us", - "colorKey": "b300_ec9c695d", - "label": "B300 · d2h · pinned", - "generatedAt": "2026-06-27T13:14:13.476946+00:00", - "run": { - "id": null, - "url": null, - "createdAt": "2026-06-27T13:14:13.476946+00:00", - "sha": null - }, - "rows": [ - { - "sizeBytes": 4096, - "bandwidthGbps": 1.314, - "latency": 3.1168, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 16384, - "bandwidthGbps": 4.911, - "latency": 3.336, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 65536, - "bandwidthGbps": 16.26, - "latency": 4.0304, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 262144, - "bandwidthGbps": 35.371, - "latency": 7.4112, - 
"sizeClass": null, - "correct": null - }, - { - "sizeBytes": 1048576, - "bandwidthGbps": 49.656, - "latency": 21.1168, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 4194304, - "bandwidthGbps": 55.179, - "latency": 76.0128, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 16777216, - "bandwidthGbps": 56.698, - "latency": 295.9056, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 67108864, - "bandwidthGbps": 57.243, - "latency": 1172.3568, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 268435456, - "bandwidthGbps": 57.376, - "latency": 4678.5118, - "sizeClass": null, - "correct": null - } - ] - }, - { - "id": "cxt-0325201a", - "identity": "offload|b300|b300-nvlink-island|nvlink|h2d|pageable|us", - "cohortIdentity": "offload|b300|b300-nvlink-island|nvlink", - "family": "offload", - "sku": "b300", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", - "operation": "h2d", - "subtype": "pageable", - "valid": true, - "status": "valid", - "note": "peak 58 GB/s · copy/compute overlap 26% · 8 NUMA node(s)", - "peakBandwidthGbps": 57.71, - "latencyUnit": "us", - "colorKey": "b300_0325201a", - "label": "B300 · h2d · pageable", - "generatedAt": "2026-06-27T13:14:13.476946+00:00", - "run": { - "id": null, - "url": null, - "createdAt": "2026-06-27T13:14:13.476946+00:00", - "sha": null - }, - "rows": [ - { - "sizeBytes": 4096, - "bandwidthGbps": 0.48, - "latency": 8.5408, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 16384, - "bandwidthGbps": 1.73, - "latency": 9.4704, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 65536, - "bandwidthGbps": 4.35, - "latency": 15.0656, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 262144, - "bandwidthGbps": 11.573, - "latency": 22.6512, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 1048576, - "bandwidthGbps": 19.272, - "latency": 54.408, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 4194304, - 
"bandwidthGbps": 32.974, - "latency": 127.2, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 16777216, - "bandwidthGbps": 38.009, - "latency": 441.4016, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 67108864, - "bandwidthGbps": 39.678, - "latency": 1691.3168, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 268435456, - "bandwidthGbps": 40.13, - "latency": 6689.2288, - "sizeClass": null, - "correct": null - } - ] - }, - { - "id": "cxt-6112e71d", - "identity": "offload|b300|b300-nvlink-island|nvlink|h2d|pinned|us", - "cohortIdentity": "offload|b300|b300-nvlink-island|nvlink", - "family": "offload", - "sku": "b300", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", - "operation": "h2d", - "subtype": "pinned", - "valid": true, - "status": "valid", - "note": "peak 58 GB/s · copy/compute overlap 26% · 8 NUMA node(s)", - "peakBandwidthGbps": 57.71, - "latencyUnit": "us", - "colorKey": "b300_6112e71d", - "label": "B300 · h2d · pinned", - "generatedAt": "2026-06-27T13:14:13.476946+00:00", - "run": { - "id": null, - "url": null, - "createdAt": "2026-06-27T13:14:13.476946+00:00", - "sha": null - }, - "rows": [ - { - "sizeBytes": 4096, - "bandwidthGbps": 1.204, - "latency": 3.4032, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 16384, - "bandwidthGbps": 4.481, - "latency": 3.656, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 65536, - "bandwidthGbps": 15.087, - "latency": 4.344, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 262144, - "bandwidthGbps": 32.966, - "latency": 7.952, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 1048576, - "bandwidthGbps": 49.231, - "latency": 21.2992, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 4194304, - "bandwidthGbps": 55.149, - "latency": 76.0544, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 16777216, - "bandwidthGbps": 57.026, - "latency": 294.2016, - "sizeClass": null, - 
"correct": null - }, - { - "sizeBytes": 67108864, - "bandwidthGbps": 57.572, - "latency": 1165.6432, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 268435456, - "bandwidthGbps": 57.71, - "latency": 4651.4656, - "sizeClass": null, - "correct": null - } - ] - }, - { - "id": "cxt-cdb189fe", - "identity": "offload|h100|h100-nvlink-island|nvlink|d2h|pageable|us", - "cohortIdentity": "offload|h100|h100-nvlink-island|nvlink", - "family": "offload", - "sku": "h100", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "operation": "d2h", - "subtype": "pageable", - "valid": true, - "status": "valid", - "note": "peak 55 GB/s · copy/compute overlap 55% · 2 NUMA node(s)", - "peakBandwidthGbps": 55.199, - "latencyUnit": "us", - "colorKey": "h100_cdb189fe", - "label": "H100 · d2h · pageable", - "generatedAt": "2026-06-27T13:13:55.178101+00:00", - "run": { - "id": null, - "url": null, - "createdAt": "2026-06-27T13:13:55.178101+00:00", - "sha": null - }, - "rows": [ - { - "sizeBytes": 4096, - "bandwidthGbps": 0.291, - "latency": 14.0992, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 16384, - "bandwidthGbps": 1.049, - "latency": 15.6128, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 65536, - "bandwidthGbps": 2.957, - "latency": 22.1648, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 262144, - "bandwidthGbps": 6.642, - "latency": 39.4656, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 1048576, - "bandwidthGbps": 9.239, - "latency": 113.4928, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 4194304, - "bandwidthGbps": 12.413, - "latency": 337.9072, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 16777216, - "bandwidthGbps": 13.847, - "latency": 1211.6448, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 67108864, - "bandwidthGbps": 14.279, - "latency": 4699.8737, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 268435456, - 
"bandwidthGbps": 14.534, - "latency": 18469.5724, - "sizeClass": null, - "correct": null - } - ] - }, - { - "id": "cxt-0606c0a1", - "identity": "offload|h100|h100-nvlink-island|nvlink|d2h|pinned|us", - "cohortIdentity": "offload|h100|h100-nvlink-island|nvlink", - "family": "offload", - "sku": "h100", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "operation": "d2h", - "subtype": "pinned", - "valid": true, - "status": "valid", - "note": "peak 55 GB/s · copy/compute overlap 55% · 2 NUMA node(s)", - "peakBandwidthGbps": 55.199, - "latencyUnit": "us", - "colorKey": "h100_0606c0a1", - "label": "H100 · d2h · pinned", - "generatedAt": "2026-06-27T13:13:55.178101+00:00", - "run": { - "id": null, - "url": null, - "createdAt": "2026-06-27T13:13:55.178101+00:00", - "sha": null - }, - "rows": [ - { - "sizeBytes": 4096, - "bandwidthGbps": 0.862, - "latency": 4.7504, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 16384, - "bandwidthGbps": 3.537, - "latency": 4.632, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 65536, - "bandwidthGbps": 13.999, - "latency": 4.6816, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 262144, - "bandwidthGbps": 34.756, - "latency": 7.5424, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 1048576, - "bandwidthGbps": 46.25, - "latency": 22.672, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 4194304, - "bandwidthGbps": 52.69, - "latency": 79.6032, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 16777216, - "bandwidthGbps": 54.319, - "latency": 308.8672, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 67108864, - "bandwidthGbps": 54.768, - "latency": 1225.3216, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 268435456, - "bandwidthGbps": 54.899, - "latency": 4889.6255, - "sizeClass": null, - "correct": null - } - ] - }, - { - "id": "cxt-6119c3de", - "identity": 
"offload|h100|h100-nvlink-island|nvlink|h2d|pageable|us", - "cohortIdentity": "offload|h100|h100-nvlink-island|nvlink", - "family": "offload", - "sku": "h100", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "operation": "h2d", - "subtype": "pageable", - "valid": true, - "status": "valid", - "note": "peak 55 GB/s · copy/compute overlap 55% · 2 NUMA node(s)", - "peakBandwidthGbps": 55.199, - "latencyUnit": "us", - "colorKey": "h100_6119c3de", - "label": "H100 · h2d · pageable", - "generatedAt": "2026-06-27T13:13:55.178101+00:00", - "run": { - "id": null, - "url": null, - "createdAt": "2026-06-27T13:13:55.178101+00:00", - "sha": null + "phase": "decode", + "routing": "zipf-mild", + "T": 16, + "p50_amplification": 0.975, + "p99_amplification": 0.916 }, - "rows": [ - { - "sizeBytes": 4096, - "bandwidthGbps": 0.367, - "latency": 11.168, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 16384, - "bandwidthGbps": 1.141, - "latency": 14.3536, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 65536, - "bandwidthGbps": 2.491, - "latency": 26.3136, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 262144, - "bandwidthGbps": 8.311, - "latency": 31.5408, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 1048576, - "bandwidthGbps": 12.373, - "latency": 84.7456, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 4194304, - "bandwidthGbps": 16.967, - "latency": 247.208, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 16777216, - "bandwidthGbps": 18.73, - "latency": 895.7264, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 67108864, - "bandwidthGbps": 19.219, - "latency": 3491.8175, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 268435456, - "bandwidthGbps": 19.362, - "latency": 13864.0869, - "sizeClass": null, - "correct": null - } - ] - }, - { - "id": "cxt-60747181", - "identity": "offload|h100|h100-nvlink-island|nvlink|h2d|pinned|us", - 
"cohortIdentity": "offload|h100|h100-nvlink-island|nvlink", - "family": "offload", - "sku": "h100", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "operation": "h2d", - "subtype": "pinned", - "valid": true, - "status": "valid", - "note": "peak 55 GB/s · copy/compute overlap 55% · 2 NUMA node(s)", - "peakBandwidthGbps": 55.199, - "latencyUnit": "us", - "colorKey": "h100_60747181", - "label": "H100 · h2d · pinned", - "generatedAt": "2026-06-27T13:13:55.178101+00:00", - "run": { - "id": null, - "url": null, - "createdAt": "2026-06-27T13:13:55.178101+00:00", - "sha": null + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 32, + "p50_amplification": 0.996, + "p99_amplification": 1.07 }, - "rows": [ - { - "sizeBytes": 4096, - "bandwidthGbps": 0.64, - "latency": 6.3984, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 16384, - "bandwidthGbps": 3.19, - "latency": 5.136, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 65536, - "bandwidthGbps": 12.693, - "latency": 5.1632, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 262144, - "bandwidthGbps": 33.464, - "latency": 7.8336, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 1048576, - "bandwidthGbps": 47.39, - "latency": 22.1264, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 4194304, - "bandwidthGbps": 52.967, - "latency": 79.1872, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 16777216, - "bandwidthGbps": 54.546, - "latency": 307.5808, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 67108864, - "bandwidthGbps": 55.077, - "latency": 1218.4512, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 268435456, - "bandwidthGbps": 55.199, - "latency": 4863.0142, - "sizeClass": null, - "correct": null - } - ] - }, - { - "id": "cxt-5472a2f0", - "identity": "offload|h200|h200-nvlink-island|nvlink|d2h|pageable|us", - "cohortIdentity": 
"offload|h200|h200-nvlink-island|nvlink", - "family": "offload", - "sku": "h200", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "operation": "d2h", - "subtype": "pageable", - "valid": true, - "status": "valid", - "note": "peak 55 GB/s · copy/compute overlap 0% · 2 NUMA node(s)", - "peakBandwidthGbps": 54.738, - "latencyUnit": "us", - "colorKey": "h200_5472a2f0", - "label": "H200 · d2h · pageable", - "generatedAt": "2026-06-27T13:14:28.000433+00:00", - "run": { - "id": null, - "url": null, - "createdAt": "2026-06-27T13:14:28.000433+00:00", - "sha": null + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 64, + "p50_amplification": 0.993, + "p99_amplification": 0.994 }, - "rows": [ - { - "sizeBytes": 4096, - "bandwidthGbps": 0.349, - "latency": 11.7232, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 16384, - "bandwidthGbps": 1.05, - "latency": 15.5984, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 65536, - "bandwidthGbps": 3.61, - "latency": 18.1552, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 262144, - "bandwidthGbps": 6.584, - "latency": 39.8176, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 1048576, - "bandwidthGbps": 8.775, - "latency": 119.4976, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 4194304, - "bandwidthGbps": 13.542, - "latency": 309.7312, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 16777216, - "bandwidthGbps": 15.692, - "latency": 1069.1856, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 67108864, - "bandwidthGbps": 15.898, - "latency": 4221.0976, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 268435456, - "bandwidthGbps": 16.284, - "latency": 16484.2148, - "sizeClass": null, - "correct": null - } - ] - }, - { - "id": "cxt-a653b433", - "identity": "offload|h200|h200-nvlink-island|nvlink|d2h|pinned|us", - "cohortIdentity": 
"offload|h200|h200-nvlink-island|nvlink", - "family": "offload", - "sku": "h200", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "operation": "d2h", - "subtype": "pinned", - "valid": true, - "status": "valid", - "note": "peak 55 GB/s · copy/compute overlap 0% · 2 NUMA node(s)", - "peakBandwidthGbps": 54.738, - "latencyUnit": "us", - "colorKey": "h200_a653b433", - "label": "H200 · d2h · pinned", - "generatedAt": "2026-06-27T13:14:28.000433+00:00", - "run": { - "id": null, - "url": null, - "createdAt": "2026-06-27T13:14:28.000433+00:00", - "sha": null + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 128, + "p50_amplification": 1.012, + "p99_amplification": 1.005 }, - "rows": [ - { - "sizeBytes": 4096, - "bandwidthGbps": 1.066, - "latency": 3.8416, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 16384, - "bandwidthGbps": 4.38, - "latency": 3.7408, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 65536, - "bandwidthGbps": 16.019, - "latency": 4.0912, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 262144, - "bandwidthGbps": 36.28, - "latency": 7.2256, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 1048576, - "bandwidthGbps": 46.925, - "latency": 22.3456, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 4194304, - "bandwidthGbps": 50.673, - "latency": 82.7712, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 16777216, - "bandwidthGbps": 53.181, - "latency": 315.4752, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 67108864, - "bandwidthGbps": 53.519, - "latency": 1253.9344, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 268435456, - "bandwidthGbps": 51.961, - "latency": 5166.0847, - "sizeClass": null, - "correct": null - } - ] - }, - { - "id": "cxt-88606cb0", - "identity": "offload|h200|h200-nvlink-island|nvlink|h2d|pageable|us", - "cohortIdentity": "offload|h200|h200-nvlink-island|nvlink", - 
"family": "offload", - "sku": "h200", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "operation": "h2d", - "subtype": "pageable", - "valid": true, - "status": "valid", - "note": "peak 55 GB/s · copy/compute overlap 0% · 2 NUMA node(s)", - "peakBandwidthGbps": 54.738, - "latencyUnit": "us", - "colorKey": "h200_88606cb0", - "label": "H200 · h2d · pageable", - "generatedAt": "2026-06-27T13:14:28.000433+00:00", - "run": { - "id": null, - "url": null, - "createdAt": "2026-06-27T13:14:28.000433+00:00", - "sha": null + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 1, + "p50_amplification": 1.035, + "p99_amplification": 0.851 }, - "rows": [ - { - "sizeBytes": 4096, - "bandwidthGbps": 0.408, - "latency": 10.048, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 16384, - "bandwidthGbps": 1.057, - "latency": 15.5072, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 65536, - "bandwidthGbps": 2.766, - "latency": 23.6976, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 262144, - "bandwidthGbps": 9.51, - "latency": 27.5664, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 1048576, - "bandwidthGbps": 13.367, - "latency": 78.4464, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 4194304, - "bandwidthGbps": 18.167, - "latency": 230.8736, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 16777216, - "bandwidthGbps": 20.785, - "latency": 807.1696, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 67108864, - "bandwidthGbps": 21.442, - "latency": 3129.8529, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 268435456, - "bandwidthGbps": 21.303, - "latency": 12600.544, - "sizeClass": null, - "correct": null - } - ] - }, - { - "id": "cxt-543138f3", - "identity": "offload|h200|h200-nvlink-island|nvlink|h2d|pinned|us", - "cohortIdentity": "offload|h200|h200-nvlink-island|nvlink", - "family": "offload", - "sku": "h200", 
- "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "operation": "h2d", - "subtype": "pinned", - "valid": true, - "status": "valid", - "note": "peak 55 GB/s · copy/compute overlap 0% · 2 NUMA node(s)", - "peakBandwidthGbps": 54.738, - "latencyUnit": "us", - "colorKey": "h200_543138f3", - "label": "H200 · h2d · pinned", - "generatedAt": "2026-06-27T13:14:28.000433+00:00", - "run": { - "id": null, - "url": null, - "createdAt": "2026-06-27T13:14:28.000433+00:00", - "sha": null + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 2, + "p50_amplification": 1.04, + "p99_amplification": 1.09 }, - "rows": [ - { - "sizeBytes": 4096, - "bandwidthGbps": 0.882, - "latency": 4.6464, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 16384, - "bandwidthGbps": 4.6, - "latency": 3.5616, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 65536, - "bandwidthGbps": 16.773, - "latency": 3.9072, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 262144, - "bandwidthGbps": 35.219, - "latency": 7.4432, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 1048576, - "bandwidthGbps": 47.742, - "latency": 21.9632, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 4194304, - "bandwidthGbps": 52.79, - "latency": 79.4528, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 16777216, - "bandwidthGbps": 54.738, - "latency": 306.4976, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 67108864, - "bandwidthGbps": 53.864, - "latency": 1245.9056, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 268435456, - "bandwidthGbps": 54.328, - "latency": 4940.9775, - "sizeClass": null, - "correct": null - } - ] - } - ], - "copyEngine": [ - { - "id": "cxt-6e3131b7", - "identity": "copy-engine|b300|b300-nvlink-island|nvlink|dtod|copy-engine|us", - "cohortIdentity": "copy-engine|b300|b300-nvlink-island|nvlink", - "family": "copy-engine", - "sku": "b300", - 
"topologyClass": "b300-nvlink-island", - "transport": "nvlink", - "operation": "dtod", - "subtype": "copy-engine", - "valid": true, - "status": "valid", - "note": "peak 33743 GB/s · copy-engine uses near-zero SMs: no", - "peakBandwidthGbps": 33743.395, - "latencyUnit": "us", - "colorKey": "b300_6e3131b7", - "label": "B300 · dtod · copy-engine", - "generatedAt": "2026-06-27T13:14:14.567612+00:00", - "run": { - "id": null, - "url": null, - "createdAt": "2026-06-27T13:14:14.567612+00:00", - "sha": null + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 4, + "p50_amplification": 1.022, + "p99_amplification": 1.054 }, - "rows": [ - { - "sizeBytes": 65536, - "bandwidthGbps": 7.729, - "latency": 8.4789, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 262144, - "bandwidthGbps": 31.851, - "latency": 8.2304, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 1048576, - "bandwidthGbps": 131.475, - "latency": 7.9755, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 4194304, - "bandwidthGbps": 506.069, - "latency": 8.288, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 16777216, - "bandwidthGbps": 2092.131, - "latency": 8.0192, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 67108864, - "bandwidthGbps": 8232.735, - "latency": 8.1515, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 268435456, - "bandwidthGbps": 33743.395, - "latency": 7.9552, - "sizeClass": null, - "correct": null - } - ] - }, - { - "id": "cxt-214329f7", - "identity": "copy-engine|b300|b300-nvlink-island|nvlink|dtod|sm|us", - "cohortIdentity": "copy-engine|b300|b300-nvlink-island|nvlink", - "family": "copy-engine", - "sku": "b300", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", - "operation": "dtod", - "subtype": "sm", - "valid": true, - "status": "valid", - "note": "peak 33743 GB/s · copy-engine uses near-zero SMs: no", - "peakBandwidthGbps": 33743.395, - 
"latencyUnit": "us", - "colorKey": "b300_214329f7", - "label": "B300 · dtod · sm", - "generatedAt": "2026-06-27T13:14:14.567612+00:00", - "run": { - "id": null, - "url": null, - "createdAt": "2026-06-27T13:14:14.567612+00:00", - "sha": null + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 8, + "p50_amplification": 1.035, + "p99_amplification": 1.032 }, - "rows": [ - { - "sizeBytes": 65536, - "bandwidthGbps": 7.772, - "latency": 8.432, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 262144, - "bandwidthGbps": 31.011, - "latency": 8.4533, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 1048576, - "bandwidthGbps": 127.139, - "latency": 8.2475, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 4194304, - "bandwidthGbps": 515.355, - "latency": 8.1387, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 16777216, - "bandwidthGbps": 2004.925, - "latency": 8.368, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 67108864, - "bandwidthGbps": 8245.683, - "latency": 8.1387, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 268435456, - "bandwidthGbps": 32844.98, - "latency": 8.1728, - "sizeClass": null, - "correct": null - } - ] - }, - { - "id": "cxt-64e7ea33", - "identity": "copy-engine|b300|b300-nvlink-island|nvlink|htod|copy-engine|us", - "cohortIdentity": "copy-engine|b300|b300-nvlink-island|nvlink", - "family": "copy-engine", - "sku": "b300", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", - "operation": "htod", - "subtype": "copy-engine", - "valid": true, - "status": "valid", - "note": "peak 33743 GB/s · copy-engine uses near-zero SMs: no", - "peakBandwidthGbps": 33743.395, - "latencyUnit": "us", - "colorKey": "b300_64e7ea33", - "label": "B300 · htod · copy-engine", - "generatedAt": "2026-06-27T13:14:14.567612+00:00", - "run": { - "id": null, - "url": null, - "createdAt": "2026-06-27T13:14:14.567612+00:00", - "sha": null + { + 
"sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 16, + "p50_amplification": 1.041, + "p99_amplification": 1.026 }, - "rows": [ - { - "sizeBytes": 65536, - "bandwidthGbps": 8.922, - "latency": 7.3451, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 262144, - "bandwidthGbps": 35.159, - "latency": 7.456, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 1048576, - "bandwidthGbps": 139.617, - "latency": 7.5104, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 4194304, - "bandwidthGbps": 525.479, - "latency": 7.9819, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 16777216, - "bandwidthGbps": 2004.925, - "latency": 8.368, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 67108864, - "bandwidthGbps": 8211.245, - "latency": 8.1728, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 268435456, - "bandwidthGbps": 32556.046, - "latency": 8.2453, - "sizeClass": null, - "correct": null - } - ] - }, - { - "id": "cxt-4b3f523b", - "identity": "copy-engine|b300|b300-nvlink-island|nvlink|htod|sm|us", - "cohortIdentity": "copy-engine|b300|b300-nvlink-island|nvlink", - "family": "copy-engine", - "sku": "b300", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", - "operation": "htod", - "subtype": "sm", - "valid": true, - "status": "valid", - "note": "peak 33743 GB/s · copy-engine uses near-zero SMs: no", - "peakBandwidthGbps": 33743.395, - "latencyUnit": "us", - "colorKey": "b300_4b3f523b", - "label": "B300 · htod · sm", - "generatedAt": "2026-06-27T13:14:14.567612+00:00", - "run": { - "id": null, - "url": null, - "createdAt": "2026-06-27T13:14:14.567612+00:00", - "sha": null + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 32, + "p50_amplification": 1.043, + "p99_amplification": 1.015 }, - "rows": [ - { - "sizeBytes": 65536, - "bandwidthGbps": 7.918, - "latency": 8.2773, - "sizeClass": null, - "correct": null 
- }, - { - "sizeBytes": 262144, - "bandwidthGbps": 31.703, - "latency": 8.2688, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 1048576, - "bandwidthGbps": 127.9, - "latency": 8.1984, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 4194304, - "bandwidthGbps": 509.743, - "latency": 8.2283, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 16777216, - "bandwidthGbps": 2022.716, - "latency": 8.2944, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 67108864, - "bandwidthGbps": 8166.48, - "latency": 8.2176, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 268435456, - "bandwidthGbps": 32413.478, - "latency": 8.2816, - "sizeClass": null, - "correct": null - } - ] - }, - { - "id": "cxt-ff8a9f33", - "identity": "copy-engine|h100|h100-nvlink-island|nvlink|dtod|copy-engine|us", - "cohortIdentity": "copy-engine|h100|h100-nvlink-island|nvlink", - "family": "copy-engine", - "sku": "h100", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "operation": "dtod", - "subtype": "copy-engine", - "valid": true, - "status": "valid", - "note": "peak 18919 GB/s · copy-engine uses near-zero SMs: no", - "peakBandwidthGbps": 18918.827, - "latencyUnit": "us", - "colorKey": "h100_ff8a9f33", - "label": "H100 · dtod · copy-engine", - "generatedAt": "2026-06-27T13:14:03.281164+00:00", - "run": { - "id": null, - "url": null, - "createdAt": "2026-06-27T13:14:03.281164+00:00", - "sha": null + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 64, + "p50_amplification": 1.021, + "p99_amplification": 1.052 }, - "rows": [ - { - "sizeBytes": 65536, - "bandwidthGbps": 4.208, - "latency": 15.5744, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 262144, - "bandwidthGbps": 17.496, - "latency": 14.9835, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 1048576, - "bandwidthGbps": 70.967, - "latency": 14.7755, - "sizeClass": null, - "correct": null - }, - { 
- "sizeBytes": 4194304, - "bandwidthGbps": 283.154, - "latency": 14.8128, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 16777216, - "bandwidthGbps": 1120.673, - "latency": 14.9707, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 67108864, - "bandwidthGbps": 4411.651, - "latency": 15.2117, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 268435456, - "bandwidthGbps": 17557.959, - "latency": 15.2885, - "sizeClass": null, - "correct": null - } - ] - }, - { - "id": "cxt-b4f7093b", - "identity": "copy-engine|h100|h100-nvlink-island|nvlink|dtod|sm|us", - "cohortIdentity": "copy-engine|h100|h100-nvlink-island|nvlink", - "family": "copy-engine", - "sku": "h100", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "operation": "dtod", - "subtype": "sm", - "valid": true, - "status": "valid", - "note": "peak 18919 GB/s · copy-engine uses near-zero SMs: no", - "peakBandwidthGbps": 18918.827, - "latencyUnit": "us", - "colorKey": "h100_b4f7093b", - "label": "H100 · dtod · sm", - "generatedAt": "2026-06-27T13:14:03.281164+00:00", - "run": { - "id": null, - "url": null, - "createdAt": "2026-06-27T13:14:03.281164+00:00", - "sha": null + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 128, + "p50_amplification": 1.017, + "p99_amplification": 1.017 }, - "rows": [ - { - "sizeBytes": 65536, - "bandwidthGbps": 4.26, - "latency": 15.3856, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 262144, - "bandwidthGbps": 17.366, - "latency": 15.0955, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 1048576, - "bandwidthGbps": 68.961, - "latency": 15.2053, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 4194304, - "bandwidthGbps": 274.803, - "latency": 15.2629, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 16777216, - "bandwidthGbps": 1060.954, - "latency": 15.8133, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 
67108864, - "bandwidthGbps": 4300.674, - "latency": 15.6043, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 268435456, - "bandwidthGbps": 17342.584, - "latency": 15.4784, - "sizeClass": null, - "correct": null - } - ] - }, - { - "id": "cxt-08d3e6b7", - "identity": "copy-engine|h100|h100-nvlink-island|nvlink|htod|copy-engine|us", - "cohortIdentity": "copy-engine|h100|h100-nvlink-island|nvlink", - "family": "copy-engine", - "sku": "h100", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "operation": "htod", - "subtype": "copy-engine", - "valid": true, - "status": "valid", - "note": "peak 18919 GB/s · copy-engine uses near-zero SMs: no", - "peakBandwidthGbps": 18918.827, - "latencyUnit": "us", - "colorKey": "h100_08d3e6b7", - "label": "H100 · htod · copy-engine", - "generatedAt": "2026-06-27T13:14:03.281164+00:00", - "run": { - "id": null, - "url": null, - "createdAt": "2026-06-27T13:14:03.281164+00:00", - "sha": null + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 1, + "p50_amplification": 0.967, + "p99_amplification": 0.834 }, - "rows": [ - { - "sizeBytes": 65536, - "bandwidthGbps": 4.543, - "latency": 14.4267, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 262144, - "bandwidthGbps": 18.364, - "latency": 14.2752, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 1048576, - "bandwidthGbps": 73.192, - "latency": 14.3264, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 4194304, - "bandwidthGbps": 290.776, - "latency": 14.4245, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 16777216, - "bandwidthGbps": 1184.475, - "latency": 14.1643, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 67108864, - "bandwidthGbps": 4735.759, - "latency": 14.1707, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 268435456, - "bandwidthGbps": 18918.827, - "latency": 14.1888, - "sizeClass": null, - "correct": null - } - ] - }, - { 
- "id": "cxt-8afae0f7", - "identity": "copy-engine|h100|h100-nvlink-island|nvlink|htod|sm|us", - "cohortIdentity": "copy-engine|h100|h100-nvlink-island|nvlink", - "family": "copy-engine", - "sku": "h100", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "operation": "htod", - "subtype": "sm", - "valid": true, - "status": "valid", - "note": "peak 18919 GB/s · copy-engine uses near-zero SMs: no", - "peakBandwidthGbps": 18918.827, - "latencyUnit": "us", - "colorKey": "h100_8afae0f7", - "label": "H100 · htod · sm", - "generatedAt": "2026-06-27T13:14:03.281164+00:00", - "run": { - "id": null, - "url": null, - "createdAt": "2026-06-27T13:14:03.281164+00:00", - "sha": null + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 2, + "p50_amplification": 0.949, + "p99_amplification": 0.94 }, - "rows": [ - { - "sizeBytes": 65536, - "bandwidthGbps": 4.217, - "latency": 15.5403, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 262144, - "bandwidthGbps": 17.286, - "latency": 15.1648, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 1048576, - "bandwidthGbps": 69.404, - "latency": 15.1083, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 4194304, - "bandwidthGbps": 272.31, - "latency": 15.4027, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 16777216, - "bandwidthGbps": 1106.715, - "latency": 15.1595, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 67108864, - "bandwidthGbps": 4303.322, - "latency": 15.5947, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 268435456, - "bandwidthGbps": 17472.627, - "latency": 15.3632, - "sizeClass": null, - "correct": null - } - ] - }, - { - "id": "cxt-568b3ed1", - "identity": "copy-engine|h200|h200-nvlink-island|nvlink|dtod|copy-engine|us", - "cohortIdentity": "copy-engine|h200|h200-nvlink-island|nvlink", - "family": "copy-engine", - "sku": "h200", - "topologyClass": "h200-nvlink-island", - "transport": 
"nvlink", - "operation": "dtod", - "subtype": "copy-engine", - "valid": true, - "status": "valid", - "note": "peak 21990 GB/s · copy-engine uses near-zero SMs: no", - "peakBandwidthGbps": 21990.41, - "latencyUnit": "us", - "colorKey": "h200_568b3ed1", - "label": "H200 · dtod · copy-engine", - "generatedAt": "2026-06-27T13:14:32.919518+00:00", - "run": { - "id": null, - "url": null, - "createdAt": "2026-06-27T13:14:32.919518+00:00", - "sha": null + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 4, + "p50_amplification": 0.969, + "p99_amplification": 1.009 }, - "rows": [ - { - "sizeBytes": 65536, - "bandwidthGbps": 3.796, - "latency": 17.264, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 262144, - "bandwidthGbps": 16.942, - "latency": 15.4731, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 1048576, - "bandwidthGbps": 68.576, - "latency": 15.2907, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 4194304, - "bandwidthGbps": 252.563, - "latency": 16.6069, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 16777216, - "bandwidthGbps": 1049.556, - "latency": 15.9851, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 67108864, - "bandwidthGbps": 3952.168, - "latency": 16.9803, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 268435456, - "bandwidthGbps": 17644.131, - "latency": 15.2139, - "sizeClass": null, - "correct": null - } - ] - }, - { - "id": "cxt-d2f1fcf5", - "identity": "copy-engine|h200|h200-nvlink-island|nvlink|dtod|sm|us", - "cohortIdentity": "copy-engine|h200|h200-nvlink-island|nvlink", - "family": "copy-engine", - "sku": "h200", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "operation": "dtod", - "subtype": "sm", - "valid": true, - "status": "valid", - "note": "peak 21990 GB/s · copy-engine uses near-zero SMs: no", - "peakBandwidthGbps": 21990.41, - "latencyUnit": "us", - "colorKey": "h200_d2f1fcf5", - "label": 
"H200 · dtod · sm", - "generatedAt": "2026-06-27T13:14:32.919518+00:00", - "run": { - "id": null, - "url": null, - "createdAt": "2026-06-27T13:14:32.919518+00:00", - "sha": null + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 8, + "p50_amplification": 0.99, + "p99_amplification": 0.988 }, - "rows": [ - { - "sizeBytes": 65536, - "bandwidthGbps": 3.494, - "latency": 18.7584, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 262144, - "bandwidthGbps": 15.049, - "latency": 17.4197, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 1048576, - "bandwidthGbps": 63.181, - "latency": 16.5963, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 4194304, - "bandwidthGbps": 241.207, - "latency": 17.3888, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 16777216, - "bandwidthGbps": 959.414, - "latency": 17.4869, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 67108864, - "bandwidthGbps": 4005.001, - "latency": 16.7563, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 268435456, - "bandwidthGbps": 16321.308, - "latency": 16.4469, - "sizeClass": null, - "correct": null - } - ] - }, - { - "id": "cxt-16dd6dad", - "identity": "copy-engine|h200|h200-nvlink-island|nvlink|htod|copy-engine|us", - "cohortIdentity": "copy-engine|h200|h200-nvlink-island|nvlink", - "family": "copy-engine", - "sku": "h200", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "operation": "htod", - "subtype": "copy-engine", - "valid": true, - "status": "valid", - "note": "peak 21990 GB/s · copy-engine uses near-zero SMs: no", - "peakBandwidthGbps": 21990.41, - "latencyUnit": "us", - "colorKey": "h200_16dd6dad", - "label": "H200 · htod · copy-engine", - "generatedAt": "2026-06-27T13:14:32.919518+00:00", - "run": { - "id": null, - "url": null, - "createdAt": "2026-06-27T13:14:32.919518+00:00", - "sha": null + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": 
"zipf-moderate", + "T": 16, + "p50_amplification": 0.996, + "p99_amplification": 1.053 }, - "rows": [ - { - "sizeBytes": 65536, - "bandwidthGbps": 4.478, - "latency": 14.6357, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 262144, - "bandwidthGbps": 19.382, - "latency": 13.5253, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 1048576, - "bandwidthGbps": 73.252, - "latency": 14.3147, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 4194304, - "bandwidthGbps": 300.417, - "latency": 13.9616, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 16777216, - "bandwidthGbps": 1322.624, - "latency": 12.6848, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 67108864, - "bandwidthGbps": 5426.008, - "latency": 12.368, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 268435456, - "bandwidthGbps": 21990.41, - "latency": 12.2069, - "sizeClass": null, - "correct": null - } - ] - }, - { - "id": "cxt-f87dced9", - "identity": "copy-engine|h200|h200-nvlink-island|nvlink|htod|sm|us", - "cohortIdentity": "copy-engine|h200|h200-nvlink-island|nvlink", - "family": "copy-engine", - "sku": "h200", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "operation": "htod", - "subtype": "sm", - "valid": true, - "status": "valid", - "note": "peak 21990 GB/s · copy-engine uses near-zero SMs: no", - "peakBandwidthGbps": 21990.41, - "latencyUnit": "us", - "colorKey": "h200_f87dced9", - "label": "H200 · htod · sm", - "generatedAt": "2026-06-27T13:14:32.919518+00:00", - "run": { - "id": null, - "url": null, - "createdAt": "2026-06-27T13:14:32.919518+00:00", - "sha": null + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 32, + "p50_amplification": 0.977, + "p99_amplification": 0.965 }, - "rows": [ - { - "sizeBytes": 65536, - "bandwidthGbps": 4.164, - "latency": 15.7387, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 262144, - "bandwidthGbps": 
16.954, - "latency": 15.4624, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 1048576, - "bandwidthGbps": 65.654, - "latency": 15.9712, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 4194304, - "bandwidthGbps": 283.195, - "latency": 14.8107, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 16777216, - "bandwidthGbps": 1151.185, - "latency": 14.5739, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 67108864, - "bandwidthGbps": 4718.356, - "latency": 14.2229, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 268435456, - "bandwidthGbps": 18381.29, - "latency": 14.6037, - "sizeClass": null, - "correct": null - } - ] - }, - { - "id": "cxt-0f7ea2f3", - "identity": "copy-engine|mi355x|mi355x-xgmi|xgmi|dtod|copy-engine|us", - "cohortIdentity": "copy-engine|mi355x|mi355x-xgmi|xgmi", - "family": "copy-engine", - "sku": "mi355x", - "topologyClass": "mi355x-xgmi", - "transport": "xgmi", - "operation": "dtod", - "subtype": "copy-engine", - "valid": true, - "status": "valid", - "note": "peak 27738 GB/s · copy-engine uses near-zero SMs: no", - "peakBandwidthGbps": 27738.291, - "latencyUnit": "us", - "colorKey": "mi355x_0f7ea2f3", - "label": "MI355X · dtod · copy-engine", - "generatedAt": "2026-06-29T00:49:25.677922+00:00", - "run": { - "id": null, - "url": null, - "createdAt": "2026-06-29T00:49:25.677922+00:00", - "sha": null + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 64, + "p50_amplification": 0.942, + "p99_amplification": 0.978 }, - "rows": [ - { - "sizeBytes": 65536, - "bandwidthGbps": 6.29, - "latency": 10.4187, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 262144, - "bandwidthGbps": 25.583, - "latency": 10.2468, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 1048576, - "bandwidthGbps": 101.083, - "latency": 10.3734, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 4194304, - "bandwidthGbps": 390.576, - 
"latency": 10.7388, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 16777216, - "bandwidthGbps": 1611.113, - "latency": 10.4134, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 67108864, - "bandwidthGbps": 6501.915, - "latency": 10.3214, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 268435456, - "bandwidthGbps": 26296.406, - "latency": 10.2081, - "sizeClass": null, - "correct": null - } - ] - }, - { - "id": "cxt-a301ee7b", - "identity": "copy-engine|mi355x|mi355x-xgmi|xgmi|dtod|sm|us", - "cohortIdentity": "copy-engine|mi355x|mi355x-xgmi|xgmi", - "family": "copy-engine", - "sku": "mi355x", - "topologyClass": "mi355x-xgmi", - "transport": "xgmi", - "operation": "dtod", - "subtype": "sm", - "valid": true, - "status": "valid", - "note": "peak 27738 GB/s · copy-engine uses near-zero SMs: no", - "peakBandwidthGbps": 27738.291, - "latencyUnit": "us", - "colorKey": "mi355x_a301ee7b", - "label": "MI355X · dtod · sm", - "generatedAt": "2026-06-29T00:49:25.677922+00:00", - "run": { - "id": null, - "url": null, - "createdAt": "2026-06-29T00:49:25.677922+00:00", - "sha": null + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 128, + "p50_amplification": 0.988, + "p99_amplification": 0.979 }, - "rows": [ - { - "sizeBytes": 65536, - "bandwidthGbps": 5.974, - "latency": 10.9707, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 262144, - "bandwidthGbps": 23.442, - "latency": 11.1828, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 1048576, - "bandwidthGbps": 92.466, - "latency": 11.3401, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 4194304, - "bandwidthGbps": 380.375, - "latency": 11.0268, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 16777216, - "bandwidthGbps": 1520.029, - "latency": 11.0374, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 67108864, - "bandwidthGbps": 6138.698, - "latency": 10.9321, - "sizeClass": 
null, - "correct": null - }, - { - "sizeBytes": 268435456, - "bandwidthGbps": 23990.086, - "latency": 11.1894, - "sizeClass": null, - "correct": null - } - ] - }, - { - "id": "cxt-30a30277", - "identity": "copy-engine|mi355x|mi355x-xgmi|xgmi|htod|copy-engine|us", - "cohortIdentity": "copy-engine|mi355x|mi355x-xgmi|xgmi", - "family": "copy-engine", - "sku": "mi355x", - "topologyClass": "mi355x-xgmi", - "transport": "xgmi", - "operation": "htod", - "subtype": "copy-engine", - "valid": true, - "status": "valid", - "note": "peak 27738 GB/s · copy-engine uses near-zero SMs: no", - "peakBandwidthGbps": 27738.291, - "latencyUnit": "us", - "colorKey": "mi355x_30a30277", - "label": "MI355X · htod · copy-engine", - "generatedAt": "2026-06-29T00:49:25.677922+00:00", - "run": { - "id": null, - "url": null, - "createdAt": "2026-06-29T00:49:25.677922+00:00", - "sha": null + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 1, + "p50_amplification": 1.016, + "p99_amplification": 1.001 }, - "rows": [ - { - "sizeBytes": 65536, - "bandwidthGbps": 7.111, - "latency": 9.2161, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 262144, - "bandwidthGbps": 28.79, - "latency": 9.1054, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 1048576, - "bandwidthGbps": 118.831, - "latency": 8.8241, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 4194304, - "bandwidthGbps": 492.98, - "latency": 8.5081, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 16777216, - "bandwidthGbps": 1921.041, - "latency": 8.7334, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 67108864, - "bandwidthGbps": 7489.773, - "latency": 8.9601, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 268435456, - "bandwidthGbps": 27738.291, - "latency": 9.6774, - "sizeClass": null, - "correct": null - } - ] - }, - { - "id": "cxt-da4cda37", - "identity": "copy-engine|mi355x|mi355x-xgmi|xgmi|htod|sm|us", - 
"cohortIdentity": "copy-engine|mi355x|mi355x-xgmi|xgmi", - "family": "copy-engine", - "sku": "mi355x", - "topologyClass": "mi355x-xgmi", - "transport": "xgmi", - "operation": "htod", - "subtype": "sm", - "valid": true, - "status": "valid", - "note": "peak 27738 GB/s · copy-engine uses near-zero SMs: no", - "peakBandwidthGbps": 27738.291, - "latencyUnit": "us", - "colorKey": "mi355x_da4cda37", - "label": "MI355X · htod · sm", - "generatedAt": "2026-06-29T00:49:25.677922+00:00", - "run": { - "id": null, - "url": null, - "createdAt": "2026-06-29T00:49:25.677922+00:00", - "sha": null + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 2, + "p50_amplification": 0.962, + "p99_amplification": 0.934 }, - "rows": [ - { - "sizeBytes": 65536, - "bandwidthGbps": 6.239, - "latency": 10.5041, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 262144, - "bandwidthGbps": 24.106, - "latency": 10.8748, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 1048576, - "bandwidthGbps": 97.985, - "latency": 10.7014, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 4194304, - "bandwidthGbps": 394.892, - "latency": 10.6214, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 16777216, - "bandwidthGbps": 1607.819, - "latency": 10.4348, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 67108864, - "bandwidthGbps": 6392.071, - "latency": 10.4988, - "sizeClass": null, - "correct": null - }, - { - "sizeBytes": 268435456, - "bandwidthGbps": 25471.24, - "latency": 10.5388, - "sizeClass": null, - "correct": null - } - ] - } - ], - "kvCache": [ - { - "id": "cxt-72e44191", - "identity": "kv-cache|b300|b300-nvlink-island|nvlink|dtod-local|contiguous/memcpy|ms", - "cohortIdentity": "kv-cache|b300|nvlink", - "family": "kv-cache", - "sku": "b300", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", - "operation": "dtod-local", - "subtype": "contiguous/memcpy", - "valid": true, - 
"status": "valid", - "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl", - "peakBandwidthGbps": null, - "latencyUnit": "ms", - "colorKey": "b300_72e44191", - "label": "B300 · dtod-local · contiguous/memcpy", - "generatedAt": "2026-06-27T13:14:28.674652+00:00", - "run": { - "id": null, - "url": null, - "createdAt": "2026-06-27T13:14:28.674652+00:00", - "sha": null + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 4, + "p50_amplification": 0.963, + "p99_amplification": 0.966 }, - "rows": [ - { - "sizeBytes": 16384, - "bandwidthGbps": 4.86, - "latency": 0.00337, - "sizeClass": "decode", - "correct": true - }, - { - "sizeBytes": 65536, - "bandwidthGbps": 18.31, - "latency": 0.00358, - "sizeClass": "decode", - "correct": true - }, - { - "sizeBytes": 262144, - "bandwidthGbps": 79.48, - "latency": 0.0033, - "sizeClass": "decode", - "correct": true - }, - { - "sizeBytes": 1048576, - "bandwidthGbps": 315.89, - "latency": 0.00332, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 4194304, - "bandwidthGbps": 1140.42, - "latency": 0.00368, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 16777216, - "bandwidthGbps": 2696.03, - "latency": 0.00622, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 67108864, - "bandwidthGbps": 2724.4, - "latency": 0.02463, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 268435456, - "bandwidthGbps": 3189.99, - "latency": 0.08415, - "sizeClass": "prefill", - "correct": true - } - ] - }, - { - "id": "cxt-0198272e", - "identity": "kv-cache|b300|b300-nvlink-island|nvlink|dtod-local|paged/memcpy|ms", - "cohortIdentity": "kv-cache|b300|nvlink", - "family": "kv-cache", - "sku": "b300", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", - "operation": "dtod-local", - "subtype": "paged/memcpy", - "valid": true, - "status": "valid", - "note": "wired: memcpy, pinned · declared-unwired: nixl, 
mooncake, mori-io, nccl", - "peakBandwidthGbps": null, - "latencyUnit": "ms", - "colorKey": "b300_0198272e", - "label": "B300 · dtod-local · paged/memcpy", - "generatedAt": "2026-06-27T13:14:28.674652+00:00", - "run": { - "id": null, - "url": null, - "createdAt": "2026-06-27T13:14:28.674652+00:00", - "sha": null + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 8, + "p50_amplification": 1.005, + "p99_amplification": 0.991 }, - "rows": [ - { - "sizeBytes": 16384, - "bandwidthGbps": 3.27, - "latency": 0.005, - "sizeClass": "decode", - "correct": true - }, - { - "sizeBytes": 65536, - "bandwidthGbps": 13.15, - "latency": 0.00498, - "sizeClass": "decode", - "correct": true - }, - { - "sizeBytes": 262144, - "bandwidthGbps": 13.46, - "latency": 0.01948, - "sizeClass": "decode", - "correct": true - }, - { - "sizeBytes": 1048576, - "bandwidthGbps": 13.76, - "latency": 0.07619, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 4194304, - "bandwidthGbps": 13.84, - "latency": 0.30311, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 16777216, - "bandwidthGbps": 13.87, - "latency": 1.20968, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 67108864, - "bandwidthGbps": 13.83, - "latency": 4.85211, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 268435456, - "bandwidthGbps": 13.89, - "latency": 19.32599, - "sizeClass": "prefill", - "correct": true - } - ] - }, - { - "id": "cxt-65e093de", - "identity": "kv-cache|b300|b300-nvlink-island|nvlink|dtod-remote|contiguous/memcpy|ms", - "cohortIdentity": "kv-cache|b300|nvlink", - "family": "kv-cache", - "sku": "b300", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", - "operation": "dtod-remote", - "subtype": "contiguous/memcpy", - "valid": true, - "status": "valid", - "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl", - "peakBandwidthGbps": null, - "latencyUnit": "ms", - 
"colorKey": "b300_65e093de", - "label": "B300 · dtod-remote · contiguous/memcpy", - "generatedAt": "2026-06-27T13:14:28.674652+00:00", - "run": { - "id": null, - "url": null, - "createdAt": "2026-06-27T13:14:28.674652+00:00", - "sha": null + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 16, + "p50_amplification": 1.01, + "p99_amplification": 1.073 }, - "rows": [ - { - "sizeBytes": 16384, - "bandwidthGbps": 1.08, - "latency": 0.01514, - "sizeClass": "decode", - "correct": true - }, - { - "sizeBytes": 65536, - "bandwidthGbps": 4.52, - "latency": 0.01451, - "sizeClass": "decode", - "correct": true - }, - { - "sizeBytes": 262144, - "bandwidthGbps": 17.43, - "latency": 0.01504, - "sizeClass": "decode", - "correct": true - }, - { - "sizeBytes": 1048576, - "bandwidthGbps": 67.07, - "latency": 0.01563, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 4194304, - "bandwidthGbps": 205.84, - "latency": 0.02038, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 16777216, - "bandwidthGbps": 409.12, - "latency": 0.04101, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 67108864, - "bandwidthGbps": 644.24, - "latency": 0.10417, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 268435456, - "bandwidthGbps": 736.42, - "latency": 0.36451, - "sizeClass": "prefill", - "correct": true - } - ] - }, - { - "id": "cxt-502d7923", - "identity": "kv-cache|b300|b300-nvlink-island|nvlink|dtod-remote|paged/memcpy|ms", - "cohortIdentity": "kv-cache|b300|nvlink", - "family": "kv-cache", - "sku": "b300", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", - "operation": "dtod-remote", - "subtype": "paged/memcpy", - "valid": true, - "status": "valid", - "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl", - "peakBandwidthGbps": null, - "latencyUnit": "ms", - "colorKey": "b300_502d7923", - "label": "B300 · dtod-remote · paged/memcpy", - 
"generatedAt": "2026-06-27T13:14:28.674652+00:00", - "run": { - "id": null, - "url": null, - "createdAt": "2026-06-27T13:14:28.674652+00:00", - "sha": null + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 32, + "p50_amplification": 1.022, + "p99_amplification": 0.985 }, - "rows": [ - { - "sizeBytes": 16384, - "bandwidthGbps": 1.11, - "latency": 0.01473, - "sizeClass": "decode", - "correct": true - }, - { - "sizeBytes": 65536, - "bandwidthGbps": 4.35, - "latency": 0.01507, - "sizeClass": "decode", - "correct": true - }, - { - "sizeBytes": 262144, - "bandwidthGbps": 4.3, - "latency": 0.06098, - "sizeClass": "decode", - "correct": true - }, - { - "sizeBytes": 1048576, - "bandwidthGbps": 4.27, - "latency": 0.24556, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 4194304, - "bandwidthGbps": 4.26, - "latency": 0.98559, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 16777216, - "bandwidthGbps": 4.24, - "latency": 3.9593, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 67108864, - "bandwidthGbps": 4.27, - "latency": 15.72352, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 268435456, - "bandwidthGbps": 4.25, - "latency": 63.14588, - "sizeClass": "prefill", - "correct": true - } - ] - }, - { - "id": "cxt-0560494f", - "identity": "kv-cache|b300|b300-nvlink-island|nvlink|dtoh|contiguous/memcpy|ms", - "cohortIdentity": "kv-cache|b300|nvlink", - "family": "kv-cache", - "sku": "b300", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", - "operation": "dtoh", - "subtype": "contiguous/memcpy", - "valid": true, - "status": "valid", - "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl", - "peakBandwidthGbps": null, - "latencyUnit": "ms", - "colorKey": "b300_0560494f", - "label": "B300 · dtoh · contiguous/memcpy", - "generatedAt": "2026-06-27T13:14:28.674652+00:00", - "run": { - "id": null, - "url": null, - 
"createdAt": "2026-06-27T13:14:28.674652+00:00", - "sha": null + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 64, + "p50_amplification": 1.003, + "p99_amplification": 1.254 }, - "rows": [ - { - "sizeBytes": 16384, - "bandwidthGbps": 1.29, - "latency": 0.01266, - "sizeClass": "decode", - "correct": true - }, - { - "sizeBytes": 65536, - "bandwidthGbps": 4.53, - "latency": 0.01447, - "sizeClass": "decode", - "correct": true - }, - { - "sizeBytes": 262144, - "bandwidthGbps": 10.95, - "latency": 0.02394, - "sizeClass": "decode", - "correct": true - }, - { - "sizeBytes": 1048576, - "bandwidthGbps": 17.12, - "latency": 0.06125, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 4194304, - "bandwidthGbps": 23.25, - "latency": 0.18038, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 16777216, - "bandwidthGbps": 25.14, - "latency": 0.66728, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 67108864, - "bandwidthGbps": 25.77, - "latency": 2.60365, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 268435456, - "bandwidthGbps": 26.05, - "latency": 10.30309, - "sizeClass": "prefill", - "correct": true - } - ] - }, - { - "id": "cxt-ce77da1a", - "identity": "kv-cache|b300|b300-nvlink-island|nvlink|dtoh|contiguous/pinned|ms", - "cohortIdentity": "kv-cache|b300|nvlink", - "family": "kv-cache", - "sku": "b300", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", - "operation": "dtoh", - "subtype": "contiguous/pinned", - "valid": true, - "status": "valid", - "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl", - "peakBandwidthGbps": null, - "latencyUnit": "ms", - "colorKey": "b300_ce77da1a", - "label": "B300 · dtoh · contiguous/pinned", - "generatedAt": "2026-06-27T13:14:28.674652+00:00", - "run": { - "id": null, - "url": null, - "createdAt": "2026-06-27T13:14:28.674652+00:00", - "sha": null + { + "sku": "gb300", + "ep": 
8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 128, + "p50_amplification": 0.996, + "p99_amplification": 1.004 }, - "rows": [ - { - "sizeBytes": 16384, - "bandwidthGbps": 4.69, - "latency": 0.00349, - "sizeClass": "decode", - "correct": true - }, - { - "sizeBytes": 65536, - "bandwidthGbps": 15.49, - "latency": 0.00423, - "sizeClass": "decode", - "correct": true - }, - { - "sizeBytes": 262144, - "bandwidthGbps": 34.21, - "latency": 0.00766, - "sizeClass": "decode", - "correct": true - }, - { - "sizeBytes": 1048576, - "bandwidthGbps": 49.37, - "latency": 0.02124, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 4194304, - "bandwidthGbps": 55.18, - "latency": 0.07601, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 16777216, - "bandwidthGbps": 56.69, - "latency": 0.29592, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 67108864, - "bandwidthGbps": 57.26, - "latency": 1.17204, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 268435456, - "bandwidthGbps": 57.37, - "latency": 4.67905, - "sizeClass": "prefill", - "correct": true - } - ] - }, - { - "id": "cxt-46a8e034", - "identity": "kv-cache|b300|b300-nvlink-island|nvlink|dtoh|paged/memcpy|ms", - "cohortIdentity": "kv-cache|b300|nvlink", - "family": "kv-cache", - "sku": "b300", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", - "operation": "dtoh", - "subtype": "paged/memcpy", - "valid": true, - "status": "valid", - "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl", - "peakBandwidthGbps": null, - "latencyUnit": "ms", - "colorKey": "b300_46a8e034", - "label": "B300 · dtoh · paged/memcpy", - "generatedAt": "2026-06-27T13:14:28.674652+00:00", - "run": { - "id": null, - "url": null, - "createdAt": "2026-06-27T13:14:28.674652+00:00", - "sha": null + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 1, + "p50_amplification": 0.932, + 
"p99_amplification": 0.825 }, - "rows": [ - { - "sizeBytes": 16384, - "bandwidthGbps": 1.15, - "latency": 0.01424, - "sizeClass": "decode", - "correct": true - }, - { - "sizeBytes": 65536, - "bandwidthGbps": 4.09, - "latency": 0.01604, - "sizeClass": "decode", - "correct": true - }, - { - "sizeBytes": 262144, - "bandwidthGbps": 4.13, - "latency": 0.06348, - "sizeClass": "decode", - "correct": true - }, - { - "sizeBytes": 1048576, - "bandwidthGbps": 4.08, - "latency": 0.25721, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 4194304, - "bandwidthGbps": 4.12, - "latency": 1.01899, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 16777216, - "bandwidthGbps": 4.13, - "latency": 4.05933, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 67108864, - "bandwidthGbps": 4.12, - "latency": 16.28391, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 268435456, - "bandwidthGbps": 4.08, - "latency": 65.79932, - "sizeClass": "prefill", - "correct": true - } - ] - }, - { - "id": "cxt-74b14d7d", - "identity": "kv-cache|b300|b300-nvlink-island|nvlink|dtoh|paged/pinned|ms", - "cohortIdentity": "kv-cache|b300|nvlink", - "family": "kv-cache", - "sku": "b300", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", - "operation": "dtoh", - "subtype": "paged/pinned", - "valid": true, - "status": "valid", - "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl", - "peakBandwidthGbps": null, - "latencyUnit": "ms", - "colorKey": "b300_74b14d7d", - "label": "B300 · dtoh · paged/pinned", - "generatedAt": "2026-06-27T13:14:28.674652+00:00", - "run": { - "id": null, - "url": null, - "createdAt": "2026-06-27T13:14:28.674652+00:00", - "sha": null + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 2, + "p50_amplification": 0.919, + "p99_amplification": 1.162 }, - "rows": [ - { - "sizeBytes": 16384, - "bandwidthGbps": 3.97, - "latency": 0.00413, - 
"sizeClass": "decode", - "correct": true - }, - { - "sizeBytes": 65536, - "bandwidthGbps": 11.6, - "latency": 0.00565, - "sizeClass": "decode", - "correct": true - }, - { - "sizeBytes": 262144, - "bandwidthGbps": 11.71, - "latency": 0.02239, - "sizeClass": "decode", - "correct": true - }, - { - "sizeBytes": 1048576, - "bandwidthGbps": 11.85, - "latency": 0.08852, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 4194304, - "bandwidthGbps": 15.07, - "latency": 0.27834, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 16777216, - "bandwidthGbps": 16.21, - "latency": 1.0351, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 67108864, - "bandwidthGbps": 16, - "latency": 4.19304, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 268435456, - "bandwidthGbps": 15.3, - "latency": 17.54518, - "sizeClass": "prefill", - "correct": true - } - ] - }, - { - "id": "cxt-a39a3977", - "identity": "kv-cache|b300|b300-nvlink-island|nvlink|htod|contiguous/memcpy|ms", - "cohortIdentity": "kv-cache|b300|nvlink", - "family": "kv-cache", - "sku": "b300", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", - "operation": "htod", - "subtype": "contiguous/memcpy", - "valid": true, - "status": "valid", - "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl", - "peakBandwidthGbps": null, - "latencyUnit": "ms", - "colorKey": "b300_a39a3977", - "label": "B300 · htod · contiguous/memcpy", - "generatedAt": "2026-06-27T13:14:28.674652+00:00", - "run": { - "id": null, - "url": null, - "createdAt": "2026-06-27T13:14:28.674652+00:00", - "sha": null + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 4, + "p50_amplification": 0.912, + "p99_amplification": 0.912 }, - "rows": [ - { - "sizeBytes": 16384, - "bandwidthGbps": 4.67, - "latency": 0.00351, - "sizeClass": "decode", - "correct": true - }, - { - "sizeBytes": 65536, - "bandwidthGbps": 10.64, - 
"latency": 0.00616, - "sizeClass": "decode", - "correct": true - }, - { - "sizeBytes": 262144, - "bandwidthGbps": 28.03, - "latency": 0.00935, - "sizeClass": "decode", - "correct": true - }, - { - "sizeBytes": 1048576, - "bandwidthGbps": 39.84, - "latency": 0.02632, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 4194304, - "bandwidthGbps": 40.9, - "latency": 0.10256, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 16777216, - "bandwidthGbps": 43.53, - "latency": 0.38545, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 67108864, - "bandwidthGbps": 40.29, - "latency": 1.66584, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 268435456, - "bandwidthGbps": 40.43, - "latency": 6.6389, - "sizeClass": "prefill", - "correct": true - } - ] - }, - { - "id": "cxt-4dc90462", - "identity": "kv-cache|b300|b300-nvlink-island|nvlink|htod|contiguous/pinned|ms", - "cohortIdentity": "kv-cache|b300|nvlink", - "family": "kv-cache", - "sku": "b300", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", - "operation": "htod", - "subtype": "contiguous/pinned", - "valid": true, - "status": "valid", - "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl", - "peakBandwidthGbps": null, - "latencyUnit": "ms", - "colorKey": "b300_4dc90462", - "label": "B300 · htod · contiguous/pinned", - "generatedAt": "2026-06-27T13:14:28.674652+00:00", - "run": { - "id": null, - "url": null, - "createdAt": "2026-06-27T13:14:28.674652+00:00", - "sha": null + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 8, + "p50_amplification": 0.937, + "p99_amplification": 1.21 }, - "rows": [ - { - "sizeBytes": 16384, - "bandwidthGbps": 4.92, - "latency": 0.00333, - "sizeClass": "decode", - "correct": true - }, - { - "sizeBytes": 65536, - "bandwidthGbps": 15.9, - "latency": 0.00412, - "sizeClass": "decode", - "correct": true - }, - { - "sizeBytes": 262144, - 
"bandwidthGbps": 35.21, - "latency": 0.00745, - "sizeClass": "decode", - "correct": true - }, - { - "sizeBytes": 1048576, - "bandwidthGbps": 49.53, - "latency": 0.02117, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 4194304, - "bandwidthGbps": 54.66, - "latency": 0.07673, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 16777216, - "bandwidthGbps": 57.11, - "latency": 0.29375, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 67108864, - "bandwidthGbps": 57.27, - "latency": 1.1717, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 268435456, - "bandwidthGbps": 57.29, - "latency": 4.68587, - "sizeClass": "prefill", - "correct": true - } - ] - }, - { - "id": "cxt-1baaf76c", - "identity": "kv-cache|b300|b300-nvlink-island|nvlink|htod|paged/memcpy|ms", - "cohortIdentity": "kv-cache|b300|nvlink", - "family": "kv-cache", - "sku": "b300", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", - "operation": "htod", - "subtype": "paged/memcpy", - "valid": true, - "status": "valid", - "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl", - "peakBandwidthGbps": null, - "latencyUnit": "ms", - "colorKey": "b300_1baaf76c", - "label": "B300 · htod · paged/memcpy", - "generatedAt": "2026-06-27T13:14:28.674652+00:00", - "run": { - "id": null, - "url": null, - "createdAt": "2026-06-27T13:14:28.674652+00:00", - "sha": null + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 16, + "p50_amplification": 0.967, + "p99_amplification": 0.919 }, - "rows": [ - { - "sizeBytes": 16384, - "bandwidthGbps": 3.07, - "latency": 0.00534, - "sizeClass": "decode", - "correct": true - }, - { - "sizeBytes": 65536, - "bandwidthGbps": 12.18, - "latency": 0.00538, - "sizeClass": "decode", - "correct": true - }, - { - "sizeBytes": 262144, - "bandwidthGbps": 13.09, - "latency": 0.02003, - "sizeClass": "decode", - "correct": true - }, - { - "sizeBytes": 
1048576, - "bandwidthGbps": 13.07, - "latency": 0.08021, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 4194304, - "bandwidthGbps": 12.88, - "latency": 0.32552, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 16777216, - "bandwidthGbps": 12.74, - "latency": 1.31673, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 67108864, - "bandwidthGbps": 13.44, - "latency": 4.99481, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 268435456, - "bandwidthGbps": 13.46, - "latency": 19.93861, - "sizeClass": "prefill", - "correct": true - } - ] - }, - { - "id": "cxt-95e0eff5", - "identity": "kv-cache|b300|b300-nvlink-island|nvlink|htod|paged/pinned|ms", - "cohortIdentity": "kv-cache|b300|nvlink", - "family": "kv-cache", - "sku": "b300", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", - "operation": "htod", - "subtype": "paged/pinned", - "valid": true, - "status": "valid", - "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl", - "peakBandwidthGbps": null, - "latencyUnit": "ms", - "colorKey": "b300_95e0eff5", - "label": "B300 · htod · paged/pinned", - "generatedAt": "2026-06-27T13:14:28.674652+00:00", - "run": { - "id": null, - "url": null, - "createdAt": "2026-06-27T13:14:28.674652+00:00", - "sha": null + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 32, + "p50_amplification": 0.987, + "p99_amplification": 1.244 }, - "rows": [ - { - "sizeBytes": 16384, - "bandwidthGbps": 3.85, - "latency": 0.00425, - "sizeClass": "decode", - "correct": true - }, - { - "sizeBytes": 65536, - "bandwidthGbps": 11.28, - "latency": 0.00581, - "sizeClass": "decode", - "correct": true - }, - { - "sizeBytes": 262144, - "bandwidthGbps": 11.35, - "latency": 0.0231, - "sizeClass": "decode", - "correct": true - }, - { - "sizeBytes": 1048576, - "bandwidthGbps": 11.41, - "latency": 0.0919, - "sizeClass": "prefill", - "correct": true - }, - { - 
"sizeBytes": 4194304, - "bandwidthGbps": 14.68, - "latency": 0.28572, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 16777216, - "bandwidthGbps": 15.48, - "latency": 1.08353, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 67108864, - "bandwidthGbps": 15.4, - "latency": 4.35678, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 268435456, - "bandwidthGbps": 15.59, - "latency": 17.21665, - "sizeClass": "prefill", - "correct": true - } - ] - }, - { - "id": "cxt-ac86e5b5", - "identity": "kv-cache|h100|h100-nvlink-island|nvlink|dtod-local|contiguous/memcpy|ms", - "cohortIdentity": "kv-cache|h100|nvlink", - "family": "kv-cache", - "sku": "h100", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "operation": "dtod-local", - "subtype": "contiguous/memcpy", - "valid": true, - "status": "valid", - "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl", - "peakBandwidthGbps": null, - "latencyUnit": "ms", - "colorKey": "h100_ac86e5b5", - "label": "H100 · dtod-local · contiguous/memcpy", - "generatedAt": "2026-06-27T13:14:31.575969+00:00", - "run": { - "id": null, - "url": null, - "createdAt": "2026-06-27T13:14:31.575969+00:00", - "sha": null + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 64, + "p50_amplification": 0.979, + "p99_amplification": 0.986 }, - "rows": [ - { - "sizeBytes": 16384, - "bandwidthGbps": 3.08, - "latency": 0.00532, - "sizeClass": "decode", - "correct": true - }, - { - "sizeBytes": 65536, - "bandwidthGbps": 12.22, - "latency": 0.00536, - "sizeClass": "decode", - "correct": true - }, - { - "sizeBytes": 262144, - "bandwidthGbps": 51.29, - "latency": 0.00511, - "sizeClass": "decode", - "correct": true - }, - { - "sizeBytes": 1048576, - "bandwidthGbps": 203.7, - "latency": 0.00515, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 4194304, - "bandwidthGbps": 830.27, - "latency": 0.00505, - 
"sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 16777216, - "bandwidthGbps": 1848.69, - "latency": 0.00908, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 67108864, - "bandwidthGbps": 1404.25, - "latency": 0.04779, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 268435456, - "bandwidthGbps": 1496.83, - "latency": 0.17934, - "sizeClass": "prefill", - "correct": true - } - ] - }, - { - "id": "cxt-5cd440fa", - "identity": "kv-cache|h100|h100-nvlink-island|nvlink|dtod-local|paged/memcpy|ms", - "cohortIdentity": "kv-cache|h100|nvlink", - "family": "kv-cache", - "sku": "h100", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "operation": "dtod-local", - "subtype": "paged/memcpy", - "valid": true, - "status": "valid", - "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl", - "peakBandwidthGbps": null, - "latencyUnit": "ms", - "colorKey": "h100_5cd440fa", - "label": "H100 · dtod-local · paged/memcpy", - "generatedAt": "2026-06-27T13:14:31.575969+00:00", - "run": { - "id": null, - "url": null, - "createdAt": "2026-06-27T13:14:31.575969+00:00", - "sha": null + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 128, + "p50_amplification": 0.969, + "p99_amplification": 1.191 }, - "rows": [ - { - "sizeBytes": 16384, - "bandwidthGbps": 1.87, - "latency": 0.00875, - "sizeClass": "decode", - "correct": true - }, - { - "sizeBytes": 65536, - "bandwidthGbps": 7.19, - "latency": 0.00912, - "sizeClass": "decode", - "correct": true - }, - { - "sizeBytes": 262144, - "bandwidthGbps": 7.74, - "latency": 0.03385, - "sizeClass": "decode", - "correct": true - }, - { - "sizeBytes": 1048576, - "bandwidthGbps": 8.12, - "latency": 0.12917, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 4194304, - "bandwidthGbps": 8.12, - "latency": 0.51673, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 16777216, - "bandwidthGbps": 
8.12, - "latency": 2.06732, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 67108864, - "bandwidthGbps": 8.14, - "latency": 8.24075, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 268435456, - "bandwidthGbps": 8.16, - "latency": 32.8879, - "sizeClass": "prefill", - "correct": true - } - ] - }, - { - "id": "cxt-ea4a3eaa", - "identity": "kv-cache|h100|h100-nvlink-island|nvlink|dtod-remote|contiguous/memcpy|ms", - "cohortIdentity": "kv-cache|h100|nvlink", - "family": "kv-cache", - "sku": "h100", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "operation": "dtod-remote", - "subtype": "contiguous/memcpy", - "valid": true, - "status": "valid", - "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl", - "peakBandwidthGbps": null, - "latencyUnit": "ms", - "colorKey": "h100_ea4a3eaa", - "label": "H100 · dtod-remote · contiguous/memcpy", - "generatedAt": "2026-06-27T13:14:31.575969+00:00", - "run": { - "id": null, - "url": null, - "createdAt": "2026-06-27T13:14:31.575969+00:00", - "sha": null + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 128, + "p50_amplification": 1.021, + "p99_amplification": 1.03 }, - "rows": [ - { - "sizeBytes": 16384, - "bandwidthGbps": 1.05, - "latency": 0.01566, - "sizeClass": "decode", - "correct": true - }, - { - "sizeBytes": 65536, - "bandwidthGbps": 4.14, - "latency": 0.01583, - "sizeClass": "decode", - "correct": true - }, - { - "sizeBytes": 262144, - "bandwidthGbps": 16.78, - "latency": 0.01562, - "sizeClass": "decode", - "correct": true - }, - { - "sizeBytes": 1048576, - "bandwidthGbps": 60.4, - "latency": 0.01736, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 4194304, - "bandwidthGbps": 135.27, - "latency": 0.03101, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 16777216, - "bandwidthGbps": 287.64, - "latency": 0.05833, - "sizeClass": "prefill", - "correct": true - }, - { - 
"sizeBytes": 67108864, - "bandwidthGbps": 355.4, - "latency": 0.18883, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 268435456, - "bandwidthGbps": 386.28, - "latency": 0.69492, - "sizeClass": "prefill", - "correct": true - } - ] - }, - { - "id": "cxt-0ce612f7", - "identity": "kv-cache|h100|h100-nvlink-island|nvlink|dtod-remote|paged/memcpy|ms", - "cohortIdentity": "kv-cache|h100|nvlink", - "family": "kv-cache", - "sku": "h100", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "operation": "dtod-remote", - "subtype": "paged/memcpy", - "valid": true, - "status": "valid", - "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl", - "peakBandwidthGbps": null, - "latencyUnit": "ms", - "colorKey": "h100_0ce612f7", - "label": "H100 · dtod-remote · paged/memcpy", - "generatedAt": "2026-06-27T13:14:31.575969+00:00", - "run": { - "id": null, - "url": null, - "createdAt": "2026-06-27T13:14:31.575969+00:00", - "sha": null + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 256, + "p50_amplification": 1.004, + "p99_amplification": 1.182 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 512, + "p50_amplification": 1.012, + "p99_amplification": 1.077 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 1024, + "p50_amplification": 1.047, + "p99_amplification": 1.128 }, - "rows": [ - { - "sizeBytes": 16384, - "bandwidthGbps": 0.81, - "latency": 0.02017, - "sizeClass": "decode", - "correct": true - }, - { - "sizeBytes": 65536, - "bandwidthGbps": 3.39, - "latency": 0.01935, - "sizeClass": "decode", - "correct": true - }, - { - "sizeBytes": 262144, - "bandwidthGbps": 3.46, - "latency": 0.07571, - "sizeClass": "decode", - "correct": true - }, - { - "sizeBytes": 1048576, - "bandwidthGbps": 3.5, - "latency": 0.29964, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 4194304, - "bandwidthGbps": 3.52, - 
"latency": 1.19198, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 16777216, - "bandwidthGbps": 3.51, - "latency": 4.78335, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 67108864, - "bandwidthGbps": 3.51, - "latency": 19.11805, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 268435456, - "bandwidthGbps": 3.51, - "latency": 76.49081, - "sizeClass": "prefill", - "correct": true - } - ] - }, - { - "id": "cxt-9514aa3b", - "identity": "kv-cache|h100|h100-nvlink-island|nvlink|dtoh|contiguous/memcpy|ms", - "cohortIdentity": "kv-cache|h100|nvlink", - "family": "kv-cache", - "sku": "h100", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "operation": "dtoh", - "subtype": "contiguous/memcpy", - "valid": true, - "status": "valid", - "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl", - "peakBandwidthGbps": null, - "latencyUnit": "ms", - "colorKey": "h100_9514aa3b", - "label": "H100 · dtoh · contiguous/memcpy", - "generatedAt": "2026-06-27T13:14:31.575969+00:00", - "run": { - "id": null, - "url": null, - "createdAt": "2026-06-27T13:14:31.575969+00:00", - "sha": null + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 2048, + "p50_amplification": 1.129, + "p99_amplification": 1.165 }, - "rows": [ - { - "sizeBytes": 16384, - "bandwidthGbps": 1.14, - "latency": 0.01435, - "sizeClass": "decode", - "correct": true - }, - { - "sizeBytes": 65536, - "bandwidthGbps": 3.21, - "latency": 0.02041, - "sizeClass": "decode", - "correct": true - }, - { - "sizeBytes": 262144, - "bandwidthGbps": 6.95, - "latency": 0.03775, - "sizeClass": "decode", - "correct": true - }, - { - "sizeBytes": 1048576, - "bandwidthGbps": 9.46, - "latency": 0.11089, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 4194304, - "bandwidthGbps": 12.75, - "latency": 0.32908, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 16777216, - 
"bandwidthGbps": 14.17, - "latency": 1.18418, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 67108864, - "bandwidthGbps": 14.68, - "latency": 4.57034, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 268435456, - "bandwidthGbps": 14.78, - "latency": 18.15827, - "sizeClass": "prefill", - "correct": true - } - ] - }, - { - "id": "cxt-51d4ebbe", - "identity": "kv-cache|h100|h100-nvlink-island|nvlink|dtoh|contiguous/pinned|ms", - "cohortIdentity": "kv-cache|h100|nvlink", - "family": "kv-cache", - "sku": "h100", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "operation": "dtoh", - "subtype": "contiguous/pinned", - "valid": true, - "status": "valid", - "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl", - "peakBandwidthGbps": null, - "latencyUnit": "ms", - "colorKey": "h100_51d4ebbe", - "label": "H100 · dtoh · contiguous/pinned", - "generatedAt": "2026-06-27T13:14:31.575969+00:00", - "run": { - "id": null, - "url": null, - "createdAt": "2026-06-27T13:14:31.575969+00:00", - "sha": null + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 4096, + "p50_amplification": 1.187, + "p99_amplification": 1.194 }, - "rows": [ - { - "sizeBytes": 16384, - "bandwidthGbps": 3.63, - "latency": 0.00452, - "sizeClass": "decode", - "correct": true - }, - { - "sizeBytes": 65536, - "bandwidthGbps": 14.61, - "latency": 0.00449, - "sizeClass": "decode", - "correct": true - }, - { - "sizeBytes": 262144, - "bandwidthGbps": 34.97, - "latency": 0.0075, - "sizeClass": "decode", - "correct": true - }, - { - "sizeBytes": 1048576, - "bandwidthGbps": 45.56, - "latency": 0.02301, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 4194304, - "bandwidthGbps": 52.87, - "latency": 0.07934, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 16777216, - "bandwidthGbps": 54.46, - "latency": 0.30805, - "sizeClass": "prefill", - "correct": true - }, - { - 
"sizeBytes": 67108864, - "bandwidthGbps": 54.81, - "latency": 1.22436, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 268435456, - "bandwidthGbps": 54.92, - "latency": 4.88742, - "sizeClass": "prefill", - "correct": true - } - ] - }, - { - "id": "cxt-53eb5188", - "identity": "kv-cache|h100|h100-nvlink-island|nvlink|dtoh|paged/memcpy|ms", - "cohortIdentity": "kv-cache|h100|nvlink", - "family": "kv-cache", - "sku": "h100", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "operation": "dtoh", - "subtype": "paged/memcpy", - "valid": true, - "status": "valid", - "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl", - "peakBandwidthGbps": null, - "latencyUnit": "ms", - "colorKey": "h100_53eb5188", - "label": "H100 · dtoh · paged/memcpy", - "generatedAt": "2026-06-27T13:14:31.575969+00:00", - "run": { - "id": null, - "url": null, - "createdAt": "2026-06-27T13:14:31.575969+00:00", - "sha": null + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 128, + "p50_amplification": 1.039, + "p99_amplification": 1.018 }, - "rows": [ - { - "sizeBytes": 16384, - "bandwidthGbps": 0.95, - "latency": 0.01729, - "sizeClass": "decode", - "correct": true - }, - { - "sizeBytes": 65536, - "bandwidthGbps": 2.79, - "latency": 0.02345, - "sizeClass": "decode", - "correct": true - }, - { - "sizeBytes": 262144, - "bandwidthGbps": 2.8, - "latency": 0.0936, - "sizeClass": "decode", - "correct": true - }, - { - "sizeBytes": 1048576, - "bandwidthGbps": 2.83, - "latency": 0.37049, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 4194304, - "bandwidthGbps": 2.84, - "latency": 1.47709, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 16777216, - "bandwidthGbps": 2.84, - "latency": 5.91534, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 67108864, - "bandwidthGbps": 2.8, - "latency": 23.94517, - "sizeClass": "prefill", - "correct": true - }, - { - 
"sizeBytes": 268435456, - "bandwidthGbps": 2.81, - "latency": 95.42213, - "sizeClass": "prefill", - "correct": true - } - ] - }, - { - "id": "cxt-58b1ef69", - "identity": "kv-cache|h100|h100-nvlink-island|nvlink|dtoh|paged/pinned|ms", - "cohortIdentity": "kv-cache|h100|nvlink", - "family": "kv-cache", - "sku": "h100", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "operation": "dtoh", - "subtype": "paged/pinned", - "valid": true, - "status": "valid", - "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl", - "peakBandwidthGbps": null, - "latencyUnit": "ms", - "colorKey": "h100_58b1ef69", - "label": "H100 · dtoh · paged/pinned", - "generatedAt": "2026-06-27T13:14:31.575969+00:00", - "run": { - "id": null, - "url": null, - "createdAt": "2026-06-27T13:14:31.575969+00:00", - "sha": null + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 512, + "p50_amplification": 1.016, + "p99_amplification": 1.013 }, - "rows": [ - { - "sizeBytes": 16384, - "bandwidthGbps": 2.02, - "latency": 0.00812, - "sizeClass": "decode", - "correct": true - }, - { - "sizeBytes": 65536, - "bandwidthGbps": 8.26, - "latency": 0.00793, - "sizeClass": "decode", - "correct": true - }, - { - "sizeBytes": 262144, - "bandwidthGbps": 8.59, - "latency": 0.03052, - "sizeClass": "decode", - "correct": true - }, - { - "sizeBytes": 1048576, - "bandwidthGbps": 8.8, - "latency": 0.11912, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 4194304, - "bandwidthGbps": 8.89, - "latency": 0.47188, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 16777216, - "bandwidthGbps": 8.94, - "latency": 1.87628, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 67108864, - "bandwidthGbps": 8.99, - "latency": 7.46602, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 268435456, - "bandwidthGbps": 8.97, - "latency": 29.91576, - "sizeClass": "prefill", - "correct": true - } - ] - 
}, - { - "id": "cxt-f0ce2a63", - "identity": "kv-cache|h100|h100-nvlink-island|nvlink|htod|contiguous/memcpy|ms", - "cohortIdentity": "kv-cache|h100|nvlink", - "family": "kv-cache", - "sku": "h100", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "operation": "htod", - "subtype": "contiguous/memcpy", - "valid": true, - "status": "valid", - "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl", - "peakBandwidthGbps": null, - "latencyUnit": "ms", - "colorKey": "h100_f0ce2a63", - "label": "H100 · htod · contiguous/memcpy", - "generatedAt": "2026-06-27T13:14:31.575969+00:00", - "run": { - "id": null, - "url": null, - "createdAt": "2026-06-27T13:14:31.575969+00:00", - "sha": null + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 2048, + "p50_amplification": 1.143, + "p99_amplification": 1.163 }, - "rows": [ - { - "sizeBytes": 16384, - "bandwidthGbps": 2.21, - "latency": 0.00742, - "sizeClass": "decode", - "correct": true - }, - { - "sizeBytes": 65536, - "bandwidthGbps": 3.56, - "latency": 0.0184, - "sizeClass": "decode", - "correct": true - }, - { - "sizeBytes": 262144, - "bandwidthGbps": 12.81, - "latency": 0.02046, - "sizeClass": "decode", - "correct": true - }, - { - "sizeBytes": 1048576, - "bandwidthGbps": 16.18, - "latency": 0.06483, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 4194304, - "bandwidthGbps": 14.77, - "latency": 0.28404, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 16777216, - "bandwidthGbps": 15.24, - "latency": 1.10071, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 67108864, - "bandwidthGbps": 19.35, - "latency": 3.46895, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 268435456, - "bandwidthGbps": 19.37, - "latency": 13.85634, - "sizeClass": "prefill", - "correct": true - } - ] - }, - { - "id": "cxt-90f1ea66", - "identity": 
"kv-cache|h100|h100-nvlink-island|nvlink|htod|contiguous/pinned|ms", - "cohortIdentity": "kv-cache|h100|nvlink", - "family": "kv-cache", - "sku": "h100", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "operation": "htod", - "subtype": "contiguous/pinned", - "valid": true, - "status": "valid", - "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl", - "peakBandwidthGbps": null, - "latencyUnit": "ms", - "colorKey": "h100_90f1ea66", - "label": "H100 · htod · contiguous/pinned", - "generatedAt": "2026-06-27T13:14:31.575969+00:00", - "run": { - "id": null, - "url": null, - "createdAt": "2026-06-27T13:14:31.575969+00:00", - "sha": null + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 128, + "p50_amplification": 1.01, + "p99_amplification": 0.998 }, - "rows": [ - { - "sizeBytes": 16384, - "bandwidthGbps": 3.54, - "latency": 0.00463, - "sizeClass": "decode", - "correct": true - }, - { - "sizeBytes": 65536, - "bandwidthGbps": 10.95, - "latency": 0.00598, - "sizeClass": "decode", - "correct": true - }, - { - "sizeBytes": 262144, - "bandwidthGbps": 20.37, - "latency": 0.01287, - "sizeClass": "decode", - "correct": true - }, - { - "sizeBytes": 1048576, - "bandwidthGbps": 24.67, - "latency": 0.0425, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 4194304, - "bandwidthGbps": 45.61, - "latency": 0.09197, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 16777216, - "bandwidthGbps": 19, - "latency": 0.88291, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 67108864, - "bandwidthGbps": 15.38, - "latency": 4.36425, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 268435456, - "bandwidthGbps": 26.93, - "latency": 9.96701, - "sizeClass": "prefill", - "correct": true - } - ] - }, - { - "id": "cxt-e2eccf00", - "identity": "kv-cache|h100|h100-nvlink-island|nvlink|htod|paged/memcpy|ms", - "cohortIdentity": 
"kv-cache|h100|nvlink", - "family": "kv-cache", - "sku": "h100", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "operation": "htod", - "subtype": "paged/memcpy", - "valid": true, - "status": "valid", - "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl", - "peakBandwidthGbps": null, - "latencyUnit": "ms", - "colorKey": "h100_e2eccf00", - "label": "H100 · htod · paged/memcpy", - "generatedAt": "2026-06-27T13:14:31.575969+00:00", - "run": { - "id": null, - "url": null, - "createdAt": "2026-06-27T13:14:31.575969+00:00", - "sha": null + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 512, + "p50_amplification": 0.927, + "p99_amplification": 0.91 }, - "rows": [ - { - "sizeBytes": 16384, - "bandwidthGbps": 1.48, - "latency": 0.01107, - "sizeClass": "decode", - "correct": true - }, - { - "sizeBytes": 65536, - "bandwidthGbps": 4.89, - "latency": 0.01341, - "sizeClass": "decode", - "correct": true - }, - { - "sizeBytes": 262144, - "bandwidthGbps": 5.26, - "latency": 0.04985, - "sizeClass": "decode", - "correct": true - }, - { - "sizeBytes": 1048576, - "bandwidthGbps": 5.25, - "latency": 0.19989, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 4194304, - "bandwidthGbps": 4.87, - "latency": 0.86178, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 16777216, - "bandwidthGbps": 4.88, - "latency": 3.43634, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 67108864, - "bandwidthGbps": 5.29, - "latency": 12.69012, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 268435456, - "bandwidthGbps": 5.3, - "latency": 50.67481, - "sizeClass": "prefill", - "correct": true - } - ] - }, - { - "id": "cxt-9d427921", - "identity": "kv-cache|h100|h100-nvlink-island|nvlink|htod|paged/pinned|ms", - "cohortIdentity": "kv-cache|h100|nvlink", - "family": "kv-cache", - "sku": "h100", - "topologyClass": "h100-nvlink-island", - "transport": 
"nvlink", - "operation": "htod", - "subtype": "paged/pinned", - "valid": true, - "status": "valid", - "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl", - "peakBandwidthGbps": null, - "latencyUnit": "ms", - "colorKey": "h100_9d427921", - "label": "H100 · htod · paged/pinned", - "generatedAt": "2026-06-27T13:14:31.575969+00:00", - "run": { - "id": null, - "url": null, - "createdAt": "2026-06-27T13:14:31.575969+00:00", - "sha": null + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 2048, + "p50_amplification": 1.01, + "p99_amplification": 1.028 }, - "rows": [ - { - "sizeBytes": 16384, - "bandwidthGbps": 1.62, - "latency": 0.01012, - "sizeClass": "decode", - "correct": true - }, - { - "sizeBytes": 65536, - "bandwidthGbps": 7.91, - "latency": 0.00828, - "sizeClass": "decode", - "correct": true - }, - { - "sizeBytes": 262144, - "bandwidthGbps": 8.25, - "latency": 0.03177, - "sizeClass": "decode", - "correct": true - }, - { - "sizeBytes": 1048576, - "bandwidthGbps": 8.56, - "latency": 0.12251, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 4194304, - "bandwidthGbps": 8.72, - "latency": 0.48117, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 16777216, - "bandwidthGbps": 8.69, - "latency": 1.93067, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 67108864, - "bandwidthGbps": 8.76, - "latency": 7.66475, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 268435456, - "bandwidthGbps": 8.8, - "latency": 30.51378, - "sizeClass": "prefill", - "correct": true - } - ] - }, - { - "id": "cxt-da427647", - "identity": "kv-cache|h200|h200-nvlink-island|nvlink|dtod-local|contiguous/memcpy|ms", - "cohortIdentity": "kv-cache|h200|nvlink", - "family": "kv-cache", - "sku": "h200", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "operation": "dtod-local", - "subtype": "contiguous/memcpy", - "valid": true, - "status": "valid", 
- "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl", - "peakBandwidthGbps": null, - "latencyUnit": "ms", - "colorKey": "h200_da427647", - "label": "H200 · dtod-local · contiguous/memcpy", - "generatedAt": "2026-06-27T13:15:06.269124+00:00", - "run": { - "id": null, - "url": null, - "createdAt": "2026-06-27T13:15:06.269124+00:00", - "sha": null + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 128, + "p50_amplification": 1.042, + "p99_amplification": 1.14 }, - "rows": [ - { - "sizeBytes": 16384, - "bandwidthGbps": 2.92, - "latency": 0.00561, - "sizeClass": "decode", - "correct": true - }, - { - "sizeBytes": 65536, - "bandwidthGbps": 11.4, - "latency": 0.00575, - "sizeClass": "decode", - "correct": true - }, - { - "sizeBytes": 262144, - "bandwidthGbps": 50.9, - "latency": 0.00515, - "sizeClass": "decode", - "correct": true - }, - { - "sizeBytes": 1048576, - "bandwidthGbps": 203.07, - "latency": 0.00516, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 4194304, - "bandwidthGbps": 808.09, - "latency": 0.00519, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 16777216, - "bandwidthGbps": 2577.62, - "latency": 0.00651, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 67108864, - "bandwidthGbps": 1942.65, - "latency": 0.03455, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 268435456, - "bandwidthGbps": 2094.13, - "latency": 0.12818, - "sizeClass": "prefill", - "correct": true - } - ] - }, - { - "id": "cxt-e86f4c3c", - "identity": "kv-cache|h200|h200-nvlink-island|nvlink|dtod-local|paged/memcpy|ms", - "cohortIdentity": "kv-cache|h200|nvlink", - "family": "kv-cache", - "sku": "h200", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "operation": "dtod-local", - "subtype": "paged/memcpy", - "valid": true, - "status": "valid", - "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, 
nccl", - "peakBandwidthGbps": null, - "latencyUnit": "ms", - "colorKey": "h200_e86f4c3c", - "label": "H200 · dtod-local · paged/memcpy", - "generatedAt": "2026-06-27T13:15:06.269124+00:00", - "run": { - "id": null, - "url": null, - "createdAt": "2026-06-27T13:15:06.269124+00:00", - "sha": null + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 256, + "p50_amplification": 1.036, + "p99_amplification": 1.076 }, - "rows": [ - { - "sizeBytes": 16384, - "bandwidthGbps": 1.88, - "latency": 0.0087, - "sizeClass": "decode", - "correct": true - }, - { - "sizeBytes": 65536, - "bandwidthGbps": 7.13, - "latency": 0.00919, - "sizeClass": "decode", - "correct": true - }, - { - "sizeBytes": 262144, - "bandwidthGbps": 7.76, - "latency": 0.03377, - "sizeClass": "decode", - "correct": true - }, - { - "sizeBytes": 1048576, - "bandwidthGbps": 8.12, - "latency": 0.12919, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 4194304, - "bandwidthGbps": 8.12, - "latency": 0.51648, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 16777216, - "bandwidthGbps": 8.12, - "latency": 2.06665, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 67108864, - "bandwidthGbps": 7.82, - "latency": 8.58219, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 268435456, - "bandwidthGbps": 8.37, - "latency": 32.07343, - "sizeClass": "prefill", - "correct": true - } - ] - }, - { - "id": "cxt-a92baae0", - "identity": "kv-cache|h200|h200-nvlink-island|nvlink|dtod-remote|contiguous/memcpy|ms", - "cohortIdentity": "kv-cache|h200|nvlink", - "family": "kv-cache", - "sku": "h200", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "operation": "dtod-remote", - "subtype": "contiguous/memcpy", - "valid": true, - "status": "valid", - "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl", - "peakBandwidthGbps": null, - "latencyUnit": "ms", - "colorKey": 
"h200_a92baae0", - "label": "H200 · dtod-remote · contiguous/memcpy", - "generatedAt": "2026-06-27T13:15:06.269124+00:00", - "run": { - "id": null, - "url": null, - "createdAt": "2026-06-27T13:15:06.269124+00:00", - "sha": null + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 512, + "p50_amplification": 1.022, + "p99_amplification": 1.105 }, - "rows": [ - { - "sizeBytes": 16384, - "bandwidthGbps": 1.32, - "latency": 0.01245, - "sizeClass": "decode", - "correct": true - }, - { - "sizeBytes": 65536, - "bandwidthGbps": 4.88, - "latency": 0.01342, - "sizeClass": "decode", - "correct": true - }, - { - "sizeBytes": 262144, - "bandwidthGbps": 18.4, - "latency": 0.01425, - "sizeClass": "decode", - "correct": true - }, - { - "sizeBytes": 1048576, - "bandwidthGbps": 50.51, - "latency": 0.02076, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 4194304, - "bandwidthGbps": 157.92, - "latency": 0.02656, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 16777216, - "bandwidthGbps": 283.17, - "latency": 0.05925, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 67108864, - "bandwidthGbps": 357.67, - "latency": 0.18763, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 268435456, - "bandwidthGbps": 386.41, - "latency": 0.69468, - "sizeClass": "prefill", - "correct": true - } - ] - }, - { - "id": "cxt-a09960ed", - "identity": "kv-cache|h200|h200-nvlink-island|nvlink|dtod-remote|paged/memcpy|ms", - "cohortIdentity": "kv-cache|h200|nvlink", - "family": "kv-cache", - "sku": "h200", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "operation": "dtod-remote", - "subtype": "paged/memcpy", - "valid": true, - "status": "valid", - "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl", - "peakBandwidthGbps": null, - "latencyUnit": "ms", - "colorKey": "h200_a09960ed", - "label": "H200 · dtod-remote · paged/memcpy", - "generatedAt": 
"2026-06-27T13:15:06.269124+00:00", - "run": { - "id": null, - "url": null, - "createdAt": "2026-06-27T13:15:06.269124+00:00", - "sha": null + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 1024, + "p50_amplification": 1.026, + "p99_amplification": 1.114 }, - "rows": [ - { - "sizeBytes": 16384, - "bandwidthGbps": 1.02, - "latency": 0.01611, - "sizeClass": "decode", - "correct": true - }, - { - "sizeBytes": 65536, - "bandwidthGbps": 4.09, - "latency": 0.01602, - "sizeClass": "decode", - "correct": true - }, - { - "sizeBytes": 262144, - "bandwidthGbps": 4.28, - "latency": 0.06122, - "sizeClass": "decode", - "correct": true - }, - { - "sizeBytes": 1048576, - "bandwidthGbps": 4.15, - "latency": 0.25284, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 4194304, - "bandwidthGbps": 4.16, - "latency": 1.0074, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 16777216, - "bandwidthGbps": 4.38, - "latency": 3.83027, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 67108864, - "bandwidthGbps": 3.98, - "latency": 16.86224, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 268435456, - "bandwidthGbps": 4.35, - "latency": 61.70685, - "sizeClass": "prefill", - "correct": true - } - ] - }, - { - "id": "cxt-5a06e0c5", - "identity": "kv-cache|h200|h200-nvlink-island|nvlink|dtoh|contiguous/memcpy|ms", - "cohortIdentity": "kv-cache|h200|nvlink", - "family": "kv-cache", - "sku": "h200", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "operation": "dtoh", - "subtype": "contiguous/memcpy", - "valid": true, - "status": "valid", - "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl", - "peakBandwidthGbps": null, - "latencyUnit": "ms", - "colorKey": "h200_5a06e0c5", - "label": "H200 · dtoh · contiguous/memcpy", - "generatedAt": "2026-06-27T13:15:06.269124+00:00", - "run": { - "id": null, - "url": null, - "createdAt": 
"2026-06-27T13:15:06.269124+00:00", - "sha": null + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 2048, + "p50_amplification": 1.025, + "p99_amplification": 1.068 }, - "rows": [ - { - "sizeBytes": 16384, - "bandwidthGbps": 1.33, - "latency": 0.01232, - "sizeClass": "decode", - "correct": true - }, - { - "sizeBytes": 65536, - "bandwidthGbps": 3.39, - "latency": 0.01935, - "sizeClass": "decode", - "correct": true - }, - { - "sizeBytes": 262144, - "bandwidthGbps": 7.01, - "latency": 0.03738, - "sizeClass": "decode", - "correct": true - }, - { - "sizeBytes": 1048576, - "bandwidthGbps": 9.35, - "latency": 0.11209, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 4194304, - "bandwidthGbps": 12.26, - "latency": 0.34211, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 16777216, - "bandwidthGbps": 13.63, - "latency": 1.2306, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 67108864, - "bandwidthGbps": 14, - "latency": 4.79503, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 268435456, - "bandwidthGbps": 14.17, - "latency": 18.94882, - "sizeClass": "prefill", - "correct": true - } - ] - }, - { - "id": "cxt-196034c4", - "identity": "kv-cache|h200|h200-nvlink-island|nvlink|dtoh|contiguous/pinned|ms", - "cohortIdentity": "kv-cache|h200|nvlink", - "family": "kv-cache", - "sku": "h200", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "operation": "dtoh", - "subtype": "contiguous/pinned", - "valid": true, - "status": "valid", - "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl", - "peakBandwidthGbps": null, - "latencyUnit": "ms", - "colorKey": "h200_196034c4", - "label": "H200 · dtoh · contiguous/pinned", - "generatedAt": "2026-06-27T13:15:06.269124+00:00", - "run": { - "id": null, - "url": null, - "createdAt": "2026-06-27T13:15:06.269124+00:00", - "sha": null + { + "sku": "gb300", + "ep": 8, + "phase": 
"prefill", + "routing": "zipf-heavy+eplb", + "T": 4096, + "p50_amplification": 1.025, + "p99_amplification": 1.045 }, - "rows": [ - { - "sizeBytes": 16384, - "bandwidthGbps": 4.66, - "latency": 0.00352, - "sizeClass": "decode", - "correct": true - }, - { - "sizeBytes": 65536, - "bandwidthGbps": 11.72, - "latency": 0.00559, - "sizeClass": "decode", - "correct": true - }, - { - "sizeBytes": 262144, - "bandwidthGbps": 36.36, - "latency": 0.00721, - "sizeClass": "decode", - "correct": true - }, - { - "sizeBytes": 1048576, - "bandwidthGbps": 47.93, - "latency": 0.02188, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 4194304, - "bandwidthGbps": 52.92, - "latency": 0.07926, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 16777216, - "bandwidthGbps": 54.32, - "latency": 0.30887, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 67108864, - "bandwidthGbps": 54.42, - "latency": 1.2332, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 268435456, - "bandwidthGbps": 54.68, - "latency": 4.90889, - "sizeClass": "prefill", - "correct": true - } - ] - }, - { - "id": "cxt-1edeeeca", - "identity": "kv-cache|h200|h200-nvlink-island|nvlink|dtoh|paged/memcpy|ms", - "cohortIdentity": "kv-cache|h200|nvlink", - "family": "kv-cache", - "sku": "h200", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "operation": "dtoh", - "subtype": "paged/memcpy", - "valid": true, - "status": "valid", - "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl", - "peakBandwidthGbps": null, - "latencyUnit": "ms", - "colorKey": "h200_1edeeeca", - "label": "H200 · dtoh · paged/memcpy", - "generatedAt": "2026-06-27T13:15:06.269124+00:00", - "run": { - "id": null, - "url": null, - "createdAt": "2026-06-27T13:15:06.269124+00:00", - "sha": null + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 128, + "p50_amplification": 1.032, + "p99_amplification": 1.244 
}, - "rows": [ - { - "sizeBytes": 16384, - "bandwidthGbps": 1.12, - "latency": 0.01465, - "sizeClass": "decode", - "correct": true - }, - { - "sizeBytes": 65536, - "bandwidthGbps": 3.05, - "latency": 0.02151, - "sizeClass": "decode", - "correct": true - }, - { - "sizeBytes": 262144, - "bandwidthGbps": 3.1, - "latency": 0.08467, - "sizeClass": "decode", - "correct": true - }, - { - "sizeBytes": 1048576, - "bandwidthGbps": 3.2, - "latency": 0.32818, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 4194304, - "bandwidthGbps": 3.07, - "latency": 1.3646, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 16777216, - "bandwidthGbps": 3.14, - "latency": 5.3446, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 67108864, - "bandwidthGbps": 3.12, - "latency": 21.51246, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 268435456, - "bandwidthGbps": 3.1, - "latency": 86.61224, - "sizeClass": "prefill", - "correct": true - } - ] - }, - { - "id": "cxt-19277faf", - "identity": "kv-cache|h200|h200-nvlink-island|nvlink|dtoh|paged/pinned|ms", - "cohortIdentity": "kv-cache|h200|nvlink", - "family": "kv-cache", - "sku": "h200", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "operation": "dtoh", - "subtype": "paged/pinned", - "valid": true, - "status": "valid", - "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl", - "peakBandwidthGbps": null, - "latencyUnit": "ms", - "colorKey": "h200_19277faf", - "label": "H200 · dtoh · paged/pinned", - "generatedAt": "2026-06-27T13:15:06.269124+00:00", - "run": { - "id": null, - "url": null, - "createdAt": "2026-06-27T13:15:06.269124+00:00", - "sha": null + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 256, + "p50_amplification": 1.03, + "p99_amplification": 1.015 }, - "rows": [ - { - "sizeBytes": 16384, - "bandwidthGbps": 2.57, - "latency": 0.00638, - "sizeClass": "decode", - "correct": 
true - }, - { - "sizeBytes": 65536, - "bandwidthGbps": 6.21, - "latency": 0.01056, - "sizeClass": "decode", - "correct": true - }, - { - "sizeBytes": 262144, - "bandwidthGbps": 6.34, - "latency": 0.04137, - "sizeClass": "decode", - "correct": true - }, - { - "sizeBytes": 1048576, - "bandwidthGbps": 6.39, - "latency": 0.16406, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 4194304, - "bandwidthGbps": 9.68, - "latency": 0.4333, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 16777216, - "bandwidthGbps": 11.16, - "latency": 1.50278, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 67108864, - "bandwidthGbps": 10.75, - "latency": 6.24109, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 268435456, - "bandwidthGbps": 11.08, - "latency": 24.21999, - "sizeClass": "prefill", - "correct": true - } - ] - }, - { - "id": "cxt-0cec247d", - "identity": "kv-cache|h200|h200-nvlink-island|nvlink|htod|contiguous/memcpy|ms", - "cohortIdentity": "kv-cache|h200|nvlink", - "family": "kv-cache", - "sku": "h200", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "operation": "htod", - "subtype": "contiguous/memcpy", - "valid": true, - "status": "valid", - "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl", - "peakBandwidthGbps": null, - "latencyUnit": "ms", - "colorKey": "h200_0cec247d", - "label": "H200 · htod · contiguous/memcpy", - "generatedAt": "2026-06-27T13:15:06.269124+00:00", - "run": { - "id": null, - "url": null, - "createdAt": "2026-06-27T13:15:06.269124+00:00", - "sha": null + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 512, + "p50_amplification": 1.049, + "p99_amplification": 1.02 }, - "rows": [ - { - "sizeBytes": 16384, - "bandwidthGbps": 2.07, - "latency": 0.00793, - "sizeClass": "decode", - "correct": true - }, - { - "sizeBytes": 65536, - "bandwidthGbps": 4.33, - "latency": 0.01513, - "sizeClass": 
"decode", - "correct": true - }, - { - "sizeBytes": 262144, - "bandwidthGbps": 13.97, - "latency": 0.01876, - "sizeClass": "decode", - "correct": true - }, - { - "sizeBytes": 1048576, - "bandwidthGbps": 17.88, - "latency": 0.05865, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 4194304, - "bandwidthGbps": 14.91, - "latency": 0.28129, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 16777216, - "bandwidthGbps": 15.65, - "latency": 1.07179, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 67108864, - "bandwidthGbps": 20.77, - "latency": 3.23166, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 268435456, - "bandwidthGbps": 20.84, - "latency": 12.88331, - "sizeClass": "prefill", - "correct": true - } - ] - }, - { - "id": "cxt-541fa51c", - "identity": "kv-cache|h200|h200-nvlink-island|nvlink|htod|contiguous/pinned|ms", - "cohortIdentity": "kv-cache|h200|nvlink", - "family": "kv-cache", - "sku": "h200", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "operation": "htod", - "subtype": "contiguous/pinned", - "valid": true, - "status": "valid", - "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl", - "peakBandwidthGbps": null, - "latencyUnit": "ms", - "colorKey": "h200_541fa51c", - "label": "H200 · htod · contiguous/pinned", - "generatedAt": "2026-06-27T13:15:06.269124+00:00", - "run": { - "id": null, - "url": null, - "createdAt": "2026-06-27T13:15:06.269124+00:00", - "sha": null + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 1024, + "p50_amplification": 1.095, + "p99_amplification": 1.099 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 2048, + "p50_amplification": 1.139, + "p99_amplification": 1.152 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 4096, + "p50_amplification": 1.201, + "p99_amplification": 1.205 + }, + 
{ + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 128, + "p50_amplification": 1.004, + "p99_amplification": 1.275 }, - "rows": [ - { - "sizeBytes": 16384, - "bandwidthGbps": 4.25, - "latency": 0.00386, - "sizeClass": "decode", - "correct": true - }, - { - "sizeBytes": 65536, - "bandwidthGbps": 10.91, - "latency": 0.00601, - "sizeClass": "decode", - "correct": true - }, - { - "sizeBytes": 262144, - "bandwidthGbps": 34.78, - "latency": 0.00754, - "sizeClass": "decode", - "correct": true - }, - { - "sizeBytes": 1048576, - "bandwidthGbps": 47.66, - "latency": 0.022, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 4194304, - "bandwidthGbps": 53.14, - "latency": 0.07893, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 16777216, - "bandwidthGbps": 54.49, - "latency": 0.30792, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 67108864, - "bandwidthGbps": 54.39, - "latency": 1.23395, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 268435456, - "bandwidthGbps": 55.4, - "latency": 4.84562, - "sizeClass": "prefill", - "correct": true - } - ] - }, - { - "id": "cxt-59482272", - "identity": "kv-cache|h200|h200-nvlink-island|nvlink|htod|paged/memcpy|ms", - "cohortIdentity": "kv-cache|h200|nvlink", - "family": "kv-cache", - "sku": "h200", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "operation": "htod", - "subtype": "paged/memcpy", - "valid": true, - "status": "valid", - "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl", - "peakBandwidthGbps": null, - "latencyUnit": "ms", - "colorKey": "h200_59482272", - "label": "H200 · htod · paged/memcpy", - "generatedAt": "2026-06-27T13:15:06.269124+00:00", - "run": { - "id": null, - "url": null, - "createdAt": "2026-06-27T13:15:06.269124+00:00", - "sha": null + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 256, + 
"p50_amplification": 1.004, + "p99_amplification": 1.011 }, - "rows": [ - { - "sizeBytes": 16384, - "bandwidthGbps": 1.23, - "latency": 0.01335, - "sizeClass": "decode", - "correct": true - }, - { - "sizeBytes": 65536, - "bandwidthGbps": 4.57, - "latency": 0.01434, - "sizeClass": "decode", - "correct": true - }, - { - "sizeBytes": 262144, - "bandwidthGbps": 5.08, - "latency": 0.05156, - "sizeClass": "decode", - "correct": true - }, - { - "sizeBytes": 1048576, - "bandwidthGbps": 4.95, - "latency": 0.21203, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 4194304, - "bandwidthGbps": 4.73, - "latency": 0.8865, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 16777216, - "bandwidthGbps": 4.77, - "latency": 3.51835, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 67108864, - "bandwidthGbps": 5.3, - "latency": 12.65221, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 268435456, - "bandwidthGbps": 5.32, - "latency": 50.43789, - "sizeClass": "prefill", - "correct": true - } - ] - }, - { - "id": "cxt-a5a8f197", - "identity": "kv-cache|h200|h200-nvlink-island|nvlink|htod|paged/pinned|ms", - "cohortIdentity": "kv-cache|h200|nvlink", - "family": "kv-cache", - "sku": "h200", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "operation": "htod", - "subtype": "paged/pinned", - "valid": true, - "status": "valid", - "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl", - "peakBandwidthGbps": null, - "latencyUnit": "ms", - "colorKey": "h200_a5a8f197", - "label": "H200 · htod · paged/pinned", - "generatedAt": "2026-06-27T13:15:06.269124+00:00", - "run": { - "id": null, - "url": null, - "createdAt": "2026-06-27T13:15:06.269124+00:00", - "sha": null + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 512, + "p50_amplification": 1.003, + "p99_amplification": 1.002 }, - "rows": [ - { - "sizeBytes": 16384, - "bandwidthGbps": 
2.01, - "latency": 0.00814, - "sizeClass": "decode", - "correct": true - }, - { - "sizeBytes": 65536, - "bandwidthGbps": 5.86, - "latency": 0.01117, - "sizeClass": "decode", - "correct": true - }, - { - "sizeBytes": 262144, - "bandwidthGbps": 6.21, - "latency": 0.04221, - "sizeClass": "decode", - "correct": true - }, - { - "sizeBytes": 1048576, - "bandwidthGbps": 6.39, - "latency": 0.16417, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 4194304, - "bandwidthGbps": 9.51, - "latency": 0.44121, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 16777216, - "bandwidthGbps": 10.54, - "latency": 1.59134, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 67108864, - "bandwidthGbps": 10.79, - "latency": 6.22042, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 268435456, - "bandwidthGbps": 10.65, - "latency": 25.1967, - "sizeClass": "prefill", - "correct": true - } - ] - }, - { - "id": "cxt-3fe4f8ad", - "identity": "kv-cache|mi355x|mi355x-xgmi|xgmi|dtod-remote|contiguous/rccl|ms", - "cohortIdentity": "kv-cache|mi355x|xgmi", - "family": "kv-cache", - "sku": "mi355x", - "topologyClass": "mi355x-xgmi", - "transport": "xgmi", - "operation": "dtod-remote", - "subtype": "contiguous/rccl", - "valid": true, - "status": "valid", - "note": "wired: rccl", - "peakBandwidthGbps": null, - "latencyUnit": "ms", - "colorKey": "mi355x_3fe4f8ad", - "label": "MI355X · dtod-remote · contiguous/rccl", - "generatedAt": "2026-06-29T00:48:56.689585+00:00", - "run": { - "id": null, - "url": null, - "createdAt": "2026-06-29T00:48:56.689585+00:00", - "sha": null + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 1024, + "p50_amplification": 1.007, + "p99_amplification": 1.014 }, - "rows": [ - { - "sizeBytes": 65536, - "bandwidthGbps": 0.93, - "latency": 0.07018, - "sizeClass": "decode", - "correct": true - }, - { - "sizeBytes": 262144, - "bandwidthGbps": 4.14, - "latency": 0.06326, - 
"sizeClass": "decode", - "correct": true - }, - { - "sizeBytes": 1048576, - "bandwidthGbps": 14.43, - "latency": 0.07267, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 4194304, - "bandwidthGbps": 46.03, - "latency": 0.09112, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 16777216, - "bandwidthGbps": 63.43, - "latency": 0.26449, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 67108864, - "bandwidthGbps": 69.8, - "latency": 0.96147, - "sizeClass": "prefill", - "correct": true - }, - { - "sizeBytes": 268435456, - "bandwidthGbps": 71.72, - "latency": 3.74303, - "sizeClass": "prefill", - "correct": true - } - ] - } - ], - "rlMesh": [ - { - "id": "cxt-e28663d4", - "identity": "rl-mesh|b300|b300-nvlink-island|nvlink|generator_to_trainer|paired|ms", - "cohortIdentity": "rl-mesh|b300|nvlink", - "family": "rl-mesh", - "sku": "b300", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", - "operation": "generator_to_trainer", - "subtype": "paired", - "valid": true, - "status": "valid", - "note": "peak 682 GB/s · world=8: trainer 4 <-> generator 4", - "peakBandwidthGbps": 681.89, - "latencyUnit": "ms", - "colorKey": "b300_e28663d4", - "label": "B300 · gen->trn · paired", - "generatedAt": "2026-06-27T13:38:50.291192+00:00", - "run": { - "id": null, - "url": null, - "createdAt": "2026-06-27T13:38:50.291192+00:00", - "sha": null + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 2048, + "p50_amplification": 1.009, + "p99_amplification": 1.028 }, - "rows": [ - { - "sizeBytes": 1048576, - "bandwidthGbps": 22.43, - "latency": 0.04675, - "sizeClass": null, - "correct": true - }, - { - "sizeBytes": 4194304, - "bandwidthGbps": 210.03, - "latency": 0.01997, - "sizeClass": null, - "correct": true - }, - { - "sizeBytes": 16777216, - "bandwidthGbps": 444.24, - "latency": 0.03777, - "sizeClass": null, - "correct": true - }, - { - "sizeBytes": 67108864, - "bandwidthGbps": 
613.35, - "latency": 0.10941, - "sizeClass": null, - "correct": true - }, - { - "sizeBytes": 268435456, - "bandwidthGbps": 672.64, - "latency": 0.39908, - "sizeClass": null, - "correct": true - }, - { - "sizeBytes": 1073741824, - "bandwidthGbps": 681.89, - "latency": 1.57465, - "sizeClass": null, - "correct": true - } - ] - }, - { - "id": "cxt-abc63f3d", - "identity": "rl-mesh|b300|b300-nvlink-island|nvlink|generator_to_trainer|redistribute|ms", - "cohortIdentity": "rl-mesh|b300|nvlink", - "family": "rl-mesh", - "sku": "b300", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", - "operation": "generator_to_trainer", - "subtype": "redistribute", - "valid": true, - "status": "valid", - "note": "peak 682 GB/s · world=8: trainer 4 <-> generator 4", - "peakBandwidthGbps": 681.89, - "latencyUnit": "ms", - "colorKey": "b300_abc63f3d", - "label": "B300 · gen->trn · redistribute", - "generatedAt": "2026-06-27T13:38:50.291192+00:00", - "run": { - "id": null, - "url": null, - "createdAt": "2026-06-27T13:38:50.291192+00:00", - "sha": null + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 4096, + "p50_amplification": 1.005, + "p99_amplification": 1.007 }, - "rows": [ - { - "sizeBytes": 1048576, - "bandwidthGbps": 0.02, - "latency": 44.24712, - "sizeClass": null, - "correct": true - }, - { - "sizeBytes": 4194304, - "bandwidthGbps": 56.86, - "latency": 0.07377, - "sizeClass": null, - "correct": true - }, - { - "sizeBytes": 16777216, - "bandwidthGbps": 168.78, - "latency": 0.0994, - "sizeClass": null, - "correct": true - }, - { - "sizeBytes": 67108864, - "bandwidthGbps": 466.61, - "latency": 0.14382, - "sizeClass": null, - "correct": true - }, - { - "sizeBytes": 268435456, - "bandwidthGbps": 565.6, - "latency": 0.4746, - "sizeClass": null, - "correct": true - }, - { - "sizeBytes": 1073741824, - "bandwidthGbps": 656.22, - "latency": 1.63626, - "sizeClass": null, - "correct": true - } - ] - }, - { - "id": "cxt-08ab0854", - 
"identity": "rl-mesh|b300|b300-nvlink-island|nvlink|trainer_to_generator|paired|ms", - "cohortIdentity": "rl-mesh|b300|nvlink", - "family": "rl-mesh", - "sku": "b300", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", - "operation": "trainer_to_generator", - "subtype": "paired", - "valid": true, - "status": "valid", - "note": "peak 682 GB/s · world=8: trainer 4 <-> generator 4", - "peakBandwidthGbps": 681.89, - "latencyUnit": "ms", - "colorKey": "b300_08ab0854", - "label": "B300 · trn->gen · paired", - "generatedAt": "2026-06-27T13:38:50.291192+00:00", - "run": { - "id": null, - "url": null, - "createdAt": "2026-06-27T13:38:50.291192+00:00", - "sha": null + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 128, + "p50_amplification": 0.994, + "p99_amplification": 0.982 }, - "rows": [ - { - "sizeBytes": 1048576, - "bandwidthGbps": 8.13, - "latency": 0.12892, - "sizeClass": null, - "correct": true - }, - { - "sizeBytes": 4194304, - "bandwidthGbps": 161.07, - "latency": 0.02604, - "sizeClass": null, - "correct": true - }, - { - "sizeBytes": 16777216, - "bandwidthGbps": 455.8, - "latency": 0.03681, - "sizeClass": null, - "correct": true - }, - { - "sizeBytes": 67108864, - "bandwidthGbps": 613.96, - "latency": 0.10931, - "sizeClass": null, - "correct": true - }, - { - "sizeBytes": 268435456, - "bandwidthGbps": 670.34, - "latency": 0.40045, - "sizeClass": null, - "correct": true - }, - { - "sizeBytes": 1073741824, - "bandwidthGbps": 681.46, - "latency": 1.57564, - "sizeClass": null, - "correct": true - } - ] - }, - { - "id": "cxt-bea1bfbd", - "identity": "rl-mesh|b300|b300-nvlink-island|nvlink|trainer_to_generator|redistribute|ms", - "cohortIdentity": "rl-mesh|b300|nvlink", - "family": "rl-mesh", - "sku": "b300", - "topologyClass": "b300-nvlink-island", - "transport": "nvlink", - "operation": "trainer_to_generator", - "subtype": "redistribute", - "valid": true, - "status": "valid", - "note": "peak 682 GB/s · 
world=8: trainer 4 <-> generator 4", - "peakBandwidthGbps": 681.89, - "latencyUnit": "ms", - "colorKey": "b300_bea1bfbd", - "label": "B300 · trn->gen · redistribute", - "generatedAt": "2026-06-27T13:38:50.291192+00:00", - "run": { - "id": null, - "url": null, - "createdAt": "2026-06-27T13:38:50.291192+00:00", - "sha": null + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 256, + "p50_amplification": 0.969, + "p99_amplification": 1.212 }, - "rows": [ - { - "sizeBytes": 1048576, - "bandwidthGbps": 0.01, - "latency": 74.91642, - "sizeClass": null, - "correct": true - }, - { - "sizeBytes": 4194304, - "bandwidthGbps": 66.21, - "latency": 0.06334, - "sizeClass": null, - "correct": true - }, - { - "sizeBytes": 16777216, - "bandwidthGbps": 295.56, - "latency": 0.05676, - "sizeClass": null, - "correct": true - }, - { - "sizeBytes": 67108864, - "bandwidthGbps": 581.82, - "latency": 0.11534, - "sizeClass": null, - "correct": true - }, - { - "sizeBytes": 268435456, - "bandwidthGbps": 543.6, - "latency": 0.49381, - "sizeClass": null, - "correct": true - }, - { - "sizeBytes": 1073741824, - "bandwidthGbps": 659.57, - "latency": 1.62794, - "sizeClass": null, - "correct": true - } - ] - }, - { - "id": "cxt-3e3f24d0", - "identity": "rl-mesh|h100|h100-nvlink-island|nvlink|generator_to_trainer|paired|ms", - "cohortIdentity": "rl-mesh|h100|nvlink", - "family": "rl-mesh", - "sku": "h100", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "operation": "generator_to_trainer", - "subtype": "paired", - "valid": true, - "status": "valid", - "note": "peak 373 GB/s · world=8: trainer 4 <-> generator 4", - "peakBandwidthGbps": 372.53, - "latencyUnit": "ms", - "colorKey": "h100_3e3f24d0", - "label": "H100 · gen->trn · paired", - "generatedAt": "2026-06-27T13:36:14.593136+00:00", - "run": { - "id": null, - "url": null, - "createdAt": "2026-06-27T13:36:14.593136+00:00", - "sha": null + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", 
+ "routing": "zipf-moderate", + "T": 512, + "p50_amplification": 0.991, + "p99_amplification": 0.985 }, - "rows": [ - { - "sizeBytes": 1048576, - "bandwidthGbps": 28.68, - "latency": 0.03656, - "sizeClass": null, - "correct": true - }, - { - "sizeBytes": 4194304, - "bandwidthGbps": 119.47, - "latency": 0.03511, - "sizeClass": null, - "correct": true - }, - { - "sizeBytes": 16777216, - "bandwidthGbps": 179.16, - "latency": 0.09364, - "sizeClass": null, - "correct": true - }, - { - "sizeBytes": 67108864, - "bandwidthGbps": 333.15, - "latency": 0.20144, - "sizeClass": null, - "correct": true - }, - { - "sizeBytes": 268435456, - "bandwidthGbps": 360.41, - "latency": 0.7448, - "sizeClass": null, - "correct": true - }, - { - "sizeBytes": 1073741824, - "bandwidthGbps": 372.22, - "latency": 2.88468, - "sizeClass": null, - "correct": true - } - ] - }, - { - "id": "cxt-02dece19", - "identity": "rl-mesh|h100|h100-nvlink-island|nvlink|generator_to_trainer|redistribute|ms", - "cohortIdentity": "rl-mesh|h100|nvlink", - "family": "rl-mesh", - "sku": "h100", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "operation": "generator_to_trainer", - "subtype": "redistribute", - "valid": true, - "status": "valid", - "note": "peak 373 GB/s · world=8: trainer 4 <-> generator 4", - "peakBandwidthGbps": 372.53, - "latencyUnit": "ms", - "colorKey": "h100_02dece19", - "label": "H100 · gen->trn · redistribute", - "generatedAt": "2026-06-27T13:36:14.593136+00:00", - "run": { - "id": null, - "url": null, - "createdAt": "2026-06-27T13:36:14.593136+00:00", - "sha": null + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 1024, + "p50_amplification": 1.034, + "p99_amplification": 1.121 }, - "rows": [ - { - "sizeBytes": 1048576, - "bandwidthGbps": 0.33, - "latency": 3.20924, - "sizeClass": null, - "correct": true - }, - { - "sizeBytes": 4194304, - "bandwidthGbps": 26.07, - "latency": 0.16087, - "sizeClass": null, - "correct": true - }, - { - 
"sizeBytes": 16777216, - "bandwidthGbps": 105.25, - "latency": 0.1594, - "sizeClass": null, - "correct": true - }, - { - "sizeBytes": 67108864, - "bandwidthGbps": 306.68, - "latency": 0.21882, - "sizeClass": null, - "correct": true - }, - { - "sizeBytes": 268435456, - "bandwidthGbps": 332.52, - "latency": 0.80728, - "sizeClass": null, - "correct": true - }, - { - "sizeBytes": 1073741824, - "bandwidthGbps": 324.51, - "latency": 3.30884, - "sizeClass": null, - "correct": true - } - ] - }, - { - "id": "cxt-40b74430", - "identity": "rl-mesh|h100|h100-nvlink-island|nvlink|trainer_to_generator|paired|ms", - "cohortIdentity": "rl-mesh|h100|nvlink", - "family": "rl-mesh", - "sku": "h100", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "operation": "trainer_to_generator", - "subtype": "paired", - "valid": true, - "status": "valid", - "note": "peak 373 GB/s · world=8: trainer 4 <-> generator 4", - "peakBandwidthGbps": 372.53, - "latencyUnit": "ms", - "colorKey": "h100_40b74430", - "label": "H100 · trn->gen · paired", - "generatedAt": "2026-06-27T13:36:14.593136+00:00", - "run": { - "id": null, - "url": null, - "createdAt": "2026-06-27T13:36:14.593136+00:00", - "sha": null + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 2048, + "p50_amplification": 1.124, + "p99_amplification": 1.132 }, - "rows": [ - { - "sizeBytes": 1048576, - "bandwidthGbps": 19.34, - "latency": 0.05421, - "sizeClass": null, - "correct": true - }, - { - "sizeBytes": 4194304, - "bandwidthGbps": 102.79, - "latency": 0.04081, - "sizeClass": null, - "correct": true - }, - { - "sizeBytes": 16777216, - "bandwidthGbps": 282.95, - "latency": 0.05929, - "sizeClass": null, - "correct": true - }, - { - "sizeBytes": 67108864, - "bandwidthGbps": 331.36, - "latency": 0.20252, - "sizeClass": null, - "correct": true - }, - { - "sizeBytes": 268435456, - "bandwidthGbps": 359.85, - "latency": 0.74597, - "sizeClass": null, - "correct": true - }, - { - 
"sizeBytes": 1073741824, - "bandwidthGbps": 372.53, - "latency": 2.88228, - "sizeClass": null, - "correct": true - } - ] - }, - { - "id": "cxt-3f787c79", - "identity": "rl-mesh|h100|h100-nvlink-island|nvlink|trainer_to_generator|redistribute|ms", - "cohortIdentity": "rl-mesh|h100|nvlink", - "family": "rl-mesh", - "sku": "h100", - "topologyClass": "h100-nvlink-island", - "transport": "nvlink", - "operation": "trainer_to_generator", - "subtype": "redistribute", - "valid": true, - "status": "valid", - "note": "peak 373 GB/s · world=8: trainer 4 <-> generator 4", - "peakBandwidthGbps": 372.53, - "latencyUnit": "ms", - "colorKey": "h100_3f787c79", - "label": "H100 · trn->gen · redistribute", - "generatedAt": "2026-06-27T13:36:14.593136+00:00", - "run": { - "id": null, - "url": null, - "createdAt": "2026-06-27T13:36:14.593136+00:00", - "sha": null + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 4096, + "p50_amplification": 1.194, + "p99_amplification": 1.197 }, - "rows": [ - { - "sizeBytes": 1048576, - "bandwidthGbps": 0.02, - "latency": 42.89165, - "sizeClass": null, - "correct": true - }, - { - "sizeBytes": 4194304, - "bandwidthGbps": 26.19, - "latency": 0.16012, - "sizeClass": null, - "correct": true - }, - { - "sizeBytes": 16777216, - "bandwidthGbps": 107.67, - "latency": 0.15583, - "sizeClass": null, - "correct": true - }, - { - "sizeBytes": 67108864, - "bandwidthGbps": 313.63, - "latency": 0.21398, - "sizeClass": null, - "correct": true - }, - { - "sizeBytes": 268435456, - "bandwidthGbps": 324.4, - "latency": 0.82748, - "sizeClass": null, - "correct": true - }, - { - "sizeBytes": 1073741824, - "bandwidthGbps": 336.39, - "latency": 3.19197, - "sizeClass": null, - "correct": true - } - ] - }, - { - "id": "cxt-3051cd1a", - "identity": "rl-mesh|h200|h200-nvlink-island|nvlink|generator_to_trainer|paired|ms", - "cohortIdentity": "rl-mesh|h200|nvlink", - "family": "rl-mesh", - "sku": "h200", - "topologyClass": 
"h200-nvlink-island", - "transport": "nvlink", - "operation": "generator_to_trainer", - "subtype": "paired", - "valid": true, - "status": "valid", - "note": "peak 369 GB/s · world=8: trainer 4 <-> generator 4", - "peakBandwidthGbps": 369.31, - "latencyUnit": "ms", - "colorKey": "h200_3051cd1a", - "label": "H200 · gen->trn · paired", - "generatedAt": "2026-06-27T13:38:51.710797+00:00", - "run": { - "id": null, - "url": null, - "createdAt": "2026-06-27T13:38:51.710797+00:00", - "sha": null + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 128, + "p50_amplification": 1.007, + "p99_amplification": 0.993 }, - "rows": [ - { - "sizeBytes": 1048576, - "bandwidthGbps": 23.89, - "latency": 0.0439, - "sizeClass": null, - "correct": true - }, - { - "sizeBytes": 4194304, - "bandwidthGbps": 156.49, - "latency": 0.0268, - "sizeClass": null, - "correct": true - }, - { - "sizeBytes": 16777216, - "bandwidthGbps": 293.07, - "latency": 0.05725, - "sizeClass": null, - "correct": true - }, - { - "sizeBytes": 67108864, - "bandwidthGbps": 335.17, - "latency": 0.20023, - "sizeClass": null, - "correct": true - }, - { - "sizeBytes": 268435456, - "bandwidthGbps": 358.96, - "latency": 0.74782, - "sizeClass": null, - "correct": true - }, - { - "sizeBytes": 1073741824, - "bandwidthGbps": 369.31, - "latency": 2.90744, - "sizeClass": null, - "correct": true - } - ] - }, - { - "id": "cxt-71059d57", - "identity": "rl-mesh|h200|h200-nvlink-island|nvlink|generator_to_trainer|redistribute|ms", - "cohortIdentity": "rl-mesh|h200|nvlink", - "family": "rl-mesh", - "sku": "h200", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "operation": "generator_to_trainer", - "subtype": "redistribute", - "valid": true, - "status": "valid", - "note": "peak 369 GB/s · world=8: trainer 4 <-> generator 4", - "peakBandwidthGbps": 369.31, - "latencyUnit": "ms", - "colorKey": "h200_71059d57", - "label": "H200 · gen->trn · redistribute", - "generatedAt": 
"2026-06-27T13:38:51.710797+00:00", - "run": { - "id": null, - "url": null, - "createdAt": "2026-06-27T13:38:51.710797+00:00", - "sha": null + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 256, + "p50_amplification": 1.008, + "p99_amplification": 0.989 }, - "rows": [ - { - "sizeBytes": 1048576, - "bandwidthGbps": 0.04, - "latency": 25.02575, - "sizeClass": null, - "correct": true - }, - { - "sizeBytes": 4194304, - "bandwidthGbps": 39.38, - "latency": 0.10651, - "sizeClass": null, - "correct": true - }, - { - "sizeBytes": 16777216, - "bandwidthGbps": 155.89, - "latency": 0.10762, - "sizeClass": null, - "correct": true - }, - { - "sizeBytes": 67108864, - "bandwidthGbps": 245.8, - "latency": 0.27303, - "sizeClass": null, - "correct": true - }, - { - "sizeBytes": 268435456, - "bandwidthGbps": 330.18, - "latency": 0.81301, - "sizeClass": null, - "correct": true - }, - { - "sizeBytes": 1073741824, - "bandwidthGbps": 342.63, - "latency": 3.1338, - "sizeClass": null, - "correct": true - } - ] - }, - { - "id": "cxt-c6f0b6b2", - "identity": "rl-mesh|h200|h200-nvlink-island|nvlink|trainer_to_generator|paired|ms", - "cohortIdentity": "rl-mesh|h200|nvlink", - "family": "rl-mesh", - "sku": "h200", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "operation": "trainer_to_generator", - "subtype": "paired", - "valid": true, - "status": "valid", - "note": "peak 369 GB/s · world=8: trainer 4 <-> generator 4", - "peakBandwidthGbps": 369.31, - "latencyUnit": "ms", - "colorKey": "h200_c6f0b6b2", - "label": "H200 · trn->gen · paired", - "generatedAt": "2026-06-27T13:38:51.710797+00:00", - "run": { - "id": null, - "url": null, - "createdAt": "2026-06-27T13:38:51.710797+00:00", - "sha": null + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 512, + "p50_amplification": 1.014, + "p99_amplification": 1.148 }, - "rows": [ - { - "sizeBytes": 1048576, - "bandwidthGbps": 4.34, - 
"latency": 0.24155, - "sizeClass": null, - "correct": true - }, - { - "sizeBytes": 4194304, - "bandwidthGbps": 61.01, - "latency": 0.06874, - "sizeClass": null, - "correct": true - }, - { - "sizeBytes": 16777216, - "bandwidthGbps": 288.23, - "latency": 0.05821, - "sizeClass": null, - "correct": true - }, - { - "sizeBytes": 67108864, - "bandwidthGbps": 333.03, - "latency": 0.20151, - "sizeClass": null, - "correct": true - }, - { - "sizeBytes": 268435456, - "bandwidthGbps": 358.08, - "latency": 0.74964, - "sizeClass": null, - "correct": true - }, - { - "sizeBytes": 1073741824, - "bandwidthGbps": 369.21, - "latency": 2.90821, - "sizeClass": null, - "correct": true - } - ] - }, - { - "id": "cxt-494c6e3f", - "identity": "rl-mesh|h200|h200-nvlink-island|nvlink|trainer_to_generator|redistribute|ms", - "cohortIdentity": "rl-mesh|h200|nvlink", - "family": "rl-mesh", - "sku": "h200", - "topologyClass": "h200-nvlink-island", - "transport": "nvlink", - "operation": "trainer_to_generator", - "subtype": "redistribute", - "valid": true, - "status": "valid", - "note": "peak 369 GB/s · world=8: trainer 4 <-> generator 4", - "peakBandwidthGbps": 369.31, - "latencyUnit": "ms", - "colorKey": "h200_494c6e3f", - "label": "H200 · trn->gen · redistribute", - "generatedAt": "2026-06-27T13:38:51.710797+00:00", - "run": { - "id": null, - "url": null, - "createdAt": "2026-06-27T13:38:51.710797+00:00", - "sha": null + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 1024, + "p50_amplification": 0.994, + "p99_amplification": 1.003 }, - "rows": [ - { - "sizeBytes": 1048576, - "bandwidthGbps": 0.02, - "latency": 56.31775, - "sizeClass": null, - "correct": true - }, - { - "sizeBytes": 4194304, - "bandwidthGbps": 33.42, - "latency": 0.12549, - "sizeClass": null, - "correct": true - }, - { - "sizeBytes": 16777216, - "bandwidthGbps": 99.65, - "latency": 0.16836, - "sizeClass": null, - "correct": true - }, - { - "sizeBytes": 67108864, - "bandwidthGbps": 
180.83, - "latency": 0.37112, - "sizeClass": null, - "correct": true - }, - { - "sizeBytes": 268435456, - "bandwidthGbps": 260.28, - "latency": 1.03132, - "sizeClass": null, - "correct": true - }, - { - "sizeBytes": 1073741824, - "bandwidthGbps": 340.94, - "latency": 3.14936, - "sizeClass": null, - "correct": true - } - ] - }, - { - "id": "cxt-2963cf1c", - "identity": "rl-mesh|mi355x|mi355x-xgmi|xgmi|generator_to_trainer|paired|ms", - "cohortIdentity": "rl-mesh|mi355x|xgmi", - "family": "rl-mesh", - "sku": "mi355x", - "topologyClass": "mi355x-xgmi", - "transport": "xgmi", - "operation": "generator_to_trainer", - "subtype": "paired", - "valid": true, - "status": "valid", - "note": "peak 72 GB/s · world=8: trainer 4 <-> generator 4", - "peakBandwidthGbps": 71.79, - "latencyUnit": "ms", - "colorKey": "mi355x_2963cf1c", - "label": "MI355X · gen->trn · paired", - "generatedAt": "2026-06-28T05:12:36.633047+00:00", - "run": { - "id": null, - "url": null, - "createdAt": "2026-06-28T05:12:36.633047+00:00", - "sha": null + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 2048, + "p50_amplification": 1.009, + "p99_amplification": 1.078 }, - "rows": [ - { - "sizeBytes": 1048576, - "bandwidthGbps": 14.01, - "latency": 0.07485, - "sizeClass": null, - "correct": true - }, - { - "sizeBytes": 4194304, - "bandwidthGbps": 45.33, - "latency": 0.09253, - "sizeClass": null, - "correct": true - }, - { - "sizeBytes": 16777216, - "bandwidthGbps": 62.8, - "latency": 0.26717, - "sizeClass": null, - "correct": true - }, - { - "sizeBytes": 67108864, - "bandwidthGbps": 69.38, - "latency": 0.9672, - "sizeClass": null, - "correct": true - }, - { - "sizeBytes": 268435456, - "bandwidthGbps": 71.12, - "latency": 3.77445, - "sizeClass": null, - "correct": true - }, - { - "sizeBytes": 1073741824, - "bandwidthGbps": 71.62, - "latency": 14.99269, - "sizeClass": null, - "correct": true - } - ] - }, - { - "id": "cxt-687aa675", - "identity": 
"rl-mesh|mi355x|mi355x-xgmi|xgmi|generator_to_trainer|redistribute|ms", - "cohortIdentity": "rl-mesh|mi355x|xgmi", - "family": "rl-mesh", - "sku": "mi355x", - "topologyClass": "mi355x-xgmi", - "transport": "xgmi", - "operation": "generator_to_trainer", - "subtype": "redistribute", - "valid": true, - "status": "valid", - "note": "peak 72 GB/s · world=8: trainer 4 <-> generator 4", - "peakBandwidthGbps": 71.79, - "latencyUnit": "ms", - "colorKey": "mi355x_687aa675", - "label": "MI355X · gen->trn · redistribute", - "generatedAt": "2026-06-28T05:12:36.633047+00:00", - "run": { - "id": null, - "url": null, - "createdAt": "2026-06-28T05:12:36.633047+00:00", - "sha": null + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 4096, + "p50_amplification": 1.004, + "p99_amplification": 1.004 }, - "rows": [ - { - "sizeBytes": 1048576, - "bandwidthGbps": 0.09, - "latency": 12.00838, - "sizeClass": null, - "correct": true - }, - { - "sizeBytes": 4194304, - "bandwidthGbps": 13.34, - "latency": 0.3144, - "sizeClass": null, - "correct": true - }, - { - "sizeBytes": 16777216, - "bandwidthGbps": 48.16, - "latency": 0.34836, - "sizeClass": null, - "correct": true - }, - { - "sizeBytes": 67108864, - "bandwidthGbps": 64.08, - "latency": 1.04724, - "sizeClass": null, - "correct": true - }, - { - "sizeBytes": 268435456, - "bandwidthGbps": 69.45, - "latency": 3.8654, - "sizeClass": null, - "correct": true - }, - { - "sizeBytes": 1073741824, - "bandwidthGbps": 71.41, - "latency": 15.03625, - "sizeClass": null, - "correct": true - } - ] - }, - { - "id": "cxt-0700747c", - "identity": "rl-mesh|mi355x|mi355x-xgmi|xgmi|trainer_to_generator|paired|ms", - "cohortIdentity": "rl-mesh|mi355x|xgmi", - "family": "rl-mesh", - "sku": "mi355x", - "topologyClass": "mi355x-xgmi", - "transport": "xgmi", - "operation": "trainer_to_generator", - "subtype": "paired", - "valid": true, - "status": "valid", - "note": "peak 72 GB/s · world=8: trainer 4 <-> generator 4", - 
"peakBandwidthGbps": 71.79, - "latencyUnit": "ms", - "colorKey": "mi355x_0700747c", - "label": "MI355X · trn->gen · paired", - "generatedAt": "2026-06-28T05:12:36.633047+00:00", - "run": { - "id": null, - "url": null, - "createdAt": "2026-06-28T05:12:36.633047+00:00", - "sha": null + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 128, + "p50_amplification": 1.014, + "p99_amplification": 1.007 }, - "rows": [ - { - "sizeBytes": 1048576, - "bandwidthGbps": 10.53, - "latency": 0.0996, - "sizeClass": null, - "correct": true - }, - { - "sizeBytes": 4194304, - "bandwidthGbps": 44.33, - "latency": 0.09462, - "sizeClass": null, - "correct": true - }, - { - "sizeBytes": 16777216, - "bandwidthGbps": 62.58, - "latency": 0.2681, - "sizeClass": null, - "correct": true - }, - { - "sizeBytes": 67108864, - "bandwidthGbps": 69.37, - "latency": 0.96746, - "sizeClass": null, - "correct": true - }, - { - "sizeBytes": 268435456, - "bandwidthGbps": 71.32, - "latency": 3.76377, - "sizeClass": null, - "correct": true - }, - { - "sizeBytes": 1073741824, - "bandwidthGbps": 71.79, - "latency": 14.95774, - "sizeClass": null, - "correct": true - } - ] - }, - { - "id": "cxt-a10511d5", - "identity": "rl-mesh|mi355x|mi355x-xgmi|xgmi|trainer_to_generator|redistribute|ms", - "cohortIdentity": "rl-mesh|mi355x|xgmi", - "family": "rl-mesh", - "sku": "mi355x", - "topologyClass": "mi355x-xgmi", - "transport": "xgmi", - "operation": "trainer_to_generator", - "subtype": "redistribute", - "valid": true, - "status": "valid", - "note": "peak 72 GB/s · world=8: trainer 4 <-> generator 4", - "peakBandwidthGbps": 71.79, - "latencyUnit": "ms", - "colorKey": "mi355x_a10511d5", - "label": "MI355X · trn->gen · redistribute", - "generatedAt": "2026-06-28T05:12:36.633047+00:00", - "run": { - "id": null, - "url": null, - "createdAt": "2026-06-28T05:12:36.633047+00:00", - "sha": null + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 256, + 
"p50_amplification": 1.01, + "p99_amplification": 1.013 }, - "rows": [ - { - "sizeBytes": 1048576, - "bandwidthGbps": 0.01, - "latency": 97.26006, - "sizeClass": null, - "correct": true - }, - { - "sizeBytes": 4194304, - "bandwidthGbps": 14.75, - "latency": 0.28435, - "sizeClass": null, - "correct": true - }, - { - "sizeBytes": 16777216, - "bandwidthGbps": 50.28, - "latency": 0.33368, - "sizeClass": null, - "correct": true - }, - { - "sizeBytes": 67108864, - "bandwidthGbps": 65.3, - "latency": 1.02763, - "sizeClass": null, - "correct": true - }, - { - "sizeBytes": 268435456, - "bandwidthGbps": 70.05, - "latency": 3.83224, - "sizeClass": null, - "correct": true - }, - { - "sizeBytes": 1073741824, - "bandwidthGbps": 71.74, - "latency": 14.96724, - "sizeClass": null, - "correct": true - } - ] - } - ], - "scannedRuns": 313, - "scannedArtifacts": 891, - "contributingRuns": 313, - "generatedAt": "2026-06-29T02:42:52.989Z" + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 512, + "p50_amplification": 1.003, + "p99_amplification": 0.994 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 1024, + "p50_amplification": 0.992, + "p99_amplification": 0.999 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 2048, + "p50_amplification": 1.006, + "p99_amplification": 1.016 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 4096, + "p50_amplification": 1.004, + "p99_amplification": 1.003 + } + ] + }, + "nccl": [], + "offload": [], + "copyEngine": [], + "kvCache": [], + "rlMesh": [], + "scannedRuns": 4, + "scannedArtifacts": 4, + "contributingRuns": 4, + "generatedAt": "2026-06-29T14:13:22.285Z" } diff --git a/packages/app/src/components/collectivex/CollectiveXDisplay.tsx b/packages/app/src/components/collectivex/CollectiveXDisplay.tsx index 9c90e219..caa42258 100644 --- 
a/packages/app/src/components/collectivex/CollectiveXDisplay.tsx +++ b/packages/app/src/components/collectivex/CollectiveXDisplay.tsx @@ -177,7 +177,7 @@ function displaySeriesLabel(item: CollectiveXSeries): string { } function backendFilterValue(item: CollectiveXSeries): string { - return collectiveXBackendLabel(item.backend, item.backendVersion); + return collectiveXBackendLabel(item.backend, item.backendVersion, item.shape.kernelGeneration); } function backendSortRank(value: string): number { @@ -274,7 +274,7 @@ export default function CollectiveXDisplay() { const [percentile, setPercentile] = useState('p99'); const [suite, setSuite] = useState('resource-constrained'); const [routing, setRouting] = useState('uniform'); - const [publication, setPublication] = useState('official-headline'); + const [publication, setPublication] = useState('all'); const [xAxis, setXAxis] = useState('tokens-per-rank'); const [yAxis, setYAxis] = useState('latency'); const [xScaleType, setXScaleType] = useState('log'); diff --git a/packages/app/src/components/collectivex/data.test.ts b/packages/app/src/components/collectivex/data.test.ts index ef95d335..8b200ed4 100644 --- a/packages/app/src/components/collectivex/data.test.ts +++ b/packages/app/src/components/collectivex/data.test.ts @@ -206,6 +206,7 @@ describe('normalizeCollectiveXDocument', () => { const series = normalized(v2); expect(series.label).toContain('deepep v2'); + expect(series.shape.kernelGeneration).toBe('v2'); expect(collectiveXSeriesLabel({ ...series, label: 'MI355X EP8 · deepep · bf16' })).toContain( 'deepep v2', ); diff --git a/packages/app/src/components/collectivex/data.ts b/packages/app/src/components/collectivex/data.ts index af0125a2..2b4c292f 100644 --- a/packages/app/src/components/collectivex/data.ts +++ b/packages/app/src/components/collectivex/data.ts @@ -18,6 +18,7 @@ import type { CollectiveXRunSource, CollectiveXScalingKind, CollectiveXScalingPoint, + CollectiveXShape, CollectiveXSummaryCard, 
CollectiveXSensitivity, CollectiveXSeries, @@ -180,14 +181,25 @@ function backendKernelGeneration(backend: string, version: string | null): strin return 'v1'; } -export function collectiveXBackendLabel(backend: string, version: string | null): string { - return backendKernelGeneration(backend, version) === 'v2' ? `${backend} v2` : backend; +export function collectiveXBackendLabel( + backend: string, + version: string | null, + kernelGeneration?: string | null, +): string { + const generation = kernelGeneration || backendKernelGeneration(backend, version); + return generation === 'v2' ? `${backend} v2` : backend; } export function collectiveXSeriesLabel( - series: Pick, + series: Pick & { + shape?: Pick; + }, ): string { - const backendLabel = collectiveXBackendLabel(series.backend, series.backendVersion); + const backendLabel = collectiveXBackendLabel( + series.backend, + series.backendVersion, + series.shape?.kernelGeneration, + ); if (backendLabel === series.backend) return series.label; const backendSegment = `· ${series.backend} ·`; @@ -381,6 +393,7 @@ function buildLabel(config: ReturnType): string { return `${config.sku.toUpperCase()} EP${config.epSize ?? '?'} · ${collectiveXBackendLabel( config.backend, config.backendVersion, + config.backendKernelGeneration, )} · ${config.dispatchDtype}${suffixes.length > 0 ? 
` ${suffixes.join(' ')}` : ''}${routing}`; } @@ -437,6 +450,7 @@ export function normalizeCollectiveXDocument( unevenTokens: config.unevenTokens, eplbEnabled: config.eplbEnabled, dispatchDtype: config.dispatchDtype, + kernelGeneration: config.backendKernelGeneration, activationProfile: config.activationProfile, combineQuantMode: config.combineQuantMode, }, @@ -1187,7 +1201,11 @@ export function comparisonDifferences(series: CollectiveXSeries[]): string[] { new Set(series.map(getValue)).size > 1; if (different((item) => item.topologyClass)) warnings.push('topology'); - if (different((item) => collectiveXBackendLabel(item.backend, item.backendVersion))) { + if ( + different((item) => + collectiveXBackendLabel(item.backend, item.backendVersion, item.shape.kernelGeneration), + ) + ) { warnings.push('backend generation'); } if (different((item) => item.epSize)) warnings.push('EP degree'); diff --git a/packages/app/src/components/collectivex/types.ts b/packages/app/src/components/collectivex/types.ts index b6e4f088..1783b70d 100644 --- a/packages/app/src/components/collectivex/types.ts +++ b/packages/app/src/components/collectivex/types.ts @@ -55,6 +55,7 @@ export interface CollectiveXShape { unevenTokens: string; eplbEnabled: boolean; dispatchDtype: string; + kernelGeneration: string; activationProfile: string; combineQuantMode: string; } diff --git a/packages/app/src/lib/collectivex-snapshot.test.ts b/packages/app/src/lib/collectivex-snapshot.test.ts index 11c176d0..1272cfb5 100644 --- a/packages/app/src/lib/collectivex-snapshot.test.ts +++ b/packages/app/src/lib/collectivex-snapshot.test.ts @@ -2,6 +2,7 @@ import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; const state = vi.hoisted(() => ({ documents: [] as unknown[], + entries: [] as { entryName: string; contents: string }[], })); vi.mock('@semianalysisai/inferencex-constants', () => ({ @@ -13,6 +14,12 @@ vi.mock('@semianalysisai/inferencex-constants', () => ({ vi.mock('adm-zip', () => ({ 
default: class MockAdmZip { getEntries() { + if (state.entries.length > 0) { + return state.entries.map((entry) => ({ + entryName: entry.entryName, + getData: () => Buffer.from(entry.contents), + })); + } return state.documents.map((document, index) => ({ entryName: `result-${index}.json`, getData: () => Buffer.from(JSON.stringify(document)), @@ -28,7 +35,7 @@ const originalFetch = globalThis.fetch; function workflowRun(id = 12345, status = 'completed') { return { id, - name: 'CollectiveX Experimental', + name: 'CollectiveX Sweep', head_branch: 'collectivex', head_sha: 'abc123', created_at: '2026-06-27T08:00:00Z', @@ -142,6 +149,7 @@ function artifactDownload() { beforeEach(() => { state.documents = [resultDocument()]; + state.entries = []; }); afterEach(() => { @@ -259,6 +267,49 @@ describe('generateCollectiveXSnapshot', () => { expect(snapshot.series[0].run.id).toBe('67890'); }); + it('loads branch-only aggregate artifacts with NDJSON entries', async () => { + const requestedUrls: string[] = []; + state.entries = [ + { + entryName: 'collectivex_ep.ndjson', + contents: `${JSON.stringify(resultDocument())}\n${JSON.stringify(ncclDocument())}\n`, + }, + ]; + globalThis.fetch = vi.fn((input) => { + const url = String(input); + requestedUrls.push(url); + if (url.includes('/actions/workflows/collectivex-experimental.yml/runs?')) { + return Promise.resolve({ + ok: false, + status: 404, + } as Response); + } + if (url.includes('/actions/runs?')) { + return Promise.resolve({ + ok: true, + json: () => Promise.resolve({ workflow_runs: [workflowRun()] }), + } as Response); + } + if (url.includes('/actions/runs/12345/artifacts')) { + return Promise.resolve({ + ok: true, + json: () => Promise.resolve(artifactList('cxsweep-aggregate-deepep-v2-12345')), + } as Response); + } + if (url === 'https://api.github.com/artifacts/1/zip') { + return Promise.resolve(artifactDownload() as Response); + } + throw new Error(`Unexpected URL: ${url}`); + }); + + const snapshot = await 
generateCollectiveXSnapshot({ token: 'test-token' }); + + expect(requestedUrls.some((url) => url.includes('/actions/runs?'))).toBe(true); + expect(snapshot.scannedArtifacts).toBe(1); + expect(snapshot.series).toHaveLength(1); + expect(snapshot.nccl).toHaveLength(1); + }); + it('retries a transient GitHub request failure before generating the snapshot', async () => { globalThis.fetch = vi .fn() diff --git a/packages/app/src/lib/collectivex-snapshot.ts b/packages/app/src/lib/collectivex-snapshot.ts index a26d7986..85d308ca 100644 --- a/packages/app/src/lib/collectivex-snapshot.ts +++ b/packages/app/src/lib/collectivex-snapshot.ts @@ -21,6 +21,7 @@ import type { import { extractZipEntries, type GithubArtifact, type GithubWorkflowRun } from './github-artifacts'; const WORKFLOW_FILE = 'collectivex-experimental.yml'; +const WORKFLOW_NAME = 'CollectiveX Sweep'; const WORKFLOW_BRANCH = 'collectivex'; const DEFAULT_MAX_DISCOVERY_RUNS = 500; const RUNS_PAGE_SIZE = 100; @@ -126,6 +127,9 @@ async function fetchCompletedCollectiveXRuns( `${GITHUB_API_BASE}/repos/${GITHUB_OWNER}/${GITHUB_REPO}/actions/workflows/${WORKFLOW_FILE}/runs?${params}`, token, ); + if (response.status === 404 && page === 1) { + return fetchCompletedCollectiveXRunsByBranch(token, maxDiscoveryRuns); + } if (!response.ok) { throw new Error(`Failed to list CollectiveX workflow runs: ${response.status}`); } @@ -139,6 +143,36 @@ async function fetchCompletedCollectiveXRuns( return runs.slice(0, maxDiscoveryRuns); } +async function fetchCompletedCollectiveXRunsByBranch( + token: string, + maxDiscoveryRuns: number, +): Promise { + const runs: GithubWorkflowRun[] = []; + + for (let page = 1; runs.length < maxDiscoveryRuns; page++) { + const params = new URLSearchParams({ + branch: WORKFLOW_BRANCH, + status: 'success', + per_page: String(RUNS_PAGE_SIZE), + page: String(page), + }); + const response = await fetchCollectiveXGithub( + `${GITHUB_API_BASE}/repos/${GITHUB_OWNER}/${GITHUB_REPO}/actions/runs?${params}`, + 
token, + ); + if (!response.ok) { + throw new Error(`Failed to list CollectiveX workflow runs: ${response.status}`); + } + + const payload = (await response.json()) as GithubWorkflowRunsResponse; + const pageRuns = payload.workflow_runs ?? []; + runs.push(...pageRuns.filter((run) => run.name === WORKFLOW_NAME)); + if (pageRuns.length < RUNS_PAGE_SIZE) break; + } + + return runs.slice(0, maxDiscoveryRuns); +} + async function fetchSourceRun(runId: string, token: string): Promise { const response = await fetchCollectiveXGithub( `${GITHUB_API_BASE}/repos/${GITHUB_OWNER}/${GITHUB_REPO}/actions/runs/${runId}`, @@ -171,7 +205,7 @@ async function discoverRuns( } function isCollectiveXArtifact(artifact: GithubArtifact): boolean { - return artifact.name.startsWith('collectivex_'); + return artifact.name.startsWith('collectivex_') || artifact.name.startsWith('cxsweep-aggregate-'); } async function fetchRunArtifacts(runId: string, token: string): Promise { @@ -214,17 +248,32 @@ async function discoverArtifacts( } function parseArtifactDocuments(buffer: Buffer, artifactName: string): unknown[] { - return extractZipEntries( + const parseError = (entryName: string, error: unknown) => { + console.warn(`Failed to parse ${entryName} from ${artifactName}:`, error); + }; + const jsonDocuments = extractZipEntries( buffer, '.json', (_entryName, contents) => { const parsed = JSON.parse(contents) as unknown; return Array.isArray(parsed) ? 
parsed : [parsed]; }, - (entryName, error) => { - console.warn(`Failed to parse ${entryName} from ${artifactName}:`, error); + parseError, + ); + const ndjsonDocuments = extractZipEntries( + buffer, + '.ndjson', + (_entryName, contents) => { + const documents: unknown[] = []; + for (const line of contents.split(/\r?\n/u)) { + const trimmed = line.trim(); + if (trimmed) documents.push(JSON.parse(trimmed) as unknown); + } + return documents; }, + parseError, ); + return [...jsonDocuments, ...ndjsonDocuments]; } async function downloadAndNormalize( From ea075bc8ac70ea4b4c28b18424601e1aa09f1634 Mon Sep 17 00:00:00 2001 From: InferenceX Data Bot Date: Tue, 30 Jun 2026 15:37:10 +0000 Subject: [PATCH 09/23] chore: trigger CollectiveX data update for 28452161275 --- .github/collectivex-source-run.env | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 .github/collectivex-source-run.env diff --git a/.github/collectivex-source-run.env b/.github/collectivex-source-run.env new file mode 100644 index 00000000..b6b60a42 --- /dev/null +++ b/.github/collectivex-source-run.env @@ -0,0 +1,5 @@ +source_run_id=28452161275 +source_sha=81f42c9f33ee1c888b2ad6201a06c3a57b5977d3 +source_workflow=CollectiveX Sweep +source_run_url=https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28452161275 +triggered_at=2026-06-30T15:37:09Z From 2cf1311abab404fc98e7f0211e34a3026f249d3c Mon Sep 17 00:00:00 2001 From: InferenceX Data Bot Date: Wed, 1 Jul 2026 04:47:09 +0000 Subject: [PATCH 10/23] chore: trigger CollectiveX data update for 28492830140 --- .github/collectivex-source-run.env | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/collectivex-source-run.env b/.github/collectivex-source-run.env index b6b60a42..aac78d80 100644 --- a/.github/collectivex-source-run.env +++ b/.github/collectivex-source-run.env @@ -1,5 +1,5 @@ -source_run_id=28452161275 -source_sha=81f42c9f33ee1c888b2ad6201a06c3a57b5977d3 +source_run_id=28492830140 
+source_sha=f0a8370506024a51822b260e5fe03fa07dc79cc3 source_workflow=CollectiveX Sweep -source_run_url=https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28452161275 -triggered_at=2026-06-30T15:37:09Z +source_run_url=https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28492830140 +triggered_at=2026-07-01T04:47:08Z From f59b2f4fc5aaf283fd3f2f7377ff594c05987733 Mon Sep 17 00:00:00 2001 From: InferenceX Data Bot Date: Wed, 1 Jul 2026 06:54:08 +0000 Subject: [PATCH 11/23] chore: trigger CollectiveX data update for 28497305436 --- .github/collectivex-source-run.env | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/collectivex-source-run.env b/.github/collectivex-source-run.env index aac78d80..caa15607 100644 --- a/.github/collectivex-source-run.env +++ b/.github/collectivex-source-run.env @@ -1,5 +1,5 @@ -source_run_id=28492830140 -source_sha=f0a8370506024a51822b260e5fe03fa07dc79cc3 +source_run_id=28497305436 +source_sha=1bad7116b56eeb924d6844f56b20703137a77ce2 source_workflow=CollectiveX Sweep -source_run_url=https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28492830140 -triggered_at=2026-07-01T04:47:08Z +source_run_url=https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28497305436 +triggered_at=2026-07-01T06:54:08Z From 9b4382475715e1cc0750ba1de837b1f1b2b5a5b5 Mon Sep 17 00:00:00 2001 From: InferenceX Data Bot Date: Wed, 1 Jul 2026 07:56:51 +0000 Subject: [PATCH 12/23] chore: trigger CollectiveX data update for 28500528370 --- .github/collectivex-source-run.env | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/collectivex-source-run.env b/.github/collectivex-source-run.env index caa15607..0d78611a 100644 --- a/.github/collectivex-source-run.env +++ b/.github/collectivex-source-run.env @@ -1,5 +1,5 @@ -source_run_id=28497305436 -source_sha=1bad7116b56eeb924d6844f56b20703137a77ce2 +source_run_id=28500528370 +source_sha=ffe663ee445159d523c53a0827979926309908b3 
source_workflow=CollectiveX Sweep -source_run_url=https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28497305436 -triggered_at=2026-07-01T06:54:08Z +source_run_url=https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28500528370 +triggered_at=2026-07-01T07:56:50Z From 21e5bba583d54f782ae3571f458497b325e2f8bc Mon Sep 17 00:00:00 2001 From: InferenceX Data Bot Date: Wed, 1 Jul 2026 09:00:19 +0000 Subject: [PATCH 13/23] chore: trigger CollectiveX data update for 28500063760 --- .github/collectivex-source-run.env | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/collectivex-source-run.env b/.github/collectivex-source-run.env index 0d78611a..e8b21894 100644 --- a/.github/collectivex-source-run.env +++ b/.github/collectivex-source-run.env @@ -1,5 +1,5 @@ -source_run_id=28500528370 -source_sha=ffe663ee445159d523c53a0827979926309908b3 +source_run_id=28500063760 +source_sha=689861b96cb67a15f87629216fdbb690d16c9da3 source_workflow=CollectiveX Sweep -source_run_url=https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28500528370 -triggered_at=2026-07-01T07:56:50Z +source_run_url=https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28500063760 +triggered_at=2026-07-01T09:00:19Z From ab499be69e63a414ca7e853bea2a54fdf8757128 Mon Sep 17 00:00:00 2001 From: InferenceX Data Bot Date: Wed, 1 Jul 2026 09:04:38 +0000 Subject: [PATCH 14/23] chore: trigger CollectiveX data update for 28500059839 --- .github/collectivex-source-run.env | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/collectivex-source-run.env b/.github/collectivex-source-run.env index e8b21894..9084467f 100644 --- a/.github/collectivex-source-run.env +++ b/.github/collectivex-source-run.env @@ -1,5 +1,5 @@ -source_run_id=28500063760 +source_run_id=28500059839 source_sha=689861b96cb67a15f87629216fdbb690d16c9da3 source_workflow=CollectiveX Sweep -source_run_url=https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28500063760 
-triggered_at=2026-07-01T09:00:19Z +source_run_url=https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28500059839 +triggered_at=2026-07-01T09:04:38Z From ebaa2802b43f1658f73cfb81865aa2c98a29eb01 Mon Sep 17 00:00:00 2001 From: InferenceX Data Bot Date: Wed, 1 Jul 2026 09:21:26 +0000 Subject: [PATCH 15/23] chore: trigger CollectiveX data update for 28500524185 --- .github/collectivex-source-run.env | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/collectivex-source-run.env b/.github/collectivex-source-run.env index 9084467f..1753d595 100644 --- a/.github/collectivex-source-run.env +++ b/.github/collectivex-source-run.env @@ -1,5 +1,5 @@ -source_run_id=28500059839 -source_sha=689861b96cb67a15f87629216fdbb690d16c9da3 +source_run_id=28500524185 +source_sha=ffe663ee445159d523c53a0827979926309908b3 source_workflow=CollectiveX Sweep -source_run_url=https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28500059839 -triggered_at=2026-07-01T09:04:38Z +source_run_url=https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28500524185 +triggered_at=2026-07-01T09:21:26Z From 4d99d8ecab283f53ca18aab4f3224d980abda036 Mon Sep 17 00:00:00 2001 From: InferenceX Data Bot Date: Wed, 1 Jul 2026 15:09:29 +0000 Subject: [PATCH 16/23] chore: trigger CollectiveX data update for 28522872429 --- .github/collectivex-source-run.env | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/collectivex-source-run.env b/.github/collectivex-source-run.env index 1753d595..f9a0ca24 100644 --- a/.github/collectivex-source-run.env +++ b/.github/collectivex-source-run.env @@ -1,5 +1,5 @@ -source_run_id=28500524185 -source_sha=ffe663ee445159d523c53a0827979926309908b3 +source_run_id=28522872429 +source_sha=081cb90af2aa9ab714c7d04fdad413fad25683b3 source_workflow=CollectiveX Sweep -source_run_url=https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28500524185 -triggered_at=2026-07-01T09:21:26Z 
+source_run_url=https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28522872429 +triggered_at=2026-07-01T15:09:28Z From 14e202fc62ef906ee699dd4b11be51fc35c2cd85 Mon Sep 17 00:00:00 2001 From: InferenceX Data Bot Date: Wed, 1 Jul 2026 17:05:33 +0000 Subject: [PATCH 17/23] chore: trigger CollectiveX data update for 28530579787 --- .github/collectivex-source-run.env | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/collectivex-source-run.env b/.github/collectivex-source-run.env index f9a0ca24..1ab7a038 100644 --- a/.github/collectivex-source-run.env +++ b/.github/collectivex-source-run.env @@ -1,5 +1,5 @@ -source_run_id=28522872429 -source_sha=081cb90af2aa9ab714c7d04fdad413fad25683b3 +source_run_id=28530579787 +source_sha=85d6159826b6feb1aa3d67fb0c202830de7ef6fd source_workflow=CollectiveX Sweep -source_run_url=https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28522872429 -triggered_at=2026-07-01T15:09:28Z +source_run_url=https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28530579787 +triggered_at=2026-07-01T17:05:33Z From 47b061db9570bb569e5e92d810a3ffaab3d99730 Mon Sep 17 00:00:00 2001 From: InferenceX Data Bot Date: Wed, 1 Jul 2026 17:26:20 +0000 Subject: [PATCH 18/23] chore: trigger CollectiveX data update for 28531976125 --- .github/collectivex-source-run.env | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/collectivex-source-run.env b/.github/collectivex-source-run.env index 1ab7a038..c9e2fa11 100644 --- a/.github/collectivex-source-run.env +++ b/.github/collectivex-source-run.env @@ -1,5 +1,5 @@ -source_run_id=28530579787 +source_run_id=28531976125 source_sha=85d6159826b6feb1aa3d67fb0c202830de7ef6fd source_workflow=CollectiveX Sweep -source_run_url=https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28530579787 -triggered_at=2026-07-01T17:05:33Z +source_run_url=https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28531976125 +triggered_at=2026-07-01T17:26:20Z From 
1474babf29ab2e4fa279891ccd4db1f081ad48ae Mon Sep 17 00:00:00 2001 From: InferenceX Data Bot Date: Wed, 1 Jul 2026 17:41:15 +0000 Subject: [PATCH 19/23] chore: trigger CollectiveX data update for 28535231056 --- .github/collectivex-source-run.env | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/collectivex-source-run.env b/.github/collectivex-source-run.env index c9e2fa11..db067d64 100644 --- a/.github/collectivex-source-run.env +++ b/.github/collectivex-source-run.env @@ -1,5 +1,5 @@ -source_run_id=28531976125 -source_sha=85d6159826b6feb1aa3d67fb0c202830de7ef6fd +source_run_id=28535231056 +source_sha=8cbd7c87d2b52f223b2e891b53dbbb4c65d76a83 source_workflow=CollectiveX Sweep -source_run_url=https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28531976125 -triggered_at=2026-07-01T17:26:20Z +source_run_url=https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28535231056 +triggered_at=2026-07-01T17:41:15Z From f19abe7966741b835357b86b15fb10e80569041a Mon Sep 17 00:00:00 2001 From: InferenceX Data Bot Date: Wed, 1 Jul 2026 17:42:04 +0000 Subject: [PATCH 20/23] chore: trigger CollectiveX data update for 28535221873 --- .github/collectivex-source-run.env | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/collectivex-source-run.env b/.github/collectivex-source-run.env index db067d64..b91c155f 100644 --- a/.github/collectivex-source-run.env +++ b/.github/collectivex-source-run.env @@ -1,5 +1,5 @@ -source_run_id=28535231056 +source_run_id=28535221873 source_sha=8cbd7c87d2b52f223b2e891b53dbbb4c65d76a83 source_workflow=CollectiveX Sweep -source_run_url=https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28535231056 -triggered_at=2026-07-01T17:41:15Z +source_run_url=https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28535221873 +triggered_at=2026-07-01T17:42:04Z From ada26815b3dd2585a9f3a90418283e068eee14fc Mon Sep 17 00:00:00 2001 From: InferenceX Data Bot Date: Wed, 1 Jul 2026 17:55:13 +0000 
Subject: [PATCH 21/23] chore: trigger CollectiveX data update for 28535235520 --- .github/collectivex-source-run.env | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/collectivex-source-run.env b/.github/collectivex-source-run.env index b91c155f..e4a6da15 100644 --- a/.github/collectivex-source-run.env +++ b/.github/collectivex-source-run.env @@ -1,5 +1,5 @@ -source_run_id=28535221873 +source_run_id=28535235520 source_sha=8cbd7c87d2b52f223b2e891b53dbbb4c65d76a83 source_workflow=CollectiveX Sweep -source_run_url=https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28535221873 -triggered_at=2026-07-01T17:42:04Z +source_run_url=https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28535235520 +triggered_at=2026-07-01T17:55:12Z From f1f5eafb63bed3c11a1b80c1d0271b20b74c587b Mon Sep 17 00:00:00 2001 From: InferenceX Data Bot Date: Wed, 1 Jul 2026 18:55:00 +0000 Subject: [PATCH 22/23] chore: trigger CollectiveX data update for 28535226475 --- .github/collectivex-source-run.env | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/collectivex-source-run.env b/.github/collectivex-source-run.env index e4a6da15..31b9fa89 100644 --- a/.github/collectivex-source-run.env +++ b/.github/collectivex-source-run.env @@ -1,5 +1,5 @@ -source_run_id=28535235520 +source_run_id=28535226475 source_sha=8cbd7c87d2b52f223b2e891b53dbbb4c65d76a83 source_workflow=CollectiveX Sweep -source_run_url=https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28535235520 -triggered_at=2026-07-01T17:55:12Z +source_run_url=https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28535226475 +triggered_at=2026-07-01T18:54:59Z From db663abf778dc8b7495a30d18c5ddd8398487bda Mon Sep 17 00:00:00 2001 From: InferenceX Data Bot Date: Wed, 1 Jul 2026 19:15:21 +0000 Subject: [PATCH 23/23] chore: trigger CollectiveX data update for 28534841204 --- .github/collectivex-source-run.env | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff 
--git a/.github/collectivex-source-run.env b/.github/collectivex-source-run.env index 31b9fa89..5db62c9a 100644 --- a/.github/collectivex-source-run.env +++ b/.github/collectivex-source-run.env @@ -1,5 +1,5 @@ -source_run_id=28535226475 +source_run_id=28534841204 source_sha=8cbd7c87d2b52f223b2e891b53dbbb4c65d76a83 source_workflow=CollectiveX Sweep -source_run_url=https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28535226475 -triggered_at=2026-07-01T18:54:59Z +source_run_url=https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28534841204 +triggered_at=2026-07-01T19:15:20Z